clang  20.0.0git
Parsing.cpp
Go to the documentation of this file.
1 //===--- Parsing.cpp - Parsing function implementations ---------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
10 #include "clang/AST/Expr.h"
12 #include "clang/Basic/CharInfo.h"
14 #include "clang/Lex/Lexer.h"
17 #include "llvm/ADT/StringMap.h"
18 #include "llvm/ADT/StringRef.h"
19 #include "llvm/Support/Errc.h"
20 #include "llvm/Support/Error.h"
21 #include <optional>
22 #include <string>
23 #include <utility>
24 #include <vector>
25 
26 using namespace clang;
27 using namespace transformer;
28 
29 // FIXME: This implementation is entirely separate from that of the AST
30 // matchers. Given the similarity of the languages and uses of the two parsers,
31 // the two should share a common parsing infrastructure, as should other
32 // Transformer types. We intend to unify this implementation soon to share as
33 // much as possible with the AST Matchers parsing.
34 
35 namespace {
36 using llvm::Expected;
37 
38 template <typename... Ts> using RangeSelectorOp = RangeSelector (*)(Ts...);
39 
40 struct ParseState {
41  // The remaining input to be processed.
42  StringRef Input;
43  // The original input. Not modified during parsing; only for reference in
44  // error reporting.
45  StringRef OriginalInput;
46 };
47 
48 // Represents an intermediate result returned by a parsing function. Functions
49 // that don't generate values should use `std::nullopt`
50 template <typename ResultType> struct ParseProgress {
51  ParseState State;
52  // Intermediate result generated by the Parser.
53  ResultType Value;
54 };
55 
56 template <typename T> using ExpectedProgress = llvm::Expected<ParseProgress<T>>;
57 template <typename T> using ParseFunction = ExpectedProgress<T> (*)(ParseState);
58 
59 class ParseError : public llvm::ErrorInfo<ParseError> {
60 public:
61  // Required field for all ErrorInfo derivatives.
62  static char ID;
63 
64  ParseError(size_t Pos, std::string ErrorMsg, std::string InputExcerpt)
65  : Pos(Pos), ErrorMsg(std::move(ErrorMsg)),
66  Excerpt(std::move(InputExcerpt)) {}
67 
68  void log(llvm::raw_ostream &OS) const override {
69  OS << "parse error at position (" << Pos << "): " << ErrorMsg
70  << ": " + Excerpt;
71  }
72 
73  std::error_code convertToErrorCode() const override {
74  return llvm::inconvertibleErrorCode();
75  }
76 
77  // Position of the error in the input string.
78  size_t Pos;
79  std::string ErrorMsg;
80  // Excerpt of the input starting at the error position.
81  std::string Excerpt;
82 };
83 
84 char ParseError::ID;
85 } // namespace
86 
87 static const llvm::StringMap<RangeSelectorOp<std::string>> &
89  static const llvm::StringMap<RangeSelectorOp<std::string>> M = {
90  {"name", name},
91  {"node", node},
92  {"statement", statement},
93  {"statements", statements},
94  {"member", member},
95  {"callArgs", callArgs},
96  {"elseBranch", elseBranch},
97  {"initListElements", initListElements}};
98  return M;
99 }
100 
101 static const llvm::StringMap<RangeSelectorOp<RangeSelector>> &
103  static const llvm::StringMap<RangeSelectorOp<RangeSelector>> M = {
104  {"before", before}, {"after", after}, {"expansion", expansion}};
105  return M;
106 }
107 
108 static const llvm::StringMap<RangeSelectorOp<std::string, std::string>> &
110  static const llvm::StringMap<RangeSelectorOp<std::string, std::string>> M = {
111  {"encloseNodes", encloseNodes}};
112  return M;
113 }
114 
115 static const llvm::StringMap<RangeSelectorOp<RangeSelector, RangeSelector>> &
117  static const llvm::StringMap<RangeSelectorOp<RangeSelector, RangeSelector>>
118  M = {{"enclose", enclose}, {"between", between}};
119  return M;
120 }
121 
122 template <typename Element>
123 std::optional<Element> findOptional(const llvm::StringMap<Element> &Map,
124  llvm::StringRef Key) {
125  auto it = Map.find(Key);
126  if (it == Map.end())
127  return std::nullopt;
128  return it->second;
129 }
130 
131 template <typename ResultType>
132 ParseProgress<ResultType> makeParseProgress(ParseState State,
133  ResultType Result) {
134  return ParseProgress<ResultType>{State, std::move(Result)};
135 }
136 
137 static llvm::Error makeParseError(const ParseState &S, std::string ErrorMsg) {
138  size_t Pos = S.OriginalInput.size() - S.Input.size();
139  return llvm::make_error<ParseError>(Pos, std::move(ErrorMsg),
140  S.OriginalInput.substr(Pos, 20).str());
141 }
142 
143 // Returns a new ParseState that advances \c S by \c N characters.
144 static ParseState advance(ParseState S, size_t N) {
145  S.Input = S.Input.drop_front(N);
146  return S;
147 }
148 
149 static StringRef consumeWhitespace(StringRef S) {
150  return S.drop_while([](char c) { return isASCII(c) && isWhitespace(c); });
151 }
152 
153 // Parses a single expected character \c c from \c State, skipping preceding
154 // whitespace. Error if the expected character isn't found.
155 static ExpectedProgress<std::nullopt_t> parseChar(char c, ParseState State) {
156  State.Input = consumeWhitespace(State.Input);
157  if (State.Input.empty() || State.Input.front() != c)
158  return makeParseError(State,
159  ("expected char not found: " + llvm::Twine(c)).str());
160  return makeParseProgress(advance(State, 1), std::nullopt);
161 }
162 
163 // Parses an identitifer "token" -- handles preceding whitespace.
164 static ExpectedProgress<std::string> parseId(ParseState State) {
165  State.Input = consumeWhitespace(State.Input);
166  auto Id = State.Input.take_while(
167  [](char c) { return isASCII(c) && isAsciiIdentifierContinue(c); });
168  if (Id.empty())
169  return makeParseError(State, "failed to parse name");
170  return makeParseProgress(advance(State, Id.size()), Id.str());
171 }
172 
173 // For consistency with the AST matcher parser and C++ code, node ids are
174 // written as strings. However, we do not support escaping in the string.
175 static ExpectedProgress<std::string> parseStringId(ParseState State) {
176  State.Input = consumeWhitespace(State.Input);
177  if (State.Input.empty())
178  return makeParseError(State, "unexpected end of input");
179  if (!State.Input.consume_front("\""))
180  return makeParseError(
181  State,
182  "expecting string, but encountered other character or end of input");
183 
184  StringRef Id = State.Input.take_until([](char c) { return c == '"'; });
185  if (State.Input.size() == Id.size())
186  return makeParseError(State, "unterminated string");
187  // Advance past the trailing quote as well.
188  return makeParseProgress(advance(State, Id.size() + 1), Id.str());
189 }
190 
191 // Parses a single element surrounded by parens. `Op` is applied to the parsed
192 // result to create the result of this function call.
193 template <typename T>
194 ExpectedProgress<RangeSelector> parseSingle(ParseFunction<T> ParseElement,
195  RangeSelectorOp<T> Op,
196  ParseState State) {
197  auto P = parseChar('(', State);
198  if (!P)
199  return P.takeError();
200 
201  auto E = ParseElement(P->State);
202  if (!E)
203  return E.takeError();
204 
205  P = parseChar(')', E->State);
206  if (!P)
207  return P.takeError();
208 
209  return makeParseProgress(P->State, Op(std::move(E->Value)));
210 }
211 
212 // Parses a pair of elements surrounded by parens and separated by comma. `Op`
213 // is applied to the parsed results to create the result of this function call.
214 template <typename T>
215 ExpectedProgress<RangeSelector> parsePair(ParseFunction<T> ParseElement,
216  RangeSelectorOp<T, T> Op,
217  ParseState State) {
218  auto P = parseChar('(', State);
219  if (!P)
220  return P.takeError();
221 
222  auto Left = ParseElement(P->State);
223  if (!Left)
224  return Left.takeError();
225 
226  P = parseChar(',', Left->State);
227  if (!P)
228  return P.takeError();
229 
230  auto Right = ParseElement(P->State);
231  if (!Right)
232  return Right.takeError();
233 
234  P = parseChar(')', Right->State);
235  if (!P)
236  return P.takeError();
237 
238  return makeParseProgress(P->State,
239  Op(std::move(Left->Value), std::move(Right->Value)));
240 }
241 
242 // Parses input for a stencil operator(single arg ops like AsValue, MemberOp or
243 // Id operator). Returns StencilType representing the operator on success and
244 // error if it fails to parse input for an operator.
245 static ExpectedProgress<RangeSelector>
247  auto Id = parseId(State);
248  if (!Id)
249  return Id.takeError();
250 
251  std::string OpName = std::move(Id->Value);
252  if (auto Op = findOptional(getUnaryStringSelectors(), OpName))
253  return parseSingle(parseStringId, *Op, Id->State);
254 
255  if (auto Op = findOptional(getUnaryRangeSelectors(), OpName))
256  return parseSingle(parseRangeSelectorImpl, *Op, Id->State);
257 
258  if (auto Op = findOptional(getBinaryStringSelectors(), OpName))
259  return parsePair(parseStringId, *Op, Id->State);
260 
261  if (auto Op = findOptional(getBinaryRangeSelectors(), OpName))
262  return parsePair(parseRangeSelectorImpl, *Op, Id->State);
263 
264  return makeParseError(State, "unknown selector name: " + OpName);
265 }
266 
268  ParseState State = {Input, Input};
269  ExpectedProgress<RangeSelector> Result = parseRangeSelectorImpl(State);
270  if (!Result)
271  return Result.takeError();
272  State = Result->State;
273  // Discard any potentially trailing whitespace.
274  State.Input = consumeWhitespace(State.Input);
275  if (State.Input.empty())
276  return Result->Value;
277  return makeParseError(State, "unexpected input after selector");
278 }
StringRef P
static char ID
Definition: Arena.cpp:183
Expr * E
static const llvm::StringMap< RangeSelectorOp< RangeSelector, RangeSelector > > & getBinaryRangeSelectors()
Definition: Parsing.cpp:116
static ExpectedProgress< std::nullopt_t > parseChar(char c, ParseState State)
Definition: Parsing.cpp:155
ExpectedProgress< RangeSelector > parseSingle(ParseFunction< T > ParseElement, RangeSelectorOp< T > Op, ParseState State)
Definition: Parsing.cpp:194
static const llvm::StringMap< RangeSelectorOp< RangeSelector > > & getUnaryRangeSelectors()
Definition: Parsing.cpp:102
static StringRef consumeWhitespace(StringRef S)
Definition: Parsing.cpp:149
static ExpectedProgress< RangeSelector > parseRangeSelectorImpl(ParseState State)
Definition: Parsing.cpp:246
ParseProgress< ResultType > makeParseProgress(ParseState State, ResultType Result)
Definition: Parsing.cpp:132
static ExpectedProgress< std::string > parseId(ParseState State)
Definition: Parsing.cpp:164
static llvm::Error makeParseError(const ParseState &S, std::string ErrorMsg)
Definition: Parsing.cpp:137
static ParseState advance(ParseState S, size_t N)
Definition: Parsing.cpp:144
static const llvm::StringMap< RangeSelectorOp< std::string, std::string > > & getBinaryStringSelectors()
Definition: Parsing.cpp:109
static const llvm::StringMap< RangeSelectorOp< std::string > > & getUnaryStringSelectors()
Definition: Parsing.cpp:88
ExpectedProgress< RangeSelector > parsePair(ParseFunction< T > ParseElement, RangeSelectorOp< T, T > Op, ParseState State)
Definition: Parsing.cpp:215
static ExpectedProgress< std::string > parseStringId(ParseState State)
Definition: Parsing.cpp:175
std::optional< Element > findOptional(const llvm::StringMap< Element > &Map, llvm::StringRef Key)
Definition: Parsing.cpp:123
Defines parsing functions for Transformer types.
Defines a combinator library supporting the definition of selectors, which select source ranges based...
uint32_t Id
Definition: SemaARM.cpp:1144
Defines the clang::SourceLocation class and associated facilities.
LineState State
__device__ __2f16 float c
RangeSelector initListElements(std::string ID)
RangeSelector enclose(RangeSelector Begin, RangeSelector End)
Selects from the start of Begin and to the end of End.
RangeSelector member(std::string ID)
Given a MemberExpr, selects the member token.
RangeSelector between(RangeSelector R1, RangeSelector R2)
Selects the range between R1 and `R2.
Definition: RangeSelector.h:60
RangeSelector elseBranch(std::string ID)
Given an \IfStmt (bound to ID), selects the range of the else branch, starting from the else keyword.
RangeSelector node(std::string ID)
Selects a node, including trailing semicolon, if any (for declarations and non-expression statements)...
MatchConsumer< CharSourceRange > RangeSelector
Definition: RangeSelector.h:27
RangeSelector encloseNodes(std::string BeginID, std::string EndID)
Convenience version of range where end-points are bound nodes.
RangeSelector after(RangeSelector Selector)
Selects the point immediately following Selector.
RangeSelector callArgs(std::string ID)
RangeSelector before(RangeSelector Selector)
Selects the (empty) range [B,B) when Selector selects the range [B,E).
llvm::Expected< RangeSelector > parseRangeSelector(llvm::StringRef Input)
Parses a string representation of a RangeSelector.
Definition: Parsing.cpp:267
RangeSelector statement(std::string ID)
Selects a node, including trailing semicolon (always).
RangeSelector expansion(RangeSelector S)
Selects the range from which S was expanded (possibly along with other source), if S is an expansion,...
RangeSelector statements(std::string ID)
RangeSelector name(std::string ID)
Given a node with a "name", (like NamedDecl, DeclRefExpr, CxxCtorInitializer, and TypeLoc) selects th...
The JSON file list parser is used to communicate input to InstallAPI.
LLVM_READNONE bool isASCII(char c)
Returns true if a byte is an ASCII character.
Definition: CharInfo.h:41
LLVM_READONLY bool isAsciiIdentifierContinue(unsigned char c)
Definition: CharInfo.h:61
LLVM_READONLY bool isWhitespace(unsigned char c)
Return true if this character is horizontal or vertical ASCII whitespace: ' ', '\t',...
Definition: CharInfo.h:108
#define log(__x)
Definition: tgmath.h:460