clang  19.0.0git
Parser.cpp
Go to the documentation of this file.
1 //===- Parser.cpp - Matcher expression parser -----------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// Recursive parser implementation for the matcher expression grammar.
11 ///
12 //===----------------------------------------------------------------------===//
13 
18 #include "clang/Basic/CharInfo.h"
19 #include "llvm/ADT/StringRef.h"
20 #include "llvm/Support/ErrorHandling.h"
21 #include "llvm/Support/ManagedStatic.h"
22 #include <algorithm>
23 #include <cassert>
24 #include <cerrno>
25 #include <cstddef>
26 #include <cstdlib>
27 #include <optional>
28 #include <string>
29 #include <utility>
30 #include <vector>
31 
32 namespace clang {
33 namespace ast_matchers {
34 namespace dynamic {
35 
36 /// Simple structure to hold information for one token from the parser.
38  /// Different possible tokens.
39  enum TokenKind {
51  };
52 
53  /// Some known identifiers.
54  static const char* const ID_Bind;
55  static const char *const ID_With;
56 
57  TokenInfo() = default;
58 
59  StringRef Text;
63 };
64 
65 const char* const Parser::TokenInfo::ID_Bind = "bind";
66 const char *const Parser::TokenInfo::ID_With = "with";
67 
68 /// Simple tokenizer for the parser.
70 public:
71  explicit CodeTokenizer(StringRef &MatcherCode, Diagnostics *Error)
72  : Code(MatcherCode), StartOfLine(MatcherCode), Error(Error) {
73  NextToken = getNextToken();
74  }
75 
76  CodeTokenizer(StringRef &MatcherCode, Diagnostics *Error,
77  unsigned CodeCompletionOffset)
78  : Code(MatcherCode), StartOfLine(MatcherCode), Error(Error),
79  CodeCompletionLocation(MatcherCode.data() + CodeCompletionOffset) {
80  NextToken = getNextToken();
81  }
82 
83  /// Returns but doesn't consume the next token.
84  const TokenInfo &peekNextToken() const { return NextToken; }
85 
86  /// Consumes and returns the next token.
88  TokenInfo ThisToken = NextToken;
89  NextToken = getNextToken();
90  return ThisToken;
91  }
92 
94  while (NextToken.Kind == TokenInfo::TK_NewLine)
95  NextToken = getNextToken();
96  return NextToken;
97  }
98 
100  SkipNewlines();
101  if (NextToken.Kind == TokenInfo::TK_Eof)
102  return NextToken;
103  return consumeNextToken();
104  }
105 
106  TokenInfo::TokenKind nextTokenKind() const { return NextToken.Kind; }
107 
108 private:
109  TokenInfo getNextToken() {
110  consumeWhitespace();
111  TokenInfo Result;
112  Result.Range.Start = currentLocation();
113 
114  if (CodeCompletionLocation && CodeCompletionLocation <= Code.data()) {
115  Result.Kind = TokenInfo::TK_CodeCompletion;
116  Result.Text = StringRef(CodeCompletionLocation, 0);
117  CodeCompletionLocation = nullptr;
118  return Result;
119  }
120 
121  if (Code.empty()) {
122  Result.Kind = TokenInfo::TK_Eof;
123  Result.Text = "";
124  return Result;
125  }
126 
127  switch (Code[0]) {
128  case '#':
129  Code = Code.drop_until([](char c) { return c == '\n'; });
130  return getNextToken();
131  case ',':
132  Result.Kind = TokenInfo::TK_Comma;
133  Result.Text = Code.substr(0, 1);
134  Code = Code.drop_front();
135  break;
136  case '.':
137  Result.Kind = TokenInfo::TK_Period;
138  Result.Text = Code.substr(0, 1);
139  Code = Code.drop_front();
140  break;
141  case '\n':
142  ++Line;
143  StartOfLine = Code.drop_front();
144  Result.Kind = TokenInfo::TK_NewLine;
145  Result.Text = Code.substr(0, 1);
146  Code = Code.drop_front();
147  break;
148  case '(':
149  Result.Kind = TokenInfo::TK_OpenParen;
150  Result.Text = Code.substr(0, 1);
151  Code = Code.drop_front();
152  break;
153  case ')':
154  Result.Kind = TokenInfo::TK_CloseParen;
155  Result.Text = Code.substr(0, 1);
156  Code = Code.drop_front();
157  break;
158 
159  case '"':
160  case '\'':
161  // Parse a string literal.
162  consumeStringLiteral(&Result);
163  break;
164 
165  case '0': case '1': case '2': case '3': case '4':
166  case '5': case '6': case '7': case '8': case '9':
167  // Parse an unsigned and float literal.
168  consumeNumberLiteral(&Result);
169  break;
170 
171  default:
172  if (isAlphanumeric(Code[0])) {
173  // Parse an identifier
174  size_t TokenLength = 1;
175  while (true) {
176  // A code completion location in/immediately after an identifier will
177  // cause the portion of the identifier before the code completion
178  // location to become a code completion token.
179  if (CodeCompletionLocation == Code.data() + TokenLength) {
180  CodeCompletionLocation = nullptr;
181  Result.Kind = TokenInfo::TK_CodeCompletion;
182  Result.Text = Code.substr(0, TokenLength);
183  Code = Code.drop_front(TokenLength);
184  return Result;
185  }
186  if (TokenLength == Code.size() || !isAlphanumeric(Code[TokenLength]))
187  break;
188  ++TokenLength;
189  }
190  if (TokenLength == 4 && Code.starts_with("true")) {
191  Result.Kind = TokenInfo::TK_Literal;
192  Result.Value = true;
193  } else if (TokenLength == 5 && Code.starts_with("false")) {
194  Result.Kind = TokenInfo::TK_Literal;
195  Result.Value = false;
196  } else {
197  Result.Kind = TokenInfo::TK_Ident;
198  Result.Text = Code.substr(0, TokenLength);
199  }
200  Code = Code.drop_front(TokenLength);
201  } else {
202  Result.Kind = TokenInfo::TK_InvalidChar;
203  Result.Text = Code.substr(0, 1);
204  Code = Code.drop_front(1);
205  }
206  break;
207  }
208 
209  Result.Range.End = currentLocation();
210  return Result;
211  }
212 
213  /// Consume an unsigned and float literal.
214  void consumeNumberLiteral(TokenInfo *Result) {
215  bool isFloatingLiteral = false;
216  unsigned Length = 1;
217  if (Code.size() > 1) {
218  // Consume the 'x' or 'b' radix modifier, if present.
219  switch (toLowercase(Code[1])) {
220  case 'x': case 'b': Length = 2;
221  }
222  }
223  while (Length < Code.size() && isHexDigit(Code[Length]))
224  ++Length;
225 
226  // Try to recognize a floating point literal.
227  while (Length < Code.size()) {
228  char c = Code[Length];
229  if (c == '-' || c == '+' || c == '.' || isHexDigit(c)) {
230  isFloatingLiteral = true;
231  Length++;
232  } else {
233  break;
234  }
235  }
236 
237  Result->Text = Code.substr(0, Length);
238  Code = Code.drop_front(Length);
239 
240  if (isFloatingLiteral) {
241  char *end;
242  errno = 0;
243  std::string Text = Result->Text.str();
244  double doubleValue = strtod(Text.c_str(), &end);
245  if (*end == 0 && errno == 0) {
246  Result->Kind = TokenInfo::TK_Literal;
247  Result->Value = doubleValue;
248  return;
249  }
250  } else {
251  unsigned Value;
252  if (!Result->Text.getAsInteger(0, Value)) {
253  Result->Kind = TokenInfo::TK_Literal;
254  Result->Value = Value;
255  return;
256  }
257  }
258 
259  SourceRange Range;
260  Range.Start = Result->Range.Start;
261  Range.End = currentLocation();
262  Error->addError(Range, Error->ET_ParserNumberError) << Result->Text;
263  Result->Kind = TokenInfo::TK_Error;
264  }
265 
266  /// Consume a string literal.
267  ///
268  /// \c Code must be positioned at the start of the literal (the opening
269  /// quote). Consumed until it finds the same closing quote character.
270  void consumeStringLiteral(TokenInfo *Result) {
271  bool InEscape = false;
272  const char Marker = Code[0];
273  for (size_t Length = 1, Size = Code.size(); Length != Size; ++Length) {
274  if (InEscape) {
275  InEscape = false;
276  continue;
277  }
278  if (Code[Length] == '\\') {
279  InEscape = true;
280  continue;
281  }
282  if (Code[Length] == Marker) {
283  Result->Kind = TokenInfo::TK_Literal;
284  Result->Text = Code.substr(0, Length + 1);
285  Result->Value = Code.substr(1, Length - 1);
286  Code = Code.drop_front(Length + 1);
287  return;
288  }
289  }
290 
291  StringRef ErrorText = Code;
292  Code = Code.drop_front(Code.size());
293  SourceRange Range;
294  Range.Start = Result->Range.Start;
295  Range.End = currentLocation();
296  Error->addError(Range, Error->ET_ParserStringError) << ErrorText;
297  Result->Kind = TokenInfo::TK_Error;
298  }
299 
300  /// Consume all leading whitespace from \c Code.
301  void consumeWhitespace() {
302  // Don't trim newlines.
303  Code = Code.ltrim(" \t\v\f\r");
304  }
305 
306  SourceLocation currentLocation() {
307  SourceLocation Location;
308  Location.Line = Line;
309  Location.Column = Code.data() - StartOfLine.data() + 1;
310  return Location;
311  }
312 
313  StringRef &Code;
314  StringRef StartOfLine;
315  unsigned Line = 1;
316  Diagnostics *Error;
317  TokenInfo NextToken;
318  const char *CodeCompletionLocation = nullptr;
319 };
320 
321 Parser::Sema::~Sema() = default;
322 
324  llvm::ArrayRef<std::pair<MatcherCtor, unsigned>> Context) {
325  return {};
326 }
327 
328 std::vector<MatcherCompletion>
330  return {};
331 }
332 
335 
337  P->ContextStack.push_back(std::make_pair(C, 0u));
338  }
339 
341  P->ContextStack.pop_back();
342  }
343 
344  void nextArg() {
345  ++P->ContextStack.back().second;
346  }
347 };
348 
349 /// Parse expressions that start with an identifier.
350 ///
351 /// This function can parse named values and matchers.
352 /// In case of failure it will try to determine the user's intent to give
353 /// an appropriate error message.
354 bool Parser::parseIdentifierPrefixImpl(VariantValue *Value) {
355  const TokenInfo NameToken = Tokenizer->consumeNextToken();
356 
357  if (Tokenizer->nextTokenKind() != TokenInfo::TK_OpenParen) {
358  // Parse as a named value.
359  if (const VariantValue NamedValue =
360  NamedValues ? NamedValues->lookup(NameToken.Text)
361  : VariantValue()) {
362 
363  if (Tokenizer->nextTokenKind() != TokenInfo::TK_Period) {
364  *Value = NamedValue;
365  return true;
366  }
367 
368  std::string BindID;
369  Tokenizer->consumeNextToken();
370  TokenInfo ChainCallToken = Tokenizer->consumeNextToken();
371  if (ChainCallToken.Kind == TokenInfo::TK_CodeCompletion) {
372  addCompletion(ChainCallToken, MatcherCompletion("bind(\"", "bind", 1));
373  return false;
374  }
375 
376  if (ChainCallToken.Kind != TokenInfo::TK_Ident ||
377  (ChainCallToken.Text != TokenInfo::ID_Bind &&
378  ChainCallToken.Text != TokenInfo::ID_With)) {
379  Error->addError(ChainCallToken.Range,
381  return false;
382  }
383  if (ChainCallToken.Text == TokenInfo::ID_With) {
384 
385  Diagnostics::Context Ctx(Diagnostics::Context::ConstructMatcher, Error,
386  NameToken.Text, NameToken.Range);
387 
388  Error->addError(ChainCallToken.Range,
390  return false;
391  }
392  if (!parseBindID(BindID))
393  return false;
394 
395  assert(NamedValue.isMatcher());
396  std::optional<DynTypedMatcher> Result =
397  NamedValue.getMatcher().getSingleMatcher();
398  if (Result) {
399  std::optional<DynTypedMatcher> Bound = Result->tryBind(BindID);
400  if (Bound) {
402  return true;
403  }
404  }
405  return false;
406  }
407 
408  if (Tokenizer->nextTokenKind() == TokenInfo::TK_NewLine) {
409  Error->addError(Tokenizer->peekNextToken().Range,
410  Error->ET_ParserNoOpenParen)
411  << "NewLine";
412  return false;
413  }
414 
415  // If the syntax is correct and the name is not a matcher either, report
416  // unknown named value.
417  if ((Tokenizer->nextTokenKind() == TokenInfo::TK_Comma ||
418  Tokenizer->nextTokenKind() == TokenInfo::TK_CloseParen ||
419  Tokenizer->nextTokenKind() == TokenInfo::TK_NewLine ||
420  Tokenizer->nextTokenKind() == TokenInfo::TK_Eof) &&
421  !S->lookupMatcherCtor(NameToken.Text)) {
422  Error->addError(NameToken.Range, Error->ET_RegistryValueNotFound)
423  << NameToken.Text;
424  return false;
425  }
426  // Otherwise, fallback to the matcher parser.
427  }
428 
429  Tokenizer->SkipNewlines();
430 
431  assert(NameToken.Kind == TokenInfo::TK_Ident);
432  TokenInfo OpenToken = Tokenizer->consumeNextToken();
433  if (OpenToken.Kind != TokenInfo::TK_OpenParen) {
434  Error->addError(OpenToken.Range, Error->ET_ParserNoOpenParen)
435  << OpenToken.Text;
436  return false;
437  }
438 
439  std::optional<MatcherCtor> Ctor = S->lookupMatcherCtor(NameToken.Text);
440 
441  // Parse as a matcher expression.
442  return parseMatcherExpressionImpl(NameToken, OpenToken, Ctor, Value);
443 }
444 
445 bool Parser::parseBindID(std::string &BindID) {
446  // Parse the parenthesized argument to .bind("foo")
447  const TokenInfo OpenToken = Tokenizer->consumeNextToken();
448  const TokenInfo IDToken = Tokenizer->consumeNextTokenIgnoreNewlines();
449  const TokenInfo CloseToken = Tokenizer->consumeNextTokenIgnoreNewlines();
450 
451  // TODO: We could use different error codes for each/some to be more
452  // explicit about the syntax error.
453  if (OpenToken.Kind != TokenInfo::TK_OpenParen) {
454  Error->addError(OpenToken.Range, Error->ET_ParserMalformedBindExpr);
455  return false;
456  }
457  if (IDToken.Kind != TokenInfo::TK_Literal || !IDToken.Value.isString()) {
458  Error->addError(IDToken.Range, Error->ET_ParserMalformedBindExpr);
459  return false;
460  }
461  if (CloseToken.Kind != TokenInfo::TK_CloseParen) {
462  Error->addError(CloseToken.Range, Error->ET_ParserMalformedBindExpr);
463  return false;
464  }
465  BindID = IDToken.Value.getString();
466  return true;
467 }
468 
469 bool Parser::parseMatcherBuilder(MatcherCtor Ctor, const TokenInfo &NameToken,
470  const TokenInfo &OpenToken,
471  VariantValue *Value) {
472  std::vector<ParserValue> Args;
473  TokenInfo EndToken;
474 
475  Tokenizer->SkipNewlines();
476 
477  {
478  ScopedContextEntry SCE(this, Ctor);
479 
480  while (Tokenizer->nextTokenKind() != TokenInfo::TK_Eof) {
481  if (Tokenizer->nextTokenKind() == TokenInfo::TK_CloseParen) {
482  // End of args.
483  EndToken = Tokenizer->consumeNextToken();
484  break;
485  }
486  if (!Args.empty()) {
487  // We must find a , token to continue.
488  TokenInfo CommaToken = Tokenizer->consumeNextToken();
489  if (CommaToken.Kind != TokenInfo::TK_Comma) {
490  Error->addError(CommaToken.Range, Error->ET_ParserNoComma)
491  << CommaToken.Text;
492  return false;
493  }
494  }
495 
496  Diagnostics::Context Ctx(Diagnostics::Context::MatcherArg, Error,
497  NameToken.Text, NameToken.Range,
498  Args.size() + 1);
499  ParserValue ArgValue;
500  Tokenizer->SkipNewlines();
501 
502  if (Tokenizer->peekNextToken().Kind == TokenInfo::TK_CodeCompletion) {
503  addExpressionCompletions();
504  return false;
505  }
506 
507  TokenInfo NodeMatcherToken = Tokenizer->consumeNextToken();
508 
509  if (NodeMatcherToken.Kind != TokenInfo::TK_Ident) {
510  Error->addError(NameToken.Range, Error->ET_ParserFailedToBuildMatcher)
511  << NameToken.Text;
512  return false;
513  }
514 
515  ArgValue.Text = NodeMatcherToken.Text;
516  ArgValue.Range = NodeMatcherToken.Range;
517 
518  std::optional<MatcherCtor> MappedMatcher =
519  S->lookupMatcherCtor(ArgValue.Text);
520 
521  if (!MappedMatcher) {
522  Error->addError(NodeMatcherToken.Range,
524  << NodeMatcherToken.Text;
525  return false;
526  }
527 
528  ASTNodeKind NK = S->nodeMatcherType(*MappedMatcher);
529 
530  if (NK.isNone()) {
531  Error->addError(NodeMatcherToken.Range,
533  << NodeMatcherToken.Text;
534  return false;
535  }
536 
537  ArgValue.Value = NK;
538 
539  Tokenizer->SkipNewlines();
540  Args.push_back(ArgValue);
541 
542  SCE.nextArg();
543  }
544  }
545 
546  if (EndToken.Kind == TokenInfo::TK_Eof) {
547  Error->addError(OpenToken.Range, Error->ET_ParserNoCloseParen);
548  return false;
549  }
550 
551  internal::MatcherDescriptorPtr BuiltCtor =
552  S->buildMatcherCtor(Ctor, NameToken.Range, Args, Error);
553 
554  if (!BuiltCtor.get()) {
555  Error->addError(NameToken.Range, Error->ET_ParserFailedToBuildMatcher)
556  << NameToken.Text;
557  return false;
558  }
559 
560  std::string BindID;
561  if (Tokenizer->peekNextToken().Kind == TokenInfo::TK_Period) {
562  Tokenizer->consumeNextToken();
563  TokenInfo ChainCallToken = Tokenizer->consumeNextToken();
564  if (ChainCallToken.Kind == TokenInfo::TK_CodeCompletion) {
565  addCompletion(ChainCallToken, MatcherCompletion("bind(\"", "bind", 1));
566  addCompletion(ChainCallToken, MatcherCompletion("with(", "with", 1));
567  return false;
568  }
569  if (ChainCallToken.Kind != TokenInfo::TK_Ident ||
570  (ChainCallToken.Text != TokenInfo::ID_Bind &&
571  ChainCallToken.Text != TokenInfo::ID_With)) {
572  Error->addError(ChainCallToken.Range,
574  return false;
575  }
576  if (ChainCallToken.Text == TokenInfo::ID_Bind) {
577  if (!parseBindID(BindID))
578  return false;
579  Diagnostics::Context Ctx(Diagnostics::Context::ConstructMatcher, Error,
580  NameToken.Text, NameToken.Range);
581  SourceRange MatcherRange = NameToken.Range;
582  MatcherRange.End = ChainCallToken.Range.End;
583  VariantMatcher Result = S->actOnMatcherExpression(
584  BuiltCtor.get(), MatcherRange, BindID, {}, Error);
585  if (Result.isNull())
586  return false;
587 
588  *Value = Result;
589  return true;
590  } else if (ChainCallToken.Text == TokenInfo::ID_With) {
591  Tokenizer->SkipNewlines();
592 
593  if (Tokenizer->nextTokenKind() != TokenInfo::TK_OpenParen) {
594  StringRef ErrTxt = Tokenizer->nextTokenKind() == TokenInfo::TK_Eof
595  ? StringRef("EOF")
596  : Tokenizer->peekNextToken().Text;
597  Error->addError(Tokenizer->peekNextToken().Range,
598  Error->ET_ParserNoOpenParen)
599  << ErrTxt;
600  return false;
601  }
602 
603  TokenInfo WithOpenToken = Tokenizer->consumeNextToken();
604 
605  return parseMatcherExpressionImpl(NameToken, WithOpenToken,
606  BuiltCtor.get(), Value);
607  }
608  }
609 
610  Diagnostics::Context Ctx(Diagnostics::Context::ConstructMatcher, Error,
611  NameToken.Text, NameToken.Range);
612  SourceRange MatcherRange = NameToken.Range;
613  MatcherRange.End = EndToken.Range.End;
614  VariantMatcher Result = S->actOnMatcherExpression(
615  BuiltCtor.get(), MatcherRange, BindID, {}, Error);
616  if (Result.isNull())
617  return false;
618 
619  *Value = Result;
620  return true;
621 }
622 
623 /// Parse and validate a matcher expression.
624 /// \return \c true on success, in which case \c Value has the matcher parsed.
625 /// If the input is malformed, or some argument has an error, it
626 /// returns \c false.
627 bool Parser::parseMatcherExpressionImpl(const TokenInfo &NameToken,
628  const TokenInfo &OpenToken,
629  std::optional<MatcherCtor> Ctor,
630  VariantValue *Value) {
631  if (!Ctor) {
632  Error->addError(NameToken.Range, Error->ET_RegistryMatcherNotFound)
633  << NameToken.Text;
634  // Do not return here. We need to continue to give completion suggestions.
635  }
636 
637  if (Ctor && *Ctor && S->isBuilderMatcher(*Ctor))
638  return parseMatcherBuilder(*Ctor, NameToken, OpenToken, Value);
639 
640  std::vector<ParserValue> Args;
641  TokenInfo EndToken;
642 
643  Tokenizer->SkipNewlines();
644 
645  {
646  ScopedContextEntry SCE(this, Ctor.value_or(nullptr));
647 
648  while (Tokenizer->nextTokenKind() != TokenInfo::TK_Eof) {
649  if (Tokenizer->nextTokenKind() == TokenInfo::TK_CloseParen) {
650  // End of args.
651  EndToken = Tokenizer->consumeNextToken();
652  break;
653  }
654  if (!Args.empty()) {
655  // We must find a , token to continue.
656  const TokenInfo CommaToken = Tokenizer->consumeNextToken();
657  if (CommaToken.Kind != TokenInfo::TK_Comma) {
658  Error->addError(CommaToken.Range, Error->ET_ParserNoComma)
659  << CommaToken.Text;
660  return false;
661  }
662  }
663 
664  Diagnostics::Context Ctx(Diagnostics::Context::MatcherArg, Error,
665  NameToken.Text, NameToken.Range,
666  Args.size() + 1);
667  ParserValue ArgValue;
668  Tokenizer->SkipNewlines();
669  ArgValue.Text = Tokenizer->peekNextToken().Text;
670  ArgValue.Range = Tokenizer->peekNextToken().Range;
671  if (!parseExpressionImpl(&ArgValue.Value)) {
672  return false;
673  }
674 
675  Tokenizer->SkipNewlines();
676  Args.push_back(ArgValue);
677  SCE.nextArg();
678  }
679  }
680 
681  if (EndToken.Kind == TokenInfo::TK_Eof) {
682  Error->addError(OpenToken.Range, Error->ET_ParserNoCloseParen);
683  return false;
684  }
685 
686  std::string BindID;
687  if (Tokenizer->peekNextToken().Kind == TokenInfo::TK_Period) {
688  Tokenizer->consumeNextToken();
689  TokenInfo ChainCallToken = Tokenizer->consumeNextToken();
690  if (ChainCallToken.Kind == TokenInfo::TK_CodeCompletion) {
691  addCompletion(ChainCallToken, MatcherCompletion("bind(\"", "bind", 1));
692  return false;
693  }
694 
695  if (ChainCallToken.Kind != TokenInfo::TK_Ident) {
696  Error->addError(ChainCallToken.Range,
698  return false;
699  }
700  if (ChainCallToken.Text == TokenInfo::ID_With) {
701 
702  Diagnostics::Context Ctx(Diagnostics::Context::ConstructMatcher, Error,
703  NameToken.Text, NameToken.Range);
704 
705  Error->addError(ChainCallToken.Range,
707  return false;
708  }
709  if (ChainCallToken.Text != TokenInfo::ID_Bind) {
710  Error->addError(ChainCallToken.Range,
712  return false;
713  }
714  if (!parseBindID(BindID))
715  return false;
716  }
717 
718  if (!Ctor)
719  return false;
720 
721  // Merge the start and end infos.
722  Diagnostics::Context Ctx(Diagnostics::Context::ConstructMatcher, Error,
723  NameToken.Text, NameToken.Range);
724  SourceRange MatcherRange = NameToken.Range;
725  MatcherRange.End = EndToken.Range.End;
726  VariantMatcher Result = S->actOnMatcherExpression(
727  *Ctor, MatcherRange, BindID, Args, Error);
728  if (Result.isNull()) return false;
729 
730  *Value = Result;
731  return true;
732 }
733 
734 // If the prefix of this completion matches the completion token, add it to
735 // Completions minus the prefix.
736 void Parser::addCompletion(const TokenInfo &CompToken,
737  const MatcherCompletion& Completion) {
738  if (StringRef(Completion.TypedText).starts_with(CompToken.Text) &&
739  Completion.Specificity > 0) {
740  Completions.emplace_back(Completion.TypedText.substr(CompToken.Text.size()),
741  Completion.MatcherDecl, Completion.Specificity);
742  }
743 }
744 
745 std::vector<MatcherCompletion> Parser::getNamedValueCompletions(
746  ArrayRef<ArgKind> AcceptedTypes) {
747  if (!NamedValues) return std::vector<MatcherCompletion>();
748  std::vector<MatcherCompletion> Result;
749  for (const auto &Entry : *NamedValues) {
750  unsigned Specificity;
751  if (Entry.getValue().isConvertibleTo(AcceptedTypes, &Specificity)) {
752  std::string Decl =
753  (Entry.getValue().getTypeAsString() + " " + Entry.getKey()).str();
754  Result.emplace_back(Entry.getKey(), Decl, Specificity);
755  }
756  }
757  return Result;
758 }
759 
760 void Parser::addExpressionCompletions() {
761  const TokenInfo CompToken = Tokenizer->consumeNextTokenIgnoreNewlines();
762  assert(CompToken.Kind == TokenInfo::TK_CodeCompletion);
763 
764  // We cannot complete code if there is an invalid element on the context
765  // stack.
766  for (ContextStackTy::iterator I = ContextStack.begin(),
767  E = ContextStack.end();
768  I != E; ++I) {
769  if (!I->first)
770  return;
771  }
772 
773  auto AcceptedTypes = S->getAcceptedCompletionTypes(ContextStack);
774  for (const auto &Completion : S->getMatcherCompletions(AcceptedTypes)) {
775  addCompletion(CompToken, Completion);
776  }
777 
778  for (const auto &Completion : getNamedValueCompletions(AcceptedTypes)) {
779  addCompletion(CompToken, Completion);
780  }
781 }
782 
783 /// Parse an <Expression>
784 bool Parser::parseExpressionImpl(VariantValue *Value) {
785  switch (Tokenizer->nextTokenKind()) {
787  *Value = Tokenizer->consumeNextToken().Value;
788  return true;
789 
790  case TokenInfo::TK_Ident:
791  return parseIdentifierPrefixImpl(Value);
792 
794  addExpressionCompletions();
795  return false;
796 
797  case TokenInfo::TK_Eof:
798  Error->addError(Tokenizer->consumeNextToken().Range,
799  Error->ET_ParserNoCode);
800  return false;
801 
802  case TokenInfo::TK_Error:
803  // This error was already reported by the tokenizer.
804  return false;
808  case TokenInfo::TK_Comma:
811  const TokenInfo Token = Tokenizer->consumeNextToken();
812  Error->addError(Token.Range, Error->ET_ParserInvalidToken)
813  << (Token.Kind == TokenInfo::TK_NewLine ? "NewLine" : Token.Text);
814  return false;
815  }
816 
817  llvm_unreachable("Unknown token kind.");
818 }
819 
820 static llvm::ManagedStatic<Parser::RegistrySema> DefaultRegistrySema;
821 
822 Parser::Parser(CodeTokenizer *Tokenizer, Sema *S,
823  const NamedValueMap *NamedValues, Diagnostics *Error)
824  : Tokenizer(Tokenizer), S(S ? S : &*DefaultRegistrySema),
825  NamedValues(NamedValues), Error(Error) {}
826 
827 Parser::RegistrySema::~RegistrySema() = default;
828 
829 std::optional<MatcherCtor>
830 Parser::RegistrySema::lookupMatcherCtor(StringRef MatcherName) {
831  return Registry::lookupMatcherCtor(MatcherName);
832 }
833 
834 VariantMatcher Parser::RegistrySema::actOnMatcherExpression(
835  MatcherCtor Ctor, SourceRange NameRange, StringRef BindID,
836  ArrayRef<ParserValue> Args, Diagnostics *Error) {
837  if (BindID.empty()) {
838  return Registry::constructMatcher(Ctor, NameRange, Args, Error);
839  } else {
840  return Registry::constructBoundMatcher(Ctor, NameRange, BindID, Args,
841  Error);
842  }
843 }
844 
845 std::vector<ArgKind> Parser::RegistrySema::getAcceptedCompletionTypes(
846  ArrayRef<std::pair<MatcherCtor, unsigned>> Context) {
847  return Registry::getAcceptedCompletionTypes(Context);
848 }
849 
850 std::vector<MatcherCompletion> Parser::RegistrySema::getMatcherCompletions(
851  ArrayRef<ArgKind> AcceptedTypes) {
852  return Registry::getMatcherCompletions(AcceptedTypes);
853 }
854 
855 bool Parser::RegistrySema::isBuilderMatcher(MatcherCtor Ctor) const {
856  return Registry::isBuilderMatcher(Ctor);
857 }
858 
859 ASTNodeKind Parser::RegistrySema::nodeMatcherType(MatcherCtor Ctor) const {
860  return Registry::nodeMatcherType(Ctor);
861 }
862 
864 Parser::RegistrySema::buildMatcherCtor(MatcherCtor Ctor, SourceRange NameRange,
866  Diagnostics *Error) const {
867  return Registry::buildMatcherCtor(Ctor, NameRange, Args, Error);
868 }
869 
870 bool Parser::parseExpression(StringRef &Code, Sema *S,
871  const NamedValueMap *NamedValues,
872  VariantValue *Value, Diagnostics *Error) {
873  CodeTokenizer Tokenizer(Code, Error);
874  if (!Parser(&Tokenizer, S, NamedValues, Error).parseExpressionImpl(Value))
875  return false;
876  auto NT = Tokenizer.peekNextToken();
877  if (NT.Kind != TokenInfo::TK_Eof && NT.Kind != TokenInfo::TK_NewLine) {
878  Error->addError(Tokenizer.peekNextToken().Range,
879  Error->ET_ParserTrailingCode);
880  return false;
881  }
882  return true;
883 }
884 
885 std::vector<MatcherCompletion>
886 Parser::completeExpression(StringRef &Code, unsigned CompletionOffset, Sema *S,
887  const NamedValueMap *NamedValues) {
888  Diagnostics Error;
889  CodeTokenizer Tokenizer(Code, &Error, CompletionOffset);
890  Parser P(&Tokenizer, S, NamedValues, &Error);
891  VariantValue Dummy;
892  P.parseExpressionImpl(&Dummy);
893 
894  // Sort by specificity, then by name.
895  llvm::sort(P.Completions,
896  [](const MatcherCompletion &A, const MatcherCompletion &B) {
897  if (A.Specificity != B.Specificity)
898  return A.Specificity > B.Specificity;
899  return A.TypedText < B.TypedText;
900  });
901 
902  return P.Completions;
903 }
904 
905 std::optional<DynTypedMatcher>
906 Parser::parseMatcherExpression(StringRef &Code, Sema *S,
907  const NamedValueMap *NamedValues,
908  Diagnostics *Error) {
910  if (!parseExpression(Code, S, NamedValues, &Value, Error))
911  return std::nullopt;
912  if (!Value.isMatcher()) {
913  Error->addError(SourceRange(), Error->ET_ParserNotAMatcher);
914  return std::nullopt;
915  }
916  std::optional<DynTypedMatcher> Result = Value.getMatcher().getSingleMatcher();
917  if (!Result) {
918  Error->addError(SourceRange(), Error->ET_ParserOverloadedType)
919  << Value.getTypeAsString();
920  }
921  return Result;
922 }
923 
924 } // namespace dynamic
925 } // namespace ast_matchers
926 } // namespace clang
StringRef P
Simple matcher expression parser.
Diagnostics class to manage error messages.
StringRef Text
Definition: Format.cpp:2977
Registry of all known matchers.
SourceRange Range
Definition: SemaObjC.cpp:754
__device__ __2f16 float c
Kind identifier.
Definition: ASTTypeTraits.h:51
Sema - This implements semantic analysis and AST building for C.
Definition: Sema.h:462
Helper class to manage error messages.
Definition: Diagnostics.h:50
ArgStream addError(SourceRange Range, ErrorType Error)
Add an error to the diagnostics.
Definition: Diagnostics.cpp:65
Simple tokenizer for the parser.
Definition: Parser.cpp:69
const TokenInfo & peekNextToken() const
Returns but doesn't consume the next token.
Definition: Parser.cpp:84
CodeTokenizer(StringRef &MatcherCode, Diagnostics *Error, unsigned CodeCompletionOffset)
Definition: Parser.cpp:76
TokenInfo::TokenKind nextTokenKind() const
Definition: Parser.cpp:106
CodeTokenizer(StringRef &MatcherCode, Diagnostics *Error)
Definition: Parser.cpp:71
TokenInfo consumeNextToken()
Consumes and returns the next token.
Definition: Parser.cpp:87
Interface to connect the parser with the registry and more.
Definition: Parser.h:67
virtual VariantMatcher actOnMatcherExpression(MatcherCtor Ctor, SourceRange NameRange, StringRef BindID, ArrayRef< ParserValue > Args, Diagnostics *Error)=0
Process a matcher expression.
virtual internal::MatcherDescriptorPtr buildMatcherCtor(MatcherCtor, SourceRange NameRange, ArrayRef< ParserValue > Args, Diagnostics *Error) const =0
virtual bool isBuilderMatcher(MatcherCtor) const =0
virtual std::vector< ArgKind > getAcceptedCompletionTypes(llvm::ArrayRef< std::pair< MatcherCtor, unsigned >> Context)
Compute the list of completion types for Context.
Definition: Parser.cpp:323
virtual std::vector< MatcherCompletion > getMatcherCompletions(llvm::ArrayRef< ArgKind > AcceptedTypes)
Compute the list of completions that match any of AcceptedTypes.
Definition: Parser.cpp:329
virtual std::optional< MatcherCtor > lookupMatcherCtor(StringRef MatcherName)=0
Look up a matcher by name.
virtual ASTNodeKind nodeMatcherType(MatcherCtor) const =0
Matcher expression parser.
Definition: Parser.h:55
llvm::StringMap< VariantValue > NamedValueMap
Definition: Parser.h:166
static VariantMatcher SingleMatcher(const DynTypedMatcher &Matcher)
Clones the provided matcher.
A smart (owning) pointer for MatcherDescriptor.
Definition: Registry.h:38
@ Decl
The l-value was an access to a declared entity or something equivalently strong, like the address of ...
static llvm::ManagedStatic< Parser::RegistrySema > DefaultRegistrySema
Definition: Parser.cpp:820
const internal::MatcherDescriptor * MatcherCtor
Definition: Registry.h:55
The JSON file list parser is used to communicate input to InstallAPI.
LLVM_READONLY char toLowercase(char c)
Converts the given ASCII character to its lowercase equivalent.
Definition: CharInfo.h:225
LLVM_READONLY bool isAlphanumeric(unsigned char c)
Return true if this character is an ASCII letter or digit: [a-zA-Z0-9].
Definition: CharInfo.h:138
LLVM_READONLY bool isHexDigit(unsigned char c)
Return true if this character is an ASCII hex digit: [0-9a-fA-F].
Definition: CharInfo.h:144
Simple structure to hold information for one token from the parser.
Definition: Parser.cpp:37
static const char *const ID_Bind
Some known identifiers.
Definition: Parser.cpp:54