clang  19.0.0git
BuildTree.cpp
Go to the documentation of this file.
1 //===- BuildTree.cpp ------------------------------------------*- C++ -*-=====//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
9 #include "clang/AST/ASTFwd.h"
10 #include "clang/AST/Decl.h"
11 #include "clang/AST/DeclBase.h"
12 #include "clang/AST/DeclCXX.h"
14 #include "clang/AST/Expr.h"
15 #include "clang/AST/ExprCXX.h"
16 #include "clang/AST/IgnoreExpr.h"
19 #include "clang/AST/Stmt.h"
20 #include "clang/AST/TypeLoc.h"
22 #include "clang/Basic/LLVM.h"
25 #include "clang/Basic/Specifiers.h"
26 #include "clang/Basic/TokenKinds.h"
27 #include "clang/Lex/Lexer.h"
33 #include "llvm/ADT/ArrayRef.h"
34 #include "llvm/ADT/DenseMap.h"
35 #include "llvm/ADT/PointerUnion.h"
36 #include "llvm/ADT/STLExtras.h"
37 #include "llvm/ADT/ScopeExit.h"
38 #include "llvm/ADT/SmallVector.h"
39 #include "llvm/Support/Allocator.h"
40 #include "llvm/Support/Casting.h"
41 #include "llvm/Support/Compiler.h"
42 #include "llvm/Support/FormatVariadic.h"
43 #include "llvm/Support/MemoryBuffer.h"
44 #include "llvm/Support/raw_ostream.h"
45 #include <cstddef>
46 #include <map>
47 
48 using namespace clang;
49 
50 // Ignores the implicit `CXXConstructExpr` for copy/move constructor calls
51 // generated by the compiler, as well as in implicit conversions like the one
52 // wrapping `1` in `X x = 1;`.
54  if (auto *C = dyn_cast<CXXConstructExpr>(E)) {
55  auto NumArgs = C->getNumArgs();
56  if (NumArgs == 1 || (NumArgs > 1 && isa<CXXDefaultArgExpr>(C->getArg(1)))) {
57  Expr *A = C->getArg(0);
58  if (C->getParenOrBraceRange().isInvalid())
59  return A;
60  }
61  }
62  return E;
63 }
64 
65 // In:
66 // struct X {
67 // X(int)
68 // };
69 // X x = X(1);
70 // Ignores the implicit `CXXFunctionalCastExpr` that wraps
71 // `CXXConstructExpr X(1)`.
73  if (auto *F = dyn_cast<CXXFunctionalCastExpr>(E)) {
74  if (F->getCastKind() == CK_ConstructorConversion)
75  return F->getSubExpr();
76  }
77  return E;
78 }
79 
80 static Expr *IgnoreImplicit(Expr *E) {
84 }
85 
86 LLVM_ATTRIBUTE_UNUSED
87 static bool isImplicitExpr(Expr *E) { return IgnoreImplicit(E) != E; }
88 
89 namespace {
90 /// Get start location of the Declarator from the TypeLoc.
91 /// E.g.:
92 /// loc of `(` in `int (a)`
93 /// loc of `*` in `int *(a)`
94 /// loc of the first `(` in `int (*a)(int)`
95 /// loc of the `*` in `int *(a)(int)`
96 /// loc of the first `*` in `const int *const *volatile a;`
97 ///
98 /// It is non-trivial to get the start location because TypeLocs are stored
99 /// inside out. In the example above `*volatile` is the TypeLoc returned
100 /// by `Decl.getTypeSourceInfo()`, and `*const` is what `.getPointeeLoc()`
101 /// returns.
102 struct GetStartLoc : TypeLocVisitor<GetStartLoc, SourceLocation> {
103  SourceLocation VisitParenTypeLoc(ParenTypeLoc T) {
104  auto L = Visit(T.getInnerLoc());
105  if (L.isValid())
106  return L;
107  return T.getLParenLoc();
108  }
109 
110  // Types spelled in the prefix part of the declarator.
111  SourceLocation VisitPointerTypeLoc(PointerTypeLoc T) {
112  return HandlePointer(T);
113  }
114 
115  SourceLocation VisitMemberPointerTypeLoc(MemberPointerTypeLoc T) {
116  return HandlePointer(T);
117  }
118 
119  SourceLocation VisitBlockPointerTypeLoc(BlockPointerTypeLoc T) {
120  return HandlePointer(T);
121  }
122 
123  SourceLocation VisitReferenceTypeLoc(ReferenceTypeLoc T) {
124  return HandlePointer(T);
125  }
126 
127  SourceLocation VisitObjCObjectPointerTypeLoc(ObjCObjectPointerTypeLoc T) {
128  return HandlePointer(T);
129  }
130 
131  // All other cases are not important, as they are either part of declaration
132  // specifiers (e.g. inheritors of TypeSpecTypeLoc) or introduce modifiers on
133  // existing declarators (e.g. QualifiedTypeLoc). They cannot start the
134  // declarator themselves, but their underlying type can.
135  SourceLocation VisitTypeLoc(TypeLoc T) {
136  auto N = T.getNextTypeLoc();
137  if (!N)
138  return SourceLocation();
139  return Visit(N);
140  }
141 
142  SourceLocation VisitFunctionProtoTypeLoc(FunctionProtoTypeLoc T) {
143  if (T.getTypePtr()->hasTrailingReturn())
144  return SourceLocation(); // avoid recursing into the suffix of declarator.
145  return VisitTypeLoc(T);
146  }
147 
148 private:
149  template <class PtrLoc> SourceLocation HandlePointer(PtrLoc T) {
150  auto L = Visit(T.getPointeeLoc());
151  if (L.isValid())
152  return L;
153  return T.getLocalSourceRange().getBegin();
154  }
155 };
156 } // namespace
157 
159  auto FirstDefaultArg =
160  llvm::find_if(Args, [](auto It) { return isa<CXXDefaultArgExpr>(It); });
161  return llvm::make_range(Args.begin(), FirstDefaultArg);
162 }
163 
165  switch (E.getOperator()) {
166  // Comparison
167  case OO_EqualEqual:
168  case OO_ExclaimEqual:
169  case OO_Greater:
170  case OO_GreaterEqual:
171  case OO_Less:
172  case OO_LessEqual:
173  case OO_Spaceship:
174  // Assignment
175  case OO_Equal:
176  case OO_SlashEqual:
177  case OO_PercentEqual:
178  case OO_CaretEqual:
179  case OO_PipeEqual:
180  case OO_LessLessEqual:
181  case OO_GreaterGreaterEqual:
182  case OO_PlusEqual:
183  case OO_MinusEqual:
184  case OO_StarEqual:
185  case OO_AmpEqual:
186  // Binary computation
187  case OO_Slash:
188  case OO_Percent:
189  case OO_Caret:
190  case OO_Pipe:
191  case OO_LessLess:
192  case OO_GreaterGreater:
193  case OO_AmpAmp:
194  case OO_PipePipe:
195  case OO_ArrowStar:
196  case OO_Comma:
197  return syntax::NodeKind::BinaryOperatorExpression;
198  case OO_Tilde:
199  case OO_Exclaim:
200  return syntax::NodeKind::PrefixUnaryOperatorExpression;
201  // Prefix/Postfix increment/decrement
202  case OO_PlusPlus:
203  case OO_MinusMinus:
204  switch (E.getNumArgs()) {
205  case 1:
206  return syntax::NodeKind::PrefixUnaryOperatorExpression;
207  case 2:
208  return syntax::NodeKind::PostfixUnaryOperatorExpression;
209  default:
210  llvm_unreachable("Invalid number of arguments for operator");
211  }
212  // Operators that can be unary or binary
213  case OO_Plus:
214  case OO_Minus:
215  case OO_Star:
216  case OO_Amp:
217  switch (E.getNumArgs()) {
218  case 1:
219  return syntax::NodeKind::PrefixUnaryOperatorExpression;
220  case 2:
221  return syntax::NodeKind::BinaryOperatorExpression;
222  default:
223  llvm_unreachable("Invalid number of arguments for operator");
224  }
225  return syntax::NodeKind::BinaryOperatorExpression;
226  // Not yet supported by SyntaxTree
227  case OO_New:
228  case OO_Delete:
229  case OO_Array_New:
230  case OO_Array_Delete:
231  case OO_Coawait:
232  case OO_Subscript:
233  case OO_Arrow:
234  return syntax::NodeKind::UnknownExpression;
235  case OO_Call:
236  return syntax::NodeKind::CallExpression;
237  case OO_Conditional: // not overloadable
239  case OO_None:
240  llvm_unreachable("Not an overloadable operator");
241  }
242  llvm_unreachable("Unknown OverloadedOperatorKind enum");
243 }
244 
245 /// Get the start of the qualified name. In the examples below it gives the
246 /// location of the `^`:
247 /// `int ^a;`
248 /// `int *^a;`
249 /// `int ^a::S::f(){}`
251  assert((isa<DeclaratorDecl, TypedefNameDecl>(D)) &&
252  "only DeclaratorDecl and TypedefNameDecl are supported.");
253 
254  auto DN = D->getDeclName();
255  bool IsAnonymous = DN.isIdentifier() && !DN.getAsIdentifierInfo();
256  if (IsAnonymous)
257  return SourceLocation();
258 
259  if (const auto *DD = dyn_cast<DeclaratorDecl>(D)) {
260  if (DD->getQualifierLoc()) {
261  return DD->getQualifierLoc().getBeginLoc();
262  }
263  }
264 
265  return D->getLocation();
266 }
267 
268 /// Gets the range of the initializer inside an init-declarator C++ [dcl.decl].
269 /// `int a;` -> range of ``,
270 /// `int *a = nullptr` -> range of `= nullptr`.
271 /// `int a{}` -> range of `{}`.
272 /// `int a()` -> range of `()`.
274  if (auto *V = dyn_cast<VarDecl>(D)) {
275  auto *I = V->getInit();
276  // Initializers in range-based-for are not part of the declarator
277  if (I && !V->isCXXForRangeDecl())
278  return I->getSourceRange();
279  }
280 
281  return SourceRange();
282 }
283 
284 /// Gets the range of declarator as defined by the C++ grammar. E.g.
285 /// `int a;` -> range of `a`,
286 /// `int *a;` -> range of `*a`,
287 /// `int a[10];` -> range of `a[10]`,
288 /// `int a[1][2][3];` -> range of `a[1][2][3]`,
289 /// `int *a = nullptr` -> range of `*a = nullptr`.
290 /// `int S::f(){}` -> range of `S::f()`.
291 /// FIXME: \p Name must be a source range.
293  SourceLocation Name,
294  SourceRange Initializer) {
295  SourceLocation Start = GetStartLoc().Visit(T);
296  SourceLocation End = T.getEndLoc();
297  if (Name.isValid()) {
298  if (Start.isInvalid())
299  Start = Name;
300  // End of TypeLoc could be invalid if the type is invalid, fallback to the
301  // NameLoc.
302  if (End.isInvalid() || SM.isBeforeInTranslationUnit(End, Name))
303  End = Name;
304  }
305  if (Initializer.isValid()) {
306  auto InitializerEnd = Initializer.getEnd();
307  assert(SM.isBeforeInTranslationUnit(End, InitializerEnd) ||
308  End == InitializerEnd);
309  End = InitializerEnd;
310  }
311  return SourceRange(Start, End);
312 }
313 
314 namespace {
315 /// All AST hierarchy roots that can be represented as pointers.
316 using ASTPtr = llvm::PointerUnion<Stmt *, Decl *>;
317 /// Maintains a mapping from AST to syntax tree nodes. This class will get more
318 /// complicated as we support more kinds of AST nodes, e.g. TypeLocs.
319 /// FIXME: expose this as public API.
320 class ASTToSyntaxMapping {
321 public:
322  void add(ASTPtr From, syntax::Tree *To) {
323  assert(To != nullptr);
324  assert(!From.isNull());
325 
326  bool Added = Nodes.insert({From, To}).second;
327  (void)Added;
328  assert(Added && "mapping added twice");
329  }
330 
331  void add(NestedNameSpecifierLoc From, syntax::Tree *To) {
332  assert(To != nullptr);
333  assert(From.hasQualifier());
334 
335  bool Added = NNSNodes.insert({From, To}).second;
336  (void)Added;
337  assert(Added && "mapping added twice");
338  }
339 
340  syntax::Tree *find(ASTPtr P) const { return Nodes.lookup(P); }
341 
342  syntax::Tree *find(NestedNameSpecifierLoc P) const {
343  return NNSNodes.lookup(P);
344  }
345 
346 private:
347  llvm::DenseMap<ASTPtr, syntax::Tree *> Nodes;
348  llvm::DenseMap<NestedNameSpecifierLoc, syntax::Tree *> NNSNodes;
349 };
350 } // namespace
351 
352 /// A helper class for constructing the syntax tree while traversing a clang
353 /// AST.
354 ///
355 /// At each point of the traversal we maintain a list of pending nodes.
356 /// Initially all tokens are added as pending nodes. When processing a clang AST
357 /// node, the clients need to:
358 /// - create a corresponding syntax node,
359 /// - assign roles to all pending child nodes with 'markChild' and
360 /// 'markChildToken',
361 /// - replace the child nodes with the new syntax node in the pending list
362 /// with 'foldNode'.
363 ///
364 /// Note that all children are expected to be processed when building a node.
365 ///
366 /// Call finalize() to finish building the tree and consume the root node.
368 public:
370  : Arena(Arena),
371  TBTM(TBTM),
372  Pending(Arena, TBTM.tokenBuffer()) {
373  for (const auto &T : TBTM.tokenBuffer().expandedTokens())
374  LocationToToken.insert({T.location(), &T});
375  }
376 
377  llvm::BumpPtrAllocator &allocator() { return Arena.getAllocator(); }
378  const SourceManager &sourceManager() const {
379  return TBTM.sourceManager();
380  }
381 
382  /// Populate children for \p New node, assuming it covers tokens from \p
383  /// Range.
384  void foldNode(ArrayRef<syntax::Token> Range, syntax::Tree *New, ASTPtr From) {
385  assert(New);
386  Pending.foldChildren(TBTM.tokenBuffer(), Range, New);
387  if (From)
388  Mapping.add(From, New);
389  }
390 
392  // FIXME: add mapping for TypeLocs
393  foldNode(Range, New, nullptr);
394  }
395 
397  NestedNameSpecifierLoc From) {
398  assert(New);
399  Pending.foldChildren(TBTM.tokenBuffer(), Range, New);
400  if (From)
401  Mapping.add(From, New);
402  }
403 
404  /// Populate children for \p New list, assuming it covers tokens from a
405  /// subrange of \p SuperRange.
407  ASTPtr From) {
408  assert(New);
409  auto ListRange = Pending.shrinkToFitList(SuperRange);
410  Pending.foldChildren(TBTM.tokenBuffer(), ListRange, New);
411  if (From)
412  Mapping.add(From, New);
413  }
414 
415  /// Notifies that we should not consume trailing semicolon when computing
416  /// token range of \p D.
418 
419  /// Mark the \p Child node with a corresponding \p Role. All marked children
420  /// should be consumed by foldNode.
421  /// When called on expressions (clang::Expr is derived from clang::Stmt),
422  /// wraps expressions into expression statement.
423  void markStmtChild(Stmt *Child, NodeRole Role);
424  /// Should be called for expressions in non-statement position to avoid
425  /// wrapping into expression statement.
426  void markExprChild(Expr *Child, NodeRole Role);
427  /// Set role for a token starting at \p Loc.
429  /// Set role for \p T.
430  void markChildToken(const syntax::Token *T, NodeRole R);
431 
432  /// Set role for \p N.
433  void markChild(syntax::Node *N, NodeRole R);
434  /// Set role for the syntax node matching \p N.
435  void markChild(ASTPtr N, NodeRole R);
436  /// Set role for the syntax node matching \p N.
438 
439  /// Finish building the tree and consume the root node.
440  syntax::TranslationUnit *finalize() && {
441  auto Tokens = TBTM.tokenBuffer().expandedTokens();
442  assert(!Tokens.empty());
443  assert(Tokens.back().kind() == tok::eof);
444 
445  // Build the root of the tree, consuming all the children.
446  Pending.foldChildren(TBTM.tokenBuffer(), Tokens.drop_back(),
447  new (Arena.getAllocator()) syntax::TranslationUnit);
448 
449  auto *TU = cast<syntax::TranslationUnit>(std::move(Pending).finalize());
450  TU->assertInvariantsRecursive();
451  return TU;
452  }
453 
454  /// Finds a token starting at \p L. The token must exist if \p L is valid.
455  const syntax::Token *findToken(SourceLocation L) const;
456 
457  /// Finds the syntax tokens corresponding to the \p SourceRange.
459  assert(Range.isValid());
460  return getRange(Range.getBegin(), Range.getEnd());
461  }
462 
463  /// Finds the syntax tokens corresponding to the passed source locations.
464  /// \p First is the start position of the first token and \p Last is the start
465  /// position of the last token.
467  SourceLocation Last) const {
468  assert(First.isValid());
469  assert(Last.isValid());
470  assert(First == Last ||
471  TBTM.sourceManager().isBeforeInTranslationUnit(First, Last));
472  return llvm::ArrayRef(findToken(First), std::next(findToken(Last)));
473  }
474 
477  auto Tokens = getRange(D->getSourceRange());
478  return maybeAppendSemicolon(Tokens, D);
479  }
480 
481  /// Returns true if \p D is the last declarator in a chain and is thus
482  /// reponsible for creating SimpleDeclaration for the whole chain.
484  assert((isa<DeclaratorDecl, TypedefNameDecl>(D)) &&
485  "only DeclaratorDecl and TypedefNameDecl are supported.");
486 
487  const Decl *Next = D->getNextDeclInContext();
488 
489  // There's no next sibling, this one is responsible.
490  if (Next == nullptr) {
491  return true;
492  }
493 
494  // Next sibling is not the same type, this one is responsible.
495  if (D->getKind() != Next->getKind()) {
496  return true;
497  }
498  // Next sibling doesn't begin at the same loc, it must be a different
499  // declaration, so this declarator is responsible.
500  if (Next->getBeginLoc() != D->getBeginLoc()) {
501  return true;
502  }
503 
504  // NextT is a member of the same declaration, and we need the last member to
505  // create declaration. This one is not responsible.
506  return false;
507  }
508 
511  // We want to drop the template parameters for specializations.
512  if (const auto *S = dyn_cast<TagDecl>(D))
513  Tokens = getRange(S->TypeDecl::getBeginLoc(), S->getEndLoc());
514  else
515  Tokens = getRange(D->getSourceRange());
516  return maybeAppendSemicolon(Tokens, D);
517  }
518 
520  return getRange(E->getSourceRange());
521  }
522 
523  /// Find the adjusted range for the statement, consuming the trailing
524  /// semicolon when needed.
526  auto Tokens = getRange(S->getSourceRange());
527  if (isa<CompoundStmt>(S))
528  return Tokens;
529 
530  // Some statements miss a trailing semicolon, e.g. 'return', 'continue' and
531  // all statements that end with those. Consume this semicolon here.
532  if (Tokens.back().kind() == tok::semi)
533  return Tokens;
534  return withTrailingSemicolon(Tokens);
535  }
536 
537 private:
538  ArrayRef<syntax::Token> maybeAppendSemicolon(ArrayRef<syntax::Token> Tokens,
539  const Decl *D) const {
540  if (isa<NamespaceDecl>(D))
541  return Tokens;
542  if (DeclsWithoutSemicolons.count(D))
543  return Tokens;
544  // FIXME: do not consume trailing semicolon on function definitions.
545  // Most declarations own a semicolon in syntax trees, but not in clang AST.
546  return withTrailingSemicolon(Tokens);
547  }
548 
550  withTrailingSemicolon(ArrayRef<syntax::Token> Tokens) const {
551  assert(!Tokens.empty());
552  assert(Tokens.back().kind() != tok::eof);
553  // We never consume 'eof', so looking at the next token is ok.
554  if (Tokens.back().kind() != tok::semi && Tokens.end()->kind() == tok::semi)
555  return llvm::ArrayRef(Tokens.begin(), Tokens.end() + 1);
556  return Tokens;
557  }
558 
559  void setRole(syntax::Node *N, NodeRole R) {
560  assert(N->getRole() == NodeRole::Detached);
561  N->setRole(R);
562  }
563 
564  /// A collection of trees covering the input tokens.
565  /// When created, each tree corresponds to a single token in the file.
566  /// Clients call 'foldChildren' to attach one or more subtrees to a parent
567  /// node and update the list of trees accordingly.
568  ///
569  /// Ensures that added nodes properly nest and cover the whole token stream.
570  struct Forest {
571  Forest(syntax::Arena &A, const syntax::TokenBuffer &TB) {
572  assert(!TB.expandedTokens().empty());
573  assert(TB.expandedTokens().back().kind() == tok::eof);
574  // Create all leaf nodes.
575  // Note that we do not have 'eof' in the tree.
576  for (const auto &T : TB.expandedTokens().drop_back()) {
577  auto *L = new (A.getAllocator())
578  syntax::Leaf(reinterpret_cast<TokenManager::Key>(&T));
579  L->Original = true;
580  L->CanModify = TB.spelledForExpanded(T).has_value();
581  Trees.insert(Trees.end(), {&T, L});
582  }
583  }
584 
585  void assignRole(ArrayRef<syntax::Token> Range, syntax::NodeRole Role) {
586  assert(!Range.empty());
587  auto It = Trees.lower_bound(Range.begin());
588  assert(It != Trees.end() && "no node found");
589  assert(It->first == Range.begin() && "no child with the specified range");
590  assert((std::next(It) == Trees.end() ||
591  std::next(It)->first == Range.end()) &&
592  "no child with the specified range");
593  assert(It->second->getRole() == NodeRole::Detached &&
594  "re-assigning role for a child");
595  It->second->setRole(Role);
596  }
597 
598  /// Shrink \p Range to a subrange that only contains tokens of a list.
599  /// List elements and delimiters should already have correct roles.
600  ArrayRef<syntax::Token> shrinkToFitList(ArrayRef<syntax::Token> Range) {
601  auto BeginChildren = Trees.lower_bound(Range.begin());
602  assert((BeginChildren == Trees.end() ||
603  BeginChildren->first == Range.begin()) &&
604  "Range crosses boundaries of existing subtrees");
605 
606  auto EndChildren = Trees.lower_bound(Range.end());
607  assert(
608  (EndChildren == Trees.end() || EndChildren->first == Range.end()) &&
609  "Range crosses boundaries of existing subtrees");
610 
611  auto BelongsToList = [](decltype(Trees)::value_type KV) {
612  auto Role = KV.second->getRole();
613  return Role == syntax::NodeRole::ListElement ||
615  };
616 
617  auto BeginListChildren =
618  std::find_if(BeginChildren, EndChildren, BelongsToList);
619 
620  auto EndListChildren =
621  std::find_if_not(BeginListChildren, EndChildren, BelongsToList);
622 
623  return ArrayRef<syntax::Token>(BeginListChildren->first,
624  EndListChildren->first);
625  }
626 
627  /// Add \p Node to the forest and attach child nodes based on \p Tokens.
628  void foldChildren(const syntax::TokenBuffer &TB,
630  // Attach children to `Node`.
631  assert(Node->getFirstChild() == nullptr && "node already has children");
632 
633  auto *FirstToken = Tokens.begin();
634  auto BeginChildren = Trees.lower_bound(FirstToken);
635 
636  assert((BeginChildren == Trees.end() ||
637  BeginChildren->first == FirstToken) &&
638  "fold crosses boundaries of existing subtrees");
639  auto EndChildren = Trees.lower_bound(Tokens.end());
640  assert(
641  (EndChildren == Trees.end() || EndChildren->first == Tokens.end()) &&
642  "fold crosses boundaries of existing subtrees");
643 
644  for (auto It = BeginChildren; It != EndChildren; ++It) {
645  auto *C = It->second;
646  if (C->getRole() == NodeRole::Detached)
647  C->setRole(NodeRole::Unknown);
648  Node->appendChildLowLevel(C);
649  }
650 
651  // Mark that this node came from the AST and is backed by the source code.
652  Node->Original = true;
653  Node->CanModify =
654  TB.spelledForExpanded(Tokens).has_value();
655 
656  Trees.erase(BeginChildren, EndChildren);
657  Trees.insert({FirstToken, Node});
658  }
659 
660  // EXPECTS: all tokens were consumed and are owned by a single root node.
661  syntax::Node *finalize() && {
662  assert(Trees.size() == 1);
663  auto *Root = Trees.begin()->second;
664  Trees = {};
665  return Root;
666  }
667 
668  std::string str(const syntax::TokenBufferTokenManager &STM) const {
669  std::string R;
670  for (auto It = Trees.begin(); It != Trees.end(); ++It) {
671  unsigned CoveredTokens =
672  It != Trees.end()
673  ? (std::next(It)->first - It->first)
674  : STM.tokenBuffer().expandedTokens().end() - It->first;
675 
676  R += std::string(
677  formatv("- '{0}' covers '{1}'+{2} tokens\n", It->second->getKind(),
678  It->first->text(STM.sourceManager()), CoveredTokens));
679  R += It->second->dump(STM);
680  }
681  return R;
682  }
683 
684  private:
685  /// Maps from the start token to a subtree starting at that token.
686  /// Keys in the map are pointers into the array of expanded tokens, so
687  /// pointer order corresponds to the order of preprocessor tokens.
688  std::map<const syntax::Token *, syntax::Node *> Trees;
689  };
690 
691  /// For debugging purposes.
692  std::string str() { return Pending.str(TBTM); }
693 
694  syntax::Arena &Arena;
695  TokenBufferTokenManager& TBTM;
696  /// To quickly find tokens by their start location.
697  llvm::DenseMap<SourceLocation, const syntax::Token *> LocationToToken;
698  Forest Pending;
699  llvm::DenseSet<Decl *> DeclsWithoutSemicolons;
700  ASTToSyntaxMapping Mapping;
701 };
702 
703 namespace {
704 class BuildTreeVisitor : public RecursiveASTVisitor<BuildTreeVisitor> {
705 public:
706  explicit BuildTreeVisitor(ASTContext &Context, syntax::TreeBuilder &Builder)
707  : Builder(Builder), Context(Context) {}
708 
709  bool shouldTraversePostOrder() const { return true; }
710 
711  bool WalkUpFromDeclaratorDecl(DeclaratorDecl *DD) {
712  return processDeclaratorAndDeclaration(DD);
713  }
714 
715  bool WalkUpFromTypedefNameDecl(TypedefNameDecl *TD) {
716  return processDeclaratorAndDeclaration(TD);
717  }
718 
719  bool VisitDecl(Decl *D) {
720  assert(!D->isImplicit());
721  Builder.foldNode(Builder.getDeclarationRange(D),
722  new (allocator()) syntax::UnknownDeclaration(), D);
723  return true;
724  }
725 
726  // RAV does not call WalkUpFrom* on explicit instantiations, so we have to
727  // override Traverse.
728  // FIXME: make RAV call WalkUpFrom* instead.
729  bool
730  TraverseClassTemplateSpecializationDecl(ClassTemplateSpecializationDecl *C) {
731  if (!RecursiveASTVisitor::TraverseClassTemplateSpecializationDecl(C))
732  return false;
733  if (C->isExplicitSpecialization())
734  return true; // we are only interested in explicit instantiations.
735  auto *Declaration =
736  cast<syntax::SimpleDeclaration>(handleFreeStandingTagDecl(C));
737  foldExplicitTemplateInstantiation(
738  Builder.getTemplateRange(C),
739  Builder.findToken(C->getExternKeywordLoc()),
740  Builder.findToken(C->getTemplateKeywordLoc()), Declaration, C);
741  return true;
742  }
743 
744  bool WalkUpFromTemplateDecl(TemplateDecl *S) {
745  foldTemplateDeclaration(
746  Builder.getDeclarationRange(S),
747  Builder.findToken(S->getTemplateParameters()->getTemplateLoc()),
748  Builder.getDeclarationRange(S->getTemplatedDecl()), S);
749  return true;
750  }
751 
752  bool WalkUpFromTagDecl(TagDecl *C) {
753  // FIXME: build the ClassSpecifier node.
754  if (!C->isFreeStanding()) {
755  assert(C->getNumTemplateParameterLists() == 0);
756  return true;
757  }
758  handleFreeStandingTagDecl(C);
759  return true;
760  }
761 
762  syntax::Declaration *handleFreeStandingTagDecl(TagDecl *C) {
763  assert(C->isFreeStanding());
764  // Class is a declaration specifier and needs a spanning declaration node.
765  auto DeclarationRange = Builder.getDeclarationRange(C);
766  syntax::Declaration *Result = new (allocator()) syntax::SimpleDeclaration;
767  Builder.foldNode(DeclarationRange, Result, nullptr);
768 
769  // Build TemplateDeclaration nodes if we had template parameters.
770  auto ConsumeTemplateParameters = [&](const TemplateParameterList &L) {
771  const auto *TemplateKW = Builder.findToken(L.getTemplateLoc());
772  auto R = llvm::ArrayRef(TemplateKW, DeclarationRange.end());
773  Result =
774  foldTemplateDeclaration(R, TemplateKW, DeclarationRange, nullptr);
775  DeclarationRange = R;
776  };
777  if (auto *S = dyn_cast<ClassTemplatePartialSpecializationDecl>(C))
778  ConsumeTemplateParameters(*S->getTemplateParameters());
779  for (unsigned I = C->getNumTemplateParameterLists(); 0 < I; --I)
780  ConsumeTemplateParameters(*C->getTemplateParameterList(I - 1));
781  return Result;
782  }
783 
784  bool WalkUpFromTranslationUnitDecl(TranslationUnitDecl *TU) {
785  // We do not want to call VisitDecl(), the declaration for translation
786  // unit is built by finalize().
787  return true;
788  }
789 
790  bool WalkUpFromCompoundStmt(CompoundStmt *S) {
791  using NodeRole = syntax::NodeRole;
792 
793  Builder.markChildToken(S->getLBracLoc(), NodeRole::OpenParen);
794  for (auto *Child : S->body())
795  Builder.markStmtChild(Child, NodeRole::Statement);
796  Builder.markChildToken(S->getRBracLoc(), NodeRole::CloseParen);
797 
798  Builder.foldNode(Builder.getStmtRange(S),
799  new (allocator()) syntax::CompoundStatement, S);
800  return true;
801  }
802 
803  // Some statements are not yet handled by syntax trees.
804  bool WalkUpFromStmt(Stmt *S) {
805  Builder.foldNode(Builder.getStmtRange(S),
806  new (allocator()) syntax::UnknownStatement, S);
807  return true;
808  }
809 
810  bool TraverseIfStmt(IfStmt *S) {
811  bool Result = [&, this]() {
812  if (S->getInit() && !TraverseStmt(S->getInit())) {
813  return false;
814  }
815  // In cases where the condition is an initialized declaration in a
816  // statement, we want to preserve the declaration and ignore the
817  // implicit condition expression in the syntax tree.
818  if (S->hasVarStorage()) {
819  if (!TraverseStmt(S->getConditionVariableDeclStmt()))
820  return false;
821  } else if (S->getCond() && !TraverseStmt(S->getCond()))
822  return false;
823 
824  if (S->getThen() && !TraverseStmt(S->getThen()))
825  return false;
826  if (S->getElse() && !TraverseStmt(S->getElse()))
827  return false;
828  return true;
829  }();
830  WalkUpFromIfStmt(S);
831  return Result;
832  }
833 
834  bool TraverseCXXForRangeStmt(CXXForRangeStmt *S) {
835  // We override to traverse range initializer as VarDecl.
836  // RAV traverses it as a statement, we produce invalid node kinds in that
837  // case.
838  // FIXME: should do this in RAV instead?
839  bool Result = [&, this]() {
840  if (S->getInit() && !TraverseStmt(S->getInit()))
841  return false;
842  if (S->getLoopVariable() && !TraverseDecl(S->getLoopVariable()))
843  return false;
844  if (S->getRangeInit() && !TraverseStmt(S->getRangeInit()))
845  return false;
846  if (S->getBody() && !TraverseStmt(S->getBody()))
847  return false;
848  return true;
849  }();
850  WalkUpFromCXXForRangeStmt(S);
851  return Result;
852  }
853 
854  bool TraverseStmt(Stmt *S) {
855  if (auto *DS = dyn_cast_or_null<DeclStmt>(S)) {
856  // We want to consume the semicolon, make sure SimpleDeclaration does not.
857  for (auto *D : DS->decls())
858  Builder.noticeDeclWithoutSemicolon(D);
859  } else if (auto *E = dyn_cast_or_null<Expr>(S)) {
861  }
863  }
864 
865  bool TraverseOpaqueValueExpr(OpaqueValueExpr *VE) {
866  // OpaqueValue doesn't correspond to concrete syntax, ignore it.
867  return true;
868  }
869 
870  // Some expressions are not yet handled by syntax trees.
871  bool WalkUpFromExpr(Expr *E) {
872  assert(!isImplicitExpr(E) && "should be handled by TraverseStmt");
873  Builder.foldNode(Builder.getExprRange(E),
874  new (allocator()) syntax::UnknownExpression, E);
875  return true;
876  }
877 
878  bool TraverseUserDefinedLiteral(UserDefinedLiteral *S) {
879  // The semantic AST node `UserDefinedLiteral` (UDL) may have one child node
880  // referencing the location of the UDL suffix (`_w` in `1.2_w`). The
881  // UDL suffix location does not point to the beginning of a token, so we
882  // can't represent the UDL suffix as a separate syntax tree node.
883 
884  return WalkUpFromUserDefinedLiteral(S);
885  }
886 
887  syntax::UserDefinedLiteralExpression *
888  buildUserDefinedLiteral(UserDefinedLiteral *S) {
889  switch (S->getLiteralOperatorKind()) {
891  return new (allocator()) syntax::IntegerUserDefinedLiteralExpression;
893  return new (allocator()) syntax::FloatUserDefinedLiteralExpression;
895  return new (allocator()) syntax::CharUserDefinedLiteralExpression;
897  return new (allocator()) syntax::StringUserDefinedLiteralExpression;
900  // For raw literal operator and numeric literal operator template we
901  // cannot get the type of the operand in the semantic AST. We get this
902  // information from the token. As integer and floating point have the same
903  // token kind, we run `NumericLiteralParser` again to distinguish them.
904  auto TokLoc = S->getBeginLoc();
905  auto TokSpelling =
906  Builder.findToken(TokLoc)->text(Context.getSourceManager());
907  auto Literal =
908  NumericLiteralParser(TokSpelling, TokLoc, Context.getSourceManager(),
909  Context.getLangOpts(), Context.getTargetInfo(),
910  Context.getDiagnostics());
911  if (Literal.isIntegerLiteral())
912  return new (allocator()) syntax::IntegerUserDefinedLiteralExpression;
913  else {
914  assert(Literal.isFloatingLiteral());
915  return new (allocator()) syntax::FloatUserDefinedLiteralExpression;
916  }
917  }
918  llvm_unreachable("Unknown literal operator kind.");
919  }
920 
921  bool WalkUpFromUserDefinedLiteral(UserDefinedLiteral *S) {
922  Builder.markChildToken(S->getBeginLoc(), syntax::NodeRole::LiteralToken);
923  Builder.foldNode(Builder.getExprRange(S), buildUserDefinedLiteral(S), S);
924  return true;
925  }
926 
927  // FIXME: Fix `NestedNameSpecifierLoc::getLocalSourceRange` for the
928  // `DependentTemplateSpecializationType` case.
929  /// Given a nested-name-specifier return the range for the last name
930  /// specifier.
931  ///
932  /// e.g. `std::T::template X<U>::` => `template X<U>::`
933  SourceRange getLocalSourceRange(const NestedNameSpecifierLoc &NNSLoc) {
934  auto SR = NNSLoc.getLocalSourceRange();
935 
936  // The method `NestedNameSpecifierLoc::getLocalSourceRange` *should*
937  // return the desired `SourceRange`, but there is a corner case. For a
938  // `DependentTemplateSpecializationType` this method returns its
939  // qualifiers as well, in other words in the example above this method
940  // returns `T::template X<U>::` instead of only `template X<U>::`
941  if (auto TL = NNSLoc.getTypeLoc()) {
942  if (auto DependentTL =
944  // The 'template' keyword is always present in dependent template
945  // specializations. Except in the case of incorrect code
946  // TODO: Treat the case of incorrect code.
947  SR.setBegin(DependentTL.getTemplateKeywordLoc());
948  }
949  }
950 
951  return SR;
952  }
953 
954  syntax::NodeKind getNameSpecifierKind(const NestedNameSpecifier &NNS) {
955  switch (NNS.getKind()) {
957  return syntax::NodeKind::GlobalNameSpecifier;
961  return syntax::NodeKind::IdentifierNameSpecifier;
963  return syntax::NodeKind::SimpleTemplateNameSpecifier;
965  const auto *NNSType = NNS.getAsType();
966  assert(NNSType);
967  if (isa<DecltypeType>(NNSType))
968  return syntax::NodeKind::DecltypeNameSpecifier;
969  if (isa<TemplateSpecializationType, DependentTemplateSpecializationType>(
970  NNSType))
971  return syntax::NodeKind::SimpleTemplateNameSpecifier;
972  return syntax::NodeKind::IdentifierNameSpecifier;
973  }
974  default:
975  // FIXME: Support Microsoft's __super
976  llvm::report_fatal_error("We don't yet support the __super specifier",
977  true);
978  }
979  }
980 
981  syntax::NameSpecifier *
982  buildNameSpecifier(const NestedNameSpecifierLoc &NNSLoc) {
983  assert(NNSLoc.hasQualifier());
984  auto NameSpecifierTokens =
985  Builder.getRange(getLocalSourceRange(NNSLoc)).drop_back();
986  switch (getNameSpecifierKind(*NNSLoc.getNestedNameSpecifier())) {
987  case syntax::NodeKind::GlobalNameSpecifier:
988  return new (allocator()) syntax::GlobalNameSpecifier;
989  case syntax::NodeKind::IdentifierNameSpecifier: {
990  assert(NameSpecifierTokens.size() == 1);
991  Builder.markChildToken(NameSpecifierTokens.begin(),
993  auto *NS = new (allocator()) syntax::IdentifierNameSpecifier;
994  Builder.foldNode(NameSpecifierTokens, NS, nullptr);
995  return NS;
996  }
997  case syntax::NodeKind::SimpleTemplateNameSpecifier: {
998  // TODO: Build `SimpleTemplateNameSpecifier` children and implement
999  // accessors to them.
1000  // Be aware, we cannot do that simply by calling `TraverseTypeLoc`,
1001  // some `TypeLoc`s have inside them the previous name specifier and
1002  // we want to treat them independently.
1003  auto *NS = new (allocator()) syntax::SimpleTemplateNameSpecifier;
1004  Builder.foldNode(NameSpecifierTokens, NS, nullptr);
1005  return NS;
1006  }
1007  case syntax::NodeKind::DecltypeNameSpecifier: {
1008  const auto TL = NNSLoc.getTypeLoc().castAs<DecltypeTypeLoc>();
1009  if (!RecursiveASTVisitor::TraverseDecltypeTypeLoc(TL))
1010  return nullptr;
1011  auto *NS = new (allocator()) syntax::DecltypeNameSpecifier;
1012  // TODO: Implement accessor to `DecltypeNameSpecifier` inner
1013  // `DecltypeTypeLoc`.
1014  // For that add mapping from `TypeLoc` to `syntax::Node*` then:
1015  // Builder.markChild(TypeLoc, syntax::NodeRole);
1016  Builder.foldNode(NameSpecifierTokens, NS, nullptr);
1017  return NS;
1018  }
1019  default:
1020  llvm_unreachable("getChildKind() does not return this value");
1021  }
1022  }
1023 
1024  // To build syntax tree nodes for NestedNameSpecifierLoc we override
1025  // Traverse instead of WalkUpFrom because we want to traverse the children
1026  // ourselves and build a list instead of a nested tree of name specifier
1027  // prefixes.
1028  bool TraverseNestedNameSpecifierLoc(NestedNameSpecifierLoc QualifierLoc) {
1029  if (!QualifierLoc)
1030  return true;
1031  for (auto It = QualifierLoc; It; It = It.getPrefix()) {
1032  auto *NS = buildNameSpecifier(It);
1033  if (!NS)
1034  return false;
1035  Builder.markChild(NS, syntax::NodeRole::ListElement);
1036  Builder.markChildToken(It.getEndLoc(), syntax::NodeRole::ListDelimiter);
1037  }
1038  Builder.foldNode(Builder.getRange(QualifierLoc.getSourceRange()),
1039  new (allocator()) syntax::NestedNameSpecifier,
1040  QualifierLoc);
1041  return true;
1042  }
1043 
1044  syntax::IdExpression *buildIdExpression(NestedNameSpecifierLoc QualifierLoc,
1045  SourceLocation TemplateKeywordLoc,
1046  SourceRange UnqualifiedIdLoc,
1047  ASTPtr From) {
1048  if (QualifierLoc) {
1049  Builder.markChild(QualifierLoc, syntax::NodeRole::Qualifier);
1050  if (TemplateKeywordLoc.isValid())
1051  Builder.markChildToken(TemplateKeywordLoc,
1053  }
1054 
1055  auto *TheUnqualifiedId = new (allocator()) syntax::UnqualifiedId;
1056  Builder.foldNode(Builder.getRange(UnqualifiedIdLoc), TheUnqualifiedId,
1057  nullptr);
1058  Builder.markChild(TheUnqualifiedId, syntax::NodeRole::UnqualifiedId);
1059 
1060  auto IdExpressionBeginLoc =
1061  QualifierLoc ? QualifierLoc.getBeginLoc() : UnqualifiedIdLoc.getBegin();
1062 
1063  auto *TheIdExpression = new (allocator()) syntax::IdExpression;
1064  Builder.foldNode(
1065  Builder.getRange(IdExpressionBeginLoc, UnqualifiedIdLoc.getEnd()),
1066  TheIdExpression, From);
1067 
1068  return TheIdExpression;
1069  }
1070 
1071  bool WalkUpFromMemberExpr(MemberExpr *S) {
1072  // For `MemberExpr` with implicit `this->` we generate a simple
1073  // `id-expression` syntax node, beacuse an implicit `member-expression` is
1074  // syntactically undistinguishable from an `id-expression`
1075  if (S->isImplicitAccess()) {
1076  buildIdExpression(S->getQualifierLoc(), S->getTemplateKeywordLoc(),
1077  SourceRange(S->getMemberLoc(), S->getEndLoc()), S);
1078  return true;
1079  }
1080 
1081  auto *TheIdExpression = buildIdExpression(
1082  S->getQualifierLoc(), S->getTemplateKeywordLoc(),
1083  SourceRange(S->getMemberLoc(), S->getEndLoc()), nullptr);
1084 
1085  Builder.markChild(TheIdExpression, syntax::NodeRole::Member);
1086 
1087  Builder.markExprChild(S->getBase(), syntax::NodeRole::Object);
1088  Builder.markChildToken(S->getOperatorLoc(), syntax::NodeRole::AccessToken);
1089 
1090  Builder.foldNode(Builder.getExprRange(S),
1091  new (allocator()) syntax::MemberExpression, S);
1092  return true;
1093  }
1094 
1095  bool WalkUpFromDeclRefExpr(DeclRefExpr *S) {
1096  buildIdExpression(S->getQualifierLoc(), S->getTemplateKeywordLoc(),
1097  SourceRange(S->getLocation(), S->getEndLoc()), S);
1098 
1099  return true;
1100  }
1101 
1102  // Same logic as DeclRefExpr.
1103  bool WalkUpFromDependentScopeDeclRefExpr(DependentScopeDeclRefExpr *S) {
1104  buildIdExpression(S->getQualifierLoc(), S->getTemplateKeywordLoc(),
1105  SourceRange(S->getLocation(), S->getEndLoc()), S);
1106 
1107  return true;
1108  }
1109 
1110  bool WalkUpFromCXXThisExpr(CXXThisExpr *S) {
1111  if (!S->isImplicit()) {
1112  Builder.markChildToken(S->getLocation(),
1114  Builder.foldNode(Builder.getExprRange(S),
1115  new (allocator()) syntax::ThisExpression, S);
1116  }
1117  return true;
1118  }
1119 
1120  bool WalkUpFromParenExpr(ParenExpr *S) {
1121  Builder.markChildToken(S->getLParen(), syntax::NodeRole::OpenParen);
1122  Builder.markExprChild(S->getSubExpr(), syntax::NodeRole::SubExpression);
1123  Builder.markChildToken(S->getRParen(), syntax::NodeRole::CloseParen);
1124  Builder.foldNode(Builder.getExprRange(S),
1125  new (allocator()) syntax::ParenExpression, S);
1126  return true;
1127  }
1128 
1129  bool WalkUpFromIntegerLiteral(IntegerLiteral *S) {
1130  Builder.markChildToken(S->getLocation(), syntax::NodeRole::LiteralToken);
1131  Builder.foldNode(Builder.getExprRange(S),
1132  new (allocator()) syntax::IntegerLiteralExpression, S);
1133  return true;
1134  }
1135 
1136  bool WalkUpFromCharacterLiteral(CharacterLiteral *S) {
1137  Builder.markChildToken(S->getLocation(), syntax::NodeRole::LiteralToken);
1138  Builder.foldNode(Builder.getExprRange(S),
1139  new (allocator()) syntax::CharacterLiteralExpression, S);
1140  return true;
1141  }
1142 
1143  bool WalkUpFromFloatingLiteral(FloatingLiteral *S) {
1144  Builder.markChildToken(S->getLocation(), syntax::NodeRole::LiteralToken);
1145  Builder.foldNode(Builder.getExprRange(S),
1146  new (allocator()) syntax::FloatingLiteralExpression, S);
1147  return true;
1148  }
1149 
1150  bool WalkUpFromStringLiteral(StringLiteral *S) {
1151  Builder.markChildToken(S->getBeginLoc(), syntax::NodeRole::LiteralToken);
1152  Builder.foldNode(Builder.getExprRange(S),
1153  new (allocator()) syntax::StringLiteralExpression, S);
1154  return true;
1155  }
1156 
1157  bool WalkUpFromCXXBoolLiteralExpr(CXXBoolLiteralExpr *S) {
1158  Builder.markChildToken(S->getLocation(), syntax::NodeRole::LiteralToken);
1159  Builder.foldNode(Builder.getExprRange(S),
1160  new (allocator()) syntax::BoolLiteralExpression, S);
1161  return true;
1162  }
1163 
1164  bool WalkUpFromCXXNullPtrLiteralExpr(CXXNullPtrLiteralExpr *S) {
1165  Builder.markChildToken(S->getLocation(), syntax::NodeRole::LiteralToken);
1166  Builder.foldNode(Builder.getExprRange(S),
1167  new (allocator()) syntax::CxxNullPtrExpression, S);
1168  return true;
1169  }
1170 
1171  bool WalkUpFromUnaryOperator(UnaryOperator *S) {
1172  Builder.markChildToken(S->getOperatorLoc(),
1174  Builder.markExprChild(S->getSubExpr(), syntax::NodeRole::Operand);
1175 
1176  if (S->isPostfix())
1177  Builder.foldNode(Builder.getExprRange(S),
1178  new (allocator()) syntax::PostfixUnaryOperatorExpression,
1179  S);
1180  else
1181  Builder.foldNode(Builder.getExprRange(S),
1182  new (allocator()) syntax::PrefixUnaryOperatorExpression,
1183  S);
1184 
1185  return true;
1186  }
1187 
1188  bool WalkUpFromBinaryOperator(BinaryOperator *S) {
1189  Builder.markExprChild(S->getLHS(), syntax::NodeRole::LeftHandSide);
1190  Builder.markChildToken(S->getOperatorLoc(),
1192  Builder.markExprChild(S->getRHS(), syntax::NodeRole::RightHandSide);
1193  Builder.foldNode(Builder.getExprRange(S),
1194  new (allocator()) syntax::BinaryOperatorExpression, S);
1195  return true;
1196  }
1197 
1198  /// Builds `CallArguments` syntax node from arguments that appear in source
1199  /// code, i.e. not default arguments.
1201  buildCallArguments(CallExpr::arg_range ArgsAndDefaultArgs) {
1202  auto Args = dropDefaultArgs(ArgsAndDefaultArgs);
1203  for (auto *Arg : Args) {
1204  Builder.markExprChild(Arg, syntax::NodeRole::ListElement);
1205  const auto *DelimiterToken =
1206  std::next(Builder.findToken(Arg->getEndLoc()));
1207  if (DelimiterToken->kind() == clang::tok::TokenKind::comma)
1208  Builder.markChildToken(DelimiterToken, syntax::NodeRole::ListDelimiter);
1209  }
1210 
1211  auto *Arguments = new (allocator()) syntax::CallArguments;
1212  if (!Args.empty())
1213  Builder.foldNode(Builder.getRange((*Args.begin())->getBeginLoc(),
1214  (*(Args.end() - 1))->getEndLoc()),
1215  Arguments, nullptr);
1216 
1217  return Arguments;
1218  }
1219 
1220  bool WalkUpFromCallExpr(CallExpr *S) {
1221  Builder.markExprChild(S->getCallee(), syntax::NodeRole::Callee);
1222 
1223  const auto *LParenToken =
1224  std::next(Builder.findToken(S->getCallee()->getEndLoc()));
1225  // FIXME: Assert that `LParenToken` is indeed a `l_paren` once we have fixed
1226  // the test on decltype desctructors.
1227  if (LParenToken->kind() == clang::tok::l_paren)
1228  Builder.markChildToken(LParenToken, syntax::NodeRole::OpenParen);
1229 
1230  Builder.markChild(buildCallArguments(S->arguments()),
1232 
1233  Builder.markChildToken(S->getRParenLoc(), syntax::NodeRole::CloseParen);
1234 
1235  Builder.foldNode(Builder.getRange(S->getSourceRange()),
1236  new (allocator()) syntax::CallExpression, S);
1237  return true;
1238  }
1239 
1240  bool WalkUpFromCXXConstructExpr(CXXConstructExpr *S) {
1241  // Ignore the implicit calls to default constructors.
1242  if ((S->getNumArgs() == 0 || isa<CXXDefaultArgExpr>(S->getArg(0))) &&
1243  S->getParenOrBraceRange().isInvalid())
1244  return true;
1245  return RecursiveASTVisitor::WalkUpFromCXXConstructExpr(S);
1246  }
1247 
1248  bool TraverseCXXOperatorCallExpr(CXXOperatorCallExpr *S) {
1249  // To construct a syntax tree of the same shape for calls to built-in and
1250  // user-defined operators, ignore the `DeclRefExpr` that refers to the
1251  // operator and treat it as a simple token. Do that by traversing
1252  // arguments instead of children.
1253  for (auto *child : S->arguments()) {
1254  // A postfix unary operator is declared as taking two operands. The
1255  // second operand is used to distinguish from its prefix counterpart. In
1256  // the semantic AST this "phantom" operand is represented as a
1257  // `IntegerLiteral` with invalid `SourceLocation`. We skip visiting this
1258  // operand because it does not correspond to anything written in source
1259  // code.
1260  if (child->getSourceRange().isInvalid()) {
1261  assert(getOperatorNodeKind(*S) ==
1262  syntax::NodeKind::PostfixUnaryOperatorExpression);
1263  continue;
1264  }
1265  if (!TraverseStmt(child))
1266  return false;
1267  }
1268  return WalkUpFromCXXOperatorCallExpr(S);
1269  }
1270 
1271  bool WalkUpFromCXXOperatorCallExpr(CXXOperatorCallExpr *S) {
1272  switch (getOperatorNodeKind(*S)) {
1273  case syntax::NodeKind::BinaryOperatorExpression:
1274  Builder.markExprChild(S->getArg(0), syntax::NodeRole::LeftHandSide);
1275  Builder.markChildToken(S->getOperatorLoc(),
1277  Builder.markExprChild(S->getArg(1), syntax::NodeRole::RightHandSide);
1278  Builder.foldNode(Builder.getExprRange(S),
1279  new (allocator()) syntax::BinaryOperatorExpression, S);
1280  return true;
1281  case syntax::NodeKind::PrefixUnaryOperatorExpression:
1282  Builder.markChildToken(S->getOperatorLoc(),
1284  Builder.markExprChild(S->getArg(0), syntax::NodeRole::Operand);
1285  Builder.foldNode(Builder.getExprRange(S),
1286  new (allocator()) syntax::PrefixUnaryOperatorExpression,
1287  S);
1288  return true;
1289  case syntax::NodeKind::PostfixUnaryOperatorExpression:
1290  Builder.markChildToken(S->getOperatorLoc(),
1292  Builder.markExprChild(S->getArg(0), syntax::NodeRole::Operand);
1293  Builder.foldNode(Builder.getExprRange(S),
1294  new (allocator()) syntax::PostfixUnaryOperatorExpression,
1295  S);
1296  return true;
1297  case syntax::NodeKind::CallExpression: {
1298  Builder.markExprChild(S->getArg(0), syntax::NodeRole::Callee);
1299 
1300  const auto *LParenToken =
1301  std::next(Builder.findToken(S->getArg(0)->getEndLoc()));
1302  // FIXME: Assert that `LParenToken` is indeed a `l_paren` once we have
1303  // fixed the test on decltype desctructors.
1304  if (LParenToken->kind() == clang::tok::l_paren)
1305  Builder.markChildToken(LParenToken, syntax::NodeRole::OpenParen);
1306 
1307  Builder.markChild(buildCallArguments(CallExpr::arg_range(
1308  S->arg_begin() + 1, S->arg_end())),
1310 
1311  Builder.markChildToken(S->getRParenLoc(), syntax::NodeRole::CloseParen);
1312 
1313  Builder.foldNode(Builder.getRange(S->getSourceRange()),
1314  new (allocator()) syntax::CallExpression, S);
1315  return true;
1316  }
1317  case syntax::NodeKind::UnknownExpression:
1318  return WalkUpFromExpr(S);
1319  default:
1320  llvm_unreachable("getOperatorNodeKind() does not return this value");
1321  }
1322  }
1323 
1324  bool WalkUpFromCXXDefaultArgExpr(CXXDefaultArgExpr *S) { return true; }
1325 
1326  bool WalkUpFromNamespaceDecl(NamespaceDecl *S) {
1327  auto Tokens = Builder.getDeclarationRange(S);
1328  if (Tokens.front().kind() == tok::coloncolon) {
1329  // Handle nested namespace definitions. Those start at '::' token, e.g.
1330  // namespace a^::b {}
1331  // FIXME: build corresponding nodes for the name of this namespace.
1332  return true;
1333  }
1334  Builder.foldNode(Tokens, new (allocator()) syntax::NamespaceDefinition, S);
1335  return true;
1336  }
1337 
1338  // FIXME: Deleting the `TraverseParenTypeLoc` override doesn't change test
1339  // results. Find test coverage or remove it.
1340  bool TraverseParenTypeLoc(ParenTypeLoc L) {
1341  // We reverse order of traversal to get the proper syntax structure.
1342  if (!WalkUpFromParenTypeLoc(L))
1343  return false;
1344  return TraverseTypeLoc(L.getInnerLoc());
1345  }
1346 
1347  bool WalkUpFromParenTypeLoc(ParenTypeLoc L) {
1348  Builder.markChildToken(L.getLParenLoc(), syntax::NodeRole::OpenParen);
1349  Builder.markChildToken(L.getRParenLoc(), syntax::NodeRole::CloseParen);
1350  Builder.foldNode(Builder.getRange(L.getLParenLoc(), L.getRParenLoc()),
1351  new (allocator()) syntax::ParenDeclarator, L);
1352  return true;
1353  }
1354 
1355  // Declarator chunks, they are produced by type locs and some clang::Decls.
1356  bool WalkUpFromArrayTypeLoc(ArrayTypeLoc L) {
1357  Builder.markChildToken(L.getLBracketLoc(), syntax::NodeRole::OpenParen);
1358  Builder.markExprChild(L.getSizeExpr(), syntax::NodeRole::Size);
1359  Builder.markChildToken(L.getRBracketLoc(), syntax::NodeRole::CloseParen);
1360  Builder.foldNode(Builder.getRange(L.getLBracketLoc(), L.getRBracketLoc()),
1361  new (allocator()) syntax::ArraySubscript, L);
1362  return true;
1363  }
1364 
1366  buildParameterDeclarationList(ArrayRef<ParmVarDecl *> Params) {
1367  for (auto *P : Params) {
1368  Builder.markChild(P, syntax::NodeRole::ListElement);
1369  const auto *DelimiterToken = std::next(Builder.findToken(P->getEndLoc()));
1370  if (DelimiterToken->kind() == clang::tok::TokenKind::comma)
1371  Builder.markChildToken(DelimiterToken, syntax::NodeRole::ListDelimiter);
1372  }
1373  auto *Parameters = new (allocator()) syntax::ParameterDeclarationList;
1374  if (!Params.empty())
1375  Builder.foldNode(Builder.getRange(Params.front()->getBeginLoc(),
1376  Params.back()->getEndLoc()),
1377  Parameters, nullptr);
1378  return Parameters;
1379  }
1380 
1381  bool WalkUpFromFunctionTypeLoc(FunctionTypeLoc L) {
1382  Builder.markChildToken(L.getLParenLoc(), syntax::NodeRole::OpenParen);
1383 
1384  Builder.markChild(buildParameterDeclarationList(L.getParams()),
1386 
1387  Builder.markChildToken(L.getRParenLoc(), syntax::NodeRole::CloseParen);
1388  Builder.foldNode(Builder.getRange(L.getLParenLoc(), L.getEndLoc()),
1389  new (allocator()) syntax::ParametersAndQualifiers, L);
1390  return true;
1391  }
1392 
1393  bool WalkUpFromFunctionProtoTypeLoc(FunctionProtoTypeLoc L) {
1394  if (!L.getTypePtr()->hasTrailingReturn())
1395  return WalkUpFromFunctionTypeLoc(L);
1396 
1397  auto *TrailingReturnTokens = buildTrailingReturn(L);
1398  // Finish building the node for parameters.
1399  Builder.markChild(TrailingReturnTokens, syntax::NodeRole::TrailingReturn);
1400  return WalkUpFromFunctionTypeLoc(L);
1401  }
1402 
1403  bool TraverseMemberPointerTypeLoc(MemberPointerTypeLoc L) {
1404  // In the source code "void (Y::*mp)()" `MemberPointerTypeLoc` corresponds
1405  // to "Y::*" but it points to a `ParenTypeLoc` that corresponds to
1406  // "(Y::*mp)" We thus reverse the order of traversal to get the proper
1407  // syntax structure.
1408  if (!WalkUpFromMemberPointerTypeLoc(L))
1409  return false;
1410  return TraverseTypeLoc(L.getPointeeLoc());
1411  }
1412 
1413  bool WalkUpFromMemberPointerTypeLoc(MemberPointerTypeLoc L) {
1414  auto SR = L.getLocalSourceRange();
1415  Builder.foldNode(Builder.getRange(SR),
1416  new (allocator()) syntax::MemberPointer, L);
1417  return true;
1418  }
1419 
1420  // The code below is very regular, it could even be generated with some
1421  // preprocessor magic. We merely assign roles to the corresponding children
1422  // and fold resulting nodes.
1423  bool WalkUpFromDeclStmt(DeclStmt *S) {
1424  Builder.foldNode(Builder.getStmtRange(S),
1425  new (allocator()) syntax::DeclarationStatement, S);
1426  return true;
1427  }
1428 
1429  bool WalkUpFromNullStmt(NullStmt *S) {
1430  Builder.foldNode(Builder.getStmtRange(S),
1431  new (allocator()) syntax::EmptyStatement, S);
1432  return true;
1433  }
1434 
1435  bool WalkUpFromSwitchStmt(SwitchStmt *S) {
1436  Builder.markChildToken(S->getSwitchLoc(),
1438  Builder.markStmtChild(S->getBody(), syntax::NodeRole::BodyStatement);
1439  Builder.foldNode(Builder.getStmtRange(S),
1440  new (allocator()) syntax::SwitchStatement, S);
1441  return true;
1442  }
1443 
1444  bool WalkUpFromCaseStmt(CaseStmt *S) {
1445  Builder.markChildToken(S->getKeywordLoc(),
1447  Builder.markExprChild(S->getLHS(), syntax::NodeRole::CaseValue);
1448  Builder.markStmtChild(S->getSubStmt(), syntax::NodeRole::BodyStatement);
1449  Builder.foldNode(Builder.getStmtRange(S),
1450  new (allocator()) syntax::CaseStatement, S);
1451  return true;
1452  }
1453 
1454  bool WalkUpFromDefaultStmt(DefaultStmt *S) {
1455  Builder.markChildToken(S->getKeywordLoc(),
1457  Builder.markStmtChild(S->getSubStmt(), syntax::NodeRole::BodyStatement);
1458  Builder.foldNode(Builder.getStmtRange(S),
1459  new (allocator()) syntax::DefaultStatement, S);
1460  return true;
1461  }
1462 
1463  bool WalkUpFromIfStmt(IfStmt *S) {
1464  Builder.markChildToken(S->getIfLoc(), syntax::NodeRole::IntroducerKeyword);
1465  Stmt *ConditionStatement = S->getCond();
1466  if (S->hasVarStorage())
1467  ConditionStatement = S->getConditionVariableDeclStmt();
1468  Builder.markStmtChild(ConditionStatement, syntax::NodeRole::Condition);
1469  Builder.markStmtChild(S->getThen(), syntax::NodeRole::ThenStatement);
1470  Builder.markChildToken(S->getElseLoc(), syntax::NodeRole::ElseKeyword);
1471  Builder.markStmtChild(S->getElse(), syntax::NodeRole::ElseStatement);
1472  Builder.foldNode(Builder.getStmtRange(S),
1473  new (allocator()) syntax::IfStatement, S);
1474  return true;
1475  }
1476 
1477  bool WalkUpFromForStmt(ForStmt *S) {
1478  Builder.markChildToken(S->getForLoc(), syntax::NodeRole::IntroducerKeyword);
1479  Builder.markStmtChild(S->getBody(), syntax::NodeRole::BodyStatement);
1480  Builder.foldNode(Builder.getStmtRange(S),
1481  new (allocator()) syntax::ForStatement, S);
1482  return true;
1483  }
1484 
1485  bool WalkUpFromWhileStmt(WhileStmt *S) {
1486  Builder.markChildToken(S->getWhileLoc(),
1488  Builder.markStmtChild(S->getBody(), syntax::NodeRole::BodyStatement);
1489  Builder.foldNode(Builder.getStmtRange(S),
1490  new (allocator()) syntax::WhileStatement, S);
1491  return true;
1492  }
1493 
1494  bool WalkUpFromContinueStmt(ContinueStmt *S) {
1495  Builder.markChildToken(S->getContinueLoc(),
1497  Builder.foldNode(Builder.getStmtRange(S),
1498  new (allocator()) syntax::ContinueStatement, S);
1499  return true;
1500  }
1501 
1502  bool WalkUpFromBreakStmt(BreakStmt *S) {
1503  Builder.markChildToken(S->getBreakLoc(),
1505  Builder.foldNode(Builder.getStmtRange(S),
1506  new (allocator()) syntax::BreakStatement, S);
1507  return true;
1508  }
1509 
1510  bool WalkUpFromReturnStmt(ReturnStmt *S) {
1511  Builder.markChildToken(S->getReturnLoc(),
1513  Builder.markExprChild(S->getRetValue(), syntax::NodeRole::ReturnValue);
1514  Builder.foldNode(Builder.getStmtRange(S),
1515  new (allocator()) syntax::ReturnStatement, S);
1516  return true;
1517  }
1518 
1519  bool WalkUpFromCXXForRangeStmt(CXXForRangeStmt *S) {
1520  Builder.markChildToken(S->getForLoc(), syntax::NodeRole::IntroducerKeyword);
1521  Builder.markStmtChild(S->getBody(), syntax::NodeRole::BodyStatement);
1522  Builder.foldNode(Builder.getStmtRange(S),
1523  new (allocator()) syntax::RangeBasedForStatement, S);
1524  return true;
1525  }
1526 
1527  bool WalkUpFromEmptyDecl(EmptyDecl *S) {
1528  Builder.foldNode(Builder.getDeclarationRange(S),
1529  new (allocator()) syntax::EmptyDeclaration, S);
1530  return true;
1531  }
1532 
1533  bool WalkUpFromStaticAssertDecl(StaticAssertDecl *S) {
1534  Builder.markExprChild(S->getAssertExpr(), syntax::NodeRole::Condition);
1535  Builder.markExprChild(S->getMessage(), syntax::NodeRole::Message);
1536  Builder.foldNode(Builder.getDeclarationRange(S),
1537  new (allocator()) syntax::StaticAssertDeclaration, S);
1538  return true;
1539  }
1540 
1541  bool WalkUpFromLinkageSpecDecl(LinkageSpecDecl *S) {
1542  Builder.foldNode(Builder.getDeclarationRange(S),
1543  new (allocator()) syntax::LinkageSpecificationDeclaration,
1544  S);
1545  return true;
1546  }
1547 
1548  bool WalkUpFromNamespaceAliasDecl(NamespaceAliasDecl *S) {
1549  Builder.foldNode(Builder.getDeclarationRange(S),
1550  new (allocator()) syntax::NamespaceAliasDefinition, S);
1551  return true;
1552  }
1553 
1554  bool WalkUpFromUsingDirectiveDecl(UsingDirectiveDecl *S) {
1555  Builder.foldNode(Builder.getDeclarationRange(S),
1556  new (allocator()) syntax::UsingNamespaceDirective, S);
1557  return true;
1558  }
1559 
1560  bool WalkUpFromUsingDecl(UsingDecl *S) {
1561  Builder.foldNode(Builder.getDeclarationRange(S),
1562  new (allocator()) syntax::UsingDeclaration, S);
1563  return true;
1564  }
1565 
1566  bool WalkUpFromUnresolvedUsingValueDecl(UnresolvedUsingValueDecl *S) {
1567  Builder.foldNode(Builder.getDeclarationRange(S),
1568  new (allocator()) syntax::UsingDeclaration, S);
1569  return true;
1570  }
1571 
1572  bool WalkUpFromUnresolvedUsingTypenameDecl(UnresolvedUsingTypenameDecl *S) {
1573  Builder.foldNode(Builder.getDeclarationRange(S),
1574  new (allocator()) syntax::UsingDeclaration, S);
1575  return true;
1576  }
1577 
1578  bool WalkUpFromTypeAliasDecl(TypeAliasDecl *S) {
1579  Builder.foldNode(Builder.getDeclarationRange(S),
1580  new (allocator()) syntax::TypeAliasDeclaration, S);
1581  return true;
1582  }
1583 
1584 private:
1585  /// Folds SimpleDeclarator node (if present) and in case this is the last
1586  /// declarator in the chain it also folds SimpleDeclaration node.
1587  template <class T> bool processDeclaratorAndDeclaration(T *D) {
1588  auto Range = getDeclaratorRange(
1589  Builder.sourceManager(), D->getTypeSourceInfo()->getTypeLoc(),
1591 
1592  // There doesn't have to be a declarator (e.g. `void foo(int)` only has
1593  // declaration, but no declarator).
1594  if (!Range.getBegin().isValid()) {
1595  Builder.markChild(new (allocator()) syntax::DeclaratorList,
1597  Builder.foldNode(Builder.getDeclarationRange(D),
1598  new (allocator()) syntax::SimpleDeclaration, D);
1599  return true;
1600  }
1601 
1602  auto *N = new (allocator()) syntax::SimpleDeclarator;
1603  Builder.foldNode(Builder.getRange(Range), N, nullptr);
1604  Builder.markChild(N, syntax::NodeRole::ListElement);
1605 
1606  if (!Builder.isResponsibleForCreatingDeclaration(D)) {
1607  // If this is not the last declarator in the declaration we expect a
1608  // delimiter after it.
1609  const auto *DelimiterToken = std::next(Builder.findToken(Range.getEnd()));
1610  if (DelimiterToken->kind() == clang::tok::TokenKind::comma)
1611  Builder.markChildToken(DelimiterToken, syntax::NodeRole::ListDelimiter);
1612  } else {
1613  auto *DL = new (allocator()) syntax::DeclaratorList;
1614  auto DeclarationRange = Builder.getDeclarationRange(D);
1615  Builder.foldList(DeclarationRange, DL, nullptr);
1616 
1617  Builder.markChild(DL, syntax::NodeRole::Declarators);
1618  Builder.foldNode(DeclarationRange,
1619  new (allocator()) syntax::SimpleDeclaration, D);
1620  }
1621  return true;
1622  }
1623 
1624  /// Returns the range of the built node.
1625  syntax::TrailingReturnType *buildTrailingReturn(FunctionProtoTypeLoc L) {
1626  assert(L.getTypePtr()->hasTrailingReturn());
1627 
1628  auto ReturnedType = L.getReturnLoc();
1629  // Build node for the declarator, if any.
1630  auto ReturnDeclaratorRange = SourceRange(GetStartLoc().Visit(ReturnedType),
1631  ReturnedType.getEndLoc());
1632  syntax::SimpleDeclarator *ReturnDeclarator = nullptr;
1633  if (ReturnDeclaratorRange.isValid()) {
1634  ReturnDeclarator = new (allocator()) syntax::SimpleDeclarator;
1635  Builder.foldNode(Builder.getRange(ReturnDeclaratorRange),
1636  ReturnDeclarator, nullptr);
1637  }
1638 
1639  // Build node for trailing return type.
1640  auto Return = Builder.getRange(ReturnedType.getSourceRange());
1641  const auto *Arrow = Return.begin() - 1;
1642  assert(Arrow->kind() == tok::arrow);
1643  auto Tokens = llvm::ArrayRef(Arrow, Return.end());
1644  Builder.markChildToken(Arrow, syntax::NodeRole::ArrowToken);
1645  if (ReturnDeclarator)
1646  Builder.markChild(ReturnDeclarator, syntax::NodeRole::Declarator);
1647  auto *R = new (allocator()) syntax::TrailingReturnType;
1648  Builder.foldNode(Tokens, R, L);
1649  return R;
1650  }
1651 
1652  void foldExplicitTemplateInstantiation(
1653  ArrayRef<syntax::Token> Range, const syntax::Token *ExternKW,
1654  const syntax::Token *TemplateKW,
1655  syntax::SimpleDeclaration *InnerDeclaration, Decl *From) {
1656  assert(!ExternKW || ExternKW->kind() == tok::kw_extern);
1657  assert(TemplateKW && TemplateKW->kind() == tok::kw_template);
1658  Builder.markChildToken(ExternKW, syntax::NodeRole::ExternKeyword);
1659  Builder.markChildToken(TemplateKW, syntax::NodeRole::IntroducerKeyword);
1660  Builder.markChild(InnerDeclaration, syntax::NodeRole::Declaration);
1661  Builder.foldNode(
1662  Range, new (allocator()) syntax::ExplicitTemplateInstantiation, From);
1663  }
1664 
1665  syntax::TemplateDeclaration *foldTemplateDeclaration(
1666  ArrayRef<syntax::Token> Range, const syntax::Token *TemplateKW,
1667  ArrayRef<syntax::Token> TemplatedDeclaration, Decl *From) {
1668  assert(TemplateKW && TemplateKW->kind() == tok::kw_template);
1669  Builder.markChildToken(TemplateKW, syntax::NodeRole::IntroducerKeyword);
1670 
1671  auto *N = new (allocator()) syntax::TemplateDeclaration;
1672  Builder.foldNode(Range, N, From);
1673  Builder.markChild(N, syntax::NodeRole::Declaration);
1674  return N;
1675  }
1676 
1677  /// A small helper to save some typing.
1678  llvm::BumpPtrAllocator &allocator() { return Builder.allocator(); }
1679 
1680  syntax::TreeBuilder &Builder;
1681  const ASTContext &Context;
1682 };
1683 } // namespace
1684 
1686  DeclsWithoutSemicolons.insert(D);
1687 }
1688 
1690  if (Loc.isInvalid())
1691  return;
1692  Pending.assignRole(*findToken(Loc), Role);
1693 }
1694 
1696  if (!T)
1697  return;
1698  Pending.assignRole(*T, R);
1699 }
1700 
1702  assert(N);
1703  setRole(N, R);
1704 }
1705 
1707  auto *SN = Mapping.find(N);
1708  assert(SN != nullptr);
1709  setRole(SN, R);
1710 }
1712  auto *SN = Mapping.find(NNSLoc);
1713  assert(SN != nullptr);
1714  setRole(SN, R);
1715 }
1716 
1718  if (!Child)
1719  return;
1720 
1721  syntax::Tree *ChildNode;
1722  if (Expr *ChildExpr = dyn_cast<Expr>(Child)) {
1723  // This is an expression in a statement position, consume the trailing
1724  // semicolon and form an 'ExpressionStatement' node.
1725  markExprChild(ChildExpr, NodeRole::Expression);
1726  ChildNode = new (allocator()) syntax::ExpressionStatement;
1727  // (!) 'getStmtRange()' ensures this covers a trailing semicolon.
1728  Pending.foldChildren(TBTM.tokenBuffer(), getStmtRange(Child), ChildNode);
1729  } else {
1730  ChildNode = Mapping.find(Child);
1731  }
1732  assert(ChildNode != nullptr);
1733  setRole(ChildNode, Role);
1734 }
1735 
1737  if (!Child)
1738  return;
1739  Child = IgnoreImplicit(Child);
1740 
1741  syntax::Tree *ChildNode = Mapping.find(Child);
1742  assert(ChildNode != nullptr);
1743  setRole(ChildNode, Role);
1744 }
1745 
1747  if (L.isInvalid())
1748  return nullptr;
1749  auto It = LocationToToken.find(L);
1750  assert(It != LocationToToken.end());
1751  return It->second;
1752 }
1753 
1754 syntax::TranslationUnit *syntax::buildSyntaxTree(Arena &A,
1756  ASTContext &Context) {
1757  TreeBuilder Builder(A, TBTM);
1758  BuildTreeVisitor(Context, Builder).TraverseAST(Context);
1759  return std::move(Builder).finalize();
1760 }
#define V(N, I)
Definition: ASTContext.h:3299
Forward declaration of all AST node types.
BoundNodesTreeBuilder Nodes
DynTypedNode Node
StringRef P
#define SM(sm)
Definition: Cuda.cpp:83
static Expr * IgnoreImplicit(Expr *E)
Definition: BuildTree.cpp:80
static SourceRange getDeclaratorRange(const SourceManager &SM, TypeLoc T, SourceLocation Name, SourceRange Initializer)
Gets the range of declarator as defined by the C++ grammar.
Definition: BuildTree.cpp:292
static Expr * IgnoreCXXFunctionalCastExprWrappingConstructor(Expr *E)
Definition: BuildTree.cpp:72
static Expr * IgnoreImplicitConstructorSingleStep(Expr *E)
Definition: BuildTree.cpp:53
static CallExpr::arg_range dropDefaultArgs(CallExpr::arg_range Args)
Definition: BuildTree.cpp:158
static LLVM_ATTRIBUTE_UNUSED bool isImplicitExpr(Expr *E)
Definition: BuildTree.cpp:87
static syntax::NodeKind getOperatorNodeKind(const CXXOperatorCallExpr &E)
Definition: BuildTree.cpp:164
static SourceLocation getQualifiedNameStart(NamedDecl *D)
Get the start of the qualified name.
Definition: BuildTree.cpp:250
static SourceRange getInitializerRange(Decl *D)
Gets the range of the initializer inside an init-declarator C++ [dcl.decl].
Definition: BuildTree.cpp:273
Defines the C++ Decl subclasses, other than those for templates (found in DeclTemplate....
Defines the clang::Expr interface and subclasses for C++ expressions.
Forward-declares and imports various common LLVM datatypes that clang wants to use unqualified.
SourceRange Range
Definition: SemaObjC.cpp:754
SourceLocation Loc
Definition: SemaObjC.cpp:755
Defines the clang::SourceLocation class and associated facilities.
Defines the SourceManager interface.
Defines various enumerations that describe declaration and type specifiers.
Defines the clang::TokenKind enum and support functions.
Defines the clang::TypeLoc interface and its subclasses.
SourceLocation End
Holds long-lived AST nodes (such as types and decls) that can be referred to throughout the semantic ...
Definition: ASTContext.h:185
SourceManager & getSourceManager()
Definition: ASTContext.h:708
const LangOptions & getLangOpts() const
Definition: ASTContext.h:778
const TargetInfo & getTargetInfo() const
Definition: ASTContext.h:760
DiagnosticsEngine & getDiagnostics() const
Wrapper for source info for arrays.
Definition: TypeLoc.h:1561
SourceLocation getLBracketLoc() const
Definition: TypeLoc.h:1563
Expr * getSizeExpr() const
Definition: TypeLoc.h:1583
SourceLocation getRBracketLoc() const
Definition: TypeLoc.h:1571
A builtin binary operation expression such as "x + y" or "x <= y".
Definition: Expr.h:3892
Wrapper for source info for block pointers.
Definition: TypeLoc.h:1314
BreakStmt - This represents a break.
Definition: Stmt.h:2980
A boolean literal, per ([C++ lex.bool] Boolean literals).
Definition: ExprCXX.h:720
Represents a call to a C++ constructor.
Definition: ExprCXX.h:1542
A default argument (C++ [dcl.fct.default]).
Definition: ExprCXX.h:1264
CXXForRangeStmt - This represents C++0x [stmt.ranged]'s ranged for statement, represented as 'for (ra...
Definition: StmtCXX.h:135
The null pointer literal (C++11 [lex.nullptr])
Definition: ExprCXX.h:765
A call to an overloaded operator written using operator syntax.
Definition: ExprCXX.h:81
OverloadedOperatorKind getOperator() const
Returns the kind of overloaded operator that this expression refers to.
Definition: ExprCXX.h:111
Represents the this expression in C++.
Definition: ExprCXX.h:1148
CallExpr - Represents a function call (C99 6.5.2.2, C++ [expr.call]).
Definition: Expr.h:2872
llvm::iterator_range< arg_iterator > arg_range
Definition: Expr.h:3108
unsigned getNumArgs() const
getNumArgs - Return the number of actual arguments to this call.
Definition: Expr.h:3050
CaseStmt - Represent a case statement.
Definition: Stmt.h:1801
Represents a class template specialization, which refers to a class template with a given set of temp...
SourceRange getSourceRange() const override LLVM_READONLY
Source range that this declaration covers.
CompoundStmt - This represents a group of statements like { stmt stmt }.
Definition: Stmt.h:1606
ContinueStmt - This represents a continue.
Definition: Stmt.h:2950
A reference to a declared variable, function, enum, etc.
Definition: Expr.h:1260
DeclStmt - Adaptor class for mixing declarations with statements and expressions.
Definition: Stmt.h:1497
Decl - This represents one declaration (or definition), e.g.
Definition: DeclBase.h:86
bool isImplicit() const
isImplicit - Indicates whether the declaration was implicitly generated by the implementation.
Definition: DeclBase.h:599
Decl * getNextDeclInContext()
Definition: DeclBase.h:451
SourceLocation getLocation() const
Definition: DeclBase.h:445
SourceLocation getBeginLoc() const LLVM_READONLY
Definition: DeclBase.h:437
Kind getKind() const
Definition: DeclBase.h:448
virtual SourceRange getSourceRange() const LLVM_READONLY
Source range that this declaration covers.
Definition: DeclBase.h:433
bool isIdentifier() const
Predicate functions for querying what type of name this is.
Represents a ValueDecl that came out of a declarator.
Definition: Decl.h:771
A qualified reference to a name whose declaration cannot yet be resolved.
Definition: ExprCXX.h:3316
Represents an empty-declaration.
Definition: Decl.h:4928
This represents one expression.
Definition: Expr.h:110
ForStmt - This represents a 'for (init;cond;inc)' stmt.
Definition: Stmt.h:2781
bool hasTrailingReturn() const
Whether this function prototype has a trailing return type.
Definition: Type.h:5040
Wrapper for source info for functions.
Definition: TypeLoc.h:1428
ArrayRef< ParmVarDecl * > getParams() const
Definition: TypeLoc.h:1491
TypeLoc getReturnLoc() const
Definition: TypeLoc.h:1509
SourceLocation getLParenLoc() const
Definition: TypeLoc.h:1460
SourceLocation getRParenLoc() const
Definition: TypeLoc.h:1468
IfStmt - This represents an if/then/else.
Definition: Stmt.h:2138
const TypeClass * getTypePtr() const
Definition: TypeLoc.h:514
Represents a linkage specification.
Definition: DeclCXX.h:2934
MemberExpr - [C99 6.5.2.3] Structure and Union Members.
Definition: Expr.h:3224
Wrapper for source info for member pointers.
Definition: TypeLoc.h:1332
SourceRange getLocalSourceRange() const
Definition: TypeLoc.h:1359
This represents a decl that may have a name.
Definition: Decl.h:249
DeclarationName getDeclName() const
Get the actual, stored name of the declaration, which may be a special name.
Definition: Decl.h:315
Represents a C++ namespace alias.
Definition: DeclCXX.h:3120
Represent a C++ namespace.
Definition: Decl.h:548
A C++ nested-name-specifier augmented with source location information.
SourceLocation getBeginLoc() const
Retrieve the location of the beginning of this nested-name-specifier.
TypeLoc getTypeLoc() const
For a nested-name-specifier that refers to a type, retrieve the type with source-location information...
SourceRange getLocalSourceRange() const
Retrieve the source range covering just the last part of this nested-name-specifier,...
NestedNameSpecifier * getNestedNameSpecifier() const
Retrieve the nested-name-specifier to which this instance refers.
bool hasQualifier() const
Evaluates true when this nested-name-specifier location is non-empty.
SourceRange getSourceRange() const LLVM_READONLY
Retrieve the source range covering the entirety of this nested-name-specifier.
Represents a C++ nested name specifier, such as "\::std::vector<int>::".
SpecifierKind getKind() const
Determine what kind of nested name specifier is stored.
const Type * getAsType() const
Retrieve the type stored in this nested name specifier.
@ NamespaceAlias
A namespace alias, stored as a NamespaceAliasDecl*.
@ TypeSpec
A type, stored as a Type*.
@ TypeSpecWithTemplate
A type that was preceded by the 'template' keyword, stored as a Type*.
@ Identifier
An identifier, stored as an IdentifierInfo*.
@ Global
The global specifier '::'. There is no stored value.
@ Namespace
A namespace, stored as a NamespaceDecl*.
NullStmt - This is the null statement ";": C99 6.8.3p3.
Definition: Stmt.h:1569
NumericLiteralParser - This performs strict semantic analysis of the content of a ppnumber,...
Wraps an ObjCPointerType with source location information.
Definition: TypeLoc.h:1370
OpaqueValueExpr - An expression referring to an opaque object of a fixed type and value class.
Definition: Expr.h:1168
ParenExpr - This represents a parethesized expression, e.g.
Definition: Expr.h:2182
SourceLocation getRParenLoc() const
Definition: TypeLoc.h:1195
SourceLocation getLParenLoc() const
Definition: TypeLoc.h:1191
TypeLoc getInnerLoc() const
Definition: TypeLoc.h:1216
TypeLoc getPointeeLoc() const
Definition: TypeLoc.h:1282
Wrapper for source info for pointers.
Definition: TypeLoc.h:1301
A class that does preorder or postorder depth-first traversal on the entire Clang AST and visits each...
bool TraverseStmt(Stmt *S, DataRecursionQueue *Queue=nullptr)
Recursively visit a statement or expression, by dispatching to Traverse*() based on the argument's dy...
ReturnStmt - This represents a return, optionally of an expression: return; return 4;.
Definition: Stmt.h:3019
Encodes a location in the source.
bool isValid() const
Return true if this is a valid SourceLocation object.
This class handles loading and caching of source files into memory.
A trivial tuple used to represent a source range.
void setBegin(SourceLocation b)
SourceLocation getEnd() const
SourceLocation getBegin() const
bool isValid() const
Represents a C++11 static_assert declaration.
Definition: DeclCXX.h:4058
Stmt - This represents one statement.
Definition: Stmt.h:84
SourceRange getSourceRange() const LLVM_READONLY
SourceLocation tokens are not useful in isolation - they are low level value objects created/interpre...
Definition: Stmt.cpp:326
StringLiteral - This represents a string literal expression, e.g.
Definition: Expr.h:1773
SwitchStmt - This represents a 'switch' stmt.
Definition: Stmt.h:2388
Represents the declaration of a struct/union/class/enum.
Definition: Decl.h:3587
The base class of all kinds of template declarations (e.g., class, function, etc.).
Definition: DeclTemplate.h:394
Stores a list of template parameters for a TemplateDecl and its derived classes.
Definition: DeclTemplate.h:73
The top declaration context.
Definition: Decl.h:84
Represents the declaration of a typedef-name via a C++11 alias-declaration.
Definition: Decl.h:3558
Base wrapper for a particular "section" of type source info.
Definition: TypeLoc.h:59
T castAs() const
Convert to the specified TypeLoc type, asserting that this TypeLoc is of the desired type.
Definition: TypeLoc.h:78
SourceLocation getEndLoc() const
Get the end source location.
Definition: TypeLoc.cpp:235
Base class for declarations which introduce a typedef-name.
Definition: Decl.h:3435
UnaryOperator - This represents the unary-expression's (except sizeof and alignof),...
Definition: Expr.h:2235
Represents a dependent using declaration which was marked with typename.
Definition: DeclCXX.h:3959
Represents a dependent using declaration which was not marked with typename.
Definition: DeclCXX.h:3862
A call to a literal operator (C++11 [over.literal]) written as a user-defined literal (C++11 [lit....
Definition: ExprCXX.h:637
@ LOK_String
operator "" X (const CharT *, size_t)
Definition: ExprCXX.h:679
@ LOK_Raw
Raw form: operator "" X (const char *)
Definition: ExprCXX.h:667
@ LOK_Floating
operator "" X (long double)
Definition: ExprCXX.h:676
@ LOK_Integer
operator "" X (unsigned long long)
Definition: ExprCXX.h:673
@ LOK_Template
Raw form: operator "" X<cs...> ()
Definition: ExprCXX.h:670
@ LOK_Character
operator "" X (CharT)
Definition: ExprCXX.h:682
Represents a C++ using-declaration.
Definition: DeclCXX.h:3512
Represents C++ using-directive.
Definition: DeclCXX.h:3015
WhileStmt - This represents a 'while' stmt.
Definition: Stmt.h:2584
A memory arena for syntax trees.
Definition: Tree.h:36
llvm::BumpPtrAllocator & getAllocator()
Definition: Tree.h:38
Array size specified inside a declarator.
Definition: Nodes.h:515
<lhs> <operator> <rhs>
Definition: Nodes.h:198
Models arguments of a function call.
Definition: Nodes.h:146
{ statement1; statement2; … }
Definition: Nodes.h:340
E.g. 'int a, b = 10;'.
Definition: Nodes.h:224
A declaration that can appear at the top-level.
Definition: Nodes.h:354
A semicolon in the top-level context. Does not declare anything.
Definition: Nodes.h:368
The no-op statement, i.e. ';'.
Definition: Nodes.h:231
template <declaration> Examples: template struct X<int> template void foo<int>() template int var<dou...
Definition: Nodes.h:427
Expression in a statement position, e.g.
Definition: Nodes.h:332
for (<init>; <cond>; <increment>) <body>
Definition: Nodes.h:278
if (cond) <then-statement> else <else-statement> FIXME: add condition that models 'expression or vari...
Definition: Nodes.h:267
A leaf node points to a single token.
Definition: Tree.h:132
extern <string-literal> declaration extern <string-literal> { <decls> }
Definition: Nodes.h:386
A list of Elements separated or terminated by a fixed token.
Definition: Tree.h:254
Member pointer inside a declarator E.g.
Definition: Nodes.h:572
namespace <name> = <namespace-reference>
Definition: Nodes.h:445
namespace <name> { <decls> }
Definition: Nodes.h:438
Models a nested-name-specifier.
Definition: Nodes.h:116
A node in a syntax tree.
Definition: Tree.h:54
NodeRole getRole() const
Definition: Tree.h:71
Models a parameter-declaration-list which appears within parameters-and-qualifiers.
Definition: Nodes.h:540
Parameter list for a function type and a trailing return type, if the function has one.
Definition: Nodes.h:560
Declarator inside parentheses.
Definition: Nodes.h:503
for (<decl> : <init>) <body>
Definition: Nodes.h:322
return <expr>; return;
Definition: Nodes.h:313
Groups multiple declarators (e.g.
Definition: Nodes.h:405
A top-level declarator without parentheses.
Definition: Nodes.h:494
static_assert(<condition>, <message>) static_assert(<condition>)
Definition: Nodes.h:376
switch (<cond>) <body>
Definition: Nodes.h:238
template <template-parameters> <declaration>
Definition: Nodes.h:414
A TokenBuffer-powered token manager.
A list of tokens obtained by preprocessing a text buffer and operations to map between the expanded a...
Definition: Tokens.h:174
std::optional< llvm::ArrayRef< syntax::Token > > spelledForExpanded(llvm::ArrayRef< syntax::Token > Expanded) const
Returns the subrange of spelled tokens corresponding to AST node spanning Expanded.
Definition: Tokens.cpp:403
llvm::ArrayRef< syntax::Token > expandedTokens() const
All tokens produced by the preprocessor after all macro replacements, directives, etc.
Definition: Tokens.h:190
uintptr_t Key
A key to identify a specific token.
Definition: TokenManager.h:40
A token coming directly from a file or from a macro invocation.
Definition: Tokens.h:103
tok::TokenKind kind() const
Definition: Tokens.h:109
Trailing return type after the parameter list, including the arrow token.
Definition: Nodes.h:527
A node that has children and represents a syntactic language construct.
Definition: Tree.h:144
using <name> = <type>
Definition: Nodes.h:468
Declaration of an unknown kind, e.g. not yet supported in syntax trees.
Definition: Nodes.h:361
An expression of an unknown kind, i.e.
Definition: Nodes.h:135
A statement of an unknown kind, i.e.
Definition: Nodes.h:217
Models an unqualified-id.
Definition: Nodes.h:127
using <scope>::<name> using typename <scope>::<name>
Definition: Nodes.h:461
using namespace <name>
Definition: Nodes.h:453
while (<cond>) <body>
Definition: Nodes.h:287
A helper class for constructing the syntax tree while traversing a clang AST.
Definition: BuildTree.cpp:367
void foldList(ArrayRef< syntax::Token > SuperRange, syntax::List *New, ASTPtr From)
Populate children for New list, assuming it covers tokens from a subrange of SuperRange.
Definition: BuildTree.cpp:406
void markExprChild(Expr *Child, NodeRole Role)
Should be called for expressions in non-statement position to avoid wrapping into expression statemen...
Definition: BuildTree.cpp:1736
ArrayRef< syntax::Token > getExprRange(const Expr *E) const
Definition: BuildTree.cpp:519
void markChildToken(SourceLocation Loc, NodeRole R)
Set role for a token starting at Loc.
Definition: BuildTree.cpp:1689
void markChild(syntax::Node *N, NodeRole R)
Set role for N.
Definition: BuildTree.cpp:1701
void foldNode(ArrayRef< syntax::Token > Range, syntax::Tree *New, TypeLoc L)
Definition: BuildTree.cpp:391
ArrayRef< syntax::Token > getRange(SourceLocation First, SourceLocation Last) const
Finds the syntax tokens corresponding to the passed source locations.
Definition: BuildTree.cpp:466
const syntax::Token * findToken(SourceLocation L) const
Finds a token starting at L. The token must exist if L is valid.
Definition: BuildTree.cpp:1746
ArrayRef< syntax::Token > getRange(SourceRange Range) const
Finds the syntax tokens corresponding to the SourceRange.
Definition: BuildTree.cpp:458
const SourceManager & sourceManager() const
Definition: BuildTree.cpp:378
void noticeDeclWithoutSemicolon(Decl *D)
Notifies that we should not consume trailing semicolon when computing token range of D.
Definition: BuildTree.cpp:1685
ArrayRef< syntax::Token > getDeclarationRange(Decl *D)
Definition: BuildTree.cpp:509
bool isResponsibleForCreatingDeclaration(const Decl *D) const
Returns true if D is the last declarator in a chain and is thus reponsible for creating SimpleDeclara...
Definition: BuildTree.cpp:483
syntax::TranslationUnit * finalize() &&
Finish building the tree and consume the root node.
Definition: BuildTree.cpp:440
ArrayRef< syntax::Token > getStmtRange(const Stmt *S) const
Find the adjusted range for the statement, consuming the trailing semicolon when needed.
Definition: BuildTree.cpp:525
ArrayRef< syntax::Token > getTemplateRange(const ClassTemplateSpecializationDecl *D) const
Definition: BuildTree.cpp:476
llvm::BumpPtrAllocator & allocator()
Definition: BuildTree.cpp:377
void foldNode(ArrayRef< syntax::Token > Range, syntax::Tree *New, ASTPtr From)
Populate children for New node, assuming it covers tokens from Range.
Definition: BuildTree.cpp:384
TreeBuilder(syntax::Arena &Arena, TokenBufferTokenManager &TBTM)
Definition: BuildTree.cpp:369
void foldNode(llvm::ArrayRef< syntax::Token > Range, syntax::Tree *New, NestedNameSpecifierLoc From)
Definition: BuildTree.cpp:396
void markStmtChild(Stmt *Child, NodeRole Role)
Mark the Child node with a corresponding Role.
Definition: BuildTree.cpp:1717
uint32_t Literal
Literals are represented as positive integers.
Definition: CNFFormula.h:35
NodeRole
A relation between a parent and child node, e.g.
Definition: Nodes.h:54
@ ListElement
List API roles.
@ LiteralToken
A token that represents a literal, e.g. 'nullptr', '1', 'true', etc.
@ Detached
A node without a parent.
@ CloseParen
A closing parenthesis in argument lists and blocks, e.g. '}', ')', etc.
@ IntroducerKeyword
A keywords that introduces some grammar construct, e.g. 'if', 'try', etc.
@ Unknown
Children of an unknown semantic nature, e.g. skipped tokens, comments.
@ BodyStatement
An inner statement for those that have only a single child of kind statement, e.g.
@ OpenParen
An opening parenthesis in argument lists and blocks, e.g. '{', '(', etc.
@ ArrowToken
Tokens or Keywords.
syntax::TranslationUnit * buildSyntaxTree(Arena &A, TokenBufferTokenManager &TBTM, ASTContext &Context)
Build a syntax tree for the main file.
Definition: BuildTree.cpp:1754
NodeKind
A kind of a syntax node, used for implementing casts.
Definition: Nodes.h:32
The JSON file list parser is used to communicate input to InstallAPI.
@ OO_None
Not an overloaded operator.
Definition: OperatorKinds.h:22
@ NUM_OVERLOADED_OPERATORS
Definition: OperatorKinds.h:26
Expr * IgnoreExprNodes(Expr *E, FnTys &&... Fns)
Given an expression E and functions Fn_1,...,Fn_n : Expr * -> Expr *, Recursively apply each of the f...
Definition: IgnoreExpr.h:34
const FunctionProtoType * T
Expr * IgnoreImplicitSingleStep(Expr *E)
Definition: IgnoreExpr.h:111