clang  19.0.0git
Tree.h
Go to the documentation of this file.
1 //===- Tree.h - structure of the syntax tree ------------------*- C++ -*-=====//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 // Defines the basic structure of the syntax tree. There are two kinds of nodes:
9 // - leaf nodes correspond to tokens,
10 // - tree nodes correspond to language grammar constructs.
11 //
12 // The tree is initially built from an AST. Each node of a newly built tree
13 // covers a continuous subrange of expanded tokens (i.e. tokens after
14 // preprocessing), the specific tokens coverered are stored in the leaf nodes of
15 // a tree. A post-order traversal of a tree will visit leaf nodes in an order
16 // corresponding the original order of expanded tokens.
17 //
18 // This is still work in progress and highly experimental, we leave room for
19 // ourselves to completely change the design and/or implementation.
20 //===----------------------------------------------------------------------===//
21 #ifndef LLVM_CLANG_TOOLING_SYNTAX_TREE_H
22 #define LLVM_CLANG_TOOLING_SYNTAX_TREE_H
23 
24 #include "clang/Basic/TokenKinds.h"
26 #include "llvm/ADT/iterator.h"
27 #include "llvm/Support/Allocator.h"
28 #include <cstdint>
29 #include <vector>
30 
31 namespace clang {
32 namespace syntax {
33 
34 /// A memory arena for syntax trees.
35 // FIXME: use BumpPtrAllocator directly.
36 class Arena {
37 public:
38  llvm::BumpPtrAllocator &getAllocator() { return Allocator; }
39 private:
40  /// Keeps all the allocated nodes and their intermediate data structures.
41  llvm::BumpPtrAllocator Allocator;
42 };
43 
44 class Tree;
45 class TreeBuilder;
46 class FactoryImpl;
47 class MutationsImpl;
48 
49 enum class NodeKind : uint16_t;
50 enum class NodeRole : uint8_t;
51 
52 /// A node in a syntax tree. Each node is either a Leaf (representing tokens) or
53 /// a Tree (representing language constructrs).
54 class Node {
55 protected:
56  /// Newly created nodes are detached from a tree, parent and sibling links are
57  /// set when the node is added as a child to another one.
59  /// Nodes are allocated on Arenas; the destructor is never called.
60  ~Node() = default;
61 
62 public:
63  /// Nodes cannot simply be copied without violating tree invariants.
64  Node(const Node &) = delete;
65  Node &operator=(const Node &) = delete;
66  /// Idiomatically, nodes are allocated on an Arena and never moved.
67  Node(Node &&) = delete;
68  Node &operator=(Node &&) = delete;
69 
70  NodeKind getKind() const { return static_cast<NodeKind>(Kind); }
71  NodeRole getRole() const { return static_cast<NodeRole>(Role); }
72 
73  /// Whether the node is detached from a tree, i.e. does not have a parent.
74  bool isDetached() const;
75  /// Whether the node was created from the AST backed by the source code
76  /// rather than added later through mutation APIs or created with factory
77  /// functions.
78  /// When this flag is true, all subtrees are also original.
79  /// This flag is set to false on any modifications to the node or any of its
80  /// subtrees, even if this simply involves swapping existing subtrees.
81  bool isOriginal() const { return Original; }
82  /// If this function return false, the tree cannot be modified because there
83  /// is no reasonable way to produce the corresponding textual replacements.
84  /// This can happen when the node crosses macro expansion boundaries.
85  ///
86  /// Note that even if the node is not modifiable, its child nodes can be
87  /// modifiable.
88  bool canModify() const { return CanModify; }
89 
90  const Tree *getParent() const { return Parent; }
91  Tree *getParent() { return Parent; }
92 
93  const Node *getNextSibling() const { return NextSibling; }
94  Node *getNextSibling() { return NextSibling; }
95  const Node *getPreviousSibling() const { return PreviousSibling; }
96  Node *getPreviousSibling() { return PreviousSibling; }
97 
98  /// Dumps the structure of a subtree. For debugging and testing purposes.
99  std::string dump(const TokenManager &SM) const;
100  /// Dumps the tokens forming this subtree.
101  std::string dumpTokens(const TokenManager &SM) const;
102 
103  /// Asserts invariants on this node of the tree and its immediate children.
104  /// Will not recurse into the subtree. No-op if NDEBUG is set.
105  void assertInvariants() const;
106  /// Runs checkInvariants on all nodes in the subtree. No-op if NDEBUG is set.
107  void assertInvariantsRecursive() const;
108 
109 private:
110  // Tree is allowed to change the Parent link and Role.
111  friend class Tree;
112  // TreeBuilder is allowed to set the Original and CanModify flags.
113  friend class TreeBuilder;
114  // MutationsImpl sets roles and CanModify flag.
115  friend class MutationsImpl;
116  // FactoryImpl sets CanModify flag.
117  friend class FactoryImpl;
118 
119  void setRole(NodeRole NR);
120 
121  Tree *Parent;
122  Node *NextSibling;
123  Node *PreviousSibling;
124  unsigned Kind : 16;
125  unsigned Role : 8;
126  unsigned Original : 1;
127  unsigned CanModify : 1;
128 };
129 
130 /// A leaf node points to a single token.
131 // FIXME: add TokenKind field (borrow some bits from the Node::kind).
132 class Leaf final : public Node {
133 public:
135  static bool classof(const Node *N);
136 
137  TokenManager::Key getTokenKey() const { return K; }
138 
139 private:
141 };
142 
143 /// A node that has children and represents a syntactic language construct.
144 class Tree : public Node {
145  /// Iterator over children (common base for const/non-const).
146  /// Not invalidated by tree mutations (holds a stable node pointer).
147  template <typename DerivedT, typename NodeT>
148  class ChildIteratorBase
149  : public llvm::iterator_facade_base<DerivedT, std::forward_iterator_tag,
150  NodeT> {
151  protected:
152  NodeT *N = nullptr;
153  using Base = ChildIteratorBase;
154 
155  public:
156  ChildIteratorBase() = default;
157  explicit ChildIteratorBase(NodeT *N) : N(N) {}
158 
159  friend bool operator==(const DerivedT &LHS, const DerivedT &RHS) {
160  return LHS.N == RHS.N;
161  }
162 
163  NodeT &operator*() const { return *N; }
164  DerivedT &operator++() {
165  N = N->getNextSibling();
166  return *static_cast<DerivedT *>(this);
167  }
168 
169  /// Truthy if valid (not past-the-end).
170  /// This allows: if (auto It = find_if(N.children(), ...) )
171  explicit operator bool() const { return N != nullptr; }
172  /// The element, or nullptr if past-the-end.
173  NodeT *asPointer() const { return N; }
174  };
175 
176 public:
177  static bool classof(const Node *N);
178 
179  Node *getFirstChild() { return FirstChild; }
180  const Node *getFirstChild() const { return FirstChild; }
181  Node *getLastChild() { return LastChild; }
182  const Node *getLastChild() const { return LastChild; }
183 
184  const Leaf *findFirstLeaf() const;
186  return const_cast<Leaf *>(const_cast<const Tree *>(this)->findFirstLeaf());
187  }
188 
189  const Leaf *findLastLeaf() const;
191  return const_cast<Leaf *>(const_cast<const Tree *>(this)->findLastLeaf());
192  }
193 
194  /// child_iterator is not invalidated by mutations.
195  struct ChildIterator : ChildIteratorBase<ChildIterator, Node> {
196  using Base::ChildIteratorBase;
197  };
199  : ChildIteratorBase<ConstChildIterator, const Node> {
200  using Base::ChildIteratorBase;
201  ConstChildIterator() = default;
202  ConstChildIterator(const ChildIterator &I) : Base(I.asPointer()) {}
203  };
204 
205  llvm::iterator_range<ChildIterator> getChildren() {
207  }
208  llvm::iterator_range<ConstChildIterator> getChildren() const {
210  }
211 
212  /// Find the first node with a corresponding role.
213  const Node *findChild(NodeRole R) const;
215  return const_cast<Node *>(const_cast<const Tree *>(this)->findChild(R));
216  }
217 
218 protected:
219  using Node::Node;
220 
221 private:
222  /// Append \p Child to the list of children and sets the parent pointer.
223  /// A very low-level operation that does not check any invariants, only used
224  /// by TreeBuilder and FactoryImpl.
225  /// EXPECTS: Role != Detached.
226  void appendChildLowLevel(Node *Child, NodeRole Role);
227  /// Similar but prepends.
228  void prependChildLowLevel(Node *Child, NodeRole Role);
229 
230  /// Like the previous overloads, but does not set role for \p Child.
231  /// EXPECTS: Child->Role != Detached
232  void appendChildLowLevel(Node *Child);
233  void prependChildLowLevel(Node *Child);
234  friend class TreeBuilder;
235  friend class FactoryImpl;
236 
237  /// Replace a range of children [Begin, End) with a list of
238  /// new nodes starting at \p New.
239  /// Only used by MutationsImpl to implement higher-level mutation operations.
240  /// (!) \p New can be null to model removal of the child range.
241  /// (!) \p End can be null to model one past the end.
242  /// (!) \p Begin can be null to model an append.
243  void replaceChildRangeLowLevel(Node *Begin, Node *End, Node *New);
244  friend class MutationsImpl;
245 
246  Node *FirstChild = nullptr;
247  Node *LastChild = nullptr;
248 };
249 
250 /// A list of Elements separated or terminated by a fixed token.
251 ///
252 /// This type models the following grammar construct:
253 /// delimited-list(element, delimiter, termination, canBeEmpty)
254 class List : public Tree {
255 public:
256  template <typename Element> struct ElementAndDelimiter {
257  Element *element;
259  };
260 
261  enum class TerminationKind {
262  Terminated,
264  Separated,
265  };
266 
267  using Tree::Tree;
268  static bool classof(const Node *N);
269  /// Returns the elements and corresponding delimiters. Missing elements
270  /// and delimiters are represented as null pointers.
271  ///
272  /// For example, in a separated list:
273  /// "a, b, c" <=> [("a" , ","), ("b" , "," ), ("c" , null)]
274  /// "a, , c" <=> [("a" , ","), (null, "," ), ("c" , null)]
275  /// "a, b c" <=> [("a" , ","), ("b" , null), ("c" , null)]
276  /// "a, b," <=> [("a" , ","), ("b" , "," ), (null, null)]
277  ///
278  /// In a terminated or maybe-terminated list:
279  /// "a; b; c;" <=> [("a" , ";"), ("b" , ";" ), ("c" , ";" )]
280  /// "a; ; c;" <=> [("a" , ";"), (null, ";" ), ("c" , ";" )]
281  /// "a; b c;" <=> [("a" , ";"), ("b" , null), ("c" , ";" )]
282  /// "a; b; c" <=> [("a" , ";"), ("b" , ";" ), ("c" , null)]
283  std::vector<ElementAndDelimiter<Node>> getElementsAsNodesAndDelimiters();
284 
285  /// Returns the elements of the list. Missing elements are represented
286  /// as null pointers in the same way as in the return value of
287  /// `getElementsAsNodesAndDelimiters()`.
288  std::vector<Node *> getElementsAsNodes();
289 
290  // These can't be implemented with the information we have!
291 
292  /// Returns the appropriate delimiter for this list.
293  ///
294  /// Useful for discovering the correct delimiter to use when adding
295  /// elements to empty or one-element lists.
297 
299 
300  /// Whether this list can be empty in syntactically and semantically correct
301  /// code.
302  ///
303  /// This list may be empty when the source code has errors even if
304  /// canBeEmpty() returns false.
305  bool canBeEmpty() const;
306 };
307 
308 } // namespace syntax
309 } // namespace clang
310 
311 #endif
NodeId Parent
Definition: ASTDiff.cpp:191
SyntaxTree::Impl & Tree
Definition: ASTDiff.cpp:192
DynTypedNode Node
#define SM(sm)
Definition: Cuda.cpp:83
clang::CharUnits operator*(clang::CharUnits::QuantityType Scale, const clang::CharUnits &CU)
Definition: CharUnits.h:225
static void dump(llvm::raw_ostream &OS, StringRef FunctionName, ArrayRef< CounterExpression > Expressions, ArrayRef< CounterMappingRegion > Regions)
Defines the clang::TokenKind enum and support functions.
SourceLocation End
SourceLocation Begin
A memory arena for syntax trees.
Definition: Tree.h:36
llvm::BumpPtrAllocator & getAllocator()
Definition: Tree.h:38
Exposes private syntax tree APIs required to implement node synthesis.
Definition: Synthesis.cpp:18
A leaf node points to a single token.
Definition: Tree.h:132
TokenManager::Key getTokenKey() const
Definition: Tree.h:137
static bool classof(const Node *N)
Leaf(TokenManager::Key K)
Definition: Tree.cpp:35
A list of Elements separated or terminated by a fixed token.
Definition: Tree.h:254
bool canBeEmpty() const
Whether this list can be empty in syntactically and semantically correct code.
Definition: Tree.cpp:427
static bool classof(const Node *N)
std::vector< Node * > getElementsAsNodes()
Returns the elements of the list.
Definition: Tree.cpp:357
std::vector< ElementAndDelimiter< Node > > getElementsAsNodesAndDelimiters()
Returns the elements and corresponding delimiters.
Definition: Tree.cpp:312
TerminationKind getTerminationKind() const
Definition: Tree.cpp:413
clang::tok::TokenKind getDelimiterTokenKind() const
Returns the appropriate delimiter for this list.
Definition: Tree.cpp:399
A node in a syntax tree.
Definition: Tree.h:54
const Node * getNextSibling() const
Definition: Tree.h:93
Node * getPreviousSibling()
Definition: Tree.h:96
Node & operator=(Node &&)=delete
friend class Tree
Definition: Tree.h:111
Node * getNextSibling()
Definition: Tree.h:94
Node & operator=(const Node &)=delete
const Node * getPreviousSibling() const
Definition: Tree.h:95
bool canModify() const
If this function return false, the tree cannot be modified because there is no reasonable way to prod...
Definition: Tree.h:88
const Tree * getParent() const
Definition: Tree.h:90
NodeRole getRole() const
Definition: Tree.h:71
Node(NodeKind Kind)
Newly created nodes are detached from a tree, parent and sibling links are set when the node is added...
Definition: Tree.cpp:37
Node(const Node &)=delete
Nodes cannot simply be copied without violating tree invariants.
Tree * getParent()
Definition: Tree.h:91
NodeKind getKind() const
Definition: Tree.h:70
~Node()=default
Nodes are allocated on Arenas; the destructor is never called.
Node(Node &&)=delete
Idiomatically, nodes are allocated on an Arena and never moved.
bool isOriginal() const
Whether the node was created from the AST backed by the source code rather than added later through m...
Definition: Tree.h:81
Defines interfaces for operating "Token" in the clang syntax-tree.
Definition: TokenManager.h:29
uintptr_t Key
A key to identify a specific token.
Definition: TokenManager.h:40
A node that has children and represents a syntactic language construct.
Definition: Tree.h:144
static bool classof(const Node *N)
const Leaf * findLastLeaf() const
Definition: Tree.cpp:293
const Leaf * findFirstLeaf() const
Definition: Tree.cpp:283
const Node * getFirstChild() const
Definition: Tree.h:180
llvm::iterator_range< ChildIterator > getChildren()
Definition: Tree.h:205
Leaf * findLastLeaf()
Definition: Tree.h:190
llvm::iterator_range< ConstChildIterator > getChildren() const
Definition: Tree.h:208
Node * getFirstChild()
Definition: Tree.h:179
const Node * findChild(NodeRole R) const
Find the first node with a corresponding role.
Definition: Tree.cpp:303
Node * findChild(NodeRole R)
Definition: Tree.h:214
Node * getLastChild()
Definition: Tree.h:181
const Node * getLastChild() const
Definition: Tree.h:182
Leaf * findFirstLeaf()
Definition: Tree.h:185
A helper class for constructing the syntax tree while traversing a clang AST.
Definition: BuildTree.cpp:367
NodeRole
A relation between a parent and child node, e.g.
Definition: Nodes.h:54
NodeKind
A kind of a syntax node, used for implementing casts.
Definition: Nodes.h:32
TokenKind
Provides a simple uniform namespace for tokens from all C languages.
Definition: TokenKinds.h:25
The JSON file list parser is used to communicate input to InstallAPI.
bool operator==(const CallGraphNode::CallRecord &LHS, const CallGraphNode::CallRecord &RHS)
Definition: CallGraph.h:223
#define bool
Definition: stdbool.h:24
child_iterator is not invalidated by mutations.
Definition: Tree.h:195
ConstChildIterator(const ChildIterator &I)
Definition: Tree.h:202