clang  19.0.0git
Macros.h
Go to the documentation of this file.
1 //===--- Macros.h - Format C++ code -----------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file contains the main building blocks of macro support in
11 /// clang-format.
12 ///
13 /// In order to not violate the requirement that clang-format can format files
14 /// in isolation, clang-format's macro support uses expansions users provide
15 /// as part of clang-format's style configuration.
16 ///
17 /// Macro definitions are of the form "MACRO(p1, p2)=p1 + p2", but only support
18 /// one level of expansion (\see MacroExpander for a full description of what
19 /// is supported).
20 ///
21 /// As part of parsing, clang-format uses the MacroExpander to expand the
22 /// spelled token streams into expanded token streams when it encounters a
23 /// macro call. The UnwrappedLineParser continues to parse UnwrappedLines
24 /// from the expanded token stream.
25 /// After the expanded unwrapped lines are parsed, the MacroCallReconstructor
26 /// matches the spelled token stream into unwrapped lines that best resemble the
27 /// structure of the expanded unwrapped lines. These reconstructed unwrapped
28 /// lines are aliasing the tokens in the expanded token stream, so that token
29 /// annotations will be reused when formatting the spelled macro calls.
30 ///
31 /// When formatting, clang-format annotates and formats the expanded unwrapped
32 /// lines first, determining the token types. Next, it formats the spelled
33 /// unwrapped lines, keeping the token types fixed, while allowing other
34 /// formatting decisions to change.
35 ///
36 //===----------------------------------------------------------------------===//
37 
38 #ifndef CLANG_LIB_FORMAT_MACROS_H
39 #define CLANG_LIB_FORMAT_MACROS_H
40 
41 #include <list>
42 
43 #include "FormatToken.h"
44 #include "llvm/ADT/DenseMap.h"
45 
46 namespace clang {
47 namespace format {
48 
49 struct UnwrappedLine;
50 struct UnwrappedLineNode;
51 
52 /// Takes a set of macro definitions as strings and allows expanding calls to
53 /// those macros.
54 ///
55 /// For example:
56 /// Definition: A(x, y)=x + y
57 /// Call : A(int a = 1, 2)
58 /// Expansion : int a = 1 + 2
59 ///
60 /// Expansion does not check arity of the definition.
61 /// If fewer arguments than expected are provided, the remaining parameters
62 /// are considered empty:
63 /// Call : A(a)
64 /// Expansion: a +
65 /// If more arguments than expected are provided, they will be discarded.
66 ///
67 /// The expander does not support:
68 /// - recursive expansion
69 /// - stringification
70 /// - concatenation
71 /// - variadic macros
72 ///
73 /// Furthermore, only a single expansion of each macro argument is supported,
74 /// so that we cannot get conflicting formatting decisions from different
75 /// expansions.
76 /// Definition: A(x)=x+x
77 /// Call : A(id)
78 /// Expansion : id+x
79 ///
81 public:
83 
84  /// Construct a macro expander from a set of macro definitions.
85  /// Macro definitions must be encoded as UTF-8.
86  ///
87  /// Each entry in \p Macros must conform to the following simple
88  /// macro-definition language:
89  /// <definition> ::= <id> <expansion> | <id> "(" <params> ")" <expansion>
90  /// <params> ::= <id-list> | ""
91  /// <id-list> ::= <id> | <id> "," <params>
92  /// <expansion> ::= "=" <tail> | <eof>
93  /// <tail> ::= <tok> <tail> | <eof>
94  ///
95  /// Macros that cannot be parsed will be silently discarded.
96  ///
97  MacroExpander(const std::vector<std::string> &Macros,
98  SourceManager &SourceMgr, const FormatStyle &Style,
99  llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator,
100  IdentifierTable &IdentTable);
102 
103  /// Returns whether any macro \p Name is defined, regardless of overloads.
104  bool defined(StringRef Name) const;
105 
106  /// Returns whether there is an object-like overload, i.e. where the macro
107  /// has no arguments and should not consume subsequent parentheses.
108  bool objectLike(StringRef Name) const;
109 
110  /// Returns whether macro \p Name provides an overload with the given arity.
111  bool hasArity(StringRef Name, unsigned Arity) const;
112 
113  /// Returns the expanded stream of format tokens for \p ID, where
114  /// each element in \p OptionalArgs is a positional argument to the macro call.
115  /// If \p OptionalArgs is not set, the object-like overload is used.
116  /// If \p OptionalArgs is set, the overload with the arity equal to
117  /// \c OptionalArgs->size() is used.
119  expand(FormatToken *ID, std::optional<ArgsList> OptionalArgs) const;
120 
121 private:
122  struct Definition;
123  class DefinitionParser;
124 
125  void parseDefinition(const std::string &Macro);
126 
127  SourceManager &SourceMgr;
128  const FormatStyle &Style;
129  llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator;
130  IdentifierTable &IdentTable;
132  llvm::StringMap<llvm::DenseMap<int, Definition>> FunctionLike;
133  llvm::StringMap<Definition> ObjectLike;
134 };
135 
136 /// Converts a sequence of UnwrappedLines containing expanded macros into a
137 /// single UnwrappedLine containing the macro calls. This UnwrappedLine may be
138 /// broken into child lines, in a way that best conveys the structure of the
139 /// expanded code.
140 ///
141 /// In the simplest case, a spelled UnwrappedLine contains one macro, and after
142 /// expanding it we have one expanded UnwrappedLine. In general, macro
143 /// expansions can span UnwrappedLines, and multiple macros can contribute
144 /// tokens to the same line. We keep consuming expanded lines until:
145 /// * all expansions that started have finished (we're not chopping any macros
146 /// in half)
147 /// * *and* we've reached the end of a *spelled* unwrapped line.
148 ///
149 /// A single UnwrappedLine represents this chunk of code.
150 ///
151 /// After this point, the state of the spelled/expanded stream is "in sync"
152 /// (both at the start of an UnwrappedLine, with no macros open), so the
153 /// Reconstructor can be thrown away and parsing can continue.
154 ///
155 /// Given a mapping from the macro name identifier token in the macro call
156 /// to the tokens of the macro call, for example:
157 /// CLASSA -> CLASSA({public: void x();})
158 ///
159 /// When getting the formatted lines of the expansion via the \c addLine method
160 /// (each '->' specifies a call to \c addLine ):
161 /// -> class A {
162 /// -> public:
163 /// -> void x();
164 /// -> };
165 ///
166 /// Creates the tree of unwrapped lines containing the macro call tokens so that
167 /// the macro call tokens fit the semantic structure of the expanded formatted
168 /// lines:
169 /// -> CLASSA({
170 /// -> public:
171 /// -> void x();
172 /// -> })
174 public:
175  /// Create a Reconstructor whose resulting \p UnwrappedLine will start at
176  /// \p Level, using the map from name identifier token to the corresponding
177  /// tokens of the spelled macro call.
179  unsigned Level,
180  const llvm::DenseMap<FormatToken *, std::unique_ptr<UnwrappedLine>>
181  &ActiveExpansions);
182 
183  /// For the given \p Line, match all occurrences of tokens expanded from a
184  /// macro to unwrapped lines in the spelled macro call so that the resulting
185  /// tree of unwrapped lines best resembles the structure of unwrapped lines
186  /// passed in via \c addLine.
187  void addLine(const UnwrappedLine &Line);
188 
189  /// Check whether at the current state there is no open macro expansion
190  /// that needs to be processed to finish a macro call.
191  /// Only when \c finished() is true, \c takeResult() can be called to retrieve
192  /// the resulting \c UnwrappedLine.
193  /// If there are multiple subsequent macro calls within an unwrapped line in
194  /// the spelled token stream, the calling code may also continue to call
195  /// \c addLine() when \c finished() is true.
196  bool finished() const { return ActiveExpansions.empty(); } // True iff the stack of active expansions is empty.
197 
198  /// Retrieve the formatted \c UnwrappedLine containing the original
199  /// macro calls, formatted according to the expanded token stream received
200  /// via \c addLine().
201  /// Generally, this line tries to have the same structure as the expanded,
202  /// formatted unwrapped lines handed in via \c addLine(), with the exception
203  /// that for multiple top-level lines, each subsequent line will be the
204  /// child of the last token in its predecessor. This representation is chosen
205  /// because it is a precondition to the formatter that we get what looks like
206  /// a single statement in a single \c UnwrappedLine (i.e. matching parens).
207  ///
208  /// If a token in a macro argument is a child of a token in the expansion,
209  /// the parent will be the corresponding token in the macro call.
210  /// For example:
211  /// #define C(a, b) class C { a b
212  /// C(int x;, int y;)
213  /// would expand to
214  /// class C { int x; int y;
215  /// where in a formatted line "int x;" and "int y;" would both be new separate
216  /// lines.
217  ///
218  /// In the result, "int x;" will be a child of the opening parenthesis in "C("
219  /// and "int y;" will be a child of the "," token:
220  /// C (
221  /// \- int x;
222  /// ,
223  /// \- int y;
224  /// )
226 
227 private:
228  void add(FormatToken *Token, FormatToken *ExpandedParent, bool First,
229  unsigned Level);
230  void prepareParent(FormatToken *ExpandedParent, bool First, unsigned Level);
231  FormatToken *getParentInResult(FormatToken *Parent);
232  void reconstruct(FormatToken *Token);
233  void startReconstruction(FormatToken *Token);
234  bool reconstructActiveCallUntil(FormatToken *Token);
235  void endReconstruction(FormatToken *Token);
236  bool processNextReconstructed();
237  void finalize();
238 
239  struct ReconstructedLine;
240 
241  void appendToken(FormatToken *Token, ReconstructedLine *L = nullptr);
242  UnwrappedLine createUnwrappedLine(const ReconstructedLine &Line, int Level);
243  void debug(const ReconstructedLine &Line, int Level);
244  ReconstructedLine &parentLine();
245  ReconstructedLine *currentLine();
246  void debugParentMap() const;
247 
248 #ifndef NDEBUG // The state below only exists when NDEBUG is not defined.
249  enum ReconstructorState {
250  Start, // No macro expansion was found in the input yet.
251  InProgress, // During a macro reconstruction.
252  Finalized, // Past macro reconstruction, the result is finalized.
253  };
254  ReconstructorState State = Start; // Current phase; a new reconstructor begins in Start.
255 #endif // NDEBUG
256 
257  // Node in which we build up the resulting unwrapped line; this type is
258  // analogous to UnwrappedLineNode. A node pairs one token with the lines
259  struct LineNode { // that hang off it as children.
260  LineNode() = default; // Leaves Tok null and Children empty.
261  LineNode(FormatToken *Tok) : Tok(Tok) {} // Wraps Tok; the pointer is stored as-is.
262  FormatToken *Tok = nullptr; // Token of this node; null when default-constructed.
263  SmallVector<std::unique_ptr<ReconstructedLine>> Children; // Child lines owned by this node.
264  };
265 
266  // Line in which we build up the resulting unwrapped line: a level plus an
267  // owned sequence of LineNodes. FIXME: Investigate changing UnwrappedLine to
268  // a pointer type and using it instead of rolling our own type.
269  struct ReconstructedLine {
270  explicit ReconstructedLine(unsigned Level) : Level(Level) {} // Starts with no tokens.
271  unsigned Level; // Level passed at construction.
272  SmallVector<std::unique_ptr<LineNode>> Tokens; // Nodes of this line, owned by the line.
273  };
274 
275  // The line in which we collect the resulting reconstructed output.
276  // To reduce special cases in the algorithm, the first level of the line
277  // contains a single null token that has the reconstructed incoming
278  // lines as children.
279  // In the end, we stitch the lines together so that each subsequent line
280  // is a child of the last token of the previous line. This is necessary
281  // in order to format the overall expression as a single logical line -
282  // if we created separate lines, we'd format them with their own top-level
283  // indent depending on the semantic structure, which is not desired.
284  ReconstructedLine Result;
285 
286  // Stack of currently "open" lines, where each line's predecessor's last
287  // token is the parent token for that line.
288  SmallVector<ReconstructedLine *> ActiveReconstructedLines;
289 
290  // Maps from the expanded token to the token that takes its place in the
291  // reconstructed token stream in terms of parent-child relationships.
292  // Note that it might take multiple steps to arrive at the correct
293  // parent in the output.
294  // Given: #define C(a, b) []() { a; b; }
295  // And a call: C(f(), g())
296  // The structure in the incoming formatted unwrapped line will be:
297  // []() {
298  // |- f();
299  // \- g();
300  // }
301  // with f and g being children of the opening brace.
302  // In the reconstructed call:
303  // C(f(), g())
304  // \- f()
305  // \- g()
306  // We want f to be a child of the opening parenthesis and g to be a child
307  // of the comma token in the macro call.
308  // Thus, we map
309  // { -> (
310  // and add
311  // ( -> ,
312  // once we're past the comma in the reconstruction.
313  llvm::DenseMap<FormatToken *, FormatToken *>
314  SpelledParentToReconstructedParent;
315 
316  // Keeps track of a single expansion while we're reconstructing tokens it
317  // generated. SpelledI and SpelledE are iterators into a std::list<UnwrappedLineNode>.
318  struct Expansion {
319  // The identifier token of the macro call.
320  FormatToken *ID;
321  // Our current position in the reconstruction.
322  std::list<UnwrappedLineNode>::iterator SpelledI;
323  // The end of the reconstructed token sequence.
324  std::list<UnwrappedLineNode>::iterator SpelledE;
325  };
326 
327  // Stack of macro calls for which we're in the middle of an expansion.
328  SmallVector<Expansion> ActiveExpansions;
329 
330  struct MacroCallState { // Per-macro-call bookkeeping; instances are stored in MacroCallStructure.
331  MacroCallState(ReconstructedLine *Line, FormatToken *ParentLastToken,
332  FormatToken *MacroCallLParen);
333 
334  ReconstructedLine *Line; // Non-owning pointer to a reconstructed line; NOTE(review): presumably the line holding this call's '(' and ',' tokens - confirm in Macros.cpp.
335 
336  // The last token in the parent line or expansion, or nullptr if the macro
337  // expansion is on a top-level line.
338  //
339  // For example, in the macro call:
340  // auto f = []() { ID(1); };
341  // The MacroCallState for ID will have '{' as ParentLastToken.
342  //
343  // In the macro call:
344  // ID(ID(void f()));
345  // The MacroCallState of the outer ID will have nullptr as ParentLastToken,
346  // while the MacroCallState for the inner ID will have the '(' of the outer
347  // ID as ParentLastToken.
348  //
349  // In the macro call:
350  // ID2(a, ID(b));
351  // The MacroCallState of ID will have ',' as ParentLastToken.
352  FormatToken *ParentLastToken;
353 
354  // The l_paren of this MacroCallState's macro call.
355  FormatToken *MacroCallLParen;
356  };
357 
358  // Keeps track of the lines into which the opening brace/parenthesis &
359  // argument separating commas for each level in the macro call go in order to
360  // put the corresponding closing brace/parenthesis into the same line in the
361  // output and keep track of which parents in the expanded token stream map to
362  // which tokens in the reconstructed stream.
363  // When an opening brace/parenthesis has children, we want the structure of
364  // the output line to be:
365  // |- MACRO
366  // |- (
367  // | \- <argument>
368  // |- ,
369  // | \- <argument>
370  // \- )
371  SmallVector<MacroCallState> MacroCallStructure;
372 
373  // Maps from identifier of the macro call to an unwrapped line containing
374  // all tokens of the macro call.
375  const llvm::DenseMap<FormatToken *, std::unique_ptr<UnwrappedLine>>
376  &IdToReconstructed;
377 };
378 
379 } // namespace format
380 } // namespace clang
381 
382 #endif
NodeId Parent
Definition: ASTDiff.cpp:191
static char ID
Definition: Arena.cpp:183
This file contains the declaration of the FormatToken, a wrapper around Token with additional informa...
LineState State
const AnnotatedLine * Line
Implements an efficient mapping from strings to IdentifierInfo nodes.
This class handles loading and caching of source files into memory.
Token - This structure provides full information about a lexed token.
Definition: Token.h:36
Converts a sequence of UnwrappedLines containing expanded macros into a single UnwrappedLine containi...
Definition: Macros.h:173
void addLine(const UnwrappedLine &Line)
For the given Line, match all occurrences of tokens expanded from a macro to unwrapped lines in the sp...
UnwrappedLine takeResult() &&
Retrieve the formatted UnwrappedLine containing the original macro calls, formatted according to the e...
bool finished() const
Check whether at the current state there is no open macro expansion that needs to be processed to fin...
Definition: Macros.h:196
MacroCallReconstructor(unsigned Level, const llvm::DenseMap< FormatToken *, std::unique_ptr< UnwrappedLine >> &ActiveExpansions)
Create a Reconstructor whose resulting UnwrappedLine will start at Level, using the map from name id...
Takes a set of macro definitions as strings and allows expanding calls to those macros.
Definition: Macros.h:80
bool objectLike(StringRef Name) const
Returns whether there is an object-like overload, i.e.
SmallVector< FormatToken *, 8 > expand(FormatToken *ID, std::optional< ArgsList > OptionalArgs) const
Returns the expanded stream of format tokens for ID, where each element in Args is a positional argum...
bool hasArity(StringRef Name, unsigned Arity) const
Returns whether macro Name provides an overload with the given arity.
MacroExpander(const std::vector< std::string > &Macros, SourceManager &SourceMgr, const FormatStyle &Style, llvm::SpecificBumpPtrAllocator< FormatToken > &Allocator, IdentifierTable &IdentTable)
Construct a macro expander from a set of macro definitions.
bool defined(StringRef Name) const
Returns whether any macro Name is defined, regardless of overloads.
The JSON file list parser is used to communicate input to InstallAPI.
The FormatStyle is used to configure the formatting to follow specific guidelines.
Definition: Format.h:55
A wrapper around a Token storing information about the whitespace characters preceding it.
Definition: FormatToken.h:290
An unwrapped line is a sequence of Token, that we would like to put on a single line if there was no ...