clang  19.0.0git
MacroExpander.cpp
Go to the documentation of this file.
1 //===--- MacroExpander.cpp - Format C++ code --------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file contains the implementation of MacroExpander, which handles macro
11 /// configuration and expansion while formatting.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #include "Macros.h"
16 
17 #include "Encoding.h"
18 #include "FormatToken.h"
19 #include "FormatTokenLexer.h"
20 #include "clang/Basic/TokenKinds.h"
21 #include "clang/Format/Format.h"
22 #include "clang/Lex/HeaderSearch.h"
24 #include "clang/Lex/Lexer.h"
25 #include "clang/Lex/ModuleLoader.h"
26 #include "clang/Lex/Preprocessor.h"
28 #include "llvm/ADT/StringSet.h"
29 #include "llvm/Support/ErrorHandling.h"
30 
31 namespace clang {
32 namespace format {
33 
35  StringRef Name;
38 
39  // Map from each argument's name to its position in the argument list.
40  // With "M(x, y) x + y":
41  // x -> 0
42  // y -> 1
43  llvm::StringMap<size_t> ArgMap;
44 
45  bool ObjectLike = true;
46 };
47 
49 public:
50  DefinitionParser(ArrayRef<FormatToken *> Tokens) : Tokens(Tokens) {
51  assert(!Tokens.empty());
52  Current = Tokens[0];
53  }
54 
55  // Parse the token stream and return the corresponding Definition object.
56  // Returns an empty definition object with a null-Name on error.
58  if (Current->isNot(tok::identifier))
59  return {};
60  Def.Name = Current->TokenText;
61  nextToken();
62  if (Current->is(tok::l_paren)) {
63  Def.ObjectLike = false;
64  if (!parseParams())
65  return {};
66  }
67  if (!parseExpansion())
68  return {};
69 
70  return Def;
71  }
72 
73 private:
74  bool parseParams() {
75  assert(Current->is(tok::l_paren));
76  nextToken();
77  while (Current->is(tok::identifier)) {
78  Def.Params.push_back(Current);
79  Def.ArgMap[Def.Params.back()->TokenText] = Def.Params.size() - 1;
80  nextToken();
81  if (Current->isNot(tok::comma))
82  break;
83  nextToken();
84  }
85  if (Current->isNot(tok::r_paren))
86  return false;
87  nextToken();
88  return true;
89  }
90 
91  bool parseExpansion() {
92  if (!Current->isOneOf(tok::equal, tok::eof))
93  return false;
94  if (Current->is(tok::equal))
95  nextToken();
96  parseTail();
97  return true;
98  }
99 
100  void parseTail() {
101  while (Current->isNot(tok::eof)) {
102  Def.Body.push_back(Current);
103  nextToken();
104  }
105  Def.Body.push_back(Current);
106  }
107 
108  void nextToken() {
109  if (Pos + 1 < Tokens.size())
110  ++Pos;
111  Current = Tokens[Pos];
112  Current->Finalized = true;
113  }
114 
115  size_t Pos = 0;
116  FormatToken *Current = nullptr;
117  Definition Def;
118  ArrayRef<FormatToken *> Tokens;
119 };
120 
122  const std::vector<std::string> &Macros, SourceManager &SourceMgr,
123  const FormatStyle &Style,
124  llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator,
125  IdentifierTable &IdentTable)
126  : SourceMgr(SourceMgr), Style(Style), Allocator(Allocator),
127  IdentTable(IdentTable) {
128  for (const std::string &Macro : Macros)
129  parseDefinition(Macro);
130 }
131 
133 
134 void MacroExpander::parseDefinition(const std::string &Macro) {
135  Buffers.push_back(
136  llvm::MemoryBuffer::getMemBufferCopy(Macro, "<scratch space>"));
137  FileID FID = SourceMgr.createFileID(Buffers.back()->getMemBufferRef());
138  FormatTokenLexer Lex(SourceMgr, FID, 0, Style, encoding::Encoding_UTF8,
139  Allocator, IdentTable);
140  const auto Tokens = Lex.lex();
141  if (!Tokens.empty()) {
142  DefinitionParser Parser(Tokens);
143  auto Definition = Parser.parse();
144  if (Definition.ObjectLike) {
145  ObjectLike[Definition.Name] = std::move(Definition);
146  } else {
147  FunctionLike[Definition.Name][Definition.Params.size()] =
148  std::move(Definition);
149  }
150  }
151 }
152 
153 bool MacroExpander::defined(StringRef Name) const {
154  return FunctionLike.contains(Name) || ObjectLike.contains(Name);
155 }
156 
157 bool MacroExpander::objectLike(StringRef Name) const {
158  return ObjectLike.contains(Name);
159 }
160 
161 bool MacroExpander::hasArity(StringRef Name, unsigned Arity) const {
162  auto it = FunctionLike.find(Name);
163  return it != FunctionLike.end() && it->second.contains(Arity);
164 }
165 
168  std::optional<ArgsList> OptionalArgs) const {
169  if (OptionalArgs)
170  assert(hasArity(ID->TokenText, OptionalArgs->size()));
171  else
172  assert(objectLike(ID->TokenText));
173  const Definition &Def = OptionalArgs
174  ? FunctionLike.find(ID->TokenText)
175  ->second.find(OptionalArgs.value().size())
176  ->second
177  : ObjectLike.find(ID->TokenText)->second;
178  ArgsList Args = OptionalArgs ? OptionalArgs.value() : ArgsList();
180  // Expand each argument at most once.
181  llvm::StringSet<> ExpandedArgs;
182 
183  // Adds the given token to Result.
184  auto pushToken = [&](FormatToken *Tok) {
185  Tok->MacroCtx->ExpandedFrom.push_back(ID);
186  Result.push_back(Tok);
187  };
188 
189  // If Tok references a parameter, adds the corresponding argument to Result.
190  // Returns false if Tok does not reference a parameter.
191  auto expandArgument = [&](FormatToken *Tok) -> bool {
192  // If the current token references a parameter, expand the corresponding
193  // argument.
194  if (Tok->isNot(tok::identifier) || ExpandedArgs.contains(Tok->TokenText))
195  return false;
196  ExpandedArgs.insert(Tok->TokenText);
197  auto I = Def.ArgMap.find(Tok->TokenText);
198  if (I == Def.ArgMap.end())
199  return false;
200  // If there are fewer arguments than referenced parameters, treat the
201  // parameter as empty.
202  // FIXME: Potentially fully abort the expansion instead.
203  if (I->getValue() >= Args.size())
204  return true;
205  for (FormatToken *Arg : Args[I->getValue()]) {
206  // A token can be part of a macro argument at multiple levels.
207  // For example, with "ID(x) x":
208  // in ID(ID(x)), 'x' is expanded first as argument to the inner
209  // ID, then again as argument to the outer ID. We keep the macro
210  // role the token had from the inner expansion.
211  if (!Arg->MacroCtx)
212  Arg->MacroCtx = MacroExpansion(MR_ExpandedArg);
213  pushToken(Arg);
214  }
215  return true;
216  };
217 
218  // Expand the definition into Result.
219  for (FormatToken *Tok : Def.Body) {
220  if (expandArgument(Tok))
221  continue;
222  // Create a copy of the tokens from the macro body, i.e. were not provided
223  // by user code.
224  FormatToken *New = new (Allocator.Allocate()) FormatToken;
225  New->copyFrom(*Tok);
226  assert(!New->MacroCtx);
227  // Tokens that are not part of the user code are not formatted.
229  pushToken(New);
230  }
231  assert(Result.size() >= 1 && Result.back()->is(tok::eof));
232  if (Result.size() > 1) {
233  ++Result[0]->MacroCtx->StartOfExpansion;
234  ++Result[Result.size() - 2]->MacroCtx->EndOfExpansion;
235  }
236  return Result;
237 }
238 
239 } // namespace format
240 } // namespace clang
static char ID
Definition: Arena.cpp:183
Contains functions for text encoding manipulation.
This file contains FormatTokenLexer, which tokenizes a source file into a token stream suitable for C...
This file contains the declaration of the FormatToken, a wrapper around Token with additional informa...
Various functions to configurably format source code.
This file contains the main building blocks of macro support in clang-format.
Defines the clang::Preprocessor interface.
Defines the clang::TokenKind enum and support functions.
An opaque identifier used by SourceManager which refers to a source file (MemoryBuffer) along with it...
Implements an efficient mapping from strings to IdentifierInfo nodes.
Parser - This implements a parser for the C family of languages.
Definition: Parser.h:58
This class handles loading and caching of source files into memory.
FileID createFileID(FileEntryRef SourceFile, SourceLocation IncludePos, SrcMgr::CharacteristicKind FileCharacter, int LoadedID=0, SourceLocation::UIntTy LoadedOffset=0)
Create a new FileID that represents the specified file being #included from the specified IncludePosi...
DefinitionParser(ArrayRef< FormatToken * > Tokens)
bool objectLike(StringRef Name) const
Returns whether there is an object-like overload, i.e.
SmallVector< FormatToken *, 8 > expand(FormatToken *ID, std::optional< ArgsList > OptionalArgs) const
Returns the expanded stream of format tokens for ID, where each element in Args is a positional argum...
ArrayRef< SmallVector< FormatToken *, 8 > > ArgsList
Definition: Macros.h:82
bool hasArity(StringRef Name, unsigned Arity) const
Returns whether macro Name provides an overload with the given arity.
MacroExpander(const std::vector< std::string > &Macros, SourceManager &SourceMgr, const FormatStyle &Style, llvm::SpecificBumpPtrAllocator< FormatToken > &Allocator, IdentifierTable &IdentTable)
Construct a macro expander from a set of macro definitions.
bool defined(StringRef Name) const
Returns whether any macro Name is defined, regardless of overloads.
@ MR_Hidden
The token was expanded from a macro definition, and is not visible as part of the macro call.
Definition: FormatToken.h:233
@ MR_ExpandedArg
The token was expanded from a macro argument when formatting the expanded token sequence.
Definition: FormatToken.h:227
The JSON file list parser is used to communicate input to InstallAPI.
The FormatStyle is used to configure the formatting to follow specific guidelines.
Definition: Format.h:55
A wrapper around a Token storing information about the whitespace characters preceding it.
Definition: FormatToken.h:290
bool isNot(T Kind) const
Definition: FormatToken.h:621
StringRef TokenText
The raw text of the token.
Definition: FormatToken.h:310
unsigned Finalized
If true, this token has been fully formatted (indented and potentially re-formatted inside),...
Definition: FormatToken.h:369
std::optional< MacroExpansion > MacroCtx
Definition: FormatToken.h:591
bool is(tok::TokenKind Kind) const
Definition: FormatToken.h:602
bool isOneOf(A K1, B K2) const
Definition: FormatToken.h:614
void copyFrom(const FormatToken &Tok)
Definition: FormatToken.h:860
SmallVector< FormatToken *, 8 > Params
SmallVector< FormatToken *, 8 > Body
Contains information on the token's role in a macro expansion.
Definition: FormatToken.h:263