clang  19.0.0git
MacroArgs.cpp
Go to the documentation of this file.
1 //===--- MacroArgs.cpp - Formal argument info for Macros ------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the MacroArgs interface.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "clang/Lex/MacroArgs.h"
15 #include "clang/Lex/MacroInfo.h"
16 #include "clang/Lex/Preprocessor.h"
17 #include "llvm/ADT/SmallString.h"
18 #include "llvm/Support/SaveAndRestore.h"
19 #include <algorithm>
20 
21 using namespace clang;
22 
23 /// MacroArgs ctor function - This destroys the vector passed in.
25  ArrayRef<Token> UnexpArgTokens,
26  bool VarargsElided, Preprocessor &PP) {
27  assert(MI->isFunctionLike() &&
28  "Can't have args for an object-like macro!");
29  MacroArgs **ResultEnt = nullptr;
30  unsigned ClosestMatch = ~0U;
31 
32  // See if we have an entry with a big enough argument list to reuse on the
33  // free list. If so, reuse it.
34  for (MacroArgs **Entry = &PP.MacroArgCache; *Entry;
35  Entry = &(*Entry)->ArgCache) {
36  if ((*Entry)->NumUnexpArgTokens >= UnexpArgTokens.size() &&
37  (*Entry)->NumUnexpArgTokens < ClosestMatch) {
38  ResultEnt = Entry;
39 
40  // If we have an exact match, use it.
41  if ((*Entry)->NumUnexpArgTokens == UnexpArgTokens.size())
42  break;
43  // Otherwise, use the best fit.
44  ClosestMatch = (*Entry)->NumUnexpArgTokens;
45  }
46  }
47  MacroArgs *Result;
48  if (!ResultEnt) {
49  // Allocate memory for a MacroArgs object with the lexer tokens at the end,
50  // and construct the MacroArgs object.
51  Result = new (
52  llvm::safe_malloc(totalSizeToAlloc<Token>(UnexpArgTokens.size())))
53  MacroArgs(UnexpArgTokens.size(), VarargsElided, MI->getNumParams());
54  } else {
55  Result = *ResultEnt;
56  // Unlink this node from the preprocessors singly linked list.
57  *ResultEnt = Result->ArgCache;
58  Result->NumUnexpArgTokens = UnexpArgTokens.size();
59  Result->VarargsElided = VarargsElided;
60  Result->NumMacroArgs = MI->getNumParams();
61  }
62 
63  // Copy the actual unexpanded tokens to immediately after the result ptr.
64  if (!UnexpArgTokens.empty()) {
65  static_assert(std::is_trivial_v<Token>,
66  "assume trivial copyability if copying into the "
67  "uninitialized array (as opposed to reusing a cached "
68  "MacroArgs)");
69  std::copy(UnexpArgTokens.begin(), UnexpArgTokens.end(),
70  Result->getTrailingObjects<Token>());
71  }
72 
73  return Result;
74 }
75 
76 /// destroy - Destroy and deallocate the memory for this object.
77 ///
79  // Don't clear PreExpArgTokens, just clear the entries. Clearing the entries
80  // would deallocate the element vectors.
81  for (unsigned i = 0, e = PreExpArgTokens.size(); i != e; ++i)
82  PreExpArgTokens[i].clear();
83 
84  // Add this to the preprocessor's free list.
85  ArgCache = PP.MacroArgCache;
86  PP.MacroArgCache = this;
87 }
88 
89 /// deallocate - This should only be called by the Preprocessor when managing
90 /// its freelist.
92  MacroArgs *Next = ArgCache;
93 
94  // Run the dtor to deallocate the vectors.
95  this->~MacroArgs();
96  // Release the memory for the object.
97  static_assert(std::is_trivially_destructible_v<Token>,
98  "assume trivially destructible and forego destructors");
99  free(this);
100 
101  return Next;
102 }
103 
104 
105 /// getArgLength - Given a pointer to an expanded or unexpanded argument,
106 /// return the number of tokens, not counting the EOF, that make up the
107 /// argument.
108 unsigned MacroArgs::getArgLength(const Token *ArgPtr) {
109  unsigned NumArgTokens = 0;
110  for (; ArgPtr->isNot(tok::eof); ++ArgPtr)
111  ++NumArgTokens;
112  return NumArgTokens;
113 }
114 
115 
116 /// getUnexpArgument - Return the unexpanded tokens for the specified formal.
117 ///
118 const Token *MacroArgs::getUnexpArgument(unsigned Arg) const {
119 
120  assert(Arg < getNumMacroArguments() && "Invalid arg #");
121  // The unexpanded argument tokens start immediately after the MacroArgs object
122  // in memory.
123  const Token *Start = getTrailingObjects<Token>();
124  const Token *Result = Start;
125 
126  // Scan to find Arg.
127  for (; Arg; ++Result) {
128  assert(Result < Start+NumUnexpArgTokens && "Invalid arg #");
129  if (Result->is(tok::eof))
130  --Arg;
131  }
132  assert(Result < Start+NumUnexpArgTokens && "Invalid arg #");
133  return Result;
134 }
135 
137  Preprocessor &PP) {
138  if (!MI->isVariadic())
139  return false;
140  const int VariadicArgIndex = getNumMacroArguments() - 1;
141  return getPreExpArgument(VariadicArgIndex, PP).front().isNot(tok::eof);
142 }
143 
144 /// ArgNeedsPreexpansion - If we can prove that the argument won't be affected
145 /// by pre-expansion, return false. Otherwise, conservatively return true.
147  Preprocessor &PP) const {
148  // If there are no identifiers in the argument list, or if the identifiers are
149  // known to not be macros, pre-expansion won't modify it.
150  for (; ArgTok->isNot(tok::eof); ++ArgTok)
151  if (IdentifierInfo *II = ArgTok->getIdentifierInfo())
152  if (II->hasMacroDefinition())
153  // Return true even though the macro could be a function-like macro
154  // without a following '(' token, or could be disabled, or not visible.
155  return true;
156  return false;
157 }
158 
159 /// getPreExpArgument - Return the pre-expanded form of the specified
160 /// argument.
161 const std::vector<Token> &MacroArgs::getPreExpArgument(unsigned Arg,
162  Preprocessor &PP) {
163  assert(Arg < getNumMacroArguments() && "Invalid argument number!");
164 
165  // If we have already computed this, return it.
166  if (PreExpArgTokens.size() < getNumMacroArguments())
167  PreExpArgTokens.resize(getNumMacroArguments());
168 
169  std::vector<Token> &Result = PreExpArgTokens[Arg];
170  if (!Result.empty()) return Result;
171 
172  SaveAndRestore PreExpandingMacroArgs(PP.InMacroArgPreExpansion, true);
173 
174  const Token *AT = getUnexpArgument(Arg);
175  unsigned NumToks = getArgLength(AT)+1; // Include the EOF.
176 
177  // Otherwise, we have to pre-expand this argument, populating Result. To do
178  // this, we set up a fake TokenLexer to lex from the unexpanded argument
179  // list. With this installed, we lex expanded tokens until we hit the EOF
180  // token at the end of the unexp list.
181  PP.EnterTokenStream(AT, NumToks, false /*disable expand*/,
182  false /*owns tokens*/, false /*is reinject*/);
183 
184  // Lex all of the macro-expanded tokens into Result.
185  do {
186  Result.push_back(Token());
187  Token &Tok = Result.back();
188  PP.Lex(Tok);
189  } while (Result.back().isNot(tok::eof));
190 
191  // Pop the token stream off the top of the stack. We know that the internal
192  // pointer inside of it is to the "end" of the token stream, but the stack
193  // will not otherwise be popped until the next token is lexed. The problem is
194  // that the token may be lexed sometime after the vector of tokens itself is
195  // destroyed, which would be badness.
196  if (PP.InCachingLexMode())
197  PP.ExitCachingLexMode();
199  return Result;
200 }
201 
202 
203 /// StringifyArgument - Implement C99 6.10.3.2p2, converting a sequence of
204 /// tokens into the literal string token that should be produced by the C #
205 /// preprocessor operator. If Charify is true, then it should be turned into
206 /// a character literal for the Microsoft charize (#@) extension.
207 ///
209  Preprocessor &PP, bool Charify,
210  SourceLocation ExpansionLocStart,
211  SourceLocation ExpansionLocEnd) {
212  Token Tok;
213  Tok.startToken();
214  Tok.setKind(Charify ? tok::char_constant : tok::string_literal);
215 
216  const Token *ArgTokStart = ArgToks;
217 
218  // Stringify all the tokens.
219  SmallString<128> Result;
220  Result += "\"";
221 
222  bool isFirst = true;
223  for (; ArgToks->isNot(tok::eof); ++ArgToks) {
224  const Token &Tok = *ArgToks;
225  if (!isFirst && (Tok.hasLeadingSpace() || Tok.isAtStartOfLine()))
226  Result += ' ';
227  isFirst = false;
228 
229  // If this is a string or character constant, escape the token as specified
230  // by 6.10.3.2p2.
231  if (tok::isStringLiteral(Tok.getKind()) || // "foo", u8R"x(foo)x"_bar, etc.
232  Tok.is(tok::char_constant) || // 'x'
233  Tok.is(tok::wide_char_constant) || // L'x'.
234  Tok.is(tok::utf8_char_constant) || // u8'x'.
235  Tok.is(tok::utf16_char_constant) || // u'x'.
236  Tok.is(tok::utf32_char_constant)) { // U'x'.
237  bool Invalid = false;
238  std::string TokStr = PP.getSpelling(Tok, &Invalid);
239  if (!Invalid) {
240  std::string Str = Lexer::Stringify(TokStr);
241  Result.append(Str.begin(), Str.end());
242  }
243  } else if (Tok.is(tok::code_completion)) {
245  } else {
246  // Otherwise, just append the token. Do some gymnastics to get the token
247  // in place and avoid copies where possible.
248  unsigned CurStrLen = Result.size();
249  Result.resize(CurStrLen+Tok.getLength());
250  const char *BufPtr = Result.data() + CurStrLen;
251  bool Invalid = false;
252  unsigned ActualTokLen = PP.getSpelling(Tok, BufPtr, &Invalid);
253 
254  if (!Invalid) {
255  // If getSpelling returned a pointer to an already uniqued version of
256  // the string instead of filling in BufPtr, memcpy it onto our string.
257  if (ActualTokLen && BufPtr != &Result[CurStrLen])
258  memcpy(&Result[CurStrLen], BufPtr, ActualTokLen);
259 
260  // If the token was dirty, the spelling may be shorter than the token.
261  if (ActualTokLen != Tok.getLength())
262  Result.resize(CurStrLen+ActualTokLen);
263  }
264  }
265  }
266 
267  // If the last character of the string is a \, and if it isn't escaped, this
268  // is an invalid string literal, diagnose it as specified in C99.
269  if (Result.back() == '\\') {
270  // Count the number of consecutive \ characters. If even, then they are
271  // just escaped backslashes, otherwise it's an error.
272  unsigned FirstNonSlash = Result.size()-2;
273  // Guaranteed to find the starting " if nothing else.
274  while (Result[FirstNonSlash] == '\\')
275  --FirstNonSlash;
276  if ((Result.size()-1-FirstNonSlash) & 1) {
277  // Diagnose errors for things like: #define F(X) #X / F(\‍)
278  PP.Diag(ArgToks[-1], diag::pp_invalid_string_literal);
279  Result.pop_back(); // remove one of the \'s.
280  }
281  }
282  Result += '"';
283 
284  // If this is the charify operation and the result is not a legal character
285  // constant, diagnose it.
286  if (Charify) {
287  // First step, turn double quotes into single quotes:
288  Result[0] = '\'';
289  Result[Result.size()-1] = '\'';
290 
291  // Check for bogus character.
292  bool isBad = false;
293  if (Result.size() == 3)
294  isBad = Result[1] == '\''; // ''' is not legal. '\' already fixed above.
295  else
296  isBad = (Result.size() != 4 || Result[1] != '\\'); // Not '\x'
297 
298  if (isBad) {
299  PP.Diag(ArgTokStart[0], diag::err_invalid_character_to_charify);
300  Result = "' '"; // Use something arbitrary, but legal.
301  }
302  }
303 
304  PP.CreateString(Result, Tok,
305  ExpansionLocStart, ExpansionLocEnd);
306  return Tok;
307 }
Defines the clang::MacroInfo and clang::MacroDirective classes.
Defines the clang::Preprocessor interface.
__DEVICE__ void * memcpy(void *__a, const void *__b, size_t __c)
One of these records is kept for each identifier that is lexed.
static std::string Stringify(StringRef Str, bool Charify=false)
Stringify - Convert the specified string into a C string by i) escaping '\' and " characters and ii) ...
Definition: Lexer.cpp:310
MacroArgs - An instance of this class captures information about the formal arguments specified to a ...
Definition: MacroArgs.h:30
const Token * getUnexpArgument(unsigned Arg) const
getUnexpArgument - Return a pointer to the first token of the unexpanded token list for the specified...
Definition: MacroArgs.cpp:118
MacroArgs * deallocate()
deallocate - This should only be called by the Preprocessor when managing its freelist.
Definition: MacroArgs.cpp:91
const std::vector< Token > & getPreExpArgument(unsigned Arg, Preprocessor &PP)
getPreExpArgument - Return the pre-expanded form of the specified argument.
Definition: MacroArgs.cpp:161
static MacroArgs * create(const MacroInfo *MI, ArrayRef< Token > UnexpArgTokens, bool VarargsElided, Preprocessor &PP)
MacroArgs ctor function - Create a new MacroArgs object with the specified macro and argument info.
Definition: MacroArgs.cpp:24
static unsigned getArgLength(const Token *ArgPtr)
getArgLength - Given a pointer to an expanded or unexpanded argument, return the number of tokens,...
Definition: MacroArgs.cpp:108
bool ArgNeedsPreexpansion(const Token *ArgTok, Preprocessor &PP) const
ArgNeedsPreexpansion - If we can prove that the argument won't be affected by pre-expansion,...
Definition: MacroArgs.cpp:146
bool invokedWithVariadicArgument(const MacroInfo *const MI, Preprocessor &PP)
Returns true if the macro was defined with a variadic (ellipsis) parameter AND was invoked with at le...
Definition: MacroArgs.cpp:136
unsigned getNumMacroArguments() const
getNumMacroArguments - Return the number of arguments the invoked macro expects.
Definition: MacroArgs.h:95
static Token StringifyArgument(const Token *ArgToks, Preprocessor &PP, bool Charify, SourceLocation ExpansionLocStart, SourceLocation ExpansionLocEnd)
StringifyArgument - Implement C99 6.10.3.2p2, converting a sequence of tokens into the literal string...
Definition: MacroArgs.cpp:208
void destroy(Preprocessor &PP)
destroy - Destroy and deallocate the memory for this object.
Definition: MacroArgs.cpp:78
Encapsulates the data about a macro definition (e.g.
Definition: MacroInfo.h:39
bool isFunctionLike() const
Definition: MacroInfo.h:201
unsigned getNumParams() const
Definition: MacroInfo.h:184
bool isVariadic() const
Definition: MacroInfo.h:209
Engages in a tight little dance with the lexer to efficiently preprocess tokens.
Definition: Preprocessor.h:128
void CreateString(StringRef Str, Token &Tok, SourceLocation ExpansionLocStart=SourceLocation(), SourceLocation ExpansionLocEnd=SourceLocation())
Plop the specified string into a scratch buffer and set the specified token's location and length to ...
void Lex(Token &Result)
Lex the next token for this preprocessor.
StringRef getSpelling(SourceLocation loc, SmallVectorImpl< char > &buffer, bool *invalid=nullptr) const
Return the 'spelling' of the token at the given location; does not go up to the spelling location or ...
void RemoveTopOfLexerStack()
Pop the current lexer/macro exp off the top of the lexer stack.
void CodeCompleteNaturalLanguage()
Hook used by the lexer to invoke the "natural language" code completion point.
DiagnosticBuilder Diag(SourceLocation Loc, unsigned DiagID) const
Forwarding function for diagnostics.
Encodes a location in the source.
Token - This structure provides full information about a lexed token.
Definition: Token.h:36
unsigned getLength() const
Definition: Token.h:135
void setKind(tok::TokenKind K)
Definition: Token.h:95
bool is(tok::TokenKind K) const
is/isNot - Predicates to check if this token is a specific kind, as in "if (Tok.is(tok::l_brace)) {....
Definition: Token.h:99
tok::TokenKind getKind() const
Definition: Token.h:94
bool isAtStartOfLine() const
isAtStartOfLine - Return true if this token is at the start of a line.
Definition: Token.h:276
IdentifierInfo * getIdentifierInfo() const
Definition: Token.h:187
bool hasLeadingSpace() const
Return true if this token has whitespace before it.
Definition: Token.h:280
bool isNot(tok::TokenKind K) const
Definition: Token.h:100
void startToken()
Reset all flags to cleared.
Definition: Token.h:177
bool isStringLiteral(TokenKind K)
Return true if this is a C or C++ string-literal (or C++11 user-defined-string-literal) token.
Definition: TokenKinds.h:89
The JSON file list parser is used to communicate input to InstallAPI.