clang  19.0.0git
TokenLexer.cpp
Go to the documentation of this file.
1 //===- TokenLexer.cpp - Lex from a token stream ---------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the TokenLexer interface.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "clang/Lex/TokenLexer.h"
14 #include "clang/Basic/Diagnostic.h"
19 #include "clang/Basic/TokenKinds.h"
21 #include "clang/Lex/Lexer.h"
22 #include "clang/Lex/MacroArgs.h"
23 #include "clang/Lex/MacroInfo.h"
24 #include "clang/Lex/Preprocessor.h"
25 #include "clang/Lex/Token.h"
27 #include "llvm/ADT/ArrayRef.h"
28 #include "llvm/ADT/STLExtras.h"
29 #include "llvm/ADT/SmallString.h"
30 #include "llvm/ADT/SmallVector.h"
31 #include "llvm/ADT/iterator_range.h"
32 #include <cassert>
33 #include <cstring>
34 #include <optional>
35 
36 using namespace clang;
37 
38 /// Create a TokenLexer for the specified macro with the specified actual
39 /// arguments. Note that this takes ownership of the Actuals pointer: it is
/// stored in ActualArgs, which destroy() later releases.
///
/// Besides resetting the lexing state, this reserves one expansion-location
/// chunk covering the macro definition, pre-expands the arguments of a
/// function-like macro that has parameters, and finally disables the macro.
// NOTE(review): listing line 40 (the beginning of this definition's
// signature) is missing from this copy; only the trailing `MacroArgs *Actuals`
// parameter is visible. The body reads Tok, ELEnd and MI, so those are
// presumably the remaining parameters -- confirm against upstream.
41  MacroArgs *Actuals) {
42  // If the client is reusing a TokenLexer, make sure to free any memory
43  // associated with it.
44  destroy();
45 
46  Macro = MI;
47  ActualArgs = Actuals;
48  CurTokenIdx = 0;
49 
 // The expansion range and the leading-whitespace flags are taken from Tok.
50  ExpandLocStart = Tok.getLocation();
51  ExpandLocEnd = ELEnd;
52  AtStartOfLine = Tok.isAtStartOfLine();
53  HasLeadingSpace = Tok.hasLeadingSpace();
54  NextTokGetsSpace = false;
 // Lex straight out of the macro's own token list; it is not owned here.
55  Tokens = &*Macro->tokens_begin();
56  OwnsTokens = false;
57  DisableMacroExpansion = false;
58  IsReinject = false;
59  NumTokens = Macro->tokens_end()-Macro->tokens_begin();
60  MacroExpansionStart = SourceLocation();
61 
 // NOTE(review): listing line 62 is missing from this copy. `SM` is used
 // below without a visible declaration in this function; presumably the
 // missing line declares it -- confirm against upstream.
63  MacroStartSLocOffset = SM.getNextLocalOffset();
64 
65  if (NumTokens > 0) {
66  assert(Tokens[0].getLocation().isValid());
67  assert((Tokens[0].getLocation().isFileID() || Tokens[0].is(tok::comment)) &&
68  "Macro defined in macro?");
69  assert(ExpandLocStart.isValid());
70 
71  // Reserve a source location entry chunk for the length of the macro
72  // definition. Tokens that get lexed directly from the definition will
73  // have their locations pointing inside this chunk. This is to avoid
74  // creating separate source location entries for each token.
75  MacroDefStart = SM.getExpansionLoc(Tokens[0].getLocation());
76  MacroDefLength = Macro->getDefinitionLength(SM);
77  MacroExpansionStart = SM.createExpansionLoc(MacroDefStart,
78  ExpandLocStart,
79  ExpandLocEnd,
80  MacroDefLength);
81  }
82 
83  // If this is a function-like macro, expand the arguments and change
84  // Tokens to point to the expanded tokens.
85  if (Macro->isFunctionLike() && Macro->getNumParams())
86  ExpandFunctionArguments();
87 
88  // Mark the macro as currently disabled, so that it is not recursively
89  // expanded. The macro must be disabled only after argument pre-expansion of
90  // function-like macro arguments occurs.
91  Macro->DisableMacro();
92 }
93 
94 /// Create a TokenLexer for the specified token stream. This does not
95 /// take ownership of the specified token vector.
96 void TokenLexer::Init(const Token *TokArray, unsigned NumToks,
97  bool disableMacroExpansion, bool ownsTokens,
98  bool isReinject) {
99  assert(!isReinject || disableMacroExpansion);
100  // If the client is reusing a TokenLexer, make sure to free any memory
101  // associated with it.
102  destroy();
103 
104  Macro = nullptr;
105  ActualArgs = nullptr;
106  Tokens = TokArray;
107  OwnsTokens = ownsTokens;
108  DisableMacroExpansion = disableMacroExpansion;
109  IsReinject = isReinject;
110  NumTokens = NumToks;
111  CurTokenIdx = 0;
112  ExpandLocStart = ExpandLocEnd = SourceLocation();
113  AtStartOfLine = false;
114  HasLeadingSpace = false;
115  NextTokGetsSpace = false;
116  MacroExpansionStart = SourceLocation();
117 
118  // Set HasLeadingSpace/AtStartOfLine so that the first token will be
119  // returned unmodified.
120  if (NumToks != 0) {
121  AtStartOfLine = TokArray[0].isAtStartOfLine();
122  HasLeadingSpace = TokArray[0].hasLeadingSpace();
123  }
124 }
125 
126 void TokenLexer::destroy() {
127  // If this was a function-like macro that actually uses its arguments, delete
128  // the expanded tokens.
129  if (OwnsTokens) {
130  delete [] Tokens;
131  Tokens = nullptr;
132  OwnsTokens = false;
133  }
134 
135  // TokenLexer owns its formal arguments.
136  if (ActualArgs) ActualArgs->destroy(PP);
137 }
138 
139 bool TokenLexer::MaybeRemoveCommaBeforeVaArgs(
140  SmallVectorImpl<Token> &ResultToks, bool HasPasteOperator, MacroInfo *Macro,
141  unsigned MacroArgNo, Preprocessor &PP) {
142  // Is the macro argument __VA_ARGS__?
143  if (!Macro->isVariadic() || MacroArgNo != Macro->getNumParams()-1)
144  return false;
145 
146  // In Microsoft-compatibility mode, a comma is removed in the expansion
147  // of " ... , __VA_ARGS__ " if __VA_ARGS__ is empty. This extension is
148  // not supported by gcc.
149  if (!HasPasteOperator && !PP.getLangOpts().MSVCCompat)
150  return false;
151 
152  // GCC removes the comma in the expansion of " ... , ## __VA_ARGS__ " if
153  // __VA_ARGS__ is empty, but not in strict C99 mode where there are no
154  // named arguments, where it remains. In all other modes, including C99
155  // with GNU extensions, it is removed regardless of named arguments.
156  // Microsoft also appears to support this extension, unofficially.
157  if (PP.getLangOpts().C99 && !PP.getLangOpts().GNUMode
158  && Macro->getNumParams() < 2)
159  return false;
160 
161  // Is a comma available to be removed?
162  if (ResultToks.empty() || !ResultToks.back().is(tok::comma))
163  return false;
164 
165  // Issue an extension diagnostic for the paste operator.
166  if (HasPasteOperator)
167  PP.Diag(ResultToks.back().getLocation(), diag::ext_paste_comma);
168 
169  // Remove the comma.
170  ResultToks.pop_back();
171 
172  if (!ResultToks.empty()) {
173  // If the comma was right after another paste (e.g. "X##,##__VA_ARGS__"),
174  // then removal of the comma should produce a placemarker token (in C99
175  // terms) which we model by popping off the previous ##, giving us a plain
176  // "X" when __VA_ARGS__ is empty.
177  if (ResultToks.back().is(tok::hashhash))
178  ResultToks.pop_back();
179 
180  // Remember that this comma was elided.
181  ResultToks.back().setFlag(Token::CommaAfterElided);
182  }
183 
184  // Never add a space, even if the comma, ##, or arg had a space.
185  NextTokGetsSpace = false;
186  return true;
187 }
188 
/// Replace the tokens that were appended to ResultToks for the current
/// __VA_OPT__ with a single stringified (or charified) token.
///
/// Any '##' inside those tokens is pasted first; the pasted sequence is then
/// terminated with VCtx.getEOFTok() and handed to
/// MacroArgs::StringifyArgument.
///
/// \param ResultToks  expansion built so far; shrunk to the tokens that
///        preceded the __VA_OPT__ plus the one stringified token.
/// \param VCtx  the __VA_OPT__ context, giving the number of tokens that
///        preceded the __VA_OPT__, its location, and whether '#@' was used.
/// \param VAOPTClosingParenLoc  location of the ')' that closes the
///        __VA_OPT__, used as the end location of the new token.
189 void TokenLexer::stringifyVAOPTContents(
190  SmallVectorImpl<Token> &ResultToks, const VAOptExpansionContext &VCtx,
191  const SourceLocation VAOPTClosingParenLoc) {
192  const int NumToksPriorToVAOpt = VCtx.getNumberOfTokensPriorToVAOpt();
193  const unsigned int NumVAOptTokens = ResultToks.size() - NumToksPriorToVAOpt;
 // Null when the __VA_OPT__ contributed no tokens; the loop below then does
 // not run.
194  Token *const VAOPTTokens =
195  NumVAOptTokens ? &ResultToks[NumToksPriorToVAOpt] : nullptr;
196 
197  SmallVector<Token, 64> ConcatenatedVAOPTResultToks;
198  // FIXME: Should we keep track within VCtx that we did or did not
199  // encounter pasting - and only then perform this loop.
200 
201  // Perform token pasting (concatenation) prior to stringization.
202  for (unsigned int CurTokenIdx = 0; CurTokenIdx != NumVAOptTokens;
203  ++CurTokenIdx) {
204  if (VAOPTTokens[CurTokenIdx].is(tok::hashhash)) {
205  assert(CurTokenIdx != 0 &&
206  "Can not have __VAOPT__ contents begin with a ##");
207  Token &LHS = VAOPTTokens[CurTokenIdx - 1];
 // CurTokenIdx is passed by reference, so pasteTokens may move it.
208  pasteTokens(LHS, llvm::ArrayRef(VAOPTTokens, NumVAOptTokens),
209  CurTokenIdx);
210  // Replace the token prior to the first ## in this iteration.
211  ConcatenatedVAOPTResultToks.back() = LHS;
212  if (CurTokenIdx == NumVAOptTokens)
213  break;
214  }
215  ConcatenatedVAOPTResultToks.push_back(VAOPTTokens[CurTokenIdx]);
216  }
217 
218  ConcatenatedVAOPTResultToks.push_back(VCtx.getEOFTok());
219  // Get the SourceLocation that represents the start location within
220  // the macro definition that marks where this string is substituted
221  // into: i.e. the __VA_OPT__ and the ')' within the spelling of the
222  // macro definition, and use it to indicate that the stringified token
223  // was generated from that location.
224  const SourceLocation ExpansionLocStartWithinMacro =
225  getExpansionLocForMacroDefLoc(VCtx.getVAOptLoc());
226  const SourceLocation ExpansionLocEndWithinMacro =
227  getExpansionLocForMacroDefLoc(VAOPTClosingParenLoc);
228 
229  Token StringifiedVAOPT = MacroArgs::StringifyArgument(
230  &ConcatenatedVAOPTResultToks[0], PP, VCtx.hasCharifyBefore() /*Charify*/,
231  ExpansionLocStartWithinMacro, ExpansionLocEndWithinMacro);
232 
 // NOTE(review): listing line 233 is missing from this copy, so as shown the
 // LeadingSpace flag is set unconditionally. The parallel code for a plain
 // '#' argument guards this with a condition; presumably a guard belongs
 // here too -- confirm against upstream.
234  StringifiedVAOPT.setFlag(Token::LeadingSpace);
235 
236  StringifiedVAOPT.setFlag(Token::StringifiedInMacro);
237  // Resize (shrink) the token stream to just capture this stringified token.
238  ResultToks.resize(NumToksPriorToVAOpt + 1);
239  ResultToks.back() = StringifiedVAOPT;
240 }
241 
242 /// Expand the arguments of a function-like macro so that we can quickly
243 /// return preexpanded tokens from Tokens.
///
/// Walks the current Tokens list once, building the substituted sequence in a
/// local buffer: '#'/'#@' operands are stringified, parameters next to '##'
/// are substituted unexpanded, other parameters are substituted pre-expanded
/// when needed, and __VA_OPT__(...) groups are kept or dropped depending on
/// whether the macro was invoked with variadic arguments. If anything
/// changed, Tokens/NumTokens are redirected to a copy cached by the
/// preprocessor.
// NOTE(review): this copy of the function is missing listing lines 273, 390,
// 392, 397, 502, 504, 570 and 586 (each gap is marked below). Do not treat
// the statements around those gaps as complete.
244 void TokenLexer::ExpandFunctionArguments() {
245  SmallVector<Token, 128> ResultToks;
246 
247  // Loop through 'Tokens', expanding them into ResultToks. Keep
248  // track of whether we change anything. If not, no need to keep them. If so,
249  // we install the newly expanded sequence as the new 'Tokens' list.
250  bool MadeChange = false;
251 
 // Unset until the first token inside a __VA_OPT__ forces the query.
252  std::optional<bool> CalledWithVariadicArguments;
253 
254  VAOptExpansionContext VCtx(PP);
255 
256  for (unsigned I = 0, E = NumTokens; I != E; ++I) {
257  const Token &CurTok = Tokens[I];
258  // We don't want a space for the next token after a paste
259  // operator. In valid code, the token will get smooshed onto the
260  // preceding one anyway. In assembler-with-cpp mode, invalid
261  // pastes are allowed through: in this case, we do not want the
262  // extra whitespace to be added. For example, we want ". ## foo"
263  // -> ".foo" not ". foo".
264  if (I != 0 && !Tokens[I-1].is(tok::hashhash) && CurTok.hasLeadingSpace())
265  NextTokGetsSpace = true;
266 
267  if (VCtx.isVAOptToken(CurTok)) {
268  MadeChange = true;
269  assert(Tokens[I + 1].is(tok::l_paren) &&
270  "__VA_OPT__ must be followed by '('");
271 
272  ++I; // Skip the l_paren
 // NOTE(review): listing line 273 is missing; the next line is only the tail
 // of a call whose last argument is ResultToks.size().
274  ResultToks.size());
275 
276  continue;
277  }
278 
279  // We have entered into the __VA_OPT__ context, so handle tokens
280  // appropriately.
281  if (VCtx.isInVAOpt()) {
282  // If we are about to process a token that is either an argument to
283  // __VA_OPT__ or its closing rparen, then:
284  // 1) If the token is the closing rparen that exits us out of __VA_OPT__,
285  // perform any necessary stringification or placemarker processing,
286  // and/or skip to the next token.
287  // 2) else if macro was invoked without variadic arguments skip this
288  // token.
289  // 3) else (macro was invoked with variadic arguments) process the token
290  // normally.
291 
292  if (Tokens[I].is(tok::l_paren))
293  VCtx.sawOpeningParen(Tokens[I].getLocation());
294  // Continue skipping tokens within __VA_OPT__ if the macro was not
295  // called with variadic arguments, else let the rest of the loop handle
296  // this token. Note sawClosingParen() returns true only if the r_paren matches
297  // the closing r_paren of the __VA_OPT__.
298  if (!Tokens[I].is(tok::r_paren) || !VCtx.sawClosingParen()) {
299  // Lazily expand __VA_ARGS__ when we see the first __VA_OPT__.
300  if (!CalledWithVariadicArguments) {
301  CalledWithVariadicArguments =
302  ActualArgs->invokedWithVariadicArgument(Macro, PP);
303  }
304  if (!*CalledWithVariadicArguments) {
305  // Skip this token.
306  continue;
307  }
308  // ... else the macro was called with variadic arguments, and we do not
309  // have a closing rparen - so process this token normally.
310  } else {
311  // Current token is the closing r_paren which marks the end of the
312  // __VA_OPT__ invocation, so handle any place-marker pasting (if
313  // empty) by removing hashhash either before (if exists) or after. And
314  // also stringify the entire contents if VAOPT was preceded by a hash,
315  // but do so only after any token concatenation that needs to occur
316  // within the contents of VAOPT.
317 
318  if (VCtx.hasStringifyOrCharifyBefore()) {
319  // Replace all the tokens just added from within VAOPT into a single
320  // stringified token. This requires token-pasting to eagerly occur
321  // within these tokens. If either the contents of VAOPT were empty
322  // or the macro wasn't called with any variadic arguments, the result
323  // is a token that represents an empty string.
324  stringifyVAOPTContents(ResultToks, VCtx,
325  /*ClosingParenLoc*/ Tokens[I].getLocation());
326 
327  } else if (/*No tokens within VAOPT*/
328  ResultToks.size() == VCtx.getNumberOfTokensPriorToVAOpt()) {
329  // Treat VAOPT as a placemarker token. Eat either the '##' before the
330  // RHS/VAOPT (if one exists, suggesting that the LHS (if any) to that
331  // hashhash was not a placemarker) or the '##'
332  // after VAOPT, but not both.
333 
334  if (ResultToks.size() && ResultToks.back().is(tok::hashhash)) {
335  ResultToks.pop_back();
336  } else if ((I + 1 != E) && Tokens[I + 1].is(tok::hashhash)) {
337  ++I; // Skip the following hashhash.
338  }
339  } else {
340  // If there's a ## before the __VA_OPT__, we might have discovered
341  // that the __VA_OPT__ begins with a placeholder. We delay action on
342  // that to now to avoid messing up our stashed count of tokens before
343  // __VA_OPT__.
344  if (VCtx.beginsWithPlaceholder()) {
345  assert(VCtx.getNumberOfTokensPriorToVAOpt() > 0 &&
346  ResultToks.size() >= VCtx.getNumberOfTokensPriorToVAOpt() &&
347  ResultToks[VCtx.getNumberOfTokensPriorToVAOpt() - 1].is(
348  tok::hashhash) &&
349  "no token paste before __VA_OPT__");
350  ResultToks.erase(ResultToks.begin() +
351  VCtx.getNumberOfTokensPriorToVAOpt() - 1);
352  }
353  // If the expansion of __VA_OPT__ ends with a placeholder, eat any
354  // following '##' token.
355  if (VCtx.endsWithPlaceholder() && I + 1 != E &&
356  Tokens[I + 1].is(tok::hashhash)) {
357  ++I;
358  }
359  }
360  VCtx.reset();
361  // We processed __VA_OPT__'s closing paren (and the exit out of
362  // __VA_OPT__), so skip to the next token.
363  continue;
364  }
365  }
366 
367  // If we found the stringify operator, get the argument stringified. The
368  // preprocessor already verified that the following token is a macro
369  // parameter or __VA_OPT__ when the #define was lexed.
370 
371  if (CurTok.isOneOf(tok::hash, tok::hashat)) {
372  int ArgNo = Macro->getParameterNum(Tokens[I+1].getIdentifierInfo());
373  assert((ArgNo != -1 || VCtx.isVAOptToken(Tokens[I + 1])) &&
374  "Token following # is not an argument or __VA_OPT__!");
375 
376  if (ArgNo == -1) {
377  // Handle the __VA_OPT__ case.
378  VCtx.sawHashOrHashAtBefore(NextTokGetsSpace,
379  CurTok.is(tok::hashat));
380  continue;
381  }
382  // Else handle the simple argument case.
383  SourceLocation ExpansionLocStart =
384  getExpansionLocForMacroDefLoc(CurTok.getLocation());
385  SourceLocation ExpansionLocEnd =
386  getExpansionLocForMacroDefLoc(Tokens[I+1].getLocation());
387 
388  bool Charify = CurTok.is(tok::hashat);
389  const Token *UnexpArg = ActualArgs->getUnexpArgument(ArgNo);
 // NOTE(review): listing lines 390 and 392 are missing. The next line is only
 // the argument list of a call, and `Res` (pushed below) has no visible
 // declaration; presumably line 390 declares it -- confirm against upstream.
391  UnexpArg, PP, Charify, ExpansionLocStart, ExpansionLocEnd);
393 
394  // The stringified/charified string leading space flag gets set to match
395  // the #/#@ operator.
396  if (NextTokGetsSpace)
 // NOTE(review): listing line 397 (the statement controlled by the `if`
 // above) is missing from this copy.
398 
399  ResultToks.push_back(Res);
400  MadeChange = true;
401  ++I; // Skip arg name.
402  NextTokGetsSpace = false;
403  continue;
404  }
405 
406  // Find out if there is a paste (##) operator before or after the token.
407  bool NonEmptyPasteBefore =
408  !ResultToks.empty() && ResultToks.back().is(tok::hashhash);
409  bool PasteBefore = I != 0 && Tokens[I-1].is(tok::hashhash);
410  bool PasteAfter = I+1 != E && Tokens[I+1].is(tok::hashhash);
411  bool RParenAfter = I+1 != E && Tokens[I+1].is(tok::r_paren);
412 
413  assert((!NonEmptyPasteBefore || PasteBefore || VCtx.isInVAOpt()) &&
414  "unexpected ## in ResultToks");
415 
416  // Otherwise, if this is not an argument token, just add the token to the
417  // output buffer.
418  IdentifierInfo *II = CurTok.getIdentifierInfo();
419  int ArgNo = II ? Macro->getParameterNum(II) : -1;
420  if (ArgNo == -1) {
421  // This isn't an argument, just add it.
422  ResultToks.push_back(CurTok);
423 
424  if (NextTokGetsSpace) {
425  ResultToks.back().setFlag(Token::LeadingSpace);
426  NextTokGetsSpace = false;
427  } else if (PasteBefore && !NonEmptyPasteBefore)
428  ResultToks.back().clearFlag(Token::LeadingSpace);
429 
430  continue;
431  }
432 
433  // An argument is expanded somehow, the result is different than the
434  // input.
435  MadeChange = true;
436 
437  // Otherwise, this is a use of the argument.
438 
439  // In Microsoft mode, remove the comma before __VA_ARGS__ to ensure there
440  // are no trailing commas if __VA_ARGS__ is empty.
441  if (!PasteBefore && ActualArgs->isVarargsElidedUse() &&
442  MaybeRemoveCommaBeforeVaArgs(ResultToks,
443  /*HasPasteOperator=*/false,
444  Macro, ArgNo, PP))
445  continue;
446 
447  // If it is not the LHS/RHS of a ## operator, we must pre-expand the
448  // argument and substitute the expanded tokens into the result. This is
449  // C99 6.10.3.1p1.
450  if (!PasteBefore && !PasteAfter) {
451  const Token *ResultArgToks;
452 
453  // Only preexpand the argument if it could possibly need it. This
454  // avoids some work in common cases.
455  const Token *ArgTok = ActualArgs->getUnexpArgument(ArgNo);
456  if (ActualArgs->ArgNeedsPreexpansion(ArgTok, PP))
457  ResultArgToks = &ActualArgs->getPreExpArgument(ArgNo, PP)[0];
458  else
459  ResultArgToks = ArgTok; // Use non-preexpanded tokens.
460 
461  // If the arg token expanded into anything, append it.
462  if (ResultArgToks->isNot(tok::eof)) {
463  size_t FirstResult = ResultToks.size();
464  unsigned NumToks = MacroArgs::getArgLength(ResultArgToks);
465  ResultToks.append(ResultArgToks, ResultArgToks+NumToks);
466 
467  // In Microsoft-compatibility mode, we follow MSVC's preprocessing
468  // behavior by not considering single commas from nested macro
469  // expansions as argument separators. Set a flag on the token so we can
470  // test for this later when the macro expansion is processed.
471  if (PP.getLangOpts().MSVCCompat && NumToks == 1 &&
472  ResultToks.back().is(tok::comma))
473  ResultToks.back().setFlag(Token::IgnoredComma);
474 
475  // If the '##' came from expanding an argument, turn it into 'unknown'
476  // to avoid pasting.
477  for (Token &Tok : llvm::drop_begin(ResultToks, FirstResult))
478  if (Tok.is(tok::hashhash))
479  Tok.setKind(tok::unknown);
480 
481  if(ExpandLocStart.isValid()) {
482  updateLocForMacroArgTokens(CurTok.getLocation(),
483  ResultToks.begin()+FirstResult,
484  ResultToks.end());
485  }
486 
487  // If any tokens were substituted from the argument, the whitespace
488  // before the first token should match the whitespace of the arg
489  // identifier.
490  ResultToks[FirstResult].setFlagValue(Token::LeadingSpace,
491  NextTokGetsSpace);
492  ResultToks[FirstResult].setFlagValue(Token::StartOfLine, false);
493  NextTokGetsSpace = false;
494  } else {
495  // We're creating a placeholder token. Usually this doesn't matter,
496  // but it can affect paste behavior when at the start or end of a
497  // __VA_OPT__.
498  if (NonEmptyPasteBefore) {
499  // We're imagining a placeholder token is inserted here. If this is
500  // the first token in a __VA_OPT__ after a ##, delete the ##.
501  assert(VCtx.isInVAOpt() && "should only happen inside a __VA_OPT__");
 // NOTE(review): listing lines 502 and 504 are missing; each branch of this
 // if/else-if appears to have lost a statement.
503  } else if (RParenAfter)
505  }
506  continue;
507  }
508 
509  // Okay, we have a token that is either the LHS or RHS of a paste (##)
510  // argument. It gets substituted as its non-pre-expanded tokens.
511  const Token *ArgToks = ActualArgs->getUnexpArgument(ArgNo);
512  unsigned NumToks = MacroArgs::getArgLength(ArgToks);
513  if (NumToks) { // Not an empty argument?
514  bool VaArgsPseudoPaste = false;
515  // If this is the GNU ", ## __VA_ARGS__" extension, and we just learned
516  // that __VA_ARGS__ expands to multiple tokens, avoid a pasting error when
517  // the expander tries to paste ',' with the first token of the __VA_ARGS__
518  // expansion.
519  if (NonEmptyPasteBefore && ResultToks.size() >= 2 &&
520  ResultToks[ResultToks.size()-2].is(tok::comma) &&
521  (unsigned)ArgNo == Macro->getNumParams()-1 &&
522  Macro->isVariadic()) {
523  VaArgsPseudoPaste = true;
524  // Remove the paste operator, report use of the extension.
525  PP.Diag(ResultToks.pop_back_val().getLocation(), diag::ext_paste_comma);
526  }
527 
528  ResultToks.append(ArgToks, ArgToks+NumToks);
529 
530  // If the '##' came from expanding an argument, turn it into 'unknown'
531  // to avoid pasting.
532  for (Token &Tok : llvm::make_range(ResultToks.end() - NumToks,
533  ResultToks.end())) {
534  if (Tok.is(tok::hashhash))
535  Tok.setKind(tok::unknown);
536  }
537 
538  if (ExpandLocStart.isValid()) {
539  updateLocForMacroArgTokens(CurTok.getLocation(),
540  ResultToks.end()-NumToks, ResultToks.end());
541  }
542 
543  // Transfer the leading whitespace information from the token
544  // (the macro argument) onto the first token of the
545  // expansion. Note that we don't do this for the GNU
546  // pseudo-paste extension ", ## __VA_ARGS__".
547  if (!VaArgsPseudoPaste) {
548  ResultToks[ResultToks.size() - NumToks].setFlagValue(Token::StartOfLine,
549  false);
550  ResultToks[ResultToks.size() - NumToks].setFlagValue(
551  Token::LeadingSpace, NextTokGetsSpace);
552  }
553 
554  NextTokGetsSpace = false;
555  continue;
556  }
557 
558  // If an empty argument is on the LHS or RHS of a paste, the standard (C99
559  // 6.10.3.3p2,3) calls for a bunch of placemarker stuff to occur. We
560  // implement this by eating ## operators when a LHS or RHS expands to
561  // empty.
562  if (PasteAfter) {
563  // Discard the argument token and skip (don't copy to the expansion
564  // buffer) the paste operator after it.
565  ++I;
566  continue;
567  }
568 
569  if (RParenAfter && !NonEmptyPasteBefore)
 // NOTE(review): listing line 570 (the statement controlled by the `if`
 // above) is missing from this copy.
571 
572  // If this is on the RHS of a paste operator, we've already copied the
573  // paste operator to the ResultToks list, unless the LHS was empty too.
574  // Remove it.
575  assert(PasteBefore);
576  if (NonEmptyPasteBefore) {
577  assert(ResultToks.back().is(tok::hashhash));
578  // Do not remove the paste operator if it is the one before __VA_OPT__
579  // (and we are still processing tokens within VA_OPT). We handle the case
580  // of removing the paste operator if __VA_OPT__ reduces to the notional
581  // placemarker above when we encounter the closing paren of VA_OPT.
582  if (!VCtx.isInVAOpt() ||
583  ResultToks.size() > VCtx.getNumberOfTokensPriorToVAOpt())
584  ResultToks.pop_back();
585  else
 // NOTE(review): listing line 586 (the statement controlled by the `else`
 // above) is missing from this copy.
587  }
588 
589  // If this is the __VA_ARGS__ token, and if the argument wasn't provided,
590  // and if the macro had at least one real argument, and if the token before
591  // the ## was a comma, remove the comma. This is a GCC extension which is
592  // disabled when using -std=c99.
593  if (ActualArgs->isVarargsElidedUse())
594  MaybeRemoveCommaBeforeVaArgs(ResultToks,
595  /*HasPasteOperator=*/true,
596  Macro, ArgNo, PP);
597  }
598 
599  // If anything changed, install this as the new Tokens list.
600  if (MadeChange) {
601  assert(!OwnsTokens && "This would leak if we already own the token list");
602  // This is deleted in the dtor.
603  NumTokens = ResultToks.size();
604  // The tokens will be added to Preprocessor's cache and will be removed
605  // when this TokenLexer finishes lexing them.
606  Tokens = PP.cacheMacroExpandedTokens(this, ResultToks);
607 
608  // The preprocessor cache of macro expanded tokens owns these tokens, not us.
609  OwnsTokens = false;
610  }
611 }
612 
613 /// Checks if two tokens form wide string literal.
614 static bool isWideStringLiteralFromMacro(const Token &FirstTok,
615  const Token &SecondTok) {
616  return FirstTok.is(tok::identifier) &&
617  FirstTok.getIdentifierInfo()->isStr("L") && SecondTok.isLiteral() &&
618  SecondTok.stringifiedInMacro();
619 }
620 
621 /// Lex - Lex and return a token from this macro stream.
///
/// At the end of the stream the macro (if any) is re-enabled and the result
/// of PP.HandleEndOfTokenLexer is returned. Otherwise the next token is
/// fetched, pasted with what follows if a '##' comes next, given an expansion
/// location, and handed to PP.HandleIdentifier when it is an identifier that
/// needs further handling; in the remaining cases this returns true.
// NOTE(review): this copy of the function is missing listing lines 633, 637
// and 646 (each gap is marked below).
622 bool TokenLexer::Lex(Token &Tok) {
623  // Lexing off the end of the macro, pop this macro off the expansion stack.
624  if (isAtEnd()) {
625  // If this is a macro (not a token stream), mark the macro enabled now
626  // that it is no longer being expanded.
627  if (Macro) Macro->EnableMacro();
628 
629  Tok.startToken();
630  Tok.setFlagValue(Token::StartOfLine , AtStartOfLine);
631  Tok.setFlagValue(Token::LeadingSpace, HasLeadingSpace || NextTokGetsSpace);
632  if (CurTokenIdx == 0)
 // NOTE(review): listing line 633 is missing, so as shown the `return` below
 // reads as the body of the `if` above -- confirm against upstream.
634  return PP.HandleEndOfTokenLexer(Tok);
635  }
636 
 // NOTE(review): listing line 637 is missing. `SM` is used further down
 // without a visible declaration; presumably it is declared here -- confirm
 // against upstream.
638 
639  // If this is the first token of the expanded result, we inherit spacing
640  // properties later.
641  bool isFirstToken = CurTokenIdx == 0;
642 
643  // Get the next token to return.
644  Tok = Tokens[CurTokenIdx++];
645  if (IsReinject)
 // NOTE(review): listing line 646 (the statement controlled by the `if`
 // above) is missing from this copy.
647 
648  bool TokenIsFromPaste = false;
649 
650  // If this token is followed by a token paste (##) operator, paste the tokens!
651  // Note that ## is a normal token when not expanding a macro.
652  if (!isAtEnd() && Macro &&
653  (Tokens[CurTokenIdx].is(tok::hashhash) ||
654  // Special processing of L#x macros in -fms-compatibility mode.
655  // Microsoft compiler is able to form a wide string literal from
656  // 'L#macro_arg' construct in a function-like macro.
657  (PP.getLangOpts().MSVCCompat &&
658  isWideStringLiteralFromMacro(Tok, Tokens[CurTokenIdx])))) {
659  // When handling the microsoft /##/ extension, the final token is
660  // returned by pasteTokens, not the pasted token.
661  if (pasteTokens(Tok))
662  return true;
663 
664  TokenIsFromPaste = true;
665  }
666 
667  // The token's current location indicate where the token was lexed from. We
668  // need this information to compute the spelling of the token, but any
669  // diagnostics for the expanded token should appear as if they came from
670  // ExpansionLoc. Pull this information together into a new SourceLocation
671  // that captures all of this.
672  if (ExpandLocStart.isValid() && // Don't do this for token streams.
673  // Check that the token's location was not already set properly.
674  SM.isBeforeInSLocAddrSpace(Tok.getLocation(), MacroStartSLocOffset)) {
675  SourceLocation instLoc;
676  if (Tok.is(tok::comment)) {
677  instLoc = SM.createExpansionLoc(Tok.getLocation(),
678  ExpandLocStart,
679  ExpandLocEnd,
680  Tok.getLength());
681  } else {
682  instLoc = getExpansionLocForMacroDefLoc(Tok.getLocation());
683  }
684 
685  Tok.setLocation(instLoc);
686  }
687 
688  // If this is the first token, set the lexical properties of the token to
689  // match the lexical properties of the macro identifier.
690  if (isFirstToken) {
691  Tok.setFlagValue(Token::StartOfLine , AtStartOfLine);
692  Tok.setFlagValue(Token::LeadingSpace, HasLeadingSpace);
693  } else {
694  // If this is not the first token, we may still need to pass through
695  // leading whitespace if we've expanded a macro.
696  if (AtStartOfLine) Tok.setFlag(Token::StartOfLine);
697  if (HasLeadingSpace) Tok.setFlag(Token::LeadingSpace);
698  }
 // Both flags are consumed by the token just returned.
699  AtStartOfLine = false;
700  HasLeadingSpace = false;
701 
702  // Handle recursive expansion!
703  if (!Tok.isAnnotation() && Tok.getIdentifierInfo() != nullptr) {
704  // Change the kind of this identifier to the appropriate token kind, e.g.
705  // turning "for" into a keyword.
706  IdentifierInfo *II = Tok.getIdentifierInfo();
707  Tok.setKind(II->getTokenID());
708 
709  // If this identifier was poisoned and from a paste, emit an error. This
710  // won't be handled by Preprocessor::HandleIdentifier because this is coming
711  // from a macro expansion.
712  if (II->isPoisoned() && TokenIsFromPaste) {
713  PP.HandlePoisonedIdentifier(Tok);
714  }
715 
716  if (!DisableMacroExpansion && II->isHandleIdentifierCase())
717  return PP.HandleIdentifier(Tok);
718  }
719 
720  // Otherwise, return a normal token.
721  return true;
722 }
723 
724 bool TokenLexer::pasteTokens(Token &Tok) {
725  return pasteTokens(Tok, llvm::ArrayRef(Tokens, NumTokens), CurTokenIdx);
726 }
727 
728 /// LHSTok is the LHS of a ## operator, and CurTokenIdx is the ##
729 /// operator. Read the ## and RHS, and paste the LHS/RHS together. If there
730 /// are more ## after it, chomp them iteratively. Return the result as LHSTok.
731 /// If this returns true, the caller should immediately return the token.
732 bool TokenLexer::pasteTokens(Token &LHSTok, ArrayRef<Token> TokenStream,
733  unsigned int &CurIdx) {
734  assert(CurIdx > 0 && "## can not be the first token within tokens");
735  assert((TokenStream[CurIdx].is(tok::hashhash) ||
736  (PP.getLangOpts().MSVCCompat &&
737  isWideStringLiteralFromMacro(LHSTok, TokenStream[CurIdx]))) &&
738  "Token at this Index must be ## or part of the MSVC 'L "
739  "#macro-arg' pasting pair");
740 
741  // MSVC: If previous token was pasted, this must be a recovery from an invalid
742  // paste operation. Ignore spaces before this token to mimic MSVC output.
743  // Required for generating valid UUID strings in some MS headers.
744  if (PP.getLangOpts().MicrosoftExt && (CurIdx >= 2) &&
745  TokenStream[CurIdx - 2].is(tok::hashhash))
747 
748  SmallString<128> Buffer;
749  const char *ResultTokStrPtr = nullptr;
750  SourceLocation StartLoc = LHSTok.getLocation();
751  SourceLocation PasteOpLoc;
752 
753  auto IsAtEnd = [&TokenStream, &CurIdx] {
754  return TokenStream.size() == CurIdx;
755  };
756 
757  do {
758  // Consume the ## operator if any.
759  PasteOpLoc = TokenStream[CurIdx].getLocation();
760  if (TokenStream[CurIdx].is(tok::hashhash))
761  ++CurIdx;
762  assert(!IsAtEnd() && "No token on the RHS of a paste operator!");
763 
764  // Get the RHS token.
765  const Token &RHS = TokenStream[CurIdx];
766 
767  // Allocate space for the result token. This is guaranteed to be enough for
768  // the two tokens.
769  Buffer.resize(LHSTok.getLength() + RHS.getLength());
770 
771  // Get the spelling of the LHS token in Buffer.
772  const char *BufPtr = &Buffer[0];
773  bool Invalid = false;
774  unsigned LHSLen = PP.getSpelling(LHSTok, BufPtr, &Invalid);
775  if (BufPtr != &Buffer[0]) // Really, we want the chars in Buffer!
776  memcpy(&Buffer[0], BufPtr, LHSLen);
777  if (Invalid)
778  return true;
779 
780  BufPtr = Buffer.data() + LHSLen;
781  unsigned RHSLen = PP.getSpelling(RHS, BufPtr, &Invalid);
782  if (Invalid)
783  return true;
784  if (RHSLen && BufPtr != &Buffer[LHSLen])
785  // Really, we want the chars in Buffer!
786  memcpy(&Buffer[LHSLen], BufPtr, RHSLen);
787 
788  // Trim excess space.
789  Buffer.resize(LHSLen+RHSLen);
790 
791  // Plop the pasted result (including the trailing newline and null) into a
792  // scratch buffer where we can lex it.
793  Token ResultTokTmp;
794  ResultTokTmp.startToken();
795 
796  // Claim that the tmp token is a string_literal so that we can get the
797  // character pointer back from CreateString in getLiteralData().
798  ResultTokTmp.setKind(tok::string_literal);
799  PP.CreateString(Buffer, ResultTokTmp);
800  SourceLocation ResultTokLoc = ResultTokTmp.getLocation();
801  ResultTokStrPtr = ResultTokTmp.getLiteralData();
802 
803  // Lex the resultant pasted token into Result.
804  Token Result;
805 
806  if (LHSTok.isAnyIdentifier() && RHS.isAnyIdentifier()) {
807  // Common paste case: identifier+identifier = identifier. Avoid creating
808  // a lexer and other overhead.
809  PP.IncrementPasteCounter(true);
810  Result.startToken();
811  Result.setKind(tok::raw_identifier);
812  Result.setRawIdentifierData(ResultTokStrPtr);
813  Result.setLocation(ResultTokLoc);
814  Result.setLength(LHSLen+RHSLen);
815  } else {
816  PP.IncrementPasteCounter(false);
817 
818  assert(ResultTokLoc.isFileID() &&
819  "Should be a raw location into scratch buffer");
820  SourceManager &SourceMgr = PP.getSourceManager();
821  FileID LocFileID = SourceMgr.getFileID(ResultTokLoc);
822 
823  bool Invalid = false;
824  const char *ScratchBufStart
825  = SourceMgr.getBufferData(LocFileID, &Invalid).data();
826  if (Invalid)
827  return false;
828 
829  // Make a lexer to lex this string from. Lex just this one token.
830  // Make a lexer object so that we lex and expand the paste result.
831  Lexer TL(SourceMgr.getLocForStartOfFile(LocFileID),
832  PP.getLangOpts(), ScratchBufStart,
833  ResultTokStrPtr, ResultTokStrPtr+LHSLen+RHSLen);
834 
835  // Lex a token in raw mode. This way it won't look up identifiers
836  // automatically, lexing off the end will return an eof token, and
837  // warnings are disabled. This returns true if the result token is the
838  // entire buffer.
839  bool isInvalid = !TL.LexFromRawLexer(Result);
840 
841  // If we got an EOF token, we didn't form even ONE token. For example, we
842  // did "/ ## /" to get "//".
843  isInvalid |= Result.is(tok::eof);
844 
845  // If pasting the two tokens didn't form a full new token, this is an
846  // error. This occurs with "x ## +" and other stuff. Return with LHSTok
847  // unmodified and with RHS as the next token to lex.
848  if (isInvalid) {
849  // Explicitly convert the token location to have proper expansion
850  // information so that the user knows where it came from.
853  SM.createExpansionLoc(PasteOpLoc, ExpandLocStart, ExpandLocEnd, 2);
854 
855  // Test for the Microsoft extension of /##/ turning into // here on the
856  // error path.
857  if (PP.getLangOpts().MicrosoftExt && LHSTok.is(tok::slash) &&
858  RHS.is(tok::slash)) {
859  HandleMicrosoftCommentPaste(LHSTok, Loc);
860  return true;
861  }
862 
863  // Do not emit the error when preprocessing assembler code.
864  if (!PP.getLangOpts().AsmPreprocessor) {
865  // If we're in microsoft extensions mode, downgrade this from a hard
866  // error to an extension that defaults to an error. This allows
867  // disabling it.
868  PP.Diag(Loc, PP.getLangOpts().MicrosoftExt ? diag::ext_pp_bad_paste_ms
869  : diag::err_pp_bad_paste)
870  << Buffer;
871  }
872 
873  // An error has occurred so exit loop.
874  break;
875  }
876 
877  // Turn ## into 'unknown' to avoid # ## # from looking like a paste
878  // operator.
879  if (Result.is(tok::hashhash))
880  Result.setKind(tok::unknown);
881  }
882 
883  // Transfer properties of the LHS over the Result.
884  Result.setFlagValue(Token::StartOfLine , LHSTok.isAtStartOfLine());
885  Result.setFlagValue(Token::LeadingSpace, LHSTok.hasLeadingSpace());
886 
887  // Finally, replace LHS with the result, consume the RHS, and iterate.
888  ++CurIdx;
889  LHSTok = Result;
890  } while (!IsAtEnd() && TokenStream[CurIdx].is(tok::hashhash));
891 
892  SourceLocation EndLoc = TokenStream[CurIdx - 1].getLocation();
893 
894  // The token's current location indicate where the token was lexed from. We
895  // need this information to compute the spelling of the token, but any
896  // diagnostics for the expanded token should appear as if the token was
897  // expanded from the full ## expression. Pull this information together into
898  // a new SourceLocation that captures all of this.
900  if (StartLoc.isFileID())
901  StartLoc = getExpansionLocForMacroDefLoc(StartLoc);
902  if (EndLoc.isFileID())
903  EndLoc = getExpansionLocForMacroDefLoc(EndLoc);
904  FileID MacroFID = SM.getFileID(MacroExpansionStart);
905  while (SM.getFileID(StartLoc) != MacroFID)
906  StartLoc = SM.getImmediateExpansionRange(StartLoc).getBegin();
907  while (SM.getFileID(EndLoc) != MacroFID)
908  EndLoc = SM.getImmediateExpansionRange(EndLoc).getEnd();
909 
910  LHSTok.setLocation(SM.createExpansionLoc(LHSTok.getLocation(), StartLoc, EndLoc,
911  LHSTok.getLength()));
912 
913  // Now that we got the result token, it will be subject to expansion. Since
914  // token pasting re-lexes the result token in raw mode, identifier information
915  // isn't looked up. As such, if the result is an identifier, look up id info.
916  if (LHSTok.is(tok::raw_identifier)) {
917  // Look up the identifier info for the token. We disabled identifier lookup
918  // by saying we're skipping contents, so we need to do this manually.
919  PP.LookUpIdentifierInfo(LHSTok);
920  }
921  return false;
922 }
923 
924 /// isNextTokenLParen - If the next token lexed will pop this macro off the
925 /// expansion stack, return 2. If the next unexpanded token is a '(', return
926 /// 1, otherwise return 0.
///
/// Only Tokens[CurTokenIdx] is inspected; nothing in this body advances
/// CurTokenIdx.
// NOTE(review): the signature line (listing line 927) is absent from this
// listing; the cross-reference index at the end of the page gives it as
// `unsigned isNextTokenLParen() const` -- confirm against the real file.
928  // Out of tokens?
929  if (isAtEnd())
930  return 2;
  // Not at the end: the bool produced by Token::is() becomes the documented
  // 1 (next token is '(') or 0 (anything else).
931  return Tokens[CurTokenIdx].is(tok::l_paren);
932 }
933 
934 /// isParsingPreprocessorDirective - Return true if we are in the middle of a
935 /// preprocessor directive.
///
/// Concretely, the result is true when the last token of the stream is a
/// tok::eod and isAtEnd() is false.
// NOTE(review): the signature line (listing line 936) is absent from this
// listing; the cross-reference index at the end of the page gives it as
// `bool isParsingPreprocessorDirective() const` -- confirm.
  // NOTE(review): Tokens[NumTokens-1] is evaluated before isAtEnd(), which
  // presumes NumTokens > 0 -- confirm no caller asks on an empty stream.
937  return Tokens[NumTokens-1].is(tok::eod) && !isAtEnd();
938 }
939 
940 /// HandleMicrosoftCommentPaste - In microsoft compatibility mode, /##/ pastes
941 /// together to form a comment that comments out everything in the current
942 /// macro, other active macros, and anything left on the current physical
943 /// source line of the expanded buffer. Handle this by returning the
944 /// first token on the next line.
///
/// \param Tok   not touched by the statements visible here; presumably handed
///              to the preprocessor on listing line 956, which is absent from
///              this listing -- confirm.
/// \param OpLoc location at which the ext_comment_paste_microsoft diagnostic
///              is reported.
945 void TokenLexer::HandleMicrosoftCommentPaste(Token &Tok, SourceLocation OpLoc) {
  // Report the use of the extension at the location supplied by the caller.
946  PP.Diag(OpLoc, diag::ext_comment_paste_microsoft);
947 
948  // We 'comment out' the rest of this macro by just ignoring the rest of the
949  // tokens that have not been lexed yet, if any.
950 
951  // Since this must be a macro, mark the macro enabled now that it is no longer
952  // being expanded.
  // The assert documents that this path requires a non-null Macro.
953  assert(Macro && "Token streams can't paste comments");
954  Macro->EnableMacro();
955 
  // NOTE(review): listing line 956 (the last statement of this function) is
  // missing here; the index at the end of the page lists a
  // `void HandleMicrosoftCommentPaste(Token &Tok)` on the preprocessor, which
  // is presumably what gets called -- confirm against the real file.
957 }
958 
959 /// Translates \arg loc, a valid file location that lies inside the current
960 /// macro definition, into the matching location inside the macro expansion
961 /// source location entry: the relative offset that SM.isInSLocAddrSpace()
962 /// reports for \arg loc is applied to MacroExpansionStart.
///
/// The preconditions (valid ExpandLocStart and MacroExpansionStart, \arg loc
/// valid, a file ID, and inside the macro definition) are enforced only by
/// asserts; no path in the code below returns an invalid SourceLocation.
// NOTE(review): listing lines 963 (presumably the `SourceLocation` return
// type) and 969 (presumably the definition of `SM`) are absent from this
// listing -- confirm against the real file.
964 TokenLexer::getExpansionLocForMacroDefLoc(SourceLocation loc) const {
965  assert(ExpandLocStart.isValid() && MacroExpansionStart.isValid() &&
966  "Not appropriate for token streams");
967  assert(loc.isValid() && loc.isFileID());
968 
970  assert(SM.isInSLocAddrSpace(loc, MacroDefStart, MacroDefLength) &&
971  "Expected loc to come from the macro definition");
972 
  // The query is repeated outside the assert on purpose: the call above
  // disappears when asserts are compiled out, and this one is what actually
  // fills in relativeOffset (which otherwise keeps its initial 0).
973  SourceLocation::UIntTy relativeOffset = 0;
974  SM.isInSLocAddrSpace(loc, MacroDefStart, MacroDefLength, &relativeOffset);
975  return MacroExpansionStart.getLocWithOffset(relativeOffset);
976 }
977 
978 /// Finds the tokens that are consecutive (from the same FileID),
979 /// creates a single SLocEntry, and assigns SourceLocations to each token that
980 /// point to that SLocEntry. e.g. for
981 /// assert(foo == bar);
982 /// There will be a single SLocEntry for the "foo == bar" chunk and locations
983 /// for the 'foo', '==', 'bar' tokens will point inside that chunk.
984 ///
985 /// \arg begin_tokens will be updated to a position past all the found
986 /// consecutive tokens.
///
/// \arg ExpandLoc is forwarded unchanged to SM.createMacroArgExpansionLoc().
/// \arg end_tokens is one past the last candidate token; the range must hold
/// at least two tokens (see the assert on entry).
// NOTE(review): listing lines 987 (start of the signature; the index at the
// end of the page gives `static void updateConsecutiveMacroArgTokens(
// SourceManager &SM, SourceLocation ExpandLoc, Token *&begin_tokens,
// Token *end_tokens)`) and 994 (presumably the declaration of `Partition`)
// are absent from this listing -- confirm against the real file.
988  SourceLocation ExpandLoc,
989  Token *&begin_tokens,
990  Token * end_tokens) {
  // At least two tokens are required; the only visible caller deals with the
  // single-token case itself before calling in here.
991  assert(begin_tokens + 1 < end_tokens);
992  SourceLocation BeginLoc = begin_tokens->getLocation();
993  llvm::MutableArrayRef<Token> All(begin_tokens, end_tokens);
995 
  // Stateful predicate: Last starts out as BeginLoc and is overwritten with
  // every location it is asked about, so it must be called on the tokens in
  // order. The first token is located at BeginLoc, giving a distance of 0.
996  auto NearLast = [&, Last = BeginLoc](SourceLocation Loc) mutable {
997  // The maximum distance between two consecutive tokens in a partition.
998  // This is an important trick to avoid using too much SourceLocation address
999  // space!
1000  static constexpr SourceLocation::IntTy MaxDistance = 50;
  // NOTE(review): getRawEncoding() is listed as returning UIntTy, so a token
  // located before the previous one presumably wraps to a large distance and
  // ends the partition -- confirm.
1001  auto Distance = Loc.getRawEncoding() - Last.getRawEncoding();
1002  Last = Loc;
1003  return Distance <= MaxDistance;
1004  };
1005 
1006  // Partition the tokens by their FileID.
1007  // This is a hot function, and calling getFileID can be expensive, the
1008  // implementation is optimized by reducing the number of getFileID.
1009  if (BeginLoc.isFileID()) {
1010  // Consecutive tokens not written in macros must be from the same file.
1011  // (Neither #include nor eof can occur inside a macro argument.)
1012  Partition = All.take_while([&](const Token &T) {
1013  return T.getLocation().isFileID() && NearLast(T.getLocation());
1014  });
1015  } else {
1016  // Call getFileID once to calculate the bounds, and use the cheaper
1017  // sourcelocation-against-bounds comparison.
1018  FileID BeginFID = SM.getFileID(BeginLoc);
1019  SourceLocation Limit =
1020  SM.getComposedLoc(BeginFID, SM.getFileIDSize(BeginFID));
1021  Partition = All.take_while([&](const Token &T) {
1022  // NOTE: the Limit is included! The lexer recovery only ever inserts a
1023  // single token past the end of the FileID, specifically the ) when a
1024  // macro-arg containing a comma should be guarded by parentheses.
1025  //
1026  // It is safe to include the Limit here because SourceManager allocates
1027  // FileSize + 1 for each SLocEntry.
1028  //
1029  // See https://github.com/llvm/llvm-project/issues/60722.
1030  return T.getLocation() >= BeginLoc && T.getLocation() <= Limit
1031  && NearLast(T.getLocation());
1032  });
1033  }
  // The first token is expected to satisfy either predicate, so the partition
  // should always hold at least that token.
1034  assert(!Partition.empty());
1035 
1036  // For the consecutive tokens, find the length of the SLocEntry to contain
1037  // all of them.
  // Measured from the start of the first token to the end of the last one.
1038  SourceLocation::UIntTy FullLength =
1039  Partition.back().getEndLoc().getRawEncoding() -
1040  Partition.front().getLocation().getRawEncoding();
1041  // Create a macro expansion SLocEntry that will "contain" all of the tokens.
1042  SourceLocation Expansion =
1043  SM.createMacroArgExpansionLoc(BeginLoc, ExpandLoc, FullLength);
1044 
  // Debug-only cross-check, compiled in with EXPENSIVE_CHECKS: every token of
  // the partition must report the same FileID as the first one.
1045 #ifdef EXPENSIVE_CHECKS
1046  assert(llvm::all_of(Partition.drop_front(),
1047  [&SM, ID = SM.getFileID(Partition.front().getLocation())](
1048  const Token &T) {
1049  return ID == SM.getFileID(T.getLocation());
1050  }) &&
1051  "Must have the same FIleID!");
1052 #endif
1053  // Change the location of the tokens from the spelling location to the new
1054  // expanded location.
1055  for (Token& T : Partition) {
  // Each token keeps its distance from BeginLoc inside the new entry.
1056  SourceLocation::IntTy RelativeOffset =
1057  T.getLocation().getRawEncoding() - BeginLoc.getRawEncoding();
1058  T.setLocation(Expansion.getLocWithOffset(RelativeOffset));
1059  }
  // Move the caller's cursor to the first token that was not relocated.
1060  begin_tokens = &Partition.back() + 1;
1061 }
1062 
1063 /// Creates SLocEntries and updates the locations of macro argument
1064 /// tokens to their new expanded locations.
1065 ///
1066 /// \param ArgIdSpellLoc the location of the macro argument id inside the macro
1067 /// definition.
/// \param begin_tokens first token of the range to relocate; the tokens are
/// modified in place through setLocation().
/// \param end_tokens one past the last token of the range.
1068 void TokenLexer::updateLocForMacroArgTokens(SourceLocation ArgIdSpellLoc,
1069  Token *begin_tokens,
1070  Token *end_tokens) {
  // NOTE(review): listing line 1071 (presumably the definition of `SM`) is
  // absent from this listing -- confirm against the real file.
1072 
  // Every SLocEntry created below uses this one expansion location, derived
  // from the argument id's location in the macro definition.
1073  SourceLocation ExpandLoc =
1074  getExpansionLocForMacroDefLoc(ArgIdSpellLoc);
1075 
1076  while (begin_tokens < end_tokens) {
1077  // If there's only one token just create a SLocEntry for it.
1078  if (end_tokens - begin_tokens == 1) {
1079  Token &Tok = *begin_tokens;
1080  Tok.setLocation(SM.createMacroArgExpansionLoc(Tok.getLocation(),
1081  ExpandLoc,
1082  Tok.getLength()));
1083  return;
1084  }
1085 
  // Two or more tokens remain: relocate the next run of consecutive tokens.
  // begin_tokens is passed by reference and is moved past that run, which is
  // what makes this loop progress.
1086  updateConsecutiveMacroArgTokens(SM, ExpandLoc, begin_tokens, end_tokens);
1087  }
1088 }
1089 
/// Copies the leading-space and start-of-line flags of \p Result into this
/// lexer's HasLeadingSpace and AtStartOfLine members.
1090 void TokenLexer::PropagateLineStartLeadingSpaceInfo(Token &Result) {
1091  HasLeadingSpace = Result.hasLeadingSpace();
1092  AtStartOfLine = Result.isAtStartOfLine();
1093 }
static char ID
Definition: Arena.cpp:183
#define SM(sm)
Definition: Cuda.cpp:83
Defines the Diagnostic-related interfaces.
Defines the clang::IdentifierInfo, clang::IdentifierTable, and clang::Selector interfaces.
Defines the clang::LangOptions interface.
Defines the clang::MacroInfo and clang::MacroDirective classes.
Defines the clang::Preprocessor interface.
SourceLocation Loc
Definition: SemaObjC.cpp:755
Defines the clang::SourceLocation class and associated facilities.
static bool isInvalid(LocType Loc, bool *Invalid)
Defines the SourceManager interface.
Defines the clang::TokenKind enum and support functions.
static bool isWideStringLiteralFromMacro(const Token &FirstTok, const Token &SecondTok)
Checks if two tokens form wide string literal.
Definition: TokenLexer.cpp:614
static void updateConsecutiveMacroArgTokens(SourceManager &SM, SourceLocation ExpandLoc, Token *&begin_tokens, Token *end_tokens)
Finds the tokens that are consecutive (from the same FileID) creates a single SLocEntry,...
Definition: TokenLexer.cpp:987
__DEVICE__ void * memcpy(void *__a, const void *__b, size_t __c)
void setLocation(SourceLocation L)
Definition: DeclBase.h:446
An opaque identifier used by SourceManager which refers to a source file (MemoryBuffer) along with it...
One of these records is kept for each identifier that is lexed.
bool isHandleIdentifierCase() const
Return true if the Preprocessor::HandleIdentifier must be called on a token of this identifier.
tok::TokenKind getTokenID() const
If this is a source-language token (e.g.
bool isPoisoned() const
Return true if this token has been poisoned.
bool isStr(const char(&Str)[StrLen]) const
Return true if this is the identifier for the specified string.
Lexer - This provides a simple interface that turns a text buffer into a stream of tokens.
Definition: Lexer.h:78
MacroArgs - An instance of this class captures information about the formal arguments specified to a ...
Definition: MacroArgs.h:30
const Token * getUnexpArgument(unsigned Arg) const
getUnexpArgument - Return a pointer to the first token of the unexpanded token list for the specified...
Definition: MacroArgs.cpp:118
const std::vector< Token > & getPreExpArgument(unsigned Arg, Preprocessor &PP)
getPreExpArgument - Return the pre-expanded form of the specified argument.
Definition: MacroArgs.cpp:161
static unsigned getArgLength(const Token *ArgPtr)
getArgLength - Given a pointer to an expanded or unexpanded argument, return the number of tokens,...
Definition: MacroArgs.cpp:108
bool ArgNeedsPreexpansion(const Token *ArgTok, Preprocessor &PP) const
ArgNeedsPreexpansion - If we can prove that the argument won't be affected by pre-expansion,...
Definition: MacroArgs.cpp:146
bool invokedWithVariadicArgument(const MacroInfo *const MI, Preprocessor &PP)
Returns true if the macro was defined with a variadic (ellipsis) parameter AND was invoked with at le...
Definition: MacroArgs.cpp:136
bool isVarargsElidedUse() const
isVarargsElidedUse - Return true if this is a C99 style varargs macro invocation and there was no arg...
Definition: MacroArgs.h:102
static Token StringifyArgument(const Token *ArgToks, Preprocessor &PP, bool Charify, SourceLocation ExpansionLocStart, SourceLocation ExpansionLocEnd)
StringifyArgument - Implement C99 6.10.3.2p2, converting a sequence of tokens into the literal string...
Definition: MacroArgs.cpp:208
void destroy(Preprocessor &PP)
destroy - Destroy and deallocate the memory for this object.
Definition: MacroArgs.cpp:78
Encapsulates the data about a macro definition (e.g.
Definition: MacroInfo.h:39
bool isFunctionLike() const
Definition: MacroInfo.h:201
const_tokens_iterator tokens_begin() const
Definition: MacroInfo.h:244
const_tokens_iterator tokens_end() const
Definition: MacroInfo.h:245
unsigned getNumParams() const
Definition: MacroInfo.h:184
unsigned getDefinitionLength(const SourceManager &SM) const
Get length in characters of the macro definition.
Definition: MacroInfo.h:134
void DisableMacro()
Definition: MacroInfo.h:288
Engages in a tight little dance with the lexer to efficiently preprocess tokens.
Definition: Preprocessor.h:128
SourceManager & getSourceManager() const
void CreateString(StringRef Str, Token &Tok, SourceLocation ExpansionLocStart=SourceLocation(), SourceLocation ExpansionLocEnd=SourceLocation())
Plop the specified string into a scratch buffer and set the specified token's location and length to ...
IdentifierInfo * LookUpIdentifierInfo(Token &Identifier) const
Given a tok::raw_identifier token, look up the identifier information for the token and install it in...
bool HandleEndOfTokenLexer(Token &Result)
Callback invoked when the current TokenLexer hits the end of its token stream.
void HandlePoisonedIdentifier(Token &Identifier)
Display reason for poisoned identifier.
bool HandleIdentifier(Token &Identifier)
Callback invoked when the lexer reads an identifier and has filled in the tokens IdentifierInfo membe...
void IncrementPasteCounter(bool isFast)
Increment the counters for the number of token paste operations performed.
StringRef getSpelling(SourceLocation loc, SmallVectorImpl< char > &buffer, bool *invalid=nullptr) const
Return the 'spelling' of the token at the given location; does not go up to the spelling location or ...
const LangOptions & getLangOpts() const
DiagnosticBuilder Diag(SourceLocation Loc, unsigned DiagID) const
Forwarding function for diagnostics.
void HandleMicrosoftCommentPaste(Token &Tok)
When the macro expander pastes together a comment (/##/) in Microsoft mode, this method handles updat...
Encodes a location in the source.
bool isValid() const
Return true if this is a valid SourceLocation object.
SourceLocation getLocWithOffset(IntTy Offset) const
Return a source location with the specified offset from this SourceLocation.
UIntTy getRawEncoding() const
When a SourceLocation itself cannot be used, this returns an (opaque) 32-bit integer encoding for it.
This class handles loading and caching of source files into memory.
FileID getFileID(SourceLocation SpellingLoc) const
Return the FileID for a SourceLocation.
StringRef getBufferData(FileID FID, bool *Invalid=nullptr) const
Return a StringRef to the source buffer data for the specified FileID.
SourceLocation getLocForStartOfFile(FileID FID) const
Return the source location corresponding to the first byte of the specified file.
bool isParsingPreprocessorDirective() const
isParsingPreprocessorDirective - Return true if we are in the middle of a preprocessor directive.
Definition: TokenLexer.cpp:936
unsigned isNextTokenLParen() const
If the next token lexed will pop this macro off the expansion stack, return 2.
Definition: TokenLexer.cpp:927
bool Lex(Token &Tok)
Lex and return a token from this macro stream.
Definition: TokenLexer.cpp:622
void Init(Token &Tok, SourceLocation ELEnd, MacroInfo *MI, MacroArgs *Actuals)
Initialize this TokenLexer to expand from the specified macro with the specified argument information...
Definition: TokenLexer.cpp:40
Token - This structure provides full information about a lexed token.
Definition: Token.h:36
bool isAnyIdentifier() const
Return true if this is a raw identifier (when lexing in raw mode) or a non-keyword identifier (when l...
Definition: Token.h:110
void clearFlag(TokenFlags Flag)
Unset the specified flag.
Definition: Token.h:254
bool isLiteral() const
Return true if this is a "literal", like a numeric constant, string, etc.
Definition: Token.h:116
SourceLocation getLocation() const
Return a source location identifier for the specified offset in the current file.
Definition: Token.h:132
unsigned getLength() const
Definition: Token.h:135
void setKind(tok::TokenKind K)
Definition: Token.h:95
bool is(tok::TokenKind K) const
is/isNot - Predicates to check if this token is a specific kind, as in "if (Tok.is(tok::l_brace)) {....
Definition: Token.h:99
const char * getLiteralData() const
getLiteralData - For a literal token (numeric constant, string, etc), this returns a pointer to the s...
Definition: Token.h:225
bool isAtStartOfLine() const
isAtStartOfLine - Return true if this token is at the start of a line.
Definition: Token.h:276
IdentifierInfo * getIdentifierInfo() const
Definition: Token.h:187
@ IgnoredComma
Definition: Token.h:84
@ IsReinjected
Definition: Token.h:89
@ LeadingEmptyMacro
Definition: Token.h:81
@ LeadingSpace
Definition: Token.h:77
@ StartOfLine
Definition: Token.h:75
@ StringifiedInMacro
Definition: Token.h:85
@ CommaAfterElided
Definition: Token.h:87
bool hasLeadingSpace() const
Return true if this token has whitespace before it.
Definition: Token.h:280
void setLocation(SourceLocation L)
Definition: Token.h:140
bool isOneOf(tok::TokenKind K1, tok::TokenKind K2) const
Definition: Token.h:101
bool isNot(tok::TokenKind K) const
Definition: Token.h:100
bool isAnnotation() const
Return true if this is any of tok::annot_* kind tokens.
Definition: Token.h:121
bool stringifiedInMacro() const
Returns true if this token is formed by macro by stringizing or charizing operator.
Definition: Token.h:310
void startToken()
Reset all flags to cleared.
Definition: Token.h:177
void setFlagValue(TokenFlags Flag, bool Val)
Set a flag to either true or false.
Definition: Token.h:267
void setFlag(TokenFlags Flag)
Set the specified flag.
Definition: Token.h:244
A class for tracking whether we're inside a VA_OPT during a traversal of the tokens of a macro during...
bool isInVAOpt() const
Returns true if we have seen the VA_OPT and '(' but before having seen the matching ')'.
bool isVAOptToken(const Token &T) const
void sawHashOrHashAtBefore(const bool HasLeadingSpace, const bool IsHashAt)
SourceLocation getVAOptLoc() const
unsigned int getNumberOfTokensPriorToVAOpt() const
void sawOpeningParen(SourceLocation LParenLoc)
Call this function each time an lparen is seen.
bool sawClosingParen()
Call this function each time an rparen is seen.
void sawVAOptFollowedByOpeningParens(const SourceLocation VAOptLoc, const unsigned int NumPriorTokens)
The JSON file list parser is used to communicate input to InstallAPI.
const FunctionProtoType * T