clang  19.0.0git
MacroExpansionContext.cpp
Go to the documentation of this file.
1 //===- MacroExpansionContext.cpp - Macro expansion information --*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
10 #include "llvm/Support/Debug.h"
11 #include <optional>
12 
13 #define DEBUG_TYPE "macro-expansion-context"
14 
15 static void dumpTokenInto(const clang::Preprocessor &PP, llvm::raw_ostream &OS,
16  clang::Token Tok);
17 
18 namespace clang {
19 namespace detail {
21  const Preprocessor &PP;
22  SourceManager &SM;
23  MacroExpansionContext::ExpansionRangeMap &ExpansionRanges;
24 
25 public:
27  const Preprocessor &PP, SourceManager &SM,
28  MacroExpansionContext::ExpansionRangeMap &ExpansionRanges)
29  : PP(PP), SM(SM), ExpansionRanges(ExpansionRanges) {}
30 
31  void MacroExpands(const Token &MacroName, const MacroDefinition &MD,
32  SourceRange Range, const MacroArgs *Args) override {
33  // Ignore annotation tokens like: _Pragma("pack(push, 1)")
34  if (MacroName.getIdentifierInfo()->getName() == "_Pragma")
35  return;
36 
37  SourceLocation MacroNameBegin = SM.getExpansionLoc(MacroName.getLocation());
38  assert(MacroNameBegin == SM.getExpansionLoc(Range.getBegin()));
39 
40  const SourceLocation ExpansionEnd = [Range, &SM = SM, &MacroName] {
41  // If the range is empty, use the length of the macro.
42  if (Range.getBegin() == Range.getEnd())
43  return SM.getExpansionLoc(
44  MacroName.getLocation().getLocWithOffset(MacroName.getLength()));
45 
46  // Include the last character.
47  return SM.getExpansionLoc(Range.getEnd()).getLocWithOffset(1);
48  }();
49 
50  (void)PP;
51  LLVM_DEBUG(llvm::dbgs() << "MacroExpands event: '";
52  dumpTokenInto(PP, llvm::dbgs(), MacroName);
53  llvm::dbgs()
54  << "' with length " << MacroName.getLength() << " at ";
55  MacroNameBegin.print(llvm::dbgs(), SM);
56  llvm::dbgs() << ", expansion end at ";
57  ExpansionEnd.print(llvm::dbgs(), SM); llvm::dbgs() << '\n';);
58 
59  // If the expansion range is empty, use the identifier of the macro as a
60  // range.
61  MacroExpansionContext::ExpansionRangeMap::iterator It;
62  bool Inserted;
63  std::tie(It, Inserted) =
64  ExpansionRanges.try_emplace(MacroNameBegin, ExpansionEnd);
65  if (Inserted) {
66  LLVM_DEBUG(llvm::dbgs() << "maps ";
67  It->getFirst().print(llvm::dbgs(), SM); llvm::dbgs() << " to ";
68  It->getSecond().print(llvm::dbgs(), SM);
69  llvm::dbgs() << '\n';);
70  } else {
71  if (SM.isBeforeInTranslationUnit(It->getSecond(), ExpansionEnd)) {
72  It->getSecond() = ExpansionEnd;
73  LLVM_DEBUG(
74  llvm::dbgs() << "remaps "; It->getFirst().print(llvm::dbgs(), SM);
75  llvm::dbgs() << " to "; It->getSecond().print(llvm::dbgs(), SM);
76  llvm::dbgs() << '\n';);
77  }
78  }
79  }
80 };
81 } // namespace detail
82 } // namespace clang
83 
84 using namespace clang;
85 
87  : LangOpts(LangOpts) {}
88 
90  PP = &NewPP;
91  SM = &NewPP.getSourceManager();
92 
93  // Make sure that the Preprocessor does not outlive the MacroExpansionContext.
94  PP->addPPCallbacks(std::make_unique<detail::MacroExpansionRangeRecorder>(
95  *PP, *SM, ExpansionRanges));
96  // Same applies here.
97  PP->setTokenWatcher([this](const Token &Tok) { onTokenLexed(Tok); });
98 }
99 
100 std::optional<StringRef>
102  if (MacroExpansionLoc.isMacroID())
103  return std::nullopt;
104 
105  // If there was no macro expansion at that location, return std::nullopt.
106  if (ExpansionRanges.find_as(MacroExpansionLoc) == ExpansionRanges.end())
107  return std::nullopt;
108 
109  // There was macro expansion, but resulted in no tokens, return empty string.
110  const auto It = ExpandedTokens.find_as(MacroExpansionLoc);
111  if (It == ExpandedTokens.end())
112  return StringRef{""};
113 
114  // Otherwise we have the actual token sequence as string.
115  return It->getSecond().str();
116 }
117 
118 std::optional<StringRef>
120  if (MacroExpansionLoc.isMacroID())
121  return std::nullopt;
122 
123  const auto It = ExpansionRanges.find_as(MacroExpansionLoc);
124  if (It == ExpansionRanges.end())
125  return std::nullopt;
126 
127  assert(It->getFirst() != It->getSecond() &&
128  "Every macro expansion must cover a non-empty range.");
129 
130  return Lexer::getSourceText(
131  CharSourceRange::getCharRange(It->getFirst(), It->getSecond()), *SM,
132  LangOpts);
133 }
134 
136  dumpExpansionRangesToStream(llvm::dbgs());
137 }
139  dumpExpandedTextsToStream(llvm::dbgs());
140 }
141 
143  std::vector<std::pair<SourceLocation, SourceLocation>> LocalExpansionRanges;
144  LocalExpansionRanges.reserve(ExpansionRanges.size());
145  for (const auto &Record : ExpansionRanges)
146  LocalExpansionRanges.emplace_back(
147  std::make_pair(Record.getFirst(), Record.getSecond()));
148  llvm::sort(LocalExpansionRanges);
149 
150  OS << "\n=============== ExpansionRanges ===============\n";
151  for (const auto &Record : LocalExpansionRanges) {
152  OS << "> ";
153  Record.first.print(OS, *SM);
154  OS << ", ";
155  Record.second.print(OS, *SM);
156  OS << '\n';
157  }
158 }
159 
161  std::vector<std::pair<SourceLocation, MacroExpansionText>>
162  LocalExpandedTokens;
163  LocalExpandedTokens.reserve(ExpandedTokens.size());
164  for (const auto &Record : ExpandedTokens)
165  LocalExpandedTokens.emplace_back(
166  std::make_pair(Record.getFirst(), Record.getSecond()));
167  llvm::sort(LocalExpandedTokens);
168 
169  OS << "\n=============== ExpandedTokens ===============\n";
170  for (const auto &Record : LocalExpandedTokens) {
171  OS << "> ";
172  Record.first.print(OS, *SM);
173  OS << " -> '" << Record.second << "'\n";
174  }
175 }
176 
177 static void dumpTokenInto(const Preprocessor &PP, raw_ostream &OS, Token Tok) {
178  assert(Tok.isNot(tok::raw_identifier));
179 
180  // Ignore annotation tokens like: _Pragma("pack(push, 1)")
181  if (Tok.isAnnotation())
182  return;
183 
184  if (IdentifierInfo *II = Tok.getIdentifierInfo()) {
185  // FIXME: For now, we don't respect whitespaces between macro expanded
186  // tokens. We just emit a space after every identifier to produce a valid
187  // code for `int a ;` like expansions.
188  // ^-^-- Space after the 'int' and 'a' identifiers.
189  OS << II->getName() << ' ';
190  } else if (Tok.isLiteral() && !Tok.needsCleaning() && Tok.getLiteralData()) {
191  OS << StringRef(Tok.getLiteralData(), Tok.getLength());
192  } else {
193  char Tmp[256];
194  if (Tok.getLength() < sizeof(Tmp)) {
195  const char *TokPtr = Tmp;
196  // FIXME: Might use a different overload for cleaner callsite.
197  unsigned Len = PP.getSpelling(Tok, TokPtr);
198  OS.write(TokPtr, Len);
199  } else {
200  OS << "<too long token>";
201  }
202  }
203 }
204 
205 void MacroExpansionContext::onTokenLexed(const Token &Tok) {
206  SourceLocation SLoc = Tok.getLocation();
207  if (SLoc.isFileID())
208  return;
209 
210  LLVM_DEBUG(llvm::dbgs() << "lexed macro expansion token '";
211  dumpTokenInto(*PP, llvm::dbgs(), Tok); llvm::dbgs() << "' at ";
212  SLoc.print(llvm::dbgs(), *SM); llvm::dbgs() << '\n';);
213 
214  // Remove spelling location.
215  SourceLocation CurrExpansionLoc = SM->getExpansionLoc(SLoc);
216 
217  MacroExpansionText TokenAsString;
218  llvm::raw_svector_ostream OS(TokenAsString);
219 
220  // FIXME: Prepend newlines and space to produce the exact same output as the
221  // preprocessor would for this token.
222 
223  dumpTokenInto(*PP, OS, Tok);
224 
225  ExpansionMap::iterator It;
226  bool Inserted;
227  std::tie(It, Inserted) =
228  ExpandedTokens.try_emplace(CurrExpansionLoc, std::move(TokenAsString));
229  if (!Inserted)
230  It->getSecond().append(TokenAsString);
231 }
232 
#define SM(sm)
Definition: Cuda.cpp:83
void print(llvm::raw_ostream &OS, const Pointer &P, ASTContext &Ctx, QualType Ty)
llvm::raw_ostream & OS
Definition: Logger.cpp:24
llvm::MachO::Record Record
Definition: MachO.h:31
static void dumpTokenInto(const clang::Preprocessor &PP, llvm::raw_ostream &OS, clang::Token Tok)
SourceRange Range
Definition: SemaObjC.cpp:754
static CharSourceRange getCharRange(SourceRange R)
One of these records is kept for each identifier that is lexed.
StringRef getName() const
Return the actual identifier string.
Keeps track of the various options that can be enabled, which controls the dialect of C or C++ that i...
Definition: LangOptions.h:482
static StringRef getSourceText(CharSourceRange Range, const SourceManager &SM, const LangOptions &LangOpts, bool *Invalid=nullptr)
Returns a string for the source that the range encompasses.
Definition: Lexer.cpp:1024
MacroArgs - An instance of this class captures information about the formal arguments specified to a ...
Definition: MacroArgs.h:30
A description of the current definition of a macro.
Definition: MacroInfo.h:590
void registerForPreprocessor(Preprocessor &PP)
Register the necessary callbacks to the Preprocessor to record the expansion events and the generated...
LLVM_DUMP_METHOD void dumpExpandedTextsToStream(raw_ostream &OS) const
LLVM_DUMP_METHOD void dumpExpandedTexts() const
LLVM_DUMP_METHOD void dumpExpansionRanges() const
MacroExpansionContext(const LangOptions &LangOpts)
Creates a MacroExpansionContext.
LLVM_DUMP_METHOD void dumpExpansionRangesToStream(raw_ostream &OS) const
std::optional< StringRef > getExpandedText(SourceLocation MacroExpansionLoc) const
std::optional< StringRef > getOriginalText(SourceLocation MacroExpansionLoc) const
This interface provides a way to observe the actions of the preprocessor as it does its thing.
Definition: PPCallbacks.h:35
Engages in a tight little dance with the lexer to efficiently preprocess tokens.
Definition: Preprocessor.h:128
SourceManager & getSourceManager() const
void addPPCallbacks(std::unique_ptr< PPCallbacks > C)
StringRef getSpelling(SourceLocation loc, SmallVectorImpl< char > &buffer, bool *invalid=nullptr) const
Return the 'spelling' of the token at the given location; does not go up to the spelling location or ...
void setTokenWatcher(llvm::unique_function< void(const clang::Token &)> F)
Register a function that would be called on each token in the final expanded token stream.
Encodes a location in the source.
void print(raw_ostream &OS, const SourceManager &SM) const
SourceLocation getLocWithOffset(IntTy Offset) const
Return a source location with the specified offset from this SourceLocation.
This class handles loading and caching of source files into memory.
SourceLocation getExpansionLoc(SourceLocation Loc) const
Given a SourceLocation object Loc, return the expansion location referenced by the ID.
A trivial tuple used to represent a source range.
SourceLocation getEnd() const
SourceLocation getBegin() const
Token - This structure provides full information about a lexed token.
Definition: Token.h:36
bool isLiteral() const
Return true if this is a "literal", like a numeric constant, string, etc.
Definition: Token.h:116
SourceLocation getLocation() const
Return a source location identifier for the specified offset in the current file.
Definition: Token.h:132
unsigned getLength() const
Definition: Token.h:135
const char * getLiteralData() const
getLiteralData - For a literal token (numeric constant, string, etc), this returns a pointer to the s...
Definition: Token.h:225
IdentifierInfo * getIdentifierInfo() const
Definition: Token.h:187
bool isNot(tok::TokenKind K) const
Definition: Token.h:100
bool isAnnotation() const
Return true if this is any of tok::annot_* kind tokens.
Definition: Token.h:121
bool needsCleaning() const
Return true if this token has trigraphs or escaped newlines in it.
Definition: Token.h:295
void MacroExpands(const Token &MacroName, const MacroDefinition &MD, SourceRange Range, const MacroArgs *Args) override
Called by Preprocessor::HandleMacroExpandedIdentifier when a macro invocation is found.
MacroExpansionRangeRecorder(const Preprocessor &PP, SourceManager &SM, MacroExpansionContext::ExpansionRangeMap &ExpansionRanges)
The JSON file list parser is used to communicate input to InstallAPI.