clang  19.0.0git
RangeSelector.cpp
Go to the documentation of this file.
1 //===--- RangeSelector.cpp - RangeSelector implementations ------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
10 #include "clang/AST/Expr.h"
11 #include "clang/AST/TypeLoc.h"
14 #include "clang/Lex/Lexer.h"
16 #include "llvm/ADT/StringRef.h"
17 #include "llvm/Support/Errc.h"
18 #include "llvm/Support/Error.h"
19 #include <string>
20 #include <utility>
21 #include <vector>
22 
23 using namespace clang;
24 using namespace transformer;
25 
27 using llvm::Error;
28 using llvm::StringError;
29 
31 
// Builds an llvm::Error that wraps a StringError constructed from the
// llvm::errc::invalid_argument code and the text in \p Message. All error
// helpers in this file funnel through this function.
32 static Error invalidArgumentError(Twine Message) {
33  return llvm::make_error<StringError>(llvm::errc::invalid_argument, Message);
34 }
35 
// Builds the error returned when the node bound to \p ID has a kind the caller
// cannot handle. The message has the form
// "mismatched type (node id=<ID> kind=<Kind>)", where <Kind> is
// Kind.asStringRef(). The result is produced by invalidArgumentError.
36 static Error typeError(StringRef ID, const ASTNodeKind &Kind) {
37  return invalidArgumentError("mismatched type (node id=" + ID +
38  " kind=" + Kind.asStringRef() + ")");
39 }
40 
// Overload of typeError that additionally names the acceptable node types.
// \p ExpectedType is inserted verbatim after "expected one of", so callers pass
// either a single type name or a comma-separated list. The message has the form
// "mismatched type: expected one of <ExpectedType> (node id=<ID> kind=<Kind>)".
41 static Error typeError(StringRef ID, const ASTNodeKind &Kind,
42  Twine ExpectedType) {
43  return invalidArgumentError("mismatched type: expected one of " +
44  ExpectedType + " (node id=" + ID +
45  " kind=" + Kind.asStringRef() + ")");
46 }
47 
// Builds the error returned when the node bound to \p ID has an acceptable
// type but lacks something the selector needs. \p Description names the
// selector or operation being applied and \p Property names what is missing.
// The message has the form
// "<Description> requires property '<Property>' (node id=<ID>)".
48 static Error missingPropertyError(StringRef ID, Twine Description,
49  StringRef Property) {
50  return invalidArgumentError(Description + " requires property '" + Property +
51  "' (node id=" + ID + ")");
52 }
53 
55  StringRef ID) {
56  auto &NodesMap = Nodes.getMap();
57  auto It = NodesMap.find(ID);
58  if (It == NodesMap.end())
59  return invalidArgumentError("ID not bound: " + ID);
60  return It->second;
61 }
62 
63 // FIXME: handling of macros should be configurable.
65  const SourceManager &SM,
66  const LangOptions &LangOpts) {
67  if (Start.isInvalid() || Start.isMacroID())
68  return SourceLocation();
69 
70  SourceLocation BeforeStart = Start.getLocWithOffset(-1);
71  if (BeforeStart.isInvalid() || BeforeStart.isMacroID())
72  return SourceLocation();
73 
74  return Lexer::GetBeginningOfToken(BeforeStart, SM, LangOpts);
75 }
76 
77 // Finds the start location of the previous token of kind \p TK.
78 // FIXME: handling of macros should be configurable.
80  const SourceManager &SM,
81  const LangOptions &LangOpts,
82  tok::TokenKind TK) {
83  while (true) {
84  SourceLocation L = findPreviousTokenStart(Start, SM, LangOpts);
85  if (L.isInvalid() || L.isMacroID())
86  return SourceLocation();
87 
88  Token T;
89  if (Lexer::getRawToken(L, T, SM, LangOpts, /*IgnoreWhiteSpace=*/true))
90  return SourceLocation();
91 
92  if (T.is(TK))
93  return T.getLocation();
94 
95  Start = L;
96  }
97 }
98 
100  const LangOptions &LangOpts) {
101  SourceLocation EndLoc =
102  E.getNumArgs() == 0 ? E.getRParenLoc() : E.getArg(0)->getBeginLoc();
103  return findPreviousTokenKind(EndLoc, SM, LangOpts, tok::TokenKind::l_paren);
104 }
105 
107  return [Selector](const MatchResult &Result) -> Expected<CharSourceRange> {
108  Expected<CharSourceRange> SelectedRange = Selector(Result);
109  if (!SelectedRange)
110  return SelectedRange.takeError();
111  return CharSourceRange::getCharRange(SelectedRange->getBegin());
112  };
113 }
114 
116  return [Selector](const MatchResult &Result) -> Expected<CharSourceRange> {
117  Expected<CharSourceRange> SelectedRange = Selector(Result);
118  if (!SelectedRange)
119  return SelectedRange.takeError();
120  SourceLocation End = SelectedRange->getEnd();
121  if (SelectedRange->isTokenRange()) {
122  // We need to find the actual (exclusive) end location from which to
123  // create a new source range. However, that's not guaranteed to be valid,
124  // even if the token location itself is valid. So, we create a token range
125  // consisting only of the last token, then map that range back to the
126  // source file. If that succeeds, we have a valid location for the end of
127  // the generated range.
129  CharSourceRange::getTokenRange(SelectedRange->getEnd()),
130  *Result.SourceManager, Result.Context->getLangOpts());
131  if (Range.isInvalid())
132  return invalidArgumentError(
133  "after: can't resolve sub-range to valid source range");
134  End = Range.getEnd();
135  }
136 
138  };
139 }
140 
142  return [ID](const MatchResult &Result) -> Expected<CharSourceRange> {
143  Expected<DynTypedNode> Node = getNode(Result.Nodes, ID);
144  if (!Node)
145  return Node.takeError();
146  return (Node->get<Decl>() != nullptr ||
147  (Node->get<Stmt>() != nullptr && Node->get<Expr>() == nullptr))
148  ? tooling::getExtendedRange(*Node, tok::TokenKind::semi,
149  *Result.Context)
151  };
152 }
153 
155  return [ID](const MatchResult &Result) -> Expected<CharSourceRange> {
156  Expected<DynTypedNode> Node = getNode(Result.Nodes, ID);
157  if (!Node)
158  return Node.takeError();
159  return tooling::getExtendedRange(*Node, tok::TokenKind::semi,
160  *Result.Context);
161  };
162 }
163 
165  return [Begin, End](const MatchResult &Result) -> Expected<CharSourceRange> {
166  Expected<CharSourceRange> BeginRange = Begin(Result);
167  if (!BeginRange)
168  return BeginRange.takeError();
169  Expected<CharSourceRange> EndRange = End(Result);
170  if (!EndRange)
171  return EndRange.takeError();
172  SourceLocation B = BeginRange->getBegin();
173  SourceLocation E = EndRange->getEnd();
174  // Note: we are precluding the possibility of sub-token ranges in the case
175  // that EndRange is a token range.
176  if (Result.SourceManager->isBeforeInTranslationUnit(E, B)) {
177  return invalidArgumentError("Bad range: out of order");
178  }
179  return CharSourceRange(SourceRange(B, E), EndRange->isTokenRange());
180  };
181 }
182 
184  std::string EndID) {
185  return transformer::enclose(node(std::move(BeginID)), node(std::move(EndID)));
186 }
187 
189  return [ID](const MatchResult &Result) -> Expected<CharSourceRange> {
190  Expected<DynTypedNode> Node = getNode(Result.Nodes, ID);
191  if (!Node)
192  return Node.takeError();
193  if (auto *M = Node->get<clang::MemberExpr>())
195  M->getMemberNameInfo().getSourceRange());
196  return typeError(ID, Node->getNodeKind(), "MemberExpr");
197  };
198 }
199 
201  return [ID](const MatchResult &Result) -> Expected<CharSourceRange> {
202  Expected<DynTypedNode> N = getNode(Result.Nodes, ID);
203  if (!N)
204  return N.takeError();
205  auto &Node = *N;
206  if (const auto *D = Node.get<NamedDecl>()) {
207  if (!D->getDeclName().isIdentifier())
208  return missingPropertyError(ID, "name", "identifier");
209  SourceLocation L = D->getLocation();
210  auto R = CharSourceRange::getTokenRange(L, L);
211  // Verify that the range covers exactly the name.
212  // FIXME: extend this code to support cases like `operator +` or
213  // `foo<int>` for which this range will be too short. Doing so will
214  // require subcasing `NamedDecl`, because it doesn't provide virtual
215  // access to the \c DeclarationNameInfo.
216  if (tooling::getText(R, *Result.Context) != D->getName())
217  return CharSourceRange();
218  return R;
219  }
220  if (const auto *E = Node.get<DeclRefExpr>()) {
221  if (!E->getNameInfo().getName().isIdentifier())
222  return missingPropertyError(ID, "name", "identifier");
223  SourceLocation L = E->getLocation();
224  return CharSourceRange::getTokenRange(L, L);
225  }
226  if (const auto *I = Node.get<CXXCtorInitializer>()) {
227  if (!I->isMemberInitializer() && I->isWritten())
228  return missingPropertyError(ID, "name", "explicit member initializer");
229  SourceLocation L = I->getMemberLocation();
230  return CharSourceRange::getTokenRange(L, L);
231  }
232  if (const auto *T = Node.get<TypeLoc>()) {
233  TypeLoc Loc = *T;
234  auto ET = Loc.getAs<ElaboratedTypeLoc>();
235  if (!ET.isNull())
236  Loc = ET.getNamedTypeLoc();
237  if (auto SpecLoc = Loc.getAs<TemplateSpecializationTypeLoc>();
238  !SpecLoc.isNull())
239  return CharSourceRange::getTokenRange(SpecLoc.getTemplateNameLoc());
240  return CharSourceRange::getTokenRange(Loc.getSourceRange());
241  }
242  return typeError(ID, Node.getNodeKind(),
243  "DeclRefExpr, NamedDecl, CXXCtorInitializer, TypeLoc");
244  };
245 }
246 
247 namespace {
248 // FIXME: make this available in the public API for users to easily create their
249 // own selectors.
250 
251 // Creates a selector from a range-selection function \p Func, which selects a
252 // range that is relative to a bound node id. \c T is the node type expected by
253 // \p Func.
//
// \p Func is a non-type template parameter (a plain function pointer), so each
// (T, Func) pair is its own class and the only per-object state is the node id.
// Instances are callable with a MatchResult; this file returns them directly
// from selector factories, e.g. RelativeSelector<CompoundStmt,
// getStatementsRange>(std::move(ID)).
254 template <typename T, CharSourceRange (*Func)(const MatchResult &, const T &)>
255 class RelativeSelector {
 // Id of the bound node that \p Func is applied to.
256  std::string ID;
257 
258 public:
 // Takes the id by value and moves it into the member.
259  RelativeSelector(std::string ID) : ID(std::move(ID)) {}
260 
 // Evaluates the selector against \p Result:
 //  - looks up the node bound to ID via getNode and forwards its error if the
 //    lookup fails;
 //  - if the node can be retrieved as a \c T, returns Func(Result, node);
 //  - otherwise returns typeError(ID, <actual node kind>).
261  Expected<CharSourceRange> operator()(const MatchResult &Result) {
262  Expected<DynTypedNode> N = getNode(Result.Nodes, ID);
263  if (!N)
264  return N.takeError();
 // get<T>() yields nullptr when the stored node is not a T.
265  if (const auto *Arg = N->get<T>())
266  return Func(Result, *Arg);
267  return typeError(ID, N->getNodeKind());
268  }
269 };
270 } // namespace
271 
272 // FIXME: Change the following functions from being in an anonymous namespace
273 // to static functions, after the minimum Visual C++ has _MSC_VER >= 1915
274 // (equivalent to Visual Studio 2017 v15.8 or higher). Using the anonymous
275 // namespace works around a bug in earlier versions.
276 namespace {
277 // Returns the range of the statements (all source between the braces).
278 CharSourceRange getStatementsRange(const MatchResult &,
279  const CompoundStmt &CS) {
281  CS.getRBracLoc());
282 }
283 } // namespace
284 
286  return RelativeSelector<CompoundStmt, getStatementsRange>(std::move(ID));
287 }
288 
289 namespace {
290 // Returns the range of the source between the call's parentheses.
291 CharSourceRange getCallArgumentsRange(const MatchResult &Result,
292  const CallExpr &CE) {
294  findOpenParen(CE, *Result.SourceManager, Result.Context->getLangOpts())
295  .getLocWithOffset(1),
296  CE.getRParenLoc());
297 }
298 } // namespace
299 
301  return RelativeSelector<CallExpr, getCallArgumentsRange>(std::move(ID));
302 }
303 
304 namespace {
305 // Returns the range of the elements of the initializer list. Includes all
306 // source between the braces.
307 CharSourceRange getElementsRange(const MatchResult &,
308  const InitListExpr &E) {
310  E.getRBraceLoc());
311 }
312 } // namespace
313 
315  return RelativeSelector<InitListExpr, getElementsRange>(std::move(ID));
316 }
317 
318 namespace {
319 // Returns the range of the else branch, including the `else` keyword.
320 CharSourceRange getElseRange(const MatchResult &Result, const IfStmt &S) {
322  CharSourceRange::getTokenRange(S.getElseLoc(), S.getEndLoc()),
323  tok::TokenKind::semi, *Result.Context);
324 }
325 } // namespace
326 
328  return RelativeSelector<IfStmt, getElseRange>(std::move(ID));
329 }
330 
332  return [S](const MatchResult &Result) -> Expected<CharSourceRange> {
333  Expected<CharSourceRange> SRange = S(Result);
334  if (!SRange)
335  return SRange.takeError();
336  return Result.SourceManager->getExpansionRange(*SRange);
337  };
338 }
BoundNodesTreeBuilder Nodes
DynTypedNode Node
static char ID
Definition: Arena.cpp:183
#define SM(sm)
Definition: Cuda.cpp:83
static Expected< DynTypedNode > getNode(const ast_matchers::BoundNodes &Nodes, StringRef ID)
static Error invalidArgumentError(Twine Message)
static SourceLocation findPreviousTokenKind(SourceLocation Start, const SourceManager &SM, const LangOptions &LangOpts, tok::TokenKind TK)
MatchFinder::MatchResult MatchResult
static SourceLocation findPreviousTokenStart(SourceLocation Start, const SourceManager &SM, const LangOptions &LangOpts)
static Error missingPropertyError(StringRef ID, Twine Description, StringRef Property)
static Error typeError(StringRef ID, const ASTNodeKind &Kind)
static SourceLocation findOpenParen(const CallExpr &E, const SourceManager &SM, const LangOptions &LangOpts)
Defines a combinator library supporting the definition of selectors, which select source ranges based...
SourceRange Range
Definition: SemaObjC.cpp:754
SourceLocation Loc
Definition: SemaObjC.cpp:755
Defines the clang::SourceLocation class and associated facilities.
Defines the clang::TypeLoc interface and its subclasses.
SourceLocation End
SourceLocation Begin
Kind identifier.
Definition: ASTTypeTraits.h:51
Represents a C++ base or member initializer.
Definition: DeclCXX.h:2300
CallExpr - Represents a function call (C99 6.5.2.2, C++ [expr.call]).
Definition: Expr.h:2872
unsigned getNumArgs() const
getNumArgs - Return the number of actual arguments to this call.
Definition: Expr.h:3050
Expr * getArg(unsigned Arg)
getArg - Return the specified argument.
Definition: Expr.h:3063
SourceLocation getRParenLoc() const
Definition: Expr.h:3182
Represents a character-granular source range.
static CharSourceRange getCharRange(SourceRange R)
static CharSourceRange getTokenRange(SourceRange R)
CompoundStmt - This represents a group of statements like { stmt stmt }.
Definition: Stmt.h:1606
SourceLocation getLBracLoc() const
Definition: Stmt.h:1738
SourceLocation getRBracLoc() const
Definition: Stmt.h:1739
A reference to a declared variable, function, enum, etc.
Definition: Expr.h:1260
Decl - This represents one declaration (or definition), e.g.
Definition: DeclBase.h:86
const LangOptions & getLangOpts() const LLVM_READONLY
Helper to get the language options from the ASTContext.
Definition: DeclBase.cpp:507
const T * get() const
Retrieve the stored node as type T.
ASTNodeKind getNodeKind() const
SourceRange getSourceRange() const
For nodes which represent textual entities in the source code, return their SourceRange.
This represents one expression.
Definition: Expr.h:110
IfStmt - This represents an if/then/else.
Definition: Stmt.h:2138
Describes a C or C++ initializer list.
Definition: Expr.h:4888
SourceLocation getLBraceLoc() const
Definition: Expr.h:5042
SourceLocation getRBraceLoc() const
Definition: Expr.h:5044
Keeps track of the various options that can be enabled, which controls the dialect of C or C++ that i...
Definition: LangOptions.h:482
static CharSourceRange makeFileCharRange(CharSourceRange Range, const SourceManager &SM, const LangOptions &LangOpts)
Accepts a range and returns a character range with file locations.
Definition: Lexer.cpp:955
static SourceLocation GetBeginningOfToken(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
Given a location anywhere in a source buffer, find the location that corresponds to the beginning of...
Definition: Lexer.cpp:609
static bool getRawToken(SourceLocation Loc, Token &Result, const SourceManager &SM, const LangOptions &LangOpts, bool IgnoreWhiteSpace=false)
Relex the token at the specified location.
Definition: Lexer.cpp:510
MemberExpr - [C99 6.5.2.3] Structure and Union Members.
Definition: Expr.h:3224
This represents a decl that may have a name.
Definition: Decl.h:249
Smart pointer class that efficiently represents Objective-C method names.
Encodes a location in the source.
SourceLocation getLocWithOffset(IntTy Offset) const
Return a source location with the specified offset from this SourceLocation.
This class handles loading and caching of source files into memory.
A trivial tuple used to represent a source range.
bool isInvalid() const
SourceLocation getEnd() const
Stmt - This represents one statement.
Definition: Stmt.h:84
SourceLocation getBeginLoc() const LLVM_READONLY
Definition: Stmt.cpp:338
Token - This structure provides full information about a lexed token.
Definition: Token.h:36
Base wrapper for a particular "section" of type source info.
Definition: TypeLoc.h:59
bool isNull() const
Definition: TypeLoc.h:121
Maps string IDs to AST nodes matched by parts of a matcher.
Definition: ASTMatchers.h:109
A class to allow finding matches over the Clang AST.
TokenKind
Provides a simple uniform namespace for tokens from all C languages.
Definition: TokenKinds.h:25
CharSourceRange getExtendedRange(const T &Node, tok::TokenKind Next, ASTContext &Context)
Returns the source range spanning the node, extended to include Next, if it immediately follows Node.
Definition: SourceCode.h:34
CharSourceRange maybeExtendRange(CharSourceRange Range, tok::TokenKind Terminator, ASTContext &Context)
Extends Range to include the token Terminator, if it immediately follows the end of the range.
Definition: SourceCode.cpp:37
StringRef getText(CharSourceRange Range, const ASTContext &Context)
Returns the source-code text in the specified range.
Definition: SourceCode.cpp:31
RangeSelector initListElements(std::string ID)
RangeSelector enclose(RangeSelector Begin, RangeSelector End)
Selects from the start of Begin and to the end of End.
RangeSelector member(std::string ID)
Given a MemberExpr, selects the member token.
RangeSelector elseBranch(std::string ID)
Given an IfStmt (bound to ID), selects the range of the else branch, starting from the else keyword.
RangeSelector node(std::string ID)
Selects a node, including trailing semicolon, if any (for declarations and non-expression statements)...
MatchConsumer< CharSourceRange > RangeSelector
Definition: RangeSelector.h:27
RangeSelector encloseNodes(std::string BeginID, std::string EndID)
Convenience version of enclose where end-points are bound nodes.
RangeSelector after(RangeSelector Selector)
Selects the point immediately following Selector.
RangeSelector callArgs(std::string ID)
RangeSelector before(RangeSelector Selector)
Selects the (empty) range [B,B) when Selector selects the range [B,E).
RangeSelector statement(std::string ID)
Selects a node, including trailing semicolon (always).
RangeSelector expansion(RangeSelector S)
Selects the range from which S was expanded (possibly along with other source), if S is an expansion,...
RangeSelector statements(std::string ID)
RangeSelector name(std::string ID)
Given a node with a "name" (like NamedDecl, DeclRefExpr, CXXCtorInitializer, and TypeLoc), selects th...
The JSON file list parser is used to communicate input to InstallAPI.
@ Property
The type of a property.
const FunctionProtoType * T
Definition: Format.h:5433
Contains all information for a given match.