clang  19.0.0git
ExplodedGraph.cpp
Go to the documentation of this file.
1 //===- ExplodedGraph.cpp - Local, Path-Sens. "Exploded Graph" -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the template classes ExplodedNode and ExplodedGraph,
10 // which represent a path-sensitive, intra-procedural "exploded graph."
11 //
12 //===----------------------------------------------------------------------===//
13 
15 #include "clang/AST/Expr.h"
16 #include "clang/AST/ExprObjC.h"
17 #include "clang/AST/ParentMap.h"
18 #include "clang/AST/Stmt.h"
22 #include "clang/Basic/LLVM.h"
26 #include "llvm/ADT/DenseSet.h"
27 #include "llvm/ADT/FoldingSet.h"
28 #include "llvm/ADT/PointerUnion.h"
29 #include "llvm/ADT/SmallVector.h"
30 #include "llvm/Support/Casting.h"
31 #include <cassert>
32 #include <memory>
33 #include <optional>
34 
35 using namespace clang;
36 using namespace ento;
37 
38 //===----------------------------------------------------------------------===//
39 // Cleanup.
40 //===----------------------------------------------------------------------===//
41 
43 
45 
46 //===----------------------------------------------------------------------===//
47 // Node reclamation.
48 //===----------------------------------------------------------------------===//
49 
51  if (!Ex->isLValue())
52  return false;
53  return isa<DeclRefExpr, MemberExpr, ObjCIvarRefExpr, ArraySubscriptExpr>(Ex);
54 }
55 
56 bool ExplodedGraph::shouldCollect(const ExplodedNode *node) {
57  // First, we only consider nodes for reclamation of the following
58  // conditions apply:
59  //
60  // (1) 1 predecessor (that has one successor)
61  // (2) 1 successor (that has one predecessor)
62  //
63  // If a node has no successor it is on the "frontier", while a node
64  // with no predecessor is a root.
65  //
66  // After these prerequisites, we discard all "filler" nodes that
67  // are used only for intermediate processing, and are not essential
68  // for analyzer history:
69  //
70  // (a) PreStmtPurgeDeadSymbols
71  //
72  // We then discard all other nodes where *all* of the following conditions
73  // apply:
74  //
75  // (3) The ProgramPoint is for a PostStmt, but not a PostStore.
76  // (4) There is no 'tag' for the ProgramPoint.
77  // (5) The 'store' is the same as the predecessor.
78  // (6) The 'GDM' is the same as the predecessor.
79  // (7) The LocationContext is the same as the predecessor.
80  // (8) Expressions that are *not* lvalue expressions.
81  // (9) The PostStmt isn't for a non-consumed Stmt or Expr.
82  // (10) The successor is neither a CallExpr StmtPoint nor a CallEnter or
83  // PreImplicitCall (so that we would be able to find it when retrying a
84  // call with no inlining).
85  // FIXME: It may be safe to reclaim PreCall and PostCall nodes as well.
86 
87  // Conditions 1 and 2.
88  if (node->pred_size() != 1 || node->succ_size() != 1)
89  return false;
90 
91  const ExplodedNode *pred = *(node->pred_begin());
92  if (pred->succ_size() != 1)
93  return false;
94 
95  const ExplodedNode *succ = *(node->succ_begin());
96  if (succ->pred_size() != 1)
97  return false;
98 
99  // Now reclaim any nodes that are (by definition) not essential to
100  // analysis history and are not consulted by any client code.
101  ProgramPoint progPoint = node->getLocation();
102  if (progPoint.getAs<PreStmtPurgeDeadSymbols>())
103  return !progPoint.getTag();
104 
105  // Condition 3.
106  if (!progPoint.getAs<PostStmt>() || progPoint.getAs<PostStore>())
107  return false;
108 
109  // Condition 4.
110  if (progPoint.getTag())
111  return false;
112 
113  // Conditions 5, 6, and 7.
114  ProgramStateRef state = node->getState();
115  ProgramStateRef pred_state = pred->getState();
116  if (state->store != pred_state->store || state->GDM != pred_state->GDM ||
117  progPoint.getLocationContext() != pred->getLocationContext())
118  return false;
119 
120  // All further checks require expressions. As per #3, we know that we have
121  // a PostStmt.
122  const Expr *Ex = dyn_cast<Expr>(progPoint.castAs<PostStmt>().getStmt());
123  if (!Ex)
124  return false;
125 
126  // Condition 8.
127  // Do not collect nodes for "interesting" lvalue expressions since they are
128  // used extensively for generating path diagnostics.
129  if (isInterestingLValueExpr(Ex))
130  return false;
131 
132  // Condition 9.
133  // Do not collect nodes for non-consumed Stmt or Expr to ensure precise
134  // diagnostic generation; specifically, so that we could anchor arrows
135  // pointing to the beginning of statements (as written in code).
136  const ParentMap &PM = progPoint.getLocationContext()->getParentMap();
137  if (!PM.isConsumedExpr(Ex))
138  return false;
139 
140  // Condition 10.
141  const ProgramPoint SuccLoc = succ->getLocation();
142  if (std::optional<StmtPoint> SP = SuccLoc.getAs<StmtPoint>())
143  if (CallEvent::isCallStmt(SP->getStmt()))
144  return false;
145 
146  // Condition 10, continuation.
147  if (SuccLoc.getAs<CallEnter>() || SuccLoc.getAs<PreImplicitCall>())
148  return false;
149 
150  return true;
151 }
152 
153 void ExplodedGraph::collectNode(ExplodedNode *node) {
154  // Removing a node means:
155  // (a) changing the predecessors successor to the successor of this node
156  // (b) changing the successors predecessor to the predecessor of this node
157  // (c) Putting 'node' onto freeNodes.
158  assert(node->pred_size() == 1 || node->succ_size() == 1);
159  ExplodedNode *pred = *(node->pred_begin());
160  ExplodedNode *succ = *(node->succ_begin());
161  pred->replaceSuccessor(succ);
162  succ->replacePredecessor(pred);
163  FreeNodes.push_back(node);
164  Nodes.RemoveNode(node);
165  --NumNodes;
166  node->~ExplodedNode();
167 }
168 
170  if (ChangedNodes.empty())
171  return;
172 
173  // Only periodically reclaim nodes so that we can build up a set of
174  // nodes that meet the reclamation criteria. Freshly created nodes
175  // by definition have no successor, and thus cannot be reclaimed (see below).
176  assert(ReclaimCounter > 0);
177  if (--ReclaimCounter != 0)
178  return;
180 
181  for (const auto node : ChangedNodes)
182  if (shouldCollect(node))
183  collectNode(node);
184  ChangedNodes.clear();
185 }
186 
187 //===----------------------------------------------------------------------===//
188 // ExplodedNode.
189 //===----------------------------------------------------------------------===//
190 
191 // An NodeGroup's storage type is actually very much like a TinyPtrVector:
192 // it can be either a pointer to a single ExplodedNode, or a pointer to a
193 // BumpVector allocated with the ExplodedGraph's allocator. This allows the
194 // common case of single-node NodeGroups to be implemented with no extra memory.
195 //
196 // Consequently, each of the NodeGroup methods have up to four cases to handle:
197 // 1. The flag is set and this group does not actually contain any nodes.
198 // 2. The group is empty, in which case the storage value is null.
199 // 3. The group contains a single node.
200 // 4. The group contains more than one node.
202 using GroupStorage = llvm::PointerUnion<ExplodedNode *, ExplodedNodeVector *>;
203 
205  assert(!V->isSink());
206  Preds.addNode(V, G);
207  V->Succs.addNode(this, G);
208 }
209 
210 void ExplodedNode::NodeGroup::replaceNode(ExplodedNode *node) {
211  assert(!getFlag());
212 
213  GroupStorage &Storage = reinterpret_cast<GroupStorage&>(P);
214  assert(Storage.is<ExplodedNode *>());
215  Storage = node;
216  assert(Storage.is<ExplodedNode *>());
217 }
218 
219 void ExplodedNode::NodeGroup::addNode(ExplodedNode *N, ExplodedGraph &G) {
220  assert(!getFlag());
221 
222  GroupStorage &Storage = reinterpret_cast<GroupStorage&>(P);
223  if (Storage.isNull()) {
224  Storage = N;
225  assert(Storage.is<ExplodedNode *>());
226  return;
227  }
228 
230 
231  if (!V) {
232  // Switch from single-node to multi-node representation.
233  ExplodedNode *Old = Storage.get<ExplodedNode *>();
234 
236  V = new (G.getAllocator()) ExplodedNodeVector(Ctx, 4);
237  V->push_back(Old, Ctx);
238 
239  Storage = V;
240  assert(!getFlag());
241  assert(Storage.is<ExplodedNodeVector *>());
242  }
243 
244  V->push_back(N, G.getNodeAllocator());
245 }
246 
247 unsigned ExplodedNode::NodeGroup::size() const {
248  if (getFlag())
249  return 0;
250 
251  const GroupStorage &Storage = reinterpret_cast<const GroupStorage &>(P);
252  if (Storage.isNull())
253  return 0;
254  if (ExplodedNodeVector *V = Storage.dyn_cast<ExplodedNodeVector *>())
255  return V->size();
256  return 1;
257 }
258 
259 ExplodedNode * const *ExplodedNode::NodeGroup::begin() const {
260  if (getFlag())
261  return nullptr;
262 
263  const GroupStorage &Storage = reinterpret_cast<const GroupStorage &>(P);
264  if (Storage.isNull())
265  return nullptr;
266  if (ExplodedNodeVector *V = Storage.dyn_cast<ExplodedNodeVector *>())
267  return V->begin();
268  return Storage.getAddrOfPtr1();
269 }
270 
271 ExplodedNode * const *ExplodedNode::NodeGroup::end() const {
272  if (getFlag())
273  return nullptr;
274 
275  const GroupStorage &Storage = reinterpret_cast<const GroupStorage &>(P);
276  if (Storage.isNull())
277  return nullptr;
278  if (ExplodedNodeVector *V = Storage.dyn_cast<ExplodedNodeVector *>())
279  return V->end();
280  return Storage.getAddrOfPtr1() + 1;
281 }
282 
284  return pred_size() == 1 && succ_size() == 1 &&
285  getFirstPred()->getState()->getID() == getState()->getID() &&
286  getFirstPred()->succ_size() == 1;
287 }
288 
291  if (auto BEP = P.getAs<BlockEntrance>())
292  return BEP->getBlock();
293 
294  // Find the node's current statement in the CFG.
295  // FIXME: getStmtForDiagnostics() does nasty things in order to provide
296  // a valid statement for body farms, do we need this behavior here?
297  if (const Stmt *S = getStmtForDiagnostics())
298  return getLocationContext()
300  ->getCFGStmtMap()
301  ->getBlock(S);
302 
303  return nullptr;
304 }
305 
306 static const LocationContext *
309  const LocationContext *ParentLC = LC->getParent();
310  assert(ParentLC && "We don't start analysis from autosynthesized code");
311  while (ParentLC->getAnalysisDeclContext()->isBodyAutosynthesized()) {
312  LC = ParentLC;
313  ParentLC = LC->getParent();
314  assert(ParentLC && "We don't start analysis from autosynthesized code");
315  }
316  return LC;
317 }
318 
320  // We cannot place diagnostics on autosynthesized code.
321  // Put them onto the call site through which we jumped into autosynthesized
322  // code for the first time.
323  const LocationContext *LC = getLocationContext();
325  // It must be a stack frame because we only autosynthesize functions.
326  return cast<StackFrameContext>(findTopAutosynthesizedParentContext(LC))
327  ->getCallSite();
328  }
329  // Otherwise, see if the node's program point directly points to a statement.
330  // FIXME: Refactor into a ProgramPoint method?
332  if (auto SP = P.getAs<StmtPoint>())
333  return SP->getStmt();
334  if (auto BE = P.getAs<BlockEdge>())
335  return BE->getSrc()->getTerminatorStmt();
336  if (auto CE = P.getAs<CallEnter>())
337  return CE->getCallExpr();
338  if (auto CEE = P.getAs<CallExitEnd>())
339  return CEE->getCalleeContext()->getCallSite();
340  if (auto PIPP = P.getAs<PostInitializer>())
341  return PIPP->getInitializer()->getInit();
342  if (auto CEB = P.getAs<CallExitBegin>())
343  return CEB->getReturnStmt();
344  if (auto FEP = P.getAs<FunctionExitPoint>())
345  return FEP->getStmt();
346 
347  return nullptr;
348 }
349 
351  for (const ExplodedNode *N = getFirstSucc(); N; N = N->getFirstSucc()) {
352  if (const Stmt *S = N->getStmtForDiagnostics()) {
353  // Check if the statement is '?' or '&&'/'||'. These are "merges",
354  // not actual statement points.
355  switch (S->getStmtClass()) {
356  case Stmt::ChooseExprClass:
357  case Stmt::BinaryConditionalOperatorClass:
358  case Stmt::ConditionalOperatorClass:
359  continue;
360  case Stmt::BinaryOperatorClass: {
361  BinaryOperatorKind Op = cast<BinaryOperator>(S)->getOpcode();
362  if (Op == BO_LAnd || Op == BO_LOr)
363  continue;
364  break;
365  }
366  default:
367  break;
368  }
369  // We found the statement, so return it.
370  return S;
371  }
372  }
373 
374  return nullptr;
375 }
376 
378  for (const ExplodedNode *N = getFirstPred(); N; N = N->getFirstPred())
379  if (const Stmt *S = N->getStmtForDiagnostics())
380  return S;
381 
382  return nullptr;
383 }
384 
386  if (const Stmt *S = getStmtForDiagnostics())
387  return S;
388 
390 }
391 
394  bool IsSink,
395  bool* IsNew) {
396  // Profile 'State' to determine if we already have an existing node.
397  llvm::FoldingSetNodeID profile;
398  void *InsertPos = nullptr;
399 
400  NodeTy::Profile(profile, L, State, IsSink);
401  NodeTy* V = Nodes.FindNodeOrInsertPos(profile, InsertPos);
402 
403  if (!V) {
404  if (!FreeNodes.empty()) {
405  V = FreeNodes.back();
406  FreeNodes.pop_back();
407  }
408  else {
409  // Allocate a new node.
410  V = getAllocator().Allocate<NodeTy>();
411  }
412 
413  ++NumNodes;
414  new (V) NodeTy(L, State, NumNodes, IsSink);
415 
417  ChangedNodes.push_back(V);
418 
419  // Insert the node into the node set and return it.
420  Nodes.InsertNode(V, InsertPos);
421 
422  if (IsNew) *IsNew = true;
423  }
424  else
425  if (IsNew) *IsNew = false;
426 
427  return V;
428 }
429 
432  int64_t Id,
433  bool IsSink) {
434  NodeTy *V = getAllocator().Allocate<NodeTy>();
435  new (V) NodeTy(L, State, Id, IsSink);
436  return V;
437 }
438 
439 std::unique_ptr<ExplodedGraph>
441  InterExplodedGraphMap *ForwardMap,
442  InterExplodedGraphMap *InverseMap) const {
443  if (Nodes.empty())
444  return nullptr;
445 
446  using Pass1Ty = llvm::DenseSet<const ExplodedNode *>;
447  Pass1Ty Pass1;
448 
449  using Pass2Ty = InterExplodedGraphMap;
450  InterExplodedGraphMap Pass2Scratch;
451  Pass2Ty &Pass2 = ForwardMap ? *ForwardMap : Pass2Scratch;
452 
454 
455  // ===- Pass 1 (reverse DFS) -===
456  for (const auto Sink : Sinks)
457  if (Sink)
458  WL1.push_back(Sink);
459 
460  // Process the first worklist until it is empty.
461  while (!WL1.empty()) {
462  const ExplodedNode *N = WL1.pop_back_val();
463 
464  // Have we already visited this node? If so, continue to the next one.
465  if (!Pass1.insert(N).second)
466  continue;
467 
468  // If this is a root enqueue it to the second worklist.
469  if (N->Preds.empty()) {
470  WL2.push_back(N);
471  continue;
472  }
473 
474  // Visit our predecessors and enqueue them.
475  WL1.append(N->Preds.begin(), N->Preds.end());
476  }
477 
478  // We didn't hit a root? Return with a null pointer for the new graph.
479  if (WL2.empty())
480  return nullptr;
481 
482  // Create an empty graph.
483  std::unique_ptr<ExplodedGraph> G = MakeEmptyGraph();
484 
485  // ===- Pass 2 (forward DFS to construct the new graph) -===
486  while (!WL2.empty()) {
487  const ExplodedNode *N = WL2.pop_back_val();
488 
489  // Skip this node if we have already processed it.
490  if (Pass2.contains(N))
491  continue;
492 
493  // Create the corresponding node in the new graph and record the mapping
494  // from the old node to the new node.
495  ExplodedNode *NewN = G->createUncachedNode(N->getLocation(), N->State,
496  N->getID(), N->isSink());
497  Pass2[N] = NewN;
498 
499  // Also record the reverse mapping from the new node to the old node.
500  if (InverseMap) (*InverseMap)[NewN] = N;
501 
502  // If this node is a root, designate it as such in the graph.
503  if (N->Preds.empty())
504  G->addRoot(NewN);
505 
506  // In the case that some of the intended predecessors of NewN have already
507  // been created, we should hook them up as predecessors.
508 
509  // Walk through the predecessors of 'N' and hook up their corresponding
510  // nodes in the new graph (if any) to the freshly created node.
511  for (const ExplodedNode *Pred : N->Preds) {
512  Pass2Ty::iterator PI = Pass2.find(Pred);
513  if (PI == Pass2.end())
514  continue;
515 
516  NewN->addPredecessor(const_cast<ExplodedNode *>(PI->second), *G);
517  }
518 
519  // In the case that some of the intended successors of NewN have already
520  // been created, we should hook them up as successors. Otherwise, enqueue
521  // the new nodes from the original graph that should have nodes created
522  // in the new graph.
523  for (const ExplodedNode *Succ : N->Succs) {
524  Pass2Ty::iterator PI = Pass2.find(Succ);
525  if (PI != Pass2.end()) {
526  const_cast<ExplodedNode *>(PI->second)->addPredecessor(NewN, *G);
527  continue;
528  }
529 
530  // Enqueue nodes to the worklist that were marked during pass 1.
531  if (Pass1.count(Succ))
532  WL2.push_back(Succ);
533  }
534  }
535 
536  return G;
537 }
#define V(N, I)
Definition: ASTContext.h:3299
int Id
Definition: ASTDiff.cpp:190
StringRef P
llvm::PointerUnion< ExplodedNode *, ExplodedNodeVector * > GroupStorage
BumpVector< ExplodedNode * > ExplodedNodeVector
static const LocationContext * findTopAutosynthesizedParentContext(const LocationContext *LC)
Forward-declares and imports various common LLVM datatypes that clang wants to use unqualified.
LineState State
Represents a single basic block in a source-level CFG.
Definition: CFG.h:604
CFGBlock * getBlock(Stmt *S)
Returns the CFGBlock the specified Stmt* appears in.
Definition: CFGStmtMap.cpp:27
Represents a point when we begin processing an inlined call.
Definition: ProgramPoint.h:628
Represents a point when we start the call exit sequence (for inlined call).
Definition: ProgramPoint.h:666
Represents a point when we finish the call exit sequence (for inlined call).
Definition: ProgramPoint.h:686
This represents one expression.
Definition: Expr.h:110
bool isLValue() const
isLValue - True if this expression is an "l-value" according to the rules of the current language.
Definition: Expr.h:277
It wraps the AnalysisDeclContext to represent both the call stack with the help of StackFrameContext ...
const LocationContext * getParent() const
It might return null.
const ParentMap & getParentMap() const
LLVM_ATTRIBUTE_RETURNS_NONNULL AnalysisDeclContext * getAnalysisDeclContext() const
bool isConsumedExpr(Expr *E) const
Definition: ParentMap.cpp:188
Represents a program point after a store evaluation.
Definition: ProgramPoint.h:426
Represents a program point just before an implicit call event.
Definition: ProgramPoint.h:579
Represents a point after we ran remove dead bindings BEFORE processing the given statement.
Definition: ProgramPoint.h:468
std::optional< T > getAs() const
Convert to the specified ProgramPoint type, returning std::nullopt if this ProgramPoint is not of the...
Definition: ProgramPoint.h:147
T castAs() const
Convert to the specified ProgramPoint type, asserting that this ProgramPoint is of the desired type.
Definition: ProgramPoint.h:137
const ProgramPointTag * getTag() const
Definition: ProgramPoint.h:173
const LocationContext * getLocationContext() const
Definition: ProgramPoint.h:175
const Stmt * getStmt() const
Definition: ProgramPoint.h:274
Stmt - This represents one statement.
Definition: Stmt.h:84
static bool isCallStmt(const Stmt *S)
Returns true if this is a statement is a function or method call of some kind.
Definition: CallEvent.cpp:347
std::unique_ptr< ExplodedGraph > trim(ArrayRef< const NodeTy * > Nodes, InterExplodedGraphMap *ForwardMap=nullptr, InterExplodedGraphMap *InverseMap=nullptr) const
Creates a trimmed version of the graph that only contains paths leading to the given nodes.
unsigned ReclaimCounter
Counter to determine when to reclaim nodes.
NodeVector ChangedNodes
A list of recently allocated nodes that can potentially be recycled.
int64_t NumNodes
NumNodes - The number of nodes in the graph.
BumpVectorContext & getNodeAllocator()
unsigned ReclaimNodeInterval
Determines how often nodes are reclaimed.
void reclaimRecentlyAllocatedNodes()
Reclaim "uninteresting" nodes created since the last time this method was called.
static bool isInterestingLValueExpr(const Expr *Ex)
Returns true if nodes for the given expression kind are always kept around.
std::unique_ptr< ExplodedGraph > MakeEmptyGraph() const
ExplodedNode * getNode(const ProgramPoint &L, ProgramStateRef State, bool IsSink=false, bool *IsNew=nullptr)
Retrieve the node associated with a (Location,State) pair, where the 'Location' is a ProgramPoint in ...
ExplodedNode * createUncachedNode(const ProgramPoint &L, ProgramStateRef State, int64_t Id, bool IsSink=false)
Create a node for a (Location, State) pair, but don't store it for deduplication later.
llvm::FoldingSet< ExplodedNode > Nodes
Nodes - The nodes in the graph.
llvm::BumpPtrAllocator & getAllocator()
NodeVector FreeNodes
A list of nodes that can be reused.
ExplodedNode * getFirstPred()
const CFGBlock * getCFGBlock() const
ExplodedNode * getFirstSucc()
const ProgramStateRef & getState() const
const LocationContext * getLocationContext() const
const Stmt * getStmtForDiagnostics() const
If the node's program point corresponds to a statement, retrieve that statement.
bool isTrivial() const
The node is trivial if it has only one successor, only one predecessor, it's predecessor has only one...
const Stmt * getPreviousStmtForDiagnostics() const
Find the statement that was executed immediately before this node.
ProgramPoint getLocation() const
getLocation - Returns the edge associated with the given node.
void addPredecessor(ExplodedNode *V, ExplodedGraph &G)
addPredeccessor - Adds a predecessor to the current node, and in tandem add this node as a successor ...
unsigned pred_size() const
static void Profile(llvm::FoldingSetNodeID &ID, const ProgramPoint &Loc, const ProgramStateRef &state, bool IsSink)
const Stmt * getNextStmtForDiagnostics() const
Find the next statement that was executed on this node's execution path.
const Stmt * getCurrentOrPreviousStmtForDiagnostics() const
Find the statement that was executed at or immediately before this node.
unsigned succ_size() const
llvm::DenseMap< const ExplodedNode *, const ExplodedNode * > InterExplodedGraphMap
RangeSelector node(std::string ID)
Selects a node, including trailing semicolon, if any (for declarations and non-expression statements)...
The JSON file list parser is used to communicate input to InstallAPI.
BinaryOperatorKind
long int64_t