clang  19.0.0git
DataflowAnalysisContext.h
Go to the documentation of this file.
1 //===-- DataflowAnalysisContext.h -------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines a DataflowAnalysisContext class that owns objects that
10 // encompass the state of a program and stores context that is used during
11 // dataflow analysis.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #ifndef LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_DATAFLOWANALYSISCONTEXT_H
16 #define LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_DATAFLOWANALYSISCONTEXT_H
17 
18 #include "clang/AST/Decl.h"
19 #include "clang/AST/Expr.h"
20 #include "clang/AST/TypeOrdering.h"
27 #include "llvm/ADT/DenseMap.h"
28 #include "llvm/ADT/DenseSet.h"
29 #include "llvm/ADT/SetVector.h"
30 #include "llvm/Support/Compiler.h"
31 #include <cassert>
32 #include <memory>
33 #include <optional>
34 
35 namespace clang {
36 namespace dataflow {
37 class Logger;
38 
40  /// The maximum depth to analyze. A value of zero is equivalent to disabling
41  /// context-sensitive analysis entirely.
42  unsigned Depth = 2;
43 };
44 
45 /// Owns objects that encompass the state of a program and stores context that
46 /// is used during dataflow analysis.
48 public:
49  struct Options {
50  /// Options for analyzing function bodies when present in the translation
51  /// unit, or empty to disable context-sensitive analysis. Note that this is
52  /// fundamentally limited: some constructs, such as recursion, are
53  /// explicitly unsupported.
54  std::optional<ContextSensitiveOptions> ContextSensitiveOpts;
55 
56  /// If provided, analysis details will be recorded here.
57  /// (This is always non-null within an AnalysisContext, the framework
58  /// provides a fallback no-op logger).
59  Logger *Log = nullptr;
60  };
61 
62  /// Constructs a dataflow analysis context.
63  ///
64  /// Requirements:
65  ///
66  /// `S` must not be null.
67  DataflowAnalysisContext(std::unique_ptr<Solver> S,
68  Options Opts = Options{
69  /*ContextSensitiveOpts=*/std::nullopt,
70  /*Logger=*/nullptr})
71  : DataflowAnalysisContext(*S, std::move(S), Opts) {}
72 
73  /// Constructs a dataflow analysis context.
74  ///
75  /// Requirements:
76  ///
77  /// `S` must outlive the `DataflowAnalysisContext`.
79  /*ContextSensitiveOpts=*/std::nullopt,
80  /*Logger=*/nullptr})
81  : DataflowAnalysisContext(S, nullptr, Opts) {}
82 
84 
85  /// Sets a callback that returns the names and types of the synthetic fields
86  /// to add to a `RecordStorageLocation` of a given type.
87  /// Typically, this is called from the constructor of a `DataflowAnalysis`
88  ///
89  /// The field types returned by the callback may not have reference type.
90  ///
91  /// To maintain the invariant that all `RecordStorageLocation`s of a given
92  /// type have the same fields:
93  /// * The callback must always return the same result for a given type
94  /// * `setSyntheticFieldCallback()` must be called before any
95  // `RecordStorageLocation`s are created.
97  std::function<llvm::StringMap<QualType>(QualType)> CB) {
98  assert(!RecordStorageLocationCreated);
99  SyntheticFieldCallback = CB;
100  }
101 
102  /// Returns a new storage location appropriate for `Type`.
103  ///
104  /// A null `Type` is interpreted as the pointee type of `std::nullptr_t`.
106 
107  /// Creates a `RecordStorageLocation` for the given type and with the given
108  /// fields.
109  ///
110  /// Requirements:
111  ///
112  /// `FieldLocs` must contain exactly the fields returned by
113  /// `getModeledFields(Type)`.
114  /// `SyntheticFields` must contain exactly the fields returned by
115  /// `getSyntheticFields(Type)`.
119 
120  /// Returns a stable storage location for `D`.
122 
123  /// Returns a stable storage location for `E`.
125 
126  /// Returns a pointer value that represents a null pointer. Calls with
127  /// `PointeeType` that are canonically equivalent will return the same result.
128  /// A null `PointeeType` can be used for the pointee of `std::nullptr_t`.
130 
131  /// Adds `Constraint` to current and future flow conditions in this context.
132  ///
133  /// Invariants must contain only flow-insensitive information, i.e. facts that
134  /// are true on all paths through the program.
135  /// Information can be added eagerly (when analysis begins), or lazily (e.g.
136  /// when values are first used). The analysis must be careful that the same
137  /// information is added regardless of which order blocks are analyzed in.
138  void addInvariant(const Formula &Constraint);
139 
140  /// Adds `Constraint` to the flow condition identified by `Token`.
141  void addFlowConditionConstraint(Atom Token, const Formula &Constraint);
142 
143  /// Creates a new flow condition with the same constraints as the flow
144  /// condition identified by `Token` and returns its token.
146 
147  /// Creates a new flow condition that represents the disjunction of the flow
148  /// conditions identified by `FirstToken` and `SecondToken`, and returns its
149  /// token.
150  Atom joinFlowConditions(Atom FirstToken, Atom SecondToken);
151 
152  /// Returns true if the constraints of the flow condition identified by
153  /// `Token` imply that `F` is true.
154  /// Returns false if the flow condition does not imply `F` or if the solver
155  /// times out.
156  bool flowConditionImplies(Atom Token, const Formula &F);
157 
158  /// Returns true if the constraints of the flow condition identified by
159  /// `Token` still allow `F` to be true.
160  /// Returns false if the flow condition implies that `F` is false or if the
161  /// solver times out.
162  bool flowConditionAllows(Atom Token, const Formula &F);
163 
164  /// Returns true if `Val1` is equivalent to `Val2`.
165  /// Note: This function doesn't take into account constraints on `Val1` and
166  /// `Val2` imposed by the flow condition.
167  bool equivalentFormulas(const Formula &Val1, const Formula &Val2);
168 
169  LLVM_DUMP_METHOD void dumpFlowCondition(Atom Token,
170  llvm::raw_ostream &OS = llvm::dbgs());
171 
172  /// Returns the `AdornedCFG` registered for `F`, if any. Otherwise,
173  /// returns null.
174  const AdornedCFG *getAdornedCFG(const FunctionDecl *F);
175 
176  const Options &getOptions() { return Opts; }
177 
178  Arena &arena() { return *A; }
179 
180  /// Returns the outcome of satisfiability checking on `Constraints`.
181  ///
182  /// Flow conditions are not incorporated, so they may need to be manually
183  /// included in `Constraints` to provide contextually-accurate results, e.g.
184  /// if any definitions or relationships of the values in `Constraints` have
185  /// been stored in flow conditions.
186  Solver::Result querySolver(llvm::SetVector<const Formula *> Constraints);
187 
188  /// Returns the fields of `Type`, limited to the set of fields modeled by this
189  /// context.
191 
192  /// Returns the names and types of the synthetic fields for the given record
193  /// type.
194  llvm::StringMap<QualType> getSyntheticFields(QualType Type) {
195  assert(Type->isRecordType());
196  if (SyntheticFieldCallback) {
197  llvm::StringMap<QualType> Result = SyntheticFieldCallback(Type);
198  // Synthetic fields are not allowed to have reference type.
199  assert([&Result] {
200  for (const auto &Entry : Result)
201  if (Entry.getValue()->isReferenceType())
202  return false;
203  return true;
204  }());
205  return Result;
206  }
207  return {};
208  }
209 
210 private:
211  friend class Environment;
212 
213  struct NullableQualTypeDenseMapInfo : private llvm::DenseMapInfo<QualType> {
214  static QualType getEmptyKey() {
215  // Allow a NULL `QualType` by using a different value as the empty key.
216  return QualType::getFromOpaquePtr(reinterpret_cast<Type *>(1));
217  }
218 
219  using DenseMapInfo::getHashValue;
220  using DenseMapInfo::getTombstoneKey;
221  using DenseMapInfo::isEqual;
222  };
223 
224  /// `S` is the solver to use. `OwnedSolver` may be:
225  /// * Null (in which case `S` is non-onwed and must outlive this object), or
226  /// * Non-null (in which case it must refer to `S`, and the
227  /// `DataflowAnalysisContext will take ownership of `OwnedSolver`).
228  DataflowAnalysisContext(Solver &S, std::unique_ptr<Solver> &&OwnedSolver,
229  Options Opts);
230 
231  // Extends the set of modeled field declarations.
232  void addModeledFields(const FieldSet &Fields);
233 
234  /// Adds all constraints of the flow condition identified by `Token` and all
235  /// of its transitive dependencies to `Constraints`.
236  void
237  addTransitiveFlowConditionConstraints(Atom Token,
238  llvm::SetVector<const Formula *> &Out);
239 
240  /// Returns true if the solver is able to prove that there is a satisfying
241  /// assignment for `Constraints`.
242  bool isSatisfiable(llvm::SetVector<const Formula *> Constraints) {
243  return querySolver(std::move(Constraints)).getStatus() ==
245  }
246 
247  /// Returns true if the solver is able to prove that there is no satisfying
248  /// assignment for `Constraints`
249  bool isUnsatisfiable(llvm::SetVector<const Formula *> Constraints) {
250  return querySolver(std::move(Constraints)).getStatus() ==
252  }
253 
254  Solver &S;
255  std::unique_ptr<Solver> OwnedSolver;
256  std::unique_ptr<Arena> A;
257 
258  // Maps from program declarations and statements to storage locations that are
259  // assigned to them. These assignments are global (aggregated across all basic
260  // blocks) and are used to produce stable storage locations when the same
261  // basic blocks are evaluated multiple times. The storage locations that are
262  // in scope for a particular basic block are stored in `Environment`.
263  llvm::DenseMap<const ValueDecl *, StorageLocation *> DeclToLoc;
264  llvm::DenseMap<const Expr *, StorageLocation *> ExprToLoc;
265 
266  // Null pointer values, keyed by the canonical pointee type.
267  //
268  // FIXME: The pointer values are indexed by the pointee types which are
269  // required to initialize the `PointeeLoc` field in `PointerValue`. Consider
270  // creating a type-independent `NullPointerValue` without a `PointeeLoc`
271  // field.
272  llvm::DenseMap<QualType, PointerValue *, NullableQualTypeDenseMapInfo>
273  NullPointerVals;
274 
275  Options Opts;
276 
277  // Flow conditions are tracked symbolically: each unique flow condition is
278  // associated with a fresh symbolic variable (token), bound to the clause that
279  // defines the flow condition. Conceptually, each binding corresponds to an
280  // "iff" of the form `FC <=> (C1 ^ C2 ^ ...)` where `FC` is a flow condition
281  // token (an atomic boolean) and `Ci`s are the set of constraints in the flow
282  // flow condition clause. The set of constraints (C1 ^ C2 ^ ...) are stored in
283  // the `FlowConditionConstraints` map, keyed by the token of the flow
284  // condition.
285  //
286  // Flow conditions depend on other flow conditions if they are created using
287  // `forkFlowCondition` or `joinFlowConditions`. The graph of flow condition
288  // dependencies is stored in the `FlowConditionDeps` map.
289  llvm::DenseMap<Atom, llvm::DenseSet<Atom>> FlowConditionDeps;
290  llvm::DenseMap<Atom, const Formula *> FlowConditionConstraints;
291  const Formula *Invariant = nullptr;
292 
293  llvm::DenseMap<const FunctionDecl *, AdornedCFG> FunctionContexts;
294 
295  // Fields modeled by environments covered by this context.
296  FieldSet ModeledFields;
297 
298  std::unique_ptr<Logger> LogOwner; // If created via flags.
299 
300  std::function<llvm::StringMap<QualType>(QualType)> SyntheticFieldCallback;
301 
302  /// Has any `RecordStorageLocation` been created yet?
303  bool RecordStorageLocationCreated = false;
304 };
305 
306 } // namespace dataflow
307 } // namespace clang
308 
309 #endif // LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_DATAFLOWANALYSISCONTEXT_H
Allows QualTypes to be sorted and hence used in maps and sets.
This represents one expression.
Definition: Expr.h:110
Represents a function declaration or definition.
Definition: Decl.h:1972
A (possibly-)qualified type.
Definition: Type.h:940
static QualType getFromOpaquePtr(const void *Ptr)
Definition: Type.h:989
Token - This structure provides full information about a lexed token.
Definition: Token.h:36
The base class of the type hierarchy.
Definition: Type.h:1813
bool isRecordType() const
Definition: Type.h:7718
Represent the declaration of a variable (in which case it is an lvalue) a function (in which case it ...
Definition: Decl.h:707
Holds CFG with additional information derived from it that is needed to perform dataflow analysis.
Definition: AdornedCFG.h:32
The Arena owns the objects that model data within an analysis.
Definition: Arena.h:21
Owns objects that encompass the state of a program and stores context that is used during dataflow an...
void setSyntheticFieldCallback(std::function< llvm::StringMap< QualType >(QualType)> CB)
Sets a callback that returns the names and types of the synthetic fields to add to a RecordStorageLoc...
const AdornedCFG * getAdornedCFG(const FunctionDecl *F)
Returns the AdornedCFG registered for F, if any.
DataflowAnalysisContext(std::unique_ptr< Solver > S, Options Opts=Options{ std::nullopt, nullptr})
Constructs a dataflow analysis context.
Atom joinFlowConditions(Atom FirstToken, Atom SecondToken)
Creates a new flow condition that represents the disjunction of the flow conditions identified by Fir...
void addFlowConditionConstraint(Atom Token, const Formula &Constraint)
Adds Constraint to the flow condition identified by Token.
Atom forkFlowCondition(Atom Token)
Creates a new flow condition with the same constraints as the flow condition identified by Token and ...
bool equivalentFormulas(const Formula &Val1, const Formula &Val2)
Returns true if Val1 is equivalent to Val2.
StorageLocation & getStableStorageLocation(const ValueDecl &D)
Returns a stable storage location for D.
llvm::StringMap< QualType > getSyntheticFields(QualType Type)
Returns the names and types of the synthetic fields for the given record type.
bool flowConditionImplies(Atom Token, const Formula &F)
Returns true if the constraints of the flow condition identified by Token imply that F is true.
Solver::Result querySolver(llvm::SetVector< const Formula * > Constraints)
Returns the outcome of satisfiability checking on Constraints.
bool flowConditionAllows(Atom Token, const Formula &F)
Returns true if the constraints of the flow condition identified by Token still allow F to be true.
PointerValue & getOrCreateNullPointerValue(QualType PointeeType)
Returns a pointer value that represents a null pointer.
void addInvariant(const Formula &Constraint)
Adds Constraint to current and future flow conditions in this context.
StorageLocation & createStorageLocation(QualType Type)
Returns a new storage location appropriate for Type.
FieldSet getModeledFields(QualType Type)
Returns the fields of Type, limited to the set of fields modeled by this context.
LLVM_DUMP_METHOD void dumpFlowCondition(Atom Token, llvm::raw_ostream &OS=llvm::dbgs())
DataflowAnalysisContext(Solver &S, Options Opts=Options{ std::nullopt, nullptr})
Constructs a dataflow analysis context.
RecordStorageLocation & createRecordStorageLocation(QualType Type, RecordStorageLocation::FieldToLoc FieldLocs, RecordStorageLocation::SyntheticFieldMap SyntheticFields)
Creates a RecordStorageLocation for the given type and with the given fields.
Holds the state of the program (store and heap) at a given program point.
A logger is notified as the analysis progresses.
Definition: Logger.h:27
Models a symbolic pointer. Specifically, any value of type T*.
Definition: Value.h:170
A storage location for a record (struct, class, or union).
llvm::DenseMap< const ValueDecl *, StorageLocation * > FieldToLoc
llvm::StringMap< StorageLocation * > SyntheticFieldMap
An interface for a SAT solver that can be used by dataflow analyses.
Definition: Solver.h:28
Base class for elements of the local variable store and of the heap.
Atom
Identifies an atomic boolean variable such as "V1".
Definition: Formula.h:34
llvm::SmallSetVector< const FieldDecl *, 4 > FieldSet
A set of FieldDecl *.
Definition: ASTOps.h:41
The JSON file list parser is used to communicate input to InstallAPI.
unsigned Depth
The maximum depth to analyze.
Logger * Log
If provided, analysis details will be recorded here.
std::optional< ContextSensitiveOptions > ContextSensitiveOpts
Options for analyzing function bodies when present in the translation unit, or empty to disable conte...
Status getStatus() const
Returns the status of satisfiability checking on the queried boolean formula.
Definition: Solver.h:64
@ Unsatisfiable
Indicates that there is no satisfying assignment for a boolean formula.
@ Satisfiable
Indicates that there exists a satisfying assignment for a boolean formula.