clang  19.0.0git
DataflowAnalysis.h
Go to the documentation of this file.
1 //===- DataflowAnalysis.h ---------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines base types and functions for building dataflow analyses
10 // that run over Control-Flow Graphs (CFGs).
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_DATAFLOWANALYSIS_H
15 #define LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_DATAFLOWANALYSIS_H
16 
17 #include <iterator>
18 #include <optional>
19 #include <type_traits>
20 #include <utility>
21 #include <vector>
22 
23 #include "clang/AST/ASTContext.h"
24 #include "clang/Analysis/CFG.h"
31 #include "llvm/ADT/STLExtras.h"
32 #include "llvm/ADT/STLFunctionalExtras.h"
33 #include "llvm/ADT/SmallVector.h"
34 #include "llvm/Support/Errc.h"
35 #include "llvm/Support/Error.h"
36 
37 namespace clang {
38 namespace dataflow {
39 
40 /// Base class template for dataflow analyses built on a single lattice type.
41 ///
42 /// Requirements:
43 ///
44 /// `Derived` must be derived from a specialization of this class template and
45 /// must provide the following public members:
46 /// * `LatticeT initialElement()` - returns a lattice element that models the
47 /// initial state of a basic block;
48 /// * `void transfer(const CFGElement &, LatticeT &, Environment &)` - applies
49 /// the analysis transfer function for a given CFG element and lattice
50 /// element.
51 ///
52 /// `Derived` can optionally provide the following members:
53 /// * `void transferBranch(bool Branch, const Stmt *Stmt, TypeErasedLattice &E,
54 /// Environment &Env)` - applies the analysis transfer
55 /// function for a given edge from a CFG block of a conditional statement.
56 ///
57 /// `Derived` can optionally override the virtual functions in the
58 /// `Environment::ValueModel` interface (which is an indirect base class of
59 /// this class).
60 ///
61 /// `LatticeT` is a bounded join-semilattice that is used by `Derived` and must
62 /// provide the following public members:
63 /// * `LatticeJoinEffect join(const LatticeT &)` - joins the object and the
64 /// argument by computing their least upper bound, modifies the object if
65 /// necessary, and returns an effect indicating whether any changes were
66 /// made to it;
67 /// FIXME: make it `static LatticeT join(const LatticeT&, const LatticeT&)`
68 /// * `bool operator==(const LatticeT &) const` - returns true if and only if
69 /// the object is equal to the argument.
70 ///
71 /// `LatticeT` can optionally provide the following members:
72 /// * `LatticeJoinEffect widen(const LatticeT &Previous)` - replaces the
73 /// lattice element with an approximation that can reach a fixed point more
74 /// quickly than iterated application of the transfer function alone. The
75 /// previous value is provided to inform the choice of widened value. The
76 /// function must also serve as a comparison operation, by indicating whether
77 /// the widened value is equivalent to the previous value with the returned
78 /// `LatticeJoinEffect`.
79 template <typename Derived, typename LatticeT>
81 public:
82  /// Bounded join-semilattice that is used in the analysis.
83  using Lattice = LatticeT;
84 
85  explicit DataflowAnalysis(ASTContext &Context) : Context(Context) {}
86 
87  explicit DataflowAnalysis(ASTContext &Context,
89  : TypeErasedDataflowAnalysis(Options), Context(Context) {}
90 
91  ASTContext &getASTContext() final { return Context; }
92 
94  return {static_cast<Derived *>(this)->initialElement()};
95  }
96 
98  const TypeErasedLattice &E2) final {
99  // FIXME: change the signature of join() to avoid copying here.
100  Lattice L1 = llvm::any_cast<const Lattice &>(E1.Value);
101  const Lattice &L2 = llvm::any_cast<const Lattice &>(E2.Value);
102  L1.join(L2);
103  return {std::move(L1)};
104  }
105 
107  const TypeErasedLattice &Previous) final {
108  Lattice &C = llvm::any_cast<Lattice &>(Current.Value);
109  const Lattice &P = llvm::any_cast<const Lattice &>(Previous.Value);
110  return widenInternal(Rank0{}, C, P);
111  }
112 
114  const TypeErasedLattice &E2) final {
115  const Lattice &L1 = llvm::any_cast<const Lattice &>(E1.Value);
116  const Lattice &L2 = llvm::any_cast<const Lattice &>(E2.Value);
117  return L1 == L2;
118  }
119 
121  Environment &Env) final {
122  Lattice &L = llvm::any_cast<Lattice &>(E.Value);
123  static_cast<Derived *>(this)->transfer(Element, L, Env);
124  }
125 
126  void transferBranchTypeErased(bool Branch, const Stmt *Stmt,
127  TypeErasedLattice &E, Environment &Env) final {
128  transferBranchInternal(Rank0{}, *static_cast<Derived *>(this), Branch, Stmt,
129  E, Env);
130  }
131 
132 private:
133  // These `Rank` structs are used for template metaprogramming to choose
134  // between overloads.
135  struct Rank1 {};
136  struct Rank0 : Rank1 {};
137 
138  // The first-choice implementation: use `widen` when it is available.
139  template <typename T>
140  static auto widenInternal(Rank0, T &Current, const T &Prev)
141  -> decltype(Current.widen(Prev)) {
142  return Current.widen(Prev);
143  }
144 
145  // The second-choice implementation: `widen` is unavailable. Widening is
146  // merged with equality checking, so when widening is unimplemented, we
147  // default to equality checking.
148  static LatticeJoinEffect widenInternal(Rank1, const Lattice &Current,
149  const Lattice &Prev) {
150  return Prev == Current ? LatticeJoinEffect::Unchanged
152  }
153 
154  // The first-choice implementation: `transferBranch` is implemented.
155  template <typename Analysis>
156  static auto transferBranchInternal(Rank0, Analysis &A, bool Branch,
157  const Stmt *Stmt, TypeErasedLattice &L,
158  Environment &Env)
159  -> std::void_t<decltype(A.transferBranch(
160  Branch, Stmt, std::declval<LatticeT &>(), Env))> {
161  A.transferBranch(Branch, Stmt, llvm::any_cast<Lattice &>(L.Value), Env);
162  }
163 
164  // The second-choice implementation: `transferBranch` is unimplemented. No-op.
165  template <typename Analysis>
166  static void transferBranchInternal(Rank1, Analysis &A, bool, const Stmt *,
167  TypeErasedLattice &, Environment &) {}
168 
169  ASTContext &Context;
170 };
171 
172 // Model of the program at a given program point.
173 template <typename LatticeT> struct DataflowAnalysisState {
174  // Model of a program property.
175  LatticeT Lattice;
176 
177  // Model of the state of the program (store and heap).
179 };
180 
181 /// Performs dataflow analysis and returns a mapping from basic block IDs to
182 /// dataflow analysis states that model the respective basic blocks. The
183 /// returned vector, if any, will have the same size as the number of CFG
184 /// blocks, with indices corresponding to basic block IDs. Returns an error if
185 /// the dataflow analysis cannot be performed successfully. Otherwise, calls
186 /// `PostVisitCFG` on each CFG element with the final analysis results at that
187 /// program point.
188 ///
189 /// `MaxBlockVisits` caps the number of block visits during analysis. See
190 /// `runTypeErasedDataflowAnalysis` for a full description. The default value is
191 /// essentially arbitrary -- large enough to accommodate what seems like any
192 /// reasonable CFG, but still small enough to limit the cost of hitting the
193 /// limit.
194 template <typename AnalysisT>
195 llvm::Expected<std::vector<
196  std::optional<DataflowAnalysisState<typename AnalysisT::Lattice>>>>
198  const AdornedCFG &ACFG, AnalysisT &Analysis, const Environment &InitEnv,
199  std::function<void(const CFGElement &, const DataflowAnalysisState<
200  typename AnalysisT::Lattice> &)>
201  PostVisitCFG = nullptr,
202  std::int32_t MaxBlockVisits = 20'000) {
203  std::function<void(const CFGElement &,
205  PostVisitCFGClosure = nullptr;
206  if (PostVisitCFG) {
207  PostVisitCFGClosure = [&PostVisitCFG](
208  const CFGElement &Element,
210  auto *Lattice =
211  llvm::any_cast<typename AnalysisT::Lattice>(&State.Lattice.Value);
212  // FIXME: we should not be copying the environment here!
213  // Ultimately the PostVisitCFG only gets a const reference anyway.
215  *Lattice, State.Env.fork()});
216  };
217  }
218 
219  auto TypeErasedBlockStates = runTypeErasedDataflowAnalysis(
220  ACFG, Analysis, InitEnv, PostVisitCFGClosure, MaxBlockVisits);
221  if (!TypeErasedBlockStates)
222  return TypeErasedBlockStates.takeError();
223 
224  std::vector<std::optional<DataflowAnalysisState<typename AnalysisT::Lattice>>>
225  BlockStates;
226  BlockStates.reserve(TypeErasedBlockStates->size());
227 
228  llvm::transform(
229  std::move(*TypeErasedBlockStates), std::back_inserter(BlockStates),
230  [](auto &OptState) {
231  return llvm::transformOptional(
232  std::move(OptState), [](TypeErasedDataflowAnalysisState &&State) {
234  llvm::any_cast<typename AnalysisT::Lattice>(
235  std::move(State.Lattice.Value)),
236  std::move(State.Env)};
237  });
238  });
239  return std::move(BlockStates);
240 }
241 
242 // Create an analysis class that is derived from `DataflowAnalysis`. This is an
243 // SFINAE adapter that allows us to call two different variants of constructor
244 // (either with or without the optional `Environment` parameter).
245 // FIXME: Make all classes derived from `DataflowAnalysis` take an `Environment`
246 // parameter in their constructor so that we can get rid of this abomination.
247 template <typename AnalysisT>
249  -> decltype(AnalysisT(ASTCtx, Env)) {
250  return AnalysisT(ASTCtx, Env);
251 }
252 template <typename AnalysisT>
253 auto createAnalysis(ASTContext &ASTCtx, Environment &Env)
254  -> decltype(AnalysisT(ASTCtx)) {
255  return AnalysisT(ASTCtx);
256 }
257 
258 /// Runs a dataflow analysis over the given function and then runs `Diagnoser`
259 /// over the results. Returns a list of diagnostics for `FuncDecl` or an
260 /// error. Currently, errors can occur (at least) because the analysis requires
261 /// too many iterations over the CFG or the SAT solver times out.
262 ///
263 /// The default value of `MaxSATIterations` was chosen based on the following
264 /// observations:
265 /// - Non-pathological calls to the solver typically require only a few hundred
266 /// iterations.
267 /// - This limit is still low enough to keep runtimes acceptable (on typical
268 /// machines) in cases where we hit the limit.
269 ///
270 /// `MaxBlockVisits` caps the number of block visits during analysis. See
271 /// `runDataflowAnalysis` for a full description and explanation of the default
272 /// value.
273 template <typename AnalysisT, typename Diagnostic>
275  const FunctionDecl &FuncDecl, ASTContext &ASTCtx,
276  llvm::function_ref<llvm::SmallVector<Diagnostic>(
277  const CFGElement &, ASTContext &,
279  Diagnoser,
280  std::int64_t MaxSATIterations = 1'000'000'000,
281  std::int32_t MaxBlockVisits = 20'000) {
282  llvm::Expected<AdornedCFG> Context = AdornedCFG::build(FuncDecl);
283  if (!Context)
284  return Context.takeError();
285 
286  auto Solver = std::make_unique<WatchedLiteralsSolver>(MaxSATIterations);
287  DataflowAnalysisContext AnalysisContext(*Solver);
288  Environment Env(AnalysisContext, FuncDecl);
289  AnalysisT Analysis = createAnalysis<AnalysisT>(ASTCtx, Env);
290  llvm::SmallVector<Diagnostic> Diagnostics;
291  if (llvm::Error Err =
293  *Context, Analysis, Env,
294  [&ASTCtx, &Diagnoser, &Diagnostics](
295  const CFGElement &Elt,
296  const TypeErasedDataflowAnalysisState &State) mutable {
297  auto EltDiagnostics = Diagnoser(
298  Elt, ASTCtx,
300  llvm::any_cast<const typename AnalysisT::Lattice &>(
301  State.Lattice.Value),
302  State.Env));
303  llvm::move(EltDiagnostics, std::back_inserter(Diagnostics));
304  },
305  MaxBlockVisits)
306  .takeError())
307  return std::move(Err);
308 
309  if (Solver->reachedLimit())
310  return llvm::createStringError(llvm::errc::interrupted,
311  "SAT solver timed out");
312 
313  return Diagnostics;
314 }
315 
316 /// Abstract base class for dataflow "models": reusable analysis components that
317 /// model a particular aspect of program semantics in the `Environment`. For
318 /// example, a model may capture a type and its related functions.
320 public:
321  /// Return value indicates whether the model processed the `Element`.
322  virtual bool transfer(const CFGElement &Element, Environment &Env) = 0;
323 };
324 
325 } // namespace dataflow
326 } // namespace clang
327 
328 #endif // LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_DATAFLOWANALYSIS_H
Defines the clang::ASTContext interface.
StringRef P
const Environment & Env
Definition: HTMLLogger.cpp:148
llvm::ArrayRef< std::optional< TypeErasedDataflowAnalysisState > > BlockStates
Stores the state of a CFG block if it has been evaluated by the analysis.
const Environment & InitEnv
Initial state to start the analysis.
TypeErasedDataflowAnalysis & Analysis
The analysis to be run.
const AdornedCFG & ACFG
Contains the CFG being analyzed.
StateNode * Previous
LineState State
Holds long-lived AST nodes (such as types and decls) that can be referred to throughout the semantic ...
Definition: ASTContext.h:185
Represents a top-level expression in a basic block.
Definition: CFG.h:55
Represents a function declaration or definition.
Definition: Decl.h:1972
Stmt - This represents one statement.
Definition: Stmt.h:84
Holds CFG with additional information derived from it that is needed to perform dataflow analysis.
Definition: AdornedCFG.h:32
static llvm::Expected< AdornedCFG > build(const FunctionDecl &Func)
Builds an AdornedCFG from a FunctionDecl.
Definition: AdornedCFG.cpp:129
Owns objects that encompass the state of a program and stores context that is used during dataflow an...
Base class template for dataflow analyses built on a single lattice type.
ASTContext & getASTContext() final
Returns the ASTContext that is used by the analysis.
LatticeJoinEffect widenTypeErased(TypeErasedLattice &Current, const TypeErasedLattice &Previous) final
Chooses a lattice element that approximates the current element at a program point,...
DataflowAnalysis(ASTContext &Context, DataflowAnalysisOptions Options)
bool isEqualTypeErased(const TypeErasedLattice &E1, const TypeErasedLattice &E2) final
Returns true if and only if the two given type-erased lattice elements are equal.
DataflowAnalysis(ASTContext &Context)
TypeErasedLattice joinTypeErased(const TypeErasedLattice &E1, const TypeErasedLattice &E2) final
Joins two type-erased lattice elements by computing their least upper bound.
void transferBranchTypeErased(bool Branch, const Stmt *Stmt, TypeErasedLattice &E, Environment &Env) final
Applies the analysis transfer function for a given edge from a CFG block of a conditional statement.
TypeErasedLattice typeErasedInitialElement() final
Returns a type-erased lattice element that models the initial state of a basic block.
LatticeT Lattice
Bounded join-semilattice that is used in the analysis.
void transferTypeErased(const CFGElement &Element, TypeErasedLattice &E, Environment &Env) final
Applies the analysis transfer function for a given control flow graph element and type-erased lattice...
Abstract base class for dataflow "models": reusable analysis components that model a particular aspec...
virtual bool transfer(const CFGElement &Element, Environment &Env)=0
Return value indicates whether the model processed the Element.
Supplements Environment with non-standard comparison and join operations.
Holds the state of the program (store and heap) at a given program point.
An interface for a SAT solver that can be used by dataflow analyses.
Definition: Solver.h:28
virtual bool reachedLimit() const =0
Type-erased base class for dataflow analyses built on a single lattice type.
void transfer(const StmtToEnvMap &StmtToEnv, const Stmt &S, Environment &Env, Environment::ValueModel &Model)
Evaluates S and updates Env accordingly.
Definition: Transfer.cpp:865
llvm::Expected< llvm::SmallVector< Diagnostic > > diagnoseFunction(const FunctionDecl &FuncDecl, ASTContext &ASTCtx, llvm::function_ref< llvm::SmallVector< Diagnostic >(const CFGElement &, ASTContext &, const TransferStateForDiagnostics< typename AnalysisT::Lattice > &)> Diagnoser, std::int64_t MaxSATIterations=1 '000 '000 '000, std::int32_t MaxBlockVisits=20 '000)
Runs a dataflow analysis over the given function and then runs Diagnoser over the results.
llvm::Expected< std::vector< std::optional< DataflowAnalysisState< typename AnalysisT::Lattice > > > > runDataflowAnalysis(const AdornedCFG &ACFG, AnalysisT &Analysis, const Environment &InitEnv, std::function< void(const CFGElement &, const DataflowAnalysisState< typename AnalysisT::Lattice > &)> PostVisitCFG=nullptr, std::int32_t MaxBlockVisits=20 '000)
Performs dataflow analysis and returns a mapping from basic block IDs to dataflow analysis states tha...
llvm::Expected< std::vector< std::optional< TypeErasedDataflowAnalysisState > > > runTypeErasedDataflowAnalysis(const AdornedCFG &ACFG, TypeErasedDataflowAnalysis &Analysis, const Environment &InitEnv, std::function< void(const CFGElement &, const TypeErasedDataflowAnalysisState &)> PostVisitCFG, std::int32_t MaxBlockVisits)
Performs dataflow analysis and returns a mapping from basic block IDs to dataflow analysis states tha...
LatticeEffect LatticeJoinEffect
auto createAnalysis(ASTContext &ASTCtx, Environment &Env) -> decltype(AnalysisT(ASTCtx, Env))
LatticeEffect
Effect indicating whether a lattice operation resulted in a new value.
The JSON file list parser is used to communicate input to InstallAPI.
const FunctionProtoType * T
long int64_t
A read-only version of TransferState.
Definition: MatchSwitch.h:55
Type-erased model of the program at a given program point.
Type-erased lattice element container.