clang  19.0.0git
DataflowAnalysisContext.cpp
Go to the documentation of this file.
1 //===-- DataflowAnalysisContext.cpp -----------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines a DataflowAnalysisContext class that owns objects that
10 // encompass the state of a program and stores context that is used during
11 // dataflow analysis.
12 //
13 //===----------------------------------------------------------------------===//
14 
16 #include "clang/AST/ExprCXX.h"
23 #include "llvm/ADT/SetOperations.h"
24 #include "llvm/ADT/SetVector.h"
25 #include "llvm/Support/CommandLine.h"
26 #include "llvm/Support/Debug.h"
27 #include "llvm/Support/FileSystem.h"
28 #include "llvm/Support/Path.h"
29 #include "llvm/Support/raw_ostream.h"
30 #include <cassert>
31 #include <memory>
32 #include <string>
33 #include <utility>
34 #include <vector>
35 
36 static llvm::cl::opt<std::string> DataflowLog(
37  "dataflow-log", llvm::cl::Hidden, llvm::cl::ValueOptional,
38  llvm::cl::desc("Emit log of dataflow analysis. With no arg, writes textual "
39  "log to stderr. With an arg, writes HTML logs under the "
40  "specified directory (one per analyzed function)."));
41 
42 namespace clang {
43 namespace dataflow {
44 
46  // During context-sensitive analysis, a struct may be allocated in one
47  // function, but its field accessed in a function lower in the stack than
48  // the allocation. Since we only collect fields used in the function where
49  // the allocation occurs, we can't apply that filter when performing
50  // context-sensitive analysis. But, this only applies to storage locations,
51  // since field access it not allowed to fail. In contrast, field *values*
52  // don't need this allowance, since the API allows for uninitialized fields.
53  if (Opts.ContextSensitiveOpts)
54  return getObjectFields(Type);
55 
56  return llvm::set_intersection(getObjectFields(Type), ModeledFields);
57 }
58 
59 void DataflowAnalysisContext::addModeledFields(const FieldSet &Fields) {
60  ModeledFields.set_union(Fields);
61 }
62 
64  if (!Type.isNull() && Type->isRecordType()) {
65  llvm::DenseMap<const ValueDecl *, StorageLocation *> FieldLocs;
66  for (const FieldDecl *Field : getModeledFields(Type))
67  if (Field->getType()->isReferenceType())
68  FieldLocs.insert({Field, nullptr});
69  else
70  FieldLocs.insert({Field, &createStorageLocation(
71  Field->getType().getNonReferenceType())});
72 
74  for (const auto &Entry : getSyntheticFields(Type))
75  SyntheticFields.insert(
76  {Entry.getKey(),
77  &createStorageLocation(Entry.getValue().getNonReferenceType())});
78 
79  return createRecordStorageLocation(Type, std::move(FieldLocs),
80  std::move(SyntheticFields));
81  }
82  return arena().create<ScalarStorageLocation>(Type);
83 }
84 
85 // Returns the keys for a given `StringMap`.
86 // Can't use `StringSet` as the return type as it doesn't support `operator==`.
87 template <typename T>
88 static llvm::DenseSet<llvm::StringRef> getKeys(const llvm::StringMap<T> &Map) {
89  return llvm::DenseSet<llvm::StringRef>(Map.keys().begin(), Map.keys().end());
90 }
91 
92 RecordStorageLocation &DataflowAnalysisContext::createRecordStorageLocation(
95  assert(Type->isRecordType());
96  assert(containsSameFields(getModeledFields(Type), FieldLocs));
97  assert(getKeys(getSyntheticFields(Type)) == getKeys(SyntheticFields));
98 
99  RecordStorageLocationCreated = true;
100  return arena().create<RecordStorageLocation>(Type, std::move(FieldLocs),
101  std::move(SyntheticFields));
102 }
103 
105 DataflowAnalysisContext::getStableStorageLocation(const ValueDecl &D) {
106  if (auto *Loc = DeclToLoc.lookup(&D))
107  return *Loc;
108  auto &Loc = createStorageLocation(D.getType().getNonReferenceType());
109  DeclToLoc[&D] = &Loc;
110  return Loc;
111 }
112 
114 DataflowAnalysisContext::getStableStorageLocation(const Expr &E) {
115  const Expr &CanonE = ignoreCFGOmittedNodes(E);
116 
117  if (auto *Loc = ExprToLoc.lookup(&CanonE))
118  return *Loc;
119  auto &Loc = createStorageLocation(CanonE.getType());
120  ExprToLoc[&CanonE] = &Loc;
121  return Loc;
122 }
123 
124 PointerValue &
125 DataflowAnalysisContext::getOrCreateNullPointerValue(QualType PointeeType) {
126  auto CanonicalPointeeType =
127  PointeeType.isNull() ? PointeeType : PointeeType.getCanonicalType();
128  auto Res = NullPointerVals.try_emplace(CanonicalPointeeType, nullptr);
129  if (Res.second) {
130  auto &PointeeLoc = createStorageLocation(CanonicalPointeeType);
131  Res.first->second = &arena().create<PointerValue>(PointeeLoc);
132  }
133  return *Res.first->second;
134 }
135 
136 void DataflowAnalysisContext::addInvariant(const Formula &Constraint) {
137  if (Invariant == nullptr)
138  Invariant = &Constraint;
139  else
140  Invariant = &arena().makeAnd(*Invariant, Constraint);
141 }
142 
143 void DataflowAnalysisContext::addFlowConditionConstraint(
144  Atom Token, const Formula &Constraint) {
145  auto Res = FlowConditionConstraints.try_emplace(Token, &Constraint);
146  if (!Res.second) {
147  Res.first->second =
148  &arena().makeAnd(*Res.first->second, Constraint);
149  }
150 }
151 
152 Atom DataflowAnalysisContext::forkFlowCondition(Atom Token) {
153  Atom ForkToken = arena().makeFlowConditionToken();
154  FlowConditionDeps[ForkToken].insert(Token);
155  addFlowConditionConstraint(ForkToken, arena().makeAtomRef(Token));
156  return ForkToken;
157 }
158 
159 Atom
160 DataflowAnalysisContext::joinFlowConditions(Atom FirstToken,
161  Atom SecondToken) {
162  Atom Token = arena().makeFlowConditionToken();
163  FlowConditionDeps[Token].insert(FirstToken);
164  FlowConditionDeps[Token].insert(SecondToken);
165  addFlowConditionConstraint(Token,
166  arena().makeOr(arena().makeAtomRef(FirstToken),
167  arena().makeAtomRef(SecondToken)));
168  return Token;
169 }
170 
171 Solver::Result DataflowAnalysisContext::querySolver(
172  llvm::SetVector<const Formula *> Constraints) {
173  return S.solve(Constraints.getArrayRef());
174 }
175 
176 bool DataflowAnalysisContext::flowConditionImplies(Atom Token,
177  const Formula &F) {
178  if (F.isLiteral(true))
179  return true;
180 
181  // Returns true if and only if truth assignment of the flow condition implies
182  // that `F` is also true. We prove whether or not this property holds by
183  // reducing the problem to satisfiability checking. In other words, we attempt
184  // to show that assuming `F` is false makes the constraints induced by the
185  // flow condition unsatisfiable.
186  llvm::SetVector<const Formula *> Constraints;
187  Constraints.insert(&arena().makeAtomRef(Token));
188  Constraints.insert(&arena().makeNot(F));
189  addTransitiveFlowConditionConstraints(Token, Constraints);
190  return isUnsatisfiable(std::move(Constraints));
191 }
192 
193 bool DataflowAnalysisContext::flowConditionAllows(Atom Token,
194  const Formula &F) {
195  if (F.isLiteral(false))
196  return false;
197 
198  llvm::SetVector<const Formula *> Constraints;
199  Constraints.insert(&arena().makeAtomRef(Token));
200  Constraints.insert(&F);
201  addTransitiveFlowConditionConstraints(Token, Constraints);
202  return isSatisfiable(std::move(Constraints));
203 }
204 
205 bool DataflowAnalysisContext::equivalentFormulas(const Formula &Val1,
206  const Formula &Val2) {
207  llvm::SetVector<const Formula *> Constraints;
208  Constraints.insert(&arena().makeNot(arena().makeEquals(Val1, Val2)));
209  return isUnsatisfiable(std::move(Constraints));
210 }
211 
212 void DataflowAnalysisContext::addTransitiveFlowConditionConstraints(
213  Atom Token, llvm::SetVector<const Formula *> &Constraints) {
214  llvm::DenseSet<Atom> AddedTokens;
215  std::vector<Atom> Remaining = {Token};
216 
217  if (Invariant)
218  Constraints.insert(Invariant);
219  // Define all the flow conditions that might be referenced in constraints.
220  while (!Remaining.empty()) {
221  auto Token = Remaining.back();
222  Remaining.pop_back();
223  if (!AddedTokens.insert(Token).second)
224  continue;
225 
226  auto ConstraintsIt = FlowConditionConstraints.find(Token);
227  if (ConstraintsIt == FlowConditionConstraints.end()) {
228  Constraints.insert(&arena().makeAtomRef(Token));
229  } else {
230  // Bind flow condition token via `iff` to its set of constraints:
231  // FC <=> (C1 ^ C2 ^ ...), where Ci are constraints
232  Constraints.insert(&arena().makeEquals(arena().makeAtomRef(Token),
233  *ConstraintsIt->second));
234  }
235 
236  if (auto DepsIt = FlowConditionDeps.find(Token);
237  DepsIt != FlowConditionDeps.end())
238  for (Atom A : DepsIt->second)
239  Remaining.push_back(A);
240  }
241 }
242 
243 static void printAtomList(const llvm::SmallVector<Atom> &Atoms,
244  llvm::raw_ostream &OS) {
245  OS << "(";
246  for (size_t i = 0; i < Atoms.size(); ++i) {
247  OS << Atoms[i];
248  if (i + 1 < Atoms.size())
249  OS << ", ";
250  }
251  OS << ")\n";
252 }
253 
254 void DataflowAnalysisContext::dumpFlowCondition(Atom Token,
255  llvm::raw_ostream &OS) {
256  llvm::SetVector<const Formula *> Constraints;
257  Constraints.insert(&arena().makeAtomRef(Token));
258  addTransitiveFlowConditionConstraints(Token, Constraints);
259 
260  OS << "Flow condition token: " << Token << "\n";
262  llvm::SetVector<const Formula *> OriginalConstraints = Constraints;
263  simplifyConstraints(Constraints, arena(), &Info);
264  if (!Constraints.empty()) {
265  OS << "Constraints:\n";
266  for (const auto *Constraint : Constraints) {
267  Constraint->print(OS);
268  OS << "\n";
269  }
270  }
271  if (!Info.TrueAtoms.empty()) {
272  OS << "True atoms: ";
273  printAtomList(Info.TrueAtoms, OS);
274  }
275  if (!Info.FalseAtoms.empty()) {
276  OS << "False atoms: ";
277  printAtomList(Info.FalseAtoms, OS);
278  }
279  if (!Info.EquivalentAtoms.empty()) {
280  OS << "Equivalent atoms:\n";
281  for (const llvm::SmallVector<Atom> &Class : Info.EquivalentAtoms)
282  printAtomList(Class, OS);
283  }
284 
285  OS << "\nFlow condition constraints before simplification:\n";
286  for (const auto *Constraint : OriginalConstraints) {
287  Constraint->print(OS);
288  OS << "\n";
289  }
290 }
291 
292 const AdornedCFG *
293 DataflowAnalysisContext::getAdornedCFG(const FunctionDecl *F) {
294  // Canonicalize the key:
295  F = F->getDefinition();
296  if (F == nullptr)
297  return nullptr;
298  auto It = FunctionContexts.find(F);
299  if (It != FunctionContexts.end())
300  return &It->second;
301 
302  if (F->doesThisDeclarationHaveABody()) {
303  auto ACFG = AdornedCFG::build(*F);
304  // FIXME: Handle errors.
305  assert(ACFG);
306  auto Result = FunctionContexts.insert({F, std::move(*ACFG)});
307  return &Result.first->second;
308  }
309 
310  return nullptr;
311 }
312 
313 static std::unique_ptr<Logger> makeLoggerFromCommandLine() {
314  if (DataflowLog.empty())
315  return Logger::textual(llvm::errs());
316 
317  llvm::StringRef Dir = DataflowLog;
318  if (auto EC = llvm::sys::fs::create_directories(Dir))
319  llvm::errs() << "Failed to create log dir: " << EC.message() << "\n";
320  // All analysis runs within a process will log to the same directory.
321  // Share a counter so they don't all overwrite each other's 0.html.
322  // (Don't share a logger, it's not threadsafe).
323  static std::atomic<unsigned> Counter = {0};
324  auto StreamFactory =
325  [Dir(Dir.str())]() mutable -> std::unique_ptr<llvm::raw_ostream> {
327  llvm::sys::path::append(File,
328  std::to_string(Counter.fetch_add(1)) + ".html");
329  std::error_code EC;
330  auto OS = std::make_unique<llvm::raw_fd_ostream>(File, EC);
331  if (EC) {
332  llvm::errs() << "Failed to create log " << File << ": " << EC.message()
333  << "\n";
334  return std::make_unique<llvm::raw_null_ostream>();
335  }
336  return OS;
337  };
338  return Logger::html(std::move(StreamFactory));
339 }
340 
341 DataflowAnalysisContext::DataflowAnalysisContext(
342  Solver &S, std::unique_ptr<Solver> &&OwnedSolver, Options Opts)
343  : S(S), OwnedSolver(std::move(OwnedSolver)), A(std::make_unique<Arena>()),
344  Opts(Opts) {
345  // If the -dataflow-log command-line flag was set, synthesize a logger.
346  // This is ugly but provides a uniform method for ad-hoc debugging dataflow-
347  // based tools.
348  if (Opts.Log == nullptr) {
349  if (DataflowLog.getNumOccurrences() > 0) {
350  LogOwner = makeLoggerFromCommandLine();
351  this->Opts.Log = LogOwner.get();
352  // FIXME: if the flag is given a value, write an HTML log to a file.
353  } else {
354  this->Opts.Log = &Logger::null();
355  }
356  }
357 }
358 
359 DataflowAnalysisContext::~DataflowAnalysisContext() = default;
360 
361 } // namespace dataflow
362 } // namespace clang
static llvm::cl::opt< std::string > DataflowLog("dataflow-log", llvm::cl::Hidden, llvm::cl::ValueOptional, llvm::cl::desc("Emit log of dataflow analysis. With no arg, writes textual " "log to stderr. With an arg, writes HTML logs under the " "specified directory (one per analyzed function)."))
Defines the clang::Expr interface and subclasses for C++ expressions.
SourceLocation Loc
Definition: SemaObjC.cpp:755
const AdornedCFG & ACFG
Contains the CFG being analyzed.
This represents one expression.
Definition: Expr.h:110
QualType getType() const
Definition: Expr.h:142
Represents a member of a struct/union/class.
Definition: Decl.h:3060
Represents a function declaration or definition.
Definition: Decl.h:1972
bool doesThisDeclarationHaveABody() const
Returns whether this specific declaration of the function has a body.
Definition: Decl.h:2298
FunctionDecl * getDefinition()
Get the definition for this declaration.
Definition: Decl.h:2254
A (possibly-)qualified type.
Definition: Type.h:940
bool isNull() const
Return true if this QualType doesn't point to a type yet.
Definition: Type.h:1007
QualType getNonReferenceType() const
If Type is a reference type (e.g., const int&), returns the type that the reference refers to ("const...
Definition: Type.h:7572
QualType getCanonicalType() const
Definition: Type.h:7423
Token - This structure provides full information about a lexed token.
Definition: Token.h:36
The base class of the type hierarchy.
Definition: Type.h:1813
bool isRecordType() const
Definition: Type.h:7718
Represent the declaration of a variable (in which case it is an lvalue) a function (in which case it ...
Definition: Decl.h:707
QualType getType() const
Definition: Decl.h:718
Holds CFG with additional information derived from it that is needed to perform dataflow analysis.
Definition: AdornedCFG.h:32
llvm::StringMap< QualType > getSyntheticFields(QualType Type)
Returns the names and types of the synthetic fields for the given record type.
StorageLocation & createStorageLocation(QualType Type)
Returns a new storage location appropriate for Type.
FieldSet getModeledFields(QualType Type)
Returns the fields of Type, limited to the set of fields modeled by this context.
RecordStorageLocation & createRecordStorageLocation(QualType Type, RecordStorageLocation::FieldToLoc FieldLocs, RecordStorageLocation::SyntheticFieldMap SyntheticFields)
Creates a RecordStorageLocation for the given type and with the given fields.
bool isLiteral(bool b) const
Definition: Formula.h:78
static Logger & null()
Returns a dummy logger that does nothing.
Definition: Logger.cpp:16
Models a symbolic pointer. Specifically, any value of type T*.
Definition: Value.h:170
A storage location for a record (struct, class, or union).
llvm::DenseMap< const ValueDecl *, StorageLocation * > FieldToLoc
llvm::StringMap< StorageLocation * > SyntheticFieldMap
A storage location that is not subdivided further for the purposes of abstract interpretation.
Base class for elements of the local variable store and of the heap.
Atom
Identifies an atomic boolean variable such as "V1".
Definition: Formula.h:34
static void printAtomList(const llvm::SmallVector< Atom > &Atoms, llvm::raw_ostream &OS)
static std::unique_ptr< Logger > makeLoggerFromCommandLine()
static llvm::DenseSet< llvm::StringRef > getKeys(const llvm::StringMap< T > &Map)
void simplifyConstraints(llvm::SetVector< const Formula * > &Constraints, Arena &arena, SimplifyConstraintsInfo *Info=nullptr)
Simplifies a set of constraints (implicitly connected by "and") in a way that does not change satisfi...
const Expr & ignoreCFGOmittedNodes(const Expr &E)
Skip past nodes that the CFG does not emit.
Definition: ASTOps.cpp:34
FieldSet getObjectFields(QualType Type)
Returns the set of all fields in the type.
Definition: ASTOps.cpp:74
bool containsSameFields(const FieldSet &Fields, const RecordStorageLocation::FieldToLoc &FieldLocs)
Returns whether Fields and FieldLocs contain the same fields.
Definition: ASTOps.cpp:80
The JSON file list parser is used to communicate input to InstallAPI.
@ Invariant
The parameter is invariant: must match exactly.
@ Class
The "class" keyword introduces the elaborated-type-specifier.
Definition: Format.h:5433
std::optional< ContextSensitiveOptions > ContextSensitiveOpts
Options for analyzing function bodies when present in the translation unit, or empty to disable conte...
Information on the way a set of constraints was simplified.
llvm::SmallVector< Atom > TrueAtoms
Atoms that the original constraints imply must be true.
llvm::SmallVector< llvm::SmallVector< Atom > > EquivalentAtoms
List of equivalence classes of atoms.
llvm::SmallVector< Atom > FalseAtoms
Atoms that the original constraints imply must be false.