clang  19.0.0git
InnerPointerChecker.cpp
Go to the documentation of this file.
1 //=== InnerPointerChecker.cpp -------------------------------------*- C++ -*--//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines a check that marks a raw pointer to a C++ container's
10 // inner buffer released when the object is destroyed. This information can
11 // be used by MallocChecker to detect use-after-free problems.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "AllocationState.h"
16 #include "InterCheckerAPI.h"
24 
25 using namespace clang;
26 using namespace ento;
27 
28 // Associate container objects with a set of raw pointer symbols.
30 REGISTER_MAP_WITH_PROGRAMSTATE(RawPtrMap, const MemRegion *, PtrSet)
31 
32 
33 namespace {
34 
35 class InnerPointerChecker
36  : public Checker<check::DeadSymbols, check::PostCall> {
37 
38  CallDescriptionSet InvalidatingMemberFunctions{
39  CallDescription(CDM::CXXMethod, {"std", "basic_string", "append"}),
40  CallDescription(CDM::CXXMethod, {"std", "basic_string", "assign"}),
41  CallDescription(CDM::CXXMethod, {"std", "basic_string", "clear"}),
42  CallDescription(CDM::CXXMethod, {"std", "basic_string", "erase"}),
43  CallDescription(CDM::CXXMethod, {"std", "basic_string", "insert"}),
44  CallDescription(CDM::CXXMethod, {"std", "basic_string", "pop_back"}),
45  CallDescription(CDM::CXXMethod, {"std", "basic_string", "push_back"}),
46  CallDescription(CDM::CXXMethod, {"std", "basic_string", "replace"}),
47  CallDescription(CDM::CXXMethod, {"std", "basic_string", "reserve"}),
48  CallDescription(CDM::CXXMethod, {"std", "basic_string", "resize"}),
49  CallDescription(CDM::CXXMethod, {"std", "basic_string", "shrink_to_fit"}),
50  CallDescription(CDM::CXXMethod, {"std", "basic_string", "swap"})};
51 
52  CallDescriptionSet AddressofFunctions{
53  CallDescription(CDM::SimpleFunc, {"std", "addressof"}),
54  CallDescription(CDM::SimpleFunc, {"std", "__addressof"})};
55 
56  CallDescriptionSet InnerPointerAccessFunctions{
57  CallDescription(CDM::CXXMethod, {"std", "basic_string", "c_str"}),
58  CallDescription(CDM::SimpleFunc, {"std", "data"}, 1),
59  CallDescription(CDM::CXXMethod, {"std", "basic_string", "data"})};
60 
61 public:
62  class InnerPointerBRVisitor : public BugReporterVisitor {
63  SymbolRef PtrToBuf;
64 
65  public:
66  InnerPointerBRVisitor(SymbolRef Sym) : PtrToBuf(Sym) {}
67 
68  static void *getTag() {
69  static int Tag = 0;
70  return &Tag;
71  }
72 
73  void Profile(llvm::FoldingSetNodeID &ID) const override {
74  ID.AddPointer(getTag());
75  }
76 
77  PathDiagnosticPieceRef VisitNode(const ExplodedNode *N,
78  BugReporterContext &BRC,
79  PathSensitiveBugReport &BR) override;
80 
81  // FIXME: Scan the map once in the visitor's constructor and do a direct
82  // lookup by region.
83  bool isSymbolTracked(ProgramStateRef State, SymbolRef Sym) {
84  RawPtrMapTy Map = State->get<RawPtrMap>();
85  for (const auto &Entry : Map) {
86  if (Entry.second.contains(Sym))
87  return true;
88  }
89  return false;
90  }
91  };
92 
93  /// Check whether the called member function potentially invalidates
94  /// pointers referring to the container object's inner buffer.
95  bool isInvalidatingMemberFunction(const CallEvent &Call) const;
96 
97  /// Mark pointer symbols associated with the given memory region released
98  /// in the program state.
99  void markPtrSymbolsReleased(const CallEvent &Call, ProgramStateRef State,
100  const MemRegion *ObjRegion,
101  CheckerContext &C) const;
102 
103  /// Standard library functions that take a non-const `basic_string` argument by
104  /// reference may invalidate its inner pointers. Check for these cases and
105  /// mark the pointers released.
106  void checkFunctionArguments(const CallEvent &Call, ProgramStateRef State,
107  CheckerContext &C) const;
108 
109  /// Record the connection between raw pointers referring to a container
110  /// object's inner buffer and the object's memory region in the program state.
111  /// Mark potentially invalidated pointers released.
112  void checkPostCall(const CallEvent &Call, CheckerContext &C) const;
113 
114  /// Clean up the program state map.
115  void checkDeadSymbols(SymbolReaper &SymReaper, CheckerContext &C) const;
116 };
117 
118 } // end anonymous namespace
119 
120 bool InnerPointerChecker::isInvalidatingMemberFunction(
121  const CallEvent &Call) const {
122  if (const auto *MemOpCall = dyn_cast<CXXMemberOperatorCall>(&Call)) {
123  OverloadedOperatorKind Opc = MemOpCall->getOriginExpr()->getOperator();
124  if (Opc == OO_Equal || Opc == OO_PlusEqual)
125  return true;
126  return false;
127  }
128  return isa<CXXDestructorCall>(Call) ||
129  InvalidatingMemberFunctions.contains(Call);
130 }
131 
132 void InnerPointerChecker::markPtrSymbolsReleased(const CallEvent &Call,
134  const MemRegion *MR,
135  CheckerContext &C) const {
136  if (const PtrSet *PS = State->get<RawPtrMap>(MR)) {
137  const Expr *Origin = Call.getOriginExpr();
138  for (const auto Symbol : *PS) {
139  // NOTE: `Origin` may be null, and will be stored so in the symbol's
140  // `RefState` in MallocChecker's `RegionState` program state map.
141  State = allocation_state::markReleased(State, Symbol, Origin);
142  }
143  State = State->remove<RawPtrMap>(MR);
144  C.addTransition(State);
145  return;
146  }
147 }
148 
149 void InnerPointerChecker::checkFunctionArguments(const CallEvent &Call,
151  CheckerContext &C) const {
152  if (const auto *FC = dyn_cast<AnyFunctionCall>(&Call)) {
153  const FunctionDecl *FD = FC->getDecl();
154  if (!FD || !FD->isInStdNamespace())
155  return;
156 
157  for (unsigned I = 0, E = FD->getNumParams(); I != E; ++I) {
158  QualType ParamTy = FD->getParamDecl(I)->getType();
159  if (!ParamTy->isReferenceType() ||
160  ParamTy->getPointeeType().isConstQualified())
161  continue;
162 
163  // In case of member operator calls, `this` is counted as an
164  // argument but not as a parameter.
165  bool isaMemberOpCall = isa<CXXMemberOperatorCall>(FC);
166  unsigned ArgI = isaMemberOpCall ? I+1 : I;
167 
168  SVal Arg = FC->getArgSVal(ArgI);
169  const auto *ArgRegion =
170  dyn_cast_or_null<TypedValueRegion>(Arg.getAsRegion());
171  if (!ArgRegion)
172  continue;
173 
174  // std::addressof functions accepts a non-const reference as an argument,
175  // but doesn't modify it.
176  if (AddressofFunctions.contains(Call))
177  continue;
178 
179  markPtrSymbolsReleased(Call, State, ArgRegion, C);
180  }
181  }
182 }
183 
184 // [string.require]
185 //
186 // "References, pointers, and iterators referring to the elements of a
187 // basic_string sequence may be invalidated by the following uses of that
188 // basic_string object:
189 //
190 // -- As an argument to any standard library function taking a reference
191 // to non-const basic_string as an argument. For example, as an argument to
192 // non-member functions swap(), operator>>(), and getline(), or as an argument
193 // to basic_string::swap().
194 //
195 // -- Calling non-const member functions, except operator[], at, front, back,
196 // begin, rbegin, end, and rend."
197 
198 void InnerPointerChecker::checkPostCall(const CallEvent &Call,
199  CheckerContext &C) const {
200  ProgramStateRef State = C.getState();
201 
202  // TODO: Do we need these to be typed?
203  const TypedValueRegion *ObjRegion = nullptr;
204 
205  if (const auto *ICall = dyn_cast<CXXInstanceCall>(&Call)) {
206  ObjRegion = dyn_cast_or_null<TypedValueRegion>(
207  ICall->getCXXThisVal().getAsRegion());
208 
209  // Check [string.require] / second point.
210  if (isInvalidatingMemberFunction(Call)) {
211  markPtrSymbolsReleased(Call, State, ObjRegion, C);
212  return;
213  }
214  }
215 
216  if (InnerPointerAccessFunctions.contains(Call)) {
217 
218  if (isa<SimpleFunctionCall>(Call)) {
219  // NOTE: As of now, we only have one free access function: std::data.
220  // If we add more functions like this in the list, hardcoded
221  // argument index should be changed.
222  ObjRegion =
223  dyn_cast_or_null<TypedValueRegion>(Call.getArgSVal(0).getAsRegion());
224  }
225 
226  if (!ObjRegion)
227  return;
228 
229  SVal RawPtr = Call.getReturnValue();
230  if (SymbolRef Sym = RawPtr.getAsSymbol(/*IncludeBaseRegions=*/true)) {
231  // Start tracking this raw pointer by adding it to the set of symbols
232  // associated with this container object in the program state map.
233 
234  PtrSet::Factory &F = State->getStateManager().get_context<PtrSet>();
235  const PtrSet *SetPtr = State->get<RawPtrMap>(ObjRegion);
236  PtrSet Set = SetPtr ? *SetPtr : F.getEmptySet();
237  assert(C.wasInlined || !Set.contains(Sym));
238  Set = F.add(Set, Sym);
239 
240  State = State->set<RawPtrMap>(ObjRegion, Set);
241  C.addTransition(State);
242  }
243 
244  return;
245  }
246 
247  // Check [string.require] / first point.
248  checkFunctionArguments(Call, State, C);
249 }
250 
251 void InnerPointerChecker::checkDeadSymbols(SymbolReaper &SymReaper,
252  CheckerContext &C) const {
253  ProgramStateRef State = C.getState();
254  PtrSet::Factory &F = State->getStateManager().get_context<PtrSet>();
255  RawPtrMapTy RPM = State->get<RawPtrMap>();
256  for (const auto &Entry : RPM) {
257  if (!SymReaper.isLiveRegion(Entry.first)) {
258  // Due to incomplete destructor support, some dead regions might
259  // remain in the program state map. Clean them up.
260  State = State->remove<RawPtrMap>(Entry.first);
261  }
262  if (const PtrSet *OldSet = State->get<RawPtrMap>(Entry.first)) {
263  PtrSet CleanedUpSet = *OldSet;
264  for (const auto Symbol : Entry.second) {
265  if (!SymReaper.isLive(Symbol))
266  CleanedUpSet = F.remove(CleanedUpSet, Symbol);
267  }
268  State = CleanedUpSet.isEmpty()
269  ? State->remove<RawPtrMap>(Entry.first)
270  : State->set<RawPtrMap>(Entry.first, CleanedUpSet);
271  }
272  }
273  C.addTransition(State);
274 }
275 
276 namespace clang {
277 namespace ento {
278 namespace allocation_state {
279 
280 std::unique_ptr<BugReporterVisitor> getInnerPointerBRVisitor(SymbolRef Sym) {
281  return std::make_unique<InnerPointerChecker::InnerPointerBRVisitor>(Sym);
282 }
283 
285  RawPtrMapTy Map = State->get<RawPtrMap>();
286  for (const auto &Entry : Map) {
287  if (Entry.second.contains(Sym)) {
288  return Entry.first;
289  }
290  }
291  return nullptr;
292 }
293 
294 } // end namespace allocation_state
295 } // end namespace ento
296 } // end namespace clang
297 
298 PathDiagnosticPieceRef InnerPointerChecker::InnerPointerBRVisitor::VisitNode(
300  if (!isSymbolTracked(N->getState(), PtrToBuf) ||
301  isSymbolTracked(N->getFirstPred()->getState(), PtrToBuf))
302  return nullptr;
303 
304  const Stmt *S = N->getStmtForDiagnostics();
305  if (!S)
306  return nullptr;
307 
308  const MemRegion *ObjRegion =
310  const auto *TypedRegion = cast<TypedValueRegion>(ObjRegion);
311  QualType ObjTy = TypedRegion->getValueType();
312 
313  SmallString<256> Buf;
314  llvm::raw_svector_ostream OS(Buf);
315  OS << "Pointer to inner buffer of '" << ObjTy << "' obtained here";
317  N->getLocationContext());
318  return std::make_shared<PathDiagnosticEventPiece>(Pos, OS.str(), true);
319 }
320 
321 void ento::registerInnerPointerChecker(CheckerManager &Mgr) {
323  Mgr.registerChecker<InnerPointerChecker>();
324 }
325 
326 bool ento::shouldRegisterInnerPointerChecker(const CheckerManager &mgr) {
327  return true;
328 }
static char ID
Definition: Arena.cpp:183
#define REGISTER_MAP_WITH_PROGRAMSTATE(Name, Key, Value)
Declares an immutable map of type NameTy, suitable for placement into the ProgramState.
#define REGISTER_SET_FACTORY_WITH_PROGRAMSTATE(Name, Elem)
Declares an immutable set type Name and registers the factory for such sets in the program state,...
LineState State
bool isInStdNamespace() const
Definition: DeclBase.cpp:403
This represents one expression.
Definition: Expr.h:110
Represents a function declaration or definition.
Definition: Decl.h:1972
unsigned getNumParams() const
Return the number of parameters this function must have based on its FunctionType.
Definition: Decl.cpp:3696
const ParmVarDecl * getParamDecl(unsigned i) const
Definition: Decl.h:2709
A (possibly-)qualified type.
Definition: Type.h:940
bool isConstQualified() const
Determine whether this type is const-qualified.
Definition: Type.h:7444
Stmt - This represents one statement.
Definition: Stmt.h:84
bool isReferenceType() const
Definition: Type.h:7636
QualType getPointeeType() const
If this is a pointer, ObjC object pointer, or block pointer, this returns the respective pointee.
Definition: Type.cpp:705
QualType getType() const
Definition: Decl.h:718
const SourceManager & getSourceManager() const
Definition: BugReporter.h:737
BugReporterVisitors are used to add custom diagnostics along a path.
An immutable set of CallDescriptions.
A CallDescription is a pattern that can be used to match calls based on the qualified name and the ar...
@ CXXMethod
Matches a C++ method (may be static, may be virtual, may be an overloaded operator,...
@ SimpleFunc
Matches "simple" functions that are not methods.
Represents an abstract call to a function or method along a particular path.
Definition: CallEvent.h:153
CHECKER * registerChecker(AT &&... Args)
Used to register checkers.
ExplodedNode * getFirstPred()
const ProgramStateRef & getState() const
const LocationContext * getLocationContext() const
const Stmt * getStmtForDiagnostics() const
If the node's program point corresponds to a statement, retrieve that statement.
MemRegion - The root abstract class for all memory regions.
Definition: MemRegion.h:96
SVal - This represents a symbolic expression, which can be either an L-value or an R-value.
Definition: SVals.h:55
SymbolRef getAsSymbol(bool IncludeBaseRegions=false) const
If this SVal wraps a symbol return that SymbolRef.
Definition: SVals.cpp:104
const MemRegion * getAsRegion() const
Definition: SVals.cpp:120
Symbolic value.
Definition: SymExpr.h:30
A class responsible for cleaning up unused symbols.
bool isLiveRegion(const MemRegion *region)
bool isLive(SymbolRef sym)
TypedRegion - An abstract class representing regions that are typed.
Definition: MemRegion.h:506
TypedValueRegion - An abstract class representing regions having a typed value.
Definition: MemRegion.h:530
ProgramStateRef markReleased(ProgramStateRef State, SymbolRef Sym, const Expr *Origin)
std::unique_ptr< BugReporterVisitor > getInnerPointerBRVisitor(SymbolRef Sym)
This function provides an additional visitor that augments the bug report with information relevant t...
const MemRegion * getContainerObjRegion(ProgramStateRef State, SymbolRef Sym)
'Sym' represents a pointer to the inner buffer of a container object.
void registerInnerPointerCheckerAux(CheckerManager &Mgr)
Register the part of MallocChecker connected to InnerPointerChecker.
std::shared_ptr< PathDiagnosticPiece > PathDiagnosticPieceRef
bool Call(InterpState &S, CodePtr OpPC, const Function *Func, uint32_t VarArgSize)
Definition: Interp.h:2179
The JSON file list parser is used to communicate input to InstallAPI.
OverloadedOperatorKind
Enumeration specifying the different kinds of C++ overloaded operators.
Definition: OperatorKinds.h:21