clang  19.0.0git
PointerArithChecker.cpp
Go to the documentation of this file.
1 //=== PointerArithChecker.cpp - Pointer arithmetic checker -----*- C++ -*--===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines PointerArithChecker, a builtin checker that checks for
10 // pointer arithmetic on locations other than array elements.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "clang/AST/DeclCXX.h"
15 #include "clang/AST/ExprCXX.h"
21 #include "llvm/ADT/StringRef.h"
22 
23 using namespace clang;
24 using namespace ento;
25 
namespace {
/// What this checker has recorded about the origin of a memory region.
///
/// The enumerator values are fed to FoldingSetNodeID as integers when the
/// kind is profiled, so their declaration order is kept stable.
enum class AllocKind {
  /// Produced by a non-array new-expression using a plain one-parameter
  /// operator new.
  SingleObject,
  /// Treated as an array: array new, a recognized C allocation function, or
  /// a region that underwent array-to-pointer decay.
  Array,
  /// Nothing is assumed (e.g. member, placement or variadic operator new).
  Unknown,
  /// Single object interpreted as an array.
  Reinterpreted
};
} // end namespace
34 
35 namespace llvm {
36 template <> struct FoldingSetTrait<AllocKind> {
37  static inline void Profile(AllocKind X, FoldingSetNodeID &ID) {
38  ID.AddInteger(static_cast<int>(X));
39  }
40 };
41 } // end namespace llvm
42 
43 namespace {
44 class PointerArithChecker
45  : public Checker<
46  check::PreStmt<BinaryOperator>, check::PreStmt<UnaryOperator>,
47  check::PreStmt<ArraySubscriptExpr>, check::PreStmt<CastExpr>,
48  check::PostStmt<CastExpr>, check::PostStmt<CXXNewExpr>,
49  check::PostStmt<CallExpr>, check::DeadSymbols> {
50  AllocKind getKindOfNewOp(const CXXNewExpr *NE, const FunctionDecl *FD) const;
51  const MemRegion *getArrayRegion(const MemRegion *Region, bool &Polymorphic,
52  AllocKind &AKind, CheckerContext &C) const;
53  const MemRegion *getPointedRegion(const MemRegion *Region,
54  CheckerContext &C) const;
55  void reportPointerArithMisuse(const Expr *E, CheckerContext &C,
56  bool PointedNeeded = false) const;
57  void initAllocIdentifiers(ASTContext &C) const;
58 
59  const BugType BT_pointerArith{this, "Dangerous pointer arithmetic"};
60  const BugType BT_polyArray{this, "Dangerous pointer arithmetic"};
61  mutable llvm::SmallSet<IdentifierInfo *, 8> AllocFunctions;
62 
63 public:
64  void checkPreStmt(const UnaryOperator *UOp, CheckerContext &C) const;
65  void checkPreStmt(const BinaryOperator *BOp, CheckerContext &C) const;
66  void checkPreStmt(const ArraySubscriptExpr *SubExpr, CheckerContext &C) const;
67  void checkPreStmt(const CastExpr *CE, CheckerContext &C) const;
68  void checkPostStmt(const CastExpr *CE, CheckerContext &C) const;
69  void checkPostStmt(const CXXNewExpr *NE, CheckerContext &C) const;
70  void checkPostStmt(const CallExpr *CE, CheckerContext &C) const;
71  void checkDeadSymbols(SymbolReaper &SR, CheckerContext &C) const;
72 };
73 } // end namespace
74 
75 REGISTER_MAP_WITH_PROGRAMSTATE(RegionState, const MemRegion *, AllocKind)
76 
77 void PointerArithChecker::checkDeadSymbols(SymbolReaper &SR,
78  CheckerContext &C) const {
79  // TODO: intentional leak. Some information is garbage collected too early,
80  // see http://reviews.llvm.org/D14203 for further information.
81  /*ProgramStateRef State = C.getState();
82  RegionStateTy RegionStates = State->get<RegionState>();
83  for (const MemRegion *Reg: llvm::make_first_range(RegionStates)) {
84  if (!SR.isLiveRegion(Reg))
85  State = State->remove<RegionState>(Reg);
86  }
87  C.addTransition(State);*/
88 }
89 
90 AllocKind PointerArithChecker::getKindOfNewOp(const CXXNewExpr *NE,
91  const FunctionDecl *FD) const {
92  // This checker try not to assume anything about placement and overloaded
93  // new to avoid false positives.
94  if (isa<CXXMethodDecl>(FD))
95  return AllocKind::Unknown;
96  if (FD->getNumParams() != 1 || FD->isVariadic())
97  return AllocKind::Unknown;
98  if (NE->isArray())
99  return AllocKind::Array;
100 
101  return AllocKind::SingleObject;
102 }
103 
104 const MemRegion *
105 PointerArithChecker::getPointedRegion(const MemRegion *Region,
106  CheckerContext &C) const {
107  assert(Region);
108  ProgramStateRef State = C.getState();
109  SVal S = State->getSVal(Region);
110  return S.getAsRegion();
111 }
112 
113 /// Checks whether a region is the part of an array.
114 /// In case there is a derived to base cast above the array element, the
115 /// Polymorphic output value is set to true. AKind output value is set to the
116 /// allocation kind of the inspected region.
117 const MemRegion *PointerArithChecker::getArrayRegion(const MemRegion *Region,
118  bool &Polymorphic,
119  AllocKind &AKind,
120  CheckerContext &C) const {
121  assert(Region);
122  while (const auto *BaseRegion = dyn_cast<CXXBaseObjectRegion>(Region)) {
123  Region = BaseRegion->getSuperRegion();
124  Polymorphic = true;
125  }
126  if (const auto *ElemRegion = dyn_cast<ElementRegion>(Region)) {
127  Region = ElemRegion->getSuperRegion();
128  }
129 
130  ProgramStateRef State = C.getState();
131  if (const AllocKind *Kind = State->get<RegionState>(Region)) {
132  AKind = *Kind;
133  if (*Kind == AllocKind::Array)
134  return Region;
135  else
136  return nullptr;
137  }
138  // When the region is symbolic and we do not have any information about it,
139  // assume that this is an array to avoid false positives.
140  if (isa<SymbolicRegion>(Region))
141  return Region;
142 
143  // No AllocKind stored and not symbolic, assume that it points to a single
144  // object.
145  return nullptr;
146 }
147 
148 void PointerArithChecker::reportPointerArithMisuse(const Expr *E,
149  CheckerContext &C,
150  bool PointedNeeded) const {
151  SourceRange SR = E->getSourceRange();
152  if (SR.isInvalid())
153  return;
154 
155  ProgramStateRef State = C.getState();
156  const MemRegion *Region = C.getSVal(E).getAsRegion();
157  if (!Region)
158  return;
159  if (PointedNeeded)
160  Region = getPointedRegion(Region, C);
161  if (!Region)
162  return;
163 
164  bool IsPolymorphic = false;
165  AllocKind Kind = AllocKind::Unknown;
166  if (const MemRegion *ArrayRegion =
167  getArrayRegion(Region, IsPolymorphic, Kind, C)) {
168  if (!IsPolymorphic)
169  return;
170  if (ExplodedNode *N = C.generateNonFatalErrorNode()) {
171  constexpr llvm::StringLiteral Msg =
172  "Pointer arithmetic on a pointer to base class is dangerous "
173  "because derived and base class may have different size.";
174  auto R = std::make_unique<PathSensitiveBugReport>(BT_polyArray, Msg, N);
175  R->addRange(E->getSourceRange());
176  R->markInteresting(ArrayRegion);
177  C.emitReport(std::move(R));
178  }
179  return;
180  }
181 
182  if (Kind == AllocKind::Reinterpreted)
183  return;
184 
185  // We might not have enough information about symbolic regions.
186  if (Kind != AllocKind::SingleObject &&
187  Region->getKind() == MemRegion::Kind::SymbolicRegionKind)
188  return;
189 
190  if (ExplodedNode *N = C.generateNonFatalErrorNode()) {
191  constexpr llvm::StringLiteral Msg =
192  "Pointer arithmetic on non-array variables relies on memory layout, "
193  "which is dangerous.";
194  auto R = std::make_unique<PathSensitiveBugReport>(BT_pointerArith, Msg, N);
195  R->addRange(SR);
196  R->markInteresting(Region);
197  C.emitReport(std::move(R));
198  }
199 }
200 
201 void PointerArithChecker::initAllocIdentifiers(ASTContext &C) const {
202  if (!AllocFunctions.empty())
203  return;
204  AllocFunctions.insert(&C.Idents.get("alloca"));
205  AllocFunctions.insert(&C.Idents.get("malloc"));
206  AllocFunctions.insert(&C.Idents.get("realloc"));
207  AllocFunctions.insert(&C.Idents.get("calloc"));
208  AllocFunctions.insert(&C.Idents.get("valloc"));
209 }
210 
211 void PointerArithChecker::checkPostStmt(const CallExpr *CE,
212  CheckerContext &C) const {
213  ProgramStateRef State = C.getState();
214  const FunctionDecl *FD = C.getCalleeDecl(CE);
215  if (!FD)
216  return;
217  IdentifierInfo *FunI = FD->getIdentifier();
218  initAllocIdentifiers(C.getASTContext());
219  if (AllocFunctions.count(FunI) == 0)
220  return;
221 
222  SVal SV = C.getSVal(CE);
223  const MemRegion *Region = SV.getAsRegion();
224  if (!Region)
225  return;
226  // Assume that C allocation functions allocate arrays to avoid false
227  // positives.
228  // TODO: Add heuristics to distinguish alloc calls that allocates single
229  // objecs.
230  State = State->set<RegionState>(Region, AllocKind::Array);
231  C.addTransition(State);
232 }
233 
234 void PointerArithChecker::checkPostStmt(const CXXNewExpr *NE,
235  CheckerContext &C) const {
236  const FunctionDecl *FD = NE->getOperatorNew();
237  if (!FD)
238  return;
239 
240  AllocKind Kind = getKindOfNewOp(NE, FD);
241 
242  ProgramStateRef State = C.getState();
243  SVal AllocedVal = C.getSVal(NE);
244  const MemRegion *Region = AllocedVal.getAsRegion();
245  if (!Region)
246  return;
247  State = State->set<RegionState>(Region, Kind);
248  C.addTransition(State);
249 }
250 
251 void PointerArithChecker::checkPostStmt(const CastExpr *CE,
252  CheckerContext &C) const {
253  if (CE->getCastKind() != CastKind::CK_BitCast)
254  return;
255 
256  const Expr *CastedExpr = CE->getSubExpr();
257  ProgramStateRef State = C.getState();
258  SVal CastedVal = C.getSVal(CastedExpr);
259 
260  const MemRegion *Region = CastedVal.getAsRegion();
261  if (!Region)
262  return;
263 
264  // Suppress reinterpret casted hits.
265  State = State->set<RegionState>(Region, AllocKind::Reinterpreted);
266  C.addTransition(State);
267 }
268 
269 void PointerArithChecker::checkPreStmt(const CastExpr *CE,
270  CheckerContext &C) const {
271  if (CE->getCastKind() != CastKind::CK_ArrayToPointerDecay)
272  return;
273 
274  const Expr *CastedExpr = CE->getSubExpr();
275  ProgramStateRef State = C.getState();
276  SVal CastedVal = C.getSVal(CastedExpr);
277 
278  const MemRegion *Region = CastedVal.getAsRegion();
279  if (!Region)
280  return;
281 
282  if (const AllocKind *Kind = State->get<RegionState>(Region)) {
283  if (*Kind == AllocKind::Array || *Kind == AllocKind::Reinterpreted)
284  return;
285  }
286  State = State->set<RegionState>(Region, AllocKind::Array);
287  C.addTransition(State);
288 }
289 
290 void PointerArithChecker::checkPreStmt(const UnaryOperator *UOp,
291  CheckerContext &C) const {
292  if (!UOp->isIncrementDecrementOp() || !UOp->getType()->isPointerType())
293  return;
294  reportPointerArithMisuse(UOp->getSubExpr(), C, true);
295 }
296 
297 void PointerArithChecker::checkPreStmt(const ArraySubscriptExpr *SubsExpr,
298  CheckerContext &C) const {
299  SVal Idx = C.getSVal(SubsExpr->getIdx());
300 
301  // Indexing with 0 is OK.
302  if (Idx.isZeroConstant())
303  return;
304 
305  // Indexing vector-type expressions is also OK.
306  if (SubsExpr->getBase()->getType()->isVectorType())
307  return;
308  reportPointerArithMisuse(SubsExpr->getBase(), C);
309 }
310 
311 void PointerArithChecker::checkPreStmt(const BinaryOperator *BOp,
312  CheckerContext &C) const {
313  BinaryOperatorKind OpKind = BOp->getOpcode();
314  if (!BOp->isAdditiveOp() && OpKind != BO_AddAssign && OpKind != BO_SubAssign)
315  return;
316 
317  const Expr *Lhs = BOp->getLHS();
318  const Expr *Rhs = BOp->getRHS();
319  ProgramStateRef State = C.getState();
320 
321  if (Rhs->getType()->isIntegerType() && Lhs->getType()->isPointerType()) {
322  SVal RHSVal = C.getSVal(Rhs);
323  if (State->isNull(RHSVal).isConstrainedTrue())
324  return;
325  reportPointerArithMisuse(Lhs, C, !BOp->isAdditiveOp());
326  }
327  // The int += ptr; case is not valid C++.
328  if (Lhs->getType()->isIntegerType() && Rhs->getType()->isPointerType()) {
329  SVal LHSVal = C.getSVal(Lhs);
330  if (State->isNull(LHSVal).isConstrainedTrue())
331  return;
332  reportPointerArithMisuse(Rhs, C);
333  }
334 }
335 
336 void ento::registerPointerArithChecker(CheckerManager &mgr) {
337  mgr.registerChecker<PointerArithChecker>();
338 }
339 
340 bool ento::shouldRegisterPointerArithChecker(const CheckerManager &mgr) {
341  return true;
342 }
static char ID
Definition: Arena.cpp:183
Defines the C++ Decl subclasses, other than those for templates (found in DeclTemplate....
Defines the clang::Expr interface and subclasses for C++ expressions.
#define X(type, name)
Definition: Value.h:143
#define REGISTER_MAP_WITH_PROGRAMSTATE(Name, Key, Value)
Declares an immutable map of type NameTy, suitable for placement into the ProgramState.
LineState State
Holds long-lived AST nodes (such as types and decls) that can be referred to throughout the semantic ...
Definition: ASTContext.h:185
ArraySubscriptExpr - [C99 6.5.2.1] Array Subscripting.
Definition: Expr.h:2716
A builtin binary operation expression such as "x + y" or "x <= y".
Definition: Expr.h:3892
static bool isAdditiveOp(Opcode Opc)
Definition: Expr.h:3977
Opcode getOpcode() const
Definition: Expr.h:3936
Expr * getRHS() const
Definition: Expr.h:3943
Expr * getLHS() const
Definition: Expr.h:3941
Represents a new-expression for memory allocation and constructor calls, e.g: "new CXXNewExpr(foo)".
Definition: ExprCXX.h:2236
CallExpr - Represents a function call (C99 6.5.2.2, C++ [expr.call]).
Definition: Expr.h:2872
CastExpr - Base class for type casts, including both implicit casts (ImplicitCastExpr) and explicit c...
Definition: Expr.h:3535
CastKind getCastKind() const
Definition: Expr.h:3579
Expr * getSubExpr()
Definition: Expr.h:3585
This represents one expression.
Definition: Expr.h:110
QualType getType() const
Definition: Expr.h:142
Represents a function declaration or definition.
Definition: Decl.h:1972
bool isVariadic() const
Whether this function is variadic.
Definition: Decl.cpp:3093
unsigned getNumParams() const
Return the number of parameters this function must have based on its FunctionType.
Definition: Decl.cpp:3696
One of these records is kept for each identifier that is lexed.
IdentifierInfo * getIdentifier() const
Get the identifier that names this declaration, if there is one.
Definition: Decl.h:270
A trivial tuple used to represent a source range.
bool isInvalid() const
SourceRange getSourceRange() const LLVM_READONLY
SourceLocation tokens are not useful in isolation - they are low level value objects created/interpre...
Definition: Stmt.cpp:326
bool isPointerType() const
Definition: Type.h:7624
bool isIntegerType() const
isIntegerType() does not include complex integers (a GCC extension).
Definition: Type.h:7979
bool isVectorType() const
Definition: Type.h:7730
UnaryOperator - This represents the unary-expression's (except sizeof and alignof),...
Definition: Expr.h:2235
Expr * getSubExpr() const
Definition: Expr.h:2280
static bool isIncrementDecrementOp(Opcode Op)
Definition: Expr.h:2335
CHECKER * registerChecker(AT &&... Args)
Used to register checkers.
MemRegion - The root abstract class for all memory regions.
Definition: MemRegion.h:96
Kind getKind() const
Definition: MemRegion.h:172
SVal - This represents a symbolic expression, which can be either an L-value or an R-value.
Definition: SVals.h:55
bool isZeroConstant() const
Definition: SVals.cpp:258
const MemRegion * getAsRegion() const
Definition: SVals.cpp:120
A class responsible for cleaning up unused symbols.
bool NE(InterpState &S, CodePtr OpPC)
Definition: Interp.h:869
The JSON file list parser is used to communicate input to InstallAPI.
BinaryOperatorKind
Diagnostic wrappers for TextAPI types for error reporting.
Definition: Dominators.h:30
static void Profile(AllocKind X, FoldingSetNodeID &ID)