clang  20.0.0git
DereferenceChecker.cpp
Go to the documentation of this file.
1 //===-- DereferenceChecker.cpp - Null dereference checker -----------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This defines DereferenceChecker, a builtin check in ExprEngine that performs
10 // checks for null pointers at loads and stores.
11 //
12 //===----------------------------------------------------------------------===//
13 
#include "clang/AST/ExprObjC.h"
#include "clang/AST/ExprOpenMP.h"
#include "clang/Basic/TargetInfo.h"
#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
#include "clang/StaticAnalyzer/Core/Checker.h"
#include "clang/StaticAnalyzer/Core/CheckerManager.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerHelpers.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/Support/raw_ostream.h"
25 
26 using namespace clang;
27 using namespace ento;
28 
29 namespace {
30 class DereferenceChecker
31  : public Checker< check::Location,
32  check::Bind,
33  EventDispatcher<ImplicitNullDerefEvent> > {
34  enum DerefKind { NullPointer, UndefinedPointerValue, AddressOfLabel };
35 
36  BugType BT_Null{this, "Dereference of null pointer", categories::LogicError};
37  BugType BT_Undef{this, "Dereference of undefined pointer value",
39  BugType BT_Label{this, "Dereference of the address of a label",
41 
42  void reportBug(DerefKind K, ProgramStateRef State, const Stmt *S,
43  CheckerContext &C) const;
44 
45  bool suppressReport(CheckerContext &C, const Expr *E) const;
46 
47 public:
48  void checkLocation(SVal location, bool isLoad, const Stmt* S,
49  CheckerContext &C) const;
50  void checkBind(SVal L, SVal V, const Stmt *S, CheckerContext &C) const;
51 
52  static void AddDerefSource(raw_ostream &os,
54  const Expr *Ex, const ProgramState *state,
55  const LocationContext *LCtx,
56  bool loadedFrom = false);
57 
58  bool SuppressAddressSpaces = false;
59 };
60 } // end anonymous namespace
61 
62 void
63 DereferenceChecker::AddDerefSource(raw_ostream &os,
65  const Expr *Ex,
66  const ProgramState *state,
67  const LocationContext *LCtx,
68  bool loadedFrom) {
69  Ex = Ex->IgnoreParenLValueCasts();
70  switch (Ex->getStmtClass()) {
71  default:
72  break;
73  case Stmt::DeclRefExprClass: {
74  const DeclRefExpr *DR = cast<DeclRefExpr>(Ex);
75  if (const VarDecl *VD = dyn_cast<VarDecl>(DR->getDecl())) {
76  os << " (" << (loadedFrom ? "loaded from" : "from")
77  << " variable '" << VD->getName() << "')";
78  Ranges.push_back(DR->getSourceRange());
79  }
80  break;
81  }
82  case Stmt::MemberExprClass: {
83  const MemberExpr *ME = cast<MemberExpr>(Ex);
84  os << " (" << (loadedFrom ? "loaded from" : "via")
85  << " field '" << ME->getMemberNameInfo() << "')";
86  SourceLocation L = ME->getMemberLoc();
87  Ranges.push_back(SourceRange(L, L));
88  break;
89  }
90  case Stmt::ObjCIvarRefExprClass: {
91  const ObjCIvarRefExpr *IV = cast<ObjCIvarRefExpr>(Ex);
92  os << " (" << (loadedFrom ? "loaded from" : "via")
93  << " ivar '" << IV->getDecl()->getName() << "')";
94  SourceLocation L = IV->getLocation();
95  Ranges.push_back(SourceRange(L, L));
96  break;
97  }
98  }
99 }
100 
101 static const Expr *getDereferenceExpr(const Stmt *S, bool IsBind=false){
102  const Expr *E = nullptr;
103 
104  // Walk through lvalue casts to get the original expression
105  // that syntactically caused the load.
106  if (const Expr *expr = dyn_cast<Expr>(S))
107  E = expr->IgnoreParenLValueCasts();
108 
109  if (IsBind) {
110  const VarDecl *VD;
111  const Expr *Init;
112  std::tie(VD, Init) = parseAssignment(S);
113  if (VD && Init)
114  E = Init;
115  }
116  return E;
117 }
118 
119 bool DereferenceChecker::suppressReport(CheckerContext &C,
120  const Expr *E) const {
121  // Do not report dereferences on memory that use address space #256, #257,
122  // and #258. Those address spaces are used when dereferencing address spaces
123  // relative to the GS, FS, and SS segments on x86/x86-64 targets.
124  // Dereferencing a null pointer in these address spaces is not defined
125  // as an error. All other null dereferences in other address spaces
126  // are defined as an error unless explicitly defined.
127  // See https://clang.llvm.org/docs/LanguageExtensions.html, the section
128  // "X86/X86-64 Language Extensions"
129 
130  QualType Ty = E->getType();
131  if (!Ty.hasAddressSpace())
132  return false;
133  if (SuppressAddressSpaces)
134  return true;
135 
136  const llvm::Triple::ArchType Arch =
137  C.getASTContext().getTargetInfo().getTriple().getArch();
138 
139  if ((Arch == llvm::Triple::x86) || (Arch == llvm::Triple::x86_64)) {
141  case 256:
142  case 257:
143  case 258:
144  return true;
145  }
146  }
147  return false;
148 }
149 
150 static bool isDeclRefExprToReference(const Expr *E) {
151  if (const auto *DRE = dyn_cast<DeclRefExpr>(E))
152  return DRE->getDecl()->getType()->isReferenceType();
153  return false;
154 }
155 
156 void DereferenceChecker::reportBug(DerefKind K, ProgramStateRef State,
157  const Stmt *S, CheckerContext &C) const {
158  const BugType *BT = nullptr;
159  llvm::StringRef DerefStr1;
160  llvm::StringRef DerefStr2;
161  switch (K) {
162  case DerefKind::NullPointer:
163  BT = &BT_Null;
164  DerefStr1 = " results in a null pointer dereference";
165  DerefStr2 = " results in a dereference of a null pointer";
166  break;
167  case DerefKind::UndefinedPointerValue:
168  BT = &BT_Undef;
169  DerefStr1 = " results in an undefined pointer dereference";
170  DerefStr2 = " results in a dereference of an undefined pointer value";
171  break;
172  case DerefKind::AddressOfLabel:
173  BT = &BT_Label;
174  DerefStr1 = " results in an undefined pointer dereference";
175  DerefStr2 = " results in a dereference of an address of a label";
176  break;
177  };
178 
179  // Generate an error node.
180  ExplodedNode *N = C.generateErrorNode(State);
181  if (!N)
182  return;
183 
184  SmallString<100> buf;
185  llvm::raw_svector_ostream os(buf);
186 
188 
189  switch (S->getStmtClass()) {
190  case Stmt::ArraySubscriptExprClass: {
191  os << "Array access";
192  const ArraySubscriptExpr *AE = cast<ArraySubscriptExpr>(S);
193  AddDerefSource(os, Ranges, AE->getBase()->IgnoreParenCasts(),
194  State.get(), N->getLocationContext());
195  os << DerefStr1;
196  break;
197  }
198  case Stmt::ArraySectionExprClass: {
199  os << "Array access";
200  const ArraySectionExpr *AE = cast<ArraySectionExpr>(S);
201  AddDerefSource(os, Ranges, AE->getBase()->IgnoreParenCasts(),
202  State.get(), N->getLocationContext());
203  os << DerefStr1;
204  break;
205  }
206  case Stmt::UnaryOperatorClass: {
207  os << BT->getDescription();
208  const UnaryOperator *U = cast<UnaryOperator>(S);
209  AddDerefSource(os, Ranges, U->getSubExpr()->IgnoreParens(),
210  State.get(), N->getLocationContext(), true);
211  break;
212  }
213  case Stmt::MemberExprClass: {
214  const MemberExpr *M = cast<MemberExpr>(S);
215  if (M->isArrow() || isDeclRefExprToReference(M->getBase())) {
216  os << "Access to field '" << M->getMemberNameInfo() << "'" << DerefStr2;
217  AddDerefSource(os, Ranges, M->getBase()->IgnoreParenCasts(),
218  State.get(), N->getLocationContext(), true);
219  }
220  break;
221  }
222  case Stmt::ObjCIvarRefExprClass: {
223  const ObjCIvarRefExpr *IV = cast<ObjCIvarRefExpr>(S);
224  os << "Access to instance variable '" << *IV->getDecl() << "'" << DerefStr2;
225  AddDerefSource(os, Ranges, IV->getBase()->IgnoreParenCasts(),
226  State.get(), N->getLocationContext(), true);
227  break;
228  }
229  default:
230  break;
231  }
232 
233  auto report = std::make_unique<PathSensitiveBugReport>(
234  *BT, buf.empty() ? BT->getDescription() : buf.str(), N);
235 
237 
239  I = Ranges.begin(), E = Ranges.end(); I!=E; ++I)
240  report->addRange(*I);
241 
242  C.emitReport(std::move(report));
243 }
244 
245 void DereferenceChecker::checkLocation(SVal l, bool isLoad, const Stmt* S,
246  CheckerContext &C) const {
247  // Check for dereference of an undefined value.
248  if (l.isUndef()) {
249  const Expr *DerefExpr = getDereferenceExpr(S);
250  if (!suppressReport(C, DerefExpr))
251  reportBug(DerefKind::UndefinedPointerValue, C.getState(), DerefExpr, C);
252  return;
253  }
254 
256 
257  // Check for null dereferences.
258  if (!isa<Loc>(location))
259  return;
260 
261  ProgramStateRef state = C.getState();
262 
263  ProgramStateRef notNullState, nullState;
264  std::tie(notNullState, nullState) = state->assume(location);
265 
266  if (nullState) {
267  if (!notNullState) {
268  // We know that 'location' can only be null. This is what
269  // we call an "explicit" null dereference.
270  const Expr *expr = getDereferenceExpr(S);
271  if (!suppressReport(C, expr)) {
272  reportBug(DerefKind::NullPointer, nullState, expr, C);
273  return;
274  }
275  }
276 
277  // Otherwise, we have the case where the location could either be
278  // null or not-null. Record the error node as an "implicit" null
279  // dereference.
280  if (ExplodedNode *N = C.generateSink(nullState, C.getPredecessor())) {
281  ImplicitNullDerefEvent event = {l, isLoad, N, &C.getBugReporter(),
282  /*IsDirectDereference=*/true};
283  dispatchEvent(event);
284  }
285  }
286 
287  // From this point forward, we know that the location is not null.
288  C.addTransition(notNullState);
289 }
290 
291 void DereferenceChecker::checkBind(SVal L, SVal V, const Stmt *S,
292  CheckerContext &C) const {
293  // If we're binding to a reference, check if the value is known to be null.
294  if (V.isUndef())
295  return;
296 
297  // One should never write to label addresses.
298  if (auto Label = L.getAs<loc::GotoLabel>()) {
299  reportBug(DerefKind::AddressOfLabel, C.getState(), S, C);
300  return;
301  }
302 
303  const MemRegion *MR = L.getAsRegion();
304  const TypedValueRegion *TVR = dyn_cast_or_null<TypedValueRegion>(MR);
305  if (!TVR)
306  return;
307 
308  if (!TVR->getValueType()->isReferenceType())
309  return;
310 
311  ProgramStateRef State = C.getState();
312 
313  ProgramStateRef StNonNull, StNull;
314  std::tie(StNonNull, StNull) = State->assume(V.castAs<DefinedOrUnknownSVal>());
315 
316  if (StNull) {
317  if (!StNonNull) {
318  const Expr *expr = getDereferenceExpr(S, /*IsBind=*/true);
319  if (!suppressReport(C, expr)) {
320  reportBug(DerefKind::NullPointer, StNull, expr, C);
321  return;
322  }
323  }
324 
325  // At this point the value could be either null or non-null.
326  // Record this as an "implicit" null dereference.
327  if (ExplodedNode *N = C.generateSink(StNull, C.getPredecessor())) {
328  ImplicitNullDerefEvent event = {V, /*isLoad=*/true, N,
329  &C.getBugReporter(),
330  /*IsDirectDereference=*/true};
331  dispatchEvent(event);
332  }
333  }
334 
335  // Unlike a regular null dereference, initializing a reference with a
336  // dereferenced null pointer does not actually cause a runtime exception in
337  // Clang's implementation of references.
338  //
339  // int &r = *p; // safe??
340  // if (p != NULL) return; // uh-oh
341  // r = 5; // trap here
342  //
343  // The standard says this is invalid as soon as we try to create a "null
344  // reference" (there is no such thing), but turning this into an assumption
345  // that 'p' is never null will not match our actual runtime behavior.
346  // So we do not record this assumption, allowing us to warn on the last line
347  // of this example.
348  //
349  // We do need to add a transition because we may have generated a sink for
350  // the "implicit" null dereference.
351  C.addTransition(State, this);
352 }
353 
354 void ento::registerDereferenceChecker(CheckerManager &mgr) {
355  auto *Chk = mgr.registerChecker<DereferenceChecker>();
356  Chk->SuppressAddressSpaces = mgr.getAnalyzerOptions().getCheckerBooleanOption(
357  mgr.getCurrentCheckerName(), "SuppressAddressSpaces");
358 }
359 
360 bool ento::shouldRegisterDereferenceChecker(const CheckerManager &mgr) {
361  return true;
362 }
#define V(N, I)
Definition: ASTContext.h:3346
Expr * E
static const Expr * getDereferenceExpr(const Stmt *S, bool IsBind=false)
static bool isDeclRefExprToReference(const Expr *E)
LineState State
std::string Label
bool getCheckerBooleanOption(StringRef CheckerName, StringRef OptionName, bool SearchInParents=false) const
Interprets an option's string value as a boolean.
This class represents BOTH the OpenMP Array Section and OpenACC 'subarray', with a boolean differenti...
Definition: Expr.h:6957
Expr * getBase()
Get base of the array section.
Definition: Expr.h:7023
ArraySubscriptExpr - [C99 6.5.2.1] Array Subscripting.
Definition: Expr.h:2726
A reference to a declared variable, function, enum, etc.
Definition: Expr.h:1265
ValueDecl * getDecl()
Definition: Expr.h:1333
This represents one expression.
Definition: Expr.h:110
Expr * IgnoreParenCasts() LLVM_READONLY
Skip past any parentheses and casts which might surround this expression until reaching a fixed point...
Definition: Expr.cpp:3127
Expr * IgnoreParenLValueCasts() LLVM_READONLY
Skip past any parentheses and lvalue casts which might surround this expression until reaching a fixe...
Definition: Expr.cpp:3139
QualType getType() const
Definition: Expr.h:142
It wraps the AnalysisDeclContext to represent both the call stack with the help of StackFrameContext ...
MemberExpr - [C99 6.5.2.3] Structure and Union Members.
Definition: Expr.h:3239
SourceLocation getMemberLoc() const
getMemberLoc - Return the location of the "member", in X->F, it is the location of 'F'.
Definition: Expr.h:3428
Expr * getBase() const
Definition: Expr.h:3316
DeclarationNameInfo getMemberNameInfo() const
Retrieve the member declaration name info.
Definition: Expr.h:3416
bool isArrow() const
Definition: Expr.h:3423
StringRef getName() const
Get the name of identifier for this declaration as a StringRef.
Definition: Decl.h:276
ObjCIvarRefExpr - A reference to an ObjC instance variable.
Definition: ExprObjC.h:549
SourceLocation getLocation() const
Definition: ExprObjC.h:592
const Expr * getBase() const
Definition: ExprObjC.h:583
ObjCIvarDecl * getDecl()
Definition: ExprObjC.h:579
A (possibly-)qualified type.
Definition: Type.h:941
LangAS getAddressSpace() const
Return the address space of this type.
Definition: Type.h:7886
bool hasAddressSpace() const
Check if this type has any address space qualifier.
Definition: Type.h:7881
Encodes a location in the source.
A trivial tuple used to represent a source range.
Stmt - This represents one statement.
Definition: Stmt.h:84
StmtClass getStmtClass() const
Definition: Stmt.h:1358
SourceRange getSourceRange() const LLVM_READONLY
SourceLocation tokens are not useful in isolation - they are low level value objects created/interpre...
Definition: Stmt.cpp:326
bool isReferenceType() const
Definition: Type.h:8031
UnaryOperator - This represents the unary-expression's (except sizeof and alignof),...
Definition: Expr.h:2240
Represents a variable declaration or definition.
Definition: Decl.h:880
StringRef getDescription() const
Definition: BugType.h:48
const AnalyzerOptions & getAnalyzerOptions() const
CHECKER * registerChecker(AT &&... Args)
Used to register checkers.
CheckerNameRef getCurrentCheckerName() const
const LocationContext * getLocationContext() const
MemRegion - The root abstract class for all memory regions.
Definition: MemRegion.h:97
ProgramState - This class encapsulates:
Definition: ProgramState.h:71
SVal - This represents a symbolic expression, which can be either an L-value or an R-value.
Definition: SVals.h:55
bool isUndef() const
Definition: SVals.h:104
const MemRegion * getAsRegion() const
Definition: SVals.cpp:120
std::optional< T > getAs() const
Convert to the specified SVal type, returning std::nullopt if this SVal is not of the desired type.
Definition: SVals.h:86
T castAs() const
Convert to the specified SVal type, asserting that this SVal is of the desired type.
Definition: SVals.h:82
TypedValueRegion - An abstract class representing regions having a typed value.
Definition: MemRegion.h:535
virtual QualType getValueType() const =0
Defines the clang::TargetInfo interface.
const internal::VariadicDynCastAllOfMatcher< Stmt, Expr > expr
Matches expressions.
const Expr * getDerefExpr(const Stmt *S)
Given that expression S represents a pointer that would be dereferenced, try to find a sub-expression...
bool trackExpressionValue(const ExplodedNode *N, const Expr *E, PathSensitiveBugReport &R, TrackingOptions Opts={})
Attempts to add visitors to track expression value back to its point of origin.
std::pair< const clang::VarDecl *, const clang::Expr * > parseAssignment(const Stmt *S)
The JSON file list parser is used to communicate input to InstallAPI.
unsigned toTargetAddressSpace(LangAS AS)
Definition: AddressSpaces.h:81
We dereferenced a location that may be null.
Definition: Checker.h:548