clang  19.0.0git
MIGChecker.cpp
Go to the documentation of this file.
1 //== MIGChecker.cpp - MIG calling convention checker ------------*- C++ -*--==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines MIGChecker, a Mach Interface Generator calling convention
10 // checker. Namely, in MIG callback implementation the following rules apply:
11 // - When a server routine returns an error code that represents success, it
12 // must take ownership of resources passed to it (and eventually release
13 // them).
14 // - Additionally, when returning success, all out-parameters must be
15 // initialized.
16 // - When it returns any other error code, it must not take ownership,
17 // because the message and its out-of-line parameters will be destroyed
18 // by the client that called the function.
19 // For now we only check the last rule, as its violations lead to dangerous
20 // use-after-free exploits.
21 //
22 //===----------------------------------------------------------------------===//
23 
#include "clang/AST/Attr.h"
#include "clang/Analysis/AnyCall.h"
#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
#include "clang/StaticAnalyzer/Core/Checker.h"
#include "clang/StaticAnalyzer/Core/CheckerManager.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/CallDescription.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
#include <optional>
34 
35 using namespace clang;
36 using namespace ento;
37 
38 namespace {
39 class MIGChecker : public Checker<check::PostCall, check::PreStmt<ReturnStmt>,
40  check::EndFunction> {
41  BugType BT{this, "Use-after-free (MIG calling convention violation)",
43 
44  // The checker knows that an out-of-line object is deallocated if it is
45  // passed as an argument to one of these functions. If this object is
46  // additionally an argument of a MIG routine, the checker keeps track of that
47  // information and issues a warning when an error is returned from the
48  // respective routine.
49  CallDescriptionMap<unsigned> Deallocators = {
50 #define CALL(required_args, deallocated_arg, ...) \
51  {{CDM::SimpleFunc, {__VA_ARGS__}, required_args}, deallocated_arg}
52  // E.g., if the checker sees a C function 'vm_deallocate' that has
53  // exactly 3 parameters, it knows that argument #1 (starting from 0, i.e.
54  // the second argument) is going to be consumed in the sense of the MIG
55  // consume-on-success convention.
56  CALL(3, 1, "vm_deallocate"),
57  CALL(3, 1, "mach_vm_deallocate"),
58  CALL(2, 0, "mig_deallocate"),
59  CALL(2, 1, "mach_port_deallocate"),
60  CALL(1, 0, "device_deallocate"),
61  CALL(1, 0, "iokit_remove_connect_reference"),
62  CALL(1, 0, "iokit_remove_reference"),
63  CALL(1, 0, "iokit_release_port"),
64  CALL(1, 0, "ipc_port_release"),
65  CALL(1, 0, "ipc_port_release_sonce"),
66  CALL(1, 0, "ipc_voucher_attr_control_release"),
67  CALL(1, 0, "ipc_voucher_release"),
68  CALL(1, 0, "lock_set_dereference"),
69  CALL(1, 0, "memory_object_control_deallocate"),
70  CALL(1, 0, "pset_deallocate"),
71  CALL(1, 0, "semaphore_dereference"),
72  CALL(1, 0, "space_deallocate"),
73  CALL(1, 0, "space_inspect_deallocate"),
74  CALL(1, 0, "task_deallocate"),
75  CALL(1, 0, "task_inspect_deallocate"),
76  CALL(1, 0, "task_name_deallocate"),
77  CALL(1, 0, "thread_deallocate"),
78  CALL(1, 0, "thread_inspect_deallocate"),
79  CALL(1, 0, "upl_deallocate"),
80  CALL(1, 0, "vm_map_deallocate"),
81 #undef CALL
82 #define CALL(required_args, deallocated_arg, ...) \
83  {{CDM::CXXMethod, {__VA_ARGS__}, required_args}, deallocated_arg}
84  // E.g., if the checker sees a method 'releaseAsyncReference64()' that is
85  // defined on class 'IOUserClient' that takes exactly 1 argument, it knows
86  // that the argument is going to be consumed in the sense of the MIG
87  // consume-on-success convention.
88  CALL(1, 0, "IOUserClient", "releaseAsyncReference64"),
89  CALL(1, 0, "IOUserClient", "releaseNotificationPort"),
90 #undef CALL
91  };
92 
93  CallDescription OsRefRetain{CDM::SimpleFunc, {"os_ref_retain"}, 1};
94 
95  void checkReturnAux(const ReturnStmt *RS, CheckerContext &C) const;
96 
97 public:
98  void checkPostCall(const CallEvent &Call, CheckerContext &C) const;
99 
100  // HACK: We're making two attempts to find the bug: checkEndFunction
101  // should normally be enough but it fails when the return value is a literal
102  // that never gets put into the Environment and ends of function with multiple
103  // returns get agglutinated across returns, preventing us from obtaining
104  // the return value. The problem is similar to https://reviews.llvm.org/D25326
105  // but now we step into it in the top-level function.
106  void checkPreStmt(const ReturnStmt *RS, CheckerContext &C) const {
107  checkReturnAux(RS, C);
108  }
109  void checkEndFunction(const ReturnStmt *RS, CheckerContext &C) const {
110  checkReturnAux(RS, C);
111  }
112 
113 };
114 } // end anonymous namespace
115 
116 // A flag that says that the programmer has called a MIG destructor
117 // for at least one parameter.
118 REGISTER_TRAIT_WITH_PROGRAMSTATE(ReleasedParameter, bool)
119 // A set of parameters for which the check is suppressed because
120 // reference counting is being performed.
121 REGISTER_SET_WITH_PROGRAMSTATE(RefCountedParameters, const ParmVarDecl *)
122 
124  bool IncludeBaseRegions = false) {
125  // TODO: We should most likely always include base regions here.
126  SymbolRef Sym = V.getAsSymbol(IncludeBaseRegions);
127  if (!Sym)
128  return nullptr;
129 
130  // If we optimistically assume that the MIG routine never re-uses the storage
131  // that was passed to it as arguments when it invalidates it (but at most when
132  // it assigns to parameter variables directly), this procedure correctly
133  // determines if the value was loaded from the transitive closure of MIG
134  // routine arguments in the heap.
135  while (const MemRegion *MR = Sym->getOriginRegion()) {
136  const auto *VR = dyn_cast<VarRegion>(MR);
137  if (VR && VR->hasStackParametersStorage() &&
138  VR->getStackFrame()->inTopFrame())
139  return cast<ParmVarDecl>(VR->getDecl());
140 
141  const SymbolicRegion *SR = MR->getSymbolicBase();
142  if (!SR)
143  return nullptr;
144 
145  Sym = SR->getSymbol();
146  }
147 
148  return nullptr;
149 }
150 
151 static bool isInMIGCall(CheckerContext &C) {
152  const LocationContext *LC = C.getLocationContext();
153  assert(LC && "Unknown location context");
154 
155  const StackFrameContext *SFC;
156  // Find the top frame.
157  while (LC) {
158  SFC = LC->getStackFrame();
159  LC = SFC->getParent();
160  }
161 
162  const Decl *D = SFC->getDecl();
163 
164  if (std::optional<AnyCall> AC = AnyCall::forDecl(D)) {
165  // Even though there's a Sema warning when the return type of an annotated
166  // function is not a kern_return_t, this warning isn't an error, so we need
167  // an extra check here.
168  // FIXME: AnyCall doesn't support blocks yet, so they remain unchecked
169  // for now.
170  if (!AC->getReturnType(C.getASTContext())
171  .getCanonicalType()->isSignedIntegerType())
172  return false;
173  }
174 
175  if (D->hasAttr<MIGServerRoutineAttr>())
176  return true;
177 
178  // See if there's an annotated method in the superclass.
179  if (const auto *MD = dyn_cast<CXXMethodDecl>(D))
180  for (const auto *OMD: MD->overridden_methods())
181  if (OMD->hasAttr<MIGServerRoutineAttr>())
182  return true;
183 
184  return false;
185 }
186 
187 void MIGChecker::checkPostCall(const CallEvent &Call, CheckerContext &C) const {
188  if (OsRefRetain.matches(Call)) {
189  // If the code is doing reference counting over the parameter,
190  // it opens up an opportunity for safely calling a destructor function.
191  // TODO: We should still check for over-releases.
192  if (const ParmVarDecl *PVD =
193  getOriginParam(Call.getArgSVal(0), C, /*IncludeBaseRegions=*/true)) {
194  // We never need to clean up the program state because these are
195  // top-level parameters anyway, so they're always live.
196  C.addTransition(C.getState()->add<RefCountedParameters>(PVD));
197  }
198  return;
199  }
200 
201  if (!isInMIGCall(C))
202  return;
203 
204  const unsigned *ArgIdxPtr = Deallocators.lookup(Call);
205  if (!ArgIdxPtr)
206  return;
207 
208  ProgramStateRef State = C.getState();
209  unsigned ArgIdx = *ArgIdxPtr;
210  SVal Arg = Call.getArgSVal(ArgIdx);
211  const ParmVarDecl *PVD = getOriginParam(Arg, C);
212  if (!PVD || State->contains<RefCountedParameters>(PVD))
213  return;
214 
215  const NoteTag *T =
216  C.getNoteTag([this, PVD](PathSensitiveBugReport &BR) -> std::string {
217  if (&BR.getBugType() != &BT)
218  return "";
219  SmallString<64> Str;
220  llvm::raw_svector_ostream OS(Str);
221  OS << "Value passed through parameter '" << PVD->getName()
222  << "\' is deallocated";
223  return std::string(OS.str());
224  });
225  C.addTransition(State->set<ReleasedParameter>(true), T);
226 }
227 
228 // Returns true if V can potentially represent a "successful" kern_return_t.
229 static bool mayBeSuccess(SVal V, CheckerContext &C) {
230  ProgramStateRef State = C.getState();
231 
232  // Can V represent KERN_SUCCESS?
233  if (!State->isNull(V).isConstrainedFalse())
234  return true;
235 
236  SValBuilder &SVB = C.getSValBuilder();
237  ASTContext &ACtx = C.getASTContext();
238 
239  // Can V represent MIG_NO_REPLY?
240  static const int MigNoReply = -305;
241  V = SVB.evalEQ(C.getState(), V, SVB.makeIntVal(MigNoReply, ACtx.IntTy));
242  if (!State->isNull(V).isConstrainedTrue())
243  return true;
244 
245  // If none of the above, it's definitely an error.
246  return false;
247 }
248 
249 void MIGChecker::checkReturnAux(const ReturnStmt *RS, CheckerContext &C) const {
250  // It is very unlikely that a MIG callback will be called from anywhere
251  // within the project under analysis and the caller isn't itself a routine
252  // that follows the MIG calling convention. Therefore we're safe to believe
253  // that it's always the top frame that is of interest. There's a slight chance
254  // that the user would want to enforce the MIG calling convention upon
255  // a random routine in the middle of nowhere, but given that the convention is
256  // fairly weird and hard to follow in the first place, there's relatively
257  // little motivation to spread it this way.
258  if (!C.inTopFrame())
259  return;
260 
261  if (!isInMIGCall(C))
262  return;
263 
264  // We know that the function is non-void, but what if the return statement
265  // is not there in the code? It's not a compile error, we should not crash.
266  if (!RS)
267  return;
268 
269  ProgramStateRef State = C.getState();
270  if (!State->get<ReleasedParameter>())
271  return;
272 
273  SVal V = C.getSVal(RS);
274  if (mayBeSuccess(V, C))
275  return;
276 
277  ExplodedNode *N = C.generateErrorNode();
278  if (!N)
279  return;
280 
281  auto R = std::make_unique<PathSensitiveBugReport>(
282  BT,
283  "MIG callback fails with error after deallocating argument value. "
284  "This is a use-after-free vulnerability because the caller will try to "
285  "deallocate it again",
286  N);
287 
288  R->addRange(RS->getSourceRange());
290  N, RS->getRetValue(), *R,
291  {bugreporter::TrackingKind::Thorough, /*EnableNullFPSuppression=*/false});
292  C.emitReport(std::move(R));
293 }
294 
295 void ento::registerMIGChecker(CheckerManager &Mgr) {
296  Mgr.registerChecker<MIGChecker>();
297 }
298 
299 bool ento::shouldRegisterMIGChecker(const CheckerManager &mgr) {
300  return true;
301 }
#define V(N, I)
Definition: ASTContext.h:3299
#define CALL(required_args, deallocated_arg,...)
static bool mayBeSuccess(SVal V, CheckerContext &C)
Definition: MIGChecker.cpp:229
static const ParmVarDecl * getOriginParam(SVal V, CheckerContext &C, bool IncludeBaseRegions=false)
Definition: MIGChecker.cpp:123
static bool isInMIGCall(CheckerContext &C)
Definition: MIGChecker.cpp:151
#define REGISTER_SET_WITH_PROGRAMSTATE(Name, Elem)
Declares an immutable set of type NameTy, suitable for placement into the ProgramState.
#define REGISTER_TRAIT_WITH_PROGRAMSTATE(Name, Type)
Declares a program state trait for type Type called Name, and introduce a type named NameTy.
LineState State
Holds long-lived AST nodes (such as types and decls) that can be referred to throughout the semantic ...
Definition: ASTContext.h:185
CanQualType IntTy
Definition: ASTContext.h:1103
static std::optional< AnyCall > forDecl(const Decl *D)
If D is a callable (Objective-C method or a function), return a constructed AnyCall object.
Definition: AnyCall.h:134
Decl - This represents one declaration (or definition), e.g.
Definition: DeclBase.h:86
bool hasAttr() const
Definition: DeclBase.h:583
It wraps the AnalysisDeclContext to represent both the call stack with the help of StackFrameContext ...
const LocationContext * getParent() const
It might return null.
const Decl * getDecl() const
const StackFrameContext * getStackFrame() const
StringRef getName() const
Get the name of identifier for this declaration as a StringRef.
Definition: Decl.h:276
Represents a parameter to a function.
Definition: Decl.h:1762
ReturnStmt - This represents a return, optionally of an expression: return; return 4;.
Definition: Stmt.h:3019
Expr * getRetValue()
Definition: Stmt.h:3050
It represents a stack frame of the call stack (based on CallEvent).
SourceRange getSourceRange() const LLVM_READONLY
SourceLocation tokens are not useful in isolation - they are low level value objects created/interpre...
Definition: Stmt.cpp:326
const BugType & getBugType() const
Definition: BugReporter.h:149
An immutable map from CallDescriptions to arbitrary data.
A CallDescription is a pattern that can be used to match calls based on the qualified name and the ar...
@ SimpleFunc
Matches "simple" functions that are not methods.
Represents an abstract call to a function or method along a particular path.
Definition: CallEvent.h:153
CHECKER * registerChecker(AT &&... Args)
Used to register checkers.
MemRegion - The root abstract class for all memory regions.
Definition: MemRegion.h:96
const SymbolicRegion * getSymbolicBase() const
If this is a symbolic region, returns the region.
Definition: MemRegion.cpp:1401
The tag upon which the TagVisitor reacts.
Definition: BugReporter.h:779
nonloc::ConcreteInt makeIntVal(const IntegerLiteral *integer)
Definition: SValBuilder.h:290
SVal evalEQ(ProgramStateRef state, SVal lhs, SVal rhs)
SVal - This represents a symbolic expression, which can be either an L-value or an R-value.
Definition: SVals.h:55
Symbolic value.
Definition: SymExpr.h:30
virtual const MemRegion * getOriginRegion() const
Find the region from which this symbol originates.
Definition: SymExpr.h:104
SymbolicRegion - A special, "non-concrete" region.
Definition: MemRegion.h:775
SymbolRef getSymbol() const
It might return null.
Definition: MemRegion.h:794
bool trackExpressionValue(const ExplodedNode *N, const Expr *E, PathSensitiveBugReport &R, TrackingOptions Opts={})
Attempts to add visitors to track expression value back to its point of origin.
bool Call(InterpState &S, CodePtr OpPC, const Function *Func, uint32_t VarArgSize)
Definition: Interp.h:2179
The JSON file list parser is used to communicate input to InstallAPI.
const FunctionProtoType * T
#define false
Definition: stdbool.h:26