clang  19.0.0git
SimpleSValBuilder.cpp
Go to the documentation of this file.
1 // SimpleSValBuilder.cpp - A basic SValBuilder -----------------------*- C++ -*-
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines SimpleSValBuilder, a basic implementation of SValBuilder.
10 //
11 //===----------------------------------------------------------------------===//
12 
18 #include <optional>
19 
20 using namespace clang;
21 using namespace ento;
22 
23 namespace {
24 class SimpleSValBuilder : public SValBuilder {
25 
26  // Query the constraint manager whether the SVal has only one possible
27  // (integer) value. If that is the case, the value is returned. Otherwise,
28  // returns NULL.
29  // This is an implementation detail. Checkers should use `getKnownValue()`
30  // instead.
31  static const llvm::APSInt *getConstValue(ProgramStateRef state, SVal V);
32 
33  // Helper function that returns the value stored in a nonloc::ConcreteInt or
34  // loc::ConcreteInt.
35  static const llvm::APSInt *getConcreteValue(SVal V);
36 
37  // With one `simplifySValOnce` call, a compound symbols might collapse to
38  // simpler symbol tree that is still possible to further simplify. Thus, we
39  // do the simplification on a new symbol tree until we reach the simplest
40  // form, i.e. the fixpoint.
41  // Consider the following symbol `(b * b) * b * b` which has this tree:
42  // *
43  // / \
44  // * b
45  // / \
46  // / b
47  // (b * b)
48  // Now, if the `b * b == 1` new constraint is added then during the first
49  // iteration we have the following transformations:
50  // * *
51  // / \ / \
52  // * b --> b b
53  // / \
54  // / b
55  // 1
56  // We need another iteration to reach the final result `1`.
57  SVal simplifyUntilFixpoint(ProgramStateRef State, SVal Val);
58 
59  // Recursively descends into symbolic expressions and replaces symbols
60  // with their known values (in the sense of the getConstValue() method).
61  // We traverse the symbol tree and query the constraint values for the
62  // sub-trees and if a value is a constant we do the constant folding.
63  SVal simplifySValOnce(ProgramStateRef State, SVal V);
64 
65 public:
66  SimpleSValBuilder(llvm::BumpPtrAllocator &alloc, ASTContext &context,
67  ProgramStateManager &stateMgr)
68  : SValBuilder(alloc, context, stateMgr) {}
69  ~SimpleSValBuilder() override {}
70 
71  SVal evalBinOpNN(ProgramStateRef state, BinaryOperator::Opcode op,
72  NonLoc lhs, NonLoc rhs, QualType resultTy) override;
73  SVal evalBinOpLL(ProgramStateRef state, BinaryOperator::Opcode op,
74  Loc lhs, Loc rhs, QualType resultTy) override;
75  SVal evalBinOpLN(ProgramStateRef state, BinaryOperator::Opcode op,
76  Loc lhs, NonLoc rhs, QualType resultTy) override;
77 
78  /// Evaluates a given SVal by recursively evaluating and
79  /// simplifying the children SVals. If the SVal has only one possible
80  /// (integer) value, that value is returned. Otherwise, returns NULL.
81  const llvm::APSInt *getKnownValue(ProgramStateRef state, SVal V) override;
82 
83  /// Evaluates a given SVal by recursively evaluating and simplifying the
84  /// children SVals, then returns its minimal possible (integer) value. If the
85  /// constraint manager cannot provide a meaningful answer, this returns NULL.
86  const llvm::APSInt *getMinValue(ProgramStateRef state, SVal V) override;
87 
88  /// Evaluates a given SVal by recursively evaluating and simplifying the
89  /// children SVals, then returns its maximal possible (integer) value. If the
90  /// constraint manager cannot provide a meaningful answer, this returns NULL.
91  const llvm::APSInt *getMaxValue(ProgramStateRef state, SVal V) override;
92 
93  SVal simplifySVal(ProgramStateRef State, SVal V) override;
94 
95  SVal MakeSymIntVal(const SymExpr *LHS, BinaryOperator::Opcode op,
96  const llvm::APSInt &RHS, QualType resultTy);
97 };
98 } // end anonymous namespace
99 
100 SValBuilder *ento::createSimpleSValBuilder(llvm::BumpPtrAllocator &alloc,
101  ASTContext &context,
102  ProgramStateManager &stateMgr) {
103  return new SimpleSValBuilder(alloc, context, stateMgr);
104 }
105 
106 // Checks if the negation the value and flipping sign preserve
107 // the semantics on the operation in the resultType
109  APSIntType ResultType) {
110  const unsigned ValueBits = Value.getSignificantBits();
111  if (ValueBits == ResultType.getBitWidth()) {
112  // The value is the lowest negative value that is representable
113  // in signed integer with bitWith of result type. The
114  // negation is representable if resultType is unsigned.
115  return ResultType.isUnsigned();
116  }
117 
118  // If resultType bitWith is higher that number of bits required
119  // to represent RHS, the sign flip produce same value.
120  return ValueBits < ResultType.getBitWidth();
121 }
122 
123 //===----------------------------------------------------------------------===//
124 // Transfer function for binary operators.
125 //===----------------------------------------------------------------------===//
126 
127 SVal SimpleSValBuilder::MakeSymIntVal(const SymExpr *LHS,
129  const llvm::APSInt &RHS,
130  QualType resultTy) {
131  bool isIdempotent = false;
132 
133  // Check for a few special cases with known reductions first.
134  switch (op) {
135  default:
136  // We can't reduce this case; just treat it normally.
137  break;
138  case BO_Mul:
139  // a*0 and a*1
140  if (RHS == 0)
141  return makeIntVal(0, resultTy);
142  else if (RHS == 1)
143  isIdempotent = true;
144  break;
145  case BO_Div:
146  // a/0 and a/1
147  if (RHS == 0)
148  // This is also handled elsewhere.
149  return UndefinedVal();
150  else if (RHS == 1)
151  isIdempotent = true;
152  break;
153  case BO_Rem:
154  // a%0 and a%1
155  if (RHS == 0)
156  // This is also handled elsewhere.
157  return UndefinedVal();
158  else if (RHS == 1)
159  return makeIntVal(0, resultTy);
160  break;
161  case BO_Add:
162  case BO_Sub:
163  case BO_Shl:
164  case BO_Shr:
165  case BO_Xor:
166  // a+0, a-0, a<<0, a>>0, a^0
167  if (RHS == 0)
168  isIdempotent = true;
169  break;
170  case BO_And:
171  // a&0 and a&(~0)
172  if (RHS == 0)
173  return makeIntVal(0, resultTy);
174  else if (RHS.isAllOnes())
175  isIdempotent = true;
176  break;
177  case BO_Or:
178  // a|0 and a|(~0)
179  if (RHS == 0)
180  isIdempotent = true;
181  else if (RHS.isAllOnes()) {
182  const llvm::APSInt &Result = BasicVals.Convert(resultTy, RHS);
183  return nonloc::ConcreteInt(Result);
184  }
185  break;
186  }
187 
188  // Idempotent ops (like a*1) can still change the type of an expression.
189  // Wrap the LHS up in a NonLoc again and let evalCast do the
190  // dirty work.
191  if (isIdempotent)
192  return evalCast(nonloc::SymbolVal(LHS), resultTy, QualType{});
193 
194  // If we reach this point, the expression cannot be simplified.
195  // Make a SymbolVal for the entire expression, after converting the RHS.
196  const llvm::APSInt *ConvertedRHS = &RHS;
198  // We're looking for a type big enough to compare the symbolic value
199  // with the given constant.
200  // FIXME: This is an approximation of Sema::UsualArithmeticConversions.
201  ASTContext &Ctx = getContext();
202  QualType SymbolType = LHS->getType();
203  uint64_t ValWidth = RHS.getBitWidth();
204  uint64_t TypeWidth = Ctx.getTypeSize(SymbolType);
205 
206  if (ValWidth < TypeWidth) {
207  // If the value is too small, extend it.
208  ConvertedRHS = &BasicVals.Convert(SymbolType, RHS);
209  } else if (ValWidth == TypeWidth) {
210  // If the value is signed but the symbol is unsigned, do the comparison
211  // in unsigned space. [C99 6.3.1.8]
212  // (For the opposite case, the value is already unsigned.)
213  if (RHS.isSigned() && !SymbolType->isSignedIntegerOrEnumerationType())
214  ConvertedRHS = &BasicVals.Convert(SymbolType, RHS);
215  }
216  } else if (BinaryOperator::isAdditiveOp(op) && RHS.isNegative()) {
217  // Change a+(-N) into a-N, and a-(-N) into a+N
218  // Adjust addition/subtraction of negative value, to
219  // subtraction/addition of the negated value.
220  APSIntType resultIntTy = BasicVals.getAPSIntType(resultTy);
221  if (isNegationValuePreserving(RHS, resultIntTy)) {
222  ConvertedRHS = &BasicVals.getValue(-resultIntTy.convert(RHS));
223  op = (op == BO_Add) ? BO_Sub : BO_Add;
224  } else {
225  ConvertedRHS = &BasicVals.Convert(resultTy, RHS);
226  }
227  } else
228  ConvertedRHS = &BasicVals.Convert(resultTy, RHS);
229 
230  return makeNonLoc(LHS, op, *ConvertedRHS, resultTy);
231 }
232 
233 // See if Sym is known to be a relation Rel with Bound.
236  SValBuilder &SVB = State->getStateManager().getSValBuilder();
237  SVal Result =
238  SVB.evalBinOpNN(State, Rel, nonloc::SymbolVal(Sym),
239  nonloc::ConcreteInt(Bound), SVB.getConditionType());
240  if (auto DV = Result.getAs<DefinedSVal>()) {
241  return !State->assume(*DV, false);
242  }
243  return false;
244 }
245 
246 // See if Sym is known to be within [min/4, max/4], where min and max
247 // are the bounds of the symbol's integral type. With such symbols,
248 // some manipulations can be performed without the risk of overflow.
249 // assume() doesn't cause infinite recursion because we should be dealing
250 // with simpler symbols on every recursive call.
253  SValBuilder &SVB = State->getStateManager().getSValBuilder();
255 
256  QualType T = Sym->getType();
258  "This only works with signed integers!");
259  APSIntType AT = BV.getAPSIntType(T);
260 
261  llvm::APSInt Max = AT.getMaxValue() / AT.getValue(4), Min = -Max;
262  return isInRelation(BO_LE, Sym, Max, State) &&
263  isInRelation(BO_GE, Sym, Min, State);
264 }
265 
266 // Same for the concrete integers: see if I is within [min/4, max/4].
268  APSIntType AT(I);
269  assert(!AT.isUnsigned() &&
270  "This only works with signed integers!");
271 
272  llvm::APSInt Max = AT.getMaxValue() / AT.getValue(4), Min = -Max;
273  return (I <= Max) && (I >= -Max);
274 }
275 
276 static std::pair<SymbolRef, llvm::APSInt>
278  if (const auto *SymInt = dyn_cast<SymIntExpr>(Sym))
279  if (BinaryOperator::isAdditiveOp(SymInt->getOpcode()))
280  return std::make_pair(SymInt->getLHS(),
281  (SymInt->getOpcode() == BO_Add) ?
282  (SymInt->getRHS()) :
283  (-SymInt->getRHS()));
284 
285  // Fail to decompose: "reduce" the problem to the "$x + 0" case.
286  return std::make_pair(Sym, BV.getValue(0, Sym->getType()));
287 }
288 
289 // Simplify "(LSym + LInt) Op (RSym + RInt)" assuming all values are of the
290 // same signed integral type and no overflows occur (which should be checked
291 // by the caller).
294  SymbolRef LSym, llvm::APSInt LInt,
295  SymbolRef RSym, llvm::APSInt RInt) {
296  SValBuilder &SVB = State->getStateManager().getSValBuilder();
298  SymbolManager &SymMgr = SVB.getSymbolManager();
299 
300  QualType SymTy = LSym->getType();
301  assert(SymTy == RSym->getType() &&
302  "Symbols are not of the same type!");
303  assert(APSIntType(LInt) == BV.getAPSIntType(SymTy) &&
304  "Integers are not of the same type as symbols!");
305  assert(APSIntType(RInt) == BV.getAPSIntType(SymTy) &&
306  "Integers are not of the same type as symbols!");
307 
308  QualType ResultTy;
310  ResultTy = SVB.getConditionType();
311  else if (BinaryOperator::isAdditiveOp(Op))
312  ResultTy = SymTy;
313  else
314  llvm_unreachable("Operation not suitable for unchecked rearrangement!");
315 
316  if (LSym == RSym)
317  return SVB.evalBinOpNN(State, Op, nonloc::ConcreteInt(LInt),
318  nonloc::ConcreteInt(RInt), ResultTy)
319  .castAs<NonLoc>();
320 
321  SymbolRef ResultSym = nullptr;
322  BinaryOperator::Opcode ResultOp;
323  llvm::APSInt ResultInt;
325  // Prefer comparing to a non-negative number.
326  // FIXME: Maybe it'd be better to have consistency in
327  // "$x - $y" vs. "$y - $x" because those are solver's keys.
328  if (LInt > RInt) {
329  ResultSym = SymMgr.getSymSymExpr(RSym, BO_Sub, LSym, SymTy);
331  ResultInt = LInt - RInt; // Opposite order!
332  } else {
333  ResultSym = SymMgr.getSymSymExpr(LSym, BO_Sub, RSym, SymTy);
334  ResultOp = Op;
335  ResultInt = RInt - LInt; // Opposite order!
336  }
337  } else {
338  ResultSym = SymMgr.getSymSymExpr(LSym, Op, RSym, SymTy);
339  ResultInt = (Op == BO_Add) ? (LInt + RInt) : (LInt - RInt);
340  ResultOp = BO_Add;
341  // Bring back the cosmetic difference.
342  if (ResultInt < 0) {
343  ResultInt = -ResultInt;
344  ResultOp = BO_Sub;
345  } else if (ResultInt == 0) {
346  // Shortcut: Simplify "$x + 0" to "$x".
347  return nonloc::SymbolVal(ResultSym);
348  }
349  }
350  const llvm::APSInt &PersistentResultInt = BV.getValue(ResultInt);
351  return nonloc::SymbolVal(
352  SymMgr.getSymIntExpr(ResultSym, ResultOp, PersistentResultInt, ResultTy));
353 }
354 
355 // Rearrange if symbol type matches the result type and if the operator is a
356 // comparison operator, both symbol and constant must be within constant
357 // overflow bounds.
359  SymbolRef Sym, llvm::APSInt Int, QualType Ty) {
360  return Sym->getType() == Ty &&
364 }
365 
366 static std::optional<NonLoc> tryRearrange(ProgramStateRef State,
368  NonLoc Rhs, QualType ResultTy) {
369  ProgramStateManager &StateMgr = State->getStateManager();
370  SValBuilder &SVB = StateMgr.getSValBuilder();
371 
372  // We expect everything to be of the same type - this type.
373  QualType SingleTy;
374 
375  // FIXME: After putting complexity threshold to the symbols we can always
376  // rearrange additive operations but rearrange comparisons only if
377  // option is set.
378  if (!SVB.getAnalyzerOptions().ShouldAggressivelySimplifyBinaryOperation)
379  return std::nullopt;
380 
381  SymbolRef LSym = Lhs.getAsSymbol();
382  if (!LSym)
383  return std::nullopt;
384 
386  SingleTy = LSym->getType();
387  if (ResultTy != SVB.getConditionType())
388  return std::nullopt;
389  // Initialize SingleTy later with a symbol's type.
390  } else if (BinaryOperator::isAdditiveOp(Op)) {
391  SingleTy = ResultTy;
392  if (LSym->getType() != SingleTy)
393  return std::nullopt;
394  } else {
395  // Don't rearrange other operations.
396  return std::nullopt;
397  }
398 
399  assert(!SingleTy.isNull() && "We should have figured out the type by now!");
400 
401  // Rearrange signed symbolic expressions only
402  if (!SingleTy->isSignedIntegerOrEnumerationType())
403  return std::nullopt;
404 
405  SymbolRef RSym = Rhs.getAsSymbol();
406  if (!RSym || RSym->getType() != SingleTy)
407  return std::nullopt;
408 
409  BasicValueFactory &BV = State->getBasicVals();
410  llvm::APSInt LInt, RInt;
411  std::tie(LSym, LInt) = decomposeSymbol(LSym, BV);
412  std::tie(RSym, RInt) = decomposeSymbol(RSym, BV);
413  if (!shouldRearrange(State, Op, LSym, LInt, SingleTy) ||
414  !shouldRearrange(State, Op, RSym, RInt, SingleTy))
415  return std::nullopt;
416 
417  // We know that no overflows can occur anymore.
418  return doRearrangeUnchecked(State, Op, LSym, LInt, RSym, RInt);
419 }
420 
421 SVal SimpleSValBuilder::evalBinOpNN(ProgramStateRef state,
423  NonLoc lhs, NonLoc rhs,
424  QualType resultTy) {
425  NonLoc InputLHS = lhs;
426  NonLoc InputRHS = rhs;
427 
428  // Constraints may have changed since the creation of a bound SVal. Check if
429  // the values can be simplified based on those new constraints.
430  SVal simplifiedLhs = simplifySVal(state, lhs);
431  SVal simplifiedRhs = simplifySVal(state, rhs);
432  if (auto simplifiedLhsAsNonLoc = simplifiedLhs.getAs<NonLoc>())
433  lhs = *simplifiedLhsAsNonLoc;
434  if (auto simplifiedRhsAsNonLoc = simplifiedRhs.getAs<NonLoc>())
435  rhs = *simplifiedRhsAsNonLoc;
436 
437  // Handle trivial case where left-side and right-side are the same.
438  if (lhs == rhs)
439  switch (op) {
440  default:
441  break;
442  case BO_EQ:
443  case BO_LE:
444  case BO_GE:
445  return makeTruthVal(true, resultTy);
446  case BO_LT:
447  case BO_GT:
448  case BO_NE:
449  return makeTruthVal(false, resultTy);
450  case BO_Xor:
451  case BO_Sub:
452  if (resultTy->isIntegralOrEnumerationType())
453  return makeIntVal(0, resultTy);
454  return evalCast(makeIntVal(0, /*isUnsigned=*/false), resultTy,
455  QualType{});
456  case BO_Or:
457  case BO_And:
458  return evalCast(lhs, resultTy, QualType{});
459  }
460 
461  while (true) {
462  switch (lhs.getKind()) {
463  default:
464  return makeSymExprValNN(op, lhs, rhs, resultTy);
465  case nonloc::PointerToMemberKind: {
466  assert(rhs.getKind() == nonloc::PointerToMemberKind &&
467  "Both SVals should have pointer-to-member-type");
468  auto LPTM = lhs.castAs<nonloc::PointerToMember>(),
469  RPTM = rhs.castAs<nonloc::PointerToMember>();
470  auto LPTMD = LPTM.getPTMData(), RPTMD = RPTM.getPTMData();
471  switch (op) {
472  case BO_EQ:
473  return makeTruthVal(LPTMD == RPTMD, resultTy);
474  case BO_NE:
475  return makeTruthVal(LPTMD != RPTMD, resultTy);
476  default:
477  return UnknownVal();
478  }
479  }
480  case nonloc::LocAsIntegerKind: {
481  Loc lhsL = lhs.castAs<nonloc::LocAsInteger>().getLoc();
482  switch (rhs.getKind()) {
483  case nonloc::LocAsIntegerKind:
484  // FIXME: at the moment the implementation
485  // of modeling "pointers as integers" is not complete.
487  return UnknownVal();
488  return evalBinOpLL(state, op, lhsL,
490  resultTy);
491  case nonloc::ConcreteIntKind: {
492  // FIXME: at the moment the implementation
493  // of modeling "pointers as integers" is not complete.
495  return UnknownVal();
496  // Transform the integer into a location and compare.
497  // FIXME: This only makes sense for comparisons. If we want to, say,
498  // add 1 to a LocAsInteger, we'd better unpack the Loc and add to it,
499  // then pack it back into a LocAsInteger.
500  llvm::APSInt i = rhs.castAs<nonloc::ConcreteInt>().getValue();
501  // If the region has a symbolic base, pay attention to the type; it
502  // might be coming from a non-default address space. For non-symbolic
503  // regions it doesn't matter that much because such comparisons would
504  // most likely evaluate to concrete false anyway. FIXME: We might
505  // still need to handle the non-comparison case.
506  if (SymbolRef lSym = lhs.getAsLocSymbol(true))
507  BasicVals.getAPSIntType(lSym->getType()).apply(i);
508  else
509  BasicVals.getAPSIntType(Context.VoidPtrTy).apply(i);
510  return evalBinOpLL(state, op, lhsL, makeLoc(i), resultTy);
511  }
512  default:
513  switch (op) {
514  case BO_EQ:
515  return makeTruthVal(false, resultTy);
516  case BO_NE:
517  return makeTruthVal(true, resultTy);
518  default:
519  // This case also handles pointer arithmetic.
520  return makeSymExprValNN(op, InputLHS, InputRHS, resultTy);
521  }
522  }
523  }
524  case nonloc::ConcreteIntKind: {
525  llvm::APSInt LHSValue = lhs.castAs<nonloc::ConcreteInt>().getValue();
526 
527  // If we're dealing with two known constants, just perform the operation.
528  if (const llvm::APSInt *KnownRHSValue = getConstValue(state, rhs)) {
529  llvm::APSInt RHSValue = *KnownRHSValue;
531  // We're looking for a type big enough to compare the two values.
532  // FIXME: This is not correct. char + short will result in a promotion
533  // to int. Unfortunately we have lost types by this point.
534  APSIntType CompareType = std::max(APSIntType(LHSValue),
535  APSIntType(RHSValue));
536  CompareType.apply(LHSValue);
537  CompareType.apply(RHSValue);
538  } else if (!BinaryOperator::isShiftOp(op)) {
539  APSIntType IntType = BasicVals.getAPSIntType(resultTy);
540  IntType.apply(LHSValue);
541  IntType.apply(RHSValue);
542  }
543 
544  const llvm::APSInt *Result =
545  BasicVals.evalAPSInt(op, LHSValue, RHSValue);
546  if (!Result) {
547  if (op == BO_Shl || op == BO_Shr) {
548  // FIXME: At this point the constant folding claims that the result
549  // of a bitwise shift is undefined. However, constant folding
550  // relies on the inaccurate type information that is stored in the
551  // bit size of APSInt objects, and if we reached this point, then
552  // the checker core.BitwiseShift already determined that the shift
553  // is valid (in a PreStmt callback, by querying the real type from
554  // the AST node).
555  // To avoid embarrassing false positives, let's just say that we
556  // don't know anything about the result of the shift.
557  return UnknownVal();
558  }
559  return UndefinedVal();
560  }
561 
562  return nonloc::ConcreteInt(*Result);
563  }
564 
565  // Swap the left and right sides and flip the operator if doing so
566  // allows us to better reason about the expression (this is a form
567  // of expression canonicalization).
568  // While we're at it, catch some special cases for non-commutative ops.
569  switch (op) {
570  case BO_LT:
571  case BO_GT:
572  case BO_LE:
573  case BO_GE:
575  [[fallthrough]];
576  case BO_EQ:
577  case BO_NE:
578  case BO_Add:
579  case BO_Mul:
580  case BO_And:
581  case BO_Xor:
582  case BO_Or:
583  std::swap(lhs, rhs);
584  continue;
585  case BO_Shr:
586  // (~0)>>a
587  if (LHSValue.isAllOnes() && LHSValue.isSigned())
588  return evalCast(lhs, resultTy, QualType{});
589  [[fallthrough]];
590  case BO_Shl:
591  // 0<<a and 0>>a
592  if (LHSValue == 0)
593  return evalCast(lhs, resultTy, QualType{});
594  return makeSymExprValNN(op, InputLHS, InputRHS, resultTy);
595  case BO_Div:
596  // 0 / x == 0
597  case BO_Rem:
598  // 0 % x == 0
599  if (LHSValue == 0)
600  return makeZeroVal(resultTy);
601  [[fallthrough]];
602  default:
603  return makeSymExprValNN(op, InputLHS, InputRHS, resultTy);
604  }
605  }
606  case nonloc::SymbolValKind: {
607  // We only handle LHS as simple symbols or SymIntExprs.
608  SymbolRef Sym = lhs.castAs<nonloc::SymbolVal>().getSymbol();
609 
610  // LHS is a symbolic expression.
611  if (const SymIntExpr *symIntExpr = dyn_cast<SymIntExpr>(Sym)) {
612 
613  // Is this a logical not? (!x is represented as x == 0.)
614  if (op == BO_EQ && rhs.isZeroConstant()) {
615  // We know how to negate certain expressions. Simplify them here.
616 
617  BinaryOperator::Opcode opc = symIntExpr->getOpcode();
618  switch (opc) {
619  default:
620  // We don't know how to negate this operation.
621  // Just handle it as if it were a normal comparison to 0.
622  break;
623  case BO_LAnd:
624  case BO_LOr:
625  llvm_unreachable("Logical operators handled by branching logic.");
626  case BO_Assign:
627  case BO_MulAssign:
628  case BO_DivAssign:
629  case BO_RemAssign:
630  case BO_AddAssign:
631  case BO_SubAssign:
632  case BO_ShlAssign:
633  case BO_ShrAssign:
634  case BO_AndAssign:
635  case BO_XorAssign:
636  case BO_OrAssign:
637  case BO_Comma:
638  llvm_unreachable("'=' and ',' operators handled by ExprEngine.");
639  case BO_PtrMemD:
640  case BO_PtrMemI:
641  llvm_unreachable("Pointer arithmetic not handled here.");
642  case BO_LT:
643  case BO_GT:
644  case BO_LE:
645  case BO_GE:
646  case BO_EQ:
647  case BO_NE:
648  assert(resultTy->isBooleanType() ||
649  resultTy == getConditionType());
650  assert(symIntExpr->getType()->isBooleanType() ||
651  getContext().hasSameUnqualifiedType(symIntExpr->getType(),
652  getConditionType()));
653  // Negate the comparison and make a value.
655  return makeNonLoc(symIntExpr->getLHS(), opc,
656  symIntExpr->getRHS(), resultTy);
657  }
658  }
659 
660  // For now, only handle expressions whose RHS is a constant.
661  if (const llvm::APSInt *RHSValue = getConstValue(state, rhs)) {
662  // If both the LHS and the current expression are additive,
663  // fold their constants and try again.
665  BinaryOperator::Opcode lop = symIntExpr->getOpcode();
666  if (BinaryOperator::isAdditiveOp(lop)) {
667  // Convert the two constants to a common type, then combine them.
668 
669  // resultTy may not be the best type to convert to, but it's
670  // probably the best choice in expressions with mixed type
671  // (such as x+1U+2LL). The rules for implicit conversions should
672  // choose a reasonable type to preserve the expression, and will
673  // at least match how the value is going to be used.
674  APSIntType IntType = BasicVals.getAPSIntType(resultTy);
675  const llvm::APSInt &first = IntType.convert(symIntExpr->getRHS());
676  const llvm::APSInt &second = IntType.convert(*RHSValue);
677 
678  // If the op and lop agrees, then we just need to
679  // sum the constants. Otherwise, we change to operation
680  // type if substraction would produce negative value
681  // (and cause overflow for unsigned integers),
682  // as consequence x+1U-10 produces x-9U, instead
683  // of x+4294967287U, that would be produced without this
684  // additional check.
685  const llvm::APSInt *newRHS;
686  if (lop == op) {
687  newRHS = BasicVals.evalAPSInt(BO_Add, first, second);
688  } else if (first >= second) {
689  newRHS = BasicVals.evalAPSInt(BO_Sub, first, second);
690  op = lop;
691  } else {
692  newRHS = BasicVals.evalAPSInt(BO_Sub, second, first);
693  }
694 
695  assert(newRHS && "Invalid operation despite common type!");
696  rhs = nonloc::ConcreteInt(*newRHS);
697  lhs = nonloc::SymbolVal(symIntExpr->getLHS());
698  continue;
699  }
700  }
701 
702  // Otherwise, make a SymIntExpr out of the expression.
703  return MakeSymIntVal(symIntExpr, op, *RHSValue, resultTy);
704  }
705  }
706 
707  // Is the RHS a constant?
708  if (const llvm::APSInt *RHSValue = getConstValue(state, rhs))
709  return MakeSymIntVal(Sym, op, *RHSValue, resultTy);
710 
711  if (std::optional<NonLoc> V = tryRearrange(state, op, lhs, rhs, resultTy))
712  return *V;
713 
714  // Give up -- this is not a symbolic expression we can handle.
715  return makeSymExprValNN(op, InputLHS, InputRHS, resultTy);
716  }
717  }
718  }
719 }
720 
722  const FieldRegion *RightFR,
724  QualType resultTy,
725  SimpleSValBuilder &SVB) {
726  // Only comparisons are meaningful here!
728  return UnknownVal();
729 
730  // Next, see if the two FRs have the same super-region.
731  // FIXME: This doesn't handle casts yet, and simply stripping the casts
732  // doesn't help.
733  if (LeftFR->getSuperRegion() != RightFR->getSuperRegion())
734  return UnknownVal();
735 
736  const FieldDecl *LeftFD = LeftFR->getDecl();
737  const FieldDecl *RightFD = RightFR->getDecl();
738  const RecordDecl *RD = LeftFD->getParent();
739 
740  // Make sure the two FRs are from the same kind of record. Just in case!
741  // FIXME: This is probably where inheritance would be a problem.
742  if (RD != RightFD->getParent())
743  return UnknownVal();
744 
745  // We know for sure that the two fields are not the same, since that
746  // would have given us the same SVal.
747  if (op == BO_EQ)
748  return SVB.makeTruthVal(false, resultTy);
749  if (op == BO_NE)
750  return SVB.makeTruthVal(true, resultTy);
751 
752  // Iterate through the fields and see which one comes first.
753  // [C99 6.7.2.1.13] "Within a structure object, the non-bit-field
754  // members and the units in which bit-fields reside have addresses that
755  // increase in the order in which they are declared."
756  bool leftFirst = (op == BO_LT || op == BO_LE);
757  for (const auto *I : RD->fields()) {
758  if (I == LeftFD)
759  return SVB.makeTruthVal(leftFirst, resultTy);
760  if (I == RightFD)
761  return SVB.makeTruthVal(!leftFirst, resultTy);
762  }
763 
764  llvm_unreachable("Fields not found in parent record's definition");
765 }
766 
767 // This is used in debug builds only for now because some downstream users
768 // may hit this assert in their subsequent merges.
769 // There are still places in the analyzer where equal bitwidth Locs
770 // are compared, and need to be found and corrected. Recent previous fixes have
771 // addressed the known problems of making NULLs with specific bitwidths
772 // for Loc comparisons along with deprecation of APIs for the same purpose.
773 //
775  Loc LhsLoc) {
776  // Implements a "best effort" check for RhsLoc and LhsLoc bit widths
777  ASTContext &Ctx = State->getStateManager().getContext();
778  uint64_t RhsBitwidth =
779  RhsLoc.getType(Ctx).isNull() ? 0 : Ctx.getTypeSize(RhsLoc.getType(Ctx));
780  uint64_t LhsBitwidth =
781  LhsLoc.getType(Ctx).isNull() ? 0 : Ctx.getTypeSize(LhsLoc.getType(Ctx));
782  if (RhsBitwidth && LhsBitwidth && (LhsLoc.getKind() == RhsLoc.getKind())) {
783  assert(RhsBitwidth == LhsBitwidth &&
784  "RhsLoc and LhsLoc bitwidth must be same!");
785  }
786 }
787 
788 // FIXME: all this logic will change if/when we have MemRegion::getLocation().
789 SVal SimpleSValBuilder::evalBinOpLL(ProgramStateRef state,
791  Loc lhs, Loc rhs,
792  QualType resultTy) {
793 
794  // Assert that bitwidth of lhs and rhs are the same.
795  // This can happen if two different address spaces are used,
796  // and the bitwidths of the address spaces are different.
797  // See LIT case clang/test/Analysis/cstring-checker-addressspace.c
798  // FIXME: See comment above in the function assertEqualBitWidths
799  assertEqualBitWidths(state, rhs, lhs);
800 
801  // Only comparisons and subtractions are valid operations on two pointers.
802  // See [C99 6.5.5 through 6.5.14] or [C++0x 5.6 through 5.15].
803  // However, if a pointer is casted to an integer, evalBinOpNN may end up
804  // calling this function with another operation (PR7527). We don't attempt to
805  // model this for now, but it could be useful, particularly when the
806  // "location" is actually an integer value that's been passed through a void*.
807  if (!(BinaryOperator::isComparisonOp(op) || op == BO_Sub))
808  return UnknownVal();
809 
810  // Special cases for when both sides are identical.
811  if (lhs == rhs) {
812  switch (op) {
813  default:
814  llvm_unreachable("Unimplemented operation for two identical values");
815  case BO_Sub:
816  return makeZeroVal(resultTy);
817  case BO_EQ:
818  case BO_LE:
819  case BO_GE:
820  return makeTruthVal(true, resultTy);
821  case BO_NE:
822  case BO_LT:
823  case BO_GT:
824  return makeTruthVal(false, resultTy);
825  }
826  }
827 
828  switch (lhs.getKind()) {
829  default:
830  llvm_unreachable("Ordering not implemented for this Loc.");
831 
832  case loc::GotoLabelKind:
833  // The only thing we know about labels is that they're non-null.
834  if (rhs.isZeroConstant()) {
835  switch (op) {
836  default:
837  break;
838  case BO_Sub:
839  return evalCast(lhs, resultTy, QualType{});
840  case BO_EQ:
841  case BO_LE:
842  case BO_LT:
843  return makeTruthVal(false, resultTy);
844  case BO_NE:
845  case BO_GT:
846  case BO_GE:
847  return makeTruthVal(true, resultTy);
848  }
849  }
850  // There may be two labels for the same location, and a function region may
851  // have the same address as a label at the start of the function (depending
852  // on the ABI).
853  // FIXME: we can probably do a comparison against other MemRegions, though.
854  // FIXME: is there a way to tell if two labels refer to the same location?
855  return UnknownVal();
856 
857  case loc::ConcreteIntKind: {
858  auto L = lhs.castAs<loc::ConcreteInt>();
859 
860  // If one of the operands is a symbol and the other is a constant,
861  // build an expression for use by the constraint manager.
862  if (SymbolRef rSym = rhs.getAsLocSymbol()) {
863  // We can only build expressions with symbols on the left,
864  // so we need a reversible operator.
865  if (!BinaryOperator::isComparisonOp(op) || op == BO_Cmp)
866  return UnknownVal();
867 
869  return makeNonLoc(rSym, op, L.getValue(), resultTy);
870  }
871 
872  // If both operands are constants, just perform the operation.
873  if (std::optional<loc::ConcreteInt> rInt = rhs.getAs<loc::ConcreteInt>()) {
874  assert(BinaryOperator::isComparisonOp(op) || op == BO_Sub);
875 
876  if (const auto *ResultInt =
877  BasicVals.evalAPSInt(op, L.getValue(), rInt->getValue()))
878  return evalCast(nonloc::ConcreteInt(*ResultInt), resultTy, QualType{});
879  return UnknownVal();
880  }
881 
882  // Special case comparisons against NULL.
883  // This must come after the test if the RHS is a symbol, which is used to
884  // build constraints. The address of any non-symbolic region is guaranteed
885  // to be non-NULL, as is any label.
886  assert((isa<loc::MemRegionVal, loc::GotoLabel>(rhs)));
887  if (lhs.isZeroConstant()) {
888  switch (op) {
889  default:
890  break;
891  case BO_EQ:
892  case BO_GT:
893  case BO_GE:
894  return makeTruthVal(false, resultTy);
895  case BO_NE:
896  case BO_LT:
897  case BO_LE:
898  return makeTruthVal(true, resultTy);
899  }
900  }
901 
902  // Comparing an arbitrary integer to a region or label address is
903  // completely unknowable.
904  return UnknownVal();
905  }
906  case loc::MemRegionValKind: {
907  if (std::optional<loc::ConcreteInt> rInt = rhs.getAs<loc::ConcreteInt>()) {
908  // If one of the operands is a symbol and the other is a constant,
909  // build an expression for use by the constraint manager.
910  if (SymbolRef lSym = lhs.getAsLocSymbol(true)) {
912  return MakeSymIntVal(lSym, op, rInt->getValue(), resultTy);
913  return UnknownVal();
914  }
915  // Special case comparisons to NULL.
916  // This must come after the test if the LHS is a symbol, which is used to
917  // build constraints. The address of any non-symbolic region is guaranteed
918  // to be non-NULL.
919  if (rInt->isZeroConstant()) {
920  if (op == BO_Sub)
921  return evalCast(lhs, resultTy, QualType{});
922 
924  QualType boolType = getContext().BoolTy;
925  NonLoc l = evalCast(lhs, boolType, QualType{}).castAs<NonLoc>();
926  NonLoc r = makeTruthVal(false, boolType).castAs<NonLoc>();
927  return evalBinOpNN(state, op, l, r, resultTy);
928  }
929  }
930 
931  // Comparing a region to an arbitrary integer is completely unknowable.
932  return UnknownVal();
933  }
934 
935  // Get both values as regions, if possible.
936  const MemRegion *LeftMR = lhs.getAsRegion();
937  assert(LeftMR && "MemRegionValKind SVal doesn't have a region!");
938 
939  const MemRegion *RightMR = rhs.getAsRegion();
940  if (!RightMR)
941  // The RHS is probably a label, which in theory could address a region.
942  // FIXME: we can probably make a more useful statement about non-code
943  // regions, though.
944  return UnknownVal();
945 
946  const MemRegion *LeftBase = LeftMR->getBaseRegion();
947  const MemRegion *RightBase = RightMR->getBaseRegion();
948  const MemSpaceRegion *LeftMS = LeftBase->getMemorySpace();
949  const MemSpaceRegion *RightMS = RightBase->getMemorySpace();
950  const MemSpaceRegion *UnknownMS = MemMgr.getUnknownRegion();
951 
952  // If the two regions are from different known memory spaces they cannot be
953  // equal. Also, assume that no symbolic region (whose memory space is
954  // unknown) is on the stack.
955  if (LeftMS != RightMS &&
956  ((LeftMS != UnknownMS && RightMS != UnknownMS) ||
957  (isa<StackSpaceRegion>(LeftMS) || isa<StackSpaceRegion>(RightMS)))) {
958  switch (op) {
959  default:
960  return UnknownVal();
961  case BO_EQ:
962  return makeTruthVal(false, resultTy);
963  case BO_NE:
964  return makeTruthVal(true, resultTy);
965  }
966  }
967 
968  // If both values wrap regions, see if they're from different base regions.
969  // Note, heap base symbolic regions are assumed to not alias with
970  // each other; for example, we assume that malloc returns different address
971  // on each invocation.
972  // FIXME: ObjC object pointers always reside on the heap, but currently
973  // we treat their memory space as unknown, because symbolic pointers
974  // to ObjC objects may alias. There should be a way to construct
975  // possibly-aliasing heap-based regions. For instance, MacOSXApiChecker
976  // guesses memory space for ObjC object pointers manually instead of
977  // relying on us.
978  if (LeftBase != RightBase &&
979  ((!isa<SymbolicRegion>(LeftBase) && !isa<SymbolicRegion>(RightBase)) ||
980  (isa<HeapSpaceRegion>(LeftMS) || isa<HeapSpaceRegion>(RightMS))) ){
981  switch (op) {
982  default:
983  return UnknownVal();
984  case BO_EQ:
985  return makeTruthVal(false, resultTy);
986  case BO_NE:
987  return makeTruthVal(true, resultTy);
988  }
989  }
990 
991  // Handle special cases for when both regions are element regions.
992  const ElementRegion *RightER = dyn_cast<ElementRegion>(RightMR);
993  const ElementRegion *LeftER = dyn_cast<ElementRegion>(LeftMR);
994  if (RightER && LeftER) {
995  // Next, see if the two ERs have the same super-region and matching types.
996  // FIXME: This should do something useful even if the types don't match,
997  // though if both indexes are constant the RegionRawOffset path will
998  // give the correct answer.
999  if (LeftER->getSuperRegion() == RightER->getSuperRegion() &&
1000  LeftER->getElementType() == RightER->getElementType()) {
1001  // Get the left index and cast it to the correct type.
1002  // If the index is unknown or undefined, bail out here.
1003  SVal LeftIndexVal = LeftER->getIndex();
1004  std::optional<NonLoc> LeftIndex = LeftIndexVal.getAs<NonLoc>();
1005  if (!LeftIndex)
1006  return UnknownVal();
1007  LeftIndexVal = evalCast(*LeftIndex, ArrayIndexTy, QualType{});
1008  LeftIndex = LeftIndexVal.getAs<NonLoc>();
1009  if (!LeftIndex)
1010  return UnknownVal();
1011 
1012  // Do the same for the right index.
1013  SVal RightIndexVal = RightER->getIndex();
1014  std::optional<NonLoc> RightIndex = RightIndexVal.getAs<NonLoc>();
1015  if (!RightIndex)
1016  return UnknownVal();
1017  RightIndexVal = evalCast(*RightIndex, ArrayIndexTy, QualType{});
1018  RightIndex = RightIndexVal.getAs<NonLoc>();
1019  if (!RightIndex)
1020  return UnknownVal();
1021 
1022  // Actually perform the operation.
1023  // evalBinOpNN expects the two indexes to already be the right type.
1024  return evalBinOpNN(state, op, *LeftIndex, *RightIndex, resultTy);
1025  }
1026  }
1027 
1028  // Special handling of the FieldRegions, even with symbolic offsets.
1029  const FieldRegion *RightFR = dyn_cast<FieldRegion>(RightMR);
1030  const FieldRegion *LeftFR = dyn_cast<FieldRegion>(LeftMR);
1031  if (RightFR && LeftFR) {
1032  SVal R = evalBinOpFieldRegionFieldRegion(LeftFR, RightFR, op, resultTy,
1033  *this);
1034  if (!R.isUnknown())
1035  return R;
1036  }
1037 
1038  // Compare the regions using the raw offsets.
1039  RegionOffset LeftOffset = LeftMR->getAsOffset();
1040  RegionOffset RightOffset = RightMR->getAsOffset();
1041 
1042  if (LeftOffset.getRegion() != nullptr &&
1043  LeftOffset.getRegion() == RightOffset.getRegion() &&
1044  !LeftOffset.hasSymbolicOffset() && !RightOffset.hasSymbolicOffset()) {
1045  int64_t left = LeftOffset.getOffset();
1046  int64_t right = RightOffset.getOffset();
1047 
1048  switch (op) {
1049  default:
1050  return UnknownVal();
1051  case BO_LT:
1052  return makeTruthVal(left < right, resultTy);
1053  case BO_GT:
1054  return makeTruthVal(left > right, resultTy);
1055  case BO_LE:
1056  return makeTruthVal(left <= right, resultTy);
1057  case BO_GE:
1058  return makeTruthVal(left >= right, resultTy);
1059  case BO_EQ:
1060  return makeTruthVal(left == right, resultTy);
1061  case BO_NE:
1062  return makeTruthVal(left != right, resultTy);
1063  }
1064  }
1065 
1066  // At this point we're not going to get a good answer, but we can try
1067  // conjuring an expression instead.
1068  SymbolRef LHSSym = lhs.getAsLocSymbol();
1069  SymbolRef RHSSym = rhs.getAsLocSymbol();
1070  if (LHSSym && RHSSym)
1071  return makeNonLoc(LHSSym, op, RHSSym, resultTy);
1072 
1073  // If we get here, we have no way of comparing the regions.
1074  return UnknownVal();
1075  }
1076  }
1077 }
1078 
1079 SVal SimpleSValBuilder::evalBinOpLN(ProgramStateRef state,
1080  BinaryOperator::Opcode op, Loc lhs,
1081  NonLoc rhs, QualType resultTy) {
1082  if (op >= BO_PtrMemD && op <= BO_PtrMemI) {
1083  if (auto PTMSV = rhs.getAs<nonloc::PointerToMember>()) {
1084  if (PTMSV->isNullMemberPointer())
1085  return UndefinedVal();
1086 
1087  auto getFieldLValue = [&](const auto *FD) -> SVal {
1088  SVal Result = lhs;
1089 
1090  for (const auto &I : *PTMSV)
1091  Result = StateMgr.getStoreManager().evalDerivedToBase(
1092  Result, I->getType(), I->isVirtual());
1093 
1094  return state->getLValue(FD, Result);
1095  };
1096 
1097  if (const auto *FD = PTMSV->getDeclAs<FieldDecl>()) {
1098  return getFieldLValue(FD);
1099  }
1100  if (const auto *FD = PTMSV->getDeclAs<IndirectFieldDecl>()) {
1101  return getFieldLValue(FD);
1102  }
1103  }
1104 
1105  return rhs;
1106  }
1107 
1108  assert(!BinaryOperator::isComparisonOp(op) &&
1109  "arguments to comparison ops must be of the same type");
1110 
1111  // Special case: rhs is a zero constant.
1112  if (rhs.isZeroConstant())
1113  return lhs;
1114 
1115  // Perserve the null pointer so that it can be found by the DerefChecker.
1116  if (lhs.isZeroConstant())
1117  return lhs;
1118 
1119  // We are dealing with pointer arithmetic.
1120 
1121  // Handle pointer arithmetic on constant values.
1122  if (std::optional<nonloc::ConcreteInt> rhsInt =
1123  rhs.getAs<nonloc::ConcreteInt>()) {
1124  if (std::optional<loc::ConcreteInt> lhsInt =
1125  lhs.getAs<loc::ConcreteInt>()) {
1126  const llvm::APSInt &leftI = lhsInt->getValue();
1127  assert(leftI.isUnsigned());
1128  llvm::APSInt rightI(rhsInt->getValue(), /* isUnsigned */ true);
1129 
1130  // Convert the bitwidth of rightI. This should deal with overflow
1131  // since we are dealing with concrete values.
1132  rightI = rightI.extOrTrunc(leftI.getBitWidth());
1133 
1134  // Offset the increment by the pointer size.
1135  llvm::APSInt Multiplicand(rightI.getBitWidth(), /* isUnsigned */ true);
1136  QualType pointeeType = resultTy->getPointeeType();
1137  Multiplicand = getContext().getTypeSizeInChars(pointeeType).getQuantity();
1138  rightI *= Multiplicand;
1139 
1140  // Compute the adjusted pointer.
1141  switch (op) {
1142  case BO_Add:
1143  rightI = leftI + rightI;
1144  break;
1145  case BO_Sub:
1146  rightI = leftI - rightI;
1147  break;
1148  default:
1149  llvm_unreachable("Invalid pointer arithmetic operation");
1150  }
1151  return loc::ConcreteInt(getBasicValueFactory().getValue(rightI));
1152  }
1153  }
1154 
1155  // Handle cases where 'lhs' is a region.
1156  if (const MemRegion *region = lhs.getAsRegion()) {
1157  rhs = convertToArrayIndex(rhs).castAs<NonLoc>();
1158  SVal index = UnknownVal();
1159  const SubRegion *superR = nullptr;
1160  // We need to know the type of the pointer in order to add an integer to it.
1161  // Depending on the type, different amount of bytes is added.
1162  QualType elementType;
1163 
1164  if (const ElementRegion *elemReg = dyn_cast<ElementRegion>(region)) {
1165  assert(op == BO_Add || op == BO_Sub);
1166  index = evalBinOpNN(state, op, elemReg->getIndex(), rhs,
1167  getArrayIndexType());
1168  superR = cast<SubRegion>(elemReg->getSuperRegion());
1169  elementType = elemReg->getElementType();
1170  }
1171  else if (isa<SubRegion>(region)) {
1172  assert(op == BO_Add || op == BO_Sub);
1173  index = (op == BO_Add) ? rhs : evalMinus(rhs);
1174  superR = cast<SubRegion>(region);
1175  // TODO: Is this actually reliable? Maybe improving our MemRegion
1176  // hierarchy to provide typed regions for all non-void pointers would be
1177  // better. For instance, we cannot extend this towards LocAsInteger
1178  // operations, where result type of the expression is integer.
1179  if (resultTy->isAnyPointerType())
1180  elementType = resultTy->getPointeeType();
1181  }
1182 
1183  // Represent arithmetic on void pointers as arithmetic on char pointers.
1184  // It is fine when a TypedValueRegion of char value type represents
1185  // a void pointer. Note that arithmetic on void pointers is a GCC extension.
1186  if (elementType->isVoidType())
1187  elementType = getContext().CharTy;
1188 
1189  if (std::optional<NonLoc> indexV = index.getAs<NonLoc>()) {
1190  return loc::MemRegionVal(MemMgr.getElementRegion(elementType, *indexV,
1191  superR, getContext()));
1192  }
1193  }
1194  return UnknownVal();
1195 }
1196 
1197 const llvm::APSInt *SimpleSValBuilder::getConstValue(ProgramStateRef state,
1198  SVal V) {
1199  if (const llvm::APSInt *Res = getConcreteValue(V))
1200  return Res;
1201 
1202  if (SymbolRef Sym = V.getAsSymbol())
1203  return state->getConstraintManager().getSymVal(state, Sym);
1204 
1205  return nullptr;
1206 }
1207 
1209  if (std::optional<loc::ConcreteInt> X = V.getAs<loc::ConcreteInt>())
1210  return &X->getValue();
1211 
1212  if (std::optional<nonloc::ConcreteInt> X = V.getAs<nonloc::ConcreteInt>())
1213  return &X->getValue();
1214 
1215  return nullptr;
1216 }
1217 
1218 const llvm::APSInt *SimpleSValBuilder::getKnownValue(ProgramStateRef state,
1219  SVal V) {
1220  return getConstValue(state, simplifySVal(state, V));
1221 }
1222 
1223 const llvm::APSInt *SimpleSValBuilder::getMinValue(ProgramStateRef state,
1224  SVal V) {
1225  V = simplifySVal(state, V);
1226 
1227  if (const llvm::APSInt *Res = getConcreteValue(V))
1228  return Res;
1229 
1230  if (SymbolRef Sym = V.getAsSymbol())
1231  return state->getConstraintManager().getSymMinVal(state, Sym);
1232 
1233  return nullptr;
1234 }
1235 
1236 const llvm::APSInt *SimpleSValBuilder::getMaxValue(ProgramStateRef state,
1237  SVal V) {
1238  V = simplifySVal(state, V);
1239 
1240  if (const llvm::APSInt *Res = getConcreteValue(V))
1241  return Res;
1242 
1243  if (SymbolRef Sym = V.getAsSymbol())
1244  return state->getConstraintManager().getSymMaxVal(state, Sym);
1245 
1246  return nullptr;
1247 }
1248 
1249 SVal SimpleSValBuilder::simplifyUntilFixpoint(ProgramStateRef State, SVal Val) {
1250  SVal SimplifiedVal = simplifySValOnce(State, Val);
1251  while (SimplifiedVal != Val) {
1252  Val = SimplifiedVal;
1253  SimplifiedVal = simplifySValOnce(State, Val);
1254  }
1255  return SimplifiedVal;
1256 }
1257 
1258 SVal SimpleSValBuilder::simplifySVal(ProgramStateRef State, SVal V) {
1259  return simplifyUntilFixpoint(State, V);
1260 }
1261 
1262 SVal SimpleSValBuilder::simplifySValOnce(ProgramStateRef State, SVal V) {
1263  // For now, this function tries to constant-fold symbols inside a
1264  // nonloc::SymbolVal, and does nothing else. More simplifications should
1265  // be possible, such as constant-folding an index in an ElementRegion.
1266 
1267  class Simplifier : public FullSValVisitor<Simplifier, SVal> {
1269  SValBuilder &SVB;
1270 
1271  // Cache results for the lifetime of the Simplifier. Results change every
1272  // time new constraints are added to the program state, which is the whole
1273  // point of simplifying, and for that very reason it's pointless to maintain
1274  // the same cache for the duration of the whole analysis.
1275  llvm::DenseMap<SymbolRef, SVal> Cached;
1276 
1277  static bool isUnchanged(SymbolRef Sym, SVal Val) {
1278  return Sym == Val.getAsSymbol();
1279  }
1280 
1281  SVal cache(SymbolRef Sym, SVal V) {
1282  Cached[Sym] = V;
1283  return V;
1284  }
1285 
1286  SVal skip(SymbolRef Sym) {
1287  return cache(Sym, SVB.makeSymbolVal(Sym));
1288  }
1289 
1290  // Return the known const value for the Sym if available, or return Undef
1291  // otherwise.
1292  SVal getConst(SymbolRef Sym) {
1293  const llvm::APSInt *Const =
1294  State->getConstraintManager().getSymVal(State, Sym);
1295  if (Const)
1296  return Loc::isLocType(Sym->getType()) ? (SVal)SVB.makeIntLocVal(*Const)
1297  : (SVal)SVB.makeIntVal(*Const);
1298  return UndefinedVal();
1299  }
1300 
1301  SVal getConstOrVisit(SymbolRef Sym) {
1302  const SVal Ret = getConst(Sym);
1303  if (Ret.isUndef())
1304  return Visit(Sym);
1305  return Ret;
1306  }
1307 
1308  public:
1309  Simplifier(ProgramStateRef State)
1310  : State(State), SVB(State->getStateManager().getSValBuilder()) {}
1311 
1312  SVal VisitSymbolData(const SymbolData *S) {
1313  // No cache here.
1314  if (const llvm::APSInt *I =
1315  State->getConstraintManager().getSymVal(State, S))
1316  return Loc::isLocType(S->getType()) ? (SVal)SVB.makeIntLocVal(*I)
1317  : (SVal)SVB.makeIntVal(*I);
1318  return SVB.makeSymbolVal(S);
1319  }
1320 
1321  SVal VisitSymIntExpr(const SymIntExpr *S) {
1322  auto I = Cached.find(S);
1323  if (I != Cached.end())
1324  return I->second;
1325 
1326  SVal LHS = getConstOrVisit(S->getLHS());
1327  if (isUnchanged(S->getLHS(), LHS))
1328  return skip(S);
1329 
1330  SVal RHS;
1331  // By looking at the APSInt in the right-hand side of S, we cannot
1332  // figure out if it should be treated as a Loc or as a NonLoc.
1333  // So make our guess by recalling that we cannot multiply pointers
1334  // or compare a pointer to an integer.
1335  if (Loc::isLocType(S->getLHS()->getType()) &&
1336  BinaryOperator::isComparisonOp(S->getOpcode())) {
1337  // The usual conversion of $sym to &SymRegion{$sym}, as they have
1338  // the same meaning for Loc-type symbols, but the latter form
1339  // is preferred in SVal computations for being Loc itself.
1340  if (SymbolRef Sym = LHS.getAsSymbol()) {
1341  assert(Loc::isLocType(Sym->getType()));
1342  LHS = SVB.makeLoc(Sym);
1343  }
1344  RHS = SVB.makeIntLocVal(S->getRHS());
1345  } else {
1346  RHS = SVB.makeIntVal(S->getRHS());
1347  }
1348 
1349  return cache(
1350  S, SVB.evalBinOp(State, S->getOpcode(), LHS, RHS, S->getType()));
1351  }
1352 
1353  SVal VisitIntSymExpr(const IntSymExpr *S) {
1354  auto I = Cached.find(S);
1355  if (I != Cached.end())
1356  return I->second;
1357 
1358  SVal RHS = getConstOrVisit(S->getRHS());
1359  if (isUnchanged(S->getRHS(), RHS))
1360  return skip(S);
1361 
1362  SVal LHS = SVB.makeIntVal(S->getLHS());
1363  return cache(
1364  S, SVB.evalBinOp(State, S->getOpcode(), LHS, RHS, S->getType()));
1365  }
1366 
1367  SVal VisitSymSymExpr(const SymSymExpr *S) {
1368  auto I = Cached.find(S);
1369  if (I != Cached.end())
1370  return I->second;
1371 
1372  // For now don't try to simplify mixed Loc/NonLoc expressions
1373  // because they often appear from LocAsInteger operations
1374  // and we don't know how to combine a LocAsInteger
1375  // with a concrete value.
1376  if (Loc::isLocType(S->getLHS()->getType()) !=
1377  Loc::isLocType(S->getRHS()->getType()))
1378  return skip(S);
1379 
1380  SVal LHS = getConstOrVisit(S->getLHS());
1381  SVal RHS = getConstOrVisit(S->getRHS());
1382 
1383  if (isUnchanged(S->getLHS(), LHS) && isUnchanged(S->getRHS(), RHS))
1384  return skip(S);
1385 
1386  return cache(
1387  S, SVB.evalBinOp(State, S->getOpcode(), LHS, RHS, S->getType()));
1388  }
1389 
1390  SVal VisitSymbolCast(const SymbolCast *S) {
1391  auto I = Cached.find(S);
1392  if (I != Cached.end())
1393  return I->second;
1394  const SymExpr *OpSym = S->getOperand();
1395  SVal OpVal = getConstOrVisit(OpSym);
1396  if (isUnchanged(OpSym, OpVal))
1397  return skip(S);
1398 
1399  return cache(S, SVB.evalCast(OpVal, S->getType(), OpSym->getType()));
1400  }
1401 
1402  SVal VisitUnarySymExpr(const UnarySymExpr *S) {
1403  auto I = Cached.find(S);
1404  if (I != Cached.end())
1405  return I->second;
1406  SVal Op = getConstOrVisit(S->getOperand());
1407  if (isUnchanged(S->getOperand(), Op))
1408  return skip(S);
1409 
1410  return cache(
1411  S, SVB.evalUnaryOp(State, S->getOpcode(), Op, S->getType()));
1412  }
1413 
1414  SVal VisitSymExpr(SymbolRef S) { return nonloc::SymbolVal(S); }
1415 
1416  SVal VisitMemRegion(const MemRegion *R) { return loc::MemRegionVal(R); }
1417 
1418  SVal VisitSymbolVal(nonloc::SymbolVal V) {
1419  // Simplification is much more costly than computing complexity.
1420  // For high complexity, it may be not worth it.
1421  return Visit(V.getSymbol());
1422  }
1423 
1424  SVal VisitSVal(SVal V) { return V; }
1425  };
1426 
1427  SVal SimplifiedV = Simplifier(State).Visit(V);
1428  return SimplifiedV;
1429 }
#define V(N, I)
Definition: ASTContext.h:3299
static std::optional< int64_t > getConcreteValue(NonLoc SV)
llvm::APSInt APSInt
#define X(type, name)
Definition: Value.h:143
static bool isInRelation(BinaryOperator::Opcode Rel, SymbolRef Sym, llvm::APSInt Bound, ProgramStateRef State)
static NonLoc doRearrangeUnchecked(ProgramStateRef State, BinaryOperator::Opcode Op, SymbolRef LSym, llvm::APSInt LInt, SymbolRef RSym, llvm::APSInt RInt)
static bool isNegationValuePreserving(const llvm::APSInt &Value, APSIntType ResultType)
static std::pair< SymbolRef, llvm::APSInt > decomposeSymbol(SymbolRef Sym, BasicValueFactory &BV)
static bool shouldRearrange(ProgramStateRef State, BinaryOperator::Opcode Op, SymbolRef Sym, llvm::APSInt Int, QualType Ty)
static std::optional< NonLoc > tryRearrange(ProgramStateRef State, BinaryOperator::Opcode Op, NonLoc Lhs, NonLoc Rhs, QualType ResultTy)
static SVal evalBinOpFieldRegionFieldRegion(const FieldRegion *LeftFR, const FieldRegion *RightFR, BinaryOperator::Opcode op, QualType resultTy, SimpleSValBuilder &SVB)
static bool isWithinConstantOverflowBounds(SymbolRef Sym, ProgramStateRef State)
static void assertEqualBitWidths(ProgramStateRef State, Loc RhsLoc, Loc LhsLoc)
LineState State
__DEVICE__ int max(int __a, int __b)
Holds long-lived AST nodes (such as types and decls) that can be referred to throughout the semantic ...
Definition: ASTContext.h:185
CanQualType VoidPtrTy
Definition: ASTContext.h:1121
uint64_t getTypeSize(QualType T) const
Return the size of the specified (complete) type T, in bits.
Definition: ASTContext.h:2355
bool isComparisonOp() const
Definition: Expr.h:3992
static Opcode negateComparisonOp(Opcode Opc)
Definition: Expr.h:3997
static Opcode reverseComparisonOp(Opcode Opc)
Definition: Expr.h:4010
bool isShiftOp() const
Definition: Expr.h:3980
bool isAdditiveOp() const
Definition: Expr.h:3978
Represents a member of a struct/union/class.
Definition: Decl.h:3060
const RecordDecl * getParent() const
Returns the parent of this field declaration, which is the struct in which this field is defined.
Definition: Decl.h:3273
Represents a field injected from an anonymous union/struct into the parent scope.
Definition: Decl.h:3344
A (possibly-)qualified type.
Definition: Type.h:940
bool isNull() const
Return true if this QualType doesn't point to a type yet.
Definition: Type.h:1007
Represents a struct/union/class.
Definition: Decl.h:4171
field_range fields() const
Definition: Decl.h:4377
bool isVoidType() const
Definition: Type.h:7939
bool isBooleanType() const
Definition: Type.h:8067
bool isSignedIntegerOrEnumerationType() const
Determines whether this is an integer type that is signed or an enumeration types whose underlying ty...
Definition: Type.cpp:2166
QualType getPointeeType() const
If this is a pointer, ObjC object pointer, or block pointer, this returns the respective pointee.
Definition: Type.cpp:705
bool isIntegralOrEnumerationType() const
Determine whether this type is an integral or enumeration type.
Definition: Type.h:8054
bool isAnyPointerType() const
Definition: Type.h:7628
A record of the "type" of an APSInt, used for conversions.
Definition: APSIntType.h:19
bool isUnsigned() const
Definition: APSIntType.h:31
uint32_t getBitWidth() const
Definition: APSIntType.h:30
llvm::APSInt getMaxValue() const LLVM_READONLY
Returns the maximum value for this type.
Definition: APSIntType.h:65
void apply(llvm::APSInt &Value) const
Convert a given APSInt, in place, to match this type.
Definition: APSIntType.h:37
llvm::APSInt convert(const llvm::APSInt &Value) const LLVM_READONLY
Convert and return a new APSInt with the given value, but this type's bit width and signedness.
Definition: APSIntType.h:48
llvm::APSInt getValue(uint64_t RawValue) const LLVM_READONLY
Definition: APSIntType.h:69
APSIntType getAPSIntType(QualType T) const
Returns the type of the APSInt used to store values of the given QualType.
Template implementation for all binary symbolic expressions.
ElementRegion is used to represent both array elements and casts.
Definition: MemRegion.h:1194
QualType getElementType() const
Definition: MemRegion.h:1218
NonLoc getIndex() const
Definition: MemRegion.h:1214
LLVM_ATTRIBUTE_RETURNS_NONNULL const FieldDecl * getDecl() const override
Definition: MemRegion.h:1120
FullSValVisitor - a convenient mixed visitor for all three: SVal, SymExpr and MemRegion subclasses.
Definition: SValVisitor.h:130
static bool isLocType(QualType T)
Definition: SVals.h:259
MemRegion - The root abstract class for all memory regions.
Definition: MemRegion.h:96
LLVM_ATTRIBUTE_RETURNS_NONNULL const MemSpaceRegion * getMemorySpace() const
Definition: MemRegion.cpp:1317
RegionOffset getAsOffset() const
Compute the offset within the top level memory object.
Definition: MemRegion.cpp:1651
LLVM_ATTRIBUTE_RETURNS_NONNULL const MemRegion * getBaseRegion() const
Definition: MemRegion.cpp:1343
MemSpaceRegion - A memory region that represents a "memory space"; for example, the set of global var...
Definition: MemRegion.h:203
Represent a region's offset within the top level base region.
Definition: MemRegion.h:63
bool hasSymbolicOffset() const
Definition: MemRegion.h:81
int64_t getOffset() const
Definition: MemRegion.h:83
const MemRegion * getRegion() const
It might return null.
Definition: MemRegion.h:79
const AnalyzerOptions & getAnalyzerOptions() const
Definition: SValBuilder.h:170
nonloc::ConcreteInt makeIntVal(const IntegerLiteral *integer)
Definition: SValBuilder.h:290
BasicValueFactory & getBasicValueFactory()
Definition: SValBuilder.h:161
loc::MemRegionVal makeLoc(SymbolRef sym)
Definition: SValBuilder.h:377
virtual SVal evalBinOpNN(ProgramStateRef state, BinaryOperator::Opcode op, NonLoc lhs, NonLoc rhs, QualType resultTy)=0
Create a new value which represents a binary expression with two non- location operands.
DefinedSVal makeSymbolVal(SymbolRef Sym)
Make an SVal that represents the given symbol.
Definition: SValBuilder.h:400
SVal evalCast(SVal V, QualType CastTy, QualType OriginalTy)
Cast a given SVal to another SVal using given QualType's.
QualType getConditionType() const
Definition: SValBuilder.h:153
SVal evalUnaryOp(ProgramStateRef state, UnaryOperator::Opcode opc, SVal operand, QualType type)
SymbolManager & getSymbolManager()
Definition: SValBuilder.h:164
SVal evalBinOp(ProgramStateRef state, BinaryOperator::Opcode op, SVal lhs, SVal rhs, QualType type)
loc::ConcreteInt makeIntLocVal(const llvm::APSInt &integer)
Definition: SValBuilder.h:306
SVal - This represents a symbolic expression, which can be either an L-value or an R-value.
Definition: SVals.h:55
bool isZeroConstant() const
Definition: SVals.cpp:258
SValKind getKind() const
Definition: SVals.h:90
SymbolRef getAsSymbol(bool IncludeBaseRegions=false) const
If this SVal wraps a symbol return that SymbolRef.
Definition: SVals.cpp:104
QualType getType(const ASTContext &) const
Try to get a reasonable type for the given value.
Definition: SVals.cpp:181
SymbolRef getAsLocSymbol(bool IncludeBaseRegions=false) const
If this SVal is a location and wraps a symbol, return that SymbolRef.
Definition: SVals.cpp:68
const MemRegion * getAsRegion() const
Definition: SVals.cpp:120
std::optional< T > getAs() const
Convert to the specified SVal type, returning std::nullopt if this SVal is not of the desired type.
Definition: SVals.h:86
T castAs() const
Convert to the specified SVal type, asserting that this SVal is of the desired type.
Definition: SVals.h:82
bool isUnknown() const
Definition: SVals.h:102
SubRegion - A region that subsets another larger region.
Definition: MemRegion.h:441
LLVM_ATTRIBUTE_RETURNS_NONNULL const MemRegion * getSuperRegion() const
Definition: MemRegion.h:454
Symbolic value.
Definition: SymExpr.h:30
virtual QualType getType() const =0
Represents a cast expression.
A symbol representing data which can be stored in a memory location (region).
Definition: SymExpr.h:119
const SymIntExpr * getSymIntExpr(const SymExpr *lhs, BinaryOperator::Opcode op, const llvm::APSInt &rhs, QualType t)
const SymSymExpr * getSymSymExpr(const SymExpr *lhs, BinaryOperator::Opcode op, const SymExpr *rhs, QualType t)
Represents a symbolic expression involving a unary operator.
Value representing integer constant.
Definition: SVals.h:297
Value representing pointer-to-member.
Definition: SVals.h:382
const PTMDataType getPTMData() const
Definition: SVals.h:389
Represents symbolic expression that isn't a location.
Definition: SVals.h:276
SValBuilder * createSimpleSValBuilder(llvm::BumpPtrAllocator &alloc, ASTContext &context, ProgramStateManager &stateMgr)
bool Ret(InterpState &S, CodePtr &PC, APValue &Result)
Definition: Interp.h:218
bool Const(InterpState &S, CodePtr OpPC, const T &Arg)
Definition: Interp.h:940
The JSON file list parser is used to communicate input to InstallAPI.
BinaryOperatorKind
const FunctionProtoType * T
unsigned long uint64_t
long int64_t