29 #include "llvm/ADT/StringExtras.h"
30 #include "llvm/Support/YAMLTraits.h"
38 #define DEBUG_TYPE "taint-checker"
40 using namespace clang;
42 using namespace taint;
44 using llvm::ImmutableSet;
48 class GenericTaintChecker;
/// Report message for tainted data reaching a format-string position.
/// checkUncontrolledFormatString passes it to generateReportIfTainted, and
/// initTaintRules attaches it to the format-string sink rules.
51 constexpr llvm::StringLiteral MsgUncontrolledFormatString =
52 "Untrusted data is used as a format string "
53 "(CWE-134: Uncontrolled Format String)";
/// Report message attached in initTaintRules to the sink rules for
/// command-execution and library-loading calls (system, popen, the exec*
/// family, dlopen).
58 constexpr llvm::StringLiteral MsgSanitizeSystemArgs =
59 "Untrusted data is passed to a system call "
60 "(CERT/STR02-C. Sanitize data passed to complex subsystems)";
/// Fallback report message for sinks: GenericTaintRule::process uses it when
/// a rule carries no SinkMsg of its own (SinkMsg.value_or(MsgCustomSink)).
/// It is also given explicitly to the setproctitle_init sink rule.
63 constexpr llvm::StringLiteral MsgCustomSink =
64 "Untrusted data is passed to a user-defined sink";
/// Pseudo argument index (-1) that stands for the call's return value.
/// GetArgExpr maps it to Call.getOriginExpr(), and the per-argument loop in
/// GenericTaintRule::process starts from it, so the return value is visited
/// before argument 0.
70 constexpr ArgIdxTy ReturnValueIndex{-1};
72 static ArgIdxTy fromArgumentCount(
unsigned Count) {
75 "ArgIdxTy is not large enough to represent the number of arguments.");
86 const auto *SymReg = dyn_cast_or_null<SymbolicRegion>(Val.
getAsRegion());
92 dyn_cast_or_null<DeclRegion>(SymReg->getSymbol()->getOriginRegion());
98 if (
const auto *D = dyn_cast_or_null<VarDecl>(DeclReg->getDecl())) {
99 D = D->getCanonicalDecl();
100 if (D->getName() ==
"stdin" && D->hasExternalStorage() && D->isExternC()) {
102 const QualType Ty = D->getType().getCanonicalType();
114 return State->getSVal(LValue);
118 return State->getSVal(LValue,
State->getStateManager().getContext().CharTy);
124 return getPointeeOf(
State, *LValue);
133 if (
auto Pointee = getPointeeOf(
State, Arg))
143 return getTaintedPointeeOrPointer(
State, ExprSVal).has_value();
150 std::vector<SymbolRef> TaintedSymbols,
151 std::vector<ArgIdxTy> TaintedArgs,
153 return C.getNoteTag([TaintedSymbols = std::move(TaintedSymbols),
154 TaintedArgs = std::move(TaintedArgs), CallLocation](
162 if (TaintedSymbols.empty())
163 return "Taint originated here";
165 for (
auto Sym : TaintedSymbols) {
168 LLVM_DEBUG(
for (
auto Arg
170 llvm::dbgs() <<
"Taint Propagated from argument " << Arg + 1 <<
"\n";
179 const NoteTag *taintPropagationExplainerTag(
181 std::vector<ArgIdxTy> TaintedArgs,
const LocationContext *CallLocation) {
182 assert(TaintedSymbols.size() == TaintedArgs.size());
183 return C.getNoteTag([TaintedSymbols = std::move(TaintedSymbols),
184 TaintedArgs = std::move(TaintedArgs), CallLocation](
187 llvm::raw_svector_ostream Out(Msg);
189 if (TaintedSymbols.empty() ||
193 int nofTaintedArgs = 0;
194 for (
auto [Idx, Sym] : llvm::enumerate(TaintedSymbols)) {
197 if (TaintedArgs[Idx] != ReturnValueIndex) {
198 LLVM_DEBUG(llvm::dbgs() <<
"Taint Propagated to argument "
199 << TaintedArgs[Idx] + 1 <<
"\n");
200 if (nofTaintedArgs == 0)
201 Out <<
"Taint propagated to the ";
204 Out << TaintedArgs[Idx] + 1
205 << llvm::getOrdinalSuffix(TaintedArgs[Idx] + 1) <<
" argument";
208 LLVM_DEBUG(llvm::dbgs() <<
"Taint Propagated to return value.\n");
209 Out <<
"Taint propagated to the return value";
213 return std::string(Out.str());
/// Builds an argument set from an explicit list of indexes plus an optional
/// variadic start index.
/// \param DiscreteArgs  Individually listed argument indexes; moved into the
///                      set.
/// \param VariadicIndex When set, contains() also accepts every index that is
///                      greater than or equal to this value. Defaults to
///                      std::nullopt (no variadic part).
223 ArgSet(ArgVecTy &&DiscreteArgs,
224 std::optional<ArgIdxTy> VariadicIndex = std::nullopt)
225 : DiscreteArgs(std::move(DiscreteArgs)),
226 VariadicIndex(std::move(VariadicIndex)) {}
228 bool contains(ArgIdxTy ArgIdx)
const {
229 if (llvm::is_contained(DiscreteArgs, ArgIdx))
232 return VariadicIndex && ArgIdx >= *VariadicIndex;
/// Returns true when the set can match no index at all: there are no
/// discrete indexes and no variadic start index is set.
235 bool isEmpty()
const {
return DiscreteArgs.empty() && !VariadicIndex; }
238 ArgVecTy DiscreteArgs;
239 std::optional<ArgIdxTy> VariadicIndex;
247 class GenericTaintRule {
260 std::optional<StringRef> SinkMsg;
262 GenericTaintRule() =
default;
264 GenericTaintRule(ArgSet &&Sink, ArgSet &&
Filter, ArgSet &&Src, ArgSet &&Dst,
265 std::optional<StringRef> SinkMsg = std::nullopt)
266 : SinkArgs(std::move(Sink)), FilterArgs(std::move(
Filter)),
267 PropSrcArgs(std::move(Src)), PropDstArgs(std::move(Dst)),
273 static GenericTaintRule Sink(ArgSet &&SinkArgs,
274 std::optional<StringRef> Msg = std::nullopt) {
275 return {std::move(SinkArgs), {}, {}, {}, Msg};
279 static GenericTaintRule
Filter(ArgSet &&FilterArgs) {
280 return {{}, std::move(FilterArgs), {}, {}};
285 static GenericTaintRule Source(ArgSet &&SourceArgs) {
286 return {{}, {}, {}, std::move(SourceArgs)};
290 static GenericTaintRule Prop(ArgSet &&SrcArgs, ArgSet &&DstArgs) {
291 return {{}, {}, std::move(SrcArgs), std::move(DstArgs)};
300 static const Expr *GetArgExpr(ArgIdxTy ArgIdx,
const CallEvent &Call) {
301 return ArgIdx == ReturnValueIndex ? Call.getOriginExpr()
302 : Call.getArgExpr(ArgIdx);
312 struct TaintConfiguration {
313 using NameScopeArgs = std::tuple<std::string, std::string, ArgVecTy>;
314 enum class VariadicType { None, Src, Dst };
321 struct Sink : Common {
329 struct Propagation : Common {
332 VariadicType VarType;
336 std::vector<Propagation> Propagations;
337 std::vector<Filter> Filters;
338 std::vector<Sink> Sinks;
/// All special members are explicitly defaulted: default construction, copy
/// and move construction, and copy and move assignment use the
/// compiler-generated member-wise behavior.
340 TaintConfiguration() =
default;
341 TaintConfiguration(
const TaintConfiguration &) =
default;
342 TaintConfiguration(TaintConfiguration &&) =
default;
343 TaintConfiguration &operator=(
const TaintConfiguration &) =
default;
344 TaintConfiguration &operator=(TaintConfiguration &&) =
default;
347 struct GenericTaintRuleParser {
352 using RulesContTy = std::vector<std::pair<CallDescription, GenericTaintRule>>;
354 TaintConfiguration &&Config)
const;
361 void validateArgVector(
const std::string &Option,
const ArgVecTy &Args)
const;
363 template <
typename Config>
static NamePartsTy parseNameParts(
const Config &C);
367 template <
typename Config>
368 static void consumeRulesFromConfig(
const Config &C, GenericTaintRule &&Rule,
371 void parseConfig(
const std::string &Option, TaintConfiguration::Sink &&
P,
372 RulesContTy &Rules)
const;
374 RulesContTy &Rules)
const;
375 void parseConfig(
const std::string &Option,
376 TaintConfiguration::Propagation &&
P,
377 RulesContTy &Rules)
const;
382 class GenericTaintChecker :
public Checker<check::PreCall, check::PostCall> {
388 const char *Sep)
const override;
391 bool generateReportIfTainted(
const Expr *E, StringRef Msg,
397 bool checkUncontrolledFormatString(
const CallEvent &Call,
400 void taintUnsafeSocketProtocol(
const CallEvent &Call,
411 mutable std::optional<RuleLookupTy> StaticTaintRules;
412 mutable std::optional<RuleLookupTy> DynamicTaintRules;
417 LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfiguration::Sink)
419 LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfiguration::Propagation)
423 template <>
struct MappingTraits<TaintConfiguration> {
424 static void mapping(IO &IO, TaintConfiguration &Config) {
425 IO.mapOptional(
"Propagations", Config.Propagations);
426 IO.mapOptional(
"Filters", Config.Filters);
427 IO.mapOptional(
"Sinks", Config.Sinks);
431 template <>
struct MappingTraits<TaintConfiguration::Sink> {
432 static void mapping(IO &IO, TaintConfiguration::Sink &Sink) {
433 IO.mapRequired(
"Name", Sink.Name);
434 IO.mapOptional(
"Scope", Sink.Scope);
435 IO.mapRequired(
"Args", Sink.SinkArgs);
439 template <>
struct MappingTraits<TaintConfiguration::
Filter> {
441 IO.mapRequired(
"Name",
Filter.Name);
442 IO.mapOptional(
"Scope",
Filter.Scope);
443 IO.mapRequired(
"Args",
Filter.FilterArgs);
447 template <>
struct MappingTraits<TaintConfiguration::Propagation> {
448 static void mapping(IO &IO, TaintConfiguration::Propagation &Propagation) {
449 IO.mapRequired(
"Name", Propagation.Name);
450 IO.mapOptional(
"Scope", Propagation.Scope);
451 IO.mapOptional(
"SrcArgs", Propagation.SrcArgs);
452 IO.mapOptional(
"DstArgs", Propagation.DstArgs);
453 IO.mapOptional(
"VariadicType", Propagation.VarType);
454 IO.mapOptional(
"VariadicIndex", Propagation.VarIndex);
458 template <>
struct ScalarEnumerationTraits<TaintConfiguration::VariadicType> {
461 IO.enumCase(
Value,
"Src", TaintConfiguration::VariadicType::Src);
462 IO.enumCase(
Value,
"Dst", TaintConfiguration::VariadicType::Dst);
473 ImmutableSet<ArgIdxTy>)
476 void GenericTaintRuleParser::validateArgVector(
const std::string &Option,
477 const ArgVecTy &Args)
const {
478 for (ArgIdxTy Arg : Args) {
479 if (Arg < ReturnValueIndex) {
480 Mgr.reportInvalidCheckerOptionValue(
481 Mgr.getChecker<GenericTaintChecker>(), Option,
482 "an argument number for propagation rules greater or equal to -1");
487 template <
typename Config>
489 GenericTaintRuleParser::parseNameParts(
const Config &C) {
490 NamePartsTy NameParts;
491 if (!
C.Scope.empty()) {
494 StringRef{
C.Scope}.split(NameParts,
"::", -1,
497 NameParts.emplace_back(
C.Name);
501 template <
typename Config>
502 void GenericTaintRuleParser::consumeRulesFromConfig(
const Config &C,
503 GenericTaintRule &&Rule,
504 RulesContTy &Rules) {
505 NamePartsTy NameParts = parseNameParts(C);
510 void GenericTaintRuleParser::parseConfig(
const std::string &Option,
511 TaintConfiguration::Sink &&S,
512 RulesContTy &Rules)
const {
513 validateArgVector(Option, S.SinkArgs);
514 consumeRulesFromConfig(S, GenericTaintRule::Sink(std::move(S.SinkArgs)),
518 void GenericTaintRuleParser::parseConfig(
const std::string &Option,
520 RulesContTy &Rules)
const {
521 validateArgVector(Option, S.FilterArgs);
526 void GenericTaintRuleParser::parseConfig(
const std::string &Option,
527 TaintConfiguration::Propagation &&
P,
528 RulesContTy &Rules)
const {
529 validateArgVector(Option,
P.SrcArgs);
530 validateArgVector(Option,
P.DstArgs);
531 bool IsSrcVariadic =
P.VarType == TaintConfiguration::VariadicType::Src;
532 bool IsDstVariadic =
P.VarType == TaintConfiguration::VariadicType::Dst;
533 std::optional<ArgIdxTy> JustVarIndex =
P.VarIndex;
535 ArgSet SrcDesc(std::move(
P.SrcArgs),
536 IsSrcVariadic ? JustVarIndex : std::nullopt);
537 ArgSet DstDesc(std::move(
P.DstArgs),
538 IsDstVariadic ? JustVarIndex : std::nullopt);
540 consumeRulesFromConfig(
541 P, GenericTaintRule::Prop(std::move(SrcDesc), std::move(DstDesc)), Rules);
544 GenericTaintRuleParser::RulesContTy
546 TaintConfiguration &&Config)
const {
550 for (
auto &F : Config.Filters)
551 parseConfig(Option, std::move(F), Rules);
553 for (
auto &S : Config.Sinks)
554 parseConfig(Option, std::move(S), Rules);
556 for (
auto &
P : Config.Propagations)
557 parseConfig(Option, std::move(
P), Rules);
562 void GenericTaintChecker::initTaintRules(
CheckerContext &C)
const {
566 if (StaticTaintRules || DynamicTaintRules)
569 using RulesConstructionTy =
570 std::vector<std::pair<CallDescription, GenericTaintRule>>;
571 using TR = GenericTaintRule;
573 RulesConstructionTy GlobalCRules{
575 {{
CDM::CLibrary, {
"fdopen"}}, TR::Source({{ReturnValueIndex}})},
576 {{
CDM::CLibrary, {
"fopen"}}, TR::Source({{ReturnValueIndex}})},
577 {{
CDM::CLibrary, {
"freopen"}}, TR::Source({{ReturnValueIndex}})},
578 {{
CDM::CLibrary, {
"getch"}}, TR::Source({{ReturnValueIndex}})},
579 {{
CDM::CLibrary, {
"getchar"}}, TR::Source({{ReturnValueIndex}})},
580 {{
CDM::CLibrary, {
"getchar_unlocked"}}, TR::Source({{ReturnValueIndex}})},
581 {{
CDM::CLibrary, {
"gets"}}, TR::Source({{0, ReturnValueIndex}})},
582 {{
CDM::CLibrary, {
"gets_s"}}, TR::Source({{0, ReturnValueIndex}})},
585 {{
CDM::CLibrary, {
"wgetch"}}, TR::Source({{ReturnValueIndex}})},
591 {{
CDM::CLibrary, {
"_IO_getc"}}, TR::Source({{ReturnValueIndex}})},
592 {{
CDM::CLibrary, {
"getcwd"}}, TR::Source({{0, ReturnValueIndex}})},
593 {{
CDM::CLibrary, {
"getwd"}}, TR::Source({{0, ReturnValueIndex}})},
594 {{
CDM::CLibrary, {
"readlink"}}, TR::Source({{1, ReturnValueIndex}})},
595 {{
CDM::CLibrary, {
"readlinkat"}}, TR::Source({{2, ReturnValueIndex}})},
597 TR::Source({{ReturnValueIndex}})},
600 {{
CDM::CLibrary, {
"getseuserbyname"}}, TR::Source({{1, 2}})},
601 {{
CDM::CLibrary, {
"getgroups"}}, TR::Source({{1, ReturnValueIndex}})},
602 {{
CDM::CLibrary, {
"getlogin"}}, TR::Source({{ReturnValueIndex}})},
606 {{
CDM::CLibrary, {
"accept"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
607 {{
CDM::CLibrary, {
"atoi"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
608 {{
CDM::CLibrary, {
"atol"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
609 {{
CDM::CLibrary, {
"atoll"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
610 {{
CDM::CLibrary, {
"fgetc"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
611 {{
CDM::CLibrary, {
"fgetln"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
613 TR::Prop({{2}}, {{0, ReturnValueIndex}})},
615 TR::Prop({{2}}, {{0, ReturnValueIndex}})},
621 {{
CDM::CLibrary, {
"getc"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
623 TR::Prop({{0}}, {{ReturnValueIndex}})},
629 {{
CDM::CLibrary, {
"getw"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
631 TR::Prop({{0, 1, 2, 3}}, {{1, ReturnValueIndex}})},
633 TR::Prop({{0, 2}}, {{1, ReturnValueIndex}})},
635 TR::Prop({{3}}, {{0, ReturnValueIndex}})},
637 TR::Prop({{0}}, {{1, ReturnValueIndex}})},
639 TR::Prop({{0}}, {{1, ReturnValueIndex}})},
641 {{
CDM::CLibrary, {
"ttyname"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
643 TR::Prop({{0}}, {{1, ReturnValueIndex}})},
645 {{
CDM::CLibrary, {
"basename"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
646 {{
CDM::CLibrary, {
"dirname"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
647 {{
CDM::CLibrary, {
"fnmatch"}}, TR::Prop({{1}}, {{ReturnValueIndex}})},
649 {{
CDM::CLibrary, {
"mbtowc"}}, TR::Prop({{1}}, {{0, ReturnValueIndex}})},
650 {{
CDM::CLibrary, {
"wctomb"}}, TR::Prop({{1}}, {{0, ReturnValueIndex}})},
651 {{
CDM::CLibrary, {
"wcwidth"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
654 TR::Prop({{0, 1, 2}}, {{ReturnValueIndex}})},
656 TR::Prop({{1, 2}}, {{0, ReturnValueIndex}})},
658 TR::Prop({{1, 2}}, {{0, ReturnValueIndex}})},
665 {{
CDM::CLibrary, {
"memmem"}}, TR::Prop({{0, 1}}, {{ReturnValueIndex}})},
666 {{
CDM::CLibrary, {
"strstr"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
667 {{
CDM::CLibrary, {
"strcasestr"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
672 TR::Prop({{0}}, {{ReturnValueIndex}})},
674 TR::Prop({{0}}, {{ReturnValueIndex}})},
675 {{
CDM::CLibrary, {
"rawmemchr"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
677 TR::Prop({{0}}, {{ReturnValueIndex}})},
679 TR::Prop({{0}}, {{ReturnValueIndex}})},
681 TR::Prop({{0}}, {{ReturnValueIndex}})},
682 {{
CDM::CLibrary, {
"index"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
683 {{
CDM::CLibrary, {
"rindex"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
690 {{
CDM::CLibrary, {
"strcmp"}}, TR::Prop({{0, 1}}, {{ReturnValueIndex}})},
692 TR::Prop({{0, 1}}, {{ReturnValueIndex}})},
694 TR::Prop({{0, 1, 2}}, {{ReturnValueIndex}})},
696 TR::Prop({{0, 1, 2}}, {{ReturnValueIndex}})},
697 {{
CDM::CLibrary, {
"strspn"}}, TR::Prop({{0, 1}}, {{ReturnValueIndex}})},
698 {{
CDM::CLibrary, {
"strcspn"}}, TR::Prop({{0, 1}}, {{ReturnValueIndex}})},
699 {{
CDM::CLibrary, {
"strpbrk"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
701 {{
CDM::CLibrary, {
"strndup"}}, TR::Prop({{0, 1}}, {{ReturnValueIndex}})},
702 {{
CDM::CLibrary, {
"strndupa"}}, TR::Prop({{0, 1}}, {{ReturnValueIndex}})},
703 {{
CDM::CLibrary, {
"strdup"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
704 {{
CDM::CLibrary, {
"strdupa"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
705 {{
CDM::CLibrary, {
"wcsdup"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
710 {{
CDM::CLibrary, {
"strtol"}}, TR::Prop({{0}}, {{1, ReturnValueIndex}})},
711 {{
CDM::CLibrary, {
"strtoll"}}, TR::Prop({{0}}, {{1, ReturnValueIndex}})},
712 {{
CDM::CLibrary, {
"strtoul"}}, TR::Prop({{0}}, {{1, ReturnValueIndex}})},
713 {{
CDM::CLibrary, {
"strtoull"}}, TR::Prop({{0}}, {{1, ReturnValueIndex}})},
715 {{
CDM::CLibrary, {
"tolower"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
716 {{
CDM::CLibrary, {
"toupper"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
718 {{
CDM::CLibrary, {
"isalnum"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
719 {{
CDM::CLibrary, {
"isalpha"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
720 {{
CDM::CLibrary, {
"isascii"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
721 {{
CDM::CLibrary, {
"isblank"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
722 {{
CDM::CLibrary, {
"iscntrl"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
723 {{
CDM::CLibrary, {
"isdigit"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
724 {{
CDM::CLibrary, {
"isgraph"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
725 {{
CDM::CLibrary, {
"islower"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
726 {{
CDM::CLibrary, {
"isprint"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
727 {{
CDM::CLibrary, {
"ispunct"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
728 {{
CDM::CLibrary, {
"isspace"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
729 {{
CDM::CLibrary, {
"isupper"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
730 {{
CDM::CLibrary, {
"isxdigit"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
733 TR::Prop({{1}}, {{0, ReturnValueIndex}})},
735 TR::Prop({{1}}, {{0, ReturnValueIndex}})},
737 TR::Prop({{0, 1}}, {{0, ReturnValueIndex}})},
739 TR::Prop({{0, 1}}, {{0, ReturnValueIndex}})},
741 TR::Prop({{1, 2}}, {{0, ReturnValueIndex}})},
743 TR::Prop({{0, 1, 2}}, {{0, ReturnValueIndex}})},
754 TR::Prop({{1, 2}, 3}, {{0, ReturnValueIndex}})},
757 TR::Prop({{1}, 2}, {{0, ReturnValueIndex}})},
761 TR::Prop({{1, 4}, 5}, {{0, ReturnValueIndex}})},
765 TR::Prop({{3}, 4}, {{0, ReturnValueIndex}})},
768 {{
CDM::CLibrary, {
"system"}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)},
769 {{
CDM::CLibrary, {
"popen"}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)},
770 {{
CDM::CLibrary, {
"execl"}}, TR::Sink({{}, {0}}, MsgSanitizeSystemArgs)},
771 {{
CDM::CLibrary, {
"execle"}}, TR::Sink({{}, {0}}, MsgSanitizeSystemArgs)},
772 {{
CDM::CLibrary, {
"execlp"}}, TR::Sink({{}, {0}}, MsgSanitizeSystemArgs)},
773 {{
CDM::CLibrary, {
"execv"}}, TR::Sink({{0, 1}}, MsgSanitizeSystemArgs)},
775 TR::Sink({{0, 1, 2}}, MsgSanitizeSystemArgs)},
777 TR::Sink({{0, 1, 2}}, MsgSanitizeSystemArgs)},
778 {{
CDM::CLibrary, {
"execvp"}}, TR::Sink({{0, 1}}, MsgSanitizeSystemArgs)},
780 TR::Sink({{0, 1, 2}}, MsgSanitizeSystemArgs)},
781 {{
CDM::CLibrary, {
"dlopen"}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)},
790 TR::Sink({{0}, 1}, MsgUncontrolledFormatString)},
792 TR::Sink({{0}, 1}, MsgUncontrolledFormatString)}};
794 if (TR::UntrustedEnv(C)) {
797 GlobalCRules.push_back({{
CDM::CLibrary, {
"setproctitle_init"}},
798 TR::Sink({{1, 2}}, MsgCustomSink)});
801 GlobalCRules.push_back(
802 {{
CDM::CLibrary, {
"getenv"}}, TR::Source({{ReturnValueIndex}})});
805 StaticTaintRules.emplace(std::make_move_iterator(GlobalCRules.begin()),
806 std::make_move_iterator(GlobalCRules.end()));
811 GenericTaintRuleParser ConfigParser{*Mgr};
812 std::string Option{
"Config"};
813 StringRef ConfigFile =
815 std::optional<TaintConfiguration> Config =
816 getConfiguration<TaintConfiguration>(*Mgr,
this, Option, ConfigFile);
819 DynamicTaintRules = RuleLookupTy{};
823 GenericTaintRuleParser::RulesContTy Rules{
824 ConfigParser.parseConfiguration(Option, std::move(*Config))};
826 DynamicTaintRules.emplace(std::make_move_iterator(Rules.begin()),
827 std::make_move_iterator(Rules.end()));
830 void GenericTaintChecker::checkPreCall(
const CallEvent &Call,
835 if (
const auto *Rule =
836 Call.isGlobalCFunction() ? StaticTaintRules->lookup(Call) :
nullptr)
837 Rule->process(*
this, Call, C);
838 else if (
const auto *Rule = DynamicTaintRules->lookup(Call))
839 Rule->process(*
this, Call, C);
846 checkUncontrolledFormatString(Call, C);
850 taintUnsafeSocketProtocol(Call, C);
853 void GenericTaintChecker::checkPostCall(
const CallEvent &Call,
863 TaintArgsOnPostVisitTy TaintArgsMap =
State->get<TaintArgsOnPostVisit>();
865 const ImmutableSet<ArgIdxTy> *TaintArgs = TaintArgsMap.lookup(CurrentFrame);
868 assert(!TaintArgs->isEmpty());
870 LLVM_DEBUG(
for (ArgIdxTy I
872 llvm::dbgs() <<
"PostCall<";
873 Call.dump(llvm::dbgs());
874 llvm::dbgs() <<
"> actually wants to taint arg index: " << I <<
'\n';
877 const NoteTag *InjectionTag =
nullptr;
878 std::vector<SymbolRef> TaintedSymbols;
879 std::vector<ArgIdxTy> TaintedIndexes;
880 for (ArgIdxTy ArgNum : *TaintArgs) {
882 if (ArgNum == ReturnValueIndex) {
884 std::vector<SymbolRef> TaintedSyms =
886 if (!TaintedSyms.empty()) {
887 TaintedSymbols.push_back(TaintedSyms[0]);
888 TaintedIndexes.push_back(ArgNum);
894 if (
auto V = getPointeeOf(
State,
Call.getArgSVal(ArgNum))) {
897 if (!TaintedSyms.empty()) {
898 TaintedSymbols.push_back(TaintedSyms[0]);
899 TaintedIndexes.push_back(ArgNum);
905 InjectionTag = taintPropagationExplainerTag(C, TaintedSymbols, TaintedIndexes,
906 Call.getCalleeStackFrame(0));
908 State =
State->remove<TaintArgsOnPostVisit>(CurrentFrame);
909 C.addTransition(
State, InjectionTag);
913 const char *NL,
const char *Sep)
const {
917 void GenericTaintRule::process(
const GenericTaintChecker &
Checker,
920 const ArgIdxTy CallNumArgs = fromArgumentCount(
Call.getNumArgs());
923 const auto ForEachCallArg = [&
C, &
Call, CallNumArgs](
auto &&Fun) {
924 for (ArgIdxTy I = ReturnValueIndex; I < CallNumArgs; ++I) {
925 const Expr *E = GetArgExpr(I, Call);
926 Fun(I, E,
C.getSVal(E));
933 if (isStdin(
C.getSVal(E),
C.getASTContext())) {
934 State = addTaint(State, C.getSVal(E));
936 if (SinkArgs.contains(I) && isTaintedOrPointsToTainted(
State,
C.getSVal(E)))
937 Checker.generateReportIfTainted(E, SinkMsg.value_or(MsgCustomSink), C);
941 ForEachCallArg([
this, &
State](ArgIdxTy I,
const Expr *E,
SVal S) {
942 if (FilterArgs.contains(I)) {
943 State = removeTaint(State, S);
944 if (auto P = getPointeeOf(State, S))
945 State = removeTaint(State, *P);
954 bool IsMatching = PropSrcArgs.isEmpty();
955 std::vector<SymbolRef> TaintedSymbols;
956 std::vector<ArgIdxTy> TaintedIndexes;
957 ForEachCallArg([
this, &C, &IsMatching, &
State, &TaintedSymbols,
958 &TaintedIndexes](ArgIdxTy I,
const Expr *E,
SVal) {
959 std::optional<SVal> TaintedSVal =
960 getTaintedPointeeOrPointer(
State,
C.getSVal(E));
962 IsMatching || (PropSrcArgs.contains(I) && TaintedSVal.has_value());
965 if (TaintedSVal && !isStdin(*TaintedSVal,
C.getASTContext())) {
966 std::vector<SymbolRef> TaintedArgSyms =
968 if (!TaintedArgSyms.empty()) {
969 llvm::append_range(TaintedSymbols, TaintedArgSyms);
970 TaintedIndexes.push_back(I);
984 const bool IsNonConstRef = Ty->isReferenceType() && !Ty.isConstQualified();
985 const bool IsNonConstPtr =
986 Ty->isPointerType() && !Ty->getPointeeType().isConstQualified();
988 return IsNonConstRef || IsNonConstPtr;
992 auto &F =
State->getStateManager().get_context<ArgIdxFactory>();
993 ImmutableSet<ArgIdxTy> Result = F.getEmptySet();
996 if (PropDstArgs.contains(I)) {
997 LLVM_DEBUG(llvm::dbgs() <<
"PreCall<"; Call.dump(llvm::dbgs());
999 <<
"> prepares tainting arg index: " << I <<
'\n';);
1000 Result = F.add(Result, I);
1009 if (WouldEscape(
V, E->
getType()) && getTaintedPointeeOrPointer(
State,
V)) {
1010 LLVM_DEBUG(
if (!Result.contains(I)) {
1011 llvm::dbgs() <<
"PreCall<";
1012 Call.dump(llvm::dbgs());
1013 llvm::dbgs() <<
"> prepares tainting arg index: " << I <<
'\n';
1015 Result = F.
add(Result, I);
1019 if (!Result.isEmpty())
1020 State =
State->set<TaintArgsOnPostVisit>(
C.getStackFrame(), Result);
1021 const NoteTag *InjectionTag = taintOriginTrackerTag(
1022 C, std::move(TaintedSymbols), std::move(TaintedIndexes),
1023 Call.getCalleeStackFrame(0));
1024 C.addTransition(
State, InjectionTag);
1028 return !
C.getAnalysisManager()
1029 .getAnalyzerOptions()
1030 .ShouldAssumeControlledEnvironment;
1033 bool GenericTaintChecker::generateReportIfTainted(
const Expr *E, StringRef Msg,
1036 std::optional<SVal> TaintedSVal =
1037 getTaintedPointeeOrPointer(
C.getState(),
C.getSVal(E));
1044 auto report = std::make_unique<PathSensitiveBugReport>(BT, Msg, N);
1047 report->markInteresting(TaintedSym);
1050 C.emitReport(std::move(report));
1068 const Decl *CallDecl = Call.getDecl();
1075 const ArgIdxTy CallNumArgs = fromArgumentCount(Call.getNumArgs());
1078 ArgNum = Format->getFormatIdx() - 1;
1079 if ((Format->getType()->getName() ==
"printf") && CallNumArgs > ArgNum)
1086 bool GenericTaintChecker::checkUncontrolledFormatString(
1089 ArgIdxTy ArgNum = 0;
1095 return generateReportIfTainted(
Call.getArgExpr(ArgNum),
1096 MsgUncontrolledFormatString, C);
1099 void GenericTaintChecker::taintUnsafeSocketProtocol(
const CallEvent &Call,
1101 if (
Call.getNumArgs() < 1)
1106 if (
ID->getName() !=
"socket")
1110 StringRef DomName =
C.getMacroNameOrSpelling(DomLoc);
1112 bool SafeProtocol = DomName ==
"AF_SYSTEM" || DomName ==
"AF_LOCAL" ||
1113 DomName ==
"AF_UNIX" || DomName ==
"AF_RESERVED_36";
1118 auto &F =
State->getStateManager().get_context<ArgIdxFactory>();
1119 ImmutableSet<ArgIdxTy> Result = F.
add(F.getEmptySet(), ReturnValueIndex);
1120 State =
State->set<TaintArgsOnPostVisit>(
C.getStackFrame(), Result);
1129 bool ento::shouldRegisterGenericTaintChecker(
const CheckerManager &mgr) {
Defines enum values for all the target-independent builtin functions.
static bool getPrintfFormatArgumentNum(const CallEvent &Call, const CheckerContext &C, ArgIdxTy &ArgNum)
TODO: remove checking for printf format attributes and socket whitelisting from GenericTaintChecker,...
REGISTER_MAP_WITH_PROGRAMSTATE(TaintArgsOnPostVisit, const LocationContext *, ImmutableSet< ArgIdxTy >) void GenericTaintRuleParser
A set which is used to pass information from call pre-visit instruction to the call post-visit.
#define REGISTER_SET_FACTORY_WITH_PROGRAMSTATE(Name, Elem)
Declares an immutable set type Name and registers the factory for such sets in the program state,...
static bool contains(const std::set< tok::TokenKind > &Terminators, const Token &Tok)
__DEVICE__ int max(int __a, int __b)
Holds long-lived AST nodes (such as types and decls) that can be referred to throughout the semantic ...
QualType getFILEType() const
Retrieve the C FILE type.
StringRef getCheckerStringOption(StringRef CheckerName, StringRef OptionName, bool SearchInParents=false) const
Query an option's string value.
Decl - This represents one declaration (or definition), e.g.
FunctionDecl * getAsFunction() LLVM_READONLY
Returns the function itself, or the templated function if this is a function template.
llvm::iterator_range< specific_attr_iterator< T > > specific_attrs() const
This represents one expression.
Represents a function declaration or definition.
One of these records is kept for each identifier that is lexed.
It wraps the AnalysisDeclContext to represent both the call stack with the help of StackFrameContext ...
A (possibly-)qualified type.
QualType getCanonicalType() const
Scope - A scope is a transient data structure that is used while parsing the program.
Encodes a location in the source.
It represents a stack frame of the call stack (based on CallEvent).
SourceRange getSourceRange() const LLVM_READONLY
SourceLocation tokens are not useful in isolation - they are low level value objects created/interpre...
bool isPointerType() const
QualType getPointeeType() const
If this is a pointer, ObjC object pointer, or block pointer, this returns the respective pointee.
const BugType & getBugType() const
StringRef getCategory() const
An immutable map from CallDescriptions to arbitrary data.
A CallDescription is a pattern that can be used to match calls based on the qualified name and the ar...
@ CLibrary
Match calls to functions from the C standard library.
@ Unspecified
Match any CallEvent that is not an ObjCMethodCall.
@ CLibraryMaybeHardened
An extended version of the CLibrary mode that also matches the hardened variants like __FOO_chk() and...
Represents an abstract call to a function or method along a particular path.
const AnalyzerOptions & getAnalyzerOptions() const
CHECKER * registerChecker(AT &&... Args)
Used to register checkers.
The tag upon which the TagVisitor reacts.
void markInteresting(SymbolRef sym, bugreporter::TrackingKind TKind=bugreporter::TrackingKind::Thorough)
Marks a symbol as interesting.
bool isInteresting(SymbolRef sym) const
SVal - This represents a symbolic expression, which can be either an L-value or an R-value.
QualType getType(const ASTContext &) const
Try to get a reasonable type for the given value.
const MemRegion * getAsRegion() const
std::optional< T > getAs() const
Convert to the specified SVal type, returning std::nullopt if this SVal is not of the desired type.
constexpr XRayInstrMask None
const char *const TaintedData
ProgramStateRef addTaint(ProgramStateRef State, const Stmt *S, const LocationContext *LCtx, TaintTagType Kind=TaintTagGeneric)
Create a new state in which the value of the statement is marked as tainted.
std::vector< SymbolRef > getTaintedSymbols(ProgramStateRef State, const Stmt *S, const LocationContext *LCtx, TaintTagType Kind=TaintTagGeneric)
Returns the tainted Symbols for a given Statement and state.
bool isTainted(ProgramStateRef State, const Stmt *S, const LocationContext *LCtx, TaintTagType Kind=TaintTagGeneric)
Check if the statement has a tainted value in the given state.
void printTaint(ProgramStateRef State, raw_ostream &Out, const char *nl="\n", const char *sep="")
bool Call(InterpState &S, CodePtr OpPC, const Function *Func, uint32_t VarArgSize)
The JSON file list parser is used to communicate input to InstallAPI.
if(T->getSizeExpr()) TRY_TO(TraverseStmt(const_cast< Expr * >(T -> getSizeExpr())))
for(const auto &A :T->param_types())
Diagnostic wrappers for TextAPI types for error reporting.
static void mapping(IO &IO, TaintConfiguration &Config)
static void mapping(IO &IO, TaintConfiguration::Filter &Filter)
static void mapping(IO &IO, TaintConfiguration::Propagation &Propagation)
static void mapping(IO &IO, TaintConfiguration::Sink &Sink)
static void enumeration(IO &IO, TaintConfiguration::VariadicType &Value)