clang  20.0.0git
ScanfFormatString.cpp
Go to the documentation of this file.
1 //= ScanfFormatString.cpp - Analysis of scanf format strings ---*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Handling of format strings in scanf and friends. The structure of format
10 // strings for fscanf() is described in C99 7.19.6.2.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "clang/AST/FormatString.h"
15 #include "FormatStringParsing.h"
16 #include "clang/Basic/TargetInfo.h"
17 
26 using namespace clang;
27 
30 
33  const char *&Beg, const char *E) {
34  const char *I = Beg;
35  const char *start = I - 1;
36  UpdateOnReturn <const char*> UpdateBeg(Beg, I);
37 
38  // No more characters?
39  if (I == E) {
40  H.HandleIncompleteScanList(start, I);
41  return true;
42  }
43 
44  // Special case: ']' is the first character.
45  if (*I == ']') {
46  if (++I == E) {
47  H.HandleIncompleteScanList(start, I - 1);
48  return true;
49  }
50  }
51 
52  // Special case: "^]" are the first characters.
53  if (I + 1 != E && I[0] == '^' && I[1] == ']') {
54  I += 2;
55  if (I == E) {
56  H.HandleIncompleteScanList(start, I - 1);
57  return true;
58  }
59  }
60 
61  // Look for a ']' character which denotes the end of the scan list.
62  while (*I != ']') {
63  if (++I == E) {
64  H.HandleIncompleteScanList(start, I - 1);
65  return true;
66  }
67  }
68 
69  CS.setEndScanList(I);
70  return false;
71 }
72 
73 // FIXME: Much of this is copy-paste from ParsePrintfSpecifier.
74 // We can possibly refactor.
76  const char *&Beg,
77  const char *E,
78  unsigned &argIndex,
79  const LangOptions &LO,
80  const TargetInfo &Target) {
 // Parses one conversion specification out of [Beg, E): skips ahead to the
 // next '%', then reads the optional argument position, the '*'
 // assignment-suppression flag, the field width, the length modifier and
 // finally the conversion character. Malformed input is reported through H.
 //
 // NOTE(review): the first line of this signature (return type, function
 // name and the handler parameter H) is missing from this listing, as are
 // a few body lines flagged below.
 // NOTE(review): the bare `return true` / `return false` statements convert
 // to the result type; presumably `true` means "stop parsing" and `false`
 // means "no specifier produced" - confirm against SpecifierResult.
81  using namespace clang::analyze_format_string;
82  using namespace clang::analyze_scanf;
83  const char *I = Beg;
84  const char *Start = nullptr;
85  UpdateOnReturn <const char*> UpdateBeg(Beg, I);
86 
87  // Look for a '%' character that indicates the start of a format specifier.
88  for ( ; I != E ; ++I) {
89  char c = *I;
90  if (c == '\0') {
91  // Detect spurious null characters, which are likely errors.
92  H.HandleNullChar(I);
93  return true;
94  }
95  if (c == '%') {
96  Start = I++; // Record the start of the format specifier.
97  break;
98  }
99  }
100 
101  // No format specifier found?
102  if (!Start)
103  return false;
104 
105  if (I == E) {
106  // No more characters left?
107  H.HandleIncompleteSpecifier(Start, E - Start);
108  return true;
109  }
110 
111  ScanfSpecifier FS;
112  if (ParseArgPosition(H, FS, Start, I, E))
113  return true;
114 
115  if (I == E) {
116  // No more characters left?
117  H.HandleIncompleteSpecifier(Start, E - Start);
118  return true;
119  }
120 
121  // Look for '*' flag if it is present.
122  if (*I == '*') {
123  FS.setSuppressAssignment(I);
124  if (++I == E) {
125  H.HandleIncompleteSpecifier(Start, E - Start);
126  return true;
127  }
128  }
129 
130  // Look for the field width (if any). Unlike printf, this is either
131  // a fixed integer or isn't present.
 // NOTE(review): the statements that declare `Amt` and open the `if` block
 // closed further down are missing from this listing.
134  assert(Amt.getHowSpecified() == OptionalAmount::Constant);
135  FS.setFieldWidth(Amt);
136 
137  if (I == E) {
138  // No more characters left?
139  H.HandleIncompleteSpecifier(Start, E - Start);
140  return true;
141  }
142  }
143 
144  // Look for the length modifier.
145  if (ParseLengthModifier(FS, I, E, LO, /*IsScanf=*/true) && I == E) {
146  // No more characters left?
147  H.HandleIncompleteSpecifier(Start, E - Start);
148  return true;
149  }
150 
151  // Detect spurious null characters, which are likely errors.
152  if (*I == '\0') {
153  H.HandleNullChar(I);
154  return true;
155  }
156 
157  // Finally, look for the conversion specifier.
 // `k` stays InvalidSpecifier unless one of the cases below recognizes the
 // character; I is advanced past the conversion character either way.
158  const char *conversionPosition = I++;
159  ScanfConversionSpecifier::Kind k = ScanfConversionSpecifier::InvalidSpecifier;
160  switch (*conversionPosition) {
161  default:
162  break;
163  case '%': k = ConversionSpecifier::PercentArg; break;
164  case 'b': k = ConversionSpecifier::bArg; break;
165  case 'A': k = ConversionSpecifier::AArg; break;
166  case 'E': k = ConversionSpecifier::EArg; break;
167  case 'F': k = ConversionSpecifier::FArg; break;
168  case 'G': k = ConversionSpecifier::GArg; break;
169  case 'X': k = ConversionSpecifier::XArg; break;
170  case 'a': k = ConversionSpecifier::aArg; break;
171  case 'd': k = ConversionSpecifier::dArg; break;
172  case 'e': k = ConversionSpecifier::eArg; break;
173  case 'f': k = ConversionSpecifier::fArg; break;
174  case 'g': k = ConversionSpecifier::gArg; break;
175  case 'i': k = ConversionSpecifier::iArg; break;
176  case 'n': k = ConversionSpecifier::nArg; break;
177  case 'c': k = ConversionSpecifier::cArg; break;
178  case 'C': k = ConversionSpecifier::CArg; break;
179  case 'S': k = ConversionSpecifier::SArg; break;
180  case '[': k = ConversionSpecifier::ScanListArg; break;
181  case 'u': k = ConversionSpecifier::uArg; break;
182  case 'x': k = ConversionSpecifier::xArg; break;
183  case 'o': k = ConversionSpecifier::oArg; break;
184  case 's': k = ConversionSpecifier::sArg; break;
185  case 'p': k = ConversionSpecifier::pArg; break;
186  // Apple extensions
187  // Apple-specific
 // NOTE(review): the statement guarded by each isOSDarwin() check below is
 // missing from this listing; as shown, each `if` would govern the `break`.
188  case 'D':
189  if (Target.getTriple().isOSDarwin())
191  break;
192  case 'O':
193  if (Target.getTriple().isOSDarwin())
195  break;
196  case 'U':
197  if (Target.getTriple().isOSDarwin())
199  break;
200  }
201  ScanfConversionSpecifier CS(conversionPosition, k);
 // A '[' conversion is followed by a scan list that must be consumed too.
202  if (k == ScanfConversionSpecifier::ScanListArg) {
203  if (ParseScanList(H, CS, I, E))
204  return true;
205  }
206  FS.setConversionSpecifier(CS);
 // Only conversions that consume a data argument, are not suppressed with
 // '*', and do not use a positional index take the next sequential index.
207  if (CS.consumesDataArgument() && !FS.getSuppressAssignment()
208  && !FS.usesPositionalArg())
209  FS.setArgIndex(argIndex++);
210 
211  // FIXME: Combining '%' and '*' doesn't make sense. Issue a warning.
212  // FIXME: Combining 'ConsumedSoFar' and '*' doesn't make sense.
213 
214  if (k == ScanfConversionSpecifier::InvalidSpecifier) {
 // Note that the invalid-specifier range is measured from Beg (where this
 // call began scanning), not from Start (the '%').
215  unsigned Len = I - Beg;
216  if (ParseUTF8InvalidSpecifier(Beg, E, Len)) {
217  CS.setEndScanList(Beg + Len);
218  FS.setConversionSpecifier(CS);
219  }
220  // Assume the conversion takes one argument.
221  return !H.HandleInvalidScanfConversionSpecifier(FS, Beg, Len);
222  }
223  return ScanfSpecifierResult(Start, FS);
224 }
225 
// Returns the ArgType that the argument matching this specifier is expected
// to have, chosen from the conversion kind and then the length modifier.
// Yields ArgType::Invalid() when the conversion consumes no data argument or
// when a length modifier is not accepted for the conversion, and a
// default-constructed ArgType() when the outer switch does not return.
//
// NOTE(review): almost all `case` labels of the outer and inner switches are
// missing from this listing, so the mapping from a given conversion/length
// modifier pair to each `return` below cannot be read off this text.
226 ArgType ScanfSpecifier::getArgType(ASTContext &Ctx) const {
227  const ScanfConversionSpecifier &CS = getConversionSpecifier();
228 
229  if (!CS.consumesDataArgument())
230  return ArgType::Invalid();
231 
232  switch(CS.getKind()) {
233  // Signed int.
237  switch (LM.getKind()) {
239  return ArgType::PtrTo(Ctx.IntTy);
243  return ArgType::PtrTo(Ctx.ShortTy);
245  return ArgType::PtrTo(Ctx.LongTy);
248  return ArgType::PtrTo(Ctx.LongLongTy);
250  return ArgType::PtrTo(ArgType(Ctx.LongLongTy, "__int64"));
252  return ArgType::PtrTo(ArgType(Ctx.getIntMaxType(), "intmax_t"));
254  return ArgType::PtrTo(ArgType(Ctx.getSignedSizeType(), "ssize_t"));
256  return ArgType::PtrTo(ArgType(Ctx.getPointerDiffType(), "ptrdiff_t"));
258  // GNU extension.
259  return ArgType::PtrTo(Ctx.LongLongTy);
266  return ArgType::Invalid();
267  }
 // Reached only if the inner switch above falls out without returning.
268  llvm_unreachable("Unsupported LengthModifier Type");
269 
270  // Unsigned int.
278  switch (LM.getKind()) {
280  return ArgType::PtrTo(Ctx.UnsignedIntTy);
282  return ArgType::PtrTo(Ctx.UnsignedCharTy);
284  return ArgType::PtrTo(Ctx.UnsignedShortTy);
286  return ArgType::PtrTo(Ctx.UnsignedLongTy);
291  return ArgType::PtrTo(ArgType(Ctx.UnsignedLongLongTy, "unsigned __int64"));
293  return ArgType::PtrTo(ArgType(Ctx.getUIntMaxType(), "uintmax_t"));
295  return ArgType::PtrTo(ArgType(Ctx.getSizeType(), "size_t"));
297  return ArgType::PtrTo(
298  ArgType(Ctx.getUnsignedPointerDiffType(), "unsigned ptrdiff_t"));
300  // GNU extension.
308  return ArgType::Invalid();
309  }
 // Reached only if the inner switch above falls out without returning.
310  llvm_unreachable("Unsupported LengthModifier Type");
311 
312  // Float.
321  switch (LM.getKind()) {
323  return ArgType::PtrTo(Ctx.FloatTy);
325  return ArgType::PtrTo(Ctx.DoubleTy);
327  return ArgType::PtrTo(Ctx.LongDoubleTy);
328  default:
329  return ArgType::Invalid();
330  }
331 
332  // Char, string and scanlist.
336  switch (LM.getKind()) {
341  return ArgType::PtrTo(ArgType(Ctx.getWideCharType(), "wchar_t"));
 // NOTE(review): the statement guarded by this MSVCRT check is missing from
 // this listing; when the check fails, control falls through to `default`.
346  if (Ctx.getTargetInfo().getTriple().isOSMSVCRT())
348  [[fallthrough]];
349  default:
350  return ArgType::Invalid();
351  }
354  // FIXME: Mac OS X specific?
355  switch (LM.getKind()) {
358  return ArgType::PtrTo(ArgType(Ctx.getWideCharType(), "wchar_t"));
361  return ArgType::PtrTo(ArgType(ArgType::WCStrTy, "wchar_t *"));
 // NOTE(review): as above, the MSVCRT-guarded statement is missing here.
363  if (Ctx.getTargetInfo().getTriple().isOSMSVCRT())
365  [[fallthrough]];
366  default:
367  return ArgType::Invalid();
368  }
369 
370  // Pointer.
373 
374  // Write-back.
376  switch (LM.getKind()) {
378  return ArgType::PtrTo(Ctx.IntTy);
380  return ArgType::PtrTo(Ctx.SignedCharTy);
382  return ArgType::PtrTo(Ctx.ShortTy);
384  return ArgType::PtrTo(Ctx.LongTy);
387  return ArgType::PtrTo(Ctx.LongLongTy);
389  return ArgType::PtrTo(ArgType(Ctx.LongLongTy, "__int64"));
391  return ArgType::PtrTo(ArgType(Ctx.getIntMaxType(), "intmax_t"));
393  return ArgType::PtrTo(ArgType(Ctx.getSignedSizeType(), "ssize_t"));
395  return ArgType::PtrTo(ArgType(Ctx.getPointerDiffType(), "ptrdiff_t"));
397  return ArgType(); // FIXME: Is this a known extension?
404  return ArgType::Invalid();
405  }
406 
407  default:
408  break;
409  }
410 
 // Conversions not handled above yield a default-constructed ArgType.
411  return ArgType();
412 }
413 
414 bool ScanfSpecifier::fixType(QualType QT, QualType RawQT,
415  const LangOptions &LangOpt,
416  ASTContext &Ctx) {
417 
418  // %n is different from other conversion specifiers; don't try to fix it.
419  if (CS.getKind() == ConversionSpecifier::nArg)
420  return false;
421 
422  if (!QT->isPointerType())
423  return false;
424 
425  QualType PT = QT->getPointeeType();
426 
427  // If it's an enum, get its underlying type.
428  if (const EnumType *ETy = PT->getAs<EnumType>()) {
429  // Don't try to fix incomplete enums.
430  if (!ETy->getDecl()->isComplete())
431  return false;
432  PT = ETy->getDecl()->getIntegerType();
433  }
434 
435  const BuiltinType *BT = PT->getAs<BuiltinType>();
436  if (!BT)
437  return false;
438 
439  // Pointer to a character.
440  if (PT->isAnyCharacterType()) {
441  CS.setKind(ConversionSpecifier::sArg);
442  if (PT->isWideCharType())
443  LM.setKind(LengthModifier::AsWideChar);
444  else
445  LM.setKind(LengthModifier::None);
446 
447  // If we know the target array length, we can use it as a field width.
448  if (const ConstantArrayType *CAT = Ctx.getAsConstantArrayType(RawQT)) {
449  if (CAT->getSizeModifier() == ArraySizeModifier::Normal)
451  CAT->getZExtSize() - 1, "", 0, false);
452  }
453  return true;
454  }
455 
456  // Figure out the length modifier.
457  switch (BT->getKind()) {
458  // no modifier
459  case BuiltinType::UInt:
460  case BuiltinType::Int:
461  case BuiltinType::Float:
462  LM.setKind(LengthModifier::None);
463  break;
464 
465  // hh
466  case BuiltinType::Char_U:
467  case BuiltinType::UChar:
468  case BuiltinType::Char_S:
469  case BuiltinType::SChar:
470  LM.setKind(LengthModifier::AsChar);
471  break;
472 
473  // h
474  case BuiltinType::Short:
475  case BuiltinType::UShort:
476  LM.setKind(LengthModifier::AsShort);
477  break;
478 
479  // l
480  case BuiltinType::Long:
481  case BuiltinType::ULong:
482  case BuiltinType::Double:
483  LM.setKind(LengthModifier::AsLong);
484  break;
485 
486  // ll
487  case BuiltinType::LongLong:
488  case BuiltinType::ULongLong:
489  LM.setKind(LengthModifier::AsLongLong);
490  break;
491 
492  // L
493  case BuiltinType::LongDouble:
494  LM.setKind(LengthModifier::AsLongDouble);
495  break;
496 
497  // Don't know.
498  default:
499  return false;
500  }
501 
502  // Handle size_t, ptrdiff_t, etc. that have dedicated length modifiers in C99.
503  if (LangOpt.C99 || LangOpt.CPlusPlus11)
504  namedTypeToLengthModifier(PT, LM);
505 
506  // If fixing the length modifier was enough, we are done.
507  if (hasValidLengthModifier(Ctx.getTargetInfo(), LangOpt)) {
508  const analyze_scanf::ArgType &AT = getArgType(Ctx);
509  if (AT.isValid() && AT.matchesType(Ctx, QT))
510  return true;
511  }
512 
513  // Figure out the conversion specifier.
514  if (PT->isRealFloatingType())
515  CS.setKind(ConversionSpecifier::fArg);
516  else if (PT->isSignedIntegerType())
517  CS.setKind(ConversionSpecifier::dArg);
518  else if (PT->isUnsignedIntegerType())
519  CS.setKind(ConversionSpecifier::uArg);
520  else
521  llvm_unreachable("Unexpected type");
522 
523  return true;
524 }
525 
526 void ScanfSpecifier::toString(raw_ostream &os) const {
527  os << "%";
528 
529  if (usesPositionalArg())
530  os << getPositionalArgIndex() << "$";
531  if (SuppressAssignment)
532  os << "*";
533 
534  FieldWidth.toString(os);
535  os << LM.toString();
536  os << CS.toString();
537 }
538 
540  const char *I,
541  const char *E,
542  const LangOptions &LO,
543  const TargetInfo &Target) {
544 
545  unsigned argIndex = 0;
546 
547  // Keep looking for a format specifier until we have exhausted the string.
548  while (I != E) {
549  const ScanfSpecifierResult &FSR = ParseScanfSpecifier(H, I, E, argIndex,
550  LO, Target);
551  // Did a fail-stop error of any kind occur when parsing the specifier?
552  // If so, don't do any more processing.
553  if (FSR.shouldStop())
554  return true;
555  // Did we exhaust the string or encounter an error that
556  // we can recover from?
557  if (!FSR.hasValue())
558  continue;
559  // We have a format specifier. Pass it to the callback.
560  if (!H.HandleScanfSpecifier(FSR.getValue(), FSR.getStart(),
561  I - FSR.getStart())) {
562  return true;
563  }
564  }
565  assert(I == E && "Format string not exhausted");
566  return false;
567 }
enum clang::sema::@1659::IndirectLocalPathEntry::EntryKind Kind
Expr * E
llvm::MachO::Target Target
Definition: MachO.h:51
static bool ParseScanList(FormatStringHandler &H, ScanfConversionSpecifier &CS, const char *&Beg, const char *E)
static ScanfSpecifierResult ParseScanfSpecifier(FormatStringHandler &H, const char *&Beg, const char *E, unsigned &argIndex, const LangOptions &LO, const TargetInfo &Target)
clang::analyze_format_string::SpecifierResult< ScanfSpecifier > ScanfSpecifierResult
__device__ __2f16 float c
Holds long-lived AST nodes (such as types and decls) that can be referred to throughout the semantic ...
Definition: ASTContext.h:187
CanQualType LongTy
Definition: ASTContext.h:1128
QualType getUnsignedPointerDiffType() const
Return the unique unsigned counterpart of "ptrdiff_t" integer type.
CanQualType FloatTy
Definition: ASTContext.h:1131
CanQualType DoubleTy
Definition: ASTContext.h:1131
CanQualType getIntMaxType() const
Return the unique type for "intmax_t" (C99 7.18.1.5), defined in <stdint.h>.
CanQualType LongDoubleTy
Definition: ASTContext.h:1131
const ConstantArrayType * getAsConstantArrayType(QualType T) const
Definition: ASTContext.h:2830
QualType getPointerDiffType() const
Return the unique type for "ptrdiff_t" (C99 7.17) defined in <stddef.h>.
CanQualType UnsignedLongTy
Definition: ASTContext.h:1129
CanQualType getSizeType() const
Return the unique type for "size_t" (C99 7.17), defined in <stddef.h>.
CanQualType IntTy
Definition: ASTContext.h:1128
CanQualType SignedCharTy
Definition: ASTContext.h:1128
CanQualType UnsignedCharTy
Definition: ASTContext.h:1129
CanQualType UnsignedIntTy
Definition: ASTContext.h:1129
CanQualType UnsignedLongLongTy
Definition: ASTContext.h:1130
CanQualType UnsignedShortTy
Definition: ASTContext.h:1129
const TargetInfo & getTargetInfo() const
Definition: ASTContext.h:779
CanQualType ShortTy
Definition: ASTContext.h:1128
CanQualType LongLongTy
Definition: ASTContext.h:1128
QualType getWideCharType() const
Return the type of wide characters.
Definition: ASTContext.h:1852
CanQualType getSignedSizeType() const
Return the unique signed counterpart of the integer type corresponding to size_t.
CanQualType getUIntMaxType() const
Return the unique type for "uintmax_t" (C99 7.18.1.5), defined in <stdint.h>.
This class is used for builtin types like 'int'.
Definition: Type.h:3029
Kind getKind() const
Definition: Type.h:3081
Represents the canonical version of C arrays with a specified constant size.
Definition: Type.h:3614
A helper class that allows the use of isa/cast/dyncast to detect TagType objects of enums.
Definition: Type.h:6001
Keeps track of the various options that can be enabled, which controls the dialect of C or C++ that i...
Definition: LangOptions.h:480
A (possibly-)qualified type.
Definition: Type.h:941
Exposes information about the current target.
Definition: TargetInfo.h:218
const llvm::Triple & getTriple() const
Returns the target triple of the primary target.
Definition: TargetInfo.h:1256
bool isSignedIntegerType() const
Return true if this is an integer type that is signed, according to C99 6.2.5p4 [char,...
Definition: Type.cpp:2146
bool isPointerType() const
Definition: Type.h:8013
QualType getPointeeType() const
If this is a pointer, ObjC object pointer, or block pointer, this returns the respective pointee.
Definition: Type.cpp:705
bool isAnyCharacterType() const
Determine whether this type is any of the built-in character types.
Definition: Type.cpp:2125
bool isRealFloatingType() const
Floating point categories.
Definition: Type.cpp:2266
bool isWideCharType() const
Definition: Type.cpp:2098
bool isUnsignedIntegerType() const
Return true if this is an integer type that is unsigned, according to C99 6.2.5p6 [which returns true...
Definition: Type.cpp:2196
const T * getAs() const
Member-template getAs<specific type>'.
Definition: Type.h:8568
static ArgType PtrTo(const ArgType &A)
Create an ArgType which corresponds to the type pointer to A.
Definition: FormatString.h:318
MatchKind matchesType(ASTContext &C, QualType argTy) const
virtual void HandleIncompleteScanList(const char *start, const char *end)
Definition: FormatString.h:776
virtual void HandleNullChar(const char *nullCharacter)
Definition: FormatString.h:721
virtual void HandleIncompleteSpecifier(const char *startSpecifier, unsigned specifierLen)
Definition: FormatString.h:730
virtual bool HandleScanfSpecifier(const analyze_scanf::ScanfSpecifier &FS, const char *startSpecifier, unsigned specifierLen)
Definition: FormatString.h:770
virtual bool HandleInvalidScanfConversionSpecifier(const analyze_scanf::ScanfSpecifier &FS, const char *startSpecifier, unsigned specifierLen)
Definition: FormatString.h:763
Represents the length modifier in a format string in scanf/printf.
Definition: FormatString.h:65
Defines the clang::TargetInfo interface.
Common components of both fprintf and fscanf format strings.
Definition: FormatString.h:30
OptionalAmount ParseAmount(const char *&Beg, const char *E)
bool ParseLengthModifier(FormatSpecifier &FS, const char *&Beg, const char *E, const LangOptions &LO, bool IsScanf=false)
Returns true if a LengthModifier was parsed and installed in the FormatSpecifier& argument,...
bool ParseArgPosition(FormatStringHandler &H, FormatSpecifier &CS, const char *Start, const char *&Beg, const char *E)
bool ParseScanfString(FormatStringHandler &H, const char *beg, const char *end, const LangOptions &LO, const TargetInfo &Target)
bool ParseUTF8InvalidSpecifier(const char *SpecifierBegin, const char *FmtStrEnd, unsigned &Len)
Returns true if the invalid specifier in SpecifierBegin is a UTF-8 string; check that it won't go fur...
Pieces specific to fscanf format strings.
Definition: FormatString.h:649
std::string toString(const til::SExpr *E)
The JSON file list parser is used to communicate input to InstallAPI.