clang  19.0.0git
IntegerLiteralSeparatorFixer.cpp
Go to the documentation of this file.
1 //===--- IntegerLiteralSeparatorFixer.cpp -----------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file implements IntegerLiteralSeparatorFixer that fixes C++ integer
11 /// literal separators.
12 ///
13 //===----------------------------------------------------------------------===//
14 
16 
17 namespace clang {
18 namespace format {
19 
/// Radix category of an integer literal, as classified by getBase().
enum class Base {
  Binary,  ///< Literal starts with "0b" or "0B".
  Decimal, ///< Literal starts with a digit from '1' to '9'.
  Hex,     ///< Literal starts with "0x" or "0X".
  Other    ///< Literal starts with '0' followed by anything else.
};
21 
22 static Base getBase(const StringRef IntegerLiteral) {
23  assert(IntegerLiteral.size() > 1);
24 
25  if (IntegerLiteral[0] > '0') {
26  assert(IntegerLiteral[0] <= '9');
27  return Base::Decimal;
28  }
29 
30  assert(IntegerLiteral[0] == '0');
31 
32  switch (IntegerLiteral[1]) {
33  case 'b':
34  case 'B':
35  return Base::Binary;
36  case 'x':
37  case 'X':
38  return Base::Hex;
39  default:
40  return Base::Other;
41  }
42 }
43 
44 std::pair<tooling::Replacements, unsigned>
46  const FormatStyle &Style) {
47  switch (Style.Language) {
50  Separator = '\'';
51  break;
55  Separator = '_';
56  break;
57  default:
58  return {};
59  }
60 
61  const auto &Option = Style.IntegerLiteralSeparator;
62  const auto Binary = Option.Binary;
63  const auto Decimal = Option.Decimal;
64  const auto Hex = Option.Hex;
65  const bool SkipBinary = Binary == 0;
66  const bool SkipDecimal = Decimal == 0;
67  const bool SkipHex = Hex == 0;
68 
69  if (SkipBinary && SkipDecimal && SkipHex)
70  return {};
71 
72  const auto BinaryMinDigits =
73  std::max((int)Option.BinaryMinDigits, Binary + 1);
74  const auto DecimalMinDigits =
75  std::max((int)Option.DecimalMinDigits, Decimal + 1);
76  const auto HexMinDigits = std::max((int)Option.HexMinDigits, Hex + 1);
77 
78  const auto &SourceMgr = Env.getSourceManager();
79  AffectedRangeManager AffectedRangeMgr(SourceMgr, Env.getCharRanges());
80 
81  const auto ID = Env.getFileID();
82  const auto LangOpts = getFormattingLangOpts(Style);
83  Lexer Lex(ID, SourceMgr.getBufferOrFake(ID), SourceMgr, LangOpts);
84  Lex.SetCommentRetentionState(true);
85 
86  Token Tok;
87  tooling::Replacements Result;
88 
89  for (bool Skip = false; !Lex.LexFromRawLexer(Tok);) {
90  auto Length = Tok.getLength();
91  if (Length < 2)
92  continue;
93  auto Location = Tok.getLocation();
94  auto Text = StringRef(SourceMgr.getCharacterData(Location), Length);
95  if (Tok.is(tok::comment)) {
97  Skip = true;
98  else if (isClangFormatOn(Text))
99  Skip = false;
100  continue;
101  }
102  if (Skip || Tok.isNot(tok::numeric_constant) || Text[0] == '.' ||
103  !AffectedRangeMgr.affectsCharSourceRange(
104  CharSourceRange::getCharRange(Location, Tok.getEndLoc()))) {
105  continue;
106  }
107  const auto B = getBase(Text);
108  const bool IsBase2 = B == Base::Binary;
109  const bool IsBase10 = B == Base::Decimal;
110  const bool IsBase16 = B == Base::Hex;
111  if ((IsBase2 && SkipBinary) || (IsBase10 && SkipDecimal) ||
112  (IsBase16 && SkipHex) || B == Base::Other) {
113  continue;
114  }
115  if (Style.isCpp()) {
116  // Hex alpha digits a-f/A-F must be at the end of the string literal.
117  StringRef Suffixes = "_himnsuyd";
118  if (const auto Pos =
119  Text.find_first_of(IsBase16 ? Suffixes.drop_back() : Suffixes);
120  Pos != StringRef::npos) {
121  Text = Text.substr(0, Pos);
122  Length = Pos;
123  }
124  }
125  if ((IsBase10 && Text.find_last_of(".eEfFdDmM") != StringRef::npos) ||
126  (IsBase16 && Text.find_last_of(".pP") != StringRef::npos)) {
127  continue;
128  }
129  const auto Start = Text[0] == '0' ? 2 : 0;
130  auto End = Text.find_first_of("uUlLzZn", Start);
131  if (End == StringRef::npos)
132  End = Length;
133  if (Start > 0 || End < Length) {
134  Length = End - Start;
135  Text = Text.substr(Start, Length);
136  }
137  auto DigitsPerGroup = Decimal;
138  auto MinDigits = DecimalMinDigits;
139  if (IsBase2) {
140  DigitsPerGroup = Binary;
141  MinDigits = BinaryMinDigits;
142  } else if (IsBase16) {
143  DigitsPerGroup = Hex;
144  MinDigits = HexMinDigits;
145  }
146  const auto SeparatorCount = Text.count(Separator);
147  const int DigitCount = Length - SeparatorCount;
148  const bool RemoveSeparator = DigitsPerGroup < 0 || DigitCount < MinDigits;
149  if (RemoveSeparator && SeparatorCount == 0)
150  continue;
151  if (!RemoveSeparator && SeparatorCount > 0 &&
152  checkSeparator(Text, DigitsPerGroup)) {
153  continue;
154  }
155  const auto &Formatted =
156  format(Text, DigitsPerGroup, DigitCount, RemoveSeparator);
157  assert(Formatted != Text);
158  if (Start > 0)
159  Location = Location.getLocWithOffset(Start);
160  cantFail(Result.add(
161  tooling::Replacement(SourceMgr, Location, Length, Formatted)));
162  }
163 
164  return {Result, 0};
165 }
166 
167 bool IntegerLiteralSeparatorFixer::checkSeparator(
168  const StringRef IntegerLiteral, int DigitsPerGroup) const {
169  assert(DigitsPerGroup > 0);
170 
171  int I = 0;
172  for (auto C : llvm::reverse(IntegerLiteral)) {
173  if (C == Separator) {
174  if (I < DigitsPerGroup)
175  return false;
176  I = 0;
177  } else {
178  if (I == DigitsPerGroup)
179  return false;
180  ++I;
181  }
182  }
183 
184  return true;
185 }
186 
187 std::string IntegerLiteralSeparatorFixer::format(const StringRef IntegerLiteral,
188  int DigitsPerGroup,
189  int DigitCount,
190  bool RemoveSeparator) const {
191  assert(DigitsPerGroup != 0);
192 
193  std::string Formatted;
194 
195  if (RemoveSeparator) {
196  for (auto C : IntegerLiteral)
197  if (C != Separator)
198  Formatted.push_back(C);
199  return Formatted;
200  }
201 
202  int Remainder = DigitCount % DigitsPerGroup;
203 
204  int I = 0;
205  for (auto C : IntegerLiteral) {
206  if (C == Separator)
207  continue;
208  if (I == (Remainder > 0 ? Remainder : DigitsPerGroup)) {
209  Formatted.push_back(Separator);
210  I = 0;
211  Remainder = 0;
212  }
213  Formatted.push_back(C);
214  ++I;
215  }
216 
217  return Formatted;
218 }
219 
220 } // namespace format
221 } // namespace clang
static char ID
Definition: Arena.cpp:183
static constexpr CPUSuffix Suffixes[]
Definition: Hexagon.cpp:231
StringRef Text
Definition: Format.cpp:2977
const Environment & Env
Definition: HTMLLogger.cpp:148
This file declares IntegerLiteralSeparatorFixer that fixes C++ integer literal separators.
SourceLocation End
__DEVICE__ int max(int __a, int __b)
static CharSourceRange getCharRange(SourceRange R)
Lexer - This provides a simple interface that turns a text buffer into a stream of tokens.
Definition: Lexer.h:78
bool LexFromRawLexer(Token &Result)
LexFromRawLexer - Lex a token from a designated raw lexer (one with no associated preprocessor object...
Definition: Lexer.h:236
void SetCommentRetentionState(bool Mode)
SetCommentRetentionMode - Change the comment retention mode of the lexer to the specified mode.
Definition: Lexer.h:269
Token - This structure provides full information about a lexed token.
Definition: Token.h:36
SourceLocation getEndLoc() const
Definition: Token.h:159
SourceLocation getLocation() const
Return a source location identifier for the specified offset in the current file.
Definition: Token.h:132
unsigned getLength() const
Definition: Token.h:135
bool is(tok::TokenKind K) const
is/isNot - Predicates to check if this token is a specific kind, as in "if (Tok.is(tok::l_brace)) {....
Definition: Token.h:99
bool isNot(tok::TokenKind K) const
Definition: Token.h:100
bool affectsCharSourceRange(const CharSourceRange &Range)
std::pair< tooling::Replacements, unsigned > process(const Environment &Env, const FormatStyle &Style)
A text replacement.
Definition: Replacement.h:83
Maintains a set of replacements that are conflict-free.
Definition: Replacement.h:212
static Base getBase(const StringRef IntegerLiteral)
bool isClangFormatOff(StringRef Comment)
Definition: Format.cpp:4138
LangOptions getFormattingLangOpts(const FormatStyle &Style=getLLVMStyle())
Returns the LangOpts that the formatter expects you to set.
Definition: Format.cpp:3841
bool isClangFormatOn(StringRef Comment)
Definition: Format.cpp:4134
The JSON file list parser is used to communicate input to InstallAPI.
The FormatStyle is used to configure the formatting to follow specific guidelines.
Definition: Format.h:55
@ LK_CSharp
Should be used for C#.
Definition: Format.h:3158
@ LK_Java
Should be used for Java.
Definition: Format.h:3160
@ LK_Cpp
Should be used for C, C++.
Definition: Format.h:3156
@ LK_JavaScript
Should be used for JavaScript.
Definition: Format.h:3162
@ LK_ObjC
Should be used for Objective-C, Objective-C++.
Definition: Format.h:3166