clang  20.0.0git
FormatToken.cpp
Go to the documentation of this file.
1 //===--- FormatToken.cpp - Format C++ code --------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file implements specific functions of \c FormatTokens and their
11 /// roles.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #include "FormatToken.h"
16 #include "ContinuationIndenter.h"
17 #include "llvm/ADT/SmallVector.h"
18 #include "llvm/Support/Debug.h"
19 #include <climits>
20 
21 namespace clang {
22 namespace format {
23 
// Maps a TokenType value to an entry of a function-local static table of C
// strings and returns it; values that are not below NUM_TOKEN_TYPES hit
// llvm_unreachable.
// NOTE(review): the line that opens this function (listing line 24) is absent
// from this extract; the cross-reference index of this listing gives it as
// `const char *getTokenTypeName(TokenType Type)` - confirm against upstream.
25  static const char *const TokNames[] = {
// TYPE(X) stringizes its argument, so each expansion contributes one name.
26 #define TYPE(X) #X,
// NOTE(review): listing line 27 is missing here; presumably it expands
// LIST_TOKEN_TYPES to populate the table - confirm against upstream.
28 #undef TYPE
29  nullptr};
30 
31  if (Type < NUM_TOKEN_TYPES)
32  return TokNames[Type];
33  llvm_unreachable("unknown TokenType");
// Only reached if llvm_unreachable returns.
34  return nullptr;
35 }
36 
37 // Sorted common C++ non-keyword types.
// The entries must stay sorted: FormatToken::isTypeName() runs
// std::binary_search over this table.
// NOTE(review): the declaration line (listing line 38) is missing from this
// extract; the cross-reference index of this listing gives it as
// `static SmallVector<StringRef> CppNonKeywordTypes` - confirm against upstream.
39  "clock_t", "int16_t", "int32_t", "int64_t", "int8_t",
40  "intptr_t", "ptrdiff_t", "size_t", "time_t", "uint16_t",
41  "uint32_t", "uint64_t", "uint8_t", "uintptr_t",
42 };
43 
// Returns true if this token is annotated TT_TypeName, if the underlying Tok
// is a simple type specifier for LangOpts, or - only when IsCpp holds - if it
// is an identifier for which the binary search over CppNonKeywordTypes
// succeeds.
44 bool FormatToken::isTypeName(const LangOptions &LangOpts) const {
// C++ is detected through the CXXOperatorNames language option.
45  const bool IsCpp = LangOpts.CXXOperatorNames;
46  return is(TT_TypeName) || Tok.isSimpleTypeSpecifier(LangOpts) ||
47  (IsCpp && is(tok::identifier) &&
48  std::binary_search(CppNonKeywordTypes.begin(),
// NOTE(review): listing line 49 (the remainder of the binary_search call and
// the closing of this expression) is missing from this extract.
50 }
51 
52 bool FormatToken::isTypeOrIdentifier(const LangOptions &LangOpts) const {
53  return isTypeName(LangOpts) || isOneOf(tok::kw_auto, tok::identifier);
54 }
55 
// Predicate on a closing brace (asserted to be tok::r_brace). It can only
// return true when Style.Cpp11BracedListStyle is set and
// Style.AlignAfterOpenBracket is BAS_BlockIndent; in that case it returns
// true if the matching '{' is a BK_BracedInit brace or is directly preceded
// by '='.
// NOTE(review): the signature line (listing line 56) is missing from this
// extract; the cross-reference index of this listing gives it as
// `bool isBlockIndentedInitRBrace(const FormatStyle &Style) const`.
57  assert(is(tok::r_brace));
58  if (!Style.Cpp11BracedListStyle ||
59  Style.AlignAfterOpenBracket != FormatStyle::BAS_BlockIndent) {
60  return false;
61  }
// MatchingParen of the closing brace is expected to be its opening brace.
62  const auto *LBrace = MatchingParen;
63  assert(LBrace && LBrace->is(tok::l_brace));
64  if (LBrace->is(BK_BracedInit))
65  return true;
// An opening brace right after '=' also counts.
66  if (LBrace->Previous && LBrace->Previous->is(tok::equal))
67  return true;
68  return false;
69 }
70 
// Predicate deciding whether this token opens a block or a block-type list.
// It returns true for: a C# BK_BracedInit '{'; a TT_TemplateString token that
// opens a scope; array-initializer and proto-extension '['; a '{' that is a
// BK_Block, a dict literal, or at NestingLevel 0 when Cpp11BracedListStyle is
// off; and '<' in proto styles.
// NOTE(review): the signature line (listing line 71) is missing from this
// extract; the cross-reference index of this listing gives it as
// `bool opensBlockOrBlockTypeList(const FormatStyle &Style) const`.
72  // C# does not indent object initialisers as continuations.
73  if (is(tok::l_brace) && getBlockKind() == BK_BracedInit && Style.isCSharp())
74  return true;
75  if (is(TT_TemplateString) && opensScope())
76  return true;
77  return is(TT_ArrayInitializerLSquare) || is(TT_ProtoExtensionLSquare) ||
78  (is(tok::l_brace) &&
79  (getBlockKind() == BK_Block || is(TT_DictLiteral) ||
80  (!Style.Cpp11BracedListStyle && NestingLevel == 0))) ||
81  (is(tok::less) && Style.isProto());
82 }
83 
85 
87 
// Places the remaining tokens of a braced list in columns, using the
// ColumnFormat chosen by getColumnFormat(), and returns the accumulated
// penalty reported by the continuation indenter. Returns 0 without placing
// anything when the column layout does not apply, and 10'000 when no
// ColumnFormat can be used.
// NOTE(review): the start of the signature (listing lines 88-89) is missing
// from this extract; the cross-reference index of this listing gives it as
// `unsigned formatAfterToken(LineState &State, ContinuationIndenter *Indenter,
// bool DryRun) override`.
90  bool DryRun) {
91  if (!State.NextToken || !State.NextToken->Previous)
92  return 0;
93 
94  if (Formats.size() <= 1)
95  return 0; // Handled by formatFromToken (1) or avoid severe penalty (0).
96 
97  // Ensure that we start on the opening brace.
98  const FormatToken *LBrace =
99  State.NextToken->Previous->getPreviousNonComment();
// Bail out for blocks, dict literals and lists that start with a designated
// initializer.
100  if (!LBrace || !LBrace->isOneOf(tok::l_brace, TT_ArrayInitializerLSquare) ||
101  LBrace->is(BK_Block) || LBrace->is(TT_DictLiteral) ||
102  LBrace->Next->is(TT_DesignatedInitializerPeriod)) {
103  return 0;
104  }
105 
106  // Calculate the number of code points we have to format this list. As the
107  // first token is already placed, we have to subtract it.
108  unsigned RemainingCodePoints =
109  Style.ColumnLimit - State.Column + State.NextToken->Previous->ColumnWidth;
110 
111  // Find the best ColumnFormat, i.e. the best number of columns to use.
112  const ColumnFormat *Format = getColumnFormat(RemainingCodePoints);
113 
114  // If no ColumnFormat can be used, the braced list would generally be
115  // bin-packed. Add a severe penalty to this so that column layouts are
116  // preferred if possible.
117  if (!Format)
118  return 10'000;
119 
120  // Format the entire list.
121  unsigned Penalty = 0;
// Column is the index within the current row; Item indexes Commas/ItemLengths.
122  unsigned Column = 0;
123  unsigned Item = 0;
124  while (State.NextToken != LBrace->MatchingParen) {
125  bool NewLine = false;
126  unsigned ExtraSpaces = 0;
127 
128  // If the previous token was one of our commas, we are now on the next item.
129  if (Item < Commas.size() && State.NextToken->Previous == Commas[Item]) {
130  if (!State.NextToken->isTrailingComment()) {
// Pad by the difference between the column's width and the finished item's
// length.
131  ExtraSpaces += Format->ColumnSizes[Column] - ItemLengths[Item];
132  ++Column;
133  }
134  ++Item;
135  }
136 
// Start a new row when the current one is full or a break is mandatory.
137  if (Column == Format->Columns || State.NextToken->MustBreakBefore) {
138  Column = 0;
139  NewLine = true;
140  }
141 
142  // Place token using the continuation indenter and store the penalty.
143  Penalty += Indenter->addTokenToState(State, NewLine, DryRun, ExtraSpaces);
144  }
145  return Penalty;
146 }
147 
// Sets AvoidBinPacking on the innermost entry of State.Stack when only a
// single ColumnFormat was computed or the list contains a nested braced list.
// Always returns 0 and places no tokens itself.
// NOTE(review): the start of the signature (listing lines 148-149) is missing
// from this extract; the cross-reference index of this listing gives it as
// `unsigned formatFromToken(LineState &State, ContinuationIndenter *Indenter,
// bool DryRun) override`.
150  bool DryRun) {
151  // Formatting with 1 Column isn't really a column layout, so we don't need the
152  // special logic here. We can just avoid bin packing any of the parameters.
153  if (Formats.size() == 1 || HasNestedBracedList)
154  State.Stack.back().AvoidBinPacking = true;
155  return 0;
156 }
157 
158 // Returns the lengths in code points between Begin and End (both included),
159 // assuming that the entire sequence is put on a single line.
160 static unsigned CodePointsBetween(const FormatToken *Begin,
161  const FormatToken *End) {
162  assert(End->TotalLength >= Begin->TotalLength);
163  return End->TotalLength - Begin->TotalLength + Begin->ColumnWidth;
164 }
165 
// Precomputes the column-layout candidates (Formats) for the comma-separated
// list opened by Token. It first measures every item (ItemLengths, plus an
// end-of-line length that includes trailing comments), then, for each column
// count from 1 up to ColumnLimit / 3, records per-column widths, total width
// and line count, keeping only the layouts that pass the filters below.
// Returns early, leaving Formats untouched, when a column layout is not
// wanted for this list.
// NOTE(review): the signature line (listing line 166) is missing from this
// extract; the cross-reference index of this listing gives it as
// `void precomputeFormattingInfos(const FormatToken *Token) override`.
167  // FIXME: At some point we might want to do this for other lists, too.
168  if (!Token->MatchingParen ||
169  !Token->isOneOf(tok::l_brace, TT_ArrayInitializerLSquare)) {
170  return;
171  }
172 
173  // In C++11 braced list style, we should not format in columns unless they
174  // have many items (20 or more) or we allow bin-packing of function call
175  // arguments.
// NOTE(review): listing line 176 (the start of this `if` condition) is
// missing from this extract.
177  Commas.size() < 19) {
178  return;
179  }
180 
181  // Limit column layout for JavaScript array initializers to 20 or more items
182  // for now to introduce it carefully. We can become more aggressive if this
183  // is necessary.
184  if (Token->is(TT_ArrayInitializerLSquare) && Commas.size() < 19)
185  return;
186 
187  // Column format doesn't really make sense if we don't align after brackets.
// NOTE(review): listing line 188 (the `if` guarding this return) is missing
// from this extract.
189  return;
190 
191  FormatToken *ItemBegin = Token->Next;
192  while (ItemBegin->isTrailingComment())
193  ItemBegin = ItemBegin->Next;
194  SmallVector<bool, 8> MustBreakBeforeItem;
195 
196  // The lengths of an item if it is put at the end of the line. This includes
197  // trailing comments which are otherwise ignored for column alignment.
198  SmallVector<unsigned, 8> EndOfLineItemLength;
199  MustBreakBeforeItem.reserve(Commas.size() + 1);
200  EndOfLineItemLength.reserve(Commas.size() + 1);
201  ItemLengths.reserve(Commas.size() + 1);
202 
203  bool HasSeparatingComment = false;
// One iteration per item: N commas delimit up to N + 1 items.
204  for (unsigned i = 0, e = Commas.size() + 1; i != e; ++i) {
205  assert(ItemBegin);
206  // Skip comments on their own line.
207  while (ItemBegin->HasUnescapedNewline && ItemBegin->isTrailingComment()) {
208  ItemBegin = ItemBegin->Next;
209  HasSeparatingComment = i > 0;
210  }
211 
212  MustBreakBeforeItem.push_back(ItemBegin->MustBreakBefore);
213  if (ItemBegin->is(tok::l_brace))
214  HasNestedBracedList = true;
215  const FormatToken *ItemEnd = nullptr;
// The last item ends at the closing bracket rather than at a comma.
216  if (i == Commas.size()) {
217  ItemEnd = Token->MatchingParen;
218  const FormatToken *NonCommentEnd = ItemEnd->getPreviousNonComment();
219  ItemLengths.push_back(CodePointsBetween(ItemBegin, NonCommentEnd));
// NOTE(review): listing line 220 (the start of this `if` condition) is
// missing from this extract.
221  !ItemEnd->Previous->isTrailingComment()) {
222  // In Cpp11 braced list style, the } and possibly other subsequent
223  // tokens will need to stay on a line with the last element.
224  while (ItemEnd->Next && !ItemEnd->Next->CanBreakBefore)
225  ItemEnd = ItemEnd->Next;
226  } else {
227  // In other braced lists styles, the "}" can be wrapped to the new line.
228  ItemEnd = Token->MatchingParen->Previous;
229  }
230  } else {
231  ItemEnd = Commas[i];
232  // The comma is counted as part of the item when calculating the length.
233  ItemLengths.push_back(CodePointsBetween(ItemBegin, ItemEnd));
234 
235  // Consume trailing comments so they are included in EndOfLineItemLength.
236  if (ItemEnd->Next && !ItemEnd->Next->HasUnescapedNewline &&
237  ItemEnd->Next->isTrailingComment()) {
238  ItemEnd = ItemEnd->Next;
239  }
240  }
241  EndOfLineItemLength.push_back(CodePointsBetween(ItemBegin, ItemEnd));
242  // If there is a trailing comma in the list, the next item will start at the
243  // closing brace. Don't create an extra item for this.
244  if (ItemEnd->getNextNonComment() == Token->MatchingParen)
245  break;
246  ItemBegin = ItemEnd->Next;
247  }
248 
249  // Don't use column layout for lists with few elements and in presence of
250  // separating comments.
251  if (Commas.size() < 5 || HasSeparatingComment)
252  return;
253 
// Nested braced lists need at least 19 commas to get a column layout.
254  if (Token->NestingLevel != 0 && Token->is(tok::l_brace) && Commas.size() < 19)
255  return;
256 
257  // We can never place more than ColumnLimit / 3 items in a row (because of the
258  // spaces and the comma).
259  unsigned MaxItems = Style.ColumnLimit / 3;
260  SmallVector<unsigned> MinSizeInColumn;
261  MinSizeInColumn.reserve(MaxItems);
// Try every column count and record the resulting layout metrics.
262  for (unsigned Columns = 1; Columns <= MaxItems; ++Columns) {
263  ColumnFormat Format;
264  Format.Columns = Columns;
265  Format.ColumnSizes.resize(Columns);
266  MinSizeInColumn.assign(Columns, UINT_MAX);
267  Format.LineCount = 1;
268  bool HasRowWithSufficientColumns = false;
269  unsigned Column = 0;
270  for (unsigned i = 0, e = ItemLengths.size(); i != e; ++i) {
271  assert(i < MustBreakBeforeItem.size());
272  if (MustBreakBeforeItem[i] || Column == Columns) {
273  ++Format.LineCount;
274  Column = 0;
275  }
276  if (Column == Columns - 1)
277  HasRowWithSufficientColumns = true;
// Items in the last column are measured with their end-of-line length.
278  unsigned Length =
279  (Column == Columns - 1) ? EndOfLineItemLength[i] : ItemLengths[i];
280  Format.ColumnSizes[Column] = std::max(Format.ColumnSizes[Column], Length);
281  MinSizeInColumn[Column] = std::min(MinSizeInColumn[Column], Length);
282  ++Column;
283  }
284  // If all rows are terminated early (e.g. by trailing comments), we don't
285  // need to look further.
286  if (!HasRowWithSufficientColumns)
287  break;
288  Format.TotalWidth = Columns - 1; // Width of the N-1 spaces.
289 
290  for (unsigned i = 0; i < Columns; ++i)
291  Format.TotalWidth += Format.ColumnSizes[i];
292 
293  // Don't use this Format, if the difference between the longest and shortest
294  // element in a column exceeds a threshold to avoid excessive spaces.
// The last column is excluded from this check (loop bound is Columns - 1).
295  if ([&] {
296  for (unsigned i = 0; i < Columns - 1; ++i)
297  if (Format.ColumnSizes[i] - MinSizeInColumn[i] > 10)
298  return true;
299  return false;
300  }()) {
301  continue;
302  }
303 
304  // Ignore layouts that are bound to violate the column limit.
305  if (Format.TotalWidth > Style.ColumnLimit && Columns > 1)
306  continue;
307 
308  Formats.push_back(Format);
309  }
310 }
311 
312 const CommaSeparatedList::ColumnFormat *
313 CommaSeparatedList::getColumnFormat(unsigned RemainingCharacters) const {
314  const ColumnFormat *BestFormat = nullptr;
315  for (const ColumnFormat &Format : llvm::reverse(Formats)) {
316  if (Format.TotalWidth <= RemainingCharacters || Format.Columns == 1) {
317  if (BestFormat && Format.LineCount > BestFormat->LineCount)
318  break;
319  BestFormat = &Format;
320  }
321  }
322  return BestFormat;
323 }
324 
325 } // namespace format
326 } // namespace clang
This file implements an indenter that manages the indentation of continuations.
This file contains the declaration of the FormatToken, a wrapper around Token with additional informa...
#define LIST_TOKEN_TYPES
Definition: FormatToken.h:27
static const char *const TokNames[]
Definition: TokenKinds.cpp:17
SourceLocation End
SourceLocation Begin
ContinuationIndenter * Indenter
LineState State
__DEVICE__ int min(int __a, int __b)
__DEVICE__ int max(int __a, int __b)
Keeps track of the various options that can be enabled, which controls the dialect of C or C++ that i...
Definition: LangOptions.h:480
Token - This structure provides full information about a lexed token.
Definition: Token.h:36
bool is(tok::TokenKind K) const
is/isNot - Predicates to check if this token is a specific kind, as in "if (Tok.is(tok::l_brace)) {....
Definition: Token.h:99
bool isOneOf(tok::TokenKind K1, tok::TokenKind K2) const
Definition: Token.h:101
bool isSimpleTypeSpecifier(const LangOptions &LangOpts) const
Determine whether the token kind starts a simple-type-specifier.
Definition: Lexer.cpp:78
The base class of the type hierarchy.
Definition: Type.h:1829
unsigned formatFromToken(LineState &State, ContinuationIndenter *Indenter, bool DryRun) override
Apply the special formatting that the given role demands.
unsigned formatAfterToken(LineState &State, ContinuationIndenter *Indenter, bool DryRun) override
Same as formatFromToken, but assumes that the first token has already been set thereby deciding on th...
Definition: FormatToken.cpp:88
void precomputeFormattingInfos(const FormatToken *Token) override
After the TokenAnnotator has finished annotating all the tokens, this function precomputes required i...
virtual void precomputeFormattingInfos(const FormatToken *Token)
After the TokenAnnotator has finished annotating all the tokens, this function precomputes required i...
Definition: FormatToken.cpp:86
const FormatStyle & Style
Definition: FormatToken.h:961
#define UINT_MAX
Definition: limits.h:64
const char * getTokenTypeName(TokenType Type)
Determines the name of a token type.
Definition: FormatToken.cpp:24
static SmallVector< StringRef > CppNonKeywordTypes
Definition: FormatToken.cpp:38
static unsigned CodePointsBetween(const FormatToken *Begin, const FormatToken *End)
TokenType
Determines the semantic type of a syntactic token, e.g.
Definition: FormatToken.h:207
The JSON file list parser is used to communicate input to InstallAPI.
if(T->getSizeExpr()) TRY_TO(TraverseStmt(const_cast< Expr * >(T -> getSizeExpr())))
The FormatStyle is used to configure the formatting to follow specific guidelines.
Definition: Format.h:55
bool Cpp11BracedListStyle
If true, format braced lists as best suited for C++11 braced lists.
Definition: Format.h:2494
@ BAS_DontAlign
Don't align, instead use ContinuationIndentWidth, e.g.
Definition: Format.h:78
@ BAS_BlockIndent
Always break after an open bracket, if the parameters don't fit on a single line.
Definition: Format.h:99
bool BinPackArguments
If false, a function call's arguments will either be all on the same line or will have one line each.
Definition: Format.h:1193
BracketAlignmentStyle AlignAfterOpenBracket
If true, horizontally aligns arguments after an open bracket.
Definition: Format.h:107
unsigned ColumnLimit
The column limit.
Definition: Format.h:2372
A wrapper around a Token storing information about the whitespace characters preceding it.
Definition: FormatToken.h:292
unsigned NestingLevel
The nesting level of this token, i.e.
Definition: FormatToken.h:515
bool isTypeName(const LangOptions &LangOpts) const
Definition: FormatToken.cpp:44
unsigned CanBreakBefore
true if it is allowed to break before this token.
Definition: FormatToken.h:347
StringRef TokenText
The raw text of the token.
Definition: FormatToken.h:312
bool opensScope() const
Returns whether Tok is ([{ or an opening < of a template or in protos.
Definition: FormatToken.h:700
BraceBlockKind getBlockKind() const
Definition: FormatToken.h:384
FormatToken * Next
The next token in the unwrapped line.
Definition: FormatToken.h:564
unsigned MustBreakBefore
Whether there must be a line break before this token.
Definition: FormatToken.h:337
unsigned HasUnescapedNewline
Whether there is at least one unescaped newline before the Token.
Definition: FormatToken.h:325
FormatToken * getPreviousNonComment() const
Returns the previous token ignoring comments.
Definition: FormatToken.h:832
FormatToken * getNextNonComment() const
Returns the next token ignoring comments.
Definition: FormatToken.h:840
bool isBlockIndentedInitRBrace(const FormatStyle &Style) const
Returns true if this token ends a block indented initializer list.
Definition: FormatToken.cpp:56
bool is(tok::TokenKind Kind) const
Definition: FormatToken.h:604
bool opensBlockOrBlockTypeList(const FormatStyle &Style) const
Returns true if this token starts a block-type list, i.e.
Definition: FormatToken.cpp:71
bool isOneOf(A K1, B K2) const
Definition: FormatToken.h:616
bool isTrailingComment() const
Definition: FormatToken.h:774
bool isTypeOrIdentifier(const LangOptions &LangOpts) const
Definition: FormatToken.cpp:52
FormatToken * MatchingParen
If this is a bracket, this points to the matching one.
Definition: FormatToken.h:558
FormatToken * Previous
The previous token in the unwrapped line.
Definition: FormatToken.h:561
The current state when indenting an unwrapped line.