clang  19.0.0git
ContinuationIndenter.h
Go to the documentation of this file.
1 //===--- ContinuationIndenter.h - Format C++ code ---------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file implements an indenter that manages the indentation of
11 /// continuations.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #ifndef LLVM_CLANG_LIB_FORMAT_CONTINUATIONINDENTER_H
16 #define LLVM_CLANG_LIB_FORMAT_CONTINUATIONINDENTER_H
17 
18 #include "Encoding.h"
19 #include "FormatToken.h"
20 
21 namespace clang {
22 class SourceManager;
23 
24 namespace format {
25 
26 class AnnotatedLine;
27 class BreakableToken;
28 struct FormatToken;
29 struct LineState;
30 struct ParenState;
31 struct RawStringFormatStyleManager;
32 class WhitespaceManager;
33 
35  llvm::StringMap<FormatStyle> DelimiterStyle;
36  llvm::StringMap<FormatStyle> EnclosingFunctionStyle;
37 
38  RawStringFormatStyleManager(const FormatStyle &CodeStyle);
39 
40  std::optional<FormatStyle> getDelimiterStyle(StringRef Delimiter) const;
41 
42  std::optional<FormatStyle>
43  getEnclosingFunctionStyle(StringRef EnclosingFunction) const;
44 };
45 
47 public:
48  /// Constructs a \c ContinuationIndenter to format \p Line starting in
49  /// column \p FirstIndent.
50  ContinuationIndenter(const FormatStyle &Style,
51  const AdditionalKeywords &Keywords,
52  const SourceManager &SourceMgr,
53  WhitespaceManager &Whitespaces,
54  encoding::Encoding Encoding,
55  bool BinPackInconclusiveFunctions);
56 
57  /// Get the initial state, i.e. the state after placing \p Line's
58  /// first token at \p FirstIndent. When reformatting a fragment of code, as in
59  /// the case of formatting inside raw string literals, \p FirstStartColumn is
60  /// the column at which the state of the parent formatter is.
61  LineState getInitialState(unsigned FirstIndent, unsigned FirstStartColumn,
62  const AnnotatedLine *Line, bool DryRun);
63 
64  // FIXME: canBreak and mustBreak aren't strictly indentation-related. Find a
65  // better home.
66  /// Returns \c true, if a line break after \p State is allowed.
67  bool canBreak(const LineState &State);
68 
69  /// Returns \c true, if a line break after \p State is mandatory.
70  bool mustBreak(const LineState &State);
71 
72  /// Appends the next token to \p State and updates information
73  /// necessary for indentation.
74  ///
75  /// Puts the token on the current line if \p Newline is \c false and adds a
76  /// line break and necessary indentation otherwise.
77  ///
78  /// If \p DryRun is \c false, also creates and stores the required
79  /// \c Replacement. NOTE(review): neither the unsigned return value (presumably a penalty) nor \p ExtraSpaces (defaults to 0) is described here - confirm against the definition.
80  unsigned addTokenToState(LineState &State, bool Newline, bool DryRun,
81  unsigned ExtraSpaces = 0);
82 
83  /// Get the column limit for this line. This is the style's column
84  /// limit, potentially reduced for preprocessor definitions.
85  unsigned getColumnLimit(const LineState &State) const;
86 
87 private:
88  /// Mark the next token as consumed in \p State and modify its stacks
89  /// accordingly. NOTE(review): the unsigned return value is not described here; presumably a penalty - confirm against the definition.
90  unsigned moveStateToNextToken(LineState &State, bool DryRun, bool Newline);
91 
92  /// Update 'State' according to the next token's fake left parentheses.
93  void moveStatePastFakeLParens(LineState &State, bool Newline);
94  /// Update 'State' according to the next token's fake r_parens.
95  void moveStatePastFakeRParens(LineState &State);
96 
97  /// Update 'State' according to the next token being one of "(<{[".
98  void moveStatePastScopeOpener(LineState &State, bool Newline);
99  /// Update 'State' according to the next token being one of ")>}]".
100  void moveStatePastScopeCloser(LineState &State);
101  /// Update 'State' with the next token opening a nested block.
102  void moveStateToNewBlock(LineState &State, bool NewLine);
103 
104  /// Reformats a raw string literal.
105  ///
106  /// \returns An extra penalty induced by reformatting the token.
107  unsigned reformatRawStringLiteral(const FormatToken &Current,
108  LineState &State,
109  const FormatStyle &RawStringStyle,
110  bool DryRun, bool Newline);
111 
112  /// If the current token is at the end of the current line, handle
113  /// the transition to the next line. NOTE(review): the unsigned return value is not described here; presumably a penalty - confirm against the definition.
114  unsigned handleEndOfLine(const FormatToken &Current, LineState &State,
115  bool DryRun, bool AllowBreak, bool Newline);
116 
117  /// If \p Current is a raw string that is configured to be reformatted,
118  /// return the style to be used.
119  std::optional<FormatStyle> getRawStringStyle(const FormatToken &Current,
120  const LineState &State);
121 
122  /// If the current token sticks out over the end of the line, break
123  /// it if possible.
124  ///
125  /// \returns A pair (penalty, exceeded), where penalty is the extra penalty
126  /// when tokens are broken or lines exceed the column limit, and exceeded
127  /// indicates whether the algorithm purposefully left lines exceeding the
128  /// column limit.
129  ///
130  /// The returned penalty will cover the cost of the additional line breaks
131  /// and column limit violation in all lines except for the last one. The
132  /// penalty for the column limit violation in the last line (and in single
133  /// line tokens) is handled in \c addNextStateToQueue.
134  ///
135  /// \p Strict indicates whether reflowing is allowed to leave characters
136  /// protruding the column limit; if true, lines will be split strictly within
137  /// the column limit where possible; if false, words are allowed to protrude
138  /// over the column limit as long as the penalty is less than the penalty
139  /// of a break.
140  std::pair<unsigned, bool> breakProtrudingToken(const FormatToken &Current,
141  LineState &State,
142  bool AllowBreak, bool DryRun,
143  bool Strict);
144 
145  /// Returns the \c BreakableToken starting at \p Current, or nullptr
146  /// if the current token cannot be broken.
147  std::unique_ptr<BreakableToken>
148  createBreakableToken(const FormatToken &Current, LineState &State,
149  bool AllowBreak);
150 
151  /// Appends the next token to \p State and updates information
152  /// necessary for indentation.
153  ///
154  /// Puts the token on the current line.
155  ///
156  /// If \p DryRun is \c false, also creates and stores the required
157  /// \c Replacement.
158  void addTokenOnCurrentLine(LineState &State, bool DryRun,
159  unsigned ExtraSpaces);
160 
161  /// Appends the next token to \p State and updates information
162  /// necessary for indentation.
163  ///
164  /// Adds a line break and necessary indentation.
165  ///
166  /// If \p DryRun is \c false, also creates and stores the required
167  /// \c Replacement. NOTE(review): the unsigned return value is not described here; presumably a penalty - confirm against the definition.
168  unsigned addTokenOnNewLine(LineState &State, bool DryRun);
169 
170  /// Calculate the new column for a line wrap before the next token.
171  unsigned getNewLineColumn(const LineState &State);
172 
173  /// Adds a multiline token to the \p State.
174  ///
175  /// \returns Extra penalty for the first line of the literal: last line is
176  /// handled in \c addNextStateToQueue, and the penalty for other lines doesn't
177  /// matter, as we don't change them.
178  unsigned addMultilineToken(const FormatToken &Current, LineState &State);
179 
180  /// Returns \c true if the next token starts a multiline string
181  /// literal.
182  ///
183  /// This includes implicitly concatenated strings, strings that will be broken
184  /// by clang-format and string literals with escaped newlines.
185  bool nextIsMultilineString(const LineState &State);
186 
187  FormatStyle Style;
188  const AdditionalKeywords &Keywords;
189  const SourceManager &SourceMgr;
190  WhitespaceManager &Whitespaces;
191  encoding::Encoding Encoding;
192  bool BinPackInconclusiveFunctions;
193  llvm::Regex CommentPragmasRegex;
194  const RawStringFormatStyleManager RawStringFormats;
195 };
196 
197 struct ParenState {
198  ParenState(const FormatToken *Tok, unsigned Indent, unsigned LastSpace,
199  bool AvoidBinPacking, bool NoLineBreak)
211 
212  /// \brief The token opening this parenthesis level, or nullptr if this level
213  /// is opened by fake parenthesis.
214  ///
215  /// Not considered for memoization as it will always have the same value at
216  /// the same token.
217  const FormatToken *Tok;
218 
219  /// The position to which a specific parenthesis level needs to be
220  /// indented.
221  unsigned Indent;
222 
223  /// The position of the last space on each level.
224  ///
225  /// Used e.g. to break like:
226  /// functionCall(Parameter, otherCall(
227  /// OtherParameter));
228  unsigned LastSpace;
229 
230  /// If a block relative to this parenthesis level gets wrapped, indent
231  /// it this much.
233 
234  /// The position of the first "<<" operator encountered on each level.
235  ///
236  /// Used to align "<<" operators. 0 if no such operator has been encountered
237  /// on a level.
238  unsigned FirstLessLess = 0;
239 
240  /// The column of a \c ? in a conditional expression.
241  unsigned QuestionColumn = 0;
242 
243  /// The position of the colon in an ObjC method declaration/call.
244  unsigned ColonPos = 0;
245 
246  /// The start of the most recent function in a builder-type call.
247  unsigned StartOfFunctionCall = 0;
248 
249  /// Contains the start of array subscript expressions, so that they
250  /// can be aligned.
252 
253  /// If a nested name specifier was broken over multiple lines, this
254  /// contains the start column of the second line. Otherwise 0.
256 
257  /// If a call expression was broken over multiple lines, this
258  /// contains the start column of the second line. Otherwise 0.
259  unsigned CallContinuation = 0;
260 
261  /// The column of the first variable name in a variable declaration.
262  ///
263  /// Used to align further variables if necessary.
264  unsigned VariablePos = 0;
265 
266  /// Whether this block's indentation is used for alignment.
267  bool IsAligned : 1;
268 
269  /// Whether a newline needs to be inserted before the block's closing
270  /// brace.
271  ///
272  /// We only want to insert a newline before the closing brace if there also
273  /// was a newline after the beginning left brace.
275 
276  /// Whether a newline needs to be inserted before the block's closing
277  /// paren.
278  ///
279  /// We only want to insert a newline before the closing paren if there also
280  /// was a newline after the beginning left paren.
282 
283  /// Avoid bin packing, i.e. multiple parameters/elements on multiple
284  /// lines, in this context.
285  bool AvoidBinPacking : 1;
286 
287  /// Break after the next comma (or all the commas in this context if
288  /// \c AvoidBinPacking is \c true).
290 
291  /// Line breaking in this context would break a formatting rule.
292  bool NoLineBreak : 1;
293 
294  /// Same as \c NoLineBreak, but is restricted until the end of the
295  /// operand (including the next ",").
297 
298  /// True if the last binary operator on this level was wrapped to the
299  /// next line.
301 
302  /// \c true if this \c ParenState already contains a line-break.
303  ///
304  /// The first line break in a certain \c ParenState causes extra penalty so
305  /// that clang-format prefers similar breaks, i.e. breaks in the same
306  /// parenthesis.
308 
309  /// \c true if this \c ParenState contains multiple segments of a
310  /// builder-type call on one line.
312 
313  /// \c true if the colons of the current ObjC method expression should
314  /// be aligned.
315  ///
316  /// Not considered for memoization as it will always have the same value at
317  /// the same token.
318  bool AlignColons : 1;
319 
320  /// \c true if at least one selector name was found in the current
321  /// ObjC method expression.
322  ///
323  /// Not considered for memoization as it will always have the same value at
324  /// the same token.
326 
327  /// \c true if there are multiple nested blocks inside these parens.
328  ///
329  /// Not considered for memoization as it will always have the same value at
330  /// the same token.
332 
333  /// The start of a nested block (e.g. lambda introducer in C++ or
334  /// "function" in JavaScript) is not wrapped to a new line.
336 
337  /// \c true if the current \c ParenState represents an Objective-C
338  /// array literal.
340 
342 
343  /// \brief true if the current \c ParenState represents the false branch of
344  /// a chained conditional expression (e.g. else-if)
346 
347  /// \brief true if the conditional was wrapped on the first operator (the
348  /// question mark)
350 
351  /// \brief Indicates the indent should be reduced by the length of the
352  /// operator.
354 
355  bool operator<(const ParenState &Other) const {
356  if (Indent != Other.Indent)
357  return Indent < Other.Indent;
358  if (LastSpace != Other.LastSpace)
359  return LastSpace < Other.LastSpace;
360  if (NestedBlockIndent != Other.NestedBlockIndent)
361  return NestedBlockIndent < Other.NestedBlockIndent;
362  if (FirstLessLess != Other.FirstLessLess)
363  return FirstLessLess < Other.FirstLessLess;
364  if (IsAligned != Other.IsAligned)
365  return IsAligned;
366  if (BreakBeforeClosingBrace != Other.BreakBeforeClosingBrace)
368  if (BreakBeforeClosingParen != Other.BreakBeforeClosingParen)
370  if (QuestionColumn != Other.QuestionColumn)
371  return QuestionColumn < Other.QuestionColumn;
372  if (AvoidBinPacking != Other.AvoidBinPacking)
373  return AvoidBinPacking;
374  if (BreakBeforeParameter != Other.BreakBeforeParameter)
375  return BreakBeforeParameter;
376  if (NoLineBreak != Other.NoLineBreak)
377  return NoLineBreak;
378  if (LastOperatorWrapped != Other.LastOperatorWrapped)
379  return LastOperatorWrapped;
380  if (ColonPos != Other.ColonPos)
381  return ColonPos < Other.ColonPos;
382  if (StartOfFunctionCall != Other.StartOfFunctionCall)
383  return StartOfFunctionCall < Other.StartOfFunctionCall;
384  if (StartOfArraySubscripts != Other.StartOfArraySubscripts)
385  return StartOfArraySubscripts < Other.StartOfArraySubscripts;
386  if (CallContinuation != Other.CallContinuation)
387  return CallContinuation < Other.CallContinuation;
388  if (VariablePos != Other.VariablePos)
389  return VariablePos < Other.VariablePos;
390  if (ContainsLineBreak != Other.ContainsLineBreak)
391  return ContainsLineBreak;
392  if (ContainsUnwrappedBuilder != Other.ContainsUnwrappedBuilder)
394  if (NestedBlockInlined != Other.NestedBlockInlined)
395  return NestedBlockInlined;
396  if (IsCSharpGenericTypeConstraint != Other.IsCSharpGenericTypeConstraint)
398  if (IsChainedConditional != Other.IsChainedConditional)
399  return IsChainedConditional;
400  if (IsWrappedConditional != Other.IsWrappedConditional)
401  return IsWrappedConditional;
402  if (UnindentOperator != Other.UnindentOperator)
403  return UnindentOperator;
404  return false;
405  }
406 };
407 
408 /// The current state when indenting an unwrapped line.
409 ///
410 /// As the indenting tries different combinations this is copied by value.
411 struct LineState {
412  /// The number of used columns in the current line.
413  unsigned Column;
414 
415  /// The token that needs to be next formatted.
417 
418  /// \c true if \p NextToken should not continue this line.
420 
421  /// The \c NestingLevel at the start of this line.
423 
424  /// The lowest \c NestingLevel on the current line.
426 
427  /// The start column of the string literal, if we're in a string
428  /// literal sequence, 0 otherwise.
430 
431  /// Disallow line breaks for this line.
433 
434  /// A stack keeping track of properties applying to parenthesis
435  /// levels.
437 
438  /// Ignore the stack of \c ParenStates for state comparison.
439  ///
440  /// In long and deeply nested unwrapped lines, the current algorithm can
441  /// be insufficient for finding the best formatting with a reasonable amount
442  /// of time and memory. Setting this flag will effectively lead to the
443  /// algorithm not analyzing some combinations. However, these combinations
444  /// rarely contain the optimal solution: In short, accepting a higher
445  /// penalty early would need to lead to different values in the \c
446  /// ParenState stack (in an otherwise identical state) and these different
447  /// values would need to lead to a significant amount of avoided penalty
448  /// later.
449  ///
450  /// FIXME: Come up with a better algorithm instead.
452 
453  /// The indent of the first token.
454  unsigned FirstIndent;
455 
456  /// The line that is being formatted.
457  ///
458  /// Does not need to be considered for memoization because it doesn't change.
460 
461  /// Comparison operator to be able to use \c LineState in a \c map. Fields are compared in the order written below; the first one that differs decides the result.
462  bool operator<(const LineState &Other) const {
463  if (NextToken != Other.NextToken)
464  return NextToken < Other.NextToken;
465  if (Column != Other.Column)
466  return Column < Other.Column;
467  if (NoContinuation != Other.NoContinuation)
468  return NoContinuation; // When the flags differ, the state with the flag set orders first.
469  if (StartOfLineLevel != Other.StartOfLineLevel)
470  return StartOfLineLevel < Other.StartOfLineLevel;
471  if (LowestLevelOnLine != Other.LowestLevelOnLine)
472  return LowestLevelOnLine < Other.LowestLevelOnLine;
473  if (StartOfStringLiteral != Other.StartOfStringLiteral)
474  return StartOfStringLiteral < Other.StartOfStringLiteral;
475  if (IgnoreStackForComparison || Other.IgnoreStackForComparison)
476  return false; // If either side sets the flag, the Stack is not compared and neither state orders before the other.
477  return Stack < Other.Stack;
478  }
479 };
480 
481 } // end namespace format
482 } // end namespace clang
483 
484 #endif
Contains functions for text encoding manipulation.
This file contains the declaration of the FormatToken, a wrapper around Token with additional informa...
LineState State
This class handles loading and caching of source files into memory.
bool canBreak(const LineState &State)
Returns true, if a line break after State is allowed.
unsigned addTokenToState(LineState &State, bool Newline, bool DryRun, unsigned ExtraSpaces=0)
Appends the next token to State and updates information necessary for indentation.
unsigned getColumnLimit(const LineState &State) const
Get the column limit for this line.
LineState getInitialState(unsigned FirstIndent, unsigned FirstStartColumn, const AnnotatedLine *Line, bool DryRun)
Get the initial state, i.e.
ContinuationIndenter(const FormatStyle &Style, const AdditionalKeywords &Keywords, const SourceManager &SourceMgr, WhitespaceManager &Whitespaces, encoding::Encoding Encoding, bool BinPackInconclusiveFunctions)
Constructs a ContinuationIndenter to format Line starting in column FirstIndent.
bool mustBreak(const LineState &State)
Returns true, if a line break after State is mandatory.
Manages the whitespaces around tokens and their replacements.
The JSON file list parser is used to communicate input to InstallAPI.
@ Other
Other implicit parameter.
#define true
Definition: stdbool.h:25
#define false
Definition: stdbool.h:26
Encapsulates keywords that are context sensitive or for languages not properly supported by Clang's l...
Definition: FormatToken.h:995
The FormatStyle is used to configure the formatting to follow specific guidelines.
Definition: Format.h:55
A wrapper around a Token storing information about the whitespace characters preceding it.
Definition: FormatToken.h:290
The current state when indenting an unwrapped line.
unsigned StartOfStringLiteral
The start column of the string literal, if we're in a string literal sequence, 0 otherwise.
bool IgnoreStackForComparison
Ignore the stack of ParenStates for state comparison.
bool operator<(const LineState &Other) const
Comparison operator to be able to use LineState in map.
unsigned LowestLevelOnLine
The lowest NestingLevel on the current line.
const AnnotatedLine * Line
The line that is being formatted.
bool NoLineBreak
Disallow line breaks for this line.
unsigned Column
The number of used columns in the current line.
SmallVector< ParenState > Stack
A stack keeping track of properties applying to parenthesis levels.
bool NoContinuation
true if NextToken should not continue this line.
unsigned StartOfLineLevel
The NestingLevel at the start of this line.
unsigned FirstIndent
The indent of the first token.
FormatToken * NextToken
The token that needs to be next formatted.
bool AvoidBinPacking
Avoid bin packing, i.e.
unsigned NestedNameSpecifierContinuation
If a nested name specifier was broken over multiple lines, this contains the start column of the seco...
bool AlignColons
true if the colons of the current ObjC method expression should be aligned.
unsigned Indent
The position to which a specific parenthesis level needs to be indented.
bool BreakBeforeClosingParen
Whether a newline needs to be inserted before the block's closing paren.
ParenState(const FormatToken *Tok, unsigned Indent, unsigned LastSpace, bool AvoidBinPacking, bool NoLineBreak)
bool BreakBeforeParameter
Break after the next comma (or all the commas in this context if AvoidBinPacking is true).
const FormatToken * Tok
The token opening this parenthesis level, or nullptr if this level is opened by fake parenthesis.
bool BreakBeforeClosingBrace
Whether a newline needs to be inserted before the block's closing brace.
unsigned FirstLessLess
The position of the first "<<" operator encountered on each level.
bool LastOperatorWrapped
True if the last binary operator on this level was wrapped to the next line.
unsigned VariablePos
The column of the first variable name in a variable declaration.
unsigned LastSpace
The position of the last space on each level.
bool NoLineBreak
Line breaking in this context would break a formatting rule.
unsigned QuestionColumn
The column of a ? in a conditional expression.
unsigned StartOfFunctionCall
The start of the most recent function in a builder-type call.
bool ObjCSelectorNameFound
true if at least one selector name was found in the current ObjC method expression.
bool NoLineBreakInOperand
Same as NoLineBreak, but is restricted until the end of the operand (including the next ",...
bool operator<(const ParenState &Other) const
bool ContainsUnwrappedBuilder
true if this ParenState contains multiple segments of a builder-type call on one line.
unsigned ColonPos
The position of the colon in an ObjC method declaration/call.
bool UnindentOperator
Indicates the indent should be reduced by the length of the operator.
bool ContainsLineBreak
true if this ParenState already contains a line-break.
bool IsAligned
Whether this block's indentation is used for alignment.
unsigned StartOfArraySubscripts
Contains the start of array subscript expressions, so that they can be aligned.
bool NestedBlockInlined
The start of a nested block (e.g.
bool HasMultipleNestedBlocks
true if there are multiple nested blocks inside these parens.
bool IsChainedConditional
true if the current ParenState represents the false branch of a chained conditional expression (e....
unsigned CallContinuation
If a call expression was broken over multiple lines, this contains the start column of the second lin...
bool IsWrappedConditional
true if the conditional was wrapped on the first operator (the question mark)
unsigned NestedBlockIndent
If a block relative to this parenthesis level gets wrapped, indent it this much.
bool IsInsideObjCArrayLiteral
true if the current ParenState represents an Objective-C array literal.
llvm::StringMap< FormatStyle > EnclosingFunctionStyle
std::optional< FormatStyle > getDelimiterStyle(StringRef Delimiter) const
std::optional< FormatStyle > getEnclosingFunctionStyle(StringRef EnclosingFunction) const
RawStringFormatStyleManager(const FormatStyle &CodeStyle)
llvm::StringMap< FormatStyle > DelimiterStyle