clang  19.0.0git
WhitespaceManager.h
Go to the documentation of this file.
1 //===--- WhitespaceManager.h - Format C++ code ------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// WhitespaceManager class manages whitespace around tokens and their
11 /// replacements.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #ifndef LLVM_CLANG_LIB_FORMAT_WHITESPACEMANAGER_H
16 #define LLVM_CLANG_LIB_FORMAT_WHITESPACEMANAGER_H
17 
18 #include "TokenAnnotator.h"
20 
21 namespace clang {
22 namespace format {
23 
24 /// Manages the whitespaces around tokens and their replacements.
25 ///
26 /// This includes special handling for certain constructs, e.g. the alignment of
27 /// trailing line comments.
28 ///
29 /// To guarantee correctness of alignment operations, the \c WhitespaceManager
30 /// must be informed about every token in the source file; for each token, there
31 /// must be exactly one call to either \c replaceWhitespace or
32 /// \c addUntouchableToken.
33 ///
34 /// There may be multiple calls to \c breakToken for a given token.
36 public:
37  WhitespaceManager(const SourceManager &SourceMgr, const FormatStyle &Style,
38  bool UseCRLF)
39  : SourceMgr(SourceMgr), Style(Style), UseCRLF(UseCRLF) {}
40 
41  bool useCRLF() const { return UseCRLF; }
42 
43  /// Infers whether the input is using CRLF.
44  static bool inputUsesCRLF(StringRef Text, bool DefaultToCRLF);
45 
46  /// Replaces the whitespace in front of \p Tok. Only call once for
47  /// each \c AnnotatedToken.
48  ///
49  /// \p StartOfTokenColumn is the column at which the token will start after
50  /// this replacement. It is needed for determining how \p Spaces is turned
51  /// into tabs and spaces for some format styles.
52  void replaceWhitespace(FormatToken &Tok, unsigned Newlines, unsigned Spaces,
53  unsigned StartOfTokenColumn, bool IsAligned = false,
54  bool InPPDirective = false);
55 
56  /// Adds information about an unchangeable token's whitespace.
57  ///
58  /// Needs to be called for every token for which \c replaceWhitespace
59  /// was not called.
60  void addUntouchableToken(const FormatToken &Tok, bool InPPDirective);
61 
62  llvm::Error addReplacement(const tooling::Replacement &Replacement);
63 
64  /// Inserts or replaces whitespace in the middle of a token.
65  ///
66  /// Inserts \p PreviousPostfix, \p Newlines, \p Spaces and \p CurrentPrefix
67  /// (in this order) at \p Offset inside \p Tok, replacing \p ReplaceChars
68  /// characters.
69  ///
70  /// Note: \p Spaces can be negative to retain information about initial
71  /// relative column offset between a line of a block comment and the start of
72  /// the comment. This negative offset may be compensated by trailing comment
73  /// alignment here. In all other cases negative \p Spaces will be truncated to
74  /// 0.
75  ///
76  /// When \p InPPDirective is true, escaped newlines are inserted. \p Spaces is
77  /// used to align backslashes correctly.
78  void replaceWhitespaceInToken(const FormatToken &Tok, unsigned Offset,
79  unsigned ReplaceChars,
80  StringRef PreviousPostfix,
81  StringRef CurrentPrefix, bool InPPDirective,
82  unsigned Newlines, int Spaces);
83 
84  /// Returns all the \c Replacements created during formatting.
86 
87  /// Represents a change before a token, a break inside a token,
88  /// or the layout of an unchanged token (or whitespace within).
89  struct Change {
90  /// Functor to sort changes in original source order.
92  public:
93  IsBeforeInFile(const SourceManager &SourceMgr) : SourceMgr(SourceMgr) {}
94  bool operator()(const Change &C1, const Change &C2) const;
95 
96  private:
97  const SourceManager &SourceMgr;
98  };
99 
100  /// Creates a \c Change.
101  ///
102  /// The generated \c Change will replace the characters at
103  /// \p OriginalWhitespaceRange with a concatenation of
104  /// \p PreviousLinePostfix, \p NewlinesBefore line breaks, \p Spaces spaces
105  /// and \p CurrentLinePrefix.
106  ///
107  /// \p StartOfTokenColumn and \p InPPDirective will be used to lay out
108  /// trailing comments and escaped newlines.
111  unsigned StartOfTokenColumn, unsigned NewlinesBefore,
112  StringRef PreviousLinePostfix, StringRef CurrentLinePrefix,
113  bool IsAligned, bool ContinuesPPDirective, bool IsInsideToken);
114 
115  // The kind of the token whose whitespace this change replaces, or in which
116  // this change inserts whitespace.
117  // FIXME: Currently this is not set correctly for breaks inside comments, as
118  // the \c BreakableToken is still doing its own alignment.
119  const FormatToken *Tok;
120 
122  // Changes might be in the middle of a token, so we cannot just keep the
123  // FormatToken around to query its information.
126  unsigned NewlinesBefore;
127  std::string PreviousLinePostfix;
128  std::string CurrentLinePrefix;
129  bool IsAligned;
131 
132  // The number of spaces in front of the token or broken part of the token.
133  // This will be adapted when aligning tokens.
134  // Can be negative to retain information about the initial relative offset
135  // of the lines in a block comment. This is used when aligning trailing
136  // comments. Uncompensated negative offset is truncated to 0.
137  int Spaces;
138 
139  // If this change is inside of a token but not at the start of the token or
140  // directly after a newline.
142 
143  // \c IsTrailingComment, \c TokenLength, \c PreviousEndOfTokenColumn and
144  // \c EscapedNewlineColumn will be calculated in
145  // \c calculateLineBreakInformation.
147  unsigned TokenLength;
150 
151  // These fields are used to retain correct relative line indentation in a
152  // block comment when aligning trailing comments.
153  //
154  // If this Change represents a continuation of a block comment,
155  // \c StartOfBlockComment is a pointer to the first Change in the block
156  // comment. \c IndentationOffset is a relative column offset to this
157  // change, so that the correct column can be reconstructed at the end of
158  // the alignment process.
161 
162  // Depth of conditionals. Computed from tracking fake parenthesis, except
163  // it does not increase the indent for "chained" conditionals.
165 
166  // A combination of indent, nesting and conditionals levels, which are used
167  // in tandem to compute lexical scope, for the purposes of deciding
168  // when to stop consecutive alignment runs.
169  std::tuple<unsigned, unsigned, unsigned> indentAndNestingLevel() const {
170  return std::make_tuple(Tok->IndentLevel, Tok->NestingLevel,
172  }
173  };
174 
175 private:
/// Describes a single cell used while aligning array initializers.
///
/// \c Index and \c EndIndex are passed together to \c calculateCellWidth and
/// \c Index is used to subscript \c Changes, so both are positions in the
/// change list. \c NextColumnElement chains to another cell (or is null);
/// NOTE(review): presumably the cell in the same column of the next row --
/// confirm against \c linkCells.
struct CellDescription {
  unsigned Index = 0;
  unsigned Cell = 0;
  unsigned EndIndex = 0;
  bool HasSplit = false;
  CellDescription *NextColumnElement = nullptr;

  /// Two descriptions are equal when \c Index, \c Cell and \c EndIndex all
  /// match. \c HasSplit and \c NextColumnElement do not take part in the
  /// comparison.
  constexpr bool operator==(const CellDescription &Other) const {
    if (Index != Other.Index)
      return false;
    if (Cell != Other.Cell)
      return false;
    return EndIndex == Other.EndIndex;
  }

  /// Exact negation of \c operator==.
  constexpr bool operator!=(const CellDescription &Other) const {
    return !operator==(Other);
  }
};
191 
192  struct CellDescriptions {
193  SmallVector<CellDescription> Cells;
194  SmallVector<unsigned> CellCounts;
195  unsigned InitialSpaces = 0;
196 
197  // Determine if every row in the array
198  // has the same number of columns.
199  bool isRectangular() const {
200  if (CellCounts.size() < 2)
201  return false;
202 
203  for (auto NumberOfColumns : CellCounts)
204  if (NumberOfColumns != CellCounts[0])
205  return false;
206  return true;
207  }
208  };
209 
210  /// Calculate \c IsTrailingComment, \c TokenLength for the last tokens
211  /// or token parts in a line and \c PreviousEndOfTokenColumn and
212  /// \c EscapedNewlineColumn for the first tokens or token parts in a line.
213  void calculateLineBreakInformation();
214 
215  /// \brief Align consecutive C/C++ preprocessor macros over all \c Changes.
216  void alignConsecutiveMacros();
217 
218  /// Align consecutive assignments over all \c Changes.
219  void alignConsecutiveAssignments();
220 
221  /// Align consecutive bitfields over all \c Changes.
222  void alignConsecutiveBitFields();
223 
224  /// Align consecutive colons. For bitfields, TableGen DAGArgs and definitions.
225  void
226  alignConsecutiveColons(const FormatStyle::AlignConsecutiveStyle &AlignStyle,
227  TokenType Type);
228 
229  /// Align consecutive declarations over all \c Changes.
230  void alignConsecutiveDeclarations();
231 
232  /// Align chained conditionals over all \c Changes.
233  void alignChainedConditionals();
234 
235  /// Align consecutive short case statements over all \c Changes.
236  void alignConsecutiveShortCaseStatements(bool IsExpr);
237 
238  /// Align consecutive TableGen DAGArg colon over all \c Changes.
239  void alignConsecutiveTableGenBreakingDAGArgColons();
240 
241  /// Align consecutive TableGen cond operator colon over all \c Changes.
242  void alignConsecutiveTableGenCondOperatorColons();
243 
244  /// Align consecutive TableGen definitions over all \c Changes.
245  void alignConsecutiveTableGenDefinitions();
246 
247  /// Align trailing comments over all \c Changes.
248  void alignTrailingComments();
249 
250  /// Align trailing comments from change \p Start to change \p End at
251  /// the specified \p Column.
252  void alignTrailingComments(unsigned Start, unsigned End, unsigned Column);
253 
254  /// Align escaped newlines over all \c Changes.
255  void alignEscapedNewlines();
256 
257  /// Align escaped newlines from change \p Start to change \p End at
258  /// the specified \p Column.
259  void alignEscapedNewlines(unsigned Start, unsigned End, unsigned Column);
260 
261  /// Align Array Initializers over all \c Changes.
262  void alignArrayInitializers();
263 
264  /// Align Array Initializers from change \p Start to change
265  /// \p End.
266  void alignArrayInitializers(unsigned Start, unsigned End);
267 
268  /// Align Array Initializers being careful to right justify the columns
269  /// as described by \p CellDescs.
270  void alignArrayInitializersRightJustified(CellDescriptions &&CellDescs);
271 
272  /// Align Array Initializers being careful to left justify the columns
273  /// as described by \p CellDescs.
274  void alignArrayInitializersLeftJustified(CellDescriptions &&CellDescs);
275 
276  /// Calculate the cell width between two indexes.
277  unsigned calculateCellWidth(unsigned Start, unsigned End,
278  bool WithSpaces = false) const;
279 
280  /// Get a set of fully specified CellDescriptions between \p Start and
281  /// \p End of the change list.
282  CellDescriptions getCells(unsigned Start, unsigned End);
283 
284  /// Does this \p Cell contain a split element?
285  static bool isSplitCell(const CellDescription &Cell);
286 
287  /// Get the width of the preceding cells from \p Start to \p End.
288  template <typename I>
289  auto getNetWidth(const I &Start, const I &End, unsigned InitialSpaces) const {
290  auto NetWidth = InitialSpaces;
291  for (auto PrevIter = Start; PrevIter != End; ++PrevIter) {
292  // If we broke the line the initial spaces are already
293  // accounted for.
294  assert(PrevIter->Index < Changes.size());
295  if (Changes[PrevIter->Index].NewlinesBefore > 0)
296  NetWidth = 0;
297  NetWidth +=
298  calculateCellWidth(PrevIter->Index, PrevIter->EndIndex, true) + 1;
299  }
300  return NetWidth;
301  }
302 
303  /// Get the maximum width of a cell in a sequence of columns.
304  template <typename I>
305  unsigned getMaximumCellWidth(I CellIter, unsigned NetWidth) const {
306  unsigned CellWidth =
307  calculateCellWidth(CellIter->Index, CellIter->EndIndex, true);
308  if (Changes[CellIter->Index].NewlinesBefore == 0)
309  CellWidth += NetWidth;
310  for (const auto *Next = CellIter->NextColumnElement; Next;
311  Next = Next->NextColumnElement) {
312  auto ThisWidth = calculateCellWidth(Next->Index, Next->EndIndex, true);
313  if (Changes[Next->Index].NewlinesBefore == 0)
314  ThisWidth += NetWidth;
315  CellWidth = std::max(CellWidth, ThisWidth);
316  }
317  return CellWidth;
318  }
319 
320  /// Get The maximum width of all columns to a given cell.
321  template <typename I>
322  unsigned getMaximumNetWidth(const I &CellStart, const I &CellStop,
323  unsigned InitialSpaces, unsigned CellCount,
324  unsigned MaxRowCount) const {
325  auto MaxNetWidth = getNetWidth(CellStart, CellStop, InitialSpaces);
326  auto RowCount = 1U;
327  auto Offset = std::distance(CellStart, CellStop);
328  for (const auto *Next = CellStop->NextColumnElement; Next;
329  Next = Next->NextColumnElement) {
330  if (RowCount >= MaxRowCount)
331  break;
332  auto Start = (CellStart + RowCount * CellCount);
333  auto End = Start + Offset;
334  MaxNetWidth =
335  std::max(MaxNetWidth, getNetWidth(Start, End, InitialSpaces));
336  ++RowCount;
337  }
338  return MaxNetWidth;
339  }
340 
341  /// Align a split cell with a newline to the first element in the cell.
342  void alignToStartOfCell(unsigned Start, unsigned End);
343 
344  /// Link the Cell pointers in the list of Cells.
345  static CellDescriptions linkCells(CellDescriptions &&CellDesc);
346 
347  /// Fill \c Replaces with the replacements for all effective changes.
348  void generateChanges();
349 
350  /// Stores \p Text as the replacement for the whitespace in \p Range.
351  void storeReplacement(SourceRange Range, StringRef Text);
352  void appendNewlineText(std::string &Text, unsigned Newlines);
353  void appendEscapedNewlineText(std::string &Text, unsigned Newlines,
354  unsigned PreviousEndOfTokenColumn,
355  unsigned EscapedNewlineColumn);
356  void appendIndentText(std::string &Text, unsigned IndentLevel,
357  unsigned Spaces, unsigned WhitespaceStartColumn,
358  bool IsAligned);
359  unsigned appendTabIndent(std::string &Text, unsigned Spaces,
360  unsigned Indentation);
361 
362  SmallVector<Change, 16> Changes;
363  const SourceManager &SourceMgr;
364  tooling::Replacements Replaces;
365  const FormatStyle &Style;
366  bool UseCRLF;
367 };
368 
369 } // namespace format
370 } // namespace clang
371 
372 #endif
StringRef Text
Definition: Format.cpp:2977
unsigned Offset
Definition: Format.cpp:2978
Defines the SourceManager interface.
This file implements a token annotator, i.e.
SourceLocation End
__DEVICE__ int max(int __a, int __b)
This class handles loading and caching of source files into memory.
A trivial tuple used to represent a source range.
Functor to sort changes in original source order.
bool operator()(const Change &C1, const Change &C2) const
Manages the whitespaces around tokens and their replacements.
void replaceWhitespaceInToken(const FormatToken &Tok, unsigned Offset, unsigned ReplaceChars, StringRef PreviousPostfix, StringRef CurrentPrefix, bool InPPDirective, unsigned Newlines, int Spaces)
Inserts or replaces whitespace in the middle of a token.
void addUntouchableToken(const FormatToken &Tok, bool InPPDirective)
Adds information about an unchangeable token's whitespace.
static bool inputUsesCRLF(StringRef Text, bool DefaultToCRLF)
Infers whether the input is using CRLF.
llvm::Error addReplacement(const tooling::Replacement &Replacement)
const tooling::Replacements & generateReplacements()
Returns all the Replacements created during formatting.
void replaceWhitespace(FormatToken &Tok, unsigned Newlines, unsigned Spaces, unsigned StartOfTokenColumn, bool IsAligned=false, bool InPPDirective=false)
Replaces the whitespace in front of Tok.
WhitespaceManager(const SourceManager &SourceMgr, const FormatStyle &Style, bool UseCRLF)
A text replacement.
Definition: Replacement.h:83
Maintains a set of replacements that are conflict-free.
Definition: Replacement.h:212
TokenType
Determines the semantic type of a syntactic token, e.g.
Definition: FormatToken.h:205
The JSON file list parser is used to communicate input to InstallAPI.
bool operator==(const CallGraphNode::CallRecord &LHS, const CallGraphNode::CallRecord &RHS)
Definition: CallGraph.h:223
bool operator!=(CanQual< T > x, CanQual< U > y)
@ Other
Other implicit parameter.
float __ovld __cnfn distance(float, float)
Returns the distance between p0 and p1.
The FormatStyle is used to configure the formatting to follow specific guidelines.
Definition: Format.h:55
A wrapper around a Token storing information about the whitespace characters preceding it.
Definition: FormatToken.h:290
unsigned NestingLevel
The nesting level of this token, i.e.
Definition: FormatToken.h:513
unsigned IndentLevel
The indent level of this token. Copied from the surrounding line.
Definition: FormatToken.h:516
Represents a change before a token, a break inside a token, or the layout of an unchanged token (or w...
Change(const FormatToken &Tok, bool CreateReplacement, SourceRange OriginalWhitespaceRange, int Spaces, unsigned StartOfTokenColumn, unsigned NewlinesBefore, StringRef PreviousLinePostfix, StringRef CurrentLinePrefix, bool IsAligned, bool ContinuesPPDirective, bool IsInsideToken)
Creates a Change.
std::tuple< unsigned, unsigned, unsigned > indentAndNestingLevel() const