clang  19.0.0git
BreakableToken.cpp
Go to the documentation of this file.
1 //===--- BreakableToken.cpp - Format C++ code -----------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// Contains implementation of BreakableToken class and classes derived
11 /// from it.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #include "BreakableToken.h"
16 #include "ContinuationIndenter.h"
17 #include "clang/Basic/CharInfo.h"
18 #include "clang/Format/Format.h"
19 #include "llvm/ADT/STLExtras.h"
20 #include "llvm/Support/Debug.h"
21 #include <algorithm>
22 
23 #define DEBUG_TYPE "format-token-breaker"
24 
25 namespace clang {
26 namespace format {
27 
// Characters this file treats as blanks when trimming or searching text:
// space, tab, vertical tab, form feed and carriage return. '\n' is not part
// of the set.
28 static constexpr StringRef Blanks = " \t\v\f\r";
/// Returns true if \p C is a space, horizontal tab, vertical tab, form feed or
/// carriage return. A newline is not considered blank.
static bool IsBlank(char C) {
  return C == ' ' || C == '\t' || C == '\v' || C == '\f' || C == '\r';
}
41 
42 static StringRef getLineCommentIndentPrefix(StringRef Comment,
43  const FormatStyle &Style) {
44  static constexpr StringRef KnownCStylePrefixes[] = {"///<", "//!<", "///",
45  "//!", "//:", "//"};
46  static constexpr StringRef KnownTextProtoPrefixes[] = {"####", "###", "##",
47  "//", "#"};
48  ArrayRef<StringRef> KnownPrefixes(KnownCStylePrefixes);
50  KnownPrefixes = KnownTextProtoPrefixes;
51 
52  assert(
53  llvm::is_sorted(KnownPrefixes, [](StringRef Lhs, StringRef Rhs) noexcept {
54  return Lhs.size() > Rhs.size();
55  }));
56 
57  for (StringRef KnownPrefix : KnownPrefixes) {
58  if (Comment.starts_with(KnownPrefix)) {
59  const auto PrefixLength =
60  Comment.find_first_not_of(' ', KnownPrefix.size());
61  return Comment.substr(0, PrefixLength);
62  }
63  }
64  return {};
65 }
66 
// Finds a position at which the comment text `Text` can be broken so that the
// part before the break fits between ContentStartColumn and ColumnLimit.
//
// The search starts from the last blank within the allowed width and rewinds
// past blanks that must not be broken at: after a trailing '\' in C++, before
// something that matches the numbered-list pattern, and before '{' or '@' in
// JavaScript. If no usable blank exists before the limit, the first blank at
// or after it (and past any leading whitespace) is used instead.
//
// Returns Split(length of the right-trimmed text before the break, number of
// characters between that point and the start of the text after the break),
// or Split(StringRef::npos, 0) if no break position is found.
//
// NOTE(review): the embedded line numbers of this listing skip 67, 70 and
// 84-85, so the return type, the declarations of `Encoding` and `Style`, and
// the start of the width computation inside the first loop are not visible
// here. Restore them before compiling.
68 getCommentSplit(StringRef Text, unsigned ContentStartColumn,
69  unsigned ColumnLimit, unsigned TabWidth,
71  bool DecorationEndsWithStar = false) {
72  LLVM_DEBUG(llvm::dbgs() << "Comment split: \"" << Text
73  << "\", Column limit: " << ColumnLimit
74  << ", Content start: " << ContentStartColumn << "\n");
 // Not enough room for any content: report "no split".
75  if (ColumnLimit <= ContentStartColumn + 1)
76  return BreakableToken::Split(StringRef::npos, 0);
77 
78  unsigned MaxSplit = ColumnLimit - ContentStartColumn + 1;
79  unsigned MaxSplitBytes = 0;
80 
 // Advance MaxSplitBytes one character (BytesInChar bytes) at a time until
 // MaxSplit columns are used or the text ends.
81  for (unsigned NumChars = 0;
82  NumChars < MaxSplit && MaxSplitBytes < Text.size();) {
83  unsigned BytesInChar =
86  Text.substr(MaxSplitBytes, BytesInChar), ContentStartColumn + NumChars,
87  TabWidth, Encoding);
88  MaxSplitBytes += BytesInChar;
89  }
90 
91  // In JavaScript, some @tags can be followed by {, and machinery that parses
92  // these comments will fail to understand the comment if followed by a line
93  // break. So avoid ever breaking before a {.
94  if (Style.isJavaScript()) {
95  StringRef::size_type SpaceOffset =
96  Text.find_first_of(Blanks, MaxSplitBytes);
97  if (SpaceOffset != StringRef::npos && SpaceOffset + 1 < Text.size() &&
98  Text[SpaceOffset + 1] == '{') {
99  MaxSplitBytes = SpaceOffset + 1;
100  }
101  }
102 
103  StringRef::size_type SpaceOffset = Text.find_last_of(Blanks, MaxSplitBytes);
104 
105  static const auto kNumberedListRegexp = llvm::Regex("^[1-9][0-9]?\\.");
106  // Some spaces are unacceptable to break on, rewind past them.
107  while (SpaceOffset != StringRef::npos) {
108  // If a line-comment ends with `\`, the next line continues the comment,
109  // whether or not it starts with `//`. This is confusing and triggers
110  // -Wcomment.
111  // Avoid introducing multiline comments by not allowing a break right
112  // after '\'.
113  if (Style.isCpp()) {
114  StringRef::size_type LastNonBlank =
115  Text.find_last_not_of(Blanks, SpaceOffset);
116  if (LastNonBlank != StringRef::npos && Text[LastNonBlank] == '\\') {
117  SpaceOffset = Text.find_last_of(Blanks, LastNonBlank);
118  continue;
119  }
120  }
121 
122  // Do not split before a number followed by a dot: this would be interpreted
123  // as a numbered list, which would prevent re-flowing in subsequent passes.
124  if (kNumberedListRegexp.match(Text.substr(SpaceOffset).ltrim(Blanks))) {
125  SpaceOffset = Text.find_last_of(Blanks, SpaceOffset);
126  continue;
127  }
128 
129  // Avoid ever breaking before a @tag or a { in JavaScript.
130  if (Style.isJavaScript() && SpaceOffset + 1 < Text.size() &&
131  (Text[SpaceOffset + 1] == '{' || Text[SpaceOffset + 1] == '@')) {
132  SpaceOffset = Text.find_last_of(Blanks, SpaceOffset);
133  continue;
134  }
135 
136  break;
137  }
138 
139  if (SpaceOffset == StringRef::npos ||
140  // Don't break at leading whitespace.
141  Text.find_last_not_of(Blanks, SpaceOffset) == StringRef::npos) {
142  // Make sure that we don't break at leading whitespace that
143  // reaches past MaxSplit.
144  StringRef::size_type FirstNonWhitespace = Text.find_first_not_of(Blanks);
145  if (FirstNonWhitespace == StringRef::npos) {
146  // If the comment is only whitespace, we cannot split.
147  return BreakableToken::Split(StringRef::npos, 0);
148  }
149  SpaceOffset = Text.find_first_of(
150  Blanks, std::max<unsigned>(MaxSplitBytes, FirstNonWhitespace));
151  }
152  if (SpaceOffset != StringRef::npos && SpaceOffset != 0) {
153  // adaptStartOfLine will break after lines starting with /** if the comment
154  // is broken anywhere. Avoid emitting this break twice here.
155  // Example: in /** longtextcomesherethatbreaks */ (with ColumnLimit 20) will
156  // insert a break after /**, so this code must not insert the same break.
157  if (SpaceOffset == 1 && Text[SpaceOffset - 1] == '*')
158  return BreakableToken::Split(StringRef::npos, 0);
159  StringRef BeforeCut = Text.substr(0, SpaceOffset).rtrim(Blanks);
160  StringRef AfterCut = Text.substr(SpaceOffset);
161  // Don't trim the leading blanks if it would create a */ after the break.
162  if (!DecorationEndsWithStar || AfterCut.size() <= 1 || AfterCut[1] != '/')
163  AfterCut = AfterCut.ltrim(Blanks);
164  return BreakableToken::Split(BeforeCut.size(),
165  AfterCut.begin() - BeforeCut.end());
166  }
167  return BreakableToken::Split(StringRef::npos, 0);
168 }
169 
// Picks a break position inside string-literal text so that the part before
// the break fits into ColumnLimit - UsedColumns columns.
//
// While scanning, the function remembers the offsets of the last blank, the
// last '/', and the last single-byte non-alphanumeric character seen before
// the limit. The result is chosen in that order of preference, breaking just
// after the remembered character; if none was seen, the break is placed at
// the last character boundary reached. Candidates at offset 0 are ignored.
//
// Returns Split(StringRef::npos, 0) if Text is empty, if
// ColumnLimit <= UsedColumns, or if no position was found. The second member
// of the returned Split is always 0.
//
// NOTE(review): the embedded line numbers skip 170, 186 and 189-190, so the
// return type and the statements that assign `Advance` (and begin the
// `Chars` update in the else branch) are not visible here. Restore them
// before compiling.
171 getStringSplit(StringRef Text, unsigned UsedColumns, unsigned ColumnLimit,
172  unsigned TabWidth, encoding::Encoding Encoding) {
173  // FIXME: Reduce unit test case.
174  if (Text.empty())
175  return BreakableToken::Split(StringRef::npos, 0);
176  if (ColumnLimit <= UsedColumns)
177  return BreakableToken::Split(StringRef::npos, 0);
178  unsigned MaxSplit = ColumnLimit - UsedColumns;
179  StringRef::size_type SpaceOffset = 0;
180  StringRef::size_type SlashOffset = 0;
181  StringRef::size_type WordStartOffset = 0;
182  StringRef::size_type SplitPoint = 0;
 // Text is consumed from the front; SplitPoint tracks how many bytes of the
 // original text have been consumed so far.
183  for (unsigned Chars = 0;;) {
184  unsigned Advance;
185  if (Text[0] == '\\') {
187  Chars += Advance;
188  } else {
191  Text.substr(0, Advance), UsedColumns + Chars, TabWidth, Encoding);
192  }
193 
 // Stop once the width budget is exceeded or the current character is the
 // last one in the text.
194  if (Chars > MaxSplit || Text.size() <= Advance)
195  break;
196 
197  if (IsBlank(Text[0]))
198  SpaceOffset = SplitPoint;
199  if (Text[0] == '/')
200  SlashOffset = SplitPoint;
201  if (Advance == 1 && !isAlphanumeric(Text[0]))
202  WordStartOffset = SplitPoint;
203 
204  SplitPoint += Advance;
205  Text = Text.substr(Advance);
206  }
207 
208  if (SpaceOffset != 0)
209  return BreakableToken::Split(SpaceOffset + 1, 0);
210  if (SlashOffset != 0)
211  return BreakableToken::Split(SlashOffset + 1, 0);
212  if (WordStartOffset != 0)
213  return BreakableToken::Split(WordStartOffset + 1, 0);
214  if (SplitPoint != 0)
215  return BreakableToken::Split(SplitPoint, 0);
216  return BreakableToken::Split(StringRef::npos, 0);
217 }
218 
220  assert((Token.is(TT_BlockComment) || Token.is(TT_LineComment)) &&
221  "formatting regions are switched by comment tokens");
222  StringRef Content = Token.TokenText.substr(2).ltrim();
223  return Content.starts_with("clang-format on") ||
224  Content.starts_with("clang-format off");
225 }
226 
227 unsigned
228 BreakableToken::getLengthAfterCompression(unsigned RemainingTokenColumns,
229  Split Split) const {
230  // Example: consider the content
231  // lala lala
232  // - RemainingTokenColumns is the original number of columns, 10;
233  // - Split is (4, 2), denoting the two spaces between the two words;
234  //
235  // We compute the number of columns when the split is compressed into a single
236  // space, like:
237  // lala lala
238  //
239  // FIXME: Correctly measure the length of whitespace in Split.second so it
240  // works with tabs.
241  return RemainingTokenColumns + 1 - Split.second;
242 }
243 
244 unsigned BreakableStringLiteral::getLineCount() const { return 1; }
245 
246 unsigned BreakableStringLiteral::getRangeLength(unsigned LineIndex,
247  unsigned Offset,
248  StringRef::size_type Length,
249  unsigned StartColumn) const {
250  llvm_unreachable("Getting the length of a part of the string literal "
251  "indicates that the code tries to reflow it.");
252 }
253 
// Member of BreakableStringLiteral (it reads UnbreakableTailLength and
// Postfix) returning an unsigned length whose visible part is
// UnbreakableTailLength + Postfix.size() plus a further operand.
//
// NOTE(review): the embedded line numbers skip 255 and 258-259, so the
// function name with its first parameters and the last operand of the sum are
// not visible here. Restore them before compiling.
254 unsigned
256  unsigned StartColumn) const {
257  return UnbreakableTailLength + Postfix.size() +
260 }
261 
// Returns the column at which the string content starts: StartColumn plus the
// length of Prefix. The `Break` parameter is not used by the visible body.
//
// NOTE(review): the embedded line numbers skip 262, so the first line of the
// signature (return type, qualified name, leading parameters) is not visible
// here. Restore it before compiling.
263  bool Break) const {
264  return StartColumn + Prefix.size();
265 }
266 
// Constructor: stores StartColumn, Prefix, Postfix and UnbreakableTailLength,
// and sets Line to the token text with Prefix stripped from the front and
// Postfix stripped from the back. The token text must start with Prefix and
// end with Postfix (asserted).
//
// NOTE(review): the embedded line numbers skip 267 and 270, so the qualified
// constructor name and the end of the parameter list (which must declare the
// `Encoding` and `Style` passed to the base class) are not visible here.
// Restore them before compiling.
268  const FormatToken &Tok, unsigned StartColumn, StringRef Prefix,
269  StringRef Postfix, unsigned UnbreakableTailLength, bool InPPDirective,
271  : BreakableToken(Tok, InPPDirective, Encoding, Style),
272  StartColumn(StartColumn), Prefix(Prefix), Postfix(Postfix),
273  UnbreakableTailLength(UnbreakableTailLength) {
274  assert(Tok.TokenText.starts_with(Prefix) && Tok.TokenText.ends_with(Postfix));
275  Line = Tok.TokenText.substr(
276  Prefix.size(), Tok.TokenText.size() - Prefix.size() - Postfix.size());
277 }
278 
// Computes a split for the part of Line starting at TailOffset by delegating
// to getStringSplit. The column limit passed on is reduced by Postfix.size().
// LineIndex and CommentPragmasRegex are not used by the visible body.
//
// NOTE(review): the embedded line numbers skip 279, so the return type and
// qualified function name are not visible here. Restore them before
// compiling.
280  unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit,
281  unsigned ContentStartColumn, const llvm::Regex &CommentPragmasRegex) const {
282  return getStringSplit(Line.substr(TailOffset), ContentStartColumn,
283  ColumnLimit - Postfix.size(), Style.TabWidth, Encoding);
284 }
285 
// Asks the WhitespaceManager to replace Split.second characters of the token
// at offset Prefix.size() + TailOffset + Split.first, passing Postfix as the
// text that ends the previous part. LineIndex and ContentIndent are not used
// by the visible body.
//
// NOTE(review): the embedded line numbers skip 292, so the remaining
// arguments of replaceWhitespaceInToken are not visible here. Restore them
// before compiling.
286 void BreakableStringLiteral::insertBreak(unsigned LineIndex,
287  unsigned TailOffset, Split Split,
288  unsigned ContentIndent,
289  WhitespaceManager &Whitespaces) const {
290  Whitespaces.replaceWhitespaceInToken(
291  Tok, Prefix.size() + TailOffset + Split.first, Split.second, Postfix,
293 }
294 
// Constructor for string literals that are broken using operators/braces.
// It forwards the quote-dependent prefix ("'", "@\"" or "\"") and postfix to
// the base-class initializer, records whether braces are needed (the token is
// not TT_StringInConcatenation), and then selects the replacement strings
// (LeftBraceQuote, RightBraceQuote, Prefix, Postfix) for Verilog or for the
// '+'-joined styles, depending on QuoteStyle and SignOnNewLine.
//
// NOTE(review): the embedded line numbers skip 295, 298-299, 316, 330-331,
// 336, 342, 348 and 352. Not visible here are:
//   - the qualified constructor name, the end of the parameter list and the
//     start of the base-class initializer;
//   - the second operand of SignOnNewLine;
//   - several brace-quote assignments;
//   - the initial assignment of ContinuationIndent;
//   - the last operand of the final condition.
// Restore them before compiling.
296  const FormatToken &Tok, QuoteStyleType QuoteStyle, bool UnindentPlus,
297  unsigned StartColumn, unsigned UnbreakableTailLength, bool InPPDirective,
300  Tok, StartColumn, /*Prefix=*/QuoteStyle == SingleQuotes ? "'"
301  : QuoteStyle == AtDoubleQuotes ? "@\""
302  : "\"",
303  /*Postfix=*/QuoteStyle == SingleQuotes ? "'" : "\"",
304  UnbreakableTailLength, InPPDirective, Encoding, Style),
305  BracesNeeded(Tok.isNot(TT_StringInConcatenation)),
306  QuoteStyle(QuoteStyle) {
307  // Find the replacement text for inserting braces and quotes and line breaks.
308  // We don't create an allocated string concatenated from parts here because it
309  // has to outlive the BreakableStringLiteral object. The brace replacements
310  // include a quote so that WhitespaceManager can tell it apart from whitespace
311  // replacements between the string and surrounding tokens.
312 
313  // The option is not implemented in JavaScript.
314  bool SignOnNewLine =
315  !Style.isJavaScript() &&
317 
318  if (Style.isVerilog()) {
319  // In Verilog, all strings are quoted by double quotes, joined by commas,
320  // and wrapped in braces. The comma is always before the newline.
321  assert(QuoteStyle == DoubleQuotes);
322  LeftBraceQuote = Style.Cpp11BracedListStyle ? "{\"" : "{ \"";
323  RightBraceQuote = Style.Cpp11BracedListStyle ? "\"}" : "\" }";
324  Postfix = "\",";
325  Prefix = "\"";
326  } else {
327  // The plus sign may be on either line. And also C# and JavaScript have
328  // several quoting styles.
329  if (QuoteStyle == SingleQuotes) {
332  Postfix = SignOnNewLine ? "'" : "' +";
333  Prefix = SignOnNewLine ? "+ '" : "'";
334  } else {
335  if (QuoteStyle == AtDoubleQuotes) {
337  Prefix = SignOnNewLine ? "+ @\"" : "@\"";
338  } else {
339  LeftBraceQuote = Style.SpacesInParensOptions.Other ? "( \"" : "(\"";
340  Prefix = SignOnNewLine ? "+ \"" : "\"";
341  }
343  Postfix = SignOnNewLine ? "\"" : "\" +";
344  }
345  }
346 
347  // Following lines are indented by the width of the brace and space if any.
349  // The plus sign may need to be unindented depending on the style.
350  // FIXME: Add support for DontAlign.
351  if (!Style.isVerilog() && SignOnNewLine && !BracesNeeded && UnindentPlus &&
353  ContinuationIndent -= 2;
354  }
355 }
356 
// Returns a length whose visible part is UnbreakableTailLength plus the size
// of RightBraceQuote when braces are needed (otherwise 1), plus a further
// operand.
//
// NOTE(review): the embedded line numbers skip 357 and 360-361, so the return
// type with the qualified function name and the last operand of the sum are
// not visible here. Restore them before compiling.
358  unsigned LineIndex, unsigned Offset, unsigned StartColumn) const {
359  return UnbreakableTailLength + (BracesNeeded ? RightBraceQuote.size() : 1) +
362 }
363 
// Returns the column at which content starts, clamped to be non-negative.
// With Break set, the offset from StartColumn is ContinuationIndent plus the
// size of Prefix. Otherwise it is the width of the opening quote (2 for
// AtDoubleQuotes, else 1) plus, when braces are needed,
// LeftBraceQuote.size() - 1.
//
// NOTE(review): the embedded line numbers skip 365, so the qualified function
// name and its leading parameters are not visible here. Restore them before
// compiling.
364 unsigned
366  bool Break) const {
 // The arithmetic is done in int because ContinuationIndent is subtracted
 // from elsewhere in this file and the sum is clamped at 0 here.
367  return std::max(
368  0,
369  static_cast<int>(StartColumn) +
370  (Break ? ContinuationIndent + static_cast<int>(Prefix.size())
371  : (BracesNeeded ? static_cast<int>(LeftBraceQuote.size()) - 1
372  : 0) +
373  (QuoteStyle == AtDoubleQuotes ? 2 : 1)));
374 }
375 
// Inserts a line break inside the string token via the WhitespaceManager:
// Split.second characters at the split position are replaced, the previous
// part is closed with Postfix, the next part is opened with Prefix, and the
// new line is indented by StartColumn + ContinuationIndent (clamped at 0).
// The offset skips the opening quote, which is 2 characters wide for
// AtDoubleQuotes and 1 otherwise.
//
// NOTE(review): the embedded line numbers skip 376, so the return type and
// qualified function name are not visible here. Restore them before
// compiling.
377  unsigned LineIndex, unsigned TailOffset, Split Split,
378  unsigned ContentIndent, WhitespaceManager &Whitespaces) const {
379  Whitespaces.replaceWhitespaceInToken(
380  Tok, /*Offset=*/(QuoteStyle == AtDoubleQuotes ? 2 : 1) + TailOffset +
381  Split.first,
382  /*ReplaceChars=*/Split.second, /*PreviousPostfix=*/Postfix,
383  /*CurrentPrefix=*/Prefix, InPPDirective, /*NewLines=*/1,
384  /*Spaces=*/
385  std::max(0, static_cast<int>(StartColumn) + ContinuationIndent));
386 }
387 
// When braces are needed, replaces the first character of the token with
// LeftBraceQuote and the last character with RightBraceQuote. Does nothing
// otherwise.
//
// NOTE(review): the embedded line numbers skip 388, so the return type and
// qualified function name are not visible here. Restore them before
// compiling.
389  WhitespaceManager &Whitespaces) const {
390  // Add the braces required for breaking the token if they are needed.
391  if (!BracesNeeded)
392  return;
393 
394  // To add a brace or parenthesis, we replace the quote (or the at sign) with a
395  // brace and another quote. This is because the rest of the program requires
396  // one replacement for each source range. If we replace the empty strings
397  // around the string, it may conflict with whitespace replacements between the
398  // string and adjacent tokens.
399  Whitespaces.replaceWhitespaceInToken(
400  Tok, /*Offset=*/0, /*ReplaceChars=*/1, /*PreviousPostfix=*/"",
401  /*CurrentPrefix=*/LeftBraceQuote, InPPDirective, /*NewLines=*/0,
402  /*Spaces=*/0);
403  Whitespaces.replaceWhitespaceInToken(
404  Tok, /*Offset=*/Tok.TokenText.size() - 1, /*ReplaceChars=*/1,
405  /*PreviousPostfix=*/RightBraceQuote,
406  /*CurrentPrefix=*/"", InPPDirective, /*NewLines=*/0, /*Spaces=*/0);
407 }
408 
// Constructor: forwards Token, InPPDirective, Encoding and Style to
// BreakableToken and stores StartColumn. The body is empty.
//
// NOTE(review): the embedded line numbers skip 409 and 411, so the qualified
// constructor name with the `Token` parameter and the declaration of
// `Encoding` are not visible here. Restore them before compiling.
410  unsigned StartColumn, bool InPPDirective,
412  const FormatStyle &Style)
413  : BreakableToken(Token, InPPDirective, Encoding, Style),
414  StartColumn(StartColumn) {}
415 
416 unsigned BreakableComment::getLineCount() const { return Lines.size(); }
417 
419 BreakableComment::getSplit(unsigned LineIndex, unsigned TailOffset,
420  unsigned ColumnLimit, unsigned ContentStartColumn,
421  const llvm::Regex &CommentPragmasRegex) const {
422  // Don't break lines matching the comment pragmas regex.
423  if (CommentPragmasRegex.match(Content[LineIndex]))
424  return Split(StringRef::npos, 0);
425  return getCommentSplit(Content[LineIndex].substr(TailOffset),
426  ContentStartColumn, ColumnLimit, Style.TabWidth,
427  Encoding, Style);
428 }
429 
// Replaces the Split.second characters at Split.first (relative to the tail
// of content line LineIndex that starts at TailOffset) with a single space,
// without inserting a newline.
//
// NOTE(review): the embedded line numbers skip 430, so the return type and
// qualified function name are not visible here. Restore them before
// compiling.
431  unsigned LineIndex, unsigned TailOffset, Split Split,
432  WhitespaceManager &Whitespaces) const {
433  StringRef Text = Content[LineIndex].substr(TailOffset);
434  // Text is relative to the content line, but Whitespaces operates relative to
435  // the start of the corresponding token, so compute the start of the Split
436  // that needs to be compressed into a single space relative to the start of
437  // its token.
438  unsigned BreakOffsetInToken =
439  Text.data() - tokenAt(LineIndex).TokenText.data() + Split.first;
440  unsigned CharsToRemove = Split.second;
441  Whitespaces.replaceWhitespaceInToken(
442  tokenAt(LineIndex), BreakOffsetInToken, CharsToRemove, "", "",
443  /*InPPDirective=*/false, /*Newlines=*/0, /*Spaces=*/1);
444 }
445 
446 const FormatToken &BreakableComment::tokenAt(unsigned LineIndex) const {
447  return Tokens[LineIndex] ? *Tokens[LineIndex] : Tok;
448 }
449 
450 static bool mayReflowContent(StringRef Content) {
451  Content = Content.trim(Blanks);
452  // Lines starting with '@' or '\' commonly have special meaning.
453  // Lines starting with '-', '-#', '+' or '*' are bulleted/numbered lists.
454  bool hasSpecialMeaningPrefix = false;
455  for (StringRef Prefix :
456  {"@", "\\", "TODO", "FIXME", "XXX", "-# ", "- ", "+ ", "* "}) {
457  if (Content.starts_with(Prefix)) {
458  hasSpecialMeaningPrefix = true;
459  break;
460  }
461  }
462 
463  // Numbered lists may also start with a number followed by '.'
464  // To avoid issues if a line starts with a number which is actually the end
465  // of a previous line, we only consider numbers with up to 2 digits.
466  static const auto kNumberedListRegexp = llvm::Regex("^[1-9][0-9]?\\. ");
467  hasSpecialMeaningPrefix =
468  hasSpecialMeaningPrefix || kNumberedListRegexp.match(Content);
469 
470  // Simple heuristic for what to reflow: content should contain at least two
471  // characters and either the first or second character must be
472  // non-punctuation.
473  return Content.size() >= 2 && !hasSpecialMeaningPrefix &&
474  !Content.ends_with("\\") &&
475  // Note that this is UTF-8 safe, since if isPunctuation(Content[0]) is
476  // true, then the first code point must be 1 byte long.
477  (!isPunctuation(Content[0]) || !isPunctuation(Content[1]));
478 }
479 
// Constructor: builds the per-line view of a block comment token.
//
// Steps visible in the body:
//  1. Split the text between "/*" and "*/" into Lines, on "\r\n" or "\n"
//     depending on UseCRLF.
//  2. Fill Content and ContentColumn: line 0 directly, later lines through
//     adjustWhitespace with the indent delta
//     StartColumn - OriginalStartColumn.
//  3. Pick DecorationColumn, and shrink Decoration (initially "* ") to the
//     longest prefix shared by the relevant lines.
//  4. Strip the decoration from each content line, and compute
//     IndentAtLineBreak and LastLineNeedsDecoration.
//  5. Decide DelimitersOnNewline for jsdoc-style comments.
//
// NOTE(review): the embedded line numbers skip 480, 579 and 589-590, so the
// qualified constructor name, the condition that opens the jsdoc-detection
// block (closed at embedded line 594) and the middle of the EndColumn
// expression are not visible here. Restore them before compiling.
481  const FormatToken &Token, unsigned StartColumn,
482  unsigned OriginalStartColumn, bool FirstInLine, bool InPPDirective,
483  encoding::Encoding Encoding, const FormatStyle &Style, bool UseCRLF)
484  : BreakableComment(Token, StartColumn, InPPDirective, Encoding, Style),
485  DelimitersOnNewline(false),
486  UnbreakableTailLength(Token.UnbreakableTailLength) {
487  assert(Tok.is(TT_BlockComment) &&
488  "block comment section must start with a block comment");
489 
490  StringRef TokenText(Tok.TokenText);
491  assert(TokenText.starts_with("/*") && TokenText.ends_with("*/"));
492  TokenText.substr(2, TokenText.size() - 4)
493  .split(Lines, UseCRLF ? "\r\n" : "\n");
494 
495  int IndentDelta = StartColumn - OriginalStartColumn;
496  Content.resize(Lines.size());
497  Content[0] = Lines[0];
498  ContentColumn.resize(Lines.size());
499  // Account for the initial '/*'.
500  ContentColumn[0] = StartColumn + 2;
501  Tokens.resize(Lines.size());
502  for (size_t i = 1; i < Lines.size(); ++i)
503  adjustWhitespace(i, IndentDelta);
504 
505  // Align decorations with the column of the star on the first line,
506  // that is one column after the start "/*".
507  DecorationColumn = StartColumn + 1;
508 
509  // Account for comment decoration patterns like this:
510  //
511  // /*
512  // ** blah blah blah
513  // */
514  if (Lines.size() >= 2 && Content[1].starts_with("**") &&
515  static_cast<unsigned>(ContentColumn[1]) == StartColumn) {
516  DecorationColumn = StartColumn;
517  }
518 
519  Decoration = "* ";
520  if (Lines.size() == 1 && !FirstInLine) {
521  // Comments for which FirstInLine is false can start on arbitrary column,
522  // and available horizontal space can be too small to align consecutive
523  // lines with the first one.
524  // FIXME: We could, probably, align them to current indentation level, but
525  // now we just wrap them without stars.
526  Decoration = "";
527  }
 // Shrink Decoration until every considered line starts with it. A non-last
 // line that is itself a prefix of Decoration is skipped, and an empty last
 // line ends the scan.
528  for (size_t i = 1, e = Content.size(); i < e && !Decoration.empty(); ++i) {
529  const StringRef &Text = Content[i];
530  if (i + 1 == e) {
531  // If the last line is empty, the closing "*/" will have a star.
532  if (Text.empty())
533  break;
534  } else if (!Text.empty() && Decoration.starts_with(Text)) {
535  continue;
536  }
537  while (!Text.starts_with(Decoration))
538  Decoration = Decoration.drop_back(1);
539  }
540 
541  LastLineNeedsDecoration = true;
542  IndentAtLineBreak = ContentColumn[0] + 1;
543  for (size_t i = 1, e = Lines.size(); i < e; ++i) {
544  if (Content[i].empty()) {
545  if (i + 1 == e) {
546  // Empty last line means that we already have a star as a part of the
547  // trailing */. We also need to preserve whitespace, so that */ is
548  // correctly indented.
549  LastLineNeedsDecoration = false;
550  // Align the star in the last '*/' with the stars on the previous lines.
551  if (e >= 2 && !Decoration.empty())
552  ContentColumn[i] = DecorationColumn;
553  } else if (Decoration.empty()) {
554  // For all other lines, set the start column to 0 if they're empty, so
555  // we do not insert trailing whitespace anywhere.
556  ContentColumn[i] = 0;
557  }
558  continue;
559  }
560 
561  // The first line already excludes the star.
562  // The last line excludes the star if LastLineNeedsDecoration is false.
563  // For all other lines, adjust the line to exclude the star and
564  // (optionally) the first whitespace.
565  unsigned DecorationSize = Decoration.starts_with(Content[i])
566  ? Content[i].size()
567  : Decoration.size();
568  if (DecorationSize)
569  ContentColumn[i] = DecorationColumn + DecorationSize;
570  Content[i] = Content[i].substr(DecorationSize);
571  if (!Decoration.starts_with(Content[i])) {
572  IndentAtLineBreak =
573  std::min<int>(IndentAtLineBreak, std::max(0, ContentColumn[i]));
574  }
575  }
576  IndentAtLineBreak = std::max<unsigned>(IndentAtLineBreak, Decoration.size());
577 
578  // Detect a multiline jsdoc comment and set DelimitersOnNewline in that case.
580  if ((Lines[0] == "*" || Lines[0].starts_with("* ")) && Lines.size() > 1) {
581  // This is a multiline jsdoc comment.
582  DelimitersOnNewline = true;
583  } else if (Lines[0].starts_with("* ") && Lines.size() == 1) {
584  // Detect a long single-line comment, like:
585  // /** long long long */
586  // Below, '2' is the width of '*/'.
587  unsigned EndColumn =
588  ContentColumn[0] +
591  2;
592  DelimitersOnNewline = EndColumn > Style.ColumnLimit;
593  }
594  }
595 
596  LLVM_DEBUG({
597  llvm::dbgs() << "IndentAtLineBreak " << IndentAtLineBreak << "\n";
598  llvm::dbgs() << "DelimitersOnNewline " << DelimitersOnNewline << "\n";
599  for (size_t i = 0; i < Lines.size(); ++i) {
600  llvm::dbgs() << i << " |" << Content[i] << "| "
601  << "CC=" << ContentColumn[i] << "| "
602  << "IN=" << (Content[i].data() - Lines[i].data()) << "\n";
603  }
604  });
605 }
606 
// Computes where the content of line LineIndex, starting at TailOffset, can
// be broken. Returns Split(StringRef::npos, 0) if the content line matches
// CommentPragmasRegex; otherwise delegates to getCommentSplit, telling it
// whether Decoration ends with '*'.
//
// NOTE(review): the embedded line numbers skip 607, so the return type and
// qualified function name are not visible here. Restore them before
// compiling.
608  unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit,
609  unsigned ContentStartColumn, const llvm::Regex &CommentPragmasRegex) const {
610  // Don't break lines matching the comment pragmas regex.
611  if (CommentPragmasRegex.match(Content[LineIndex]))
612  return Split(StringRef::npos, 0);
613  return getCommentSplit(Content[LineIndex].substr(TailOffset),
614  ContentStartColumn, ColumnLimit, Style.TabWidth,
615  Encoding, Style, Decoration.ends_with("*"));
616 }
617 
// Trims trailing blanks from the content of line LineIndex - 1 and leading
// blanks from the content of line LineIndex, then records the content column
// of line LineIndex, shifted by IndentDelta. LineIndex must be at least 1
// because the previous line is accessed.
//
// NOTE(review): the embedded line numbers skip 651, so the first operand of
// the ContentColumn assignment is not visible here (only "IndentDelta;"
// remains). Restore it before compiling.
618 void BreakableBlockComment::adjustWhitespace(unsigned LineIndex,
619  int IndentDelta) {
620  // When in a preprocessor directive, the trailing backslash in a block comment
621  // is not needed, but can serve a purpose of uniformity with necessary escaped
622  // newlines outside the comment. In this case we remove it here before
623  // trimming the trailing whitespace. The backslash will be re-added later when
624  // inserting a line break.
625  size_t EndOfPreviousLine = Lines[LineIndex - 1].size();
626  if (InPPDirective && Lines[LineIndex - 1].ends_with("\\"))
627  --EndOfPreviousLine;
628 
629  // Calculate the end of the non-whitespace text in the previous line.
630  EndOfPreviousLine =
631  Lines[LineIndex - 1].find_last_not_of(Blanks, EndOfPreviousLine);
632  if (EndOfPreviousLine == StringRef::npos)
633  EndOfPreviousLine = 0;
634  else
635  ++EndOfPreviousLine;
636  // Calculate the start of the non-whitespace text in the current line.
637  size_t StartOfLine = Lines[LineIndex].find_first_not_of(Blanks);
638  if (StartOfLine == StringRef::npos)
639  StartOfLine = Lines[LineIndex].size();
640 
641  StringRef Whitespace = Lines[LineIndex].substr(0, StartOfLine);
642  // Adjust Lines to only contain relevant text.
643  size_t PreviousContentOffset =
644  Content[LineIndex - 1].data() - Lines[LineIndex - 1].data();
645  Content[LineIndex - 1] = Lines[LineIndex - 1].substr(
646  PreviousContentOffset, EndOfPreviousLine - PreviousContentOffset);
647  Content[LineIndex] = Lines[LineIndex].substr(StartOfLine);
648 
649  // Adjust the start column uniformly across all lines.
650  ContentColumn[LineIndex] =
652  IndentDelta;
653 }
654 
// Returns a length computed from the sub-range [Offset, Offset + Length) of
// content line LineIndex, given StartColumn, the style's tab width and the
// encoding.
//
// NOTE(review): the embedded line numbers skip 659, so the `return` and the
// name of the function that receives these arguments are not visible here.
// Restore them before compiling.
655 unsigned BreakableBlockComment::getRangeLength(unsigned LineIndex,
656  unsigned Offset,
657  StringRef::size_type Length,
658  unsigned StartColumn) const {
660  Content[LineIndex].substr(Offset, Length), StartColumn, Style.TabWidth,
661  Encoding);
662 }
663 
664 unsigned BreakableBlockComment::getRemainingLength(unsigned LineIndex,
665  unsigned Offset,
666  unsigned StartColumn) const {
667  unsigned LineLength =
668  UnbreakableTailLength +
669  getRangeLength(LineIndex, Offset, StringRef::npos, StartColumn);
670  if (LineIndex + 1 == Lines.size()) {
671  LineLength += 2;
672  // We never need a decoration when breaking just the trailing "*/" postfix.
673  bool HasRemainingText = Offset < Content[LineIndex].size();
674  if (!HasRemainingText) {
675  bool HasDecoration = Lines[LineIndex].ltrim().starts_with(Decoration);
676  if (HasDecoration)
677  LineLength -= Decoration.size();
678  }
679  }
680  return LineLength;
681 }
682 
// Returns IndentAtLineBreak when Break is set; otherwise the content column
// recorded for LineIndex, clamped at 0.
//
// NOTE(review): the embedded line numbers skip 683, so the return type,
// qualified function name and the `LineIndex` parameter are not visible here.
// Restore them before compiling.
684  bool Break) const {
685  if (Break)
686  return IndentAtLineBreak;
687  return std::max(0, ContentColumn[LineIndex]);
688 }
689 
// Set of javadoc-style annotations; getContentIndent looks up the first word
// of a content line in it.
//
// NOTE(review): the embedded line numbers skip 691, so the qualified variable
// name and the opening "= {" of the initializer are not visible here. Restore
// them before compiling.
690 const llvm::StringSet<>
692  "@param", "@return", "@returns", "@throws", "@type", "@template",
693  "@see", "@deprecated", "@define", "@exports", "@mods", "@private",
694 };
695 
// Returns the extra indent for continuation lines of the content at
// LineIndex. The first word of the line (after skipping a leading '*' on
// line 0) is looked up in ContentIndentingJavadocAnnotations.
//
// NOTE(review): the embedded line numbers skip 697 and 708. As listed, the
// `return 0;` at embedded line 698 is unconditional and the `if` at 707 has
// no statement of its own; the guard before 698 and the value returned for a
// recognized annotation are not visible here. Restore them before compiling.
696 unsigned BreakableBlockComment::getContentIndent(unsigned LineIndex) const {
698  return 0;
699  // The content at LineIndex 0 of a comment like:
700  // /** line 0 */
701  // is "* line 0", so we need to skip over the decoration in that case.
702  StringRef ContentWithNoDecoration = Content[LineIndex];
703  if (LineIndex == 0 && ContentWithNoDecoration.starts_with("*"))
704  ContentWithNoDecoration = ContentWithNoDecoration.substr(1).ltrim(Blanks);
705  StringRef FirstWord = ContentWithNoDecoration.substr(
706  0, ContentWithNoDecoration.find_first_of(Blanks));
707  if (ContentIndentingJavadocAnnotations.contains(FirstWord))
709  return 0;
710 }
711 
712 void BreakableBlockComment::insertBreak(unsigned LineIndex, unsigned TailOffset,
713  Split Split, unsigned ContentIndent,
714  WhitespaceManager &Whitespaces) const {
715  StringRef Text = Content[LineIndex].substr(TailOffset);
716  StringRef Prefix = Decoration;
717  // We need this to account for the case when we have a decoration "* " for all
718  // the lines except for the last one, where the star in "*/" acts as a
719  // decoration.
720  unsigned LocalIndentAtLineBreak = IndentAtLineBreak;
721  if (LineIndex + 1 == Lines.size() &&
722  Text.size() == Split.first + Split.second) {
723  // For the last line we need to break before "*/", but not to add "* ".
724  Prefix = "";
725  if (LocalIndentAtLineBreak >= 2)
726  LocalIndentAtLineBreak -= 2;
727  }
728  // The split offset is from the beginning of the line. Convert it to an offset
729  // from the beginning of the token text.
730  unsigned BreakOffsetInToken =
731  Text.data() - tokenAt(LineIndex).TokenText.data() + Split.first;
732  unsigned CharsToRemove = Split.second;
733  assert(LocalIndentAtLineBreak >= Prefix.size());
734  std::string PrefixWithTrailingIndent = std::string(Prefix);
735  PrefixWithTrailingIndent.append(ContentIndent, ' ');
736  Whitespaces.replaceWhitespaceInToken(
737  tokenAt(LineIndex), BreakOffsetInToken, CharsToRemove, "",
738  PrefixWithTrailingIndent, InPPDirective, /*Newlines=*/1,
739  /*Spaces=*/LocalIndentAtLineBreak + ContentIndent -
740  PrefixWithTrailingIndent.size());
741 }
742 
// Returns the split describing the leading blanks of content line LineIndex
// that would be replaced when joining it with the previous line:
// Split(0, number of leading blanks), with 0 blanks if the line has no
// non-blank character.
// Returns Split(StringRef::npos, 0) if mayReflow rejects the line, or if the
// previous line has a non-zero content indent that differs from this line's
// number of leading blanks.
//
// NOTE(review): the embedded line numbers skip 743, so the return type and
// qualified function name are not visible here. Restore them before
// compiling.
744  unsigned LineIndex, const llvm::Regex &CommentPragmasRegex) const {
745  if (!mayReflow(LineIndex, CommentPragmasRegex))
746  return Split(StringRef::npos, 0);
747 
748  // If we're reflowing into a line with content indent, only reflow the next
749  // line if its starting whitespace matches the content indent.
750  size_t Trimmed = Content[LineIndex].find_first_not_of(Blanks);
751  if (LineIndex) {
752  unsigned PreviousContentIndent = getContentIndent(LineIndex - 1);
753  if (PreviousContentIndent && Trimmed != StringRef::npos &&
754  Trimmed != PreviousContentIndent) {
755  return Split(StringRef::npos, 0);
756  }
757  }
758 
759  return Split(0, Trimmed != StringRef::npos ? Trimmed : 0);
760 }
761 
// Evaluates to true when DelimitersOnNewline is set and the first line,
// after its first character, contains at least one non-blank character.
//
// NOTE(review): the embedded line numbers skip 762, so the whole signature of
// this function is not visible here. Restore it before compiling.
763  // A break is introduced when we want delimiters on newline.
764  return DelimitersOnNewline &&
765  Lines[0].substr(1).find_first_not_of(Blanks) != StringRef::npos;
766 }
767 
768 void BreakableBlockComment::reflow(unsigned LineIndex,
769  WhitespaceManager &Whitespaces) const {
770  StringRef TrimmedContent = Content[LineIndex].ltrim(Blanks);
771  // Here we need to reflow.
772  assert(Tokens[LineIndex - 1] == Tokens[LineIndex] &&
773  "Reflowing whitespace within a token");
774  // This is the offset of the end of the last line relative to the start of
775  // the token text in the token.
776  unsigned WhitespaceOffsetInToken = Content[LineIndex - 1].data() +
777  Content[LineIndex - 1].size() -
778  tokenAt(LineIndex).TokenText.data();
779  unsigned WhitespaceLength = TrimmedContent.data() -
780  tokenAt(LineIndex).TokenText.data() -
781  WhitespaceOffsetInToken;
782  Whitespaces.replaceWhitespaceInToken(
783  tokenAt(LineIndex), WhitespaceOffsetInToken,
784  /*ReplaceChars=*/WhitespaceLength, /*PreviousPostfix=*/"",
785  /*CurrentPrefix=*/ReflowPrefix, InPPDirective, /*Newlines=*/0,
786  /*Spaces=*/0);
787 }
788 
// Rewrites the whitespace and decoration in front of line LineIndex when the
// line is not reflowed into the previous one.
//
// For line 0 a break is inserted after the first character only if
// DelimitersOnNewline is set and non-blank text follows; nothing else is
// done. For later lines, the text between the end of the previous content
// line and the start of this one is replaced by a newline, indentation and
// the decoration. The decoration is shortened to its first character for
// empty non-last lines and for lines whose content column is 1, and is
// dropped for an empty last line that needs no decoration.
//
// NOTE(review): the embedded line numbers skip 789, so the return type and
// qualified function name are not visible here. Restore them before
// compiling.
790  unsigned LineIndex, WhitespaceManager &Whitespaces) const {
791  if (LineIndex == 0) {
792  if (DelimitersOnNewline) {
793  // Since we're breaking at index 1 below, the break position and the
794  // break length are the same.
795  // Note: this works because getCommentSplit is careful never to split at
796  // the beginning of a line.
797  size_t BreakLength = Lines[0].substr(1).find_first_not_of(Blanks);
798  if (BreakLength != StringRef::npos) {
799  insertBreak(LineIndex, 0, Split(1, BreakLength), /*ContentIndent=*/0,
800  Whitespaces);
801  }
802  }
803  return;
804  }
805  // Here no reflow with the previous line will happen.
806  // Fix the decoration of the line at LineIndex.
807  StringRef Prefix = Decoration;
808  if (Content[LineIndex].empty()) {
809  if (LineIndex + 1 == Lines.size()) {
810  if (!LastLineNeedsDecoration) {
811  // If the last line was empty, we don't need a prefix, as the */ will
812  // line up with the decoration (if it exists).
813  Prefix = "";
814  }
815  } else if (!Decoration.empty()) {
816  // For other empty lines, if we do have a decoration, adapt it to not
817  // contain a trailing whitespace.
818  Prefix = Prefix.substr(0, 1);
819  }
820  } else if (ContentColumn[LineIndex] == 1) {
821  // This line starts immediately after the decorating *.
822  Prefix = Prefix.substr(0, 1);
823  }
824  // This is the offset of the end of the last line relative to the start of the
825  // token text in the token.
826  unsigned WhitespaceOffsetInToken = Content[LineIndex - 1].data() +
827  Content[LineIndex - 1].size() -
828  tokenAt(LineIndex).TokenText.data();
829  unsigned WhitespaceLength = Content[LineIndex].data() -
830  tokenAt(LineIndex).TokenText.data() -
831  WhitespaceOffsetInToken;
832  Whitespaces.replaceWhitespaceInToken(
833  tokenAt(LineIndex), WhitespaceOffsetInToken, WhitespaceLength, "", Prefix,
834  InPPDirective, /*Newlines=*/1, ContentColumn[LineIndex] - Prefix.size());
835 }
836 
839  if (DelimitersOnNewline) {
840  // Replace the trailing whitespace of the last line with a newline.
841  // In case the last line is empty, the ending '*/' is already on its own
842  // line.
843  StringRef Line = Content.back().substr(TailOffset);
844  StringRef TrimmedLine = Line.rtrim(Blanks);
845  if (!TrimmedLine.empty())
846  return Split(TrimmedLine.size(), Line.size() - TrimmedLine.size());
847  }
848  return Split(StringRef::npos, 0);
849 }
850 
852  unsigned LineIndex, const llvm::Regex &CommentPragmasRegex) const {
853  // Content[LineIndex] may exclude the indent after the '*' decoration. In that
854  // case, we compute the start of the comment pragma manually.
855  StringRef IndentContent = Content[LineIndex];
856  if (Lines[LineIndex].ltrim(Blanks).starts_with("*"))
857  IndentContent = Lines[LineIndex].ltrim(Blanks).substr(1);
858  return LineIndex > 0 && !CommentPragmasRegex.match(IndentContent) &&
859  mayReflowContent(Content[LineIndex]) && !Tok.Finalized &&
860  !switchesFormatting(tokenAt(LineIndex));
861 }
862 
864  const FormatToken &Token, unsigned StartColumn, bool InPPDirective,
866  : BreakableComment(Token, StartColumn, InPPDirective, Encoding, Style) {
867  assert(Tok.is(TT_LineComment) &&
868  "line comment section must start with a line comment");
869  FormatToken *LineTok = nullptr;
870  const int Minimum = Style.SpacesInLineCommentPrefix.Minimum;
871  // How many spaces we changed in the first line of the section, this will be
872  // applied in all following lines
873  int FirstLineSpaceChange = 0;
874  for (const FormatToken *CurrentTok = &Tok;
875  CurrentTok && CurrentTok->is(TT_LineComment);
876  CurrentTok = CurrentTok->Next) {
877  LastLineTok = LineTok;
878  StringRef TokenText(CurrentTok->TokenText);
879  assert((TokenText.starts_with("//") || TokenText.starts_with("#")) &&
880  "unsupported line comment prefix, '//' and '#' are supported");
881  size_t FirstLineIndex = Lines.size();
882  TokenText.split(Lines, "\n");
883  Content.resize(Lines.size());
884  ContentColumn.resize(Lines.size());
885  PrefixSpaceChange.resize(Lines.size());
886  Tokens.resize(Lines.size());
887  Prefix.resize(Lines.size());
888  OriginalPrefix.resize(Lines.size());
889  for (size_t i = FirstLineIndex, e = Lines.size(); i < e; ++i) {
890  Lines[i] = Lines[i].ltrim(Blanks);
891  StringRef IndentPrefix = getLineCommentIndentPrefix(Lines[i], Style);
892  OriginalPrefix[i] = IndentPrefix;
893  const int SpacesInPrefix = llvm::count(IndentPrefix, ' ');
894 
895  // This lambda also considers multibyte characters, which are not handled
896  // by functions like isPunctuation provided by CharInfo.
897  const auto NoSpaceBeforeFirstCommentChar = [&]() {
898  assert(Lines[i].size() > IndentPrefix.size());
899  const char FirstCommentChar = Lines[i][IndentPrefix.size()];
900  const unsigned FirstCharByteSize =
901  encoding::getCodePointNumBytes(FirstCommentChar, Encoding);
903  Lines[i].substr(IndentPrefix.size(), FirstCharByteSize),
904  Encoding) != 1) {
905  return false;
906  }
907  // In C-like comments, add a space before #. For example this is useful
908  // to preserve the relative indentation when commenting out code with
909  // #includes.
910  //
911  // In languages using # as the comment leader such as proto, don't
912  // add a space to support patterns like:
913  // #########
914  // # section
915  // #########
916  if (FirstCommentChar == '#' && !TokenText.starts_with("#"))
917  return false;
918  return FirstCommentChar == '\\' || isPunctuation(FirstCommentChar) ||
919  isHorizontalWhitespace(FirstCommentChar);
920  };
921 
922  // On the first line of the comment section we calculate how many spaces
923  // are to be added or removed; all lines after that only get that change
924  // and we do not look at the maximum anymore. In addition to the actual
925  // first line, we recalculate it whenever the non-space part of the prefix
926  // changes, e.g. from "///" to "//".
927  if (i == 0 || OriginalPrefix[i].rtrim(Blanks) !=
928  OriginalPrefix[i - 1].rtrim(Blanks)) {
929  if (SpacesInPrefix < Minimum && Lines[i].size() > IndentPrefix.size() &&
930  !NoSpaceBeforeFirstCommentChar()) {
931  FirstLineSpaceChange = Minimum - SpacesInPrefix;
932  } else if (static_cast<unsigned>(SpacesInPrefix) >
934  FirstLineSpaceChange =
935  Style.SpacesInLineCommentPrefix.Maximum - SpacesInPrefix;
936  } else {
937  FirstLineSpaceChange = 0;
938  }
939  }
940 
941  if (Lines[i].size() != IndentPrefix.size()) {
942  PrefixSpaceChange[i] = FirstLineSpaceChange;
943 
944  if (SpacesInPrefix + PrefixSpaceChange[i] < Minimum) {
945  PrefixSpaceChange[i] +=
946  Minimum - (SpacesInPrefix + PrefixSpaceChange[i]);
947  }
948 
949  assert(Lines[i].size() > IndentPrefix.size());
950  const auto FirstNonSpace = Lines[i][IndentPrefix.size()];
951  const bool IsFormatComment = LineTok && switchesFormatting(*LineTok);
952  const bool LineRequiresLeadingSpace =
953  !NoSpaceBeforeFirstCommentChar() ||
954  (FirstNonSpace == '}' && FirstLineSpaceChange != 0);
955  const bool AllowsSpaceChange =
956  !IsFormatComment &&
957  (SpacesInPrefix != 0 || LineRequiresLeadingSpace);
958 
959  if (PrefixSpaceChange[i] > 0 && AllowsSpaceChange) {
960  Prefix[i] = IndentPrefix.str();
961  Prefix[i].append(PrefixSpaceChange[i], ' ');
962  } else if (PrefixSpaceChange[i] < 0 && AllowsSpaceChange) {
963  Prefix[i] = IndentPrefix
964  .drop_back(std::min<std::size_t>(
965  -PrefixSpaceChange[i], SpacesInPrefix))
966  .str();
967  } else {
968  Prefix[i] = IndentPrefix.str();
969  }
970  } else {
971  // If the IndentPrefix is the whole line, there is no content and we
972  // simply drop all spaces.
973  Prefix[i] = IndentPrefix.drop_back(SpacesInPrefix).str();
974  }
975 
976  Tokens[i] = LineTok;
977  Content[i] = Lines[i].substr(IndentPrefix.size());
978  ContentColumn[i] =
981 
982  // Calculate the end of the non-whitespace text in this line.
983  size_t EndOfLine = Content[i].find_last_not_of(Blanks);
984  if (EndOfLine == StringRef::npos)
985  EndOfLine = Content[i].size();
986  else
987  ++EndOfLine;
988  Content[i] = Content[i].substr(0, EndOfLine);
989  }
990  LineTok = CurrentTok->Next;
991  if (CurrentTok->Next && !CurrentTok->Next->ContinuesLineCommentSection) {
992  // A line comment section needs to be broken by a line comment that is
993  // preceded by at least two newlines. Note that we put this break here
994  // instead of breaking at a previous stage during parsing, since that
995  // would split the contents of the enum into two unwrapped lines in this
996  // example, which is undesirable:
997  // enum A {
998  // a, // comment about a
999  //
1000  // // comment about b
1001  // b
1002  // };
1003  //
1004  // FIXME: Consider putting separate line comment sections as children to
1005  // the unwrapped line instead.
1006  break;
1007  }
1008  }
1009 }
1010 
1011 unsigned
1013  StringRef::size_type Length,
1014  unsigned StartColumn) const {
1016  Content[LineIndex].substr(Offset, Length), StartColumn, Style.TabWidth,
1017  Encoding);
1018 }
1019 
1020 unsigned
1022  bool /*Break*/) const {
1023  return ContentColumn[LineIndex];
1024 }
1025 
1027  unsigned LineIndex, unsigned TailOffset, Split Split,
1028  unsigned ContentIndent, WhitespaceManager &Whitespaces) const {
1029  StringRef Text = Content[LineIndex].substr(TailOffset);
1030  // Compute the offset of the split relative to the beginning of the token
1031  // text.
1032  unsigned BreakOffsetInToken =
1033  Text.data() - tokenAt(LineIndex).TokenText.data() + Split.first;
1034  unsigned CharsToRemove = Split.second;
1035  Whitespaces.replaceWhitespaceInToken(
1036  tokenAt(LineIndex), BreakOffsetInToken, CharsToRemove, "",
1037  Prefix[LineIndex], InPPDirective, /*Newlines=*/1,
1038  /*Spaces=*/ContentColumn[LineIndex] - Prefix[LineIndex].size());
1039 }
1040 
1042  unsigned LineIndex, const llvm::Regex &CommentPragmasRegex) const {
1043  if (!mayReflow(LineIndex, CommentPragmasRegex))
1044  return Split(StringRef::npos, 0);
1045 
1046  size_t Trimmed = Content[LineIndex].find_first_not_of(Blanks);
1047 
1048  // In a line comment section each line is a separate token; thus, after a
1049  // split we replace all whitespace before the current line comment token
1050  // (which does not need to be included in the split), plus the start of the
1051  // line up to where the content starts.
1052  return Split(0, Trimmed != StringRef::npos ? Trimmed : 0);
1053 }
1054 
// Reflows the content of line LineIndex onto the end of the previous line by
// emitting whitespace replacements through Whitespaces.
//
// For LineIndex > 0 the gap to the previous line is removed first: with
// replaceWhitespace when the two lines belong to different tokens, or with
// replaceWhitespaceInToken when both lines live inside the same token.
// Afterwards, for every LineIndex, the bytes between the start of
// Lines[LineIndex] and the start of Content[LineIndex] are replaced by
// ReflowPrefix.
1055 void BreakableLineCommentSection::reflow(unsigned LineIndex,
1056  WhitespaceManager &Whitespaces) const {
1057  if (LineIndex > 0 && Tokens[LineIndex] != Tokens[LineIndex - 1]) {
1058  // Reflow happens between two different tokens. Replace the whitespace
1059  // preceding the current token by the empty string (no newlines, no spaces).
1060  Whitespaces.replaceWhitespace(
1061  *Tokens[LineIndex], /*Newlines=*/0, /*Spaces=*/0,
1062  /*StartOfTokenColumn=*/StartColumn, /*IsAligned=*/true,
1063  /*InPPDirective=*/false);
1064  } else if (LineIndex > 0) {
1065  // In case we're reflowing after the '\' in:
1066  //
1067  // // line comment \
1068  // // line 2
1069  //
1070  // the reflow happens inside the single comment token (it is a single line
1071  // comment containing an escaped newline).
1072  // Replace the whitespace between the '\' and '//' with the empty string.
1073  //
1074  // Offset points to after the '\' relative to start of the token.
1075  unsigned Offset = Lines[LineIndex - 1].data() +
1076  Lines[LineIndex - 1].size() -
1077  tokenAt(LineIndex - 1).TokenText.data();
1078  // WhitespaceLength is the number of chars between the '\' and the '//' on
1079  // the next line.
1080  unsigned WhitespaceLength =
1081  Lines[LineIndex].data() - tokenAt(LineIndex).TokenText.data() - Offset;
1082  Whitespaces.replaceWhitespaceInToken(*Tokens[LineIndex], Offset,
1083  /*ReplaceChars=*/WhitespaceLength,
1084  /*PreviousPostfix=*/"",
1085  /*CurrentPrefix=*/"",
1086  /*InPPDirective=*/false,
1087  /*Newlines=*/0,
1088  /*Spaces=*/0);
1089  }
1090  // Replace the indent and prefix of the token with the reflow prefix. Offset
// is where Lines[LineIndex] starts inside the token text; WhitespaceLength
// spans from there up to the first byte of Content[LineIndex].
1091  unsigned Offset =
1092  Lines[LineIndex].data() - tokenAt(LineIndex).TokenText.data();
1093  unsigned WhitespaceLength =
1094  Content[LineIndex].data() - Lines[LineIndex].data();
1095  Whitespaces.replaceWhitespaceInToken(*Tokens[LineIndex], Offset,
1096  /*ReplaceChars=*/WhitespaceLength,
1097  /*PreviousPostfix=*/"",
1098  /*CurrentPrefix=*/ReflowPrefix,
1099  /*InPPDirective=*/false,
1100  /*Newlines=*/0,
1101  /*Spaces=*/0);
1102 }
1103 
1105  unsigned LineIndex, WhitespaceManager &Whitespaces) const {
1106  // If this is the first line of a token, we need to inform Whitespace Manager
1107  // about it: either adapt the whitespace range preceding it, or mark it as an
1108  // untouchable token.
1109  // This happens for instance here:
1110  // // line 1 \
1111  // // line 2
1112  if (LineIndex > 0 && Tokens[LineIndex] != Tokens[LineIndex - 1]) {
1113  // This is the first line for the current token, but no reflow with the
1114  // previous token is necessary. However, we still may need to adjust the
1115  // start column. Note that ContentColumn[LineIndex] is the expected
1116  // content column after a possible update to the prefix, hence the prefix
1117  // length change is included.
1118  unsigned LineColumn =
1119  ContentColumn[LineIndex] -
1120  (Content[LineIndex].data() - Lines[LineIndex].data()) +
1121  (OriginalPrefix[LineIndex].size() - Prefix[LineIndex].size());
1122 
1123  // We always want to create a replacement instead of adding an untouchable
1124  // token, even if LineColumn is the same as the original column of the
1125  // token. This is because WhitespaceManager doesn't align trailing
1126  // comments if they are untouchable.
1127  Whitespaces.replaceWhitespace(*Tokens[LineIndex],
1128  /*Newlines=*/1,
1129  /*Spaces=*/LineColumn,
1130  /*StartOfTokenColumn=*/LineColumn,
1131  /*IsAligned=*/true,
1132  /*InPPDirective=*/false);
1133  }
1134  if (OriginalPrefix[LineIndex] != Prefix[LineIndex]) {
1135  // Adjust the prefix if necessary.
1136  const auto SpacesToRemove = -std::min(PrefixSpaceChange[LineIndex], 0);
1137  const auto SpacesToAdd = std::max(PrefixSpaceChange[LineIndex], 0);
1138  Whitespaces.replaceWhitespaceInToken(
1139  tokenAt(LineIndex), OriginalPrefix[LineIndex].size() - SpacesToRemove,
1140  /*ReplaceChars=*/SpacesToRemove, "", "", /*InPPDirective=*/false,
1141  /*Newlines=*/0, /*Spaces=*/SpacesToAdd);
1142  }
1143 }
1144 
1146  if (LastLineTok)
1147  State.NextToken = LastLineTok->Next;
1148 }
1149 
1151  unsigned LineIndex, const llvm::Regex &CommentPragmasRegex) const {
1152  // Line comments have the indent as part of the prefix, so we need to
1153  // recompute the start of the line.
1154  StringRef IndentContent = Content[LineIndex];
1155  if (Lines[LineIndex].starts_with("//"))
1156  IndentContent = Lines[LineIndex].substr(2);
1157  // FIXME: Decide whether we want to reflow non-regular indents:
1158  // Currently, we only reflow when the OriginalPrefix[LineIndex] matches the
1159  // OriginalPrefix[LineIndex-1]. That means we don't reflow
1160  // // text that protrudes
1161  // // into text with different indent
1162  // We do reflow in that case in block comments.
1163  return LineIndex > 0 && !CommentPragmasRegex.match(IndentContent) &&
1164  mayReflowContent(Content[LineIndex]) && !Tok.Finalized &&
1165  !switchesFormatting(tokenAt(LineIndex)) &&
1166  OriginalPrefix[LineIndex] == OriginalPrefix[LineIndex - 1];
1167 }
1168 
1169 } // namespace format
1170 } // namespace clang
Declares BreakableToken, BreakableStringLiteral, BreakableComment, BreakableBlockComment and Breakabl...
This file implements an indenter that manages the indentation of continuations.
StringRef Text
Definition: Format.cpp:2977
unsigned Offset
Definition: Format.cpp:2978
Various functions to configurably format source code.
LineState State
__DEVICE__ int min(int __a, int __b)
__DEVICE__ int max(int __a, int __b)
Token - This structure provides full information about a lexed token.
Definition: Token.h:36
bool is(tok::TokenKind K) const
is/isNot - Predicates to check if this token is a specific kind, as in "if (Tok.is(tok::l_brace)) {....
Definition: Token.h:99
void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, unsigned ContentIndent, WhitespaceManager &Whitespaces) const override
Emits the previously retrieved Split via Whitespaces.
unsigned getContentStartColumn(unsigned LineIndex, bool Break) const override
Returns the column at which content in line LineIndex starts, assuming no reflow.
Split getReflowSplit(unsigned LineIndex, const llvm::Regex &CommentPragmasRegex) const override
Returns a whitespace range (offset, length) of the content at LineIndex such that the content of that...
bool mayReflow(unsigned LineIndex, const llvm::Regex &CommentPragmasRegex) const override
Split getSplitAfterLastLine(unsigned TailOffset) const override
Returns a whitespace range (offset, length) of the content at the last line that needs to be reformat...
BreakableBlockComment(const FormatToken &Token, unsigned StartColumn, unsigned OriginalStartColumn, bool FirstInLine, bool InPPDirective, encoding::Encoding Encoding, const FormatStyle &Style, bool UseCRLF)
static const llvm::StringSet ContentIndentingJavadocAnnotations
unsigned getRemainingLength(unsigned LineIndex, unsigned Offset, unsigned StartColumn) const override
Returns the number of columns required to format the text following the byte Offset in the line LineI...
unsigned getContentIndent(unsigned LineIndex) const override
Returns additional content indent required for the second line after the content at line LineIndex is...
void adaptStartOfLine(unsigned LineIndex, WhitespaceManager &Whitespaces) const override
Replaces the whitespace between LineIndex-1 and LineIndex.
void reflow(unsigned LineIndex, WhitespaceManager &Whitespaces) const override
Reflows the current line into the end of the previous one.
unsigned getRangeLength(unsigned LineIndex, unsigned Offset, StringRef::size_type Length, unsigned StartColumn) const override
Returns the number of columns required to format the text in the byte range [Offset,...
Split getSplit(unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit, unsigned ContentStartColumn, const llvm::Regex &CommentPragmasRegex) const override
Returns a range (offset, length) at which to break the line at LineIndex, if previously broken at Tai...
bool introducesBreakBeforeToken() const override
Returns whether there will be a line break at the start of the token.
unsigned getLineCount() const override
Returns the number of lines in this token in the original code.
SmallVector< StringRef, 16 > Lines
SmallVector< int, 16 > ContentColumn
Split getSplit(unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit, unsigned ContentStartColumn, const llvm::Regex &CommentPragmasRegex) const override
Returns a range (offset, length) at which to break the line at LineIndex, if previously broken at Tai...
SmallVector< FormatToken *, 16 > Tokens
SmallVector< StringRef, 16 > Content
void compressWhitespace(unsigned LineIndex, unsigned TailOffset, Split Split, WhitespaceManager &Whitespaces) const override
Replaces the whitespace range described by Split with a single space.
const FormatToken & tokenAt(unsigned LineIndex) const
BreakableComment(const FormatToken &Token, unsigned StartColumn, bool InPPDirective, encoding::Encoding Encoding, const FormatStyle &Style)
Creates a breakable token for a comment.
BreakableLineCommentSection(const FormatToken &Token, unsigned StartColumn, bool InPPDirective, encoding::Encoding Encoding, const FormatStyle &Style)
unsigned getContentStartColumn(unsigned LineIndex, bool Break) const override
Returns the column at which content in line LineIndex starts, assuming no reflow.
void reflow(unsigned LineIndex, WhitespaceManager &Whitespaces) const override
Reflows the current line into the end of the previous one.
Split getReflowSplit(unsigned LineIndex, const llvm::Regex &CommentPragmasRegex) const override
Returns a whitespace range (offset, length) of the content at LineIndex such that the content of that...
void updateNextToken(LineState &State) const override
Updates the next token of State to the next token after this one.
bool mayReflow(unsigned LineIndex, const llvm::Regex &CommentPragmasRegex) const override
void adaptStartOfLine(unsigned LineIndex, WhitespaceManager &Whitespaces) const override
Replaces the whitespace between LineIndex-1 and LineIndex.
void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, unsigned ContentIndent, WhitespaceManager &Whitespaces) const override
Emits the previously retrieved Split via Whitespaces.
unsigned getRangeLength(unsigned LineIndex, unsigned Offset, StringRef::size_type Length, unsigned StartColumn) const override
Returns the number of columns required to format the text in the byte range [Offset,...
BreakableStringLiteralUsingOperators(const FormatToken &Tok, QuoteStyleType QuoteStyle, bool UnindentPlus, unsigned StartColumn, unsigned UnbreakableTailLength, bool InPPDirective, encoding::Encoding Encoding, const FormatStyle &Style)
Creates a breakable token for a single line string literal for C#, Java, JavaScript,...
void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, unsigned ContentIndent, WhitespaceManager &Whitespaces) const override
Emits the previously retrieved Split via Whitespaces.
unsigned getContentStartColumn(unsigned LineIndex, bool Break) const override
Returns the column at which content in line LineIndex starts, assuming no reflow.
void updateAfterBroken(WhitespaceManager &Whitespaces) const override
Adds replacements that are needed when the token is broken.
unsigned getRemainingLength(unsigned LineIndex, unsigned Offset, unsigned StartColumn) const override
Returns the number of columns required to format the text following the byte Offset in the line LineI...
Split getSplit(unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit, unsigned ContentStartColumn, const llvm::Regex &CommentPragmasRegex) const override
Returns a range (offset, length) at which to break the line at LineIndex, if previously broken at Tai...
unsigned getRangeLength(unsigned LineIndex, unsigned Offset, StringRef::size_type Length, unsigned StartColumn) const override
Returns the number of columns required to format the text in the byte range [Offset,...
unsigned getContentStartColumn(unsigned LineIndex, bool Break) const override
Returns the column at which content in line LineIndex starts, assuming no reflow.
unsigned getLineCount() const override
Returns the number of lines in this token in the original code.
unsigned getRemainingLength(unsigned LineIndex, unsigned Offset, unsigned StartColumn) const override
Returns the number of columns required to format the text following the byte Offset in the line LineI...
BreakableStringLiteral(const FormatToken &Tok, unsigned StartColumn, StringRef Prefix, StringRef Postfix, unsigned UnbreakableTailLength, bool InPPDirective, encoding::Encoding Encoding, const FormatStyle &Style)
Creates a breakable token for a single line string literal.
void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, unsigned ContentIndent, WhitespaceManager &Whitespaces) const override
Emits the previously retrieved Split via Whitespaces.
Base class for tokens / ranges of tokens that can allow breaking within the tokens - for example,...
std::pair< StringRef::size_type, unsigned > Split
Contains starting character index and length of split.
unsigned getLengthAfterCompression(unsigned RemainingTokenColumns, Split Split) const
Returns the number of columns needed to format RemainingTokenColumns, assuming that Split is within t...
const encoding::Encoding Encoding
Manages the whitespaces around tokens and their replacements.
unsigned columnWidthWithTabs(StringRef Text, unsigned StartColumn, unsigned TabWidth, Encoding Encoding)
Returns the number of columns required to display the Text, starting from the StartColumn on a termin...
Definition: Encoding.h:60
unsigned getEscapeSequenceLength(StringRef Text)
Gets the length of an escape sequence inside a C++ string literal.
Definition: Encoding.h:96
unsigned getCodePointNumBytes(char FirstChar, Encoding Encoding)
Gets the number of bytes in a sequence representing a single codepoint and starting with FirstChar in...
Definition: Encoding.h:77
unsigned columnWidth(StringRef Text, Encoding Encoding)
Returns the number of columns required to display the Text on a generic Unicode-capable terminal.
Definition: Encoding.h:44
static constexpr StringRef Blanks
bool switchesFormatting(const FormatToken &Token)
Checks if Token switches formatting, like /* clang-format off */.
static bool IsBlank(char C)
static BreakableToken::Split getStringSplit(StringRef Text, unsigned UsedColumns, unsigned ColumnLimit, unsigned TabWidth, encoding::Encoding Encoding)
static StringRef getLineCommentIndentPrefix(StringRef Comment, const FormatStyle &Style)
static bool mayReflowContent(StringRef Content)
static BreakableToken::Split getCommentSplit(StringRef Text, unsigned ContentStartColumn, unsigned ColumnLimit, unsigned TabWidth, encoding::Encoding Encoding, const FormatStyle &Style, bool DecorationEndsWithStar=false)
The JSON file list parser is used to communicate input to InstallAPI.
LLVM_READONLY bool isAlphanumeric(unsigned char c)
Return true if this character is an ASCII letter or digit: [a-zA-Z0-9].
Definition: CharInfo.h:138
LLVM_READONLY bool isHorizontalWhitespace(unsigned char c)
Returns true if this character is horizontal ASCII whitespace: ' ', '\t', '\f', '\v'.
Definition: CharInfo.h:91
LLVM_READONLY bool isPunctuation(unsigned char c)
Return true if this character is an ASCII punctuation character.
Definition: CharInfo.h:152
#define false
Definition: stdbool.h:26
unsigned Maximum
The maximum number of spaces at the start of the comment.
Definition: Format.h:4582
unsigned Minimum
The minimum number of spaces at the start of the comment.
Definition: Format.h:4580
bool Other
Put a space in parentheses not covered by preceding options.
Definition: Format.h:4687
The FormatStyle is used to configure the formatting to follow specific guidelines.
Definition: Format.h:55
unsigned ContinuationIndentWidth
Indent width for line continuations.
Definition: Format.h:2436
@ LK_Java
Should be used for Java.
Definition: Format.h:3160
@ LK_TextProto
Should be used for Protocol Buffer messages in text format (https://developers.google....
Definition: Format.h:3174
bool Cpp11BracedListStyle
If true, format braced lists as best suited for C++11 braced lists.
Definition: Format.h:2459
BinaryOperatorStyle BreakBeforeBinaryOperators
The way to wrap binary operators.
Definition: Format.h:1729
@ BOS_None
Break after operators.
Definition: Format.h:1700
LanguageKind Language
Language, this format style is targeted at.
Definition: Format.h:3192
unsigned TabWidth
The number of columns used for tab stops.
Definition: Format.h:4854
OperandAlignmentStyle AlignOperands
If true, horizontally align operands of binary and ternary expressions.
Definition: Format.h:549
SpacesInParensCustom SpacesInParensOptions
Control of individual spaces in parentheses.
Definition: Format.h:4722
bool isVerilog() const
Definition: Format.h:3184
SpacesInLineComment SpacesInLineCommentPrefix
How many spaces are allowed at the start of a line comment.
Definition: Format.h:4617
bool isJavaScript() const
Definition: Format.h:3183
@ OAS_AlignAfterOperator
Horizontally align operands of binary and ternary expressions.
Definition: Format.h:543
unsigned ColumnLimit
The column limit.
Definition: Format.h:2337
A wrapper around a Token storing information about the whitespace characters preceding it.
Definition: FormatToken.h:290
StringRef TokenText
The raw text of the token.
Definition: FormatToken.h:310
unsigned Finalized
If true, this token has been fully formatted (indented and potentially re-formatted inside),...
Definition: FormatToken.h:369
FormatToken * Next
The next token in the unwrapped line.
Definition: FormatToken.h:562
bool is(tok::TokenKind Kind) const
Definition: FormatToken.h:602
The current state when indenting an unwrapped line.