clang  19.0.0git
DefinitionBlockSeparator.cpp
Go to the documentation of this file.
1 //===--- DefinitionBlockSeparator.cpp ---------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file implements DefinitionBlockSeparator, a TokenAnalyzer that inserts
11 /// or removes empty lines separating definition blocks like classes, structs,
12 /// functions, enums, and namespaces in between.
13 ///
14 //===----------------------------------------------------------------------===//
15 
17 #include "llvm/Support/Debug.h"
18 #define DEBUG_TYPE "definition-block-separator"
19 
20 namespace clang {
21 namespace format {
/// Entry point of this analyzer. Runs AffectedRangeMgr.computeAffectedLines()
/// over the annotated lines, delegates the real work to separateBlocks() and
/// returns whatever replacements that call collected.
///
/// \param Annotator Not referenced by the statements visible in this listing.
/// \param AnnotatedLines Lines handed to computeAffectedLines() and
///        separateBlocks().
/// \param Tokens Lexer forwarded unchanged to separateBlocks().
/// \returns The collected replacements paired with the constant 0.
///
// NOTE(review): this listing jumps from source line 24 to 26, so one line of
// the original body is not shown here - check the real file before editing.
22 std::pair<tooling::Replacements, unsigned> DefinitionBlockSeparator::analyze(
23  TokenAnnotator &Annotator, SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
24  FormatTokenLexer &Tokens) {
    // Must run before separateBlocks(), which skips lines whose Affected flag
    // is not set.
26  AffectedRangeMgr.computeAffectedLines(AnnotatedLines);
27  tooling::Replacements Result;
    // Fills Result with the whitespace replacements.
28  separateBlocks(AnnotatedLines, Result, Tokens);
29  return {Result, 0};
30 }
31 
/// Walks the annotated lines and, for every line that ends a definition block
/// (or that holds an enum), requests whitespace changes in front of the
/// block's opening line and in front of the line following the block. The
/// replacements generated by the WhitespaceManager are finally added to
/// Result.
///
/// \param Tokens Lexer whose getKeywords() supplies the additional keyword
///        table used to detect the JavaScript `function` keyword.
///
// NOTE(review): this listing omits original source lines 33, 36, 69, 71-76 and
// 110-111. The declarations of `Lines` and `Result`, the initializers of
// IsNeverStyle and NewlineCount, the WhitespaceManager constructor arguments
// and part of the preprocessor token list in IsPPConditional are therefore not
// visible here - consult the real file before changing this function.
32 void DefinitionBlockSeparator::separateBlocks(
34  FormatTokenLexer &Tokens) {
    // IsNeverStyle selects the branches below that pass an explicit 0 or 1 to
    // InsertReplacement instead of NewlineCount (initializer not shown).
35  const bool IsNeverStyle =
37  const AdditionalKeywords &ExtraKeywords = Tokens.getKeywords();
    // Nesting delta of a token: +1 for '{', '(' or '[', -1 for the matching
    // closers, 0 for everything else.
38  auto GetBracketLevelChange = [](const FormatToken *Tok) {
39  if (Tok->isOneOf(tok::l_brace, tok::l_paren, tok::l_square))
40  return 1;
41  if (Tok->isOneOf(tok::r_brace, tok::r_paren, tok::r_square))
42  return -1;
43  return 0;
44  };
    // True when Line looks like the start of a definition: a possible function
    // definition, a namespace, or a line with class/struct/union (or the
    // JavaScript `function` keyword) outside any brackets. `enum` outside
    // brackets also counts unless ExcludeEnum is set.
45  auto LikelyDefinition = [&](const AnnotatedLine *Line,
46  bool ExcludeEnum = false) {
47  if ((Line->MightBeFunctionDecl && Line->mightBeFunctionDefinition()) ||
48  Line->startsWithNamespace()) {
49  return true;
50  }
51  int BracketLevel = 0;
52  for (const FormatToken *CurrentToken = Line->First; CurrentToken;
53  CurrentToken = CurrentToken->Next) {
    // Keywords are only considered while no bracket is open on this line.
54  if (BracketLevel == 0) {
55  if (CurrentToken->isOneOf(tok::kw_class, tok::kw_struct,
56  tok::kw_union) ||
57  (Style.isJavaScript() &&
58  CurrentToken->is(ExtraKeywords.kw_function))) {
59  return true;
60  }
61  if (!ExcludeEnum && CurrentToken->is(tok::kw_enum))
62  return true;
63  }
64  BracketLevel += GetBracketLevelChange(CurrentToken);
65  }
66  return false;
67  };
    // NewlineCount is the newline count passed to InsertReplacement wherever a
    // separation is requested below (initializer not shown in this listing).
68  unsigned NewlineCount =
70  WhitespaceManager Whitespaces(
77  for (unsigned I = 0; I < Lines.size(); ++I) {
78  const auto &CurrentLine = Lines[I];
    // Lines inside preprocessor directives are never used as block ends.
79  if (CurrentLine->InPPDirective)
80  continue;
81  FormatToken *TargetToken = nullptr;
    // NOTE(review): TargetLine has no initializer; the asserts inside
    // InsertReplacement rely on every call site assigning it first, which all
    // call sites visible here do.
82  AnnotatedLine *TargetLine;
83  auto OpeningLineIndex = CurrentLine->MatchingOpeningBlockLineIndex;
84  AnnotatedLine *OpeningLine = nullptr;
85  const auto IsAccessSpecifierToken = [](const FormatToken *Token) {
86  return Token->isAccessSpecifier() || Token->isObjCAccessSpecifier();
87  };
    // Requests NewlineToInsert newlines in front of TargetToken while keeping
    // its original column. Does nothing for EOF, for access specifiers (on the
    // target or on the line before the opening line) and for lines that are
    // not marked Affected.
88  const auto InsertReplacement = [&](const int NewlineToInsert) {
89  assert(TargetLine);
90  assert(TargetToken);
91 
92  // Do not handle EOF newlines.
93  if (TargetToken->is(tok::eof))
94  return;
95  if (IsAccessSpecifierToken(TargetToken) ||
96  (OpeningLineIndex > 0 &&
97  IsAccessSpecifierToken(Lines[OpeningLineIndex - 1]->First))) {
98  return;
99  }
100  if (!TargetLine->Affected)
101  return;
102  Whitespaces.replaceWhitespace(*TargetToken, NewlineToInsert,
103  TargetToken->OriginalColumn,
104  TargetToken->OriginalColumn);
105  };
     // True when the line starts with '#' followed by one of the listed
     // conditional directive keywords (the listing omits part of the list).
106  const auto IsPPConditional = [&](const size_t LineIndex) {
107  const auto &Line = Lines[LineIndex];
108  return Line->First->is(tok::hash) && Line->First->Next &&
109  Line->First->Next->isOneOf(tok::pp_if, tok::pp_ifdef, tok::pp_else,
112  tok::pp_endif);
113  };
     // True when the opening line is the very first line, or the line before
     // it ends with a scope opener or is a preprocessor conditional.
114  const auto FollowingOtherOpening = [&]() {
115  return OpeningLineIndex == 0 ||
116  Lines[OpeningLineIndex - 1]->Last->opensScope() ||
117  IsPPConditional(OpeningLineIndex - 1);
118  };
     // True when CurrentLine has `enum` outside brackets followed by '{' on
     // the same line, or has such an `enum` and the next line starts with '{'.
119  const auto HasEnumOnLine = [&]() {
120  bool FoundEnumKeyword = false;
121  int BracketLevel = 0;
122  for (const FormatToken *CurrentToken = CurrentLine->First; CurrentToken;
123  CurrentToken = CurrentToken->Next) {
124  if (BracketLevel == 0) {
125  if (CurrentToken->is(tok::kw_enum))
126  FoundEnumKeyword = true;
127  else if (FoundEnumKeyword && CurrentToken->is(tok::l_brace))
128  return true;
129  }
130  BracketLevel += GetBracketLevelChange(CurrentToken);
131  }
132  return FoundEnumKeyword && I + 1 < Lines.size() &&
133  Lines[I + 1]->First->is(tok::l_brace);
134  };
135 
136  bool IsDefBlock = false;
     // Tests the line at OpeningLineIndex + Direction. Returns true for a
     // comment line (other than a clang-format "on" marker), a lone
     // macro-like identifier directly above a function definition, or a C#
     // attribute. Returns false when that line is itself a likely definition.
137  const auto MayPrecedeDefinition = [&](const int Direction = -1) {
138  assert(Direction >= -1);
139  assert(Direction <= 1);
140  const size_t OperateIndex = OpeningLineIndex + Direction;
141  assert(OperateIndex < Lines.size());
142  const auto &OperateLine = Lines[OperateIndex];
143  if (LikelyDefinition(OperateLine))
144  return false;
145 
146  if (const auto *Tok = OperateLine->First;
147  Tok->is(tok::comment) && !isClangFormatOn(Tok->TokenText)) {
148  return true;
149  }
150 
151  // A single line identifier that is not in the last line.
152  if (OperateLine->First->is(tok::identifier) &&
153  OperateLine->First == OperateLine->Last &&
154  OperateIndex + 1 < Lines.size()) {
155  // UnwrappedLineParser's recognition of free-standing macro like
156  // Q_OBJECT may also recognize some uppercased type names that may be
157  // used as return type as that kind of macros, which is a bit hard to
158  // distinguish one from another purely from token patterns. Here, we
159  // try not to add new lines below those identifiers.
160  AnnotatedLine *NextLine = Lines[OperateIndex + 1];
161  if (NextLine->MightBeFunctionDecl &&
162  NextLine->mightBeFunctionDefinition() &&
163  NextLine->First->NewlinesBefore == 1 &&
164  OperateLine->First->is(TT_FunctionLikeOrFreestandingMacro)) {
165  return true;
166  }
167  }
168 
169  if (Style.isCSharp() && OperateLine->First->is(TT_AttributeSquare))
170  return true;
171  return false;
172  };
173 
     // Case 1: an enum that is not also another kind of definition. The
     // current line is treated as the opening line of the block.
174  if (HasEnumOnLine() &&
175  !LikelyDefinition(CurrentLine, /*ExcludeEnum=*/true)) {
176  // We have no scope opening/closing information for enum.
177  IsDefBlock = true;
178  OpeningLineIndex = I;
     // Move the opening line upwards past lines that belong to the enum
     // according to MayPrecedeDefinition.
179  while (OpeningLineIndex > 0 && MayPrecedeDefinition())
180  --OpeningLineIndex;
181  OpeningLine = Lines[OpeningLineIndex];
182  TargetLine = OpeningLine;
183  TargetToken = TargetLine->First;
184  if (!FollowingOtherOpening())
185  InsertReplacement(NewlineCount);
186  else if (IsNeverStyle)
     // The bool converts to 0 newlines for the first line, 1 otherwise.
187  InsertReplacement(OpeningLineIndex != 0);
188  TargetLine = CurrentLine;
189  TargetToken = TargetLine->First;
     // Look for the closing brace on the current line.
190  while (TargetToken && TargetToken->isNot(tok::r_brace))
191  TargetToken = TargetToken->Next;
     // No closing brace on this line: advance I to the next line that starts
     // with one (or to the end of Lines).
192  if (!TargetToken)
193  while (I < Lines.size() && Lines[I]->First->isNot(tok::r_brace))
194  ++I;
195  } else if (CurrentLine->First->closesScope()) {
     // Case 2: the line starts with a scope-closing token.
     // NOTE(review): this check lets OpeningLineIndex == Lines.size() through
     // to the Lines[OpeningLineIndex] accesses below; presumably an unmatched
     // line carries a much larger sentinel index - confirm.
196  if (OpeningLineIndex > Lines.size())
197  continue;
198  // Handling the case that opening brace has its own line, with checking
199  // whether the last line already had an opening brace to guard against
200  // misrecognition.
201  if (OpeningLineIndex > 0 &&
202  Lines[OpeningLineIndex]->First->is(tok::l_brace) &&
203  Lines[OpeningLineIndex - 1]->Last->isNot(tok::l_brace)) {
204  --OpeningLineIndex;
205  }
206  OpeningLine = Lines[OpeningLineIndex];
207  // Closing a function definition.
208  if (LikelyDefinition(OpeningLine)) {
209  IsDefBlock = true;
     // Move the opening line upwards past lines that belong to the definition
     // according to MayPrecedeDefinition.
210  while (OpeningLineIndex > 0 && MayPrecedeDefinition())
211  --OpeningLineIndex;
212  OpeningLine = Lines[OpeningLineIndex];
213  TargetLine = OpeningLine;
214  TargetToken = TargetLine->First;
215  if (!FollowingOtherOpening()) {
216  // Avoid duplicated replacement.
217  if (TargetToken->isNot(tok::l_brace))
218  InsertReplacement(NewlineCount);
219  } else if (IsNeverStyle) {
220  InsertReplacement(OpeningLineIndex != 0);
221  }
222  }
223  }
224 
     // The current line ended a definition block: handle the line after it.
225  // Not the last token.
226  if (IsDefBlock && I + 1 < Lines.size()) {
227  OpeningLineIndex = I + 1;
228  TargetLine = Lines[OpeningLineIndex];
229  TargetToken = TargetLine->First;
230 
231  // No empty line for continuously closing scopes. The token will be
232  // handled in another case if the line following is opening a
233  // definition.
234  if (!TargetToken->closesScope() && !IsPPConditional(OpeningLineIndex)) {
235  // Check whether current line may precede a definition line.
236  while (OpeningLineIndex + 1 < Lines.size() &&
237  MayPrecedeDefinition(/*Direction=*/0)) {
238  ++OpeningLineIndex;
239  }
240  TargetLine = Lines[OpeningLineIndex];
     // Only separate here when the scan above did not stop on a definition;
     // the target is reset to the line directly after the block.
241  if (!LikelyDefinition(TargetLine)) {
242  OpeningLineIndex = I + 1;
243  TargetLine = Lines[I + 1];
244  TargetToken = TargetLine->First;
245  InsertReplacement(NewlineCount);
246  }
247  } else if (IsNeverStyle) {
248  InsertReplacement(/*NewlineToInsert=*/1);
249  }
250  }
251  }
     // Copy the generated replacements into Result, stopping at the first one
     // that add() reports as an error.
252  for (const auto &R : Whitespaces.generateReplacements()) {
253  // The add method returns an Error instance which simulates program exit
254  // code through overloading boolean operator, thus false here indicates
255  // success.
256  if (Result.add(R))
257  return;
258  }
259 }
260 } // namespace format
261 } // namespace clang
This file declares DefinitionBlockSeparator, a TokenAnalyzer that inserts or removes empty lines separating definition blocks like classes, structs, functions, enums, and namespaces.
StringRef getBufferData(FileID FID, bool *Invalid=nullptr) const
Return a StringRef to the source buffer data for the specified FileID.
bool computeAffectedLines(SmallVectorImpl< AnnotatedLine * > &Lines)
std::pair< tooling::Replacements, unsigned > analyze(TokenAnnotator &Annotator, SmallVectorImpl< AnnotatedLine * > &AnnotatedLines, FormatTokenLexer &Tokens) override
SourceManager & getSourceManager() const
Definition: TokenAnalyzer.h:38
AffectedRangeManager AffectedRangeMgr
Definition: TokenAnalyzer.h:99
const Environment & Env
Definition: TokenAnalyzer.h:97
Determines extra information about the tokens comprising an UnwrappedLine.
static bool inputUsesCRLF(StringRef Text, bool DefaultToCRLF)
Infers whether the input is using CRLF.
Maintains a set of replacements that are conflict-free.
Definition: Replacement.h:212
bool isClangFormatOn(StringRef Comment)
Definition: Format.cpp:4134
The JSON file list parser is used to communicate input to InstallAPI.
Encapsulates keywords that are context sensitive or for languages not properly supported by Clang's lexer.
Definition: FormatToken.h:995
@ SDS_Never
Remove any empty line between definition blocks.
Definition: Format.h:3993
@ SDS_Always
Insert an empty line between definition blocks.
Definition: Format.h:3991
@ SDS_Leave
Leave definition blocks as they are.
Definition: Format.h:3989
@ LE_DeriveCRLF
Use \r\n unless the input has more lines ending in \n.
Definition: Format.h:3203
bool isCSharp() const
Definition: Format.h:3181
bool isJavaScript() const
Definition: Format.h:3183
LineEndingStyle LineEnding
Line ending style (\n or \r\n) to use.
Definition: Format.h:3208
SeparateDefinitionStyle SeparateDefinitionBlocks
Specifies the use of empty lines to separate definition blocks, including classes, structs, enums, and functions.
Definition: Format.h:4042
A wrapper around a Token storing information about the whitespace characters preceding it.
Definition: FormatToken.h:290