clang  19.0.0git
TokenAnnotator.h
Go to the documentation of this file.
1 //===--- TokenAnnotator.h - Format C++ code ---------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file implements a token annotator, i.e. creates
11 /// \c AnnotatedTokens out of \c FormatTokens with required extra information.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #ifndef LLVM_CLANG_LIB_FORMAT_TOKENANNOTATOR_H
16 #define LLVM_CLANG_LIB_FORMAT_TOKENANNOTATOR_H
17 
18 #include "UnwrappedLineParser.h"
19 
20 namespace clang {
21 namespace format {
22 
23 enum LineType {
26  LT_ObjCDecl, // An @interface, @implementation, or @protocol line.
28  LT_ObjCProperty, // An @property line.
34 };
35 
36 enum ScopeType {
37  // Contained in class declaration/definition.
39  // Contained within function definition.
41  // Contained within other scope block (loop, if/else, etc).
43 };
44 
46 public:
48  : First(Line.Tokens.front().Tok), Level(Line.Level),
49  PPLevel(Line.PPLevel),
60  assert(!Line.Tokens.empty());
61 
62  // Calculate Next and Previous for all tokens. Note that we must overwrite
63  // Next and Previous for every token, as previous formatting runs might have
64  // left them in a different state.
65  First->Previous = nullptr;
66  FormatToken *Current = First;
67  addChildren(Line.Tokens.front(), Current);
68  for (const UnwrappedLineNode &Node : llvm::drop_begin(Line.Tokens)) {
69  if (Node.Tok->MacroParent)
70  ContainsMacroCall = true;
71  Current->Next = Node.Tok;
72  Node.Tok->Previous = Current;
73  Current = Current->Next;
74  addChildren(Node, Current);
75  // FIXME: if we add children, previous will point to the token before
76  // the children; changing this requires significant changes across
77  // clang-format.
78  }
79  Last = Current;
80  Last->Next = nullptr;
81  }
82 
83  void addChildren(const UnwrappedLineNode &Node, FormatToken *Current) {
84  Current->Children.clear();
85  for (const auto &Child : Node.Children) {
86  Children.push_back(new AnnotatedLine(Child));
87  if (Children.back()->ContainsMacroCall)
88  ContainsMacroCall = true;
89  Current->Children.push_back(Children.back());
90  }
91  }
92 
93  size_t size() const {
94  size_t Size = 1;
95  for (const auto *Child : Children)
96  Size += Child->size();
97  return Size;
98  }
99 
101  for (AnnotatedLine *Child : Children)
102  delete Child;
103  FormatToken *Current = First;
104  while (Current) {
105  Current->Children.clear();
106  Current->Role.reset();
107  Current = Current->Next;
108  }
109  }
110 
111  bool isComment() const {
112  return First && First->is(tok::comment) && !First->getNextNonComment();
113  }
114 
115  /// \c true if this line starts with the given tokens in order, ignoring
116  /// comments.
117  template <typename... Ts> bool startsWith(Ts... Tokens) const {
118  return First && First->startsSequence(Tokens...);
119  }
120 
121  /// \c true if this line ends with the given tokens in reversed order,
122  /// ignoring comments.
123  /// For example, given tokens [T1, T2, T3, ...], the function returns true if
124  /// this line is like "... T3 T2 T1".
125  template <typename... Ts> bool endsWith(Ts... Tokens) const {
126  return Last && Last->endsSequence(Tokens...);
127  }
128 
129  /// \c true if this line looks like a function definition instead of a
130  /// function declaration. Asserts MightBeFunctionDecl.
132  assert(MightBeFunctionDecl);
133  // Decide whether the token stream ends like a function definition or like a
134  // function declaration. A line whose last non-comment token is ';' (as in
135  // "foo();") is treated as a declaration; any other line is treated as a
136  // definition. Note that the line could end with
137  // foo(); /* comment */
138  // or
139  // foo(); // comment
140  // or
141  // foo() // comment
142  // endsWith() ignores the comment.
143  return !endsWith(tok::semi);
144  }
145 
146  /// \c true if this line starts a namespace definition.
147  bool startsWithNamespace() const {
148  return startsWith(tok::kw_namespace) || startsWith(TT_NamespaceMacro) ||
149  startsWith(tok::kw_inline, tok::kw_namespace) ||
150  startsWith(tok::kw_export, tok::kw_namespace);
151  }
152 
154  assert(First);
155  return First->is(tok::comment) ? First->getNextNonComment() : First;
156  }
157 
159  assert(Last);
160  return Last->is(tok::comment) ? Last->getPreviousNonComment() : Last;
161  }
162 
165 
167 
169  unsigned Level;
170  unsigned PPLevel;
179 
180  /// \c True if this line contains a macro call for which an expansion exists.
181  bool ContainsMacroCall = false;
182 
183  /// \c True if this line should be formatted, i.e. intersects directly or
184  /// indirectly with one of the input ranges.
185  bool Affected;
186 
187  /// \c True if the leading empty lines of this line intersect with one of the
188  /// input ranges.
190 
191  /// \c True if one of this line's children intersects with an input range.
193 
194  /// \c True if breaking after last attribute group in function return type.
196 
197  /// \c True if this line should be indented by ContinuationIndent in addition
198  /// to the normal indention level.
200 
202 
203 private:
204  // Disallow copying.
205  AnnotatedLine(const AnnotatedLine &) = delete;
206  void operator=(const AnnotatedLine &) = delete;
207 };
208 
209 /// Determines extra information about the tokens comprising an
210 /// \c UnwrappedLine.
212 public:
213  TokenAnnotator(const FormatStyle &Style, const AdditionalKeywords &Keywords)
214  : Style(Style), IsCpp(Style.isCpp()),
215  LangOpts(getFormattingLangOpts(Style)), Keywords(Keywords) {
216  assert(IsCpp == LangOpts.CXXOperatorNames);
217  }
218 
219  /// Adapts the indent levels of comment lines to the indent of the
220  /// subsequent line.
221  // FIXME: Can/should this be done in the UnwrappedLineParser?
223 
224  void annotate(AnnotatedLine &Line);
226 
227 private:
228  /// Calculate the penalty for splitting before \c Tok.
229  unsigned splitPenalty(const AnnotatedLine &Line, const FormatToken &Tok,
230  bool InFunctionDecl) const;
231 
232  bool spaceRequiredBeforeParens(const FormatToken &Right) const;
233 
234  bool spaceRequiredBetween(const AnnotatedLine &Line, const FormatToken &Left,
235  const FormatToken &Right) const;
236 
237  bool spaceRequiredBefore(const AnnotatedLine &Line,
238  const FormatToken &Right) const;
239 
240  bool mustBreakBefore(const AnnotatedLine &Line,
241  const FormatToken &Right) const;
242 
243  bool canBreakBefore(const AnnotatedLine &Line,
244  const FormatToken &Right) const;
245 
246  bool mustBreakForReturnType(const AnnotatedLine &Line) const;
247 
248  void printDebugInfo(const AnnotatedLine &Line) const;
249 
250  void calculateUnbreakableTailLengths(AnnotatedLine &Line) const;
251 
252  void calculateArrayInitializerColumnList(AnnotatedLine &Line) const;
253 
254  FormatToken *calculateInitializerColumnList(AnnotatedLine &Line,
255  FormatToken *CurrentToken,
256  unsigned Depth) const;
258  getTokenReferenceAlignment(const FormatToken &PointerOrReference) const;
259 
260  FormatStyle::PointerAlignmentStyle getTokenPointerOrReferenceAlignment(
261  const FormatToken &PointerOrReference) const;
262 
263  const FormatStyle &Style;
264 
265  bool IsCpp;
266  LangOptions LangOpts;
267 
268  const AdditionalKeywords &Keywords;
269 
270  SmallVector<ScopeType> Scopes;
271 };
272 
273 } // end namespace format
274 } // end namespace clang
275 
276 #endif
int Depth
Definition: ASTDiff.cpp:190
DynTypedNode Node
This file contains the declaration of the UnwrappedLineParser, which turns a stream of tokens into Un...
Keeps track of the various options that can be enabled, which controls the dialect of C or C++ that i...
Definition: LangOptions.h:482
void addChildren(const UnwrappedLineNode &Node, FormatToken *Current)
bool ReturnTypeWrapped
True if breaking after last attribute group in function return type.
bool LeadingEmptyLinesAffected
True if the leading empty lines of this line intersect with one of the input ranges.
bool Affected
True if this line should be formatted, i.e.
AnnotatedLine(const UnwrappedLine &Line)
bool ContainsMacroCall
True if this line contains a macro call for which an expansion exists.
bool mightBeFunctionDefinition() const
true if this line looks like a function definition instead of a function declaration.
bool ChildrenAffected
True if one of this line's children intersects with an input range.
SmallVector< AnnotatedLine *, 0 > Children
FormatToken * getFirstNonComment() const
bool startsWithNamespace() const
true if this line starts a namespace definition.
bool IsContinuation
True if this line should be indented by ContinuationIndent in addition to the normal indention level.
bool endsWith(Ts... Tokens) const
true if this line ends with the given tokens in reversed order, ignoring comments.
bool startsWith(Ts... Tokens) const
true if this line starts with the given tokens in order, ignoring comments.
FormatToken * getLastNonComment() const
Determines extra information about the tokens comprising an UnwrappedLine.
void calculateFormattingInformation(AnnotatedLine &Line) const
void annotate(AnnotatedLine &Line)
TokenAnnotator(const FormatStyle &Style, const AdditionalKeywords &Keywords)
void setCommentLineLevels(SmallVectorImpl< AnnotatedLine * > &Lines) const
Adapts the indent levels of comment lines to the indent of the subsequent line.
LangOptions getFormattingLangOpts(const FormatStyle &Style=getLLVMStyle())
Returns the LangOpts that the formatter expects you to set.
Definition: Format.cpp:3841
@ LT_CommentAbovePPDirective
@ LT_ArrayOfStructInitializer
The JSON file list parser is used to communicate input to InstallAPI.
#define false
Definition: stdbool.h:26
Encapsulates keywords that are context sensitive or for languages not properly supported by Clang's l...
Definition: FormatToken.h:995
The FormatStyle is used to configure the formatting to follow specific guidelines.
Definition: Format.h:55
PointerAlignmentStyle
The &, && and * alignment style.
Definition: Format.h:3557
A wrapper around a Token storing information about the whitespace characters preceding it.
Definition: FormatToken.h:290
bool startsSequence(A K1, Ts... Tokens) const
true if this token starts a sequence with the given tokens in order, following the Next pointers,...
Definition: FormatToken.h:639
FormatToken * Next
The next token in the unwrapped line.
Definition: FormatToken.h:562
FormatToken * getPreviousNonComment() const
Returns the previous token ignoring comments.
Definition: FormatToken.h:803
FormatToken * getNextNonComment() const
Returns the next token ignoring comments.
Definition: FormatToken.h:811
bool is(tok::TokenKind Kind) const
Definition: FormatToken.h:602
FormatToken * Previous
The previous token in the unwrapped line.
Definition: FormatToken.h:559
bool endsSequence(A K1, Ts... Tokens) const
true if this token ends a sequence with the given tokens in order, following the Previous pointers,...
Definition: FormatToken.h:650
An unwrapped line is a sequence of Token, that we would like to put on a single line if there was no ...