clang  19.0.0git
FormatTokenSource.h
Go to the documentation of this file.
1 //===--- FormatTokenSource.h - Format C++ code ------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file defines the \c FormatTokenSource interface, which provides a token
11 /// stream as well as the ability to manipulate the token stream.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #ifndef LLVM_CLANG_LIB_FORMAT_FORMATTOKENSOURCE_H
16 #define LLVM_CLANG_LIB_FORMAT_FORMATTOKENSOURCE_H
17 
18 #include "UnwrappedLineParser.h"
19 
20 #define DEBUG_TYPE "format-token-source"
21 
22 namespace clang {
23 namespace format {
24 
25 // Navigate a token stream.
26 //
27 // Enables traversal of a token stream, resetting the position in a token
28 // stream, as well as inserting new tokens.
30 public:
31  virtual ~FormatTokenSource() {}
32 
33  // Returns the next token in the token stream.
34  virtual FormatToken *getNextToken() = 0;
35 
36  // Returns the token preceding the token returned by the last call to
37  // getNextToken() in the token stream, or nullptr if no such token exists.
38  //
39  // Must not be called when the current position is directly after a call to
40  // insertTokens().
41  virtual FormatToken *getPreviousToken() = 0;
42 
43  // Returns the token that would be returned by the next call to
44  // getNextToken().
45  virtual FormatToken *peekNextToken(bool SkipComment = false) = 0;
46 
47  // Returns whether we are at the end of the file.
48  // This can be different from whether getNextToken() returned an eof token
49  // when the FormatTokenSource is a view on a part of the token stream.
50  virtual bool isEOF() = 0;
51 
52  // Gets the current position in the token stream, to be used by setPosition().
53  //
54  // Note that the value of the position is not meaningful, and specifically
55  // should not be used to get relative token positions.
56  virtual unsigned getPosition() = 0;
57 
58  // Resets the token stream to the state it was in when getPosition() returned
59  // Position, and returns the token at that position in the stream.
60  virtual FormatToken *setPosition(unsigned Position) = 0;
61 
62  // Insert the given tokens before the current position.
63  // Returns the first token in \c Tokens.
64  // The next returned token will be the second token in \c Tokens.
65  // Requires the last token in Tokens to be EOF; once the EOF token is reached,
66  // the next token will be the last token returned by getNextToken();
67  //
68  // For example, given the token sequence 'a1 a2':
69  // getNextToken() -> a1
70  // insertTokens('b1 b2') -> b1
71  // getNextToken() -> b2
72  // getNextToken() -> a1
73  // getNextToken() -> a2
75 
76  [[nodiscard]] FormatToken *getNextNonComment() {
77  FormatToken *Tok;
78  do {
79  Tok = getNextToken();
80  assert(Tok);
81  } while (Tok->is(tok::comment));
82  return Tok;
83  }
84 };
85 
87 public:
89  : Tokens(Tokens), Position(-1) {}
90 
91  FormatToken *getNextToken() override {
92  if (Position >= 0 && isEOF()) {
93  LLVM_DEBUG({
94  llvm::dbgs() << "Next ";
95  dbgToken(Position);
96  });
97  return Tokens[Position];
98  }
99  Position = successor(Position);
100  LLVM_DEBUG({
101  llvm::dbgs() << "Next ";
102  dbgToken(Position);
103  });
104  return Tokens[Position];
105  }
106 
108  assert(Position <= 0 || Tokens[Position - 1]->isNot(tok::eof));
109  return Position > 0 ? Tokens[Position - 1] : nullptr;
110  }
111 
112  FormatToken *peekNextToken(bool SkipComment = false) override {
113  if (isEOF())
114  return Tokens[Position];
115  int Next = successor(Position);
116  if (SkipComment)
117  while (Tokens[Next]->is(tok::comment))
118  Next = successor(Next);
119  LLVM_DEBUG({
120  llvm::dbgs() << "Peeking ";
121  dbgToken(Next);
122  });
123  return Tokens[Next];
124  }
125 
126  bool isEOF() override {
127  return Position == -1 ? false : Tokens[Position]->is(tok::eof);
128  }
129 
130  unsigned getPosition() override {
131  LLVM_DEBUG(llvm::dbgs() << "Getting Position: " << Position << "\n");
132  assert(Position >= 0);
133  return Position;
134  }
135 
136  FormatToken *setPosition(unsigned P) override {
137  LLVM_DEBUG(llvm::dbgs() << "Setting Position: " << P << "\n");
138  Position = P;
139  return Tokens[Position];
140  }
141 
143  assert(Position != -1);
144  assert((*New.rbegin())->Tok.is(tok::eof));
145  int Next = Tokens.size();
146  Tokens.append(New.begin(), New.end());
147  LLVM_DEBUG({
148  llvm::dbgs() << "Inserting:\n";
149  for (int I = Next, E = Tokens.size(); I != E; ++I)
150  dbgToken(I, " ");
151  llvm::dbgs() << " Jump from: " << (Tokens.size() - 1) << " -> "
152  << Position << "\n";
153  });
154  Jumps[Tokens.size() - 1] = Position;
155  Position = Next;
156  LLVM_DEBUG({
157  llvm::dbgs() << "At inserted token ";
158  dbgToken(Position);
159  });
160  return Tokens[Position];
161  }
162 
163  void reset() { Position = -1; }
164 
165 private:
166  int successor(int Current) const {
167  int Next = Current + 1;
168  auto it = Jumps.find(Next);
169  if (it != Jumps.end()) {
170  Next = it->second;
171  assert(!Jumps.contains(Next));
172  }
173  return Next;
174  }
175 
176  void dbgToken(int Position, StringRef Indent = "") {
177  FormatToken *Tok = Tokens[Position];
178  llvm::dbgs() << Indent << "[" << Position
179  << "] Token: " << Tok->Tok.getName() << " / " << Tok->TokenText
180  << ", Macro: " << !!Tok->MacroCtx << "\n";
181  }
182 
183  SmallVector<FormatToken *> Tokens;
184  int Position;
185 
186  // Maps from position a to position b, so that when we reach a, the token
187  // stream continues at position b instead.
188  llvm::DenseMap<int, int> Jumps;
189 };
190 
192 public:
194  FormatToken *&ResetToken)
195  : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
196  PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource),
197  Token(nullptr), PreviousToken(nullptr) {
198  FakeEOF.Tok.startToken();
199  FakeEOF.Tok.setKind(tok::eof);
200  TokenSource = this;
201  Line.Level = 0;
202  Line.InPPDirective = true;
203  // InMacroBody gets set after the `#define x` part.
204  }
205 
206  ~ScopedMacroState() override {
207  TokenSource = PreviousTokenSource;
208  ResetToken = Token;
209  Line.InPPDirective = false;
210  Line.InMacroBody = false;
211  Line.Level = PreviousLineLevel;
212  }
213 
214  FormatToken *getNextToken() override {
215  // The \c UnwrappedLineParser guards against this by never calling
216  // \c getNextToken() after it has encountered the first eof token.
217  assert(!eof());
218  PreviousToken = Token;
219  Token = PreviousTokenSource->getNextToken();
220  if (eof())
221  return &FakeEOF;
222  return Token;
223  }
224 
226  return PreviousTokenSource->getPreviousToken();
227  }
228 
229  FormatToken *peekNextToken(bool SkipComment) override {
230  if (eof())
231  return &FakeEOF;
232  return PreviousTokenSource->peekNextToken(SkipComment);
233  }
234 
235  bool isEOF() override { return PreviousTokenSource->isEOF(); }
236 
237  unsigned getPosition() override { return PreviousTokenSource->getPosition(); }
238 
239  FormatToken *setPosition(unsigned Position) override {
240  PreviousToken = nullptr;
241  Token = PreviousTokenSource->setPosition(Position);
242  return Token;
243  }
244 
246  llvm_unreachable("Cannot insert tokens while parsing a macro.");
247  return nullptr;
248  }
249 
250 private:
251  bool eof() {
252  return Token && Token->HasUnescapedNewline &&
253  !continuesLineComment(*Token, PreviousToken,
254  /*MinColumnToken=*/PreviousToken);
255  }
256 
257  FormatToken FakeEOF;
258  UnwrappedLine &Line;
259  FormatTokenSource *&TokenSource;
260  FormatToken *&ResetToken;
261  unsigned PreviousLineLevel;
262  FormatTokenSource *PreviousTokenSource;
263 
264  FormatToken *Token;
265  FormatToken *PreviousToken;
266 };
267 
268 } // namespace format
269 } // namespace clang
270 
271 #undef DEBUG_TYPE
272 
273 #endif
StringRef P
This file contains the declaration of the UnwrappedLineParser, which turns a stream of tokens into Un...
Token - This structure provides full information about a lexed token.
Definition: Token.h:36
void setKind(tok::TokenKind K)
Definition: Token.h:95
void startToken()
Reset all flags to cleared.
Definition: Token.h:177
virtual FormatToken * insertTokens(ArrayRef< FormatToken * > Tokens)=0
virtual FormatToken * peekNextToken(bool SkipComment=false)=0
virtual FormatToken * getNextToken()=0
virtual FormatToken * setPosition(unsigned Position)=0
virtual FormatToken * getPreviousToken()=0
virtual unsigned getPosition()=0
FormatToken * setPosition(unsigned P) override
IndexedTokenSource(ArrayRef< FormatToken * > Tokens)
FormatToken * peekNextToken(bool SkipComment=false) override
FormatToken * getNextToken() override
FormatToken * getPreviousToken() override
FormatToken * insertTokens(ArrayRef< FormatToken * > New) override
ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource, FormatToken *&ResetToken)
FormatToken * peekNextToken(bool SkipComment) override
FormatToken * getNextToken() override
FormatToken * insertTokens(ArrayRef< FormatToken * > Tokens) override
FormatToken * setPosition(unsigned Position) override
FormatToken * getPreviousToken() override
bool continuesLineComment(const FormatToken &FormatTok, const FormatToken *Previous, const FormatToken *MinColumnToken)
Definition: FormatToken.h:1936
The JSON file list parser is used to communicate input to InstallAPI.
raw_ostream & Indent(raw_ostream &Out, const unsigned int Space, bool IsDot)
Definition: JsonSupport.h:21
#define false
Definition: stdbool.h:26
A wrapper around a Token storing information about the whitespace characters preceding it.
Definition: FormatToken.h:290
bool is(tok::TokenKind Kind) const
Definition: FormatToken.h:602
An unwrapped line is a sequence of Token, that we would like to put on a single line if there was no ...
bool InMacroBody
Whether it is part of a macro body.
unsigned Level
The indent level of the UnwrappedLine.
bool InPPDirective
Whether this UnwrappedLine is part of a preprocessor directive.