clang  19.0.0git
TextDiagnostic.cpp
Go to the documentation of this file.
1 //===--- TextDiagnostic.cpp - Text Diagnostic Pretty-Printing -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
10 #include "clang/Basic/CharInfo.h"
14 #include "clang/Lex/Lexer.h"
15 #include "clang/Lex/Preprocessor.h"
16 #include "llvm/ADT/SmallString.h"
17 #include "llvm/ADT/StringExtras.h"
18 #include "llvm/Support/ConvertUTF.h"
19 #include "llvm/Support/ErrorHandling.h"
20 #include "llvm/Support/Locale.h"
21 #include "llvm/Support/Path.h"
22 #include "llvm/Support/raw_ostream.h"
23 #include <algorithm>
24 #include <optional>
25 
26 using namespace clang;
27 
28 static const enum raw_ostream::Colors noteColor = raw_ostream::CYAN;
29 static const enum raw_ostream::Colors remarkColor =
30  raw_ostream::BLUE;
31 static const enum raw_ostream::Colors fixitColor =
32  raw_ostream::GREEN;
33 static const enum raw_ostream::Colors caretColor =
34  raw_ostream::GREEN;
35 static const enum raw_ostream::Colors warningColor =
36  raw_ostream::MAGENTA;
37 static const enum raw_ostream::Colors templateColor =
38  raw_ostream::CYAN;
39 static const enum raw_ostream::Colors errorColor = raw_ostream::RED;
40 static const enum raw_ostream::Colors fatalColor = raw_ostream::RED;
41 // Used for changing only the bold attribute.
42 static const enum raw_ostream::Colors savedColor =
43  raw_ostream::SAVEDCOLOR;
44 
45 // Magenta is taken for 'warning'. Red is already 'error' and 'cyan'
46 // is already taken for 'note'. Green is already used to underline
47 // source ranges. White and black are bad because of the usual
48 // terminal backgrounds. Which leaves us only with TWO options.
49 static constexpr raw_ostream::Colors CommentColor = raw_ostream::YELLOW;
50 static constexpr raw_ostream::Colors LiteralColor = raw_ostream::GREEN;
51 static constexpr raw_ostream::Colors KeywordColor = raw_ostream::BLUE;
52 
53 /// Add highlights to differences in template strings.
54 static void applyTemplateHighlighting(raw_ostream &OS, StringRef Str,
55  bool &Normal, bool Bold) {
56  while (true) {
57  size_t Pos = Str.find(ToggleHighlight);
58  OS << Str.slice(0, Pos);
59  if (Pos == StringRef::npos)
60  break;
61 
62  Str = Str.substr(Pos + 1);
63  if (Normal)
64  OS.changeColor(templateColor, true);
65  else {
66  OS.resetColor();
67  if (Bold)
68  OS.changeColor(savedColor, true);
69  }
70  Normal = !Normal;
71  }
72 }
73 
74 /// Number of spaces to indent when word-wrapping.
75 const unsigned WordWrapIndentation = 6;
76 
77 static int bytesSincePreviousTabOrLineBegin(StringRef SourceLine, size_t i) {
78  int bytes = 0;
79  while (0<i) {
80  if (SourceLine[--i]=='\t')
81  break;
82  ++bytes;
83  }
84  return bytes;
85 }
86 
87 /// returns a printable representation of first item from input range
88 ///
89 /// This function returns a printable representation of the next item in a line
90 /// of source. If the next byte begins a valid and printable character, that
91 /// character is returned along with 'true'.
92 ///
93 /// Otherwise, if the next byte begins a valid, but unprintable character, a
94 /// printable, escaped representation of the character is returned, along with
95 /// 'false'. Otherwise a printable, escaped representation of the next byte
96 /// is returned along with 'false'.
97 ///
98 /// \note The index is updated to be used with a subsequent call to
99 /// printableTextForNextCharacter.
100 ///
101 /// \param SourceLine The line of source
102 /// \param I Pointer to byte index,
103 /// \param TabStop used to expand tabs
104 /// \return pair(printable text, 'true' iff original text was printable)
105 ///
106 static std::pair<SmallString<16>, bool>
107 printableTextForNextCharacter(StringRef SourceLine, size_t *I,
108  unsigned TabStop) {
109  assert(I && "I must not be null");
110  assert(*I < SourceLine.size() && "must point to a valid index");
111 
112  if (SourceLine[*I] == '\t') {
113  assert(0 < TabStop && TabStop <= DiagnosticOptions::MaxTabStop &&
114  "Invalid -ftabstop value");
115  unsigned Col = bytesSincePreviousTabOrLineBegin(SourceLine, *I);
116  unsigned NumSpaces = TabStop - (Col % TabStop);
117  assert(0 < NumSpaces && NumSpaces <= TabStop
118  && "Invalid computation of space amt");
119  ++(*I);
120 
121  SmallString<16> ExpandedTab;
122  ExpandedTab.assign(NumSpaces, ' ');
123  return std::make_pair(ExpandedTab, true);
124  }
125 
126  const unsigned char *Begin = SourceLine.bytes_begin() + *I;
127 
128  // Fast path for the common ASCII case.
129  if (*Begin < 0x80 && llvm::sys::locale::isPrint(*Begin)) {
130  ++(*I);
131  return std::make_pair(SmallString<16>(Begin, Begin + 1), true);
132  }
133  unsigned CharSize = llvm::getNumBytesForUTF8(*Begin);
134  const unsigned char *End = Begin + CharSize;
135 
136  // Convert it to UTF32 and check if it's printable.
137  if (End <= SourceLine.bytes_end() && llvm::isLegalUTF8Sequence(Begin, End)) {
138  llvm::UTF32 C;
139  llvm::UTF32 *CPtr = &C;
140 
141  // Begin and end before conversion.
142  unsigned char const *OriginalBegin = Begin;
143  llvm::ConversionResult Res = llvm::ConvertUTF8toUTF32(
144  &Begin, End, &CPtr, CPtr + 1, llvm::strictConversion);
145  (void)Res;
146  assert(Res == llvm::conversionOK);
147  assert(OriginalBegin < Begin);
148  assert((Begin - OriginalBegin) == CharSize);
149 
150  (*I) += (Begin - OriginalBegin);
151 
152  // Valid, multi-byte, printable UTF8 character.
153  if (llvm::sys::locale::isPrint(C))
154  return std::make_pair(SmallString<16>(OriginalBegin, End), true);
155 
156  // Valid but not printable.
157  SmallString<16> Str("<U+>");
158  while (C) {
159  Str.insert(Str.begin() + 3, llvm::hexdigit(C % 16));
160  C /= 16;
161  }
162  while (Str.size() < 8)
163  Str.insert(Str.begin() + 3, llvm::hexdigit(0));
164  return std::make_pair(Str, false);
165  }
166 
167  // Otherwise, not printable since it's not valid UTF8.
168  SmallString<16> ExpandedByte("<XX>");
169  unsigned char Byte = SourceLine[*I];
170  ExpandedByte[1] = llvm::hexdigit(Byte / 16);
171  ExpandedByte[2] = llvm::hexdigit(Byte % 16);
172  ++(*I);
173  return std::make_pair(ExpandedByte, false);
174 }
175 
176 static void expandTabs(std::string &SourceLine, unsigned TabStop) {
177  size_t I = SourceLine.size();
178  while (I > 0) {
179  I--;
180  if (SourceLine[I] != '\t')
181  continue;
182  size_t TmpI = I;
183  auto [Str, Printable] =
184  printableTextForNextCharacter(SourceLine, &TmpI, TabStop);
185  SourceLine.replace(I, 1, Str.c_str());
186  }
187 }
188 
189 /// \p BytesOut:
190 /// A mapping from columns to the byte of the source line that produced the
191 /// character displaying at that column. This is the inverse of \p ColumnsOut.
192 ///
193 /// The last element in the array is the number of bytes in the source string.
194 ///
195 /// example: (given a tabstop of 8)
196 ///
197 /// "a \t \u3042" -> {0,1,2,-1,-1,-1,-1,-1,3,4,-1,7}
198 ///
199 /// (\\u3042 is represented in UTF-8 by three bytes and takes two columns to
200 /// display)
201 ///
202 /// \p ColumnsOut:
203 /// A mapping from the bytes
204 /// of the printable representation of the line to the columns those printable
205 /// characters will appear at (numbering the first column as 0).
206 ///
207 /// If a byte 'i' corresponds to multiple columns (e.g. the byte contains a tab
208 /// character) then the array will map that byte to the first column the
209 /// tab appears at and the next value in the map will have been incremented
210 /// more than once.
211 ///
212 /// If a byte is the first in a sequence of bytes that together map to a single
213 /// entity in the output, then the array will map that byte to the appropriate
214 /// column while the subsequent bytes will be -1.
215 ///
216 /// The last element in the array does not correspond to any byte in the input
217 /// and instead is the number of columns needed to display the source
218 ///
219 /// example: (given a tabstop of 8)
220 ///
221 /// "a \t \u3042" -> {0,1,2,8,9,-1,-1,11}
222 ///
223 /// (\\u3042 is represented in UTF-8 by three bytes and takes two columns to
224 /// display)
225 static void genColumnByteMapping(StringRef SourceLine, unsigned TabStop,
226  SmallVectorImpl<int> &BytesOut,
227  SmallVectorImpl<int> &ColumnsOut) {
228  assert(BytesOut.empty());
229  assert(ColumnsOut.empty());
230 
231  if (SourceLine.empty()) {
232  BytesOut.resize(1u, 0);
233  ColumnsOut.resize(1u, 0);
234  return;
235  }
236 
237  ColumnsOut.resize(SourceLine.size() + 1, -1);
238 
239  int Columns = 0;
240  size_t I = 0;
241  while (I < SourceLine.size()) {
242  ColumnsOut[I] = Columns;
243  BytesOut.resize(Columns + 1, -1);
244  BytesOut.back() = I;
245  auto [Str, Printable] =
246  printableTextForNextCharacter(SourceLine, &I, TabStop);
247  Columns += llvm::sys::locale::columnWidth(Str);
248  }
249 
250  ColumnsOut.back() = Columns;
251  BytesOut.resize(Columns + 1, -1);
252  BytesOut.back() = I;
253 }
254 
255 namespace {
256 struct SourceColumnMap {
257  SourceColumnMap(StringRef SourceLine, unsigned TabStop)
258  : m_SourceLine(SourceLine) {
259 
260  genColumnByteMapping(SourceLine, TabStop, m_columnToByte, m_byteToColumn);
261 
262  assert(m_byteToColumn.size()==SourceLine.size()+1);
263  assert(0 < m_byteToColumn.size() && 0 < m_columnToByte.size());
264  assert(m_byteToColumn.size()
265  == static_cast<unsigned>(m_columnToByte.back()+1));
266  assert(static_cast<unsigned>(m_byteToColumn.back()+1)
267  == m_columnToByte.size());
268  }
269  int columns() const { return m_byteToColumn.back(); }
270  int bytes() const { return m_columnToByte.back(); }
271 
272  /// Map a byte to the column which it is at the start of, or return -1
273  /// if it is not at the start of a column (for a UTF-8 trailing byte).
274  int byteToColumn(int n) const {
275  assert(0<=n && n<static_cast<int>(m_byteToColumn.size()));
276  return m_byteToColumn[n];
277  }
278 
279  /// Map a byte to the first column which contains it.
280  int byteToContainingColumn(int N) const {
281  assert(0 <= N && N < static_cast<int>(m_byteToColumn.size()));
282  while (m_byteToColumn[N] == -1)
283  --N;
284  return m_byteToColumn[N];
285  }
286 
287  /// Map a column to the byte which starts the column, or return -1 if
288  /// the column the second or subsequent column of an expanded tab or similar
289  /// multi-column entity.
290  int columnToByte(int n) const {
291  assert(0<=n && n<static_cast<int>(m_columnToByte.size()));
292  return m_columnToByte[n];
293  }
294 
295  /// Map from a byte index to the next byte which starts a column.
296  int startOfNextColumn(int N) const {
297  assert(0 <= N && N < static_cast<int>(m_byteToColumn.size() - 1));
298  while (byteToColumn(++N) == -1) {}
299  return N;
300  }
301 
302  /// Map from a byte index to the previous byte which starts a column.
303  int startOfPreviousColumn(int N) const {
304  assert(0 < N && N < static_cast<int>(m_byteToColumn.size()));
305  while (byteToColumn(--N) == -1) {}
306  return N;
307  }
308 
309  StringRef getSourceLine() const {
310  return m_SourceLine;
311  }
312 
313 private:
314  const std::string m_SourceLine;
315  SmallVector<int,200> m_byteToColumn;
316  SmallVector<int,200> m_columnToByte;
317 };
318 } // end anonymous namespace
319 
320 /// When the source code line we want to print is too long for
321 /// the terminal, select the "interesting" region.
322 static void selectInterestingSourceRegion(std::string &SourceLine,
323  std::string &CaretLine,
324  std::string &FixItInsertionLine,
325  unsigned Columns,
326  const SourceColumnMap &map) {
327  unsigned CaretColumns = CaretLine.size();
328  unsigned FixItColumns = llvm::sys::locale::columnWidth(FixItInsertionLine);
329  unsigned MaxColumns = std::max(static_cast<unsigned>(map.columns()),
330  std::max(CaretColumns, FixItColumns));
331  // if the number of columns is less than the desired number we're done
332  if (MaxColumns <= Columns)
333  return;
334 
335  // No special characters are allowed in CaretLine.
336  assert(llvm::none_of(CaretLine, [](char c) { return c < ' ' || '~' < c; }));
337 
338  // Find the slice that we need to display the full caret line
339  // correctly.
340  unsigned CaretStart = 0, CaretEnd = CaretLine.size();
341  for (; CaretStart != CaretEnd; ++CaretStart)
342  if (!isWhitespace(CaretLine[CaretStart]))
343  break;
344 
345  for (; CaretEnd != CaretStart; --CaretEnd)
346  if (!isWhitespace(CaretLine[CaretEnd - 1]))
347  break;
348 
349  // caret has already been inserted into CaretLine so the above whitespace
350  // check is guaranteed to include the caret
351 
352  // If we have a fix-it line, make sure the slice includes all of the
353  // fix-it information.
354  if (!FixItInsertionLine.empty()) {
355  unsigned FixItStart = 0, FixItEnd = FixItInsertionLine.size();
356  for (; FixItStart != FixItEnd; ++FixItStart)
357  if (!isWhitespace(FixItInsertionLine[FixItStart]))
358  break;
359 
360  for (; FixItEnd != FixItStart; --FixItEnd)
361  if (!isWhitespace(FixItInsertionLine[FixItEnd - 1]))
362  break;
363 
364  // We can safely use the byte offset FixItStart as the column offset
365  // because the characters up until FixItStart are all ASCII whitespace
366  // characters.
367  unsigned FixItStartCol = FixItStart;
368  unsigned FixItEndCol
369  = llvm::sys::locale::columnWidth(FixItInsertionLine.substr(0, FixItEnd));
370 
371  CaretStart = std::min(FixItStartCol, CaretStart);
372  CaretEnd = std::max(FixItEndCol, CaretEnd);
373  }
374 
375  // CaretEnd may have been set at the middle of a character
376  // If it's not at a character's first column then advance it past the current
377  // character.
378  while (static_cast<int>(CaretEnd) < map.columns() &&
379  -1 == map.columnToByte(CaretEnd))
380  ++CaretEnd;
381 
382  assert((static_cast<int>(CaretStart) > map.columns() ||
383  -1!=map.columnToByte(CaretStart)) &&
384  "CaretStart must not point to a column in the middle of a source"
385  " line character");
386  assert((static_cast<int>(CaretEnd) > map.columns() ||
387  -1!=map.columnToByte(CaretEnd)) &&
388  "CaretEnd must not point to a column in the middle of a source line"
389  " character");
390 
391  // CaretLine[CaretStart, CaretEnd) contains all of the interesting
392  // parts of the caret line. While this slice is smaller than the
393  // number of columns we have, try to grow the slice to encompass
394  // more context.
395 
396  unsigned SourceStart = map.columnToByte(std::min<unsigned>(CaretStart,
397  map.columns()));
398  unsigned SourceEnd = map.columnToByte(std::min<unsigned>(CaretEnd,
399  map.columns()));
400 
401  unsigned CaretColumnsOutsideSource = CaretEnd-CaretStart
402  - (map.byteToColumn(SourceEnd)-map.byteToColumn(SourceStart));
403 
404  char const *front_ellipse = " ...";
405  char const *front_space = " ";
406  char const *back_ellipse = "...";
407  unsigned ellipses_space = strlen(front_ellipse) + strlen(back_ellipse);
408 
409  unsigned TargetColumns = Columns;
410  // Give us extra room for the ellipses
411  // and any of the caret line that extends past the source
412  if (TargetColumns > ellipses_space+CaretColumnsOutsideSource)
413  TargetColumns -= ellipses_space+CaretColumnsOutsideSource;
414 
415  while (SourceStart>0 || SourceEnd<SourceLine.size()) {
416  bool ExpandedRegion = false;
417 
418  if (SourceStart>0) {
419  unsigned NewStart = map.startOfPreviousColumn(SourceStart);
420 
421  // Skip over any whitespace we see here; we're looking for
422  // another bit of interesting text.
423  // FIXME: Detect non-ASCII whitespace characters too.
424  while (NewStart && isWhitespace(SourceLine[NewStart]))
425  NewStart = map.startOfPreviousColumn(NewStart);
426 
427  // Skip over this bit of "interesting" text.
428  while (NewStart) {
429  unsigned Prev = map.startOfPreviousColumn(NewStart);
430  if (isWhitespace(SourceLine[Prev]))
431  break;
432  NewStart = Prev;
433  }
434 
435  assert(map.byteToColumn(NewStart) != -1);
436  unsigned NewColumns = map.byteToColumn(SourceEnd) -
437  map.byteToColumn(NewStart);
438  if (NewColumns <= TargetColumns) {
439  SourceStart = NewStart;
440  ExpandedRegion = true;
441  }
442  }
443 
444  if (SourceEnd<SourceLine.size()) {
445  unsigned NewEnd = map.startOfNextColumn(SourceEnd);
446 
447  // Skip over any whitespace we see here; we're looking for
448  // another bit of interesting text.
449  // FIXME: Detect non-ASCII whitespace characters too.
450  while (NewEnd < SourceLine.size() && isWhitespace(SourceLine[NewEnd]))
451  NewEnd = map.startOfNextColumn(NewEnd);
452 
453  // Skip over this bit of "interesting" text.
454  while (NewEnd < SourceLine.size() && isWhitespace(SourceLine[NewEnd]))
455  NewEnd = map.startOfNextColumn(NewEnd);
456 
457  assert(map.byteToColumn(NewEnd) != -1);
458  unsigned NewColumns = map.byteToColumn(NewEnd) -
459  map.byteToColumn(SourceStart);
460  if (NewColumns <= TargetColumns) {
461  SourceEnd = NewEnd;
462  ExpandedRegion = true;
463  }
464  }
465 
466  if (!ExpandedRegion)
467  break;
468  }
469 
470  CaretStart = map.byteToColumn(SourceStart);
471  CaretEnd = map.byteToColumn(SourceEnd) + CaretColumnsOutsideSource;
472 
473  // [CaretStart, CaretEnd) is the slice we want. Update the various
474  // output lines to show only this slice.
475  assert(CaretStart!=(unsigned)-1 && CaretEnd!=(unsigned)-1 &&
476  SourceStart!=(unsigned)-1 && SourceEnd!=(unsigned)-1);
477  assert(SourceStart <= SourceEnd);
478  assert(CaretStart <= CaretEnd);
479 
480  unsigned BackColumnsRemoved
481  = map.byteToColumn(SourceLine.size())-map.byteToColumn(SourceEnd);
482  unsigned FrontColumnsRemoved = CaretStart;
483  unsigned ColumnsKept = CaretEnd-CaretStart;
484 
485  // We checked up front that the line needed truncation
486  assert(FrontColumnsRemoved+ColumnsKept+BackColumnsRemoved > Columns);
487 
488  // The line needs some truncation, and we'd prefer to keep the front
489  // if possible, so remove the back
490  if (BackColumnsRemoved > strlen(back_ellipse))
491  SourceLine.replace(SourceEnd, std::string::npos, back_ellipse);
492 
493  // If that's enough then we're done
494  if (FrontColumnsRemoved+ColumnsKept <= Columns)
495  return;
496 
497  // Otherwise remove the front as well
498  if (FrontColumnsRemoved > strlen(front_ellipse)) {
499  SourceLine.replace(0, SourceStart, front_ellipse);
500  CaretLine.replace(0, CaretStart, front_space);
501  if (!FixItInsertionLine.empty())
502  FixItInsertionLine.replace(0, CaretStart, front_space);
503  }
504 }
505 
506 /// Skip over whitespace in the string, starting at the given
507 /// index.
508 ///
509 /// \returns The index of the first non-whitespace character that is
510 /// greater than or equal to Idx or, if no such character exists,
511 /// returns the end of the string.
512 static unsigned skipWhitespace(unsigned Idx, StringRef Str, unsigned Length) {
513  while (Idx < Length && isWhitespace(Str[Idx]))
514  ++Idx;
515  return Idx;
516 }
517 
/// Map an opening punctuation character (a quote or an opening bracket) to
/// the character that closes it.
///
/// A backtick is closed by an apostrophe; straight quotes close themselves.
///
/// \returns The closing character, or the NUL character when \p c does not
/// open any balanced punctuation.
static inline char findMatchingPunctuation(char c) {
  if (c == '\'' || c == '`')
    return '\'';
  if (c == '"')
    return '"';
  if (c == '(')
    return ')';
  if (c == '[')
    return ']';
  if (c == '{')
    return '}';
  return 0;
}
537 
538 /// Find the end of the word starting at the given offset
539 /// within a string.
540 ///
541 /// \returns the index pointing one character past the end of the
542 /// word.
543 static unsigned findEndOfWord(unsigned Start, StringRef Str,
544  unsigned Length, unsigned Column,
545  unsigned Columns) {
546  assert(Start < Str.size() && "Invalid start position!");
547  unsigned End = Start + 1;
548 
549  // If we are already at the end of the string, take that as the word.
550  if (End == Str.size())
551  return End;
552 
553  // Determine if the start of the string is actually opening
554  // punctuation, e.g., a quote or parentheses.
555  char EndPunct = findMatchingPunctuation(Str[Start]);
556  if (!EndPunct) {
557  // This is a normal word. Just find the first space character.
558  while (End < Length && !isWhitespace(Str[End]))
559  ++End;
560  return End;
561  }
562 
563  // We have the start of a balanced punctuation sequence (quotes,
564  // parentheses, etc.). Determine the full sequence is.
565  SmallString<16> PunctuationEndStack;
566  PunctuationEndStack.push_back(EndPunct);
567  while (End < Length && !PunctuationEndStack.empty()) {
568  if (Str[End] == PunctuationEndStack.back())
569  PunctuationEndStack.pop_back();
570  else if (char SubEndPunct = findMatchingPunctuation(Str[End]))
571  PunctuationEndStack.push_back(SubEndPunct);
572 
573  ++End;
574  }
575 
576  // Find the first space character after the punctuation ended.
577  while (End < Length && !isWhitespace(Str[End]))
578  ++End;
579 
580  unsigned PunctWordLength = End - Start;
581  if (// If the word fits on this line
582  Column + PunctWordLength <= Columns ||
583  // ... or the word is "short enough" to take up the next line
584  // without too much ugly white space
585  PunctWordLength < Columns/3)
586  return End; // Take the whole thing as a single "word".
587 
588  // The whole quoted/parenthesized string is too long to print as a
589  // single "word". Instead, find the "word" that starts just after
590  // the punctuation and use that end-point instead. This will recurse
591  // until it finds something small enough to consider a word.
592  return findEndOfWord(Start + 1, Str, Length, Column + 1, Columns);
593 }
594 
595 /// Print the given string to a stream, word-wrapping it to
596 /// some number of columns in the process.
597 ///
598 /// \param OS the stream to which the word-wrapping string will be
599 /// emitted.
600 /// \param Str the string to word-wrap and output.
601 /// \param Columns the number of columns to word-wrap to.
602 /// \param Column the column number at which the first character of \p
603 /// Str will be printed. This will be non-zero when part of the first
604 /// line has already been printed.
605 /// \param Bold if the current text should be bold
606 /// \returns true if word-wrapping was required, or false if the
607 /// string fit on the first line.
608 static bool printWordWrapped(raw_ostream &OS, StringRef Str, unsigned Columns,
609  unsigned Column, bool Bold) {
610  const unsigned Length = std::min(Str.find('\n'), Str.size());
611  bool TextNormal = true;
612 
613  bool Wrapped = false;
614  for (unsigned WordStart = 0, WordEnd; WordStart < Length;
615  WordStart = WordEnd) {
616  // Find the beginning of the next word.
617  WordStart = skipWhitespace(WordStart, Str, Length);
618  if (WordStart == Length)
619  break;
620 
621  // Find the end of this word.
622  WordEnd = findEndOfWord(WordStart, Str, Length, Column, Columns);
623 
624  // Does this word fit on the current line?
625  unsigned WordLength = WordEnd - WordStart;
626  if (Column + WordLength < Columns) {
627  // This word fits on the current line; print it there.
628  if (WordStart) {
629  OS << ' ';
630  Column += 1;
631  }
632  applyTemplateHighlighting(OS, Str.substr(WordStart, WordLength),
633  TextNormal, Bold);
634  Column += WordLength;
635  continue;
636  }
637 
638  // This word does not fit on the current line, so wrap to the next
639  // line.
640  OS << '\n';
641  OS.indent(WordWrapIndentation);
642  applyTemplateHighlighting(OS, Str.substr(WordStart, WordLength),
643  TextNormal, Bold);
644  Column = WordWrapIndentation + WordLength;
645  Wrapped = true;
646  }
647 
648  // Append any remaning text from the message with its existing formatting.
649  applyTemplateHighlighting(OS, Str.substr(Length), TextNormal, Bold);
650 
651  assert(TextNormal && "Text highlighted at end of diagnostic message.");
652 
653  return Wrapped;
654 }
655 
656 TextDiagnostic::TextDiagnostic(raw_ostream &OS, const LangOptions &LangOpts,
657  DiagnosticOptions *DiagOpts,
658  const Preprocessor *PP)
659  : DiagnosticRenderer(LangOpts, DiagOpts), OS(OS), PP(PP) {}
660 
662 
// Emits one diagnostic: an optional location prefix, an optional level label,
// and then the message text.
// NOTE(review): the embedded listing numbers start at 665 here, so the lines
// that open this definition (return type, name, first parameters) are absent
// from this listing; Loc, PLoc and Level are presumably declared there.
665  StringRef Message, ArrayRef<clang::CharSourceRange> Ranges,
666  DiagOrStoredDiag D) {
// Remember the stream position so the width of everything printed before the
// message text can be computed below.
667  uint64_t StartOfLocationInfo = OS.tell();
668 
669  // Emit the location of this particular diagnostic.
670  if (Loc.isValid())
671  emitDiagnosticLoc(Loc, PLoc, Level, Ranges);
672 
673  if (DiagOpts->ShowColors)
674  OS.resetColor();
675 
676  if (DiagOpts->ShowLevel)
677  printDiagnosticLevel(OS, Level, DiagOpts->ShowColors);
// NOTE(review): listing line 678 is missing; the lines below are the trailing
// arguments of a call whose callee is not visible (presumably
// printDiagnosticMessage — confirm). Notes are flagged as supplemental, and
// the number of bytes already written is passed as the starting column.
679  /*IsSupplemental*/ Level == DiagnosticsEngine::Note,
680  Message, OS.tell() - StartOfLocationInfo,
681  DiagOpts->MessageLength, DiagOpts->ShowColors);
682 }
683 
// Prints the textual label for a diagnostic level ("note: ", "warning: ", ...)
// to OS, in bold and in the level's color when ShowColors is set.
// NOTE(review): listing lines 685-686 are missing, so the function name and
// the declarations of OS and Level are not visible here.
684 /*static*/ void
687  bool ShowColors) {
688  if (ShowColors) {
689  // Print diagnostic category in bold and color
690  switch (Level) {
// NOTE(review): listing line 691 is missing; the case label that leads to the
// unreachable marker below is not visible.
692  llvm_unreachable("Invalid diagnostic type");
693  case DiagnosticsEngine::Note: OS.changeColor(noteColor, true); break;
694  case DiagnosticsEngine::Remark: OS.changeColor(remarkColor, true); break;
695  case DiagnosticsEngine::Warning: OS.changeColor(warningColor, true); break;
696  case DiagnosticsEngine::Error: OS.changeColor(errorColor, true); break;
697  case DiagnosticsEngine::Fatal: OS.changeColor(fatalColor, true); break;
698  }
699  }
700 
// The label itself is printed regardless of ShowColors.
701  switch (Level) {
// NOTE(review): listing line 702 is missing (same gap as above).
703  llvm_unreachable("Invalid diagnostic type");
704  case DiagnosticsEngine::Note: OS << "note: "; break;
705  case DiagnosticsEngine::Remark: OS << "remark: "; break;
706  case DiagnosticsEngine::Warning: OS << "warning: "; break;
707  case DiagnosticsEngine::Error: OS << "error: "; break;
708  case DiagnosticsEngine::Fatal: OS << "fatal error: "; break;
709  }
710 
// Undo the color/bold change made above so following text is unaffected.
711  if (ShowColors)
712  OS.resetColor();
713 }
714 
// Prints the message text of a diagnostic followed by a newline. When Columns
// is non-zero the text is word-wrapped to that width, starting at
// CurrentColumn; otherwise it is printed unwrapped.
// NOTE(review): listing line 716 is missing, so the return type, function
// name and the declaration of OS are not visible here.
715 /*static*/
717  bool IsSupplemental,
718  StringRef Message,
719  unsigned CurrentColumn,
720  unsigned Columns, bool ShowColors) {
721  bool Bold = false;
722  if (ShowColors && !IsSupplemental) {
723  // Print primary diagnostic messages in bold and without color, to visually
724  // indicate the transition from continuation notes and other output.
725  OS.changeColor(savedColor, true);
726  Bold = true;
727  }
728 
729  if (Columns)
730  printWordWrapped(OS, Message, Columns, CurrentColumn, Bold);
731  else {
// No wrapping requested: print the text in one go. Normal tracks whether
// template-diff highlighting is currently off and must end up true again.
732  bool Normal = true;
733  applyTemplateHighlighting(OS, Message, Normal, Bold);
734  assert(Normal && "Formatting should have returned to normal");
735  }
736 
737  if (ShowColors)
738  OS.resetColor();
739  OS << '\n';
740 }
741 
742 void TextDiagnostic::emitFilename(StringRef Filename, const SourceManager &SM) {
743 #ifdef _WIN32
744  SmallString<4096> TmpFilename;
745 #endif
746  if (DiagOpts->AbsolutePath) {
747  auto File = SM.getFileManager().getOptionalFileRef(Filename);
748  if (File) {
749  // We want to print a simplified absolute path, i. e. without "dots".
750  //
751  // The hardest part here are the paths like "<part1>/<link>/../<part2>".
752  // On Unix-like systems, we cannot just collapse "<link>/..", because
753  // paths are resolved sequentially, and, thereby, the path
754  // "<part1>/<part2>" may point to a different location. That is why
755  // we use FileManager::getCanonicalName(), which expands all indirections
756  // with llvm::sys::fs::real_path() and caches the result.
757  //
758  // On the other hand, it would be better to preserve as much of the
759  // original path as possible, because that helps a user to recognize it.
760  // real_path() expands all links, which sometimes too much. Luckily,
761  // on Windows we can just use llvm::sys::path::remove_dots(), because,
762  // on that system, both aforementioned paths point to the same place.
763 #ifdef _WIN32
764  TmpFilename = File->getName();
765  llvm::sys::fs::make_absolute(TmpFilename);
766  llvm::sys::path::native(TmpFilename);
767  llvm::sys::path::remove_dots(TmpFilename, /* remove_dot_dot */ true);
768  Filename = StringRef(TmpFilename.data(), TmpFilename.size());
769 #else
770  Filename = SM.getFileManager().getCanonicalName(*File);
771 #endif
772  }
773  }
774 
775  OS << Filename;
776 }
777 
778 /// Print out the file/line/column information and include trace.
779 ///
780 /// This method handles the emission of the diagnostic location information.
781 /// This includes extracting as much location information as is present for
782 /// the diagnostic and printing it, as well as any include stack or source
783 /// ranges necessary.
// NOTE(review): listing lines 784-785 are missing, so the function name and
// the declarations of Loc, PLoc (and any further parameters) are not visible.
786  ArrayRef<CharSourceRange> Ranges) {
787  if (PLoc.isInvalid()) {
788  // At least print the file name if available:
789  if (FileID FID = Loc.getFileID(); FID.isValid()) {
790  if (OptionalFileEntryRef FE = Loc.getFileEntryRef()) {
791  emitFilename(FE->getName(), Loc.getManager());
792  OS << ": ";
793  }
794  }
795  return;
796  }
797  unsigned LineNo = PLoc.getLine();
798 
799  if (!DiagOpts->ShowLocation)
800  return;
801 
// The location prefix is printed in bold when colors are enabled.
802  if (DiagOpts->ShowColors)
803  OS.changeColor(savedColor, true);
804 
805  emitFilename(PLoc.getFilename(), Loc.getManager());
// Line number, in the syntax of the selected diagnostic format.
806  switch (DiagOpts->getFormat()) {
// NOTE(review): listing lines 807-808 are missing; the case label(s) guarding
// the ShowLine check below are not visible.
809  if (DiagOpts->ShowLine)
810  OS << ':' << LineNo;
811  break;
812  case DiagnosticOptions::MSVC: OS << '(' << LineNo; break;
813  case DiagnosticOptions::Vi: OS << " +" << LineNo; break;
814  }
815 
816  if (DiagOpts->ShowColumn)
817  // Compute the column number.
// A column of 0 suppresses the column output entirely.
818  if (unsigned ColNo = PLoc.getColumn()) {
819  if (DiagOpts->getFormat() == DiagnosticOptions::MSVC) {
820  OS << ',';
821  // Visual Studio 2010 or earlier expects column number to be off by one
// NOTE(review): listing line 823 is missing; the second operand of the
// condition below is not visible.
822  if (LangOpts.MSCompatibilityVersion &&
824  ColNo--;
825  } else
826  OS << ':';
827  OS << ColNo;
828  }
// Closing punctuation of the location, again per diagnostic format.
829  switch (DiagOpts->getFormat()) {
// NOTE(review): listing lines 830-831 and 833 are missing; the other case
// labels of this switch (including the one the MSVC-related comment below
// belongs to) are not visible.
832  case DiagnosticOptions::Vi: OS << ':'; break;
834  // MSVC2013 and before print 'file(4) : error'. MSVC2015 gets rid of the
835  // space and prints 'file(4): error'.
836  OS << ')';
// NOTE(review): listing line 838 is missing; the second operand of the
// condition below is not visible.
837  if (LangOpts.MSCompatibilityVersion &&
839  OS << ' ';
840  OS << ':';
841  break;
842  }
843 
// Optionally append each usable source range as {line:col-line:col}.
844  if (DiagOpts->ShowSourceRanges && !Ranges.empty()) {
845  FileID CaretFileID = Loc.getExpansionLoc().getFileID();
846  bool PrintedRange = false;
847  const SourceManager &SM = Loc.getManager();
848 
849  for (const auto &R : Ranges) {
850  // Ignore invalid ranges.
851  if (!R.isValid())
852  continue;
853 
854  SourceLocation B = SM.getExpansionLoc(R.getBegin());
855  CharSourceRange ERange = SM.getExpansionRange(R.getEnd());
856  SourceLocation E = ERange.getEnd();
857 
858  // If the start or end of the range is in another file, just
859  // discard it.
860  if (SM.getFileID(B) != CaretFileID || SM.getFileID(E) != CaretFileID)
861  continue;
862 
863  // Add in the length of the token, so that we cover multi-char
864  // tokens.
865  unsigned TokSize = 0;
866  if (ERange.isTokenRange())
867  TokSize = Lexer::MeasureTokenLength(E, SM, LangOpts);
868 
869  FullSourceLoc BF(B, SM), EF(E, SM);
870  OS << '{'
871  << BF.getLineNumber() << ':' << BF.getColumnNumber() << '-'
872  << EF.getLineNumber() << ':' << (EF.getColumnNumber() + TokSize)
873  << '}';
874  PrintedRange = true;
875  }
876 
// A single ':' terminates the list, only if at least one range was printed.
877  if (PrintedRange)
878  OS << ':';
879  }
880  OS << ' ';
881 }
882 
// Prints an "In file included from <file>:<line>:" line when locations are
// shown and PLoc is valid; otherwise prints the generic "In included file:".
// NOTE(review): listing line 883 is missing, so the line that opens this
// definition (name and parameters, presumably including Loc and PLoc) is not
// visible.
884  if (DiagOpts->ShowLocation && PLoc.isValid()) {
885  OS << "In file included from ";
886  emitFilename(PLoc.getFilename(), Loc.getManager());
887  OS << ':' << PLoc.getLine() << ":\n";
888  } else
889  OS << "In included file:\n";
890 }
891 
// Prints an "In module '<name>' imported from <file>:<line>:" line when
// locations are shown and PLoc is valid; otherwise just "In module '<name>':".
// Unlike the include variant, the filename is streamed directly rather than
// through emitFilename().
// NOTE(review): listing line 892 is missing, so the line that opens this
// definition (name and leading parameters, presumably including PLoc) is not
// visible.
893  StringRef ModuleName) {
894  if (DiagOpts->ShowLocation && PLoc.isValid())
895  OS << "In module '" << ModuleName << "' imported from "
896  << PLoc.getFilename() << ':' << PLoc.getLine() << ":\n";
897  else
898  OS << "In module '" << ModuleName << "':\n";
899 }
900 
902  PresumedLoc PLoc,
903  StringRef ModuleName) {
904  if (DiagOpts->ShowLocation && PLoc.isValid())
905  OS << "While building module '" << ModuleName << "' imported from "
906  << PLoc.getFilename() << ':' << PLoc.getLine() << ":\n";
907  else
908  OS << "While building module '" << ModuleName << "':\n";
909 }
910 
911 /// Find the suitable set of lines to show to include a set of ranges.
912 static std::optional<std::pair<unsigned, unsigned>>
914  const SourceManager &SM) {
915  if (!R.isValid())
916  return std::nullopt;
917 
919  SourceLocation End = R.getEnd();
920  if (SM.getFileID(Begin) != FID || SM.getFileID(End) != FID)
921  return std::nullopt;
922 
923  return std::make_pair(SM.getExpansionLineNumber(Begin),
924  SM.getExpansionLineNumber(End));
925 }
926 
/// Add as much of range B into range A as possible without exceeding a maximum
/// size of MaxRange. Ranges are inclusive.
///
/// \param A        The range to grow; \c first and \c second are inclusive
///                 bounds with first <= second.
/// \param B        The range we would like the result to cover as well.
/// \param MaxRange The largest number of entries the result may span.
/// \returns A, possibly widened towards B. When A already spans MaxRange
///          entries or more (this includes MaxRange == 0), A is returned
///          unchanged.
static std::pair<unsigned, unsigned>
maybeAddRange(std::pair<unsigned, unsigned> A, std::pair<unsigned, unsigned> B,
              unsigned MaxRange) {
  // If A is already the maximum size (or larger), we're done. The sizes are
  // compared before subtracting: computing MaxRange - ASize first would wrap
  // around when MaxRange < ASize and the expansion below would then produce
  // an inverted range (first > second).
  const unsigned ASize = A.second - A.first + 1;
  if (ASize >= MaxRange)
    return A;
  unsigned Slack = MaxRange - ASize;

  // Easy case: merge succeeds within MaxRange.
  unsigned Min = std::min(A.first, B.first);
  unsigned Max = std::max(A.second, B.second);
  if (Max - Min + 1 <= MaxRange)
    return {Min, Max};

  // If we can't reach B from A within MaxRange, there's nothing to do.
  // Don't add lines to the range that contain nothing interesting.
  if ((B.first > A.first && B.first - A.first + 1 > MaxRange) ||
      (B.second < A.second && A.second - B.second + 1 > MaxRange))
    return A;

  // Otherwise, expand A towards B to produce a range of size MaxRange. We
  // attempt to expand by the same amount in both directions if B strictly
  // contains A.

  // Expand downwards by up to half the available amount, then upwards as
  // much as possible, then downwards as much as possible.
  A.second = std::min(A.second + (Slack + 1) / 2, Max);
  Slack = MaxRange - (A.second - A.first + 1);
  // Adding Slack before taking the max keeps "A.first - Slack" from going
  // below Min without ever computing a negative intermediate.
  A.first = std::max(Min + Slack, A.first) - Slack;
  A.second = std::min(A.first + MaxRange - 1, Max);
  return A;
}
961 
/// A highlight span confined to a single source line.
struct LineRange {
  /// Number of the line the span lies on.
  unsigned LineNo;
  /// Byte offset within the line at which the span begins.
  unsigned StartCol;
  /// Byte offset within the line at which the span stops. The value may lie
  /// past the end of the line; users clamp it to the line length.
  unsigned EndCol;
};
967 
968 /// Highlight \p R (with ~'s) on the current source line.
969 static void highlightRange(const LineRange &R, const SourceColumnMap &Map,
970  std::string &CaretLine) {
971  // Pick the first non-whitespace column.
972  unsigned StartColNo = R.StartCol;
973  while (StartColNo < Map.getSourceLine().size() &&
974  (Map.getSourceLine()[StartColNo] == ' ' ||
975  Map.getSourceLine()[StartColNo] == '\t'))
976  StartColNo = Map.startOfNextColumn(StartColNo);
977 
978  // Pick the last non-whitespace column.
979  unsigned EndColNo =
980  std::min(static_cast<size_t>(R.EndCol), Map.getSourceLine().size());
981  while (EndColNo && (Map.getSourceLine()[EndColNo - 1] == ' ' ||
982  Map.getSourceLine()[EndColNo - 1] == '\t'))
983  EndColNo = Map.startOfPreviousColumn(EndColNo);
984 
985  // If the start/end passed each other, then we are trying to highlight a
986  // range that just exists in whitespace. That most likely means we have
987  // a multi-line highlighting range that covers a blank line.
988  if (StartColNo > EndColNo)
989  return;
990 
991  // Fill the range with ~'s.
992  StartColNo = Map.byteToContainingColumn(StartColNo);
993  EndColNo = Map.byteToContainingColumn(EndColNo);
994 
995  assert(StartColNo <= EndColNo && "Invalid range!");
996  if (CaretLine.size() < EndColNo)
997  CaretLine.resize(EndColNo, ' ');
998  std::fill(CaretLine.begin() + StartColNo, CaretLine.begin() + EndColNo, '~');
999 }
1000 
1001 static std::string buildFixItInsertionLine(FileID FID,
1002  unsigned LineNo,
1003  const SourceColumnMap &map,
1004  ArrayRef<FixItHint> Hints,
1005  const SourceManager &SM,
1006  const DiagnosticOptions *DiagOpts) {
1007  std::string FixItInsertionLine;
1008  if (Hints.empty() || !DiagOpts->ShowFixits)
1009  return FixItInsertionLine;
1010  unsigned PrevHintEndCol = 0;
1011 
1012  for (const auto &H : Hints) {
1013  if (H.CodeToInsert.empty())
1014  continue;
1015 
1016  // We have an insertion hint. Determine whether the inserted
1017  // code contains no newlines and is on the same line as the caret.
1018  std::pair<FileID, unsigned> HintLocInfo =
1019  SM.getDecomposedExpansionLoc(H.RemoveRange.getBegin());
1020  if (FID == HintLocInfo.first &&
1021  LineNo == SM.getLineNumber(HintLocInfo.first, HintLocInfo.second) &&
1022  StringRef(H.CodeToInsert).find_first_of("\n\r") == StringRef::npos) {
1023  // Insert the new code into the line just below the code
1024  // that the user wrote.
1025  // Note: When modifying this function, be very careful about what is a
1026  // "column" (printed width, platform-dependent) and what is a
1027  // "byte offset" (SourceManager "column").
1028  unsigned HintByteOffset =
1029  SM.getColumnNumber(HintLocInfo.first, HintLocInfo.second) - 1;
1030 
1031  // The hint must start inside the source or right at the end
1032  assert(HintByteOffset < static_cast<unsigned>(map.bytes()) + 1);
1033  unsigned HintCol = map.byteToContainingColumn(HintByteOffset);
1034 
1035  // If we inserted a long previous hint, push this one forwards, and add
1036  // an extra space to show that this is not part of the previous
1037  // completion. This is sort of the best we can do when two hints appear
1038  // to overlap.
1039  //
1040  // Note that if this hint is located immediately after the previous
1041  // hint, no space will be added, since the location is more important.
1042  if (HintCol < PrevHintEndCol)
1043  HintCol = PrevHintEndCol + 1;
1044 
1045  // This should NOT use HintByteOffset, because the source might have
1046  // Unicode characters in earlier columns.
1047  unsigned NewFixItLineSize = FixItInsertionLine.size() +
1048  (HintCol - PrevHintEndCol) +
1049  H.CodeToInsert.size();
1050  if (NewFixItLineSize > FixItInsertionLine.size())
1051  FixItInsertionLine.resize(NewFixItLineSize, ' ');
1052 
1053  std::copy(H.CodeToInsert.begin(), H.CodeToInsert.end(),
1054  FixItInsertionLine.end() - H.CodeToInsert.size());
1055 
1056  PrevHintEndCol = HintCol + llvm::sys::locale::columnWidth(H.CodeToInsert);
1057  }
1058  }
1059 
1060  expandTabs(FixItInsertionLine, DiagOpts->TabStop);
1061 
1062  return FixItInsertionLine;
1063 }
1064 
/// Return how many decimal digits are needed to print \p N (at least 1).
static unsigned getNumDisplayWidth(unsigned N) {
  unsigned Width = 1;
  // Every division by ten that leaves something behind means one more digit.
  for (unsigned Rest = N / 10; Rest != 0; Rest /= 10)
    ++Width;
  return Width;
}
1072 
1073 /// Filter out invalid ranges, ranges that don't fit into the window of
1074 /// source lines we will print, and ranges from other files.
1075 ///
1076 /// For the remaining ranges, convert them to simple LineRange structs,
1077 /// which only cover one line at a time.
1080  const SourceManager &SM,
1081  const std::pair<unsigned, unsigned> &Lines, FileID FID,
1082  const LangOptions &LangOpts) {
1083  SmallVector<LineRange> LineRanges;
1084 
1085  for (const CharSourceRange &R : Ranges) {
1086  if (R.isInvalid())
1087  continue;
1088  SourceLocation Begin = R.getBegin();
1089  SourceLocation End = R.getEnd();
1090 
1091  unsigned StartLineNo = SM.getExpansionLineNumber(Begin);
1092  if (StartLineNo > Lines.second || SM.getFileID(Begin) != FID)
1093  continue;
1094 
1095  unsigned EndLineNo = SM.getExpansionLineNumber(End);
1096  if (EndLineNo < Lines.first || SM.getFileID(End) != FID)
1097  continue;
1098 
1099  unsigned StartColumn = SM.getExpansionColumnNumber(Begin);
1100  unsigned EndColumn = SM.getExpansionColumnNumber(End);
1101  if (R.isTokenRange())
1102  EndColumn += Lexer::MeasureTokenLength(End, SM, LangOpts);
1103 
1104  // Only a single line.
1105  if (StartLineNo == EndLineNo) {
1106  LineRanges.push_back({StartLineNo, StartColumn - 1, EndColumn - 1});
1107  continue;
1108  }
1109 
1110  // Start line.
1111  LineRanges.push_back({StartLineNo, StartColumn - 1, ~0u});
1112 
1113  // Middle lines.
1114  for (unsigned S = StartLineNo + 1; S != EndLineNo; ++S)
1115  LineRanges.push_back({S, 0, ~0u});
1116 
1117  // End line.
1118  LineRanges.push_back({EndLineNo, 0, EndColumn - 1});
1119  }
1120 
1121  return LineRanges;
1122 }
1123 
1124 /// Creates syntax highlighting information in form of StyleRanges.
1125 ///
1126 /// The returned unique ptr has always exactly size
1127 /// (\p EndLineNumber - \p StartLineNumber + 1). Each SmallVector in there
1128 /// corresponds to syntax highlighting information in one line. In each line,
1129 /// the StyleRanges are non-overlapping and sorted from start to end of the
1130 /// line.
1131 static std::unique_ptr<llvm::SmallVector<TextDiagnostic::StyleRange>[]>
1132 highlightLines(StringRef FileData, unsigned StartLineNumber,
1133  unsigned EndLineNumber, const Preprocessor *PP,
1134  const LangOptions &LangOpts, bool ShowColors, FileID FID,
1135  const SourceManager &SM) {
1136  assert(StartLineNumber <= EndLineNumber);
1137  auto SnippetRanges =
1138  std::make_unique<SmallVector<TextDiagnostic::StyleRange>[]>(
1139  EndLineNumber - StartLineNumber + 1);
1140 
1141  if (!PP || !ShowColors)
1142  return SnippetRanges;
1143 
1144  // Might cause emission of another diagnostic.
1146  return SnippetRanges;
1147 
1148  auto Buff = llvm::MemoryBuffer::getMemBuffer(FileData);
1149  Lexer L{FID, *Buff, SM, LangOpts};
1150  L.SetKeepWhitespaceMode(true);
1151 
1152  const char *FirstLineStart =
1153  FileData.data() +
1154  SM.getDecomposedLoc(SM.translateLineCol(FID, StartLineNumber, 1)).second;
1155  if (const char *CheckPoint = PP->getCheckPoint(FID, FirstLineStart)) {
1156  assert(CheckPoint >= Buff->getBufferStart() &&
1157  CheckPoint <= Buff->getBufferEnd());
1158  assert(CheckPoint <= FirstLineStart);
1159  size_t Offset = CheckPoint - Buff->getBufferStart();
1160  L.seek(Offset, /*IsAtStartOfLine=*/false);
1161  }
1162 
1163  // Classify the given token and append it to the given vector.
1164  auto appendStyle =
1165  [PP, &LangOpts](SmallVector<TextDiagnostic::StyleRange> &Vec,
1166  const Token &T, unsigned Start, unsigned Length) -> void {
1167  if (T.is(tok::raw_identifier)) {
1168  StringRef RawIdent = T.getRawIdentifier();
1169  // Special case true/false/nullptr/... literals, since they will otherwise
1170  // be treated as keywords.
1171  // FIXME: It would be good to have a programmatic way of getting this
1172  // list.
1173  if (llvm::StringSwitch<bool>(RawIdent)
1174  .Case("true", true)
1175  .Case("false", true)
1176  .Case("nullptr", true)
1177  .Case("__func__", true)
1178  .Case("__objc_yes__", true)
1179  .Case("__objc_no__", true)
1180  .Case("__null", true)
1181  .Case("__FUNCDNAME__", true)
1182  .Case("__FUNCSIG__", true)
1183  .Case("__FUNCTION__", true)
1184  .Case("__FUNCSIG__", true)
1185  .Default(false)) {
1186  Vec.emplace_back(Start, Start + Length, LiteralColor);
1187  } else {
1188  const IdentifierInfo *II = PP->getIdentifierInfo(RawIdent);
1189  assert(II);
1190  if (II->isKeyword(LangOpts))
1191  Vec.emplace_back(Start, Start + Length, KeywordColor);
1192  }
1193  } else if (tok::isLiteral(T.getKind())) {
1194  Vec.emplace_back(Start, Start + Length, LiteralColor);
1195  } else {
1196  assert(T.is(tok::comment));
1197  Vec.emplace_back(Start, Start + Length, CommentColor);
1198  }
1199  };
1200 
1201  bool Stop = false;
1202  while (!Stop) {
1203  Token T;
1204  Stop = L.LexFromRawLexer(T);
1205  if (T.is(tok::unknown))
1206  continue;
1207 
1208  // We are only interested in identifiers, literals and comments.
1209  if (!T.is(tok::raw_identifier) && !T.is(tok::comment) &&
1210  !tok::isLiteral(T.getKind()))
1211  continue;
1212 
1213  bool Invalid = false;
1214  unsigned TokenEndLine = SM.getSpellingLineNumber(T.getEndLoc(), &Invalid);
1215  if (Invalid || TokenEndLine < StartLineNumber)
1216  continue;
1217 
1218  assert(TokenEndLine >= StartLineNumber);
1219 
1220  unsigned TokenStartLine =
1221  SM.getSpellingLineNumber(T.getLocation(), &Invalid);
1222  if (Invalid)
1223  continue;
1224  // If this happens, we're done.
1225  if (TokenStartLine > EndLineNumber)
1226  break;
1227 
1228  unsigned StartCol =
1229  SM.getSpellingColumnNumber(T.getLocation(), &Invalid) - 1;
1230  if (Invalid)
1231  continue;
1232 
1233  // Simple tokens.
1234  if (TokenStartLine == TokenEndLine) {
1236  SnippetRanges[TokenStartLine - StartLineNumber];
1237  appendStyle(LineRanges, T, StartCol, T.getLength());
1238  continue;
1239  }
1240  assert((TokenEndLine - TokenStartLine) >= 1);
1241 
1242  // For tokens that span multiple lines (think multiline comments), we
1243  // divide them into multiple StyleRanges.
1244  unsigned EndCol = SM.getSpellingColumnNumber(T.getEndLoc(), &Invalid) - 1;
1245  if (Invalid)
1246  continue;
1247 
1248  std::string Spelling = Lexer::getSpelling(T, SM, LangOpts);
1249 
1250  unsigned L = TokenStartLine;
1251  unsigned LineLength = 0;
1252  for (unsigned I = 0; I <= Spelling.size(); ++I) {
1253  // This line is done.
1254  if (I == Spelling.size() || isVerticalWhitespace(Spelling[I])) {
1256  SnippetRanges[L - StartLineNumber];
1257 
1258  if (L >= StartLineNumber) {
1259  if (L == TokenStartLine) // First line
1260  appendStyle(LineRanges, T, StartCol, LineLength);
1261  else if (L == TokenEndLine) // Last line
1262  appendStyle(LineRanges, T, 0, EndCol);
1263  else
1264  appendStyle(LineRanges, T, 0, LineLength);
1265  }
1266 
1267  ++L;
1268  if (L > EndLineNumber)
1269  break;
1270  LineLength = 0;
1271  continue;
1272  }
1273  ++LineLength;
1274  }
1275  }
1276 
1277  return SnippetRanges;
1278 }
1279 
1280 /// Emit a code snippet and caret line.
1281 ///
1282 /// This routine emits a single line's code snippet and caret line.
1283 ///
1284 /// \param Loc The location for the caret.
1285 /// \param Ranges The underlined ranges for this code snippet.
1286 /// \param Hints The FixIt hints active for this diagnostic.
1287 void TextDiagnostic::emitSnippetAndCaret(
1290  assert(Loc.isValid() && "must have a valid source location here");
1291  assert(Loc.isFileID() && "must have a file location here");
1292 
1293  // If caret diagnostics are enabled and we have location, we want to
1294  // emit the caret. However, we only do this if the location moved
1295  // from the last diagnostic, if the last diagnostic was a note that
1296  // was part of a different warning or error diagnostic, or if the
1297  // diagnostic has ranges. We don't want to emit the same caret
1298  // multiple times if one loc has multiple diagnostics.
1299  if (!DiagOpts->ShowCarets)
1300  return;
1301  if (Loc == LastLoc && Ranges.empty() && Hints.empty() &&
1303  return;
1304 
1305  FileID FID = Loc.getFileID();
1306  const SourceManager &SM = Loc.getManager();
1307 
1308  // Get information about the buffer it points into.
1309  bool Invalid = false;
1310  StringRef BufData = Loc.getBufferData(&Invalid);
1311  if (Invalid)
1312  return;
1313  const char *BufStart = BufData.data();
1314  const char *BufEnd = BufStart + BufData.size();
1315 
1316  unsigned CaretLineNo = Loc.getLineNumber();
1317  unsigned CaretColNo = Loc.getColumnNumber();
1318 
1319  // Arbitrarily stop showing snippets when the line is too long.
1320  static const size_t MaxLineLengthToPrint = 4096;
1321  if (CaretColNo > MaxLineLengthToPrint)
1322  return;
1323 
1324  // Find the set of lines to include.
1325  const unsigned MaxLines = DiagOpts->SnippetLineLimit;
1326  std::pair<unsigned, unsigned> Lines = {CaretLineNo, CaretLineNo};
1327  unsigned DisplayLineNo = Loc.getPresumedLoc().getLine();
1328  for (const auto &I : Ranges) {
1329  if (auto OptionalRange = findLinesForRange(I, FID, SM))
1330  Lines = maybeAddRange(Lines, *OptionalRange, MaxLines);
1331 
1332  DisplayLineNo =
1333  std::min(DisplayLineNo, SM.getPresumedLineNumber(I.getBegin()));
1334  }
1335 
1336  // Our line numbers look like:
1337  // " [number] | "
1338  // Where [number] is MaxLineNoDisplayWidth columns
1339  // and the full thing is therefore MaxLineNoDisplayWidth + 4 columns.
1340  unsigned MaxLineNoDisplayWidth =
1341  DiagOpts->ShowLineNumbers
1342  ? std::max(4u, getNumDisplayWidth(DisplayLineNo + MaxLines))
1343  : 0;
1344  auto indentForLineNumbers = [&] {
1345  if (MaxLineNoDisplayWidth > 0)
1346  OS.indent(MaxLineNoDisplayWidth + 2) << "| ";
1347  };
1348 
1349  // Prepare source highlighting information for the lines we're about to
1350  // emit, starting from the first line.
1351  std::unique_ptr<SmallVector<StyleRange>[]> SourceStyles =
1352  highlightLines(BufData, Lines.first, Lines.second, PP, LangOpts,
1353  DiagOpts->ShowColors, FID, SM);
1354 
1355  SmallVector<LineRange> LineRanges =
1356  prepareAndFilterRanges(Ranges, SM, Lines, FID, LangOpts);
1357 
1358  for (unsigned LineNo = Lines.first; LineNo != Lines.second + 1;
1359  ++LineNo, ++DisplayLineNo) {
1360  // Rewind from the current position to the start of the line.
1361  const char *LineStart =
1362  BufStart +
1363  SM.getDecomposedLoc(SM.translateLineCol(FID, LineNo, 1)).second;
1364  if (LineStart == BufEnd)
1365  break;
1366 
1367  // Compute the line end.
1368  const char *LineEnd = LineStart;
1369  while (*LineEnd != '\n' && *LineEnd != '\r' && LineEnd != BufEnd)
1370  ++LineEnd;
1371 
1372  // Arbitrarily stop showing snippets when the line is too long.
1373  // FIXME: Don't print any lines in this case.
1374  if (size_t(LineEnd - LineStart) > MaxLineLengthToPrint)
1375  return;
1376 
1377  // Copy the line of code into an std::string for ease of manipulation.
1378  std::string SourceLine(LineStart, LineEnd);
1379  // Remove trailing null bytes.
1380  while (!SourceLine.empty() && SourceLine.back() == '\0' &&
1381  (LineNo != CaretLineNo || SourceLine.size() > CaretColNo))
1382  SourceLine.pop_back();
1383 
1384  // Build the byte to column map.
1385  const SourceColumnMap sourceColMap(SourceLine, DiagOpts->TabStop);
1386 
1387  std::string CaretLine;
1388  // Highlight all of the characters covered by Ranges with ~ characters.
1389  for (const auto &LR : LineRanges) {
1390  if (LR.LineNo == LineNo)
1391  highlightRange(LR, sourceColMap, CaretLine);
1392  }
1393 
1394  // Next, insert the caret itself.
1395  if (CaretLineNo == LineNo) {
1396  size_t Col = sourceColMap.byteToContainingColumn(CaretColNo - 1);
1397  CaretLine.resize(std::max(Col + 1, CaretLine.size()), ' ');
1398  CaretLine[Col] = '^';
1399  }
1400 
1401  std::string FixItInsertionLine = buildFixItInsertionLine(
1402  FID, LineNo, sourceColMap, Hints, SM, DiagOpts.get());
1403 
1404  // If the source line is too long for our terminal, select only the
1405  // "interesting" source region within that line.
1406  unsigned Columns = DiagOpts->MessageLength;
1407  if (Columns)
1408  selectInterestingSourceRegion(SourceLine, CaretLine, FixItInsertionLine,
1409  Columns, sourceColMap);
1410 
1411  // If we are in -fdiagnostics-print-source-range-info mode, we are trying
1412  // to produce easily machine parsable output. Add a space before the
1413  // source line and the caret to make it trivial to tell the main diagnostic
1414  // line from what the user is intended to see.
1415  if (DiagOpts->ShowSourceRanges && !SourceLine.empty()) {
1416  SourceLine = ' ' + SourceLine;
1417  CaretLine = ' ' + CaretLine;
1418  }
1419 
1420  // Emit what we have computed.
1421  emitSnippet(SourceLine, MaxLineNoDisplayWidth, LineNo, DisplayLineNo,
1422  SourceStyles[LineNo - Lines.first]);
1423 
1424  if (!CaretLine.empty()) {
1425  indentForLineNumbers();
1426  if (DiagOpts->ShowColors)
1427  OS.changeColor(caretColor, true);
1428  OS << CaretLine << '\n';
1429  if (DiagOpts->ShowColors)
1430  OS.resetColor();
1431  }
1432 
1433  if (!FixItInsertionLine.empty()) {
1434  indentForLineNumbers();
1435  if (DiagOpts->ShowColors)
1436  // Print fixit line in color
1437  OS.changeColor(fixitColor, false);
1438  if (DiagOpts->ShowSourceRanges)
1439  OS << ' ';
1440  OS << FixItInsertionLine << '\n';
1441  if (DiagOpts->ShowColors)
1442  OS.resetColor();
1443  }
1444  }
1445 
1446  // Print out any parseable fixit information requested by the options.
1447  emitParseableFixits(Hints, SM);
1448 }
1449 
1450 void TextDiagnostic::emitSnippet(StringRef SourceLine,
1451  unsigned MaxLineNoDisplayWidth,
1452  unsigned LineNo, unsigned DisplayLineNo,
1453  ArrayRef<StyleRange> Styles) {
1454  // Emit line number.
1455  if (MaxLineNoDisplayWidth > 0) {
1456  unsigned LineNoDisplayWidth = getNumDisplayWidth(DisplayLineNo);
1457  OS.indent(MaxLineNoDisplayWidth - LineNoDisplayWidth + 1)
1458  << DisplayLineNo << " | ";
1459  }
1460 
1461  // Print the source line one character at a time.
1462  bool PrintReversed = false;
1463  std::optional<llvm::raw_ostream::Colors> CurrentColor;
1464  size_t I = 0;
1465  while (I < SourceLine.size()) {
1466  auto [Str, WasPrintable] =
1467  printableTextForNextCharacter(SourceLine, &I, DiagOpts->TabStop);
1468 
1469  // Toggle inverted colors on or off for this character.
1470  if (DiagOpts->ShowColors) {
1471  if (WasPrintable == PrintReversed) {
1472  PrintReversed = !PrintReversed;
1473  if (PrintReversed)
1474  OS.reverseColor();
1475  else {
1476  OS.resetColor();
1477  CurrentColor = std::nullopt;
1478  }
1479  }
1480 
1481  // Apply syntax highlighting information.
1482  const auto *CharStyle = llvm::find_if(Styles, [I](const StyleRange &R) {
1483  return (R.Start < I && R.End >= I);
1484  });
1485 
1486  if (CharStyle != Styles.end()) {
1487  if (!CurrentColor ||
1488  (CurrentColor && *CurrentColor != CharStyle->Color)) {
1489  OS.changeColor(CharStyle->Color, false);
1490  CurrentColor = CharStyle->Color;
1491  }
1492  } else if (CurrentColor) {
1493  OS.resetColor();
1494  CurrentColor = std::nullopt;
1495  }
1496  }
1497 
1498  OS << Str;
1499  }
1500 
1501  if (DiagOpts->ShowColors)
1502  OS.resetColor();
1503 
1504  OS << '\n';
1505 }
1506 
1507 void TextDiagnostic::emitParseableFixits(ArrayRef<FixItHint> Hints,
1508  const SourceManager &SM) {
1509  if (!DiagOpts->ShowParseableFixits)
1510  return;
1511 
1512  // We follow FixItRewriter's example in not (yet) handling
1513  // fix-its in macros.
1514  for (const auto &H : Hints) {
1515  if (H.RemoveRange.isInvalid() || H.RemoveRange.getBegin().isMacroID() ||
1516  H.RemoveRange.getEnd().isMacroID())
1517  return;
1518  }
1519 
1520  for (const auto &H : Hints) {
1521  SourceLocation BLoc = H.RemoveRange.getBegin();
1522  SourceLocation ELoc = H.RemoveRange.getEnd();
1523 
1524  std::pair<FileID, unsigned> BInfo = SM.getDecomposedLoc(BLoc);
1525  std::pair<FileID, unsigned> EInfo = SM.getDecomposedLoc(ELoc);
1526 
1527  // Adjust for token ranges.
1528  if (H.RemoveRange.isTokenRange())
1529  EInfo.second += Lexer::MeasureTokenLength(ELoc, SM, LangOpts);
1530 
1531  // We specifically do not do word-wrapping or tab-expansion here,
1532  // because this is supposed to be easy to parse.
1533  PresumedLoc PLoc = SM.getPresumedLoc(BLoc);
1534  if (PLoc.isInvalid())
1535  break;
1536 
1537  OS << "fix-it:\"";
1538  OS.write_escaped(PLoc.getFilename());
1539  OS << "\":{" << SM.getLineNumber(BInfo.first, BInfo.second)
1540  << ':' << SM.getColumnNumber(BInfo.first, BInfo.second)
1541  << '-' << SM.getLineNumber(EInfo.first, EInfo.second)
1542  << ':' << SM.getColumnNumber(EInfo.first, EInfo.second)
1543  << "}:\"";
1544  OS.write_escaped(H.CodeToInsert);
1545  OS << "\"\n";
1546  }
1547 }
static StringRef bytes(const std::vector< T, Allocator > &v)
Definition: ASTWriter.cpp:127
#define SM(sm)
Definition: Cuda.cpp:83
Defines the clang::FileManager interface and associated types.
unsigned Offset
Definition: Format.cpp:2978
StringRef Filename
Definition: Format.cpp:2976
bool ShowColors
Definition: Logger.cpp:29
Defines the clang::Preprocessor interface.
SourceLocation Loc
Definition: SemaObjC.cpp:755
Defines the SourceManager interface.
static enum raw_ostream::Colors caretColor
static std::pair< unsigned, unsigned > maybeAddRange(std::pair< unsigned, unsigned > A, std::pair< unsigned, unsigned > B, unsigned MaxRange)
Add as much of range B into range A as possible without exceeding a maximum size of MaxRange.
static int bytesSincePreviousTabOrLineBegin(StringRef SourceLine, size_t i)
static constexpr raw_ostream::Colors LiteralColor
static SmallVector< LineRange > prepareAndFilterRanges(const SmallVectorImpl< CharSourceRange > &Ranges, const SourceManager &SM, const std::pair< unsigned, unsigned > &Lines, FileID FID, const LangOptions &LangOpts)
Filter out invalid ranges, ranges that don't fit into the window of source lines we will print,...
static std::pair< SmallString< 16 >, bool > printableTextForNextCharacter(StringRef SourceLine, size_t *I, unsigned TabStop)
returns a printable representation of first item from input range
static enum raw_ostream::Colors fixitColor
static void applyTemplateHighlighting(raw_ostream &OS, StringRef Str, bool &Normal, bool Bold)
Add highlights to differences in template strings.
static enum raw_ostream::Colors savedColor
static enum raw_ostream::Colors errorColor
static unsigned skipWhitespace(unsigned Idx, StringRef Str, unsigned Length)
Skip over whitespace in the string, starting at the given index.
static bool printWordWrapped(raw_ostream &OS, StringRef Str, unsigned Columns, unsigned Column, bool Bold)
Print the given string to a stream, word-wrapping it to some number of columns in the process.
static unsigned findEndOfWord(unsigned Start, StringRef Str, unsigned Length, unsigned Column, unsigned Columns)
Find the end of the word starting at the given offset within a string.
static enum raw_ostream::Colors remarkColor
static std::optional< std::pair< unsigned, unsigned > > findLinesForRange(const CharSourceRange &R, FileID FID, const SourceManager &SM)
Find the suitable set of lines to show to include a set of ranges.
static enum raw_ostream::Colors fatalColor
static constexpr raw_ostream::Colors KeywordColor
static void selectInterestingSourceRegion(std::string &SourceLine, std::string &CaretLine, std::string &FixItInsertionLine, unsigned Columns, const SourceColumnMap &map)
When the source code line we want to print is too long for the terminal, select the "interesting" reg...
static std::string buildFixItInsertionLine(FileID FID, unsigned LineNo, const SourceColumnMap &map, ArrayRef< FixItHint > Hints, const SourceManager &SM, const DiagnosticOptions *DiagOpts)
static enum raw_ostream::Colors warningColor
static char findMatchingPunctuation(char c)
If the given character is the start of some kind of balanced punctuation (e.g., quotes or parentheses...
static enum raw_ostream::Colors noteColor
static void expandTabs(std::string &SourceLine, unsigned TabStop)
static void genColumnByteMapping(StringRef SourceLine, unsigned TabStop, SmallVectorImpl< int > &BytesOut, SmallVectorImpl< int > &ColumnsOut)
BytesOut: A mapping from columns to the byte of the source line that produced the character displayin...
static void highlightRange(const LineRange &R, const SourceColumnMap &Map, std::string &CaretLine)
Highlight R (with ~'s) on the current source line.
const unsigned WordWrapIndentation
Number of spaces to indent when word-wrapping.
static unsigned getNumDisplayWidth(unsigned N)
static enum raw_ostream::Colors templateColor
static std::unique_ptr< llvm::SmallVector< TextDiagnostic::StyleRange >[]> highlightLines(StringRef FileData, unsigned StartLineNumber, unsigned EndLineNumber, const Preprocessor *PP, const LangOptions &LangOpts, bool ShowColors, FileID FID, const SourceManager &SM)
Creates syntax highlighting information in form of StyleRanges.
SourceLocation End
SourceLocation Begin
__DEVICE__ int min(int __a, int __b)
__DEVICE__ int max(int __a, int __b)
__device__ __2f16 float c
Represents a character-granular source range.
bool isTokenRange() const
Return true if the end of this range specifies the start of the last token.
SourceLocation getEnd() const
SourceLocation getBegin() const
Options for controlling the compiler diagnostics engine.
Class to encapsulate the logic for formatting a diagnostic message.
const LangOptions & LangOpts
SourceLocation LastLoc
The location of the previous diagnostic if known.
DiagnosticsEngine::Level LastLevel
The level of the last diagnostic emitted.
IntrusiveRefCntPtr< DiagnosticOptions > DiagOpts
Level
The level of the diagnostic, after it has been through mapping.
Definition: Diagnostic.h:196
An opaque identifier used by SourceManager which refers to a source file (MemoryBuffer) along with it...
A SourceLocation and its associated SourceManager.
unsigned getColumnNumber(bool *Invalid=nullptr) const
unsigned getLineNumber(bool *Invalid=nullptr) const
One of these records is kept for each identifier that is lexed.
bool isKeyword(const LangOptions &LangOpts) const
Return true if this token is a keyword in the specified language.
IdentifierInfoLookup * getExternalIdentifierLookup() const
Retrieve the external identifier lookup object, if any.
Keeps track of the various options that can be enabled, which controls the dialect of C or C++ that i...
Definition: LangOptions.h:482
bool isCompatibleWithMSVC(MSVCMajorVersion MajorVersion) const
Definition: LangOptions.h:666
Lexer - This provides a simple interface that turns a text buffer into a stream of tokens.
Definition: Lexer.h:78
static unsigned getSpelling(const Token &Tok, const char *&Buffer, const SourceManager &SourceMgr, const LangOptions &LangOpts, bool *Invalid=nullptr)
getSpelling - This method is used to get the spelling of a token into a preallocated buffer,...
Definition: Lexer.cpp:452
static unsigned MeasureTokenLength(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
MeasureTokenLength - Relex the token at the specified location and return its length in bytes in the ...
Definition: Lexer.cpp:499
Engages in a tight little dance with the lexer to efficiently preprocess tokens.
Definition: Preprocessor.h:128
const char * getCheckPoint(FileID FID, const char *Start) const
Returns a pointer into the given file's buffer that's guaranteed to be between tokens.
IdentifierTable & getIdentifierTable()
IdentifierInfo * getIdentifierInfo(StringRef Name) const
Return information about the specified preprocessor identifier token.
Represents an unpacked "presumed" location which can be presented to the user.
unsigned getColumn() const
Return the presumed column number of this location.
bool isValid() const
const char * getFilename() const
Return the presumed filename of this location.
unsigned getLine() const
Return the presumed line number of this location.
bool isInvalid() const
Return true if this object is invalid or uninitialized.
Encodes a location in the source.
bool isValid() const
Return true if this is a valid SourceLocation object.
This class handles loading and caching of source files into memory.
static void printDiagnosticMessage(raw_ostream &OS, bool IsSupplemental, StringRef Message, unsigned CurrentColumn, unsigned Columns, bool ShowColors)
Pretty-print a diagnostic message to a raw_ostream.
void emitImportLocation(FullSourceLoc Loc, PresumedLoc PLoc, StringRef ModuleName) override
void emitIncludeLocation(FullSourceLoc Loc, PresumedLoc PLoc) override
static void printDiagnosticLevel(raw_ostream &OS, DiagnosticsEngine::Level Level, bool ShowColors)
Print the diagnostic level to a raw_ostream.
TextDiagnostic(raw_ostream &OS, const LangOptions &LangOpts, DiagnosticOptions *DiagOpts, const Preprocessor *PP=nullptr)
void emitDiagnosticLoc(FullSourceLoc Loc, PresumedLoc PLoc, DiagnosticsEngine::Level Level, ArrayRef< CharSourceRange > Ranges) override
Print out the file/line/column information and include trace.
void emitDiagnosticMessage(FullSourceLoc Loc, PresumedLoc PLoc, DiagnosticsEngine::Level Level, StringRef Message, ArrayRef< CharSourceRange > Ranges, DiagOrStoredDiag D) override
void emitBuildingModuleLocation(FullSourceLoc Loc, PresumedLoc PLoc, StringRef ModuleName) override
Token - This structure provides full information about a lexed token.
Definition: Token.h:36
unsigned columnWidth(StringRef Text, Encoding Encoding)
Returns the number of columns required to display the Text on a generic Unicode-capable terminal.
Definition: Encoding.h:44
bool isLiteral(TokenKind K)
Return true if this is a "literal" kind, like a numeric constant, string, etc.
Definition: TokenKinds.h:97
The JSON file list parser is used to communicate input to InstallAPI.
static const TerminalColor CommentColor
LLVM_READONLY bool isVerticalWhitespace(unsigned char c)
Returns true if this character is vertical ASCII whitespace: '\n', '\r'.
Definition: CharInfo.h:99
llvm::PointerUnion< const Diagnostic *, const StoredDiagnostic * > DiagOrStoredDiag
LLVM_READONLY bool isWhitespace(unsigned char c)
Return true if this character is horizontal or vertical ASCII whitespace: ' ', '\t',...
Definition: CharInfo.h:108
const char ToggleHighlight
Special character that the diagnostic printer will use to toggle the bold attribute.
Definition: Diagnostic.h:1852
const FunctionProtoType * T
unsigned long uint64_t
unsigned EndCol
unsigned LineNo
unsigned StartCol