clang  19.0.0git
DependencyScanningFilesystem.h
Go to the documentation of this file.
1 //===- DependencyScanningFilesystem.h - clang-scan-deps fs ===---*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_DEPENDENCYSCANNINGFILESYSTEM_H
10 #define LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_DEPENDENCYSCANNINGFILESYSTEM_H
11 
12 #include "clang/Basic/LLVM.h"
14 #include "llvm/ADT/DenseMap.h"
15 #include "llvm/ADT/StringMap.h"
16 #include "llvm/Support/Allocator.h"
17 #include "llvm/Support/ErrorOr.h"
18 #include "llvm/Support/VirtualFileSystem.h"
19 #include <mutex>
20 #include <optional>
21 
22 namespace clang {
23 namespace tooling {
24 namespace dependencies {
25 
28 
29 /// Contents and directive tokens of a cached file entry. Single instance can
30 /// be shared between multiple entries.
32  CachedFileContents(std::unique_ptr<llvm::MemoryBuffer> Contents)
33  : Original(std::move(Contents)), DepDirectives(nullptr) {}
34 
35  /// Owning storage for the original contents.
36  std::unique_ptr<llvm::MemoryBuffer> Original;
37 
38  /// The mutex that must be locked before mutating directive tokens.
39  std::mutex ValueLock;
41  /// Accessor to the directive tokens that's atomic to avoid data races.
42  /// \p CachedFileContents has ownership of the pointer.
43  std::atomic<const std::optional<DependencyDirectivesTy> *> DepDirectives;
44 
45  ~CachedFileContents() { delete DepDirectives.load(); }
46 };
47 
48 /// An in-memory representation of a file system entity that is of interest to
49 /// the dependency scanning filesystem.
50 ///
51 /// It represents one of the following:
52 /// - opened file with contents and a stat value,
53 /// - opened file with contents, directive tokens and a stat value,
54 /// - directory entry with its stat value,
55 /// - filesystem error.
56 ///
57 /// Single instance of this class can be shared across different filenames (e.g.
58 /// a regular file and a symlink). For this reason the status filename is empty
59 /// and is only materialized by \c EntryRef that knows the requested filename.
61 public:
62  /// Creates an entry without contents: either a filesystem error or
63  /// a directory with stat value.
64  CachedFileSystemEntry(llvm::ErrorOr<llvm::vfs::Status> Stat)
65  : MaybeStat(std::move(Stat)), Contents(nullptr) {
66  clearStatName();
67  }
68 
69  /// Creates an entry representing a file with contents.
70  CachedFileSystemEntry(llvm::ErrorOr<llvm::vfs::Status> Stat,
71  CachedFileContents *Contents)
72  : MaybeStat(std::move(Stat)), Contents(std::move(Contents)) {
73  clearStatName();
74  }
75 
76  /// \returns True if the entry is a filesystem error.
77  bool isError() const { return !MaybeStat; }
78 
79  /// \returns True if the current entry represents a directory.
80  bool isDirectory() const { return !isError() && MaybeStat->isDirectory(); }
81 
82  /// \returns Original contents of the file.
83  StringRef getOriginalContents() const {
84  assert(!isError() && "error");
85  assert(!MaybeStat->isDirectory() && "not a file");
86  assert(Contents && "contents not initialized");
87  return Contents->Original->getBuffer();
88  }
89 
90  /// \returns The scanned preprocessor directive tokens of the file that are
91  /// used to speed up preprocessing, if available.
92  std::optional<ArrayRef<dependency_directives_scan::Directive>>
94  assert(!isError() && "error");
95  assert(!isDirectory() && "not a file");
96  assert(Contents && "contents not initialized");
97  if (auto *Directives = Contents->DepDirectives.load()) {
98  if (Directives->has_value())
100  }
101  return std::nullopt;
102  }
103 
104  /// \returns The error.
105  std::error_code getError() const { return MaybeStat.getError(); }
106 
107  /// \returns The entry status with empty filename.
108  llvm::vfs::Status getStatus() const {
109  assert(!isError() && "error");
110  assert(MaybeStat->getName().empty() && "stat name must be empty");
111  return *MaybeStat;
112  }
113 
114  /// \returns The unique ID of the entry.
115  llvm::sys::fs::UniqueID getUniqueID() const {
116  assert(!isError() && "error");
117  return MaybeStat->getUniqueID();
118  }
119 
120  /// \returns The data structure holding both contents and directive tokens.
122  assert(!isError() && "error");
123  assert(!isDirectory() && "not a file");
124  return Contents;
125  }
126 
127 private:
128  void clearStatName() {
129  if (MaybeStat)
130  MaybeStat = llvm::vfs::Status::copyWithNewName(*MaybeStat, "");
131  }
132 
133  /// Either the filesystem error or status of the entry.
134  /// The filename is empty and only materialized by \c EntryRef.
135  llvm::ErrorOr<llvm::vfs::Status> MaybeStat;
136 
137  /// Non-owning pointer to the file contents.
138  ///
139  /// We're using pointer here to keep the size of this class small. Instances
140  /// representing directories and filesystem errors don't hold any contents
141  /// anyway.
142  CachedFileContents *Contents;
143 };
144 
145 using CachedRealPath = llvm::ErrorOr<std::string>;
146 
147 /// This class is a shared cache, that caches the 'stat' and 'open' calls to the
148 /// underlying real file system, and the scanned preprocessor directives of
149 /// files.
150 ///
151 /// It is sharded based on the hash of the key to reduce the lock contention for
152 /// the worker threads.
154 public:
155  struct CacheShard {
156  /// The mutex that needs to be locked before mutation of any member.
157  mutable std::mutex CacheLock;
158 
159  /// Map from filenames to cached entries and real paths.
160  llvm::StringMap<
161  std::pair<const CachedFileSystemEntry *, const CachedRealPath *>,
162  llvm::BumpPtrAllocator>
164 
165  /// Map from unique IDs to cached entries.
166  llvm::DenseMap<llvm::sys::fs::UniqueID, const CachedFileSystemEntry *>
168 
169  /// The backing storage for cached entries.
170  llvm::SpecificBumpPtrAllocator<CachedFileSystemEntry> EntryStorage;
171 
172  /// The backing storage for cached contents.
173  llvm::SpecificBumpPtrAllocator<CachedFileContents> ContentsStorage;
174 
175  /// The backing storage for cached real paths.
176  llvm::SpecificBumpPtrAllocator<CachedRealPath> RealPathStorage;
177 
178  /// Returns entry associated with the filename or nullptr if none is found.
179  const CachedFileSystemEntry *findEntryByFilename(StringRef Filename) const;
180 
181  /// Returns entry associated with the unique ID or nullptr if none is found.
182  const CachedFileSystemEntry *
183  findEntryByUID(llvm::sys::fs::UniqueID UID) const;
184 
185  /// Returns entry associated with the filename if there is some. Otherwise,
186  /// constructs new one with the given status, associates it with the
187  /// filename and returns the result.
188  const CachedFileSystemEntry &
190  llvm::ErrorOr<llvm::vfs::Status> Stat);
191 
192  /// Returns entry associated with the unique ID if there is some. Otherwise,
193  /// constructs new one with the given status and contents, associates it
194  /// with the unique ID and returns the result.
195  const CachedFileSystemEntry &
196  getOrEmplaceEntryForUID(llvm::sys::fs::UniqueID UID, llvm::vfs::Status Stat,
197  std::unique_ptr<llvm::MemoryBuffer> Contents);
198 
199  /// Returns entry associated with the filename if there is some. Otherwise,
200  /// associates the given entry with the filename and returns it.
201  const CachedFileSystemEntry &
203  const CachedFileSystemEntry &Entry);
204 
205  /// Returns the real path associated with the filename or nullptr if none is
206  /// found.
207  const CachedRealPath *findRealPathByFilename(StringRef Filename) const;
208 
209  /// Returns the real path associated with the filename if there is some.
210  /// Otherwise, constructs new one with the given one, associates it with the
211  /// filename and returns the result.
212  const CachedRealPath &
214  llvm::ErrorOr<StringRef> RealPath);
215  };
216 
218 
219  /// Returns shard for the given key.
220  CacheShard &getShardForFilename(StringRef Filename) const;
221  CacheShard &getShardForUID(llvm::sys::fs::UniqueID UID) const;
222 
223 private:
224  std::unique_ptr<CacheShard[]> CacheShards;
225  unsigned NumShards;
226 };
227 
228 /// This class is a local cache, that caches the 'stat' and 'open' calls to the
229 /// underlying real file system.
231  llvm::StringMap<
232  std::pair<const CachedFileSystemEntry *, const CachedRealPath *>,
233  llvm::BumpPtrAllocator>
234  Cache;
235 
236 public:
237  /// Returns entry associated with the filename or nullptr if none is found.
239  assert(llvm::sys::path::is_absolute_gnu(Filename));
240  auto It = Cache.find(Filename);
241  return It == Cache.end() ? nullptr : It->getValue().first;
242  }
243 
244  /// Associates the given entry with the filename and returns the given entry
245  /// pointer (for convenience).
246  const CachedFileSystemEntry &
248  const CachedFileSystemEntry &Entry) {
249  assert(llvm::sys::path::is_absolute_gnu(Filename));
250  auto [It, Inserted] = Cache.insert({Filename, {&Entry, nullptr}});
251  auto &[CachedEntry, CachedRealPath] = It->getValue();
252  if (!Inserted) {
253  // The file is already present in the local cache. If we got here, it only
254  // contains the real path. Let's make sure the entry is populated too.
255  assert((!CachedEntry && CachedRealPath) && "entry already present");
256  CachedEntry = &Entry;
257  }
258  return *CachedEntry;
259  }
260 
261  /// Returns real path associated with the filename or nullptr if none is
262  /// found.
264  assert(llvm::sys::path::is_absolute_gnu(Filename));
265  auto It = Cache.find(Filename);
266  return It == Cache.end() ? nullptr : It->getValue().second;
267  }
268 
269  /// Associates the given real path with the filename and returns the given
270  /// entry pointer (for convenience).
271  const CachedRealPath &
273  const CachedRealPath &RealPath) {
274  assert(llvm::sys::path::is_absolute_gnu(Filename));
275  auto [It, Inserted] = Cache.insert({Filename, {nullptr, &RealPath}});
276  auto &[CachedEntry, CachedRealPath] = It->getValue();
277  if (!Inserted) {
278  // The file is already present in the local cache. If we got here, it only
279  // contains the entry. Let's make sure the real path is populated too.
280  assert((!CachedRealPath && CachedEntry) && "real path already present");
281  CachedRealPath = &RealPath;
282  }
283  return *CachedRealPath;
284  }
285 };
286 
287 /// Reference to a CachedFileSystemEntry.
288 /// If the underlying entry is an opened file, this wrapper returns the file
289 /// contents and the scanned preprocessor directives.
290 class EntryRef {
291  /// The filename used to access this entry.
292  std::string Filename;
293 
294  /// The underlying cached entry.
295  const CachedFileSystemEntry &Entry;
296 
298 
299 public:
300  EntryRef(StringRef Name, const CachedFileSystemEntry &Entry)
301  : Filename(Name), Entry(Entry) {}
302 
303  llvm::vfs::Status getStatus() const {
304  llvm::vfs::Status Stat = Entry.getStatus();
305  if (!Stat.isDirectory())
306  Stat = llvm::vfs::Status::copyWithNewSize(Stat, getContents().size());
307  return llvm::vfs::Status::copyWithNewName(Stat, Filename);
308  }
309 
310  bool isError() const { return Entry.isError(); }
311  bool isDirectory() const { return Entry.isDirectory(); }
312 
313  /// If the cached entry represents an error, promotes it into `ErrorOr`.
314  llvm::ErrorOr<EntryRef> unwrapError() const {
315  if (isError())
316  return Entry.getError();
317  return *this;
318  }
319 
320  StringRef getContents() const { return Entry.getOriginalContents(); }
321 
322  std::optional<ArrayRef<dependency_directives_scan::Directive>>
324  return Entry.getDirectiveTokens();
325  }
326 };
327 
328 /// A virtual file system optimized for the dependency discovery.
329 ///
330 /// It is primarily designed to work with source files whose contents were
331 /// preprocessed to remove any tokens that are unlikely to affect the dependency
332 /// computation.
333 ///
334 /// This is not a thread safe VFS. A single instance is meant to be used only in
335 /// one thread. Multiple instances are allowed to service multiple threads
336 /// running in parallel.
338  : public llvm::RTTIExtends<DependencyScanningWorkerFilesystem,
339  llvm::vfs::ProxyFileSystem> {
340 public:
341  static const char ID;
342 
346 
347  llvm::ErrorOr<llvm::vfs::Status> status(const Twine &Path) override;
348  llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>>
349  openFileForRead(const Twine &Path) override;
350 
351  std::error_code getRealPath(const Twine &Path,
352  SmallVectorImpl<char> &Output) override;
353 
354  std::error_code setCurrentWorkingDirectory(const Twine &Path) override;
355 
356  /// Returns entry for the given filename.
357  ///
358  /// Attempts to use the local and shared caches first, then falls back to
359  /// using the underlying filesystem.
360  llvm::ErrorOr<EntryRef> getOrCreateFileSystemEntry(StringRef Filename);
361 
362  /// Ensure the directive tokens are populated for this file entry.
363  ///
364  /// Returns true if the directive tokens are populated for this file entry,
365  /// false if not (i.e. this entry is not a file or its scan fails).
367 
368  /// Check whether \p Path exists. By default checks cached result of \c
369  /// status(), and falls back on FS if unable to do so.
370  bool exists(const Twine &Path) override;
371 
372 private:
373  /// For a filename that's not yet associated with any entry in the caches,
374  /// uses the underlying filesystem to either look up the entry in the
375  /// shared cache indexed by unique ID, or create a new entry from scratch.
376  /// \p FilenameForLookup will always be an absolute path, and different than
377  /// \p OriginalFilename if \p OriginalFilename is relative.
378  llvm::ErrorOr<const CachedFileSystemEntry &>
379  computeAndStoreResult(StringRef OriginalFilename,
380  StringRef FilenameForLookup);
381 
382  /// Represents a filesystem entry that has been stat-ed (and potentially read)
383  /// and that's about to be inserted into the cache as `CachedFileSystemEntry`.
384  struct TentativeEntry {
385  llvm::vfs::Status Status;
386  std::unique_ptr<llvm::MemoryBuffer> Contents;
387 
388  TentativeEntry(llvm::vfs::Status Status,
389  std::unique_ptr<llvm::MemoryBuffer> Contents = nullptr)
390  : Status(std::move(Status)), Contents(std::move(Contents)) {}
391  };
392 
393  /// Reads file at the given path. Enforces consistency between the file size
394  /// in status and size of read contents.
395  llvm::ErrorOr<TentativeEntry> readFile(StringRef Filename);
396 
397  /// Returns entry associated with the unique ID of the given tentative entry
398  /// if there is some in the shared cache. Otherwise, constructs new one,
399  /// associates it with the unique ID and returns the result.
400  const CachedFileSystemEntry &
401  getOrEmplaceSharedEntryForUID(TentativeEntry TEntry);
402 
403  /// Returns entry associated with the filename or nullptr if none is found.
404  ///
405  /// Returns entry from local cache if there is some. Otherwise, if the entry
406  /// is found in the shared cache, writes it through the local cache and
407  /// returns it. Otherwise returns nullptr.
408  const CachedFileSystemEntry *
409  findEntryByFilenameWithWriteThrough(StringRef Filename);
410 
411  /// Returns entry associated with the unique ID in the shared cache or nullptr
412  /// if none is found.
413  const CachedFileSystemEntry *
414  findSharedEntryByUID(llvm::vfs::Status Stat) const {
415  return SharedCache.getShardForUID(Stat.getUniqueID())
416  .findEntryByUID(Stat.getUniqueID());
417  }
418 
419  /// Associates the given entry with the filename in the local cache and
420  /// returns it.
421  const CachedFileSystemEntry &
422  insertLocalEntryForFilename(StringRef Filename,
423  const CachedFileSystemEntry &Entry) {
424  return LocalCache.insertEntryForFilename(Filename, Entry);
425  }
426 
427  /// Returns entry associated with the filename in the shared cache if there is
428  /// some. Otherwise, constructs new one with the given error code, associates
429  /// it with the filename and returns the result.
430  const CachedFileSystemEntry &
431  getOrEmplaceSharedEntryForFilename(StringRef Filename, std::error_code EC) {
432  return SharedCache.getShardForFilename(Filename)
434  }
435 
436  /// Returns entry associated with the filename in the shared cache if there is
437  /// some. Otherwise, associates the given entry with the filename and returns
438  /// it.
439  const CachedFileSystemEntry &
440  getOrInsertSharedEntryForFilename(StringRef Filename,
441  const CachedFileSystemEntry &Entry) {
442  return SharedCache.getShardForFilename(Filename)
444  }
445 
446  void printImpl(raw_ostream &OS, PrintType Type,
447  unsigned IndentLevel) const override {
448  printIndent(OS, IndentLevel);
449  OS << "DependencyScanningFilesystem\n";
450  getUnderlyingFS().print(OS, Type, IndentLevel + 1);
451  }
452 
453  /// The global cache shared between worker threads.
454  DependencyScanningFilesystemSharedCache &SharedCache;
455  /// The local cache is used by the worker thread to cache file system queries
456  /// locally instead of querying the global cache every time.
457  DependencyScanningFilesystemLocalCache LocalCache;
458 
459  /// The working directory to use for making relative paths absolute before
460  /// using them for cache lookups.
461  llvm::ErrorOr<std::string> WorkingDirForCacheLookup;
462 
463  void updateWorkingDirForCacheLookup();
464 
465  llvm::ErrorOr<StringRef>
466  tryGetFilenameForLookup(StringRef OriginalFilename,
467  llvm::SmallVectorImpl<char> &PathBuf) const;
468 };
469 
470 } // end namespace dependencies
471 } // end namespace tooling
472 } // end namespace clang
473 
474 #endif // LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_DEPENDENCYSCANNINGFILESYSTEM_H
This is the interface for scanning header and source files to get the minimum necessary preprocessor ...
StringRef Filename
Definition: Format.cpp:2976
Forward-declares and imports various common LLVM datatypes that clang wants to use unqualified.
TypePropertyCache< Private > Cache
Definition: Type.cpp:4438
An in-memory representation of a file system entity that is of interest to the dependency scanning fi...
CachedFileSystemEntry(llvm::ErrorOr< llvm::vfs::Status > Stat)
Creates an entry without contents: either a filesystem error or a directory with stat value.
std::optional< ArrayRef< dependency_directives_scan::Directive > > getDirectiveTokens() const
CachedFileSystemEntry(llvm::ErrorOr< llvm::vfs::Status > Stat, CachedFileContents *Contents)
Creates an entry representing a file with contents.
This class is a local cache, that caches the 'stat' and 'open' calls to the underlying real file syst...
const CachedFileSystemEntry & insertEntryForFilename(StringRef Filename, const CachedFileSystemEntry &Entry)
Associates the given entry with the filename and returns the given entry pointer (for convenience).
const CachedFileSystemEntry * findEntryByFilename(StringRef Filename) const
Returns entry associated with the filename or nullptr if none is found.
const CachedRealPath * findRealPathByFilename(StringRef Filename) const
Returns real path associated with the filename or nullptr if none is found.
const CachedRealPath & insertRealPathForFilename(StringRef Filename, const CachedRealPath &RealPath)
Associates the given real path with the filename and returns the given entry pointer (for convenience...
This class is a shared cache, that caches the 'stat' and 'open' calls to the underlying real file sys...
CacheShard & getShardForFilename(StringRef Filename) const
Returns shard for the given key.
A virtual file system optimized for the dependency discovery.
std::error_code getRealPath(const Twine &Path, SmallVectorImpl< char > &Output) override
bool ensureDirectiveTokensArePopulated(EntryRef Entry)
Ensure the directive tokens are populated for this file entry.
bool exists(const Twine &Path) override
Check whether Path exists.
llvm::ErrorOr< EntryRef > getOrCreateFileSystemEntry(StringRef Filename)
Returns entry for the given filename.
llvm::ErrorOr< std::unique_ptr< llvm::vfs::File > > openFileForRead(const Twine &Path) override
DependencyScanningWorkerFilesystem(DependencyScanningFilesystemSharedCache &SharedCache, IntrusiveRefCntPtr< llvm::vfs::FileSystem > FS)
llvm::ErrorOr< llvm::vfs::Status > status(const Twine &Path) override
Reference to a CachedFileSystemEntry.
EntryRef(StringRef Name, const CachedFileSystemEntry &Entry)
llvm::ErrorOr< EntryRef > unwrapError() const
If the cached entry represents an error, promotes it into ErrorOr.
std::optional< ArrayRef< dependency_directives_scan::Directive > > getDirectiveTokens() const
llvm::ErrorOr< std::string > CachedRealPath
The JSON file list parser is used to communicate input to InstallAPI.
Definition: Format.h:5433
Contents and directive tokens of a cached file entry.
std::mutex ValueLock
The mutex that must be locked before mutating directive tokens.
std::atomic< const std::optional< DependencyDirectivesTy > * > DepDirectives
Accessor to the directive tokens that's atomic to avoid data races.
CachedFileContents(std::unique_ptr< llvm::MemoryBuffer > Contents)
std::unique_ptr< llvm::MemoryBuffer > Original
Owning storage for the original contents.
SmallVector< dependency_directives_scan::Token, 10 > DepDirectiveTokens
const CachedFileSystemEntry & getOrEmplaceEntryForUID(llvm::sys::fs::UniqueID UID, llvm::vfs::Status Stat, std::unique_ptr< llvm::MemoryBuffer > Contents)
Returns entry associated with the unique ID if there is some.
llvm::SpecificBumpPtrAllocator< CachedFileSystemEntry > EntryStorage
The backing storage for cached entries.
std::mutex CacheLock
The mutex that needs to be locked before mutation of any member.
llvm::DenseMap< llvm::sys::fs::UniqueID, const CachedFileSystemEntry * > EntriesByUID
Map from unique IDs to cached entries.
const CachedFileSystemEntry * findEntryByUID(llvm::sys::fs::UniqueID UID) const
Returns entry associated with the unique ID or nullptr if none is found.
llvm::StringMap< std::pair< const CachedFileSystemEntry *, const CachedRealPath * >, llvm::BumpPtrAllocator > CacheByFilename
Map from filenames to cached entries and real paths.
const CachedRealPath * findRealPathByFilename(StringRef Filename) const
Returns the real path associated with the filename or nullptr if none is found.
const CachedFileSystemEntry & getOrInsertEntryForFilename(StringRef Filename, const CachedFileSystemEntry &Entry)
Returns entry associated with the filename if there is some.
const CachedFileSystemEntry * findEntryByFilename(StringRef Filename) const
Returns entry associated with the filename or nullptr if none is found.
llvm::SpecificBumpPtrAllocator< CachedRealPath > RealPathStorage
The backing storage for cached real paths.
llvm::SpecificBumpPtrAllocator< CachedFileContents > ContentsStorage
The backing storage for cached contents.
const CachedFileSystemEntry & getOrEmplaceEntryForFilename(StringRef Filename, llvm::ErrorOr< llvm::vfs::Status > Stat)
Returns entry associated with the filename if there is some.
const CachedRealPath & getOrEmplaceRealPathForFilename(StringRef Filename, llvm::ErrorOr< StringRef > RealPath)
Returns the real path associated with the filename if there is some.