clang  19.0.0git
ModuleDepCollector.h
Go to the documentation of this file.
1 //===- ModuleDepCollector.h - Callbacks to collect deps ---------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_MODULEDEPCOLLECTOR_H
10 #define LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_MODULEDEPCOLLECTOR_H
11 
12 #include "clang/Basic/LLVM.h"
15 #include "clang/Frontend/Utils.h"
16 #include "clang/Lex/HeaderSearch.h"
17 #include "clang/Lex/PPCallbacks.h"
20 #include "llvm/ADT/DenseMap.h"
21 #include "llvm/ADT/Hashing.h"
22 #include "llvm/ADT/StringSet.h"
23 #include "llvm/Support/raw_ostream.h"
24 #include <optional>
25 #include <string>
26 #include <unordered_map>
27 #include <variant>
28 
29 namespace clang {
30 namespace tooling {
31 namespace dependencies {
32 
33 class DependencyActionController;
34 class DependencyConsumer;
35 
36 /// Modular dependency that has already been built prior to the dependency scan.
38  std::string ModuleName;
39  std::string PCMFile;
40  std::string ModuleMapFile;
41 
42  explicit PrebuiltModuleDep(const Module *M)
43  : ModuleName(M->getTopLevelModuleName()),
44  PCMFile(M->getASTFile()->getName()),
45  ModuleMapFile(M->PresumedModuleMapFile) {}
46 };
47 
48 /// This is used to identify a specific module.
struct ModuleID {
  /// Name of the module. For C++20 module partitions the name may contain a
  /// `:`; for C++20 header units it may be a header-name.
  std::string ModuleName;

  /// Hash standing for the compiler options that affect the resulting
  /// command-line invocation.
  ///
  /// Two modules sharing both name and ContextHash but built from different
  /// invocations could cause non-deterministic build results.
  ///
  /// Two modules sharing a name but differing in \c ContextHash should be
  /// treated as separate modules for the purpose of a build.
  std::string ContextHash;

  /// Two IDs are equal only when both the name and the context hash match.
  bool operator==(const ModuleID &Other) const {
    if (ModuleName != Other.ModuleName)
      return false;
    return ContextHash == Other.ContextHash;
  }

  /// Lexicographic ordering: the module name is compared first, and the
  /// context hash breaks ties between identical names.
  bool operator<(const ModuleID &Other) const {
    const int NameOrder = ModuleName.compare(Other.ModuleName);
    if (NameOrder != 0)
      return NameOrder < 0;
    return ContextHash < Other.ContextHash;
  }
};
74 
75 /// P1689ModuleInfo - Represents the needed information of standard C++20
76 /// modules for P1689 format.
78  /// The name of the module. This may include `:` for partitions.
79  std::string ModuleName;
80 
81  /// Optional. The source path to the module.
82  std::string SourcePath;
83 
84  /// Whether this module is a standard C++ interface unit.
86 
87  enum class ModuleType {
89  // To be supported
90  // AngleHeaderUnit,
91  // QuoteHeaderUnit
92  };
94 };
95 
96 /// An output from a module compilation, such as the path of the module file.
97 enum class ModuleOutputKind {
98  /// The module file (.pcm). Required.
99  ModuleFile,
100  /// The path of the dependency file (.d), if any.
102  /// The null-separated list of names to use as the targets in the dependency
103  /// file, if any. Defaults to the value of \c ModuleFile, as in the driver.
105  /// The path of the serialized diagnostic file (.dia), if any.
107 };
108 
109 struct ModuleDeps {
110  /// The identifier of the module.
112 
113  /// Whether this is a "system" module.
114  bool IsSystem;
115 
116  /// The path to the modulemap file which defines this module.
117  ///
118  /// This can be used to explicitly build this module. This file will
119  /// additionally appear in \c FileDeps as a dependency.
120  std::string ClangModuleMapFile;
121 
122  /// A collection of absolute paths to files that this module directly depends
123  /// on, not including transitive dependencies.
124  llvm::StringSet<> FileDeps;
125 
126  /// A collection of absolute paths to module map files that this module needs
127  /// to know about. The ordering is significant.
128  std::vector<std::string> ModuleMapFileDeps;
129 
130  /// A collection of prebuilt modular dependencies this module directly depends
131  /// on, not including transitive dependencies.
132  std::vector<PrebuiltModuleDep> PrebuiltModuleDeps;
133 
134  /// A list of module identifiers this module directly depends on, not
135  /// including transitive dependencies.
136  ///
137  /// This may include modules with a different context hash when it can be
138  /// determined that the differences are benign for this compilation.
139  std::vector<ModuleID> ClangModuleDeps;
140 
141  /// Get (or compute) the compiler invocation that can be used to build this
142  /// module. Does not include argv[0].
143  const std::vector<std::string> &getBuildArguments();
144 
145 private:
146  friend class ModuleDepCollectorPP;
147 
148  std::variant<std::monostate, CowCompilerInvocation, std::vector<std::string>>
149  BuildInfo;
150 };
151 
152 using PrebuiltModuleVFSMapT = llvm::StringMap<llvm::StringSet<>>;
153 
154 class ModuleDepCollector;
155 
156 /// Callback that records textual includes and direct modular includes/imports
157 /// during preprocessing. At the end of the main file, it also collects
158 /// transitive modular dependencies and passes everything to the
159 /// \c DependencyConsumer of the parent \c ModuleDepCollector.
160 class ModuleDepCollectorPP final : public PPCallbacks {
161 public:
163 
166  SourceLocation Loc) override;
167  void InclusionDirective(SourceLocation HashLoc, const Token &IncludeTok,
168  StringRef FileName, bool IsAngled,
169  CharSourceRange FilenameRange,
170  OptionalFileEntryRef File, StringRef SearchPath,
171  StringRef RelativePath, const Module *SuggestedModule,
172  bool ModuleImported,
174  void moduleImport(SourceLocation ImportLoc, ModuleIdPath Path,
175  const Module *Imported) override;
176 
177  void EndOfMainFile() override;
178 
179 private:
180  /// The parent dependency collector.
181  ModuleDepCollector &MDC;
182 
183  void handleImport(const Module *Imported);
184 
185  /// Adds direct modular dependencies that have already been built to the
186  /// ModuleDeps instance.
187  void
188  addAllSubmodulePrebuiltDeps(const Module *M, ModuleDeps &MD,
189  llvm::DenseSet<const Module *> &SeenSubmodules);
190  void addModulePrebuiltDeps(const Module *M, ModuleDeps &MD,
191  llvm::DenseSet<const Module *> &SeenSubmodules);
192 
193  /// Traverses the previously collected direct modular dependencies to discover
194  /// transitive modular dependencies and fills the parent \c ModuleDepCollector
195  /// with both.
196  /// Returns the ID or nothing if the dependency is spurious and is ignored.
197  std::optional<ModuleID> handleTopLevelModule(const Module *M);
198  void addAllSubmoduleDeps(const Module *M, ModuleDeps &MD,
199  llvm::DenseSet<const Module *> &AddedModules);
200  void addModuleDep(const Module *M, ModuleDeps &MD,
201  llvm::DenseSet<const Module *> &AddedModules);
202 
203  /// Traverses the affecting modules and updates \c MD with references to the
204  /// parent \c ModuleDepCollector info.
205  void addAllAffectingClangModules(const Module *M, ModuleDeps &MD,
206  llvm::DenseSet<const Module *> &AddedModules);
207  void addAffectingClangModule(const Module *M, ModuleDeps &MD,
208  llvm::DenseSet<const Module *> &AddedModules);
209 };
210 
211 /// Collects modular and non-modular dependencies of the main file by attaching
212 /// \c ModuleDepCollectorPP to the preprocessor.
214 public:
215  ModuleDepCollector(std::unique_ptr<DependencyOutputOptions> Opts,
216  CompilerInstance &ScanInstance, DependencyConsumer &C,
217  DependencyActionController &Controller,
218  CompilerInvocation OriginalCI,
219  PrebuiltModuleVFSMapT PrebuiltModuleVFSMap,
220  ScanningOptimizations OptimizeArgs, bool EagerLoadModules,
221  bool IsStdModuleP1689Format);
222 
223  void attachToPreprocessor(Preprocessor &PP) override;
224  void attachToASTReader(ASTReader &R) override;
225 
226  /// Apply any changes implied by the discovered dependencies to the given
227  /// invocation (e.g. disable implicit modules, add explicit module paths).
229 
230 private:
231  friend ModuleDepCollectorPP;
232 
233  /// The compiler instance for scanning the current translation unit.
234  CompilerInstance &ScanInstance;
235  /// The consumer of collected dependency information.
236  DependencyConsumer &Consumer;
237  /// Callbacks for computing dependency information.
238  DependencyActionController &Controller;
239  /// Mapping from prebuilt AST files to their sorted list of VFS overlay files.
240  PrebuiltModuleVFSMapT PrebuiltModuleVFSMap;
241  /// Path to the main source file.
242  std::string MainFile;
243  /// Hash identifying the compilation conditions of the current TU.
244  std::string ContextHash;
245  /// Non-modular file dependencies. This includes the main source file and
246  /// textually included header files.
247  std::vector<std::string> FileDeps;
248  /// Direct and transitive modular dependencies of the main source file.
249  llvm::MapVector<const Module *, std::unique_ptr<ModuleDeps>> ModularDeps;
250  /// Secondary mapping for \c ModularDeps allowing lookup by ModuleID without
251  /// a preprocessor. Storage owned by \c ModularDeps.
252  llvm::DenseMap<ModuleID, ModuleDeps *> ModuleDepsByID;
253  /// Direct modular dependencies that have already been built.
254  llvm::MapVector<const Module *, PrebuiltModuleDep> DirectPrebuiltModularDeps;
255  /// Working set of direct modular dependencies.
256  llvm::SetVector<const Module *> DirectModularDeps;
257  /// Options that control the dependency output generation.
258  std::unique_ptr<DependencyOutputOptions> Opts;
259  /// A Clang invocation that's based on the original TU invocation and that has
260  /// been partially transformed into one that can perform explicit build of
261  /// a discovered modular dependency. Note that this still needs to be adjusted
262  /// for each individual module.
263  CowCompilerInvocation CommonInvocation;
264  /// Whether to optimize the modules' command-line arguments.
265  ScanningOptimizations OptimizeArgs;
266  /// Whether to set up command-lines to load PCM files eagerly.
267  bool EagerLoadModules;
268  /// Whether we're generating dependency output in P1689 format
269  /// for standard C++ modules.
270  bool IsStdModuleP1689Format;
271 
272  std::optional<P1689ModuleInfo> ProvidedStdCXXModule;
273  std::vector<P1689ModuleInfo> RequiredStdCXXModules;
274 
275  /// Checks whether the module is known as being prebuilt.
276  bool isPrebuiltModule(const Module *M);
277 
278  /// Adds \p Path to \c FileDeps, making it absolute if necessary.
279  void addFileDep(StringRef Path);
280  /// Adds \p Path to \c MD.FileDeps, making it absolute if necessary.
281  void addFileDep(ModuleDeps &MD, StringRef Path);
282 
283  /// Get a Clang invocation adjusted to build the given modular dependency.
284  /// This excludes paths that are yet-to-be-provided by the build system.
285  CowCompilerInvocation getInvocationAdjustedForModuleBuildWithoutOutputs(
286  const ModuleDeps &Deps,
287  llvm::function_ref<void(CowCompilerInvocation &)> Optimize) const;
288 
289  /// Collect module map files for given modules.
291  collectModuleMapFiles(ArrayRef<ModuleID> ClangModuleDeps) const;
292 
293  /// Add module map files to the invocation, if needed.
294  void addModuleMapFiles(CompilerInvocation &CI,
295  ArrayRef<ModuleID> ClangModuleDeps) const;
296  /// Add module files (pcm) to the invocation, if needed.
297  void addModuleFiles(CompilerInvocation &CI,
298  ArrayRef<ModuleID> ClangModuleDeps) const;
299  void addModuleFiles(CowCompilerInvocation &CI,
300  ArrayRef<ModuleID> ClangModuleDeps) const;
301 
302  /// Add paths that require looking up outputs to the given dependencies.
303  void addOutputPaths(CowCompilerInvocation &CI, ModuleDeps &Deps);
304 
305  /// Compute the context hash for \p Deps, and create the mapping
306  /// \c ModuleDepsByID[Deps.ID] = &Deps.
307  void associateWithContextHash(const CowCompilerInvocation &CI,
308  ModuleDeps &Deps);
309 };
310 
311 /// Resets codegen options that don't affect modules/PCH.
313  const LangOptions &LangOpts,
314  CodeGenOptions &CGOpts);
315 
316 } // end namespace dependencies
317 } // end namespace tooling
318 } // end namespace clang
319 
320 namespace llvm {
322  return hash_combine(ID.ModuleName, ID.ContextHash);
323 }
324 
325 template <> struct DenseMapInfo<clang::tooling::dependencies::ModuleID> {
327  static inline ModuleID getEmptyKey() { return ModuleID{"", ""}; }
328  static inline ModuleID getTombstoneKey() {
329  return ModuleID{"~", "~"}; // ~ is not a valid module name or context hash
330  }
331  static unsigned getHashValue(const ModuleID &ID) { return hash_value(ID); }
332  static bool isEqual(const ModuleID &LHS, const ModuleID &RHS) {
333  return LHS == RHS;
334  }
335 };
336 } // namespace llvm
337 
338 #endif // LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_MODULEDEPCOLLECTOR_H
static char ID
Definition: Arena.cpp:183
Forward-declares and imports various common LLVM datatypes that clang wants to use unqualified.
llvm::MachO::FileType FileType
Definition: MachO.h:45
Defines the PPCallbacks interface.
static std::string getName(const CallEvent &Call)
SourceLocation Loc
Definition: SemaObjC.cpp:755
Defines the SourceManager interface.
Reads an AST files chain containing the contents of a translation unit.
Definition: ASTReader.h:366
Represents a character-granular source range.
CodeGenOptions - Track various options which control how the code is optimized and passed to the back...
CompilerInstance - Helper class for managing a single instance of the Clang compiler.
Helper class for holding the data necessary to invoke the compiler.
Same as CompilerInvocation, but with copy-on-write optimization.
An interface for collecting the dependencies of a compilation.
Definition: Utils.h:63
An opaque identifier used by SourceManager which refers to a source file (MemoryBuffer) along with it...
Keeps track of the various options that can be enabled, which controls the dialect of C or C++ that i...
Definition: LangOptions.h:482
Describes a module or submodule.
Definition: Module.h:105
This interface provides a way to observe the actions of the preprocessor as it does its thing.
Definition: PPCallbacks.h:35
Engages in a tight little dance with the lexer to efficiently preprocess tokens.
Definition: Preprocessor.h:128
Encodes a location in the source.
Token - This structure provides full information about a lexed token.
Definition: Token.h:36
The base class of the type hierarchy.
Definition: Type.h:1813
Dependency scanner callbacks that are used during scanning to influence the behaviour of the scan - f...
Callback that records textual includes and direct modular includes/imports during preprocessing.
void EndOfMainFile() override
Callback invoked when the end of the main file is reached.
void InclusionDirective(SourceLocation HashLoc, const Token &IncludeTok, StringRef FileName, bool IsAngled, CharSourceRange FilenameRange, OptionalFileEntryRef File, StringRef SearchPath, StringRef RelativePath, const Module *SuggestedModule, bool ModuleImported, SrcMgr::CharacteristicKind FileType) override
Callback invoked whenever an inclusion directive of any kind (#include, #import, etc....
void LexedFileChanged(FileID FID, LexedFileChangeReason Reason, SrcMgr::CharacteristicKind FileType, FileID PrevFID, SourceLocation Loc) override
Callback invoked whenever the Lexer moves to a different file for lexing.
void moduleImport(SourceLocation ImportLoc, ModuleIdPath Path, const Module *Imported) override
Callback invoked whenever there was an explicit module-import syntax.
Collects modular and non-modular dependencies of the main file by attaching ModuleDepCollectorPP to t...
ModuleDepCollector(std::unique_ptr< DependencyOutputOptions > Opts, CompilerInstance &ScanInstance, DependencyConsumer &C, DependencyActionController &Controller, CompilerInvocation OriginalCI, PrebuiltModuleVFSMapT PrebuiltModuleVFSMap, ScanningOptimizations OptimizeArgs, bool EagerLoadModules, bool IsStdModuleP1689Format)
void applyDiscoveredDependencies(CompilerInvocation &CI)
Apply any changes implied by the discovered dependencies to the given invocation, (e....
void attachToPreprocessor(Preprocessor &PP) override
CharacteristicKind
Indicates whether a file or directory holds normal user code, system code, or system code which is im...
Definition: SourceManager.h:81
static void hash_combine(std::size_t &seed, const T &v)
ModuleOutputKind
An output from a module compilation, such as the path of the module file.
@ DiagnosticSerializationFile
The path of the serialized diagnostic file (.dia), if any.
@ DependencyFile
The path of the dependency file (.d), if any.
@ DependencyTargets
The null-separated list of names to use as the targets in the dependency file, if any.
@ ModuleFile
The module file (.pcm). Required.
void resetBenignCodeGenOptions(frontend::ActionKind ProgramAction, const LangOptions &LangOpts, CodeGenOptions &CGOpts)
Resets codegen options that don't affect modules/PCH.
llvm::StringMap< llvm::StringSet<> > PrebuiltModuleVFSMapT
The JSON file list parser is used to communicate input to InstallAPI.
@ Other
Other implicit parameter.
Diagnostic wrappers for TextAPI types for error reporting.
Definition: Dominators.h:30
hash_code hash_value(const clang::tooling::dependencies::ModuleID &ID)
std::string ClangModuleMapFile
The path to the modulemap file which defines this module.
std::vector< std::string > ModuleMapFileDeps
A collection of absolute paths to module map files that this module needs to know about.
std::vector< PrebuiltModuleDep > PrebuiltModuleDeps
A collection of prebuilt modular dependencies this module directly depends on, not including transiti...
std::vector< ModuleID > ClangModuleDeps
A list of module identifiers this module directly depends on, not including transitive dependencies.
ModuleID ID
The identifier of the module.
const std::vector< std::string > & getBuildArguments()
Get (or compute) the compiler invocation that can be used to build this module.
llvm::StringSet FileDeps
A collection of absolute paths to files that this module directly depends on, not including transitiv...
bool IsSystem
Whether this is a "system" module.
This is used to identify a specific module.
std::string ContextHash
The context hash of a module represents the compiler options that affect the resulting command-line i...
std::string ModuleName
The name of the module.
bool operator==(const ModuleID &Other) const
bool operator<(const ModuleID &Other) const
P1689ModuleInfo - Represents the needed information of standard C++20 modules for P1689 format.
std::string SourcePath
Optional. The source path to the module.
std::string ModuleName
The name of the module. This may include : for partitions.
bool IsStdCXXModuleInterface
If this module is a standard c++ interface unit.
Modular dependency that has already been built prior to the dependency scan.
static bool isEqual(const ModuleID &LHS, const ModuleID &RHS)