Cuda.cpp
1 //===--- Cuda.cpp - Cuda Tool and ToolChain Implementations -----*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "Cuda.h"
10 #include "CommonArgs.h"
11 #include "clang/Basic/Cuda.h"
12 #include "clang/Config/config.h"
13 #include "clang/Driver/Compilation.h"
14 #include "clang/Driver/Distro.h"
15 #include "clang/Driver/Driver.h"
16 #include "clang/Driver/DriverDiagnostic.h"
17 #include "clang/Driver/InputInfo.h"
18 #include "clang/Driver/Options.h"
19 #include "llvm/ADT/StringExtras.h"
20 #include "llvm/Option/ArgList.h"
21 #include "llvm/Support/FileSystem.h"
22 #include "llvm/Support/FormatAdapters.h"
23 #include "llvm/Support/FormatVariadic.h"
24 #include "llvm/Support/Path.h"
25 #include "llvm/Support/Process.h"
26 #include "llvm/Support/Program.h"
27 #include "llvm/Support/VirtualFileSystem.h"
28 #include "llvm/TargetParser/Host.h"
29 #include "llvm/TargetParser/TargetParser.h"
30 #include <system_error>
31 
32 using namespace clang::driver;
33 using namespace clang::driver::toolchains;
34 using namespace clang::driver::tools;
35 using namespace clang;
36 using namespace llvm::opt;
37 
38 namespace {
39 
40 CudaVersion getCudaVersion(uint32_t raw_version) {
41  if (raw_version < 7050)
42  return CudaVersion::CUDA_70;
43  if (raw_version < 8000)
44  return CudaVersion::CUDA_75;
45  if (raw_version < 9000)
46  return CudaVersion::CUDA_80;
47  if (raw_version < 9010)
48  return CudaVersion::CUDA_90;
49  if (raw_version < 9020)
50  return CudaVersion::CUDA_91;
51  if (raw_version < 10000)
52  return CudaVersion::CUDA_92;
53  if (raw_version < 10010)
54  return CudaVersion::CUDA_100;
55  if (raw_version < 10020)
56  return CudaVersion::CUDA_101;
57  if (raw_version < 11000)
58  return CudaVersion::CUDA_102;
59  if (raw_version < 11010)
60  return CudaVersion::CUDA_110;
61  if (raw_version < 11020)
62  return CudaVersion::CUDA_111;
63  if (raw_version < 11030)
64  return CudaVersion::CUDA_112;
65  if (raw_version < 11040)
66  return CudaVersion::CUDA_113;
67  if (raw_version < 11050)
68  return CudaVersion::CUDA_114;
69  if (raw_version < 11060)
70  return CudaVersion::CUDA_115;
71  if (raw_version < 11070)
72  return CudaVersion::CUDA_116;
73  if (raw_version < 11080)
74  return CudaVersion::CUDA_117;
75  if (raw_version < 11090)
76  return CudaVersion::CUDA_118;
77  if (raw_version < 12010)
78  return CudaVersion::CUDA_120;
79  if (raw_version < 12020)
80  return CudaVersion::CUDA_121;
81  if (raw_version < 12030)
82  return CudaVersion::CUDA_122;
83  if (raw_version < 12040)
84  return CudaVersion::CUDA_123;
85  if (raw_version < 12050)
86  return CudaVersion::CUDA_124;
87  return CudaVersion::NEW;
88 }
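// For illustration: cuda.h encodes CUDA_VERSION as major * 1000 + minor * 10,
// so a CUDA 11.8 toolkit defines CUDA_VERSION as 11080 and the mapping above
// yields CudaVersion::CUDA_118; raw values at or beyond 12050 fall through to
// CudaVersion::NEW.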
89 
90 CudaVersion parseCudaHFile(llvm::StringRef Input) {
91  // Helper lambda that consumes the given words if the line starts with them
92  // and returns the rest of the line, or std::nullopt otherwise.
93  auto StartsWithWords =
94  [](llvm::StringRef Line,
95  const SmallVector<StringRef, 3> words) -> std::optional<StringRef> {
96  for (StringRef word : words) {
97  if (!Line.consume_front(word))
98  return {};
99  Line = Line.ltrim();
100  }
101  return Line;
102  };
103 
104  Input = Input.ltrim();
105  while (!Input.empty()) {
106  if (auto Line =
107  StartsWithWords(Input.ltrim(), {"#", "define", "CUDA_VERSION"})) {
108  uint32_t RawVersion;
109  Line->consumeInteger(10, RawVersion);
110  return getCudaVersion(RawVersion);
111  }
112  // Find next non-empty line.
113  Input = Input.drop_front(Input.find_first_of("\n\r")).ltrim();
114  }
115  return CudaVersion::UNKNOWN;
116 }
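// As a concrete example, a cuda.h containing the line
//   #define CUDA_VERSION 12040
// is parsed here as CudaVersion::CUDA_124; if no CUDA_VERSION define is found,
// the function returns CudaVersion::UNKNOWN.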
117 } // namespace
118 
119 void CudaInstallationDetector::WarnIfUnsupportedVersion() {
120  if (Version > CudaVersion::PARTIALLY_SUPPORTED) {
121  std::string VersionString = CudaVersionToString(Version);
122  if (!VersionString.empty())
123  VersionString.insert(0, " ");
124  D.Diag(diag::warn_drv_new_cuda_version)
125  << VersionString
126  << (CudaVersion::PARTIALLY_SUPPORTED != CudaVersion::FULLY_SUPPORTED)
127  << CudaVersionToString(CudaVersion::PARTIALLY_SUPPORTED);
128  } else if (Version > CudaVersion::FULLY_SUPPORTED)
129  D.Diag(diag::warn_drv_partially_supported_cuda_version)
130  << CudaVersionToString(Version);
131 }
132 
133 CudaInstallationDetector::CudaInstallationDetector(
134  const Driver &D, const llvm::Triple &HostTriple,
135  const llvm::opt::ArgList &Args)
136  : D(D) {
137  struct Candidate {
138  std::string Path;
139  bool StrictChecking;
140 
141  Candidate(std::string Path, bool StrictChecking = false)
142  : Path(Path), StrictChecking(StrictChecking) {}
143  };
144  SmallVector<Candidate, 4> Candidates;
145 
146  // In decreasing order so we prefer newer versions to older versions.
147  std::initializer_list<const char *> Versions = {
148  "11.4", "11.3", "11.2", "11.1", "10.2", "10.1", "10.0",
149  "9.2", "9.1", "9.0", "8.0", "7.5", "7.0"};
150  auto &FS = D.getVFS();
151 
152  if (Args.hasArg(clang::driver::options::OPT_cuda_path_EQ)) {
153  Candidates.emplace_back(
154  Args.getLastArgValue(clang::driver::options::OPT_cuda_path_EQ).str());
155  } else if (HostTriple.isOSWindows()) {
156  // CUDA_PATH is set by the installer, so prefer it over other versions
157  // that might be present on the system.
158  if (const char *CudaPathEnvVar = ::getenv("CUDA_PATH"))
159  Candidates.emplace_back(CudaPathEnvVar);
160 
161  for (const char *Ver : Versions)
162  Candidates.emplace_back(
163  D.SysRoot + "/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v" +
164  Ver);
165  } else {
166  if (!Args.hasArg(clang::driver::options::OPT_cuda_path_ignore_env)) {
167  // Try to find ptxas binary. If the executable is located in a directory
168  // called 'bin/', its parent directory might be a good guess for a valid
169  // CUDA installation.
170  // However, some distributions might install 'ptxas' to /usr/bin. In that
171  // case the candidate would be '/usr' which passes the following checks
172  // because '/usr/include' exists as well. To avoid this case, we always
173  // check for the directory potentially containing files for libdevice,
174  // even if the user passes -nocudalib.
175  if (llvm::ErrorOr<std::string> ptxas =
176  llvm::sys::findProgramByName("ptxas")) {
177  SmallString<256> ptxasAbsolutePath;
178  llvm::sys::fs::real_path(*ptxas, ptxasAbsolutePath);
179 
180  StringRef ptxasDir = llvm::sys::path::parent_path(ptxasAbsolutePath);
181  if (llvm::sys::path::filename(ptxasDir) == "bin")
182  Candidates.emplace_back(
183  std::string(llvm::sys::path::parent_path(ptxasDir)),
184  /*StrictChecking=*/true);
185  }
186  }
187 
188  Candidates.emplace_back(D.SysRoot + "/usr/local/cuda");
189  for (const char *Ver : Versions)
190  Candidates.emplace_back(D.SysRoot + "/usr/local/cuda-" + Ver);
191 
192  Distro Dist(FS, llvm::Triple(llvm::sys::getProcessTriple()));
193  if (Dist.IsDebian() || Dist.IsUbuntu())
194  // Special case for Debian to have nvidia-cuda-toolkit work
195  // out of the box. More info on http://bugs.debian.org/882505
196  Candidates.emplace_back(D.SysRoot + "/usr/lib/cuda");
197  }
198 
199  bool NoCudaLib = Args.hasArg(options::OPT_nogpulib);
200 
201  for (const auto &Candidate : Candidates) {
202  InstallPath = Candidate.Path;
203  if (InstallPath.empty() || !FS.exists(InstallPath))
204  continue;
205 
206  BinPath = InstallPath + "/bin";
207  IncludePath = InstallPath + "/include";
208  LibDevicePath = InstallPath + "/nvvm/libdevice";
209 
210  if (!(FS.exists(IncludePath) && FS.exists(BinPath)))
211  continue;
212  bool CheckLibDevice = (!NoCudaLib || Candidate.StrictChecking);
213  if (CheckLibDevice && !FS.exists(LibDevicePath))
214  continue;
215 
216  Version = CudaVersion::UNKNOWN;
217  if (auto CudaHFile = FS.getBufferForFile(InstallPath + "/include/cuda.h"))
218  Version = parseCudaHFile((*CudaHFile)->getBuffer());
219  // As a last resort, make an educated guess between CUDA-7.0, which had
220  // old-style libdevice bitcode, and an unknown recent CUDA version.
221  if (Version == CudaVersion::UNKNOWN) {
222  Version = FS.exists(LibDevicePath + "/libdevice.10.bc")
223  ? CudaVersion::NEW
224  : CudaVersion::CUDA_70;
225  }
226 
227  if (Version >= CudaVersion::CUDA_90) {
228  // CUDA-9+ uses single libdevice file for all GPU variants.
229  std::string FilePath = LibDevicePath + "/libdevice.10.bc";
230  if (FS.exists(FilePath)) {
231  for (int Arch = (int)CudaArch::SM_30, E = (int)CudaArch::LAST; Arch < E;
232  ++Arch) {
233  CudaArch GpuArch = static_cast<CudaArch>(Arch);
234  if (!IsNVIDIAGpuArch(GpuArch))
235  continue;
236  std::string GpuArchName(CudaArchToString(GpuArch));
237  LibDeviceMap[GpuArchName] = FilePath;
238  }
239  }
240  } else {
241  std::error_code EC;
242  for (llvm::vfs::directory_iterator LI = FS.dir_begin(LibDevicePath, EC),
243  LE;
244  !EC && LI != LE; LI = LI.increment(EC)) {
245  StringRef FilePath = LI->path();
246  StringRef FileName = llvm::sys::path::filename(FilePath);
247  // Process all bitcode filenames that look like
248  // libdevice.compute_XX.YY.bc
249  const StringRef LibDeviceName = "libdevice.";
250  if (!(FileName.starts_with(LibDeviceName) && FileName.ends_with(".bc")))
251  continue;
252  StringRef GpuArch = FileName.slice(
253  LibDeviceName.size(), FileName.find('.', LibDeviceName.size()));
254  LibDeviceMap[GpuArch] = FilePath.str();
255  // Insert map entries for specific devices with this compute
256  // capability. NVCC's choice of the libdevice library version is
257  // rather peculiar and depends on the CUDA version.
258  if (GpuArch == "compute_20") {
259  LibDeviceMap["sm_20"] = std::string(FilePath);
260  LibDeviceMap["sm_21"] = std::string(FilePath);
261  LibDeviceMap["sm_32"] = std::string(FilePath);
262  } else if (GpuArch == "compute_30") {
263  LibDeviceMap["sm_30"] = std::string(FilePath);
264  if (Version < CudaVersion::CUDA_80) {
265  LibDeviceMap["sm_50"] = std::string(FilePath);
266  LibDeviceMap["sm_52"] = std::string(FilePath);
267  LibDeviceMap["sm_53"] = std::string(FilePath);
268  }
269  LibDeviceMap["sm_60"] = std::string(FilePath);
270  LibDeviceMap["sm_61"] = std::string(FilePath);
271  LibDeviceMap["sm_62"] = std::string(FilePath);
272  } else if (GpuArch == "compute_35") {
273  LibDeviceMap["sm_35"] = std::string(FilePath);
274  LibDeviceMap["sm_37"] = std::string(FilePath);
275  } else if (GpuArch == "compute_50") {
276  if (Version >= CudaVersion::CUDA_80) {
277  LibDeviceMap["sm_50"] = std::string(FilePath);
278  LibDeviceMap["sm_52"] = std::string(FilePath);
279  LibDeviceMap["sm_53"] = std::string(FilePath);
280  }
281  }
282  }
283  }
284 
285  // Check that we have found at least one libdevice that we can link in if
286  // -nocudalib hasn't been specified.
287  if (LibDeviceMap.empty() && !NoCudaLib)
288  continue;
289 
290  IsValid = true;
291  break;
292  }
293 }
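// A rough sketch of the layout the detector accepts (assuming a default
// toolkit install): with --cuda-path=/usr/local/cuda-12.4 it requires
// /usr/local/cuda-12.4/bin and .../include to exist, reads .../include/cuda.h
// to determine the version, and expects libdevice under .../nvvm/libdevice
// unless -nogpulib relaxes that last check.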
294 
295 void CudaInstallationDetector::AddCudaIncludeArgs(
296  const ArgList &DriverArgs, ArgStringList &CC1Args) const {
297  if (!DriverArgs.hasArg(options::OPT_nobuiltininc)) {
298  // Add cuda_wrappers/* to our system include path. This lets us wrap
299  // standard library headers.
300  SmallString<128> P(D.ResourceDir);
301  llvm::sys::path::append(P, "include");
302  llvm::sys::path::append(P, "cuda_wrappers");
303  CC1Args.push_back("-internal-isystem");
304  CC1Args.push_back(DriverArgs.MakeArgString(P));
305  }
306 
307  if (DriverArgs.hasArg(options::OPT_nogpuinc))
308  return;
309 
310  if (!isValid()) {
311  D.Diag(diag::err_drv_no_cuda_installation);
312  return;
313  }
314 
315  CC1Args.push_back("-include");
316  CC1Args.push_back("__clang_cuda_runtime_wrapper.h");
317 }
318 
319 void CudaInstallationDetector::CheckCudaVersionSupportsArch(
320  CudaArch Arch) const {
321  if (Arch == CudaArch::UNKNOWN || Version == CudaVersion::UNKNOWN ||
322  ArchsWithBadVersion[(int)Arch])
323  return;
324 
325  auto MinVersion = MinVersionForCudaArch(Arch);
326  auto MaxVersion = MaxVersionForCudaArch(Arch);
327  if (Version < MinVersion || Version > MaxVersion) {
328  ArchsWithBadVersion[(int)Arch] = true;
329  D.Diag(diag::err_drv_cuda_version_unsupported)
330  << CudaArchToString(Arch) << CudaVersionToString(MinVersion)
331  << CudaVersionToString(MaxVersion) << InstallPath
332  << CudaVersionToString(Version);
333  }
334 }
335 
336 void CudaInstallationDetector::print(raw_ostream &OS) const {
337  if (isValid())
338  OS << "Found CUDA installation: " << InstallPath << ", version "
339  << CudaVersionToString(Version) << "\n";
340 }
341 
342 namespace {
343 /// Debug info level for the NVPTX devices. We may need to emit a different
344 /// debug info level for the host and for the device itself. This type controls
345 /// emission of the debug info for the devices: it either disables debug info
346 /// emission completely, emits debug directives only, or emits the same debug
347 /// info as for the host.
348 enum DeviceDebugInfoLevel {
349  DisableDebugInfo, /// Do not emit debug info for the devices.
350  DebugDirectivesOnly, /// Emit only debug directives.
351  EmitSameDebugInfoAsHost, /// Use the same debug info level just like for the
352  /// host.
353 };
354 } // anonymous namespace
355 
356 /// Define the debug info level for the NVPTX devices. If debug info for both
357 /// the host and the device is disabled (-g0/-ggdb0 or no debug options at all),
358 /// no debug info is emitted for the device. If only debug directives are
359 /// requested for both host and device (-gline-directives-only), or debug info
360 /// for the device alone is disabled (optimization is on and
361 /// --cuda-noopt-device-debug was not specified), only debug directives are
362 /// emitted for the device. Otherwise, use the same debug info level as for the
363 /// host (with the limitation that only the DWARF2 standard is supported).
364 static DeviceDebugInfoLevel mustEmitDebugInfo(const ArgList &Args) {
365  const Arg *A = Args.getLastArg(options::OPT_O_Group);
366  bool IsDebugEnabled = !A || A->getOption().matches(options::OPT_O0) ||
367  Args.hasFlag(options::OPT_cuda_noopt_device_debug,
368  options::OPT_no_cuda_noopt_device_debug,
369  /*Default=*/false);
370  if (const Arg *A = Args.getLastArg(options::OPT_g_Group)) {
371  const Option &Opt = A->getOption();
372  if (Opt.matches(options::OPT_gN_Group)) {
373  if (Opt.matches(options::OPT_g0) || Opt.matches(options::OPT_ggdb0))
374  return DisableDebugInfo;
375  if (Opt.matches(options::OPT_gline_directives_only))
376  return DebugDirectivesOnly;
377  }
378  return IsDebugEnabled ? EmitSameDebugInfoAsHost : DebugDirectivesOnly;
379  }
380  return willEmitRemarks(Args) ? DebugDirectivesOnly : DisableDebugInfo;
381 }
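// For illustration, under the rules above: "-g" with no -O (or with -O0) maps
// to EmitSameDebugInfoAsHost, "-O2 -g" degrades to DebugDirectivesOnly unless
// --cuda-noopt-device-debug is also given, and "-g0" disables device debug
// info entirely.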
382 
383 void NVPTX::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
384  const InputInfo &Output,
385  const InputInfoList &Inputs,
386  const ArgList &Args,
387  const char *LinkingOutput) const {
388  const auto &TC =
389  static_cast<const toolchains::NVPTXToolChain &>(getToolChain());
390  assert(TC.getTriple().isNVPTX() && "Wrong platform");
391 
392  StringRef GPUArchName;
393  // If this is a CUDA action we need to extract the device architecture
394  // from the Job's associated architecture, otherwise use the -march=arch
395  // option. This option may come from -Xopenmp-target flag or the default
396  // value.
397  if (JA.isDeviceOffloading(Action::OFK_Cuda)) {
398  GPUArchName = JA.getOffloadingArch();
399  } else {
400  GPUArchName = Args.getLastArgValue(options::OPT_march_EQ);
401  if (GPUArchName.empty()) {
402  C.getDriver().Diag(diag::err_drv_offload_missing_gpu_arch)
403  << getToolChain().getArchName() << getShortName();
404  return;
405  }
406  }
407 
408  // Obtain architecture from the action.
409  CudaArch gpu_arch = StringToCudaArch(GPUArchName);
410  assert(gpu_arch != CudaArch::UNKNOWN &&
411  "Device action expected to have an architecture.");
412 
413  // Check that our installation's ptxas supports gpu_arch.
414  if (!Args.hasArg(options::OPT_no_cuda_version_check)) {
415  TC.CudaInstallation.CheckCudaVersionSupportsArch(gpu_arch);
416  }
417 
418  ArgStringList CmdArgs;
419  CmdArgs.push_back(TC.getTriple().isArch64Bit() ? "-m64" : "-m32");
420  DeviceDebugInfoLevel DIKind = mustEmitDebugInfo(Args);
421  if (DIKind == EmitSameDebugInfoAsHost) {
422  // ptxas does not accept -g option if optimization is enabled, so
423  // we ignore the compiler's -O* options if we want debug info.
424  CmdArgs.push_back("-g");
425  CmdArgs.push_back("--dont-merge-basicblocks");
426  CmdArgs.push_back("--return-at-end");
427  } else if (Arg *A = Args.getLastArg(options::OPT_O_Group)) {
428  // Map the -O we received to -O{0,1,2,3}.
429 
430  // -O3 seems like the least-bad option when -Osomething is specified to
431  // clang but it isn't handled below.
432  StringRef OOpt = "3";
433  if (A->getOption().matches(options::OPT_O4) ||
434  A->getOption().matches(options::OPT_Ofast))
435  OOpt = "3";
436  else if (A->getOption().matches(options::OPT_O0))
437  OOpt = "0";
438  else if (A->getOption().matches(options::OPT_O)) {
439  // -Os, -Oz, and -O(anything else) map to -O2, for lack of better options.
440  OOpt = llvm::StringSwitch<const char *>(A->getValue())
441  .Case("1", "1")
442  .Case("2", "2")
443  .Case("3", "3")
444  .Case("s", "2")
445  .Case("z", "2")
446  .Default("2");
447  }
448  CmdArgs.push_back(Args.MakeArgString(llvm::Twine("-O") + OOpt));
449  } else {
450  // If no -O was passed, pass -O3 to ptxas -- this makes ptxas's
451  // optimization level the same as the ptxjitcompiler.
452  CmdArgs.push_back("-O3");
453  }
454  if (DIKind == DebugDirectivesOnly)
455  CmdArgs.push_back("-lineinfo");
456 
457  // Pass -v to ptxas if it was passed to the driver.
458  if (Args.hasArg(options::OPT_v))
459  CmdArgs.push_back("-v");
460 
461  CmdArgs.push_back("--gpu-name");
462  CmdArgs.push_back(Args.MakeArgString(CudaArchToString(gpu_arch)));
463  CmdArgs.push_back("--output-file");
464  std::string OutputFileName = TC.getInputFilename(Output);
465 
466  // If we are invoking `nvlink` internally we need to output a `.cubin` file.
467  // FIXME: This should hopefully be removed if NVIDIA updates their tooling.
468  if (!C.getInputArgs().getLastArg(options::OPT_c)) {
469  SmallString<256> Filename(OutputFileName);
470  llvm::sys::path::replace_extension(Filename, "cubin");
471  OutputFileName = Filename.str();
472  }
473  if (Output.isFilename() && OutputFileName != Output.getFilename())
474  C.addTempFile(Args.MakeArgString(OutputFileName));
475 
476  CmdArgs.push_back(Args.MakeArgString(OutputFileName));
477  for (const auto &II : Inputs)
478  CmdArgs.push_back(Args.MakeArgString(II.getFilename()));
479 
480  for (const auto &A : Args.getAllArgValues(options::OPT_Xcuda_ptxas))
481  CmdArgs.push_back(Args.MakeArgString(A));
482 
483  bool Relocatable;
484  if (JA.isOffloading(Action::OFK_OpenMP))
485  // In OpenMP we need to generate relocatable code.
486  Relocatable = Args.hasFlag(options::OPT_fopenmp_relocatable_target,
487  options::OPT_fnoopenmp_relocatable_target,
488  /*Default=*/true);
489  else if (JA.isOffloading(Action::OFK_Cuda))
490  // In CUDA we generate relocatable code by default.
491  Relocatable = Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc,
492  /*Default=*/false);
493  else if (JA.isOffloading(Action::OFK_SYCL))
494  // In SYCL we control [no-]rdc linking at bitcode stage with 'llvm-link'.
495  // This allows for link-time optimisations and for now we do not support a
496  // non-LTO path, which means we cannot generate relocatable device code.
497  Relocatable = false;
498  else
499  // Otherwise, we are compiling directly and should create linkable output.
500  Relocatable = true;
501 
502  if (Relocatable)
503  CmdArgs.push_back("-c");
504 
505  const char *Exec;
506  if (Arg *A = Args.getLastArg(options::OPT_ptxas_path_EQ))
507  Exec = A->getValue();
508  else
509  Exec = Args.MakeArgString(TC.GetProgramPath("ptxas"));
510  C.addCommand(std::make_unique<Command>(
511  JA, *this,
513  "--options-file"},
514  Exec, CmdArgs, Inputs, Output));
515 }
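// Roughly, for an sm_80 device compile at -O2 with -fgpu-rdc on a 64-bit
// target, the job constructed above is equivalent to:
//   ptxas -m64 -O2 --gpu-name sm_80 --output-file foo.cubin foo.s -c
// (file names are placeholders; any -Xcuda-ptxas flags are appended verbatim).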
516 
517 static bool shouldIncludePTX(const ArgList &Args, StringRef InputArch) {
518  // The new driver does not include PTX by default to avoid overhead.
519  bool includePTX = !Args.hasFlag(options::OPT_offload_new_driver,
520  options::OPT_no_offload_new_driver, false);
521  for (Arg *A : Args.filtered(options::OPT_cuda_include_ptx_EQ,
522  options::OPT_no_cuda_include_ptx_EQ)) {
523  A->claim();
524  const StringRef ArchStr = A->getValue();
525  if (A->getOption().matches(options::OPT_cuda_include_ptx_EQ) &&
526  (ArchStr == "all" || ArchStr == InputArch))
527  includePTX = true;
528  else if (A->getOption().matches(options::OPT_no_cuda_include_ptx_EQ) &&
529  (ArchStr == "all" || ArchStr == InputArch))
530  includePTX = false;
531  }
532  return includePTX;
533 }
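// For example, "--no-cuda-include-ptx=all" drops the PTX image for every
// architecture and "--cuda-include-ptx=sm_80" re-enables it for sm_80 only;
// with the new offloading driver, PTX is omitted unless explicitly requested.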
534 
535 // All inputs to this linker must be from CudaDeviceActions, as we need to look
536 // at the Inputs' Actions in order to figure out which GPU architecture they
537 // correspond to.
538 void NVPTX::FatBinary::ConstructJob(Compilation &C, const JobAction &JA,
539  const InputInfo &Output,
540  const InputInfoList &Inputs,
541  const ArgList &Args,
542  const char *LinkingOutput) const {
543  const auto &TC =
544  static_cast<const toolchains::CudaToolChain &>(getToolChain());
545  assert(TC.getTriple().isNVPTX() && "Wrong platform");
546 
547  ArgStringList CmdArgs;
548  if (TC.CudaInstallation.version() <= CudaVersion::CUDA_100)
549  CmdArgs.push_back("--cuda");
550  CmdArgs.push_back(TC.getTriple().isArch64Bit() ? "-64" : "-32");
551  CmdArgs.push_back(Args.MakeArgString("--create"));
552  CmdArgs.push_back(Args.MakeArgString(Output.getFilename()));
553  if (mustEmitDebugInfo(Args) == EmitSameDebugInfoAsHost)
554  CmdArgs.push_back("-g");
555 
556  for (const auto &II : Inputs) {
557  auto *A = II.getAction();
558  assert(A->getInputs().size() == 1 &&
559  "Device offload action is expected to have a single input");
560  const char *gpu_arch_str = A->getOffloadingArch();
561  assert(gpu_arch_str &&
562  "Device action expected to have associated a GPU architecture!");
563  CudaArch gpu_arch = StringToCudaArch(gpu_arch_str);
564 
565  if (II.getType() == types::TY_PP_Asm &&
566  !shouldIncludePTX(Args, gpu_arch_str))
567  continue;
568  // We need to pass an Arch of the form "sm_XX" for cubin files and
569  // "compute_XX" for ptx.
570  const char *Arch = (II.getType() == types::TY_PP_Asm)
571  ? CudaArchToVirtualArchString(gpu_arch)
572  : gpu_arch_str;
573  CmdArgs.push_back(
574  Args.MakeArgString(llvm::Twine("--image=profile=") + Arch +
575  ",file=" + getToolChain().getInputFilename(II)));
576  }
577 
578  for (const auto &A : Args.getAllArgValues(options::OPT_Xcuda_fatbinary))
579  CmdArgs.push_back(Args.MakeArgString(A));
580 
581  const char *Exec = Args.MakeArgString(TC.GetProgramPath("fatbinary"));
582  C.addCommand(std::make_unique<Command>(
583  JA, *this,
585  "--options-file"},
586  Exec, CmdArgs, Inputs, Output));
587 }
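// Roughly, an sm_80 build that also bundles PTX results in a command such as:
//   fatbinary -64 --create foo.fatbin --image=profile=sm_80,file=foo.cubin
//             --image=profile=compute_80,file=foo.s
// (with an extra "--cuda" flag for CUDA 10.0 and older installations).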
588 
589 void NVPTX::OpenMPLinker::ConstructJob(Compilation &C, const JobAction &JA,
590  const InputInfo &Output,
591  const InputInfoList &Inputs,
592  const ArgList &Args,
593  const char *LinkingOutput) const {
594  const auto &TC =
595  static_cast<const toolchains::CudaToolChain &>(getToolChain());
596  assert(TC.getTriple().isNVPTX() && "Wrong platform");
597 
598  ArgStringList CmdArgs;
599 
600  // OpenMP uses nvlink to link cubin files. The result will be embedded in the
601  // host binary by the host linker.
602  assert(!JA.isHostOffloading(Action::OFK_OpenMP) &&
603  "CUDA toolchain not expected for an OpenMP host device.");
604 
605  if (Output.isFilename()) {
606  CmdArgs.push_back("-o");
607  CmdArgs.push_back(Output.getFilename());
608  } else
609  assert(Output.isNothing() && "Invalid output.");
610  if (mustEmitDebugInfo(Args) == EmitSameDebugInfoAsHost)
611  CmdArgs.push_back("-g");
612 
613  if (Args.hasArg(options::OPT_v))
614  CmdArgs.push_back("-v");
615 
616  StringRef GPUArch =
617  Args.getLastArgValue(options::OPT_march_EQ);
618  assert(!GPUArch.empty() && "At least one GPU Arch required for ptxas.");
619 
620  CmdArgs.push_back("-arch");
621  CmdArgs.push_back(Args.MakeArgString(GPUArch));
622 
623  // Add paths specified in LIBRARY_PATH environment variable as -L options.
624  addDirectoryList(Args, CmdArgs, "-L", "LIBRARY_PATH");
625 
626  // Add paths for the default clang library path.
627  SmallString<256> DefaultLibPath =
628  llvm::sys::path::parent_path(TC.getDriver().Dir);
629  llvm::sys::path::append(DefaultLibPath, CLANG_INSTALL_LIBDIR_BASENAME);
630  CmdArgs.push_back(Args.MakeArgString(Twine("-L") + DefaultLibPath));
631 
632  for (const auto &II : Inputs) {
633  if (II.getType() == types::TY_LLVM_IR || II.getType() == types::TY_LTO_IR ||
634  II.getType() == types::TY_LTO_BC || II.getType() == types::TY_LLVM_BC) {
635  C.getDriver().Diag(diag::err_drv_no_linker_llvm_support)
636  << getToolChain().getTripleString();
637  continue;
638  }
639 
640  // Currently, we only pass the input files to the linker, we do not pass
641  // any libraries that may be valid only for the host.
642  if (!II.isFilename())
643  continue;
644 
645  const char *CubinF =
646  C.getArgs().MakeArgString(getToolChain().getInputFilename(II));
647 
648  CmdArgs.push_back(CubinF);
649  }
650 
651  AddStaticDeviceLibsLinking(C, *this, JA, Inputs, Args, CmdArgs, "nvptx",
652  GPUArch, /*isBitCodeSDL=*/false);
653 
654  // Find nvlink and pass it as "--nvlink-path=" argument of
655  // clang-nvlink-wrapper.
656  CmdArgs.push_back(Args.MakeArgString(
657  Twine("--nvlink-path=" + getToolChain().GetProgramPath("nvlink"))));
658 
659  const char *Exec =
660  Args.MakeArgString(getToolChain().GetProgramPath("clang-nvlink-wrapper"));
661  C.addCommand(std::make_unique<Command>(
662  JA, *this,
664  "--options-file"},
665  Exec, CmdArgs, Inputs, Output));
666 }
667 
668 void NVPTX::Linker::ConstructJob(Compilation &C, const JobAction &JA,
669  const InputInfo &Output,
670  const InputInfoList &Inputs,
671  const ArgList &Args,
672  const char *LinkingOutput) const {
673  const auto &TC =
674  static_cast<const toolchains::NVPTXToolChain &>(getToolChain());
675  ArgStringList CmdArgs;
676 
677  assert(TC.getTriple().isNVPTX() && "Wrong platform");
678 
679  assert((Output.isFilename() || Output.isNothing()) && "Invalid output.");
680  if (Output.isFilename()) {
681  CmdArgs.push_back("-o");
682  CmdArgs.push_back(Output.getFilename());
683  }
684 
685  if (mustEmitDebugInfo(Args) == EmitSameDebugInfoAsHost)
686  CmdArgs.push_back("-g");
687 
688  if (Args.hasArg(options::OPT_v))
689  CmdArgs.push_back("-v");
690 
691  StringRef GPUArch = Args.getLastArgValue(options::OPT_march_EQ);
692  if (GPUArch.empty()) {
693  C.getDriver().Diag(diag::err_drv_offload_missing_gpu_arch)
694  << getToolChain().getArchName() << getShortName();
695  return;
696  }
697 
698  CmdArgs.push_back("-arch");
699  CmdArgs.push_back(Args.MakeArgString(GPUArch));
700 
701  // Add paths specified in LIBRARY_PATH environment variable as -L options.
702  addDirectoryList(Args, CmdArgs, "-L", "LIBRARY_PATH");
703 
704  // Add standard library search paths passed on the command line.
705  Args.AddAllArgs(CmdArgs, options::OPT_L);
706  getToolChain().AddFilePathLibArgs(Args, CmdArgs);
707 
708  // Add paths for the default clang library path.
709  SmallString<256> DefaultLibPath =
710  llvm::sys::path::parent_path(TC.getDriver().Dir);
711  llvm::sys::path::append(DefaultLibPath, CLANG_INSTALL_LIBDIR_BASENAME);
712  CmdArgs.push_back(Args.MakeArgString(Twine("-L") + DefaultLibPath));
713 
714  for (const auto &II : Inputs) {
715  if (II.getType() == types::TY_LLVM_IR || II.getType() == types::TY_LTO_IR ||
716  II.getType() == types::TY_LTO_BC || II.getType() == types::TY_LLVM_BC) {
717  C.getDriver().Diag(diag::err_drv_no_linker_llvm_support)
718  << getToolChain().getTripleString();
719  continue;
720  }
721 
722  // The 'nvlink' application performs RDC-mode linking when given a '.o'
723  // file and device linking when given a '.cubin' file. We always want to
724  // perform device linking, so just rename any '.o' files.
725  // FIXME: This should hopefully be removed if NVIDIA updates their tooling.
726  if (II.isFilename()) {
727  auto InputFile = getToolChain().getInputFilename(II);
728  if (llvm::sys::path::extension(InputFile) != ".cubin") {
729  // If there are no actions above this one then this is direct input and
730  // we can copy it. Otherwise the input is internal so a `.cubin` file
731  // should exist.
732  if (II.getAction() && II.getAction()->getInputs().size() == 0) {
733  const char *CubinF =
734  Args.MakeArgString(getToolChain().getDriver().GetTemporaryPath(
735  llvm::sys::path::stem(InputFile), "cubin"));
736  if (llvm::sys::fs::copy_file(InputFile, C.addTempFile(CubinF)))
737  continue;
738 
739  CmdArgs.push_back(CubinF);
740  } else {
741  SmallString<256> Filename(InputFile);
742  llvm::sys::path::replace_extension(Filename, "cubin");
743  CmdArgs.push_back(Args.MakeArgString(Filename));
744  }
745  } else {
746  CmdArgs.push_back(Args.MakeArgString(InputFile));
747  }
748  } else if (!II.isNothing()) {
749  II.getInputArg().renderAsInput(Args, CmdArgs);
750  }
751  }
752 
753  C.addCommand(std::make_unique<Command>(
754  JA, *this,
756  "--options-file"},
757  Args.MakeArgString(getToolChain().GetProgramPath("nvlink")), CmdArgs,
758  Inputs, Output));
759 }
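// Roughly, the nvlink invocation built here looks like:
//   nvlink -o a.out -arch sm_80 -L<paths> foo.cubin bar.cubin
// with ".o" inputs first renamed or copied to ".cubin", since nvlink treats
// ".o" files as RDC-mode inputs (see the comment above).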
760 
761 void NVPTX::getNVPTXTargetFeatures(const Driver &D, const llvm::Triple &Triple,
762  const llvm::opt::ArgList &Args,
763  std::vector<StringRef> &Features) {
764  if (Args.hasArg(options::OPT_cuda_feature_EQ)) {
765  StringRef PtxFeature =
766  Args.getLastArgValue(options::OPT_cuda_feature_EQ, "+ptx42");
767  Features.push_back(Args.MakeArgString(PtxFeature));
768  return;
769  }
770  CudaInstallationDetector CudaInstallation(D, Triple, Args);
771 
772  // New CUDA versions often introduce new instructions that are only supported
773  // by new PTX version, so we need to raise PTX level to enable them in NVPTX
774  // back-end.
775  const char *PtxFeature = nullptr;
776  switch (CudaInstallation.version()) {
777 #define CASE_CUDA_VERSION(CUDA_VER, PTX_VER) \
778  case CudaVersion::CUDA_##CUDA_VER: \
779  PtxFeature = "+ptx" #PTX_VER; \
780  break;
781  CASE_CUDA_VERSION(124, 84);
782  CASE_CUDA_VERSION(123, 83);
783  CASE_CUDA_VERSION(122, 82);
784  CASE_CUDA_VERSION(121, 81);
785  CASE_CUDA_VERSION(120, 80);
786  CASE_CUDA_VERSION(118, 78);
787  CASE_CUDA_VERSION(117, 77);
788  CASE_CUDA_VERSION(116, 76);
789  CASE_CUDA_VERSION(115, 75);
790  CASE_CUDA_VERSION(114, 74);
791  CASE_CUDA_VERSION(113, 73);
792  CASE_CUDA_VERSION(112, 72);
793  CASE_CUDA_VERSION(111, 71);
794  CASE_CUDA_VERSION(110, 70);
795  CASE_CUDA_VERSION(102, 65);
796  CASE_CUDA_VERSION(101, 64);
797  CASE_CUDA_VERSION(100, 63);
798  CASE_CUDA_VERSION(92, 61);
799  CASE_CUDA_VERSION(91, 61);
800  CASE_CUDA_VERSION(90, 60);
801 #undef CASE_CUDA_VERSION
802  default:
803  PtxFeature = "+ptx42";
804  }
805  Features.push_back(PtxFeature);
806 }
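// For example, a detected CUDA 12.4 installation enables the "+ptx84" feature
// and CUDA 11.8 enables "+ptx78", while anything older than CUDA 9.0 (or an
// installation that could not be detected) falls back to "+ptx42".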
807 
808 /// NVPTX toolchain. Our assembler is ptxas, and our linker is nvlink. This
809 /// operates as a stand-alone version of the NVPTX tools without the host
810 /// toolchain.
811 NVPTXToolChain::NVPTXToolChain(const Driver &D, const llvm::Triple &Triple,
812  const llvm::Triple &HostTriple,
813  const ArgList &Args, bool Freestanding = false)
814  : ToolChain(D, Triple, Args), CudaInstallation(D, HostTriple, Args),
815  Freestanding(Freestanding) {
816  if (CudaInstallation.isValid())
817  getProgramPaths().push_back(std::string(CudaInstallation.getBinPath()));
818  // Look up binaries in the driver directory; this is used to
819  // discover the 'nvptx-arch' executable.
820  getProgramPaths().push_back(getDriver().Dir);
821 }
822 
823 /// We only need the host triple to locate the CUDA binary utilities, use the
824 /// system's default triple if not provided.
825 NVPTXToolChain::NVPTXToolChain(const Driver &D, const llvm::Triple &Triple,
826  const ArgList &Args)
827  : NVPTXToolChain(D, Triple, llvm::Triple(LLVM_HOST_TRIPLE), Args,
828  /*Freestanding=*/true) {}
829 
830 llvm::opt::DerivedArgList *
831 NVPTXToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args,
832  StringRef BoundArch,
833  Action::OffloadKind OffloadKind) const {
834  DerivedArgList *DAL = ToolChain::TranslateArgs(Args, BoundArch, OffloadKind);
835  if (!DAL)
836  DAL = new DerivedArgList(Args.getBaseArgs());
837 
838  const OptTable &Opts = getDriver().getOpts();
839 
840  for (Arg *A : Args)
841  if (!llvm::is_contained(*DAL, A))
842  DAL->append(A);
843 
844  if (!DAL->hasArg(options::OPT_march_EQ) && OffloadKind != Action::OFK_None) {
845  DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ),
846  CudaArchToString(CudaArch::CudaDefault));
847  } else if (DAL->getLastArgValue(options::OPT_march_EQ) == "generic" &&
848  OffloadKind == Action::OFK_None) {
849  DAL->eraseArg(options::OPT_march_EQ);
850  } else if (DAL->getLastArgValue(options::OPT_march_EQ) == "native") {
851  auto GPUsOrErr = getSystemGPUArchs(Args);
852  if (!GPUsOrErr) {
853  getDriver().Diag(diag::err_drv_undetermined_gpu_arch)
854  << getArchName() << llvm::toString(GPUsOrErr.takeError()) << "-march";
855  } else {
856  if (GPUsOrErr->size() > 1)
857  getDriver().Diag(diag::warn_drv_multi_gpu_arch)
858  << getArchName() << llvm::join(*GPUsOrErr, ", ") << "-march";
859  DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ),
860  Args.MakeArgString(GPUsOrErr->front()));
861  }
862  }
863 
864  return DAL;
865 }
866 
867 // Select the remangled libclc variant: 64-bit longs by default, 32-bit longs
868 // on Windows.
869 static const char *getLibSpirvTargetName(const ToolChain &HostTC) {
870  if (HostTC.getTriple().isOSWindows())
871  return "remangled-l32-signed_char.libspirv-nvptx64-nvidia-cuda.bc";
872  return "remangled-l64-signed_char.libspirv-nvptx64-nvidia-cuda.bc";
873 }
874 
875 void NVPTXToolChain::addClangTargetOptions(
876  const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args,
877  Action::OffloadKind DeviceOffloadingKind) const {
878  // If we are compiling with a standalone NVPTX toolchain we want to try to
879  // mimic a standard environment as much as possible. So we enable lowering
880  // ctor / dtor functions to global symbols that can be registered.
881  if (Freestanding)
882  CC1Args.append({"-mllvm", "--nvptx-lower-global-ctor-dtor"});
883 }
884 
885 bool NVPTXToolChain::supportsDebugInfoOption(const llvm::opt::Arg *A) const {
886  const Option &O = A->getOption();
887  return (O.matches(options::OPT_gN_Group) &&
888  !O.matches(options::OPT_gmodules)) ||
889  O.matches(options::OPT_g_Flag) ||
890  O.matches(options::OPT_ggdbN_Group) || O.matches(options::OPT_ggdb) ||
891  O.matches(options::OPT_gdwarf) || O.matches(options::OPT_gdwarf_2) ||
892  O.matches(options::OPT_gdwarf_3) || O.matches(options::OPT_gdwarf_4) ||
893  O.matches(options::OPT_gdwarf_5) ||
894  O.matches(options::OPT_gcolumn_info);
895 }
896 
897 void NVPTXToolChain::adjustDebugInfoKind(
898  llvm::codegenoptions::DebugInfoKind &DebugInfoKind,
899  const ArgList &Args) const {
900  switch (mustEmitDebugInfo(Args)) {
901  case DisableDebugInfo:
902  DebugInfoKind = llvm::codegenoptions::NoDebugInfo;
903  break;
904  case DebugDirectivesOnly:
905  DebugInfoKind = llvm::codegenoptions::DebugDirectivesOnly;
906  break;
907  case EmitSameDebugInfoAsHost:
908  // Use same debug info level as the host.
909  break;
910  }
911 }
912 
913 Expected<SmallVector<std::string>>
914 NVPTXToolChain::getSystemGPUArchs(const ArgList &Args) const {
915  // Detect NVIDIA GPUs available on the system.
916  std::string Program;
917  if (Arg *A = Args.getLastArg(options::OPT_nvptx_arch_tool_EQ))
918  Program = A->getValue();
919  else
920  Program = GetProgramPath("nvptx-arch");
921 
922  auto StdoutOrErr = executeToolChainProgram(Program);
923  if (!StdoutOrErr)
924  return StdoutOrErr.takeError();
925 
926  SmallVector<std::string, 1> GPUArchs;
927  for (StringRef Arch : llvm::split((*StdoutOrErr)->getBuffer(), "\n"))
928  if (!Arch.empty())
929  GPUArchs.push_back(Arch.str());
930 
931  if (GPUArchs.empty())
932  return llvm::createStringError(std::error_code(),
933  "No NVIDIA GPU detected in the system");
934 
935  return std::move(GPUArchs);
936 }
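// For illustration: nvptx-arch prints one architecture per line, so an output
// of "sm_80\nsm_86" yields {"sm_80", "sm_86"}, and an empty output produces
// the "No NVIDIA GPU detected in the system" error above.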
937 
938 /// CUDA toolchain. Our assembler is ptxas, and our "linker" is fatbinary,
939 /// which isn't properly a linker but nonetheless performs the step of stitching
940 /// together object files from the assembler into a single blob.
941 
942 CudaToolChain::CudaToolChain(const Driver &D, const llvm::Triple &Triple,
943  const ToolChain &HostTC, const ArgList &Args,
944  const Action::OffloadKind OK)
945  : NVPTXToolChain(D, Triple, HostTC.getTriple(), Args), HostTC(HostTC),
946  OK(OK) {}
947 
948 void CudaToolChain::addClangTargetOptions(
949  const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args,
950  Action::OffloadKind DeviceOffloadingKind) const {
951  HostTC.addClangTargetOptions(DriverArgs, CC1Args, DeviceOffloadingKind);
952 
953  StringRef GpuArch = DriverArgs.getLastArgValue(options::OPT_march_EQ);
954  assert(!GpuArch.empty() && "Must have an explicit GPU arch.");
955  assert((DeviceOffloadingKind == Action::OFK_OpenMP ||
956  DeviceOffloadingKind == Action::OFK_SYCL ||
957  DeviceOffloadingKind == Action::OFK_Cuda) &&
958  "Only OpenMP, SYCL or CUDA offloading kinds are supported for NVIDIA GPUs.");
959 
960  if (DeviceOffloadingKind == Action::OFK_Cuda) {
961  CC1Args.append(
962  {"-fcuda-is-device", "-mllvm", "-enable-memcpyopt-without-libcalls"});
963 
964  // Unsized function arguments used for variadics were introduced in CUDA-9.0.
965  // We still do not support generating code that actually uses variadic
966  // arguments yet, but we do need to allow parsing them as recent CUDA
967  // headers rely on that. https://github.com/llvm/llvm-project/issues/58410
968  if (CudaInstallation.version() >= CudaVersion::CUDA_90)
969  CC1Args.push_back("-fcuda-allow-variadic-functions");
970 
971  if (DriverArgs.hasArg(options::OPT_fsycl)) {
972  // Add these flags for .cu SYCL compilation.
973  CC1Args.append({"-std=c++17", "-fsycl-is-host"});
974  }
975  }
976 
977  if (DeviceOffloadingKind == Action::OFK_SYCL) {
978  SYCLToolChain::AddSYCLIncludeArgs(getDriver(), DriverArgs,
979  CC1Args);
980 
981  if (DriverArgs.hasArg(options::OPT_fsycl_fp32_prec_sqrt)) {
982  CC1Args.push_back("-fcuda-prec-sqrt");
983  }
984  }
985 
986  auto NoLibSpirv = DriverArgs.hasArg(options::OPT_fno_sycl_libspirv) ||
988  if (DeviceOffloadingKind == Action::OFK_SYCL && !NoLibSpirv) {
989  std::string LibSpirvFile;
990 
991  if (DriverArgs.hasArg(clang::driver::options::OPT_fsycl_libspirv_path_EQ)) {
992  auto ProvidedPath =
993  DriverArgs.getLastArgValue(clang::driver::options::OPT_fsycl_libspirv_path_EQ).str();
994  if (llvm::sys::fs::exists(ProvidedPath))
995  LibSpirvFile = ProvidedPath;
996  } else {
997  SmallVector<StringRef, 8> LibraryPaths;
998 
999  // Expected path w/out install.
1000  SmallString<256> WithoutInstallPath(getDriver().ResourceDir);
1001  llvm::sys::path::append(WithoutInstallPath, Twine("../../clc"));
1002  LibraryPaths.emplace_back(WithoutInstallPath.c_str());
1003 
1004  // Expected path w/ install.
1005  SmallString<256> WithInstallPath(getDriver().ResourceDir);
1006  llvm::sys::path::append(WithInstallPath, Twine("../../../share/clc"));
1007  LibraryPaths.emplace_back(WithInstallPath.c_str());
1008 
1009  // Select remangled libclc variant
1010  std::string LibSpirvTargetName = getLibSpirvTargetName(HostTC);
1011 
1012  for (StringRef LibraryPath : LibraryPaths) {
1013  SmallString<128> LibSpirvTargetFile(LibraryPath);
1014  llvm::sys::path::append(LibSpirvTargetFile, LibSpirvTargetName);
1015  if (llvm::sys::fs::exists(LibSpirvTargetFile) ||
1016  DriverArgs.hasArg(options::OPT__HASH_HASH_HASH)) {
1017  LibSpirvFile = std::string(LibSpirvTargetFile.str());
1018  break;
1019  }
1020  }
1021  }
1022 
1023  if (LibSpirvFile.empty()) {
1024  getDriver().Diag(diag::err_drv_no_sycl_libspirv)
1025  << getLibSpirvTargetName(HostTC);
1026  return;
1027  }
1028 
1029  CC1Args.push_back("-mlink-builtin-bitcode");
1030  CC1Args.push_back(DriverArgs.MakeArgString(LibSpirvFile));
1031  }
1032 
1033  if (DriverArgs.hasArg(options::OPT_nogpulib))
1034  return;
1035 
1036  if (DeviceOffloadingKind == Action::OFK_OpenMP &&
1037  DriverArgs.hasArg(options::OPT_S))
1038  return;
1039 
1040  std::string LibDeviceFile = CudaInstallation.getLibDeviceFile(GpuArch);
1041  if (LibDeviceFile.empty()) {
1042  getDriver().Diag(diag::err_drv_no_cuda_libdevice) << GpuArch;
1043  return;
1044  }
1045 
1046  CC1Args.push_back("-mlink-builtin-bitcode");
1047  CC1Args.push_back(DriverArgs.MakeArgString(LibDeviceFile));
1048 
1049  clang::CudaVersion CudaInstallationVersion = CudaInstallation.version();
1050 
1051  if (DriverArgs.hasFlag(options::OPT_fcuda_short_ptr,
1052  options::OPT_fno_cuda_short_ptr, false))
1053  CC1Args.append({"-mllvm", "--nvptx-short-ptr"});
1054 
1055  if (CudaInstallationVersion >= CudaVersion::UNKNOWN)
1056  CC1Args.push_back(
1057  DriverArgs.MakeArgString(Twine("-target-sdk-version=") +
1058  CudaVersionToString(CudaInstallationVersion)));
1059 
1060  if (DeviceOffloadingKind == Action::OFK_OpenMP) {
1061  if (CudaInstallationVersion < CudaVersion::CUDA_92) {
1062  getDriver().Diag(
1063  diag::err_drv_omp_offload_target_cuda_version_not_support)
1064  << CudaVersionToString(CudaInstallationVersion);
1065  return;
1066  }
1067 
1068  // Link the bitcode library late if we're using device LTO.
1069  if (getDriver().isUsingLTO(/* IsOffload */ true))
1070  return;
1071 
1072  addOpenMPDeviceRTL(getDriver(), DriverArgs, CC1Args, GpuArch.str(),
1073  getTriple(), HostTC);
1074  AddStaticDeviceLibsPostLinking(getDriver(), DriverArgs, CC1Args, "nvptx",
1075  GpuArch, /*isBitCodeSDL=*/true,
1076  /*postClangLink=*/true);
1077  }
1078 }
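// As a sketch, a CUDA device compile against a CUDA 11.8 installation adds
// roughly -fcuda-is-device, -mlink-builtin-bitcode <.../libdevice.10.bc> and
// -target-sdk-version=11.8 to the cc1 invocation; libdevice linking is skipped
// when -nogpulib is given.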
1079 
1080 llvm::DenormalMode CudaToolChain::getDefaultDenormalModeForType(
1081  const llvm::opt::ArgList &DriverArgs, const JobAction &JA,
1082  const llvm::fltSemantics *FPType) const {
1083  if (JA.getOffloadingDeviceKind() == Action::OFK_Cuda) {
1084  if (FPType && FPType == &llvm::APFloat::IEEEsingle() &&
1085  DriverArgs.hasFlag(options::OPT_fgpu_flush_denormals_to_zero,
1086  options::OPT_fno_gpu_flush_denormals_to_zero, false))
1087  return llvm::DenormalMode::getPreserveSign();
1088  }
1089 
1090  assert(JA.getOffloadingDeviceKind() != Action::OFK_Host);
1091  return llvm::DenormalMode::getIEEE();
1092 }
1093 
1094 void CudaToolChain::AddCudaIncludeArgs(const ArgList &DriverArgs,
1095  ArgStringList &CC1Args) const {
1096  // Check our CUDA version if we're going to include the CUDA headers.
1097  if (!DriverArgs.hasArg(options::OPT_nogpuinc) &&
1098  !DriverArgs.hasArg(options::OPT_no_cuda_version_check)) {
1099  StringRef Arch = DriverArgs.getLastArgValue(options::OPT_march_EQ);
1100  assert(!Arch.empty() && "Must have an explicit GPU arch.");
1101  CudaInstallation.CheckCudaVersionSupportsArch(StringToCudaArch(Arch));
1102  }
1103  CudaInstallation.AddCudaIncludeArgs(DriverArgs, CC1Args);
1104 }
1105 
1106 std::string CudaToolChain::getInputFilename(const InputInfo &Input) const {
1107  // Only object files are changed; for example, assembly files keep their .s
1108  // extensions. If the user requested device-only compilation, don't change it.
1109  if (Input.getType() != types::TY_Object || getDriver().offloadDeviceOnly())
1110  return ToolChain::getInputFilename(Input);
1111 
1112  // Replace extension for object files with cubin because nvlink relies on
1113  // these particular file names.
1114  SmallString<256> Filename(ToolChain::getInputFilename(Input));
1115  llvm::sys::path::replace_extension(Filename, "cubin");
1116  return std::string(Filename);
1117 }
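// For example, an object input named "foo.o" is renamed to "foo.cubin" here,
// while assembly inputs and device-only compilations keep their original
// names.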
1118 
1119 llvm::opt::DerivedArgList *
1120 CudaToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args,
1121  StringRef BoundArch,
1122  Action::OffloadKind DeviceOffloadKind) const {
1123  DerivedArgList *DAL =
1124  HostTC.TranslateArgs(Args, BoundArch, DeviceOffloadKind);
1125  if (!DAL)
1126  DAL = new DerivedArgList(Args.getBaseArgs());
1127 
1128  const OptTable &Opts = getDriver().getOpts();
1129 
1130  // For OpenMP device offloading, append derived arguments. Make sure
1131  // flags are not duplicated.
1132  // Also append the compute capability.
1133  if (DeviceOffloadKind == Action::OFK_OpenMP) {
1134  for (Arg *A : Args)
1135  if (!llvm::is_contained(*DAL, A))
1136  DAL->append(A);
1137 
1138  if (!DAL->hasArg(options::OPT_march_EQ)) {
1139  StringRef Arch = BoundArch;
1140  if (Arch.empty()) {
1141  auto ArchsOrErr = getSystemGPUArchs(Args);
1142  if (!ArchsOrErr) {
1143  std::string ErrMsg =
1144  llvm::formatv("{0}", llvm::fmt_consume(ArchsOrErr.takeError()));
1145  getDriver().Diag(diag::err_drv_undetermined_gpu_arch)
1146  << llvm::Triple::getArchTypeName(getArch()) << ErrMsg << "-march";
1147  return DAL;
1148  } else {
1149  Arch = Args.MakeArgString(ArchsOrErr->front());
1150  }
1151  }
1152  DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ), Arch);
1153  }
1154 
1155  return DAL;
1156  }
1157 
1158  for (Arg *A : Args) {
1159  // Make sure flags are not duplicated.
1160  if (!llvm::is_contained(*DAL, A)) {
1161  DAL->append(A);
1162  }
1163  }
1164 
1165  if (!BoundArch.empty()) {
1166  DAL->eraseArg(options::OPT_march_EQ);
1167  DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ),
1168  BoundArch);
1169  }
1170  return DAL;
1171 }
1172 
1173 Tool *NVPTXToolChain::buildAssembler() const {
1174  return new tools::NVPTX::Assembler(*this);
1175 }
1176 
1177 Tool *NVPTXToolChain::buildLinker() const {
1178  return new tools::NVPTX::Linker(*this);
1179 }
1180 
1181 Tool *CudaToolChain::buildAssembler() const {
1182  return new tools::NVPTX::Assembler(*this);
1183 }
1184 
1185 Tool *CudaToolChain::buildLinker() const {
1186  if (OK == Action::OFK_OpenMP)
1187  return new tools::NVPTX::OpenMPLinker(*this);
1188  if (OK == Action::OFK_SYCL)
1189  return new tools::NVPTX::SYCLLinker(*this);
1190  return new tools::NVPTX::FatBinary(*this);
1191 }
1192 
1193 Tool *CudaToolChain::SelectTool(const JobAction &JA) const {
1194  if (OK == Action::OFK_SYCL) {
1195  if (JA.getKind() == Action::LinkJobClass &&
1196  JA.getType() == types::TY_LLVM_BC) {
1197  return static_cast<tools::NVPTX::SYCLLinker *>(ToolChain::SelectTool(JA))
1198  ->GetSYCLToolChainLinker();
1199  }
1200  }
1201  return ToolChain::SelectTool(JA);
1202 }
1203 
1204 void CudaToolChain::addClangWarningOptions(ArgStringList &CC1Args) const {
1205  HostTC.addClangWarningOptions(CC1Args);
1206 }
1207 
1208 ToolChain::CXXStdlibType
1209 CudaToolChain::GetCXXStdlibType(const ArgList &Args) const {
1210  return HostTC.GetCXXStdlibType(Args);
1211 }
1212 
1213 void CudaToolChain::AddClangSystemIncludeArgs(const ArgList &DriverArgs,
1214  ArgStringList &CC1Args) const {
1215  if (DriverArgs.hasArg(options::OPT_fsycl)) {
1216  SYCLToolChain::AddSYCLIncludeArgs(getDriver(), DriverArgs,
1217  CC1Args);
1218  }
1219  HostTC.AddClangSystemIncludeArgs(DriverArgs, CC1Args);
1220 
1221  if (!DriverArgs.hasArg(options::OPT_nogpuinc) && CudaInstallation.isValid())
1222  CC1Args.append(
1223  {"-internal-isystem",
1224  DriverArgs.MakeArgString(CudaInstallation.getIncludePath())});
1225 }
1226 
1227 void CudaToolChain::AddClangCXXStdlibIncludeArgs(const ArgList &Args,
1228  ArgStringList &CC1Args) const {
1229  HostTC.AddClangCXXStdlibIncludeArgs(Args, CC1Args);
1230 }
1231 
1232 void CudaToolChain::AddIAMCUIncludeArgs(const ArgList &Args,
1233  ArgStringList &CC1Args) const {
1234  HostTC.AddIAMCUIncludeArgs(Args, CC1Args);
1235 }
1236 
1237 SanitizerMask CudaToolChain::getSupportedSanitizers() const {
1238  // The CudaToolChain only supports sanitizers in the sense that it allows
1239  // sanitizer arguments on the command line if they are supported by the host
1240  // toolchain. The CudaToolChain will actually ignore any command line
1241  // arguments for any of these "supported" sanitizers. That means that no
1242  // sanitization of device code is actually supported at this time.
1243  //
1244  // This behavior is necessary because the host and device toolchains
1245  // invocations often share the command line, so the device toolchain must
1246  // tolerate flags meant only for the host toolchain.
1247  return HostTC.getSupportedSanitizers();
1248 }
1249 
1250 VersionTuple CudaToolChain::computeMSVCVersion(const Driver *D,
1251  const ArgList &Args) const {
1252  return HostTC.computeMSVCVersion(D, Args);
1253 }