clang  19.0.0git
SYCL.cpp
Go to the documentation of this file.
1 //===--- SYCL.cpp - SYCL Tool and ToolChain Implementations -----*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 #include "SYCL.h"
9 #include "CommonArgs.h"
10 #include "clang/Driver/Action.h"
12 #include "clang/Driver/Driver.h"
14 #include "clang/Driver/InputInfo.h"
15 #include "clang/Driver/Options.h"
16 #include "llvm/Option/Option.h"
17 #include "llvm/Support/CommandLine.h"
18 #include "llvm/Support/FileSystem.h"
19 #include "llvm/Support/Path.h"
20 #include <algorithm>
21 #include <sstream>
22 
23 using namespace clang::driver;
24 using namespace clang::driver::toolchains;
25 using namespace clang::driver::tools;
26 using namespace clang;
27 using namespace llvm::opt;
28 
30  : D(D), InstallationCandidates() {
 // The only candidate registered here is the parent of the driver's
 // directory, spelled as D.Dir + "/..".
31  InstallationCandidates.emplace_back(D.Dir + "/..");
32 }
33 
35  llvm::SmallVector<llvm::SmallString<128>, 4> &DeviceLibPaths) const {
 // Appends candidate device-library directories to DeviceLibPaths: first
 // "<candidate>/lib" for every installation candidate, in order.
36  for (const auto &IC : InstallationCandidates) {
37  llvm::SmallString<128> InstallLibPath(IC.str());
38  InstallLibPath.append("/lib");
39  DeviceLibPaths.emplace_back(InstallLibPath);
40  }
41 
 // The driver's sysroot "/lib" directory is always appended last.
42  DeviceLibPaths.emplace_back(D.SysRoot + "/lib");
43 }
44 
45 void SYCLInstallationDetector::print(llvm::raw_ostream &OS) const {
46  if (!InstallationCandidates.size())
47  return;
48  OS << "SYCL Installation Candidates: \n";
49  for (const auto &IC : InstallationCandidates) {
50  OS << IC << "\n";
51  }
52 }
53 
54 static void addFPGATimingDiagnostic(std::unique_ptr<Command> &Cmd,
55  Compilation &C) {
56  const char *Msg = C.getArgs().MakeArgString(
57  "The FPGA image generated during this compile contains timing violations "
58  "and may produce functional errors if used. Refer to the Intel oneAPI "
59  "DPC++ FPGA Optimization Guide section on Timing Failures for more "
60  "information.");
61  Cmd->addDiagForErrorCode(/*ErrorCode*/ 42, Msg);
62  Cmd->addExitForErrorCode(/*ErrorCode*/ 42, false);
63 }
64 
 // Wraps InputCommand in an llvm-foreach invocation that runs it over the
 // given input file lists, and adds the resulting command to the compilation.
66  std::unique_ptr<Command> InputCommand,
67  const InputInfoList &InputFiles,
68  const InputInfo &Output, const Tool *T,
69  StringRef Increment, StringRef Ext,
70  StringRef ParallelJobs) {
71  // Construct llvm-foreach command.
72  // The llvm-foreach command looks like this:
73  // llvm-foreach --in-file-list=a.list --in-replace='{}' -- echo '{}'
74  ArgStringList ForeachArgs;
75  std::string OutputFileName(T->getToolChain().getInputFilename(Output));
76  ForeachArgs.push_back(C.getArgs().MakeArgString("--out-ext=" + Ext));
 // Each input file name is passed twice: once as the list to iterate over
 // and once as the token to be replaced in the wrapped command line.
77  for (auto &I : InputFiles) {
78  std::string Filename(T->getToolChain().getInputFilename(I));
79  ForeachArgs.push_back(
80  C.getArgs().MakeArgString("--in-file-list=" + Filename));
81  ForeachArgs.push_back(
82  C.getArgs().MakeArgString("--in-replace=" + Filename));
83  }
84 
 // The output file name plays the same double role on the output side.
85  ForeachArgs.push_back(
86  C.getArgs().MakeArgString("--out-file-list=" + OutputFileName));
87  ForeachArgs.push_back(
88  C.getArgs().MakeArgString("--out-replace=" + OutputFileName));
 // --out-increment and --jobs are only emitted when the caller supplied a
 // non-empty value.
89  if (!Increment.empty())
90  ForeachArgs.push_back(
91  C.getArgs().MakeArgString("--out-increment=" + Increment));
92  if (!ParallelJobs.empty())
93  ForeachArgs.push_back(C.getArgs().MakeArgString("--jobs=" + ParallelJobs));
94 
95  if (C.getDriver().isSaveTempsEnabled()) {
96  SmallString<128> OutputDirName;
97  if (C.getDriver().isSaveTempsObj()) {
98  OutputDirName =
99  T->getToolChain().GetFilePath(OutputFileName.c_str()).c_str();
100  llvm::sys::path::remove_filename(OutputDirName);
101  }
102  // Use the current dir if the `GetFilePath` returned an empty string, which
103  // is the case when the `OutputFileName` does not contain any directory
104  // information, or if in CWD mode. This is necessary for `llvm-foreach`, as
105  // it would disregard the parameter without it. Otherwise append separator.
106  if (OutputDirName.empty())
107  llvm::sys::path::native(OutputDirName = "./");
108  else
109  OutputDirName.append(llvm::sys::path::get_separator());
110  ForeachArgs.push_back(
111  C.getArgs().MakeArgString("--out-dir=" + OutputDirName));
112  }
113 
114  // If fsycl-dump-device-code is passed, put the PTX files
115  // into the path provided in fsycl-dump-device-code.
 // NOTE(review): when save-temps is also enabled this adds a second
 // --out-dir argument after the one above; which one llvm-foreach honors is
 // not visible here.
116  if (T->getToolChain().getTriple().isNVPTX() &&
117  C.getDriver().isDumpDeviceCodeEnabled() && Ext == "s") {
118  SmallString<128> OutputDir;
119 
120  Arg *DumpDeviceCodeArg =
121  C.getArgs().getLastArg(options::OPT_fsycl_dump_device_code_EQ);
122 
123  OutputDir = (DumpDeviceCodeArg ? DumpDeviceCodeArg->getValue() : "");
124 
125  // If the output directory path is empty, put the PTX files in the
126  // current directory.
127  if (OutputDir.empty())
128  llvm::sys::path::native(OutputDir = "./");
129  else
130  OutputDir.append(llvm::sys::path::get_separator());
131  ForeachArgs.push_back(C.getArgs().MakeArgString("--out-dir=" + OutputDir));
132  }
133 
 // Everything after "--" is the wrapped command: its executable followed by
 // its original arguments, copied unchanged.
134  ForeachArgs.push_back(C.getArgs().MakeArgString("--"));
135  ForeachArgs.push_back(
136  C.getArgs().MakeArgString(InputCommand->getExecutable()));
137 
138  for (auto &Arg : InputCommand->getArguments())
139  ForeachArgs.push_back(Arg);
140 
 // llvm-foreach is looked up in the driver's own directory.
141  SmallString<128> ForeachPath(C.getDriver().Dir);
142  llvm::sys::path::append(ForeachPath, "llvm-foreach");
143  const char *Foreach = C.getArgs().MakeArgString(ForeachPath);
144 
145  auto Cmd = std::make_unique<Command>(JA, *T, ResponseFileSupport::None(),
146  Foreach, ForeachArgs, std::nullopt);
147  // FIXME: Add the FPGA specific timing diagnostic to the foreach call.
148  // The foreach call obscures the return codes from the tool it is calling
149  // to the compiler itself.
151  C.addCommand(std::move(Cmd));
152 }
153 
 // Negation of the -fgpu-rdc / -fno-gpu-rdc flag pair, which defaults to
 // true: the result is true only when that flag evaluates to false.
155  return !C.getArgs().hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc,
156  /*default=*/true);
157 }
158 
159 // Return whether to use native bfloat16 library.
160 static bool selectBfloatLibs(const llvm::Triple &Triple, const Compilation &C,
161  bool &UseNative) {
162  const llvm::opt::ArgList &Args = C.getArgs();
163  bool NeedLibs = false;
164 
165  // spir64 target is actually JIT compilation, so we defer selection of
166  // bfloat16 libraries to runtime. For AOT we need libraries, but skip
167  // for Nvidia.
168  NeedLibs =
169  Triple.getSubArch() != llvm::Triple::NoSubArch && !Triple.isNVPTX();
170  UseNative = false;
171  if (NeedLibs && Triple.getSubArch() == llvm::Triple::SPIRSubArch_gen &&
172  C.hasOffloadToolChain<Action::OFK_SYCL>()) {
173  ArgStringList TargArgs;
174  auto ToolChains = C.getOffloadToolChains<Action::OFK_SYCL>();
175  // Match up the toolchain with the incoming Triple so we are grabbing the
176  // expected arguments to scrutinize.
177  for (auto TI = ToolChains.first, TE = ToolChains.second; TI != TE; ++TI) {
178  llvm::Triple SYCLTriple = TI->second->getTriple();
179  if (SYCLTriple == Triple) {
180  const toolchains::SYCLToolChain *SYCLTC =
181  static_cast<const toolchains::SYCLToolChain *>(TI->second);
182  SYCLTC->TranslateBackendTargetArgs(Triple, Args, TargArgs);
183  break;
184  }
185  }
186 
187  auto checkBF = [](StringRef Device) {
188  return Device.starts_with("pvc") || Device.starts_with("ats");
189  };
190 
191  std::string Params;
192  for (const auto &Arg : TargArgs) {
193  Params += " ";
194  Params += Arg;
195  }
196  size_t DevicesPos = Params.find("-device ");
197  UseNative = false;
198  if (DevicesPos != std::string::npos) {
199  UseNative = true;
200  std::istringstream Devices(Params.substr(DevicesPos + 8));
201  for (std::string S; std::getline(Devices, S, ',');)
202  UseNative &= checkBF(S);
203  }
204  }
205  return NeedLibs;
206 }
207 
// Builds the list of SYCL device library file names to link for TargetTriple.
// The selection honors the last -fsycl-device-lib= / -fno-sycl-device-lib=
// option, adds the fallback libraries when IsSpirvAOT is set or the target is
// NVPTX, adds one bfloat16 library as chosen by selectBfloatLibs, adds the ITT
// annotation libraries unless device code instrumentation is disabled, and
// (in non-Windows builds of the driver) adds the sanitizer library when
// address sanitizer is requested for device code.
209 SYCL::getDeviceLibraries(const Compilation &C, const llvm::Triple &TargetTriple,
210  bool IsSpirvAOT) {
211  SmallVector<std::string, 8> LibraryList;
212  const llvm::opt::ArgList &Args = C.getArgs();
213 
 // Pairs a library base name with the option group name that controls it.
214  struct DeviceLibOptInfo {
215  StringRef DeviceLibName;
216  StringRef DeviceLibOption;
217  };
218 
219  bool NoDeviceLibs = false;
220  // Currently, all SYCL device libraries will be linked by default. Linkage
221  // of "internal" libraries cannot be affected via -fno-sycl-device-lib.
222  llvm::StringMap<bool> DeviceLibLinkInfo = {
223  {"libc", true}, {"libm-fp32", true}, {"libm-fp64", true},
224  {"libimf-fp32", true}, {"libimf-fp64", true}, {"libimf-bf16", true},
225  {"libm-bfloat16", true}, {"internal", true}};
 // Only the last occurrence of either option is considered.
226  if (Arg *A = Args.getLastArg(options::OPT_fsycl_device_lib_EQ,
227  options::OPT_fno_sycl_device_lib_EQ)) {
228  if (A->getValues().size() == 0)
229  C.getDriver().Diag(diag::warn_drv_empty_joined_argument)
230  << A->getAsString(Args);
231  else {
232  if (A->getOption().matches(options::OPT_fno_sycl_device_lib_EQ))
233  NoDeviceLibs = true;
234 
235  for (StringRef Val : A->getValues()) {
 // "all" sets every group at once and stops processing further values;
 // the "internal" group stays enabled even for the -fno- form.
236  if (Val == "all") {
237  for (const auto &K : DeviceLibLinkInfo.keys())
238  DeviceLibLinkInfo[K] = true && (!NoDeviceLibs || K == "internal");
239  break;
240  }
241  auto LinkInfoIter = DeviceLibLinkInfo.find(Val);
242  if (LinkInfoIter == DeviceLibLinkInfo.end() || Val == "internal") {
243  // TODO: Move the diagnostic to the SYCL section of
244  // Driver::CreateOffloadingDeviceToolChains() to minimize code
245  // duplication.
246  C.getDriver().Diag(diag::err_drv_unsupported_option_argument)
247  << A->getSpelling() << Val;
248  }
 // NOTE(review): this assignment also runs after the diagnostic above, so
 // an unknown value is inserted into the map and "internal" can be
 // switched off here; presumably harmless because an error was emitted,
 // but confirm.
249  DeviceLibLinkInfo[Val] = true && !NoDeviceLibs;
250  }
251  }
252  }
253  using SYCLDeviceLibsList = SmallVector<DeviceLibOptInfo, 5>;
254 
 // Wrapper libraries: added for every target (subject to the group switches).
255  const SYCLDeviceLibsList SYCLDeviceWrapperLibs = {
256  {"libsycl-crt", "libc"},
257  {"libsycl-complex", "libm-fp32"},
258  {"libsycl-complex-fp64", "libm-fp64"},
259  {"libsycl-cmath", "libm-fp32"},
260  {"libsycl-cmath-fp64", "libm-fp64"},
261 #if defined(_WIN32)
262  {"libsycl-msvc-math", "libm-fp32"},
263 #endif
264  {"libsycl-imf", "libimf-fp32"},
265  {"libsycl-imf-fp64", "libimf-fp64"},
266  {"libsycl-imf-bf16", "libimf-bf16"}};
267  // For AOT compilation, we need to link sycl_device_fallback_libs as
268  // default too.
269  const SYCLDeviceLibsList SYCLDeviceFallbackLibs = {
270  {"libsycl-fallback-cassert", "libc"},
271  {"libsycl-fallback-cstring", "libc"},
272  {"libsycl-fallback-complex", "libm-fp32"},
273  {"libsycl-fallback-complex-fp64", "libm-fp64"},
274  {"libsycl-fallback-cmath", "libm-fp32"},
275  {"libsycl-fallback-cmath-fp64", "libm-fp64"},
276  {"libsycl-fallback-imf", "libimf-fp32"},
277  {"libsycl-fallback-imf-fp64", "libimf-fp64"},
278  {"libsycl-fallback-imf-bf16", "libimf-bf16"}};
279  const SYCLDeviceLibsList SYCLDeviceBfloat16FallbackLib = {
280  {"libsycl-fallback-bfloat16", "libm-bfloat16"}};
281  const SYCLDeviceLibsList SYCLDeviceBfloat16NativeLib = {
282  {"libsycl-native-bfloat16", "libm-bfloat16"}};
283  // ITT annotation libraries are linked in separately whenever the device
284  // code instrumentation is enabled.
285  const SYCLDeviceLibsList SYCLDeviceAnnotationLibs = {
286  {"libsycl-itt-user-wrappers", "internal"},
287  {"libsycl-itt-compiler-wrappers", "internal"},
288  {"libsycl-itt-stubs", "internal"}};
289 #if !defined(_WIN32)
290  const SYCLDeviceLibsList SYCLDeviceSanitizerLibs = {
291  {"libsycl-sanitizer", "internal"}};
292 #endif
293  bool IsWindowsMSVCEnv =
294  C.getDefaultToolChain().getTriple().isWindowsMSVCEnvironment();
295  bool IsNewOffload = C.getDriver().getUseNewOffloadingDriver();
 // File extension of the libraries: ".bc" by default, object files for
 // NVPTX/FPGA, and the ".new.*" form whenever the new offload model is on
 // (that last choice overrides the previous two).
296  StringRef LibSuffix = ".bc";
297  if (TargetTriple.isNVPTX() ||
298  (TargetTriple.isSPIR() &&
299  TargetTriple.getSubArch() == llvm::Triple::SPIRSubArch_fpga))
300  // For NVidia or FPGA, we are unbundling objects.
301  LibSuffix = IsWindowsMSVCEnv ? ".obj" : ".o";
302  if (IsNewOffload)
303  // For new offload model, we use packaged .bc files.
304  LibSuffix = IsWindowsMSVCEnv ? ".new.obj" : ".new.o";
 // Appends "<name><LibSuffix>" for every entry whose controlling group is
 // still enabled in DeviceLibLinkInfo.
305  auto addLibraries = [&](const SYCLDeviceLibsList &LibsList) {
306  for (const DeviceLibOptInfo &Lib : LibsList) {
307  if (!DeviceLibLinkInfo[Lib.DeviceLibOption])
308  continue;
309  SmallString<128> LibName(Lib.DeviceLibName);
310  llvm::sys::path::replace_extension(LibName, LibSuffix);
311  LibraryList.push_back(Args.MakeArgString(LibName));
312  }
313  };
314 
315  addLibraries(SYCLDeviceWrapperLibs);
316  if (IsSpirvAOT || TargetTriple.isNVPTX())
317  addLibraries(SYCLDeviceFallbackLibs);
318 
 // NativeBfloatLibs is written by selectBfloatLibs before it is read.
319  bool NativeBfloatLibs;
320  bool NeedBfloatLibs = selectBfloatLibs(TargetTriple, C, NativeBfloatLibs);
321  if (NeedBfloatLibs) {
322  // Add native or fallback bfloat16 library.
323  if (NativeBfloatLibs)
324  addLibraries(SYCLDeviceBfloat16NativeLib);
325  else
326  addLibraries(SYCLDeviceBfloat16FallbackLib);
327  }
328 
 // Instrumentation defaults to enabled when neither flag is given.
329  if (Args.hasFlag(options::OPT_fsycl_instrument_device_code,
330  options::OPT_fno_sycl_instrument_device_code, true))
331  addLibraries(SYCLDeviceAnnotationLibs);
332 
333 #if !defined(_WIN32)
 // When a -fsanitize= / -fno-sanitize= option is present, only the last one
 // is examined, and the sanitizer library is added only if it is
 // -fsanitize= with the single value "address".
334  if (Arg *A = Args.getLastArg(options::OPT_fsanitize_EQ,
335  options::OPT_fno_sanitize_EQ)) {
336  if (A->getOption().matches(options::OPT_fsanitize_EQ) &&
337  A->getValues().size() == 1) {
338  std::string SanitizeVal = A->getValue();
339  if (SanitizeVal == "address")
340  addLibraries(SYCLDeviceSanitizerLibs);
341  }
342  } else {
343  // User can pass -fsanitize=address to device compiler via
344  // -Xsycl-target-frontend, sanitize device library must be
345  // linked with user's device image if so.
346  bool IsDeviceAsanEnabled = false;
347  auto SyclFEArg = Args.getAllArgValues(options::OPT_Xsycl_frontend);
348  IsDeviceAsanEnabled = (std::count(SyclFEArg.begin(), SyclFEArg.end(),
349  "-fsanitize=address") > 0);
350  if (!IsDeviceAsanEnabled) {
351  auto SyclFEArgEq = Args.getAllArgValues(options::OPT_Xsycl_frontend_EQ);
352  IsDeviceAsanEnabled = (std::count(SyclFEArgEq.begin(), SyclFEArgEq.end(),
353  "-fsanitize=address") > 0);
354  }
355 
356  // User can also enable asan for SYCL device via -Xarch_device option.
 // Unlike the exact matches above, this is a substring search.
357  if (!IsDeviceAsanEnabled) {
358  auto DeviceArchVals = Args.getAllArgValues(options::OPT_Xarch_device);
359  for (auto DArchVal : DeviceArchVals) {
360  if (DArchVal.find("-fsanitize=address") != std::string::npos) {
361  IsDeviceAsanEnabled = true;
362  break;
363  }
364  }
365  }
366 
367  if (IsDeviceAsanEnabled)
368  addLibraries(SYCLDeviceSanitizerLibs);
369  }
370 #endif
371  return LibraryList;
372 }
373 
374 // This list must match the pre-built SYCL device library files shipped in
375 // the compiler package. Whenever a SYCL device library file is added or
376 // removed, update the list accordingly.
// Entries are library names without the "libsycl-" prefix and without a file
// extension; "msvc-math" is listed only in Windows builds of the driver and
// "sanitizer" only in non-Windows builds.
378  "bfloat16",
379  "crt",
380  "cmath",
381  "cmath-fp64",
382  "complex",
383  "complex-fp64",
384 #if defined(_WIN32)
385  "msvc-math",
386 #else
387  "sanitizer",
388 #endif
389  "imf",
390  "imf-fp64",
391  "imf-bf16",
392  "itt-compiler-wrappers",
393  "itt-stubs",
394  "itt-user-wrappers",
395  "fallback-cassert",
396  "fallback-cstring",
397  "fallback-cmath",
398  "fallback-cmath-fp64",
399  "fallback-complex",
400  "fallback-complex-fp64",
401  "fallback-imf",
402  "fallback-imf-fp64",
403  "fallback-imf-bf16",
404  "fallback-bfloat16",
405  "native-bfloat16"};
406 
// Emits the llvm-link command(s) that combine InputFiles into the file named
// by Output and returns that file name. When any input ends up classified as
// a library, two commands are emitted: the objects are first linked into a
// temporary .bc file, which is then linked with the libraries using
// --only-needed; otherwise a single command is emitted.
407 const char *SYCL::Linker::constructLLVMLinkCommand(
408  Compilation &C, const JobAction &JA, const InputInfo &Output,
409  const ArgList &Args, StringRef SubArchName, StringRef OutputFilePrefix,
410  const InputInfoList &InputFiles) const {
411  // Split inputs into libraries which have 'archive' type and other inputs
412  // which can be either objects or list files. Object files are linked together
413  // in a usual way, but the libraries/list files need to be linked differently.
414  // We need to fetch only required symbols from the libraries. With the current
415  // llvm-link command line interface that can be achieved with two step
416  // linking: at the first step we will link objects into an intermediate
417  // partially linked image which on the second step will be linked with the
418  // libraries with --only-needed option.
419  ArgStringList Opts;
420  ArgStringList Objs;
421  ArgStringList Libs;
422  // Add the input bc's created by compile step.
423  // When offloading, the input file(s) could be from unbundled partially
424  // linked archives. The unbundled information is a list of files and not
425  // an actual object/archive. Take that list and pass those to the linker
426  // instead of the original object.
428  bool IsRDC = !shouldDoPerObjectFileLinking(C);
429  const bool IsSYCLNativeCPU = isSYCLNativeCPU(this->getToolChain());
 // True only for the second of exactly two inputs, when RDC is off and that
 // input is LLVM bitcode.
430  auto isNoRDCDeviceCodeLink = [&](const InputInfo &II) {
431  if (IsRDC)
432  return false;
433  if (II.getType() != clang::driver::types::TY_LLVM_BC)
434  return false;
435  if (InputFiles.size() != 2)
436  return false;
437  return &II == &InputFiles[1];
438  };
 // Recognizes a SYCL device library purely by its file name: it must start
 // with "libsycl-", carry the expected suffix (and not the ".new.*" one), and
 // the remainder must match an entry of SYCLDeviceLibList. For NVPTX and
 // native CPU, names containing "libspirv" or "libdevice" are accepted too.
439  auto isSYCLDeviceLib = [&](const InputInfo &II) {
440  const ToolChain *HostTC = C.getSingleOffloadToolChain<Action::OFK_Host>();
441  const bool IsNVPTX = this->getToolChain().getTriple().isNVPTX();
442  const bool IsFPGA = this->getToolChain().getTriple().isSPIR() &&
443  this->getToolChain().getTriple().getSubArch() ==
444  llvm::Triple::SPIRSubArch_fpga;
445  StringRef LibPostfix = ".bc";
446  if (IsNVPTX || IsFPGA) {
447  LibPostfix = ".o";
448  if (HostTC->getTriple().isWindowsMSVCEnvironment() &&
449  C.getDriver().IsCLMode())
450  LibPostfix = ".obj";
451  }
452  StringRef NewLibPostfix = ".new.o";
453  if (HostTC->getTriple().isWindowsMSVCEnvironment() &&
454  C.getDriver().IsCLMode())
455  NewLibPostfix = ".new.obj";
456  std::string FileName = this->getToolChain().getInputFilename(II);
457  StringRef InputFilename = llvm::sys::path::filename(FileName);
458  if (IsNVPTX || IsSYCLNativeCPU) {
459  // Linking SYCL Device libs requires libclc as well as libdevice
460  if ((InputFilename.find("libspirv") != InputFilename.npos ||
461  InputFilename.find("libdevice") != InputFilename.npos))
462  return true;
 // For NVPTX the suffixes chosen above are replaced by the cubin ones.
463  if (IsNVPTX) {
464  LibPostfix = ".cubin";
465  NewLibPostfix = ".new.cubin";
466  }
467  }
468  StringRef LibSyclPrefix("libsycl-");
469  if (!InputFilename.starts_with(LibSyclPrefix) ||
470  !InputFilename.ends_with(LibPostfix) ||
471  InputFilename.ends_with(NewLibPostfix))
472  return false;
473  // Skip the prefix "libsycl-"
474  std::string PureLibName =
475  InputFilename.substr(LibSyclPrefix.size()).str();
476  if (isNoRDCDeviceCodeLink(II)) {
477  // Skip the final - until the . because we linked all device libs into a
478  // single BC in a previous action so we have a temp file name.
479  auto FinalDashPos = PureLibName.find_last_of('-');
480  auto DotPos = PureLibName.find_last_of('.');
481  assert((FinalDashPos != std::string::npos &&
482  DotPos != std::string::npos) &&
483  "Unexpected filename");
484  PureLibName =
485  PureLibName.substr(0, FinalDashPos) + PureLibName.substr(DotPos);
486  }
 // Exact match against "<entry><LibPostfix>"; for NVPTX a prefix match on
 // the entry alone is enough.
487  for (const auto &L : SYCLDeviceLibList) {
488  std::string DeviceLibName(L);
489  DeviceLibName.append(LibPostfix);
490  if (StringRef(PureLibName) == DeviceLibName ||
491  (IsNVPTX && StringRef(PureLibName).starts_with(L)))
492  return true;
493  }
494  return false;
495  };
 // -only-needed is requested only when there are at least two inputs, the
 // first one is not a device library and every remaining one is.
496  size_t InputFileNum = InputFiles.size();
497  bool LinkSYCLDeviceLibs = (InputFileNum >= 2);
498  LinkSYCLDeviceLibs = LinkSYCLDeviceLibs && !isSYCLDeviceLib(InputFiles[0]);
499  for (size_t Idx = 1; Idx < InputFileNum; ++Idx)
500  LinkSYCLDeviceLibs =
501  LinkSYCLDeviceLibs && isSYCLDeviceLib(InputFiles[Idx]);
502  if (LinkSYCLDeviceLibs) {
503  Opts.push_back("-only-needed");
504  }
505  // Go through the Inputs to the link. When a listfile is encountered, we
506  // know it is an unbundled generated list.
507  for (const auto &II : InputFiles) {
508  std::string FileName = getToolChain().getInputFilename(II);
509  if (II.getType() == types::TY_Tempfilelist) {
510  if (IsRDC) {
511  // Pass the unbundled list with '@' to be processed.
512  Libs.push_back(C.getArgs().MakeArgString("@" + FileName));
513  } else {
514  assert(InputFiles.size() == 2 &&
515  "Unexpected inputs for no-RDC with temp file list");
516  // If we're in no-RDC mode and the input is a temp file list,
517  // we want to link multiple object files each against device libs,
518  // so we should consider this input as an object and not pass '@'.
519  Objs.push_back(C.getArgs().MakeArgString(FileName));
520  }
521  } else if (II.getType() == types::TY_Archive && !LinkSYCLDeviceLibs) {
522  Libs.push_back(C.getArgs().MakeArgString(FileName));
523  } else
524  Objs.push_back(C.getArgs().MakeArgString(FileName));
525  }
 // NOTE(review): the `if` that this `else` belongs to is not visible in this
 // listing; on that other path every input is treated as a plain object.
526  } else
527  for (const auto &II : InputFiles)
528  Objs.push_back(
529  C.getArgs().MakeArgString(getToolChain().getInputFilename(II)));
530 
531  // Get llvm-link path.
532  SmallString<128> ExecPath(C.getDriver().Dir);
533  llvm::sys::path::append(ExecPath, "llvm-link");
534  const char *Exec = C.getArgs().MakeArgString(ExecPath);
535 
 // Adds one llvm-link command: options first, then inputs, then
 // "-o <Output>" and "--suppress-warnings".
536  auto AddLinkCommand = [this, &C, &JA, Exec](const char *Output,
537  const ArgStringList &Inputs,
538  const ArgStringList &Options) {
539  ArgStringList CmdArgs;
540  llvm::copy(Options, std::back_inserter(CmdArgs));
541  llvm::copy(Inputs, std::back_inserter(CmdArgs));
542  CmdArgs.push_back("-o");
543  CmdArgs.push_back(Output);
544  // TODO: temporary workaround for a problem with warnings reported by
545  // llvm-link when driver links LLVM modules with empty modules
546  CmdArgs.push_back("--suppress-warnings");
547  C.addCommand(std::make_unique<Command>(JA, *this,
549  Exec, CmdArgs, std::nullopt));
550  };
551 
552  // Add an intermediate output file.
553  const char *OutputFileName =
554  C.getArgs().MakeArgString(getToolChain().getInputFilename(Output));
555 
556  if (Libs.empty())
557  AddLinkCommand(OutputFileName, Objs, Opts);
558  else {
 // Libraries are only collected when -only-needed was not put into Opts
 // above, hence Opts is expected to be empty on this path.
559  assert(Opts.empty() && "unexpected options");
560 
561  // Linker will be invoked twice if inputs contain libraries. First time we
562  // will link input objects into an intermediate temporary file, and on the
563  // second invocation intermediate temporary object will be linked with the
564  // libraries, but now only required symbols will be added to the final
565  // output.
566  std::string TempFile =
567  C.getDriver().GetTemporaryPath(OutputFilePrefix.str() + "-link", "bc");
568  const char *LinkOutput = C.addTempFile(C.getArgs().MakeArgString(TempFile));
569  AddLinkCommand(LinkOutput, Objs, {});
570 
571  // Now invoke linker for the second time to link required symbols from the
572  // input libraries.
573  ArgStringList LinkInputs{LinkOutput};
574  llvm::copy(Libs, std::back_inserter(LinkInputs));
575  AddLinkCommand(OutputFileName, LinkInputs, {"--only-needed"});
576  }
577  return OutputFileName;
578 }
579 
// Adds an `llc` command to the compilation that turns InputFileName into an
// object file written to Output's file name.
580 void SYCL::Linker::constructLlcCommand(Compilation &C, const JobAction &JA,
581  const InputInfo &Output,
582  const char *InputFileName) const {
583  // Construct llc command.
584  // The output is an object file.
585  ArgStringList LlcArgs{"-filetype=obj", "-o", Output.getFilename(),
586  InputFileName};
 // llc is looked up in the driver's own directory.
587  SmallString<128> LlcPath(C.getDriver().Dir);
588  llvm::sys::path::append(LlcPath, "llc");
589  const char *Llc = C.getArgs().MakeArgString(LlcPath);
590  C.addCommand(std::make_unique<Command>(JA, *this,
592  LlcArgs, std::nullopt));
593 }
594 
595 // For SYCL the inputs of the linker job are SPIR-V binaries and output is
596 // a single SPIR-V binary. Input can also be bitcode when specified by
597 // the user.
598 void SYCL::Linker::ConstructJob(Compilation &C, const JobAction &JA,
599  const InputInfo &Output,
600  const InputInfoList &Inputs,
601  const ArgList &Args,
602  const char *LinkingOutput) const {
603 
604  assert((getToolChain().getTriple().isSPIROrSPIRV() ||
605  getToolChain().getTriple().isNVPTX() ||
606  getToolChain().getTriple().isAMDGCN() || isSYCLNativeCPU(Args)) &&
607  "Unsupported target");
608 
609  std::string SubArchName =
610  std::string(getToolChain().getTriple().getArchName());
611 
612  // Prefix for temporary file name.
613  std::string Prefix = std::string(llvm::sys::path::stem(SubArchName));
614 
615  // For CUDA, we want to link all BC files before resuming the normal
616  // compilation path
617  if (getToolChain().getTriple().isNVPTX() ||
618  getToolChain().getTriple().isAMDGCN()) {
619  InputInfoList NvptxInputs;
620  for (const auto &II : Inputs) {
621  if (!II.isFilename())
622  continue;
623  NvptxInputs.push_back(II);
624  }
625 
626  constructLLVMLinkCommand(C, JA, Output, Args, SubArchName, Prefix,
627  NvptxInputs);
628  return;
629  }
630 
631  InputInfoList SpirvInputs;
632  for (const auto &II : Inputs) {
633  if (!II.isFilename())
634  continue;
635  SpirvInputs.push_back(II);
636  }
637 
638  constructLLVMLinkCommand(C, JA, Output, Args, SubArchName, Prefix,
639  SpirvInputs);
640 }
641 
642 static const char *makeExeName(Compilation &C, StringRef Name) {
643  llvm::SmallString<8> ExeName(Name);
644  const ToolChain *HostTC = C.getSingleOffloadToolChain<Action::OFK_Host>();
645  if (HostTC->getTriple().isWindowsMSVCEnvironment())
646  ExeName.append(".exe");
647  return C.getArgs().MakeArgString(ExeName);
648 }
649 
650 void SYCL::fpga::BackendCompiler::constructOpenCLAOTCommand(
651  Compilation &C, const JobAction &JA, const InputInfo &Output,
652  const InputInfoList &Inputs, const ArgList &Args) const {
653  // Construct opencl-aot command. This is used for FPGA AOT compilations
654  // when performing emulation. Input file will be a SPIR-V binary which
655  // will be compiled to an aocx file.
656  InputInfoList ForeachInputs;
657  InputInfoList FPGADepFiles;
658  ArgStringList CmdArgs{"-device=fpga_fast_emu"};
659 
660  for (const auto &II : Inputs) {
661  if (II.getType() == types::TY_TempAOCOfilelist ||
662  II.getType() == types::TY_FPGA_Dependencies ||
663  II.getType() == types::TY_FPGA_Dependencies_List)
664  continue;
665  if (II.getType() == types::TY_Tempfilelist)
666  ForeachInputs.push_back(II);
667  CmdArgs.push_back(
668  C.getArgs().MakeArgString("-spv=" + Twine(II.getFilename())));
669  }
670  CmdArgs.push_back(
671  C.getArgs().MakeArgString("-ir=" + Twine(Output.getFilename())));
672 
673  StringRef ForeachExt = "aocx";
674  if (Arg *A = Args.getLastArg(options::OPT_fsycl_link_EQ))
675  if (A->getValue() == StringRef("early"))
676  ForeachExt = "aocr";
677 
678  // Add any implied arguments before user defined arguments.
679  const toolchains::SYCLToolChain &TC =
680  static_cast<const toolchains::SYCLToolChain &>(getToolChain());
681  const ToolChain *HostTC = C.getSingleOffloadToolChain<Action::OFK_Host>();
682  llvm::Triple CPUTriple("spir64_x86_64");
683  TC.AddImpliedTargetArgs(CPUTriple, Args, CmdArgs, JA, *HostTC);
684  // Add the target args passed in
685  TC.TranslateBackendTargetArgs(CPUTriple, Args, CmdArgs);
686  TC.TranslateLinkerTargetArgs(CPUTriple, Args, CmdArgs);
687 
688  SmallString<128> ExecPath(
689  getToolChain().GetProgramPath(makeExeName(C, "opencl-aot")));
690  const char *Exec = C.getArgs().MakeArgString(ExecPath);
691  auto Cmd = std::make_unique<Command>(JA, *this, ResponseFileSupport::None(),
692  Exec, CmdArgs, std::nullopt);
693  if (!ForeachInputs.empty()) {
694  StringRef ParallelJobs =
695  Args.getLastArgValue(options::OPT_fsycl_max_parallel_jobs_EQ);
696  constructLLVMForeachCommand(C, JA, std::move(Cmd), ForeachInputs, Output,
697  this, "", ForeachExt, ParallelJobs);
698  } else
699  C.addCommand(std::move(Cmd));
700 }
701 
702 void SYCL::fpga::BackendCompiler::ConstructJob(
703  Compilation &C, const JobAction &JA, const InputInfo &Output,
704  const InputInfoList &Inputs, const ArgList &Args,
705  const char *LinkingOutput) const {
706  assert(getToolChain().getTriple().isSPIROrSPIRV() && "Unsupported target");
707 
708  // Grab the -Xsycl-target* options.
709  const toolchains::SYCLToolChain &TC =
710  static_cast<const toolchains::SYCLToolChain &>(getToolChain());
711  ArgStringList TargetArgs;
712  TC.TranslateBackendTargetArgs(TC.getTriple(), Args, TargetArgs);
713 
714  // When performing emulation compilations for FPGA AOT, we want to use
715  // opencl-aot instead of aoc.
716  if (C.getDriver().IsFPGAEmulationMode()) {
717  constructOpenCLAOTCommand(C, JA, Output, Inputs, Args);
718  return;
719  }
720 
721  InputInfoList ForeachInputs;
722  InputInfoList FPGADepFiles;
723  StringRef CreatedReportName;
724  ArgStringList CmdArgs{"-o", Output.getFilename()};
725  for (const auto &II : Inputs) {
726  std::string Filename(II.getFilename());
727  if (II.getType() == types::TY_Tempfilelist)
728  ForeachInputs.push_back(II);
729  if (II.getType() == types::TY_TempAOCOfilelist)
730  // Add any FPGA library lists. These come in as special tempfile lists.
731  CmdArgs.push_back(Args.MakeArgString(Twine("-library-list=") + Filename));
732  else if (II.getType() == types::TY_FPGA_Dependencies ||
733  II.getType() == types::TY_FPGA_Dependencies_List)
734  FPGADepFiles.push_back(II);
735  else
736  CmdArgs.push_back(C.getArgs().MakeArgString(Filename));
737  // Check for any AOCR input, if found use that as the project report name
738  StringRef Ext(llvm::sys::path::extension(Filename));
739  if (Ext.empty())
740  continue;
741  if (getToolChain().LookupTypeForExtension(Ext.drop_front()) ==
742  types::TY_FPGA_AOCR) {
743  // Keep the base of the .aocr file name. Input file is a temporary,
744  // so we are stripping off the additional naming information for a
745  // cleaner name. The suffix being stripped from the name is the
746  // added temporary string and the extension.
747  StringRef SuffixFormat("-XXXXXX.aocr");
748  SmallString<128> NameBase(
749  Filename.substr(0, Filename.length() - SuffixFormat.size()));
750  NameBase.append(".prj");
751  CreatedReportName =
752  Args.MakeArgString(llvm::sys::path::filename(NameBase));
753  }
754  }
755  CmdArgs.push_back("-sycl");
756 
757  StringRef ForeachExt = "aocx";
758  if (Arg *A = Args.getLastArg(options::OPT_fsycl_link_EQ))
759  if (A->getValue() == StringRef("early")) {
760  CmdArgs.push_back("-rtl");
761  ForeachExt = "aocr";
762  }
763 
764  for (auto *A : Args) {
765  // Any input file is assumed to have a dependency file associated and
766  // the report folder can also be named based on the first input.
767  if (A->getOption().getKind() != Option::InputClass)
768  continue;
769  SmallString<128> ArgName(A->getSpelling());
770  StringRef Ext(llvm::sys::path::extension(ArgName));
771  if (Ext.empty())
772  continue;
773  types::ID Ty = getToolChain().LookupTypeForExtension(Ext.drop_front());
774  if (Ty == types::TY_INVALID)
775  continue;
776  if (types::isSrcFile(Ty) || Ty == types::TY_Object) {
777  // The project report is created in CWD, so strip off any directory
778  // information if provided with the input file.
779  StringRef TrimmedArgName = llvm::sys::path::filename(ArgName);
780  if (types::isSrcFile(Ty)) {
781  SmallString<128> DepName(
782  C.getDriver().getFPGATempDepFile(std::string(TrimmedArgName)));
783  if (!DepName.empty())
784  FPGADepFiles.push_back(InputInfo(types::TY_Dependencies,
785  Args.MakeArgString(DepName),
786  Args.MakeArgString(DepName)));
787  }
788  if (CreatedReportName.empty()) {
789  // Project report should be saved into CWD, so strip off any
790  // directory information if provided with the input file.
791  llvm::sys::path::replace_extension(ArgName, "prj");
792  CreatedReportName = Args.MakeArgString(ArgName);
793  }
794  }
795  }
796 
797  // Add any dependency files.
798  if (!FPGADepFiles.empty()) {
799  SmallString<128> DepOpt("-dep-files=");
800  for (unsigned I = 0; I < FPGADepFiles.size(); ++I) {
801  if (I)
802  DepOpt += ',';
803  if (FPGADepFiles[I].getType() == types::TY_FPGA_Dependencies_List)
804  DepOpt += "@";
805  DepOpt += FPGADepFiles[I].getFilename();
806  }
807  CmdArgs.push_back(C.getArgs().MakeArgString(DepOpt));
808  }
809 
810  // Depending on output file designations, set the report folder
811  SmallString<128> ReportOptArg;
812  if (Arg *FinalOutput = Args.getLastArg(options::OPT_o, options::OPT__SLASH_o,
813  options::OPT__SLASH_Fe)) {
814  SmallString<128> FN(FinalOutput->getValue());
815  // For "-o file.xxx" where the option value has an extension, if the
816  // extension is one of .a .o .out .lib .obj .exe, the output project
817  // directory name will be file.proj which omits the extension. Otherwise
818  // the output project directory name will be file.xxx.prj which keeps
819  // the original extension.
820  StringRef Ext = llvm::sys::path::extension(FN);
821  SmallVector<StringRef, 6> Exts = {".o", ".a", ".out",
822  ".obj", ".lib", ".exe"};
823  if (std::find(Exts.begin(), Exts.end(), Ext) != Exts.end())
824  llvm::sys::path::replace_extension(FN, "prj");
825  else
826  FN.append(".prj");
827  const char *FolderName = Args.MakeArgString(FN);
828  ReportOptArg += FolderName;
829  } else {
830  // Default output directory should match default output executable name
831  ReportOptArg += "a.prj";
832  }
833  if (!ReportOptArg.empty())
834  CmdArgs.push_back(C.getArgs().MakeArgString(
835  Twine("-output-report-folder=") + ReportOptArg));
836 
837  // Add any implied arguments before user defined arguments.
838  const ToolChain *HostTC = C.getSingleOffloadToolChain<Action::OFK_Host>();
839  TC.AddImpliedTargetArgs(getToolChain().getTriple(), Args, CmdArgs, JA,
840  *HostTC);
841 
842  // Add -Xsycl-target* options.
843  TC.TranslateBackendTargetArgs(getToolChain().getTriple(), Args, CmdArgs);
844  TC.TranslateLinkerTargetArgs(getToolChain().getTriple(), Args, CmdArgs);
845 
846  // Look for -reuse-exe=XX option
847  if (Arg *A = Args.getLastArg(options::OPT_reuse_exe_EQ)) {
848  Args.ClaimAllArgs(options::OPT_reuse_exe_EQ);
849  CmdArgs.push_back(Args.MakeArgString(A->getAsString(Args)));
850  }
851 
852  SmallString<128> ExecPath(
853  getToolChain().GetProgramPath(makeExeName(C, "aoc")));
854  const char *Exec = C.getArgs().MakeArgString(ExecPath);
855  auto Cmd = std::make_unique<Command>(JA, *this, ResponseFileSupport::None(),
856  Exec, CmdArgs, std::nullopt);
858  if (!ForeachInputs.empty()) {
859  StringRef ParallelJobs =
860  Args.getLastArgValue(options::OPT_fsycl_max_parallel_jobs_EQ);
861  constructLLVMForeachCommand(C, JA, std::move(Cmd), ForeachInputs, Output,
862  this, ReportOptArg, ForeachExt, ParallelJobs);
863  } else
864  C.addCommand(std::move(Cmd));
865 }
866 
/// One entry of the ocloc device table: the device name, the ocloc package
/// ("split release") it belongs to, its version string and the literal hex
/// device values that also select it.
struct OclocInfo {
  const char *DeviceName;
  const char *PackageName;
  const char *Version;
  // Literal device values accepted for this entry; may be empty.
  std::vector<int> HexValues;
};
873 
874 // The PVCDevices data structure is organized by device name, with the
875 // corresponding ocloc split release, version and possible Hex representations
876 // of various PVC devices. This information is gathered from the following:
877 // https://github.com/intel/compute-runtime/blob/master/shared/source/dll/devices/devices_base.inl
878 // https://github.com/intel/compute-runtime/blob/master/shared/source/dll/devices/devices_additional.inl
879 static OclocInfo PVCDevices[] = {
880  {"pvc-sdv", "gen12+", "12.60.1", {}},
881  {"pvc",
882  "gen12+",
883  "12.60.7",
884  {0x0BD0, 0x0BD5, 0x0BD6, 0x0BD7, 0x0BD8, 0x0BD9, 0x0BDA, 0x0BDB}}};
885 
886 // Determine if any of the given arguments contain any PVC based values for
887 // the -device option.
888 static bool hasPVCDevice(const ArgStringList &CmdArgs) {
889  bool DeviceSeen = false;
890  StringRef DeviceArg;
891  for (StringRef Arg : CmdArgs) {
892  // -device <arg> comes in as a single arg, split up all potential space
893  // separated values.
894  SmallVector<StringRef> SplitArgs;
895  Arg.split(SplitArgs, ' ');
896  for (StringRef SplitArg : SplitArgs) {
897  if (DeviceSeen) {
898  DeviceArg = SplitArg;
899  break;
900  }
901  if (SplitArg == "-device")
902  DeviceSeen = true;
903  }
904  if (DeviceSeen)
905  break;
906  }
907  if (DeviceArg.empty())
908  return false;
909 
910  // Go through all of the arguments to '-device' and determine if any of these
911  // are pvc based. We only match literal values and will not find a match
912  // when ranges or wildcards are used.
913  // Here we parse the targets, tokenizing via ','
914  SmallVector<StringRef> SplitArgs;
915  DeviceArg.split(SplitArgs, ",");
916  for (const auto &SingleArg : SplitArgs) {
917  StringRef OclocTarget;
918  // Handle shortened versions.
919  bool CheckShortVersion = true;
920  for (auto Char : SingleArg.str()) {
921  if (!std::isdigit(Char) && Char != '.') {
922  CheckShortVersion = false;
923  break;
924  }
925  }
926  // Check for device, version or hex (literal values)
927  for (unsigned int I = 0; I < std::size(PVCDevices); I++) {
928  if (SingleArg.equals_insensitive(PVCDevices[I].DeviceName) ||
929  SingleArg.equals_insensitive(PVCDevices[I].Version))
930  return true;
931  for (int HexVal : PVCDevices[I].HexValues) {
932  int Value = 0;
933  if (!SingleArg.getAsInteger(0, Value) && Value == HexVal)
934  return true;
935  }
936  if (CheckShortVersion &&
937  StringRef(PVCDevices[I].Version).starts_with(SingleArg))
938  return true;
939  }
940  }
941  return false;
942 }
943 
944 static llvm::StringMap<StringRef> GRFModeFlagMap{
945  {"auto", "-ze-intel-enable-auto-large-GRF-mode"},
946  {"small", "-ze-intel-128-GRF-per-thread"},
947  {"large", "-ze-opt-large-register-file"}};
948 
949 StringRef SYCL::gen::getGenGRFFlag(StringRef GRFMode) {
950  if (!GRFModeFlagMap.contains(GRFMode))
951  return "";
952  return GRFModeFlagMap[GRFMode];
953 }
954 
955 void SYCL::gen::BackendCompiler::ConstructJob(Compilation &C,
956  const JobAction &JA,
957  const InputInfo &Output,
958  const InputInfoList &Inputs,
959  const ArgList &Args,
960  const char *LinkingOutput) const {
961  assert(getToolChain().getTriple().isSPIROrSPIRV() && "Unsupported target");
962  ArgStringList CmdArgs{"-output", Output.getFilename()};
963  InputInfoList ForeachInputs;
964  for (const auto &II : Inputs) {
965  CmdArgs.push_back("-file");
966  std::string Filename(II.getFilename());
967  if (II.getType() == types::TY_Tempfilelist)
968  ForeachInputs.push_back(II);
969  CmdArgs.push_back(C.getArgs().MakeArgString(Filename));
970  }
971  // The next line prevents ocloc from modifying the image name
972  CmdArgs.push_back("-output_no_suffix");
973  CmdArgs.push_back("-spirv_input");
974  StringRef Device = JA.getOffloadingArch();
975 
976  // Add -Xsycl-target* options.
977  const toolchains::SYCLToolChain &TC =
978  static_cast<const toolchains::SYCLToolChain &>(getToolChain());
979  const ToolChain *HostTC = C.getSingleOffloadToolChain<Action::OFK_Host>();
980  TC.AddImpliedTargetArgs(getToolChain().getTriple(), Args, CmdArgs, JA,
981  *HostTC, Device);
982  TC.TranslateBackendTargetArgs(getToolChain().getTriple(), Args, CmdArgs,
983  Device);
984  TC.TranslateLinkerTargetArgs(getToolChain().getTriple(), Args, CmdArgs,
985  Device);
986  SmallString<128> ExecPath(
987  getToolChain().GetProgramPath(makeExeName(C, "ocloc")));
988  const char *Exec = C.getArgs().MakeArgString(ExecPath);
989  auto Cmd = std::make_unique<Command>(JA, *this, ResponseFileSupport::None(),
990  Exec, CmdArgs, std::nullopt);
991  if (!ForeachInputs.empty()) {
992  StringRef ParallelJobs =
993  Args.getLastArgValue(options::OPT_fsycl_max_parallel_jobs_EQ);
994  constructLLVMForeachCommand(C, JA, std::move(Cmd), ForeachInputs, Output,
995  this, "", "out", ParallelJobs);
996  } else
997  C.addCommand(std::move(Cmd));
998 }
999 
1000 StringRef SYCL::gen::resolveGenDevice(StringRef DeviceName) {
1001  StringRef Device;
1002  Device =
1003  llvm::StringSwitch<StringRef>(DeviceName)
1004  .Cases("intel_gpu_bdw", "intel_gpu_8_0_0", "bdw")
1005  .Cases("intel_gpu_skl", "intel_gpu_9_0_9", "skl")
1006  .Cases("intel_gpu_kbl", "intel_gpu_9_1_9", "kbl")
1007  .Cases("intel_gpu_cfl", "intel_gpu_9_2_9", "cfl")
1008  .Cases("intel_gpu_apl", "intel_gpu_bxt", "intel_gpu_9_3_0", "apl")
1009  .Cases("intel_gpu_glk", "intel_gpu_9_4_0", "glk")
1010  .Cases("intel_gpu_whl", "intel_gpu_9_5_0", "whl")
1011  .Cases("intel_gpu_aml", "intel_gpu_9_6_0", "aml")
1012  .Cases("intel_gpu_cml", "intel_gpu_9_7_0", "cml")
1013  .Cases("intel_gpu_icllp", "intel_gpu_icl", "intel_gpu_11_0_0",
1014  "icllp")
1015  .Cases("intel_gpu_ehl", "intel_gpu_jsl", "intel_gpu_11_2_0", "ehl")
1016  .Cases("intel_gpu_tgllp", "intel_gpu_tgl", "intel_gpu_12_0_0",
1017  "tgllp")
1018  .Cases("intel_gpu_rkl", "intel_gpu_12_1_0", "rkl")
1019  .Cases("intel_gpu_adl_s", "intel_gpu_rpl_s", "intel_gpu_12_2_0",
1020  "adl_s")
1021  .Cases("intel_gpu_adl_p", "intel_gpu_12_3_0", "adl_p")
1022  .Cases("intel_gpu_adl_n", "intel_gpu_12_4_0", "adl_n")
1023  .Cases("intel_gpu_dg1", "intel_gpu_12_10_0", "dg1")
1024  .Cases("intel_gpu_acm_g10", "intel_gpu_dg2_g10", "intel_gpu_12_55_8",
1025  "acm_g10")
1026  .Cases("intel_gpu_acm_g11", "intel_gpu_dg2_g11", "intel_gpu_12_56_5",
1027  "acm_g11")
1028  .Cases("intel_gpu_acm_g12", "intel_gpu_dg2_g12", "intel_gpu_12_57_0",
1029  "acm_g12")
1030  .Cases("intel_gpu_pvc", "intel_gpu_12_60_7", "pvc")
1031  .Cases("intel_gpu_pvc_vg", "intel_gpu_12_61_7", "pvc_vg")
1032  .Cases("intel_gpu_mtl_u", "intel_gpu_mtl_s", "intel_gpu_arl_u",
1033  "intel_gpu_arl_s", "intel_gpu_12_70_4", "mtl_u")
1034  .Cases("intel_gpu_mtl_h", "intel_gpu_12_71_4", "mtl_h")
1035  .Cases("intel_gpu_arl_h", "intel_gpu_12_74_4", "arl_h")
1036  .Cases("intel_gpu_bmg_g21", "intel_gpu_20_1_4", "bmg_g21")
1037  .Cases("intel_gpu_lnl_m", "intel_gpu_20_4_4", "lnl_m")
1038  .Case("nvidia_gpu_sm_50", "sm_50")
1039  .Case("nvidia_gpu_sm_52", "sm_52")
1040  .Case("nvidia_gpu_sm_53", "sm_53")
1041  .Case("nvidia_gpu_sm_60", "sm_60")
1042  .Case("nvidia_gpu_sm_61", "sm_61")
1043  .Case("nvidia_gpu_sm_62", "sm_62")
1044  .Case("nvidia_gpu_sm_70", "sm_70")
1045  .Case("nvidia_gpu_sm_72", "sm_72")
1046  .Case("nvidia_gpu_sm_75", "sm_75")
1047  .Case("nvidia_gpu_sm_80", "sm_80")
1048  .Case("nvidia_gpu_sm_86", "sm_86")
1049  .Case("nvidia_gpu_sm_87", "sm_87")
1050  .Case("nvidia_gpu_sm_89", "sm_89")
1051  .Case("nvidia_gpu_sm_90", "sm_90")
1052  .Case("nvidia_gpu_sm_90a", "sm_90a")
1053  .Case("amd_gpu_gfx700", "gfx700")
1054  .Case("amd_gpu_gfx701", "gfx701")
1055  .Case("amd_gpu_gfx702", "gfx702")
1056  .Case("amd_gpu_gfx801", "gfx801")
1057  .Case("amd_gpu_gfx802", "gfx802")
1058  .Case("amd_gpu_gfx803", "gfx803")
1059  .Case("amd_gpu_gfx805", "gfx805")
1060  .Case("amd_gpu_gfx810", "gfx810")
1061  .Case("amd_gpu_gfx900", "gfx900")
1062  .Case("amd_gpu_gfx902", "gfx902")
1063  .Case("amd_gpu_gfx904", "gfx904")
1064  .Case("amd_gpu_gfx906", "gfx906")
1065  .Case("amd_gpu_gfx908", "gfx908")
1066  .Case("amd_gpu_gfx909", "gfx909")
1067  .Case("amd_gpu_gfx90a", "gfx90a")
1068  .Case("amd_gpu_gfx90c", "gfx90c")
1069  .Case("amd_gpu_gfx940", "gfx940")
1070  .Case("amd_gpu_gfx941", "gfx941")
1071  .Case("amd_gpu_gfx942", "gfx942")
1072  .Case("amd_gpu_gfx1010", "gfx1010")
1073  .Case("amd_gpu_gfx1011", "gfx1011")
1074  .Case("amd_gpu_gfx1012", "gfx1012")
1075  .Case("amd_gpu_gfx1013", "gfx1013")
1076  .Case("amd_gpu_gfx1030", "gfx1030")
1077  .Case("amd_gpu_gfx1031", "gfx1031")
1078  .Case("amd_gpu_gfx1032", "gfx1032")
1079  .Case("amd_gpu_gfx1033", "gfx1033")
1080  .Case("amd_gpu_gfx1034", "gfx1034")
1081  .Case("amd_gpu_gfx1035", "gfx1035")
1082  .Case("amd_gpu_gfx1036", "gfx1036")
1083  .Case("amd_gpu_gfx1100", "gfx1100")
1084  .Case("amd_gpu_gfx1101", "gfx1101")
1085  .Case("amd_gpu_gfx1102", "gfx1102")
1086  .Case("amd_gpu_gfx1103", "gfx1103")
1087  .Case("amd_gpu_gfx1150", "gfx1150")
1088  .Case("amd_gpu_gfx1151", "gfx1151")
1089  .Case("amd_gpu_gfx1200", "gfx1200")
1090  .Case("amd_gpu_gfx1201", "gfx1201")
1091  .Default("");
1092  return Device;
1093 }
1094 
1096  SmallString<64> Macro;
1097  StringRef Ext = llvm::StringSwitch<StringRef>(DeviceName)
1098  .Case("bdw", "INTEL_GPU_BDW")
1099  .Case("skl", "INTEL_GPU_SKL")
1100  .Case("kbl", "INTEL_GPU_KBL")
1101  .Case("cfl", "INTEL_GPU_CFL")
1102  .Case("apl", "INTEL_GPU_APL")
1103  .Case("glk", "INTEL_GPU_GLK")
1104  .Case("whl", "INTEL_GPU_WHL")
1105  .Case("aml", "INTEL_GPU_AML")
1106  .Case("cml", "INTEL_GPU_CML")
1107  .Case("icllp", "INTEL_GPU_ICLLP")
1108  .Case("ehl", "INTEL_GPU_EHL")
1109  .Case("tgllp", "INTEL_GPU_TGLLP")
1110  .Case("rkl", "INTEL_GPU_RKL")
1111  .Case("adl_s", "INTEL_GPU_ADL_S")
1112  .Case("adl_p", "INTEL_GPU_ADL_P")
1113  .Case("adl_n", "INTEL_GPU_ADL_N")
1114  .Case("dg1", "INTEL_GPU_DG1")
1115  .Case("acm_g10", "INTEL_GPU_ACM_G10")
1116  .Case("acm_g11", "INTEL_GPU_ACM_G11")
1117  .Case("acm_g12", "INTEL_GPU_ACM_G12")
1118  .Case("pvc", "INTEL_GPU_PVC")
1119  .Case("pvc_vg", "INTEL_GPU_PVC_VG")
1120  .Case("mtl_u", "INTEL_GPU_MTL_U")
1121  .Case("mtl_h", "INTEL_GPU_MTL_H")
1122  .Case("arl_h", "INTEL_GPU_ARL_H")
1123  .Case("bmg_g21", "INTEL_GPU_BMG_G21")
1124  .Case("lnl_m", "INTEL_GPU_LNL_M")
1125  .Case("sm_50", "NVIDIA_GPU_SM_50")
1126  .Case("sm_52", "NVIDIA_GPU_SM_52")
1127  .Case("sm_53", "NVIDIA_GPU_SM_53")
1128  .Case("sm_60", "NVIDIA_GPU_SM_60")
1129  .Case("sm_61", "NVIDIA_GPU_SM_61")
1130  .Case("sm_62", "NVIDIA_GPU_SM_62")
1131  .Case("sm_70", "NVIDIA_GPU_SM_70")
1132  .Case("sm_72", "NVIDIA_GPU_SM_72")
1133  .Case("sm_75", "NVIDIA_GPU_SM_75")
1134  .Case("sm_80", "NVIDIA_GPU_SM_80")
1135  .Case("sm_86", "NVIDIA_GPU_SM_86")
1136  .Case("sm_87", "NVIDIA_GPU_SM_87")
1137  .Case("sm_89", "NVIDIA_GPU_SM_89")
1138  .Case("sm_90", "NVIDIA_GPU_SM_90")
1139  .Case("sm_90a", "NVIDIA_GPU_SM_90A")
1140  .Case("gfx700", "AMD_GPU_GFX700")
1141  .Case("gfx701", "AMD_GPU_GFX701")
1142  .Case("gfx702", "AMD_GPU_GFX702")
1143  .Case("gfx801", "AMD_GPU_GFX801")
1144  .Case("gfx802", "AMD_GPU_GFX802")
1145  .Case("gfx803", "AMD_GPU_GFX803")
1146  .Case("gfx805", "AMD_GPU_GFX805")
1147  .Case("gfx810", "AMD_GPU_GFX810")
1148  .Case("gfx900", "AMD_GPU_GFX900")
1149  .Case("gfx902", "AMD_GPU_GFX902")
1150  .Case("gfx904", "AMD_GPU_GFX904")
1151  .Case("gfx906", "AMD_GPU_GFX906")
1152  .Case("gfx908", "AMD_GPU_GFX908")
1153  .Case("gfx909", "AMD_GPU_GFX909")
1154  .Case("gfx90a", "AMD_GPU_GFX90A")
1155  .Case("gfx90c", "AMD_GPU_GFX90C")
1156  .Case("gfx940", "AMD_GPU_GFX940")
1157  .Case("gfx941", "AMD_GPU_GFX941")
1158  .Case("gfx942", "AMD_GPU_GFX942")
1159  .Case("gfx1010", "AMD_GPU_GFX1010")
1160  .Case("gfx1011", "AMD_GPU_GFX1011")
1161  .Case("gfx1012", "AMD_GPU_GFX1012")
1162  .Case("gfx1013", "AMD_GPU_GFX1013")
1163  .Case("gfx1030", "AMD_GPU_GFX1030")
1164  .Case("gfx1031", "AMD_GPU_GFX1031")
1165  .Case("gfx1032", "AMD_GPU_GFX1032")
1166  .Case("gfx1033", "AMD_GPU_GFX1033")
1167  .Case("gfx1034", "AMD_GPU_GFX1034")
1168  .Case("gfx1035", "AMD_GPU_GFX1035")
1169  .Case("gfx1036", "AMD_GPU_GFX1036")
1170  .Case("gfx1100", "AMD_GPU_GFX1100")
1171  .Case("gfx1101", "AMD_GPU_GFX1101")
1172  .Case("gfx1102", "AMD_GPU_GFX1102")
1173  .Case("gfx1103", "AMD_GPU_GFX1103")
1174  .Case("gfx1150", "AMD_GPU_GFX1150")
1175  .Case("gfx1151", "AMD_GPU_GFX1151")
1176  .Case("gfx1200", "AMD_GPU_GFX1200")
1177  .Case("gfx1201", "AMD_GPU_GFX1201")
1178  .Default("");
1179  if (!Ext.empty()) {
1180  Macro = "__SYCL_TARGET_";
1181  Macro += Ext;
1182  Macro += "__";
1183  }
1184  return Macro;
1185 }
1186 
1187 void SYCL::x86_64::BackendCompiler::ConstructJob(
1188  Compilation &C, const JobAction &JA, const InputInfo &Output,
1189  const InputInfoList &Inputs, const ArgList &Args,
1190  const char *LinkingOutput) const {
1191  ArgStringList CmdArgs;
1192  CmdArgs.push_back(Args.MakeArgString(Twine("-o=") + Output.getFilename()));
1193  CmdArgs.push_back("--device=cpu");
1194  InputInfoList ForeachInputs;
1195  for (const auto &II : Inputs) {
1196  std::string Filename(II.getFilename());
1197  if (II.getType() == types::TY_Tempfilelist)
1198  ForeachInputs.push_back(II);
1199  CmdArgs.push_back(Args.MakeArgString(Filename));
1200  }
1201  // Add -Xsycl-target* options.
1202  const toolchains::SYCLToolChain &TC =
1203  static_cast<const toolchains::SYCLToolChain &>(getToolChain());
1204  const ToolChain *HostTC = C.getSingleOffloadToolChain<Action::OFK_Host>();
1205  TC.AddImpliedTargetArgs(getToolChain().getTriple(), Args, CmdArgs, JA,
1206  *HostTC);
1207  TC.TranslateBackendTargetArgs(getToolChain().getTriple(), Args, CmdArgs);
1208  TC.TranslateLinkerTargetArgs(getToolChain().getTriple(), Args, CmdArgs);
1209  SmallString<128> ExecPath(
1210  getToolChain().GetProgramPath(makeExeName(C, "opencl-aot")));
1211  const char *Exec = C.getArgs().MakeArgString(ExecPath);
1212  auto Cmd = std::make_unique<Command>(JA, *this, ResponseFileSupport::None(),
1213  Exec, CmdArgs, std::nullopt);
1214  if (!ForeachInputs.empty()) {
1215  StringRef ParallelJobs =
1216  Args.getLastArgValue(options::OPT_fsycl_max_parallel_jobs_EQ);
1217  constructLLVMForeachCommand(C, JA, std::move(Cmd), ForeachInputs, Output,
1218  this, "", "out", ParallelJobs);
1219  } else
1220  C.addCommand(std::move(Cmd));
1221 }
1222 
1223 // Unsupported options for device compilation
1224 // -fcf-protection, -fsanitize, -fprofile-generate, -fprofile-instr-generate
1225 // -ftest-coverage, -fcoverage-mapping, -fcreate-profile, -fprofile-arcs
1226 // -fcs-profile-generate -forder-file-instrumentation, --coverage
1227 static std::vector<OptSpecifier> getUnsupportedOpts(void) {
1228  std::vector<OptSpecifier> UnsupportedOpts = {
1229  options::OPT_fsanitize_EQ,
1230  options::OPT_fcf_protection_EQ,
1231  options::OPT_fprofile_generate,
1232  options::OPT_fprofile_generate_EQ,
1233  options::OPT_fno_profile_generate,
1234  options::OPT_ftest_coverage,
1235  options::OPT_fno_test_coverage,
1236  options::OPT_fcoverage_mapping,
1237  options::OPT_fno_coverage_mapping,
1238  options::OPT_coverage,
1239  options::OPT_fprofile_instr_generate,
1240  options::OPT_fprofile_instr_generate_EQ,
1241  options::OPT_fprofile_arcs,
1242  options::OPT_fno_profile_arcs,
1243  options::OPT_fno_profile_instr_generate,
1244  options::OPT_fcreate_profile,
1245  options::OPT_fprofile_instr_use,
1246  options::OPT_fprofile_instr_use_EQ,
1247  options::OPT_forder_file_instrumentation,
1248  options::OPT_fcs_profile_generate,
1249  options::OPT_fcs_profile_generate_EQ};
1250  return UnsupportedOpts;
1251 }
1252 
1253 SYCLToolChain::SYCLToolChain(const Driver &D, const llvm::Triple &Triple,
1254  const ToolChain &HostTC, const ArgList &Args)
1255  : ToolChain(D, Triple, Args), HostTC(HostTC),
1256  IsSYCLNativeCPU(Triple == HostTC.getTriple()) {
1257  // Lookup binaries into the driver directory, this is used to
1258  // discover the clang-offload-bundler executable.
1259  getProgramPaths().push_back(getDriver().Dir);
1260 
1261  // Diagnose unsupported options only once.
1262  for (OptSpecifier Opt : getUnsupportedOpts()) {
1263  if (const Arg *A = Args.getLastArg(Opt)) {
1264  // All sanitizer options are not currently supported, except
1265  // AddressSanitizer
1266  if (A->getOption().getID() == options::OPT_fsanitize_EQ &&
1267  A->getValues().size() == 1) {
1268  std::string SanitizeVal = A->getValue();
1269  if (SanitizeVal == "address")
1270  continue;
1271  }
1272  D.Diag(clang::diag::warn_drv_unsupported_option_for_target)
1273  << A->getAsString(Args) << getTriple().str();
1274  }
1275  }
1276 }
1277 
1279  const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args,
1280  Action::OffloadKind DeviceOffloadingKind) const {
1281  HostTC.addClangTargetOptions(DriverArgs, CC1Args, DeviceOffloadingKind);
1282 }
1283 
1284 llvm::opt::DerivedArgList *
1285 SYCLToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args,
1286  StringRef BoundArch,
1287  Action::OffloadKind DeviceOffloadKind) const {
1288  DerivedArgList *DAL =
1289  HostTC.TranslateArgs(Args, BoundArch, DeviceOffloadKind);
1290 
1291  bool IsNewDAL = false;
1292  if (!DAL) {
1293  DAL = new DerivedArgList(Args.getBaseArgs());
1294  IsNewDAL = true;
1295  }
1296 
1297  for (Arg *A : Args) {
1298  // Filter out any options we do not want to pass along to the device
1299  // compilation.
1300  auto Opt(A->getOption());
1301  bool Unsupported = false;
1302  for (OptSpecifier UnsupportedOpt : getUnsupportedOpts()) {
1303  if (Opt.matches(UnsupportedOpt)) {
1304  if (Opt.getID() == options::OPT_fsanitize_EQ &&
1305  A->getValues().size() == 1) {
1306  std::string SanitizeVal = A->getValue();
1307  if (SanitizeVal == "address") {
1308  if (IsNewDAL)
1309  DAL->append(A);
1310  continue;
1311  }
1312  }
1313  if (!IsNewDAL)
1314  DAL->eraseArg(Opt.getID());
1315  Unsupported = true;
1316  }
1317  }
1318  if (Unsupported)
1319  continue;
1320  if (IsNewDAL)
1321  DAL->append(A);
1322  }
1323  // Strip out -O0 for FPGA Hardware device compilation.
1324  if (getDriver().IsFPGAHWMode() &&
1325  getTriple().getSubArch() == llvm::Triple::SPIRSubArch_fpga)
1326  DAL->eraseArg(options::OPT_O0);
1327 
1328  const OptTable &Opts = getDriver().getOpts();
1329  if (!BoundArch.empty()) {
1330  DAL->eraseArg(options::OPT_march_EQ);
1331  DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ),
1332  BoundArch);
1333  }
1334  return DAL;
1335 }
1336 
1337 static void parseTargetOpts(StringRef ArgString, const llvm::opt::ArgList &Args,
1338  llvm::opt::ArgStringList &CmdArgs) {
1339  // Tokenize the string.
1340  SmallVector<const char *, 8> TargetArgs;
1341  llvm::BumpPtrAllocator A;
1342  llvm::StringSaver S(A);
1343  llvm::cl::TokenizeGNUCommandLine(ArgString, S, TargetArgs);
1344  for (StringRef TA : TargetArgs)
1345  CmdArgs.push_back(Args.MakeArgString(TA));
1346 }
1347 
1348 void SYCLToolChain::TranslateGPUTargetOpt(const llvm::opt::ArgList &Args,
1349  llvm::opt::ArgStringList &CmdArgs,
1350  OptSpecifier Opt_EQ) const {
1351  for (auto *A : Args) {
1352  if (A->getOption().matches(Opt_EQ)) {
1353  if (auto GpuDevice =
1354  tools::SYCL::gen::isGPUTarget<tools::SYCL::gen::AmdGPU>(
1355  A->getValue())) {
1356  StringRef ArgString;
1357  SmallString<64> OffloadArch("--offload-arch=");
1358  OffloadArch += GpuDevice->data();
1359  ArgString = OffloadArch;
1360  parseTargetOpts(ArgString, Args, CmdArgs);
1361  A->claim();
1362  }
1363  }
1364  }
1365 }
1366 
1368  const llvm::Triple &Triple,
1369  StringRef Device, StringRef ArgString,
1370  const llvm::opt::Arg *A) {
1371  // Suggest users passing GRF backend opts on PVC to use
1372  // -ftarget-register-alloc-mode and
1373 
1374  if (!ArgString.contains("-device pvc") && !Device.contains("pvc"))
1375  return;
1376  // Make sure to only warn for once for gen targets as the translate
1377  // options tree is called twice but only the second time has the
1378  // device set.
1379  if (Triple.isSPIR() && Triple.getSubArch() == llvm::Triple::SPIRSubArch_gen &&
1380  !A->isClaimed())
1381  return;
1382  for (const auto &[Mode, Flag] : GRFModeFlagMap)
1383  if (ArgString.contains(Flag))
1384  D.Diag(diag::warn_drv_ftarget_register_alloc_mode_pvc) << Flag << Mode;
1385 }
1386 
1387 // Expects a specific type of option (e.g. -Xsycl-target-backend) and will
1388 // extract the arguments.
1389 void SYCLToolChain::TranslateTargetOpt(const llvm::Triple &Triple,
1390  const llvm::opt::ArgList &Args,
1391  llvm::opt::ArgStringList &CmdArgs,
1392  OptSpecifier Opt, OptSpecifier Opt_EQ,
1393  StringRef Device) const {
1394  for (auto *A : Args) {
1395  bool OptNoTriple;
1396  OptNoTriple = A->getOption().matches(Opt);
1397  if (A->getOption().matches(Opt_EQ)) {
1398  // Passing device args: -X<Opt>=<triple> -opt=val.
1399  StringRef GenDevice = SYCL::gen::resolveGenDevice(A->getValue());
1400  bool IsGenTriple = Triple.isSPIR() &&
1401  Triple.getSubArch() == llvm::Triple::SPIRSubArch_gen;
1402  if (IsGenTriple) {
1403  if (Device != GenDevice && !Device.empty())
1404  continue;
1405  if (getDriver().MakeSYCLDeviceTriple(A->getValue()) != Triple &&
1406  GenDevice.empty())
1407  // Triples do not match, but only skip when we know we are not
1408  // comparing against intel_gpu_*
1409  continue;
1410  if (getDriver().MakeSYCLDeviceTriple(A->getValue()) == Triple &&
1411  !Device.empty())
1412  // Triples match, but we are expecting a specific device to be set.
1413  continue;
1414  } else if (getDriver().MakeSYCLDeviceTriple(A->getValue()) != Triple)
1415  continue;
1416  } else if (!OptNoTriple)
1417  // Don't worry about any of the other args, we only want to pass what is
1418  // passed in -X<Opt>
1419  continue;
1420 
1421  // Add the argument from -X<Opt>
1422  StringRef ArgString;
1423  if (OptNoTriple) {
1424  // With multiple -fsycl-targets, a triple is required so we know where
1425  // the options should go.
1426  const Arg *TargetArg = Args.getLastArg(options::OPT_fsycl_targets_EQ);
1427  if (TargetArg && TargetArg->getValues().size() != 1) {
1428  getDriver().Diag(diag::err_drv_Xsycl_target_missing_triple)
1429  << A->getSpelling();
1430  continue;
1431  }
1432  // No triple, so just add the argument.
1433  ArgString = A->getValue();
1434  } else
1435  // Triple found, add the next argument in line.
1436  ArgString = A->getValue(1);
1437  WarnForDeprecatedBackendOpts(getDriver(), Triple, Device, ArgString, A);
1438  parseTargetOpts(ArgString, Args, CmdArgs);
1439  A->claim();
1440  }
1441 }
1442 
1443 void SYCLToolChain::AddImpliedTargetArgs(const llvm::Triple &Triple,
1444  const llvm::opt::ArgList &Args,
1445  llvm::opt::ArgStringList &CmdArgs,
1446  const JobAction &JA,
1447  const ToolChain &HostTC,
1448  StringRef Device) const {
1449  // Current implied args are for debug information and disabling of
1450  // optimizations. They are passed along to the respective areas as follows:
1451  // FPGA: -g -cl-opt-disable
1452  // Default device AOT: -g -cl-opt-disable
1453  // Default device JIT: -g (-O0 is handled by the runtime)
1454  // GEN: -options "-g -O0"
1455  // CPU: "--bo=-g -cl-opt-disable"
1456  llvm::opt::ArgStringList BeArgs;
1457  // Per-device argument vector storing the device name and the backend argument
1458  // string
1460  bool IsGen = Triple.getSubArch() == llvm::Triple::SPIRSubArch_gen;
1461  bool IsJIT =
1462  Triple.isSPIROrSPIRV() && Triple.getSubArch() == llvm::Triple::NoSubArch;
1463  if (IsGen && Args.hasArg(options::OPT_fsycl_fp64_conv_emu))
1464  BeArgs.push_back("-ze-fp64-gen-conv-emu");
1465  if (Arg *A = Args.getLastArg(options::OPT_g_Group, options::OPT__SLASH_Z7))
1466  if (!A->getOption().matches(options::OPT_g0))
1467  BeArgs.push_back("-g");
1468  // Only pass -cl-opt-disable for non-JIT, as the runtime
1469  // handles O0 for the JIT case.
1470  if (Triple.getSubArch() != llvm::Triple::NoSubArch)
1471  if (Arg *A = Args.getLastArg(options::OPT_O_Group))
1472  if (A->getOption().matches(options::OPT_O0))
1473  BeArgs.push_back("-cl-opt-disable");
1474  StringRef RegAllocModeOptName = "-ftarget-register-alloc-mode=";
1475  if (Arg *A = Args.getLastArg(options::OPT_ftarget_register_alloc_mode_EQ)) {
1476  StringRef RegAllocModeVal = A->getValue(0);
1477  auto ProcessElement = [&](StringRef Ele) {
1478  auto [DeviceName, RegAllocMode] = Ele.split(':');
1479  StringRef BackendOptName = SYCL::gen::getGenGRFFlag(RegAllocMode);
1480  bool IsDefault = RegAllocMode == "default";
1481  if (RegAllocMode.empty() || DeviceName != "pvc" ||
1482  (BackendOptName.empty() && !IsDefault)) {
1483  getDriver().Diag(diag::err_drv_unsupported_option_argument)
1484  << A->getSpelling() << Ele;
1485  }
1486  // "default" means "provide no specification to the backend", so
1487  // we don't need to do anything here.
1488  if (IsDefault)
1489  return;
1490  if (IsGen) {
1491  // For AOT, Use ocloc's per-device options flag with the correct ocloc
1492  // option to honor the user's specification.
1493  PerDeviceArgs.push_back(
1494  {DeviceName, Args.MakeArgString(BackendOptName)});
1495  } else if (IsJIT) {
1496  // For JIT, pass -ftarget-register-alloc-mode=Device:BackendOpt to
1497  // clang-offload-wrapper to be processed by the runtime.
1498  BeArgs.push_back(Args.MakeArgString(RegAllocModeOptName + DeviceName +
1499  ":" + BackendOptName));
1500  }
1501  };
1502  llvm::SmallVector<StringRef, 16> RegAllocModeArgs;
1503  RegAllocModeVal.split(RegAllocModeArgs, ',');
1504  for (StringRef Elem : RegAllocModeArgs)
1505  ProcessElement(Elem);
1506  } else if (!HostTC.getTriple().isWindowsMSVCEnvironment()) {
1507  // If -ftarget-register-alloc-mode is not specified, the default is
1508  // pvc:default on Windows and and pvc:auto otherwise when -device pvc is
1509  // provided by the user.
1510  ArgStringList TargArgs;
1511  Args.AddAllArgValues(TargArgs, options::OPT_Xs, options::OPT_Xs_separate);
1512  Args.AddAllArgValues(TargArgs, options::OPT_Xsycl_backend);
1513  // Check for any -device settings.
1514  if (IsJIT || Device == "pvc" || hasPVCDevice(TargArgs)) {
1515  StringRef DeviceName = "pvc";
1516  StringRef BackendOptName = SYCL::gen::getGenGRFFlag("auto");
1517  if (IsGen)
1518  PerDeviceArgs.push_back(
1519  {DeviceName, Args.MakeArgString(BackendOptName)});
1520  else if (IsJIT)
1521  BeArgs.push_back(Args.MakeArgString(RegAllocModeOptName + DeviceName +
1522  ":" + BackendOptName));
1523  }
1524  }
1525  // only pass -vpfp-relaxed for aoc with -fintelfpga and -fp-model=fast
1526  if (Args.hasArg(options::OPT_fintelfpga) && getDriver().IsFPGAHWMode() &&
1527  Triple.getSubArch() == llvm::Triple::SPIRSubArch_fpga) {
1528  if (Arg *A = Args.getLastArg(options::OPT_ffp_model_EQ)) {
1529  if (StringRef(A->getValue()) == "fast")
1530  BeArgs.push_back("-vpfp-relaxed");
1531  }
1532  }
1533  if (IsGen) {
1534  // For GEN (spir64_gen) we have implied -device settings given usage
1535  // of intel_gpu_ as a target. Handle those here, and also check that no
1536  // other -device was passed, as that is a conflict.
1537  StringRef DepInfo = JA.getOffloadingArch();
1538  if (!DepInfo.empty()) {
1539  ArgStringList TargArgs;
1540  Args.AddAllArgValues(TargArgs, options::OPT_Xs, options::OPT_Xs_separate);
1541  Args.AddAllArgValues(TargArgs, options::OPT_Xsycl_backend);
1542  // For -Xsycl-target-backend=<triple> we need to scrutinize the triple
1543  for (auto *A : Args) {
1544  if (!A->getOption().matches(options::OPT_Xsycl_backend_EQ))
1545  continue;
1546  if (StringRef(A->getValue()).starts_with("intel_gpu"))
1547  TargArgs.push_back(A->getValue(1));
1548  }
1549  if (llvm::find_if(TargArgs, [&](auto Cur) {
1550  return !strncmp(Cur, "-device", sizeof("-device") - 1);
1551  }) != TargArgs.end()) {
1552  SmallString<64> Target("intel_gpu_");
1553  Target += DepInfo;
1554  getDriver().Diag(diag::err_drv_unsupported_opt_for_target)
1555  << "-device" << Target;
1556  }
1557  CmdArgs.push_back("-device");
1558  CmdArgs.push_back(Args.MakeArgString(DepInfo));
1559  }
1560  // -ftarget-compile-fast AOT
1561  if (Args.hasArg(options::OPT_ftarget_compile_fast))
1562  BeArgs.push_back("-igc_opts 'PartitionUnit=1,SubroutineThreshold=50000'");
1563  // -ftarget-export-symbols
1564  if (Args.hasFlag(options::OPT_ftarget_export_symbols,
1565  options::OPT_fno_target_export_symbols, false))
1566  BeArgs.push_back("-library-compilation");
1567  } else if (IsJIT)
1568  // -ftarget-compile-fast JIT
1569  Args.AddLastArg(BeArgs, options::OPT_ftarget_compile_fast);
1570  if (IsGen) {
1571  for (auto [DeviceName, BackendArgStr] : PerDeviceArgs) {
1572  CmdArgs.push_back("-device_options");
1573  CmdArgs.push_back(Args.MakeArgString(DeviceName));
1574  CmdArgs.push_back(Args.MakeArgString(BackendArgStr));
1575  }
1576  }
1577  if (BeArgs.empty())
1578  return;
1579  if (Triple.getSubArch() == llvm::Triple::NoSubArch ||
1580  Triple.getSubArch() == llvm::Triple::SPIRSubArch_fpga) {
1581  for (StringRef A : BeArgs)
1582  CmdArgs.push_back(Args.MakeArgString(A));
1583  return;
1584  }
1585  SmallString<128> BeOpt;
1586  if (IsGen)
1587  CmdArgs.push_back("-options");
1588  else
1589  BeOpt = "--bo=";
1590  for (unsigned I = 0; I < BeArgs.size(); ++I) {
1591  if (I)
1592  BeOpt += ' ';
1593  BeOpt += BeArgs[I];
1594  }
1595  CmdArgs.push_back(Args.MakeArgString(BeOpt));
1596 }
1597 
1599  const llvm::Triple &Triple, const llvm::opt::ArgList &Args,
1600  llvm::opt::ArgStringList &CmdArgs, StringRef Device) const {
1601  // Handle -Xs flags.
1602  for (auto *A : Args) {
1603  // When parsing the target args, the -Xs<opt> type option applies to all
1604  // target compilations is not associated with a specific triple. The
1605  // option can be used in 3 different ways:
1606  // -Xs -DFOO -Xs -DBAR
1607  // -Xs "-DFOO -DBAR"
1608  // -XsDFOO -XsDBAR
1609  // All of the above examples will pass -DFOO -DBAR to the backend compiler.
1610 
1611  // Do not add the -Xs to the default SYCL triple when we know we have
1612  // implied the setting.
1613  if ((A->getOption().matches(options::OPT_Xs) ||
1614  A->getOption().matches(options::OPT_Xs_separate)) &&
1615  Triple.getSubArch() == llvm::Triple::NoSubArch &&
1616  Triple.isSPIROrSPIRV() && getDriver().isSYCLDefaultTripleImplied())
1617  continue;
1618 
1619  if (A->getOption().matches(options::OPT_Xs)) {
1620  // Take the arg and create an option out of it.
1621  CmdArgs.push_back(Args.MakeArgString(Twine("-") + A->getValue()));
1622  WarnForDeprecatedBackendOpts(getDriver(), Triple, Device, A->getValue(),
1623  A);
1624  A->claim();
1625  continue;
1626  }
1627  if (A->getOption().matches(options::OPT_Xs_separate)) {
1628  StringRef ArgString(A->getValue());
1629  parseTargetOpts(ArgString, Args, CmdArgs);
1630  WarnForDeprecatedBackendOpts(getDriver(), Triple, Device, ArgString, A);
1631  A->claim();
1632  continue;
1633  }
1634  }
1635  // Do not process -Xsycl-target-backend for implied spir64/spirv64
1636  if (Triple.getSubArch() == llvm::Triple::NoSubArch &&
1637  Triple.isSPIROrSPIRV() && getDriver().isSYCLDefaultTripleImplied())
1638  return;
1639  // Handle -Xsycl-target-backend.
1640  TranslateTargetOpt(Triple, Args, CmdArgs, options::OPT_Xsycl_backend,
1641  options::OPT_Xsycl_backend_EQ, Device);
1642  TranslateGPUTargetOpt(Args, CmdArgs, options::OPT_fsycl_targets_EQ);
1643 }
1644 
1645 void SYCLToolChain::TranslateLinkerTargetArgs(const llvm::Triple &Triple,
1646  const llvm::opt::ArgList &Args,
1647  llvm::opt::ArgStringList &CmdArgs,
1648  StringRef Device) const {
1649  // Do not process -Xsycl-target-linker for implied spir64/spirv64
1650  if (Triple.getSubArch() == llvm::Triple::NoSubArch &&
1651  Triple.isSPIROrSPIRV() && getDriver().isSYCLDefaultTripleImplied())
1652  return;
1653  // Handle -Xsycl-target-linker.
1654  TranslateTargetOpt(Triple, Args, CmdArgs, options::OPT_Xsycl_linker,
1655  options::OPT_Xsycl_linker_EQ, Device);
1656 }
1657 
1659  if (getTriple().getSubArch() == llvm::Triple::SPIRSubArch_fpga)
1660  return new tools::SYCL::fpga::BackendCompiler(*this);
1661  if (getTriple().getSubArch() == llvm::Triple::SPIRSubArch_gen)
1662  return new tools::SYCL::gen::BackendCompiler(*this);
1663  // fall through is CPU.
1664  return new tools::SYCL::x86_64::BackendCompiler(*this);
1665 }
1666 
1668  assert(getTriple().isSPIROrSPIRV() || IsSYCLNativeCPU);
1669  return new tools::SYCL::Linker(*this);
1670 }
1671 
1672 void SYCLToolChain::addClangWarningOptions(ArgStringList &CC1Args) const {
1673  HostTC.addClangWarningOptions(CC1Args);
1674 }
1675 
1677 SYCLToolChain::GetCXXStdlibType(const ArgList &Args) const {
1678  return HostTC.GetCXXStdlibType(Args);
1679 }
1680 
1682  const ArgList &DriverArgs,
1683  ArgStringList &CC1Args) {
1684  // Add ../include/sycl, ../include/sycl/stl_wrappers and ../include (in that
1685  // order).
1686  SmallString<128> IncludePath(Driver.Dir);
1687  llvm::sys::path::append(IncludePath, "..");
1688  llvm::sys::path::append(IncludePath, "include");
1689  SmallString<128> SYCLPath(IncludePath);
1690  llvm::sys::path::append(SYCLPath, "sycl");
1691  // This is used to provide our wrappers around STL headers that provide
1692  // additional functions/template specializations when the user includes those
1693  // STL headers in their programs (e.g., <complex>).
1694  SmallString<128> STLWrappersPath(SYCLPath);
1695  llvm::sys::path::append(STLWrappersPath, "stl_wrappers");
1696  CC1Args.push_back("-internal-isystem");
1697  CC1Args.push_back(DriverArgs.MakeArgString(SYCLPath));
1698  CC1Args.push_back("-internal-isystem");
1699  CC1Args.push_back(DriverArgs.MakeArgString(STLWrappersPath));
1700  CC1Args.push_back("-internal-isystem");
1701  CC1Args.push_back(DriverArgs.MakeArgString(IncludePath));
1702 }
1703 
1704 void SYCLToolChain::AddClangSystemIncludeArgs(const ArgList &DriverArgs,
1705  ArgStringList &CC1Args) const {
1706  HostTC.AddClangSystemIncludeArgs(DriverArgs, CC1Args);
1707 }
1708 
1710  ArgStringList &CC1Args) const {
1711  HostTC.AddClangCXXStdlibIncludeArgs(Args, CC1Args);
1712 }
1713 
1715  return SanitizerKind::Address;
1716 }
StringRef Filename
Definition: Format.cpp:2976
CompileCommand Cmd
llvm::MachO::Target Target
Definition: MachO.h:50
static bool selectBfloatLibs(const llvm::Triple &Triple, const Compilation &C, bool &UseNative)
Definition: SYCL.cpp:160
static void WarnForDeprecatedBackendOpts(const Driver &D, const llvm::Triple &Triple, StringRef Device, StringRef ArgString, const llvm::opt::Arg *A)
Definition: SYCL.cpp:1367
static void addFPGATimingDiagnostic(std::unique_ptr< Command > &Cmd, Compilation &C)
Definition: SYCL.cpp:54
static const char * makeExeName(Compilation &C, StringRef Name)
Definition: SYCL.cpp:642
static llvm::StringMap< StringRef > GRFModeFlagMap
Definition: SYCL.cpp:944
static bool hasPVCDevice(const ArgStringList &CmdArgs)
Definition: SYCL.cpp:888
static OclocInfo PVCDevices[]
Definition: SYCL.cpp:879
static void parseTargetOpts(StringRef ArgString, const llvm::opt::ArgList &Args, llvm::opt::ArgStringList &CmdArgs)
Definition: SYCL.cpp:1337
static std::vector< OptSpecifier > getUnsupportedOpts(void)
Definition: SYCL.cpp:1227
static llvm::SmallVector< StringRef, 16 > SYCLDeviceLibList
Definition: SYCL.cpp:377
bool isDeviceOffloading(OffloadKind OKind) const
Definition: Action.h:233
const char * getOffloadingArch() const
Definition: Action.h:223
Compilation - A set of tasks to perform for a single driver invocation.
Definition: Compilation.h:45
Driver - Encapsulate logic for constructing compilation processes from a set of gcc-driver-like comma...
Definition: Driver.h:77
std::string SysRoot
sysroot, if present
Definition: Driver.h:182
DiagnosticBuilder Diag(unsigned DiagID) const
Definition: Driver.h:146
const llvm::opt::OptTable & getOpts() const
Definition: Driver.h:401
std::string Dir
The path the driver executable was in, as invoked from the command line.
Definition: Driver.h:157
InputInfo - Wrapper for information about an input source.
Definition: InputInfo.h:22
const char * getFilename() const
Definition: InputInfo.h:83
void print(llvm::raw_ostream &OS) const
Definition: SYCL.cpp:45
void getSYCLDeviceLibPath(llvm::SmallVector< llvm::SmallString< 128 >, 4 > &DeviceLibPaths) const
Definition: SYCL.cpp:34
SYCLInstallationDetector(const Driver &D)
Definition: SYCL.cpp:29
ToolChain - Access to tools for a single platform.
Definition: ToolChain.h:92
const Driver & getDriver() const
Definition: ToolChain.h:269
virtual void addClangWarningOptions(llvm::opt::ArgStringList &CC1Args) const
Add warning options that need to be passed to cc1 for this target.
Definition: ToolChain.cpp:1149
virtual llvm::opt::DerivedArgList * TranslateArgs(const llvm::opt::DerivedArgList &Args, StringRef BoundArch, Action::OffloadKind DeviceOffloadKind) const
TranslateArgs - Create a new derived argument list for any argument translations this ToolChain may w...
Definition: ToolChain.h:375
const llvm::Triple & getTriple() const
Definition: ToolChain.h:271
virtual void AddClangCXXStdlibIncludeArgs(const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args) const
AddClangCXXStdlibIncludeArgs - Add the clang -cc1 level arguments to set the include paths to use for...
Definition: ToolChain.cpp:1317
path_list & getProgramPaths()
Definition: ToolChain.h:314
virtual CXXStdlibType GetCXXStdlibType(const llvm::opt::ArgList &Args) const
Definition: ToolChain.cpp:1221
virtual void addClangTargetOptions(const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args, Action::OffloadKind DeviceOffloadKind) const
Add options that need to be passed to cc1 for this target.
Definition: ToolChain.cpp:1142
virtual void AddClangSystemIncludeArgs(const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args) const
Add the clang cc1 arguments for system include paths.
Definition: ToolChain.cpp:1137
Tool - Information on a specific compilation tool.
Definition: Tool.h:32
static void AddSYCLIncludeArgs(const clang::driver::Driver &Driver, const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args)
Definition: SYCL.cpp:1681
Tool * buildLinker() const override
Definition: SYCL.cpp:1667
void addClangTargetOptions(const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args, Action::OffloadKind DeviceOffloadKind) const override
Add options that need to be passed to cc1 for this target.
Definition: SYCL.cpp:1278
CXXStdlibType GetCXXStdlibType(const llvm::opt::ArgList &Args) const override
Definition: SYCL.cpp:1677
void AddClangCXXStdlibIncludeArgs(const llvm::opt::ArgList &Args, llvm::opt::ArgStringList &CC1Args) const override
AddClangCXXStdlibIncludeArgs - Add the clang -cc1 level arguments to set the include paths to use for...
Definition: SYCL.cpp:1709
void AddClangSystemIncludeArgs(const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args) const override
Add the clang cc1 arguments for system include paths.
Definition: SYCL.cpp:1704
void TranslateLinkerTargetArgs(const llvm::Triple &Triple, const llvm::opt::ArgList &Args, llvm::opt::ArgStringList &CmdArgs, StringRef Device="") const
Definition: SYCL.cpp:1645
void TranslateTargetOpt(const llvm::Triple &Triple, const llvm::opt::ArgList &Args, llvm::opt::ArgStringList &CmdArgs, llvm::opt::OptSpecifier Opt, llvm::opt::OptSpecifier Opt_EQ, StringRef Device) const
Definition: SYCL.cpp:1389
Tool * buildBackendCompiler() const override
Definition: SYCL.cpp:1658
llvm::opt::DerivedArgList * TranslateArgs(const llvm::opt::DerivedArgList &Args, StringRef BoundArch, Action::OffloadKind DeviceOffloadKind) const override
TranslateArgs - Create a new derived argument list for any argument translations this ToolChain may w...
Definition: SYCL.cpp:1285
void AddImpliedTargetArgs(const llvm::Triple &Triple, const llvm::opt::ArgList &Args, llvm::opt::ArgStringList &CmdArgs, const JobAction &JA, const ToolChain &HostTC, StringRef Device="") const
Definition: SYCL.cpp:1443
void TranslateBackendTargetArgs(const llvm::Triple &Triple, const llvm::opt::ArgList &Args, llvm::opt::ArgStringList &CmdArgs, StringRef Device="") const
Definition: SYCL.cpp:1598
void addClangWarningOptions(llvm::opt::ArgStringList &CC1Args) const override
Add warning options that need to be passed to cc1 for this target.
Definition: SYCL.cpp:1672
SanitizerMask getSupportedSanitizers() const override
Return sanitizers which are available in this toolchain.
Definition: SYCL.cpp:1714
StringRef getGenGRFFlag(StringRef GRFMode)
Definition: SYCL.cpp:949
SmallString< 64 > getGenDeviceMacro(StringRef DeviceName)
Definition: SYCL.cpp:1095
StringRef resolveGenDevice(StringRef DeviceName)
Definition: SYCL.cpp:1000
bool shouldDoPerObjectFileLinking(const Compilation &C)
Definition: SYCL.cpp:154
SmallVector< std::string, 8 > getDeviceLibraries(const Compilation &C, const llvm::Triple &TargetTriple, bool IsSpirvAOT)
Definition: SYCL.cpp:209
void constructLLVMForeachCommand(Compilation &C, const JobAction &JA, std::unique_ptr< Command > InputCommand, const InputInfoList &InputFiles, const InputInfo &Output, const Tool *T, StringRef Increment, StringRef Ext="out", StringRef ParallelJobs="")
Definition: SYCL.cpp:65
bool isSrcFile(ID Id)
isSrcFile - Is this a source file, i.e.
Definition: Types.cpp:318
bool isSYCLNativeCPU(const llvm::opt::ArgList &Args)
Definition: SYCL.h:238
The JSON file list parser is used to communicate input to InstallAPI.
const FunctionProtoType * T
const char * Version
Definition: SYCL.cpp:870
const char * DeviceName
Definition: SYCL.cpp:868
const char * PackageName
Definition: SYCL.cpp:869
SmallVector< int, 8 > HexValues
Definition: SYCL.cpp:871
static constexpr ResponseFileSupport None()
Returns a ResponseFileSupport indicating that response files are not supported.
Definition: Job.h:79
static constexpr ResponseFileSupport AtFileUTF8()
Definition: Job.h:86