clang  19.0.0git
DeviceOffload.cpp
Go to the documentation of this file.
1 //===---------- DeviceOffload.cpp - Device Offloading------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements offloading to CUDA devices.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "DeviceOffload.h"
14 
18 
19 #include "llvm/IR/LegacyPassManager.h"
20 #include "llvm/MC/TargetRegistry.h"
21 #include "llvm/Target/TargetMachine.h"
22 
23 namespace clang {
24 
26  Interpreter &Interp, std::unique_ptr<CompilerInstance> Instance,
27  IncrementalParser &HostParser, llvm::LLVMContext &LLVMCtx,
29  llvm::Error &Err)
30  : IncrementalParser(Interp, std::move(Instance), LLVMCtx, Err),
31  HostParser(HostParser), VFS(FS) {
32  if (Err)
33  return;
34  StringRef Arch = CI->getTargetOpts().CPU;
35  if (!Arch.starts_with("sm_") || Arch.substr(3).getAsInteger(10, SMVersion)) {
36  Err = llvm::joinErrors(std::move(Err), llvm::make_error<llvm::StringError>(
37  "Invalid CUDA architecture",
38  llvm::inconvertibleErrorCode()));
39  return;
40  }
41 }
42 
44 IncrementalCUDADeviceParser::Parse(llvm::StringRef Input) {
45  auto PTU = IncrementalParser::Parse(Input);
46  if (!PTU)
47  return PTU.takeError();
48 
49  auto PTX = GeneratePTX();
50  if (!PTX)
51  return PTX.takeError();
52 
53  auto Err = GenerateFatbinary();
54  if (Err)
55  return std::move(Err);
56 
57  std::string FatbinFileName =
58  "/incr_module_" + std::to_string(PTUs.size()) + ".fatbin";
59  VFS->addFile(FatbinFileName, 0,
60  llvm::MemoryBuffer::getMemBuffer(
61  llvm::StringRef(FatbinContent.data(), FatbinContent.size()),
62  "", false));
63 
65 
66  FatbinContent.clear();
67 
68  return PTU;
69 }
70 
72  auto &PTU = PTUs.back();
73  std::string Error;
74 
75  const llvm::Target *Target = llvm::TargetRegistry::lookupTarget(
76  PTU.TheModule->getTargetTriple(), Error);
77  if (!Target)
78  return llvm::make_error<llvm::StringError>(std::move(Error),
79  std::error_code());
80  llvm::TargetOptions TO = llvm::TargetOptions();
81  llvm::TargetMachine *TargetMachine = Target->createTargetMachine(
82  PTU.TheModule->getTargetTriple(), getCI()->getTargetOpts().CPU, "", TO,
83  llvm::Reloc::Model::PIC_);
84  PTU.TheModule->setDataLayout(TargetMachine->createDataLayout());
85 
86  PTXCode.clear();
87  llvm::raw_svector_ostream dest(PTXCode);
88 
89  llvm::legacy::PassManager PM;
90  if (TargetMachine->addPassesToEmitFile(PM, dest, nullptr,
91  llvm::CodeGenFileType::AssemblyFile)) {
92  return llvm::make_error<llvm::StringError>(
93  "NVPTX backend cannot produce PTX code.",
94  llvm::inconvertibleErrorCode());
95  }
96 
97  if (!PM.run(*PTU.TheModule))
98  return llvm::make_error<llvm::StringError>("Failed to emit PTX code.",
99  llvm::inconvertibleErrorCode());
100 
101  PTXCode += '\0';
102  while (PTXCode.size() % 8)
103  PTXCode += '\0';
104  return PTXCode.str();
105 }
106 
108  enum FatBinFlags {
109  AddressSize64 = 0x01,
110  HasDebugInfo = 0x02,
111  ProducerCuda = 0x04,
112  HostLinux = 0x10,
113  HostMac = 0x20,
114  HostWindows = 0x40
115  };
116 
117  struct FatBinInnerHeader {
118  uint16_t Kind; // 0x00
119  uint16_t unknown02; // 0x02
120  uint32_t HeaderSize; // 0x04
121  uint32_t DataSize; // 0x08
122  uint32_t unknown0c; // 0x0c
123  uint32_t CompressedSize; // 0x10
124  uint32_t SubHeaderSize; // 0x14
125  uint16_t VersionMinor; // 0x18
126  uint16_t VersionMajor; // 0x1a
127  uint32_t CudaArch; // 0x1c
128  uint32_t unknown20; // 0x20
129  uint32_t unknown24; // 0x24
130  uint32_t Flags; // 0x28
131  uint32_t unknown2c; // 0x2c
132  uint32_t unknown30; // 0x30
133  uint32_t unknown34; // 0x34
134  uint32_t UncompressedSize; // 0x38
135  uint32_t unknown3c; // 0x3c
136  uint32_t unknown40; // 0x40
137  uint32_t unknown44; // 0x44
138  FatBinInnerHeader(uint32_t DataSize, uint32_t CudaArch, uint32_t Flags)
139  : Kind(1 /*PTX*/), unknown02(0x0101), HeaderSize(sizeof(*this)),
140  DataSize(DataSize), unknown0c(0), CompressedSize(0),
141  SubHeaderSize(HeaderSize - 8), VersionMinor(2), VersionMajor(4),
142  CudaArch(CudaArch), unknown20(0), unknown24(0), Flags(Flags),
143  unknown2c(0), unknown30(0), unknown34(0), UncompressedSize(0),
144  unknown3c(0), unknown40(0), unknown44(0) {}
145  };
146 
147  struct FatBinHeader {
148  uint32_t Magic; // 0x00
149  uint16_t Version; // 0x04
150  uint16_t HeaderSize; // 0x06
151  uint32_t DataSize; // 0x08
152  uint32_t unknown0c; // 0x0c
153  public:
154  FatBinHeader(uint32_t DataSize)
155  : Magic(0xba55ed50), Version(1), HeaderSize(sizeof(*this)),
156  DataSize(DataSize), unknown0c(0) {}
157  };
158 
159  FatBinHeader OuterHeader(sizeof(FatBinInnerHeader) + PTXCode.size());
160  FatbinContent.append((char *)&OuterHeader,
161  ((char *)&OuterHeader) + OuterHeader.HeaderSize);
162 
163  FatBinInnerHeader InnerHeader(PTXCode.size(), SMVersion,
164  FatBinFlags::AddressSize64 |
165  FatBinFlags::HostLinux);
166  FatbinContent.append((char *)&InnerHeader,
167  ((char *)&InnerHeader) + InnerHeader.HeaderSize);
168 
169  FatbinContent.append(PTXCode.begin(), PTXCode.end());
170 
171  return llvm::Error::success();
172 }
173 
175 
176 } // namespace clang
llvm::MachO::Target Target
Definition: MachO.h:50
Defines the clang::TargetOptions class.
std::string CudaGpuBinaryFileName
Name of file passed with -fcuda-include-gpubinary option to forward to CUDA runtime back-end for inco...
CodeGenOptions & getCodeGenOpts()
llvm::SmallVector< char, 1024 > FatbinContent
Definition: DeviceOffload.h:45
llvm::SmallString< 1024 > PTXCode
Definition: DeviceOffload.h:44
IncrementalCUDADeviceParser(Interpreter &Interp, std::unique_ptr< CompilerInstance > Instance, IncrementalParser &HostParser, llvm::LLVMContext &LLVMCtx, llvm::IntrusiveRefCntPtr< llvm::vfs::InMemoryFileSystem > VFS, llvm::Error &Err)
llvm::Expected< PartialTranslationUnit & > Parse(llvm::StringRef Input) override
Parses incremental input by creating an in-memory file.
llvm::IntrusiveRefCntPtr< llvm::vfs::InMemoryFileSystem > VFS
Definition: DeviceOffload.h:46
llvm::Expected< llvm::StringRef > GeneratePTX()
Provides support for incremental compilation.
std::list< PartialTranslationUnit > PTUs
List containing information about every incrementally parsed piece of code.
CompilerInstance * getCI()
virtual llvm::Expected< PartialTranslationUnit & > Parse(llvm::StringRef Input)
Parses incremental input by creating an in-memory file.
std::unique_ptr< CompilerInstance > CI
Compiler instance performing the incremental compilation.
Provides top-level interfaces for incremental compilation and execution.
Definition: Interpreter.h:91
The JSON file list parser is used to communicate input to InstallAPI.
CudaArch
Definition: Cuda.h:54
Definition: Format.h:5433