35 #include <unordered_set>
41 #if defined(__has_include) && __has_include(<filesystem>)
43 #elif defined(__has_include) && __has_include(<experimental/filesystem>)
44 #include <experimental/filesystem>
46 #error "SYCLomatic runtime requires C++ filesystem support"
60 unsigned int,
void **,
void **);
67 const void *
function) {
71 .get_info<sycl::info::device::max_work_group_size>();
74 static inline kernel_function_info
80 .get_info<sycl::info::device::max_work_group_size>();
86 #if defined(__has_include) && __has_include(<filesystem>)
87 namespace fs = std::filesystem;
89 namespace fs = std::experimental::filesystem;
99 if (
sizeof(
size_t) >=
sizeof(std::streamsize) &&
101 throw std::runtime_error(
"[SYCLcompat] data file too large");
104 std::random_device dev;
105 std::mt19937 prng(dev());
106 std::uniform_int_distribution<uint64_t> rand(0);
109 auto tmp_dir = fs::temp_directory_path(ec);
111 throw std::runtime_error(
"[SYCLcompat] could not find temporary directory");
114 std::stringstream directory;
115 fs::path directory_path;
116 constexpr
int max_attempts = 5;
119 for (i = 0; i < max_attempts; i++) {
120 directory << std::hex << rand(prng);
121 directory_path = tmp_dir / directory.str();
122 if (fs::create_directory(directory_path)) {
126 if (i == max_attempts)
127 throw std::runtime_error(
"[SYCLcompat] could not create directory");
130 fs::permissions(directory_path, fs::perms::owner_all, ec);
132 throw std::runtime_error(
133 "[SYCLcompat] could not set directory permissions");
136 std::stringstream filename;
137 filename << std::hex << rand(prng);
139 auto filepath = directory_path / (filename.str() +
".dll");
141 auto filepath = directory_path / filename.str();
145 auto outfile = std::ofstream(filepath, std::ios::out | std::ios::binary);
148 fs::permissions(filepath, fs::perms::owner_write, ec);
150 throw std::runtime_error(
"[SYCLcompat] could not set permissions");
152 outfile.write(data, size);
154 throw std::runtime_error(
"[SYCLcompat] could not write data");
158 fs::permissions(filepath, fs::perms::owner_read | fs::perms::owner_exec,
161 throw std::runtime_error(
"[SYCLcompat] could not set permissions");
163 throw std::runtime_error(
"[SYCLcompat] could not write data");
166 auto infile = std::ifstream(filepath, std::ios::in | std::ios::binary);
168 bool mismatch =
false;
176 if (c != data[cnt++])
179 if (cnt != size || mismatch)
180 throw std::runtime_error(
181 "[SYCLcompat] file contents not written correctly");
183 throw std::runtime_error(
"[SYCLcompat] could not validate file");
185 if (!filepath.is_absolute())
186 throw std::runtime_error(
"[SYCLcompat] temporary filepath is not absolute");
191 static inline uint16_t
extract16(
unsigned char const *
const ptr) {
194 ret |=
static_cast<uint16_t
>(ptr[0]) << 0;
195 ret |=
static_cast<uint16_t
>(ptr[1]) << 8;
200 static inline uint32_t
extract32(
unsigned char const *
const ptr) {
203 ret |=
static_cast<uint32_t
>(ptr[0]) << 0;
204 ret |=
static_cast<uint32_t
>(ptr[1]) << 8;
205 ret |=
static_cast<uint32_t
>(ptr[2]) << 16;
206 ret |=
static_cast<uint32_t
>(ptr[3]) << 24;
211 static inline uint64_t
extract64(
unsigned char const *
const ptr) {
214 ret |=
static_cast<uint64_t
>(ptr[0]) << 0;
215 ret |=
static_cast<uint64_t
>(ptr[1]) << 8;
216 ret |=
static_cast<uint64_t
>(ptr[2]) << 16;
217 ret |=
static_cast<uint64_t
>(ptr[3]) << 24;
218 ret |=
static_cast<uint64_t
>(ptr[4]) << 32;
219 ret |=
static_cast<uint64_t
>(ptr[5]) << 40;
220 ret |=
static_cast<uint64_t
>(ptr[6]) << 48;
221 ret |=
static_cast<uint64_t
>(ptr[7]) << 56;
230 unsigned char const *
const ublob =
231 reinterpret_cast<unsigned char const *const
>(blob);
232 if (ublob[0] != 0x4d || ublob[1] != 0x5a) {
233 throw std::runtime_error(
"[SYCLcompat] blob is not a Windows DLL.");
235 uint32_t pe_header_offset =
extract32(ublob + 0x3c);
239 unsigned char const *
const pe_header = ublob + pe_header_offset;
242 uint32_t pe_signature =
extract32(pe_header + 0);
243 if (pe_signature != 0x00004550) {
244 throw std::runtime_error(
245 "[SYCLcompat] PE-header signature is not 0x00004550");
249 uint16_t machine =
extract16(pe_header + 4);
250 if (machine != 0x8664) {
251 throw std::runtime_error(
"[SYCLcompat] only DLLs for x64 supported");
255 uint16_t number_of_sections =
extract16(pe_header + 6);
258 uint16_t sizeof_optional_header =
extract16(pe_header + 20);
261 uint16_t magic =
extract16(pe_header + 24);
262 if (magic != 0x10b && magic != 0x20b) {
263 throw std::runtime_error(
"[SYCLcompat] MAGIC is not 0x010b or 0x020b");
268 constexpr
int coff_header_size = 24;
270 unsigned char const *
const tail_of_optional_header =
271 pe_header + coff_header_size + sizeof_optional_header;
272 if (
extract64(tail_of_optional_header - 8) != 0) {
273 throw std::runtime_error(
"Optional header not zero-padded");
278 constexpr
int section_header_size = 40;
279 unsigned char const *
const last_section_header =
280 tail_of_optional_header + section_header_size * (number_of_sections - 1);
282 uint32_t sizeof_raw_data =
extract32(last_section_header + 16);
283 uint32_t pointer_to_raw_data =
extract32(last_section_header + 20);
285 return sizeof_raw_data + pointer_to_raw_data;
287 if (blob[0] != 0x7F || blob[1] !=
'E' || blob[2] !=
'L' || blob[3] !=
'F')
288 throw std::runtime_error(
"[SYCLcompat] blob is not in ELF format");
291 throw std::runtime_error(
"[SYCLcompat] only 64-bit headers are supported");
294 throw std::runtime_error(
295 "[SYCLcompat] only little-endian headers are supported");
297 unsigned char const *
const ublob =
298 reinterpret_cast<unsigned char const *const
>(blob);
299 uint64_t e_shoff =
extract64(ublob + 0x28);
300 uint16_t e_shentsize =
extract16(ublob + 0x3A);
301 uint16_t e_shnum =
extract16(ublob + 0x3C);
303 return e_shoff + (e_shentsize * e_shnum);
308 class path_lib_record {
310 void operator=(
const path_lib_record &) =
delete;
312 for (
auto entry : lib_to_path) {
313 FreeLibrary(
static_cast<HMODULE
>(entry.first));
314 fs::permissions(entry.second, fs::perms::owner_all);
315 fs::remove_all(entry.second.remove_filename());
318 static void record_lib_path(fs::path path,
void *library) {
319 lib_to_path[library] = path;
321 static void remove_lib(
void *library) {
322 auto path = lib_to_path[library];
325 FreeLibrary(
static_cast<HMODULE
>(library));
326 fs::permissions(path, fs::perms::owner_all);
327 if (fs::remove_all(path.remove_filename(), ec) != 2 || ec)
329 throw std::runtime_error(
"[SYCLcompat] directory delete failed");
331 lib_to_path.erase(library);
335 static inline std::unordered_map<void *, fs::path> lib_to_path;
346 operator void *()
const {
return ptr; }
351 static inline detail::path_lib_record single_instance_to_trigger_destructor;
361 void *so = LoadLibraryW(filename.wstring().c_str());
363 void *so = dlopen(filename.c_str(), RTLD_LAZY);
366 throw std::runtime_error(
"[SYCLcompat] failed to load kernel library");
369 detail::path_lib_record::record_lib_path(filename, so);
374 if (fs::remove_all(filename.remove_filename(), ec) != 2 || ec)
376 throw std::runtime_error(
"[SYCLcompat] directory delete failed");
388 ifs.open(name, std::ios::in | std::ios::binary);
390 std::stringstream buffer;
391 buffer << ifs.rdbuf();
393 const std::string buffer_string = buffer.str();
410 detail::path_lib_record::remove_lib(library);
421 operator void *()
const {
return ((
void *)ptr); }
424 unsigned int local_mem_size,
void **args,
void **extra) {
425 ptr(q, range, local_mem_size, args, extra);
436 const std::string &name) {
439 GetProcAddress(
static_cast<HMODULE
>(
static_cast<void *
>(library)),
440 (name + std::string(
"_wrapper")).c_str()));
443 dlsym(library, (name + std::string(
"_wrapper")).c_str()));
446 throw std::runtime_error(
"[SYCLcompat] failed to get function");
463 unsigned int local_mem_size,
464 void **kernel_params,
void **extra) {
466 local_mem_size, kernel_params, extra);
Defines the iteration domain of both the work-groups and the overall dispatch.
Encapsulates a single SYCL queue which schedules kernels on a SYCL device.
Defines the iteration domain of either a single work-group in a parallel dispatch,...
static dev_mgr & instance()
Returns the instance of device manager singleton.
device_ext & current_device()
kernel_function(kernel_functor ptr)
void operator()(sycl::queue &q, const sycl::nd_range< 3 > &range, unsigned int local_mem_size, void **args, void **extra)
kernel_library(void *ptr)
annotated_arg & operator=(annotated_arg &)=default
static fs::path write_data_to_file(char const *const data, size_t size)
Write data to temporary file and return absolute path to temporary file.
static uint16_t extract16(unsigned char const *const ptr)
static uint64_t get_lib_size(char const *const blob)
static uint32_t extract32(unsigned char const *const ptr)
static kernel_library load_dl_from_data(char const *const data, size_t size)
static uint64_t extract64(unsigned char const *const ptr)
void(* kernel_functor)(sycl::queue &, const sycl::nd_range< 3 > &, unsigned int, void **, void **)
static kernel_function get_kernel_function(kernel_library &library, const std::string &name)
Find kernel function in a kernel library and return its address.
static kernel_library load_kernel_library(const std::string &name)
Load kernel library and return a handle to use the library.
static void get_kernel_function_info(kernel_function_info *kernel_info, const void *function)
static kernel_library load_kernel_library_mem(char const *const image)
Load kernel library whose image is alreay in memory and return a handle to use the library.
static void invoke_kernel_function(kernel_function &function, sycl::queue &queue, sycl::range< 3 > group_range, sycl::range< 3 > local_range, unsigned int local_mem_size, void **kernel_params, void **extra)
Invoke a kernel function.
static void unload_kernel_library(const kernel_library &library)
Unload kernel library.