DPC++ Runtime
Runtime libraries for oneAPI DPC++
kernel.hpp
Go to the documentation of this file.
1 /***************************************************************************
2  *
3  * Copyright (C) Codeplay Software Ltd.
4  *
5  * Part of the LLVM Project, under the Apache License v2.0 with LLVM
6  * Exceptions. See https://llvm.org/LICENSE.txt for license information.
7  * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  *
15  * SYCL compatibility extension
16  *
17  * kernel.hpp
18  *
19  * Description:
20  * kernel functionality for the SYCL compatibility extension.
21  **************************************************************************/
22 
23 // The original source was under the license below:
24 //==---- kernel.hpp -------------------------------*- C++ -*----------------==//
25 //
26 // Copyright (C) Intel Corporation
27 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
28 // See https://llvm.org/LICENSE.txt for license information.
29 //
30 //===----------------------------------------------------------------------===//
31 
32 #pragma once
33 
34 #ifdef _WIN32
35 #include <unordered_set>
36 #include <windows.h>
37 #else
38 #include <dlfcn.h>
39 #endif
40 
41 #if defined(__has_include) && __has_include(<filesystem>)
42 #include <filesystem>
43 #elif defined(__has_include) && __has_include(<experimental/filesystem>)
44 #include <experimental/filesystem>
45 #else
46 #error "SYCLomatic runtime requires C++ filesystem support"
47 #endif
48 
49 #include <fstream>
50 #include <image.hpp>
51 #include <random>
52 
53 #include <sycl/info/info_desc.hpp>
54 #include <sycl/nd_range.hpp>
55 #include <sycl/queue.hpp>
56 
57 namespace syclcompat {
58 
59 typedef void (*kernel_functor)(sycl::queue &, const sycl::nd_range<3> &,
60  unsigned int, void **, void **);
61 
64 };
65 
66 static inline void get_kernel_function_info(kernel_function_info *kernel_info,
67  const void *function) {
68  kernel_info->max_work_group_size =
71  .get_info<sycl::info::device::max_work_group_size>();
72 }
73 
74 static inline kernel_function_info
75 get_kernel_function_info(const void *function) {
76  kernel_function_info kernel_info;
77  kernel_info.max_work_group_size =
80  .get_info<sycl::info::device::max_work_group_size>();
81  return kernel_info;
82 }
83 
84 namespace detail {
85 
86 #if defined(__has_include) && __has_include(<filesystem>)
87 namespace fs = std::filesystem;
88 #else
89 namespace fs = std::experimental::filesystem;
90 #endif
91 
96 static inline fs::path write_data_to_file(char const *const data, size_t size) {
97  std::error_code ec;
98 
99  if (sizeof(size_t) >= sizeof(std::streamsize) &&
101  throw std::runtime_error("[SYCLcompat] data file too large");
102 
103  // random number generator
104  std::random_device dev;
105  std::mt19937 prng(dev());
106  std::uniform_int_distribution<uint64_t> rand(0);
107 
108  // find temporary directory
109  auto tmp_dir = fs::temp_directory_path(ec);
110  if (ec)
111  throw std::runtime_error("[SYCLcompat] could not find temporary directory");
112 
113  // create private directory
114  std::stringstream directory;
115  fs::path directory_path;
116  constexpr int max_attempts = 5;
117  int i;
118 
119  for (i = 0; i < max_attempts; i++) {
120  directory << std::hex << rand(prng);
121  directory_path = tmp_dir / directory.str();
122  if (fs::create_directory(directory_path)) {
123  break;
124  }
125  }
126  if (i == max_attempts)
127  throw std::runtime_error("[SYCLcompat] could not create directory");
128 
129  // only allow owner permissions to private directory
130  fs::permissions(directory_path, fs::perms::owner_all, ec);
131  if (ec)
132  throw std::runtime_error(
133  "[SYCLcompat] could not set directory permissions");
134 
135  // random filename in private directory
136  std::stringstream filename;
137  filename << std::hex << rand(prng);
138 #ifdef _WIN32
139  auto filepath = directory_path / (filename.str() + ".dll");
140 #else
141  auto filepath = directory_path / filename.str();
142 #endif
143 
144  // write data to temporary file
145  auto outfile = std::ofstream(filepath, std::ios::out | std::ios::binary);
146  if (outfile) {
147  // only allow program to write file
148  fs::permissions(filepath, fs::perms::owner_write, ec);
149  if (ec)
150  throw std::runtime_error("[SYCLcompat] could not set permissions");
151 
152  outfile.write(data, size);
153  if (!outfile.good())
154  throw std::runtime_error("[SYCLcompat] could not write data");
155  outfile.close();
156 
157  // only allow program to read/execute file
158  fs::permissions(filepath, fs::perms::owner_read | fs::perms::owner_exec,
159  ec);
160  if (ec)
161  throw std::runtime_error("[SYCLcompat] could not set permissions");
162  } else
163  throw std::runtime_error("[SYCLcompat] could not write data");
164 
165  // check temporary file contents
166  auto infile = std::ifstream(filepath, std::ios::in | std::ios::binary);
167  if (infile) {
168  bool mismatch = false;
169  size_t cnt = 0;
170 
171  while (1) {
172  char c;
173  infile.get(c);
174  if (infile.eof())
175  break;
176  if (c != data[cnt++])
177  mismatch = true;
178  }
179  if (cnt != size || mismatch)
180  throw std::runtime_error(
181  "[SYCLcompat] file contents not written correctly");
182  } else
183  throw std::runtime_error("[SYCLcompat] could not validate file");
184 
185  if (!filepath.is_absolute())
186  throw std::runtime_error("[SYCLcompat] temporary filepath is not absolute");
187 
188  return filepath;
189 }
190 
191 static inline uint16_t extract16(unsigned char const *const ptr) {
192  uint16_t ret = 0;
193 
194  ret |= static_cast<uint16_t>(ptr[0]) << 0;
195  ret |= static_cast<uint16_t>(ptr[1]) << 8;
196 
197  return (ret);
198 }
199 
200 static inline uint32_t extract32(unsigned char const *const ptr) {
201  uint32_t ret = 0;
202 
203  ret |= static_cast<uint32_t>(ptr[0]) << 0;
204  ret |= static_cast<uint32_t>(ptr[1]) << 8;
205  ret |= static_cast<uint32_t>(ptr[2]) << 16;
206  ret |= static_cast<uint32_t>(ptr[3]) << 24;
207 
208  return (ret);
209 }
210 
211 static inline uint64_t extract64(unsigned char const *const ptr) {
212  uint64_t ret = 0;
213 
214  ret |= static_cast<uint64_t>(ptr[0]) << 0;
215  ret |= static_cast<uint64_t>(ptr[1]) << 8;
216  ret |= static_cast<uint64_t>(ptr[2]) << 16;
217  ret |= static_cast<uint64_t>(ptr[3]) << 24;
218  ret |= static_cast<uint64_t>(ptr[4]) << 32;
219  ret |= static_cast<uint64_t>(ptr[5]) << 40;
220  ret |= static_cast<uint64_t>(ptr[6]) << 48;
221  ret |= static_cast<uint64_t>(ptr[7]) << 56;
222 
223  return (ret);
224 }
225 
226 static inline uint64_t get_lib_size(char const *const blob) {
227 #ifdef _WIN32
229  // Analyze DOS stub
230  unsigned char const *const ublob =
231  reinterpret_cast<unsigned char const *const>(blob);
232  if (ublob[0] != 0x4d || ublob[1] != 0x5a) {
233  throw std::runtime_error("[SYCLcompat] blob is not a Windows DLL.");
234  }
235  uint32_t pe_header_offset = extract32(ublob + 0x3c);
236 
238  // Ananlyze PE-header
239  unsigned char const *const pe_header = ublob + pe_header_offset;
240 
241  // signature
242  uint32_t pe_signature = extract32(pe_header + 0);
243  if (pe_signature != 0x00004550) {
244  throw std::runtime_error(
245  "[SYCLcompat] PE-header signature is not 0x00004550");
246  }
247 
248  // machine
249  uint16_t machine = extract16(pe_header + 4);
250  if (machine != 0x8664) {
251  throw std::runtime_error("[SYCLcompat] only DLLs for x64 supported");
252  }
253 
254  // number of sections
255  uint16_t number_of_sections = extract16(pe_header + 6);
256 
257  // sizeof optional header
258  uint16_t sizeof_optional_header = extract16(pe_header + 20);
259 
260  // magic
261  uint16_t magic = extract16(pe_header + 24);
262  if (magic != 0x10b && magic != 0x20b) {
263  throw std::runtime_error("[SYCLcompat] MAGIC is not 0x010b or 0x020b");
264  }
265 
267  // Analyze tail of optional header
268  constexpr int coff_header_size = 24;
269 
270  unsigned char const *const tail_of_optional_header =
271  pe_header + coff_header_size + sizeof_optional_header;
272  if (extract64(tail_of_optional_header - 8) != 0) {
273  throw std::runtime_error("Optional header not zero-padded");
274  }
275 
277  // Analyze last section header
278  constexpr int section_header_size = 40;
279  unsigned char const *const last_section_header =
280  tail_of_optional_header + section_header_size * (number_of_sections - 1);
281 
282  uint32_t sizeof_raw_data = extract32(last_section_header + 16);
283  uint32_t pointer_to_raw_data = extract32(last_section_header + 20);
284 
285  return sizeof_raw_data + pointer_to_raw_data;
286 #else
287  if (blob[0] != 0x7F || blob[1] != 'E' || blob[2] != 'L' || blob[3] != 'F')
288  throw std::runtime_error("[SYCLcompat] blob is not in ELF format");
289 
290  if (blob[4] != 0x02)
291  throw std::runtime_error("[SYCLcompat] only 64-bit headers are supported");
292 
293  if (blob[5] != 0x01)
294  throw std::runtime_error(
295  "[SYCLcompat] only little-endian headers are supported");
296 
297  unsigned char const *const ublob =
298  reinterpret_cast<unsigned char const *const>(blob);
299  uint64_t e_shoff = extract64(ublob + 0x28);
300  uint16_t e_shentsize = extract16(ublob + 0x3A);
301  uint16_t e_shnum = extract16(ublob + 0x3C);
302 
303  return e_shoff + (e_shentsize * e_shnum);
304 #endif
305 }
306 
307 #ifdef _WIN32
308 class path_lib_record {
309 public:
310  void operator=(const path_lib_record &) = delete;
311  ~path_lib_record() {
312  for (auto entry : lib_to_path) {
313  FreeLibrary(static_cast<HMODULE>(entry.first));
314  fs::permissions(entry.second, fs::perms::owner_all);
315  fs::remove_all(entry.second.remove_filename());
316  }
317  }
318  static void record_lib_path(fs::path path, void *library) {
319  lib_to_path[library] = path;
320  }
321  static void remove_lib(void *library) {
322  auto path = lib_to_path[library];
323  std::error_code ec;
324 
325  FreeLibrary(static_cast<HMODULE>(library));
326  fs::permissions(path, fs::perms::owner_all);
327  if (fs::remove_all(path.remove_filename(), ec) != 2 || ec)
328  // one directory and one temporary file should have been deleted
329  throw std::runtime_error("[SYCLcompat] directory delete failed");
330 
331  lib_to_path.erase(library);
332  }
333 
334 private:
335  static inline std::unordered_map<void *, fs::path> lib_to_path;
336 };
337 #endif
338 
339 } // namespace detail
340 
342 public:
343  kernel_library() : ptr{nullptr} {}
344  kernel_library(void *ptr) : ptr{ptr} {}
345 
346  operator void *() const { return ptr; }
347 
348 private:
349  void *ptr;
350 #ifdef _WIN32
351  static inline detail::path_lib_record single_instance_to_trigger_destructor;
352 #endif
353 };
354 
355 namespace detail {
356 
357 static inline kernel_library load_dl_from_data(char const *const data,
358  size_t size) {
359  fs::path filename = write_data_to_file(data, size);
360 #ifdef _WIN32
361  void *so = LoadLibraryW(filename.wstring().c_str());
362 #else
363  void *so = dlopen(filename.c_str(), RTLD_LAZY);
364 #endif
365  if (so == nullptr)
366  throw std::runtime_error("[SYCLcompat] failed to load kernel library");
367 
368 #ifdef _WIN32
369  detail::path_lib_record::record_lib_path(filename, so);
370 #else
371  std::error_code ec;
372 
373  // Windows DLL cannot be deleted while in use
374  if (fs::remove_all(filename.remove_filename(), ec) != 2 || ec)
375  // one directory and one temporary file should have been deleted
376  throw std::runtime_error("[SYCLcompat] directory delete failed");
377 #endif
378 
379  return so;
380 }
381 
382 } // namespace detail
383 
386 static inline kernel_library load_kernel_library(const std::string &name) {
387  std::ifstream ifs;
388  ifs.open(name, std::ios::in | std::ios::binary);
389 
390  std::stringstream buffer;
391  buffer << ifs.rdbuf();
392 
393  const std::string buffer_string = buffer.str();
394  return detail::load_dl_from_data(buffer_string.c_str(), buffer_string.size());
395 }
396 
400 static inline kernel_library load_kernel_library_mem(char const *const image) {
401  const size_t size = detail::get_lib_size(image);
402 
403  return detail::load_dl_from_data(image, size);
404 }
405 
408 static inline void unload_kernel_library(const kernel_library &library) {
409 #ifdef _WIN32
410  detail::path_lib_record::remove_lib(library);
411 #else
412  dlclose(library);
413 #endif
414 }
415 
417 public:
418  kernel_function() : ptr{nullptr} {}
419  kernel_function(kernel_functor ptr) : ptr{ptr} {}
420 
421  operator void *() const { return ((void *)ptr); }
422 
423  void operator()(sycl::queue &q, const sycl::nd_range<3> &range,
424  unsigned int local_mem_size, void **args, void **extra) {
425  ptr(q, range, local_mem_size, args, extra);
426  }
427 
428 private:
429  kernel_functor ptr;
430 };
431 
436  const std::string &name) {
437 #ifdef _WIN32
438  kernel_functor fn = reinterpret_cast<kernel_functor>(
439  GetProcAddress(static_cast<HMODULE>(static_cast<void *>(library)),
440  (name + std::string("_wrapper")).c_str()));
441 #else
442  kernel_functor fn = reinterpret_cast<kernel_functor>(
443  dlsym(library, (name + std::string("_wrapper")).c_str()));
444 #endif
445  if (fn == nullptr)
446  throw std::runtime_error("[SYCLcompat] failed to get function");
447  return fn;
448 }
449 
459 static inline void invoke_kernel_function(kernel_function &function,
460  sycl::queue &queue,
461  sycl::range<3> group_range,
462  sycl::range<3> local_range,
463  unsigned int local_mem_size,
464  void **kernel_params, void **extra) {
465  function(queue, sycl::nd_range<3>(group_range * local_range, local_range),
466  local_mem_size, kernel_params, extra);
467 }
468 
469 } // namespace syclcompat
Defines the iteration domain of both the work-groups and the overall dispatch.
Definition: nd_range.hpp:22
Encapsulates a single SYCL queue which schedules kernels on a SYCL device.
Definition: queue.hpp:111
Defines the iteration domain of either a single work-group in a parallel dispatch,...
Definition: range.hpp:26
static dev_mgr & instance()
Returns the instance of device manager singleton.
Definition: device.hpp:688
device_ext & current_device()
Definition: device.hpp:628
kernel_function(kernel_functor ptr)
Definition: kernel.hpp:419
void operator()(sycl::queue &q, const sycl::nd_range< 3 > &range, unsigned int local_mem_size, void **args, void **extra)
Definition: kernel.hpp:423
annotated_arg & operator=(annotated_arg &)=default
static fs::path write_data_to_file(char const *const data, size_t size)
Write data to temporary file and return absolute path to temporary file.
Definition: kernel.hpp:96
static uint16_t extract16(unsigned char const *const ptr)
Definition: kernel.hpp:191
static uint64_t get_lib_size(char const *const blob)
Definition: kernel.hpp:226
static uint32_t extract32(unsigned char const *const ptr)
Definition: kernel.hpp:200
static kernel_library load_dl_from_data(char const *const data, size_t size)
Definition: kernel.hpp:357
static uint64_t extract64(unsigned char const *const ptr)
Definition: kernel.hpp:211
void(* kernel_functor)(sycl::queue &, const sycl::nd_range< 3 > &, unsigned int, void **, void **)
Definition: kernel.hpp:59
static kernel_function get_kernel_function(kernel_library &library, const std::string &name)
Find kernel function in a kernel library and return its address.
Definition: kernel.hpp:435
static kernel_library load_kernel_library(const std::string &name)
Load kernel library and return a handle to use the library.
Definition: kernel.hpp:386
static void get_kernel_function_info(kernel_function_info *kernel_info, const void *function)
Definition: kernel.hpp:66
static kernel_library load_kernel_library_mem(char const *const image)
Load kernel library whose image is alreay in memory and return a handle to use the library.
Definition: kernel.hpp:400
static void invoke_kernel_function(kernel_function &function, sycl::queue &queue, sycl::range< 3 > group_range, sycl::range< 3 > local_range, unsigned int local_mem_size, void **kernel_params, void **extra)
Invoke a kernel function.
Definition: kernel.hpp:459
error_code
Definition: defs.hpp:59
static void unload_kernel_library(const kernel_library &library)
Unload kernel library.
Definition: kernel.hpp:408