DPC++ Runtime
Runtime libraries for oneAPI DPC++
queue.hpp
Go to the documentation of this file.
1 //==-------------------- queue.hpp - SYCL queue ----------------------------==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #pragma once
10 
11 #include <sycl/access/access.hpp> // for target, access...
12 #include <sycl/accessor.hpp> // for accessor
13 #include <sycl/aspects.hpp> // for aspect
14 #include <sycl/async_handler.hpp> // for async_handler
15 #include <sycl/backend_types.hpp> // for backend, backe...
16 #include <sycl/buffer.hpp> // for buffer
17 #include <sycl/context.hpp> // for context
18 #include <sycl/detail/assert_happened.hpp> // for AssertHappened
19 #include <sycl/detail/cg_types.hpp> // for check_fn_signa...
20 #include <sycl/detail/common.hpp> // for code_location
21 #include <sycl/detail/defines_elementary.hpp> // for __SYCL2020_DEP...
22 #include <sycl/detail/export.hpp> // for __SYCL_EXPORT
23 #include <sycl/detail/info_desc_helpers.hpp> // for is_queue_info_...
24 #include <sycl/detail/kernel_desc.hpp> // for KernelInfo
25 #include <sycl/detail/owner_less_base.hpp> // for OwnerLessBase
26 #include <sycl/detail/pi.h> // for pi_mem_advice
27 #include <sycl/device.hpp> // for device
28 #include <sycl/device_selector.hpp> // for device_selector
29 #include <sycl/event.hpp> // for event
30 #include <sycl/exception.hpp> // for make_error_code
31 #include <sycl/exception_list.hpp> // for defaultAsyncHa...
32 #include <sycl/ext/oneapi/bindless_images_descriptor.hpp> // for image_descriptor
33 #include <sycl/ext/oneapi/bindless_images_interop.hpp> // for interop_semaph...
34 #include <sycl/ext/oneapi/bindless_images_memory.hpp> // for image_mem_handle
35 #include <sycl/ext/oneapi/device_global/device_global.hpp> // for device_global
36 #include <sycl/ext/oneapi/device_global/properties.hpp> // for device_image_s...
37 #include <sycl/ext/oneapi/experimental/graph.hpp> // for command_graph...
38 #include <sycl/ext/oneapi/properties/properties.hpp> // for empty_properti...
39 #include <sycl/handler.hpp> // for handler, isDev...
40 #include <sycl/id.hpp> // for id
41 #include <sycl/kernel.hpp> // for auto_name
42 #include <sycl/kernel_handler.hpp> // for kernel_handler
43 #include <sycl/nd_range.hpp> // for nd_range
44 #include <sycl/property_list.hpp> // for property_list
45 #include <sycl/range.hpp> // for range
46 
47 #include <cstddef> // for size_t
48 #include <functional> // for function
49 #include <memory> // for shared_ptr, hash
50 #include <stdint.h> // for int32_t
51 #include <tuple> // for tuple
52 #include <type_traits> // for remove_all_ext...
53 #include <variant> // for hash
54 #include <vector> // for vector
55 
56 // having _TWO_ mid-param #ifdefs makes the functions very difficult to read.
57 // Here we simplify the KernelFunc param is simplified to be
58 // _KERNELFUNCPARAM(KernelFunc) Once the queue kernel functions are defined,
59 // these macros are #undef immediately.
60 // replace _KERNELFUNCPARAM(KernelFunc) with KernelType KernelFunc
61 // or const KernelType &KernelFunc
62 #ifdef __SYCL_NONCONST_FUNCTOR__
63 #define _KERNELFUNCPARAM(a) KernelType a
64 #else
65 #define _KERNELFUNCPARAM(a) const KernelType &a
66 #endif
67 
68 namespace sycl {
69 inline namespace _V1 {
70 
71 // Forward declaration
72 class context;
73 class device;
74 class event;
75 class queue;
76 
77 template <backend BackendName, class SyclObjectT>
78 auto get_native(const SyclObjectT &Obj)
79  -> backend_return_t<BackendName, SyclObjectT>;
80 
81 namespace detail {
82 class queue_impl;
83 
84 #if __SYCL_USE_FALLBACK_ASSERT
85 inline event submitAssertCapture(queue &, event &, queue *,
86  const detail::code_location &);
87 #endif
88 } // namespace detail
89 
90 namespace ext {
91 namespace oneapi {
92 namespace experimental {
93 // State of a queue with regards to graph recording,
94 // returned by info::queue::state
96 } // namespace experimental
97 } // namespace oneapi
98 } // namespace ext
99 
111 class __SYCL_EXPORT queue : public detail::OwnerLessBase<queue> {
112 public:
117  explicit queue(const property_list &PropList = {})
119 
125  queue(const async_handler &AsyncHandler, const property_list &PropList = {})
126  : queue(default_selector_v, AsyncHandler, PropList) {}
127 
134  template <typename DeviceSelector,
135  typename =
136  detail::EnableIfSYCL2020DeviceSelectorInvocable<DeviceSelector>>
137  explicit queue(const DeviceSelector &deviceSelector,
138  const async_handler &AsyncHandler,
139  const property_list &PropList = {})
140  : queue(detail::select_device(deviceSelector), AsyncHandler, PropList) {}
141 
147  template <typename DeviceSelector,
148  typename =
149  detail::EnableIfSYCL2020DeviceSelectorInvocable<DeviceSelector>>
150  explicit queue(const DeviceSelector &deviceSelector,
151  const property_list &PropList = {})
152  : queue(detail::select_device(deviceSelector),
153  detail::defaultAsyncHandler, PropList) {}
154 
161  template <typename DeviceSelector,
162  typename =
163  detail::EnableIfSYCL2020DeviceSelectorInvocable<DeviceSelector>>
164  explicit queue(const context &syclContext,
165  const DeviceSelector &deviceSelector,
166  const property_list &propList = {})
167  : queue(syclContext, detail::select_device(deviceSelector, syclContext),
168  propList) {}
169 
177  template <typename DeviceSelector,
178  typename =
179  detail::EnableIfSYCL2020DeviceSelectorInvocable<DeviceSelector>>
180  explicit queue(const context &syclContext,
181  const DeviceSelector &deviceSelector,
182  const async_handler &AsyncHandler,
183  const property_list &propList = {})
184  : queue(syclContext, detail::select_device(deviceSelector, syclContext),
185  AsyncHandler, propList) {}
186 
192  __SYCL2020_DEPRECATED("SYCL 1.2.1 device selectors are deprecated. Please "
193  "use SYCL 2020 device selectors instead.")
194  queue(const device_selector &DeviceSelector,
195  const property_list &PropList = {})
196  : queue(DeviceSelector.select_device(), detail::defaultAsyncHandler,
197  PropList) {}
198 
205  __SYCL2020_DEPRECATED("SYCL 1.2.1 device selectors are deprecated. Please "
206  "use SYCL 2020 device selectors instead.")
207  queue(const device_selector &DeviceSelector,
208  const async_handler &AsyncHandler, const property_list &PropList = {})
209  : queue(DeviceSelector.select_device(), AsyncHandler, PropList) {}
210 
215  explicit queue(const device &SyclDevice, const property_list &PropList = {})
216  : queue(SyclDevice, detail::defaultAsyncHandler, PropList) {}
217 
224  explicit queue(const device &SyclDevice, const async_handler &AsyncHandler,
225  const property_list &PropList = {});
226 
233  __SYCL2020_DEPRECATED("SYCL 1.2.1 device selectors are deprecated. Please "
234  "use SYCL 2020 device selectors instead.")
235  queue(const context &SyclContext, const device_selector &DeviceSelector,
236  const property_list &PropList = {});
237 
246  __SYCL2020_DEPRECATED("SYCL 1.2.1 device selectors are deprecated. Please "
247  "use SYCL 2020 device selectors instead.")
248  queue(const context &SyclContext, const device_selector &DeviceSelector,
249  const async_handler &AsyncHandler, const property_list &PropList = {});
250 
257  queue(const context &SyclContext, const device &SyclDevice,
258  const property_list &PropList = {});
259 
267  queue(const context &SyclContext, const device &SyclDevice,
268  const async_handler &AsyncHandler, const property_list &PropList = {});
269 
278 #ifdef __SYCL_INTERNAL_API
279  queue(cl_command_queue ClQueue, const context &SyclContext,
280  const async_handler &AsyncHandler = {});
281 #endif
282 
283  queue(const queue &RHS) = default;
284 
285  queue(queue &&RHS) = default;
286 
287  queue &operator=(const queue &RHS) = default;
288 
289  queue &operator=(queue &&RHS) = default;
290 
291  bool operator==(const queue &RHS) const { return impl == RHS.impl; }
292 
293  bool operator!=(const queue &RHS) const { return !(*this == RHS); }
294 
297 #ifdef __SYCL_INTERNAL_API
298  cl_command_queue get() const;
299 #endif
300 
302  context get_context() const;
303 
305  device get_device() const;
306 
308  ext::oneapi::experimental::queue_state ext_oneapi_get_state() const;
309 
313  ext_oneapi_get_graph() const;
314 
317  "is_host() is deprecated as the host device is no longer supported.")
318  bool is_host() const;
319 
323  template <typename Param>
324  typename detail::is_queue_info_desc<Param>::return_type get_info() const;
325 
329  template <typename Param>
330  typename detail::is_backend_info_desc<Param>::return_type
331  get_backend_info() const;
332 
333 private:
334  // A shorthand for `get_device().has()' which is expected to be a bit quicker
335  // than the long version
336  bool device_has(aspect Aspect) const;
337 
338 public:
345  template <typename T>
346  std::enable_if_t<std::is_invocable_r_v<void, T, handler &>, event> submit(
347  T CGF,
348  const detail::code_location &CodeLoc = detail::code_location::current()) {
349  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
350 #if __SYCL_USE_FALLBACK_ASSERT
351  auto PostProcess = [this, &CodeLoc](bool IsKernel, bool KernelUsesAssert,
352  event &E) {
353  if (IsKernel && !device_has(aspect::ext_oneapi_native_assert) &&
354  KernelUsesAssert && !device_has(aspect::accelerator)) {
355  // __devicelib_assert_fail isn't supported by Device-side Runtime
356  // Linking against fallback impl of __devicelib_assert_fail is
357  // performed by program manager class
358  // Fallback assert isn't supported for FPGA
359  submitAssertCapture(*this, E, /* SecondaryQueue = */ nullptr, CodeLoc);
360  }
361  };
362 
363  return submit_impl_and_postprocess(CGF, CodeLoc, PostProcess);
364 #else
365  return submit_impl(CGF, CodeLoc);
366 #endif // __SYCL_USE_FALLBACK_ASSERT
367  }
368 
380  template <typename T>
381  std::enable_if_t<std::is_invocable_r_v<void, T, handler &>, event> submit(
382  T CGF, queue &SecondaryQueue,
384  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
385 #if __SYCL_USE_FALLBACK_ASSERT
386  auto PostProcess = [this, &SecondaryQueue, &CodeLoc](
387  bool IsKernel, bool KernelUsesAssert, event &E) {
388  if (IsKernel && !device_has(aspect::ext_oneapi_native_assert) &&
389  KernelUsesAssert && !device_has(aspect::accelerator)) {
390  // Only secondary queues on devices need to be added to the assert
391  // capture.
392  // __devicelib_assert_fail isn't supported by Device-side Runtime
393  // Linking against fallback impl of __devicelib_assert_fail is
394  // performed by program manager class
395  // Fallback assert isn't supported for FPGA
396  submitAssertCapture(*this, E, &SecondaryQueue, CodeLoc);
397  }
398  };
399 
400  return submit_impl_and_postprocess(CGF, SecondaryQueue, CodeLoc,
401  PostProcess);
402 #else
403  return submit_impl(CGF, SecondaryQueue, CodeLoc);
404 #endif // __SYCL_USE_FALLBACK_ASSERT
405  }
406 
414  event ext_oneapi_submit_barrier(
416 
426  event ext_oneapi_submit_barrier(
427  const std::vector<event> &WaitList,
429 
435  void wait(
437  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
438  wait_proxy(CodeLoc);
439  }
440 
451  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
452  wait_and_throw_proxy(CodeLoc);
453  }
454 
457  void wait_proxy(const detail::code_location &CodeLoc);
460  void wait_and_throw_proxy(const detail::code_location &CodeLoc);
461 
467  void throw_asynchronous();
468 
471  template <typename PropertyT> bool has_property() const noexcept;
472 
476  template <typename PropertyT> PropertyT get_property() const;
477 
485  template <typename T>
487  void *Ptr, const T &Pattern, size_t Count,
488  const detail::code_location &CodeLoc = detail::code_location::current()) {
489  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
490  return submit([&](handler &CGH) { CGH.fill<T>(Ptr, Pattern, Count); },
491  CodeLoc);
492  }
493 
502  template <typename T>
503  event fill(
504  void *Ptr, const T &Pattern, size_t Count, event DepEvent,
506  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
507  return submit(
508  [&](handler &CGH) {
509  CGH.depends_on(DepEvent);
510  CGH.fill<T>(Ptr, Pattern, Count);
511  },
512  CodeLoc);
513  }
514 
524  template <typename T>
525  event fill(
526  void *Ptr, const T &Pattern, size_t Count,
527  const std::vector<event> &DepEvents,
529  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
530  return submit(
531  [&](handler &CGH) {
532  CGH.depends_on(DepEvents);
533  CGH.fill<T>(Ptr, Pattern, Count);
534  },
535  CodeLoc);
536  }
537 
547  event memset(
548  void *Ptr, int Value, size_t Count,
550 
561  event memset(
562  void *Ptr, int Value, size_t Count, event DepEvent,
564 
576  event memset(
577  void *Ptr, int Value, size_t Count, const std::vector<event> &DepEvents,
579 
591  event memcpy(
592  void *Dest, const void *Src, size_t Count,
594 
607  event memcpy(
608  void *Dest, const void *Src, size_t Count, event DepEvent,
610 
624  event memcpy(
625  void *Dest, const void *Src, size_t Count,
626  const std::vector<event> &DepEvents,
628 
641  template <typename T>
642  event copy(
643  const T *Src, T *Dest, size_t Count,
645  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
646  return this->memcpy(Dest, Src, Count * sizeof(T));
647  }
648 
662  template <typename T>
663  event copy(
664  const T *Src, T *Dest, size_t Count, event DepEvent,
666  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
667  return this->memcpy(Dest, Src, Count * sizeof(T), DepEvent);
668  }
669 
683  template <typename T>
684  event copy(
685  const T *Src, T *Dest, size_t Count, const std::vector<event> &DepEvents,
687  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
688  return this->memcpy(Dest, Src, Count * sizeof(T), DepEvents);
689  }
690 
698  __SYCL2020_DEPRECATED("use the overload with int Advice instead")
700  const void *Ptr, size_t Length, pi_mem_advice Advice,
701  const detail::code_location &CodeLoc = detail::code_location::current());
702 
711  const void *Ptr, size_t Length, int Advice,
712  const detail::code_location &CodeLoc = detail::code_location::current());
713 
723  const void *Ptr, size_t Length, int Advice, event DepEvent,
724  const detail::code_location &CodeLoc = detail::code_location::current());
725 
736  const void *Ptr, size_t Length, int Advice,
737  const std::vector<event> &DepEvents,
738  const detail::code_location &CodeLoc = detail::code_location::current());
739 
747  event prefetch(
748  const void *Ptr, size_t Count,
749  const detail::code_location &CodeLoc = detail::code_location::current()) {
750  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
751  return submit([=](handler &CGH) { CGH.prefetch(Ptr, Count); }, CodeLoc);
752  }
753 
762  event prefetch(
763  const void *Ptr, size_t Count, event DepEvent,
765  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
766  return submit(
767  [=](handler &CGH) {
768  CGH.depends_on(DepEvent);
769  CGH.prefetch(Ptr, Count);
770  },
771  CodeLoc);
772  }
773 
783  event prefetch(
784  const void *Ptr, size_t Count, const std::vector<event> &DepEvents,
786  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
787  return submit(
788  [=](handler &CGH) {
789  CGH.depends_on(DepEvents);
790  CGH.prefetch(Ptr, Count);
791  },
792  CodeLoc);
793  }
794 
813  template <typename T = unsigned char,
814  typename = std::enable_if_t<std::is_same_v<T, unsigned char>>>
816  void *Dest, size_t DestPitch, const void *Src, size_t SrcPitch,
817  size_t Width, size_t Height,
819  return submit(
820  [=](handler &CGH) {
821  CGH.ext_oneapi_memcpy2d<T>(Dest, DestPitch, Src, SrcPitch, Width,
822  Height);
823  },
824  CodeLoc);
825  }
826 
846  template <typename T = unsigned char,
847  typename = std::enable_if_t<std::is_same_v<T, unsigned char>>>
848  event ext_oneapi_memcpy2d(
849  void *Dest, size_t DestPitch, const void *Src, size_t SrcPitch,
850  size_t Width, size_t Height, event DepEvent,
852 
873  template <typename T = unsigned char,
874  typename = std::enable_if_t<std::is_same_v<T, unsigned char>>>
875  event ext_oneapi_memcpy2d(
876  void *Dest, size_t DestPitch, const void *Src, size_t SrcPitch,
877  size_t Width, size_t Height, const std::vector<event> &DepEvents,
879 
895  template <typename T>
896  event ext_oneapi_copy2d(
897  const T *Src, size_t SrcPitch, T *Dest, size_t DestPitch, size_t Width,
898  size_t Height,
900 
917  template <typename T>
918  event ext_oneapi_copy2d(
919  const T *Src, size_t SrcPitch, T *Dest, size_t DestPitch, size_t Width,
920  size_t Height, event DepEvent,
922 
940  template <typename T>
941  event ext_oneapi_copy2d(
942  const T *Src, size_t SrcPitch, T *Dest, size_t DestPitch, size_t Width,
943  size_t Height, const std::vector<event> &DepEvents,
945 
962  template <typename T = unsigned char,
963  typename = std::enable_if_t<std::is_same_v<T, unsigned char>>>
964  event ext_oneapi_memset2d(
965  void *Dest, size_t DestPitch, int Value, size_t Width, size_t Height,
967 
985  template <typename T = unsigned char,
986  typename = std::enable_if_t<std::is_same_v<T, unsigned char>>>
987  event ext_oneapi_memset2d(
988  void *Dest, size_t DestPitch, int Value, size_t Width, size_t Height,
989  event DepEvent,
991 
1010  template <typename T = unsigned char,
1011  typename = std::enable_if_t<std::is_same_v<T, unsigned char>>>
1012  event ext_oneapi_memset2d(
1013  void *Dest, size_t DestPitch, int Value, size_t Width, size_t Height,
1014  const std::vector<event> &DepEvents,
1016 
1030  template <typename T>
1031  event ext_oneapi_fill2d(
1032  void *Dest, size_t DestPitch, const T &Pattern, size_t Width,
1033  size_t Height,
1035 
1050  template <typename T>
1051  event ext_oneapi_fill2d(
1052  void *Dest, size_t DestPitch, const T &Pattern, size_t Width,
1053  size_t Height, event DepEvent,
1055 
1071  template <typename T>
1072  event ext_oneapi_fill2d(
1073  void *Dest, size_t DestPitch, const T &Pattern, size_t Width,
1074  size_t Height, const std::vector<event> &DepEvents,
1076 
1089  template <typename T, typename PropertyListT>
1090  event memcpy(
1092  const void *Src, size_t NumBytes, size_t Offset,
1093  const std::vector<event> &DepEvents,
1095  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
1096  if (sizeof(T) < Offset + NumBytes)
1098  "Copy to device_global is out of bounds.");
1099 
1100  if (!detail::isDeviceGlobalUsedInKernel(&Dest)) {
1101  // device_global is unregistered so we need a fallback. We let the handler
1102  // implement this fallback.
1103  return submit(
1104  [&](handler &CGH) {
1105  CGH.depends_on(DepEvents);
1106  return CGH.memcpy(Dest, Src, NumBytes, Offset);
1107  },
1108  CodeLoc);
1109  }
1110 
1111  constexpr bool IsDeviceImageScoped = PropertyListT::template has_property<
1113  return memcpyToDeviceGlobal(&Dest, Src, IsDeviceImageScoped, NumBytes,
1114  Offset, DepEvents);
1115  }
1116 
1129  template <typename T, typename PropertyListT>
1130  event memcpy(
1132  const void *Src, size_t NumBytes, size_t Offset, event DepEvent,
1134  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
1135  return this->memcpy(Dest, Src, NumBytes, Offset,
1136  std::vector<event>{DepEvent});
1137  }
1138 
1149  template <typename T, typename PropertyListT>
1150  event memcpy(
1152  const void *Src, size_t NumBytes = sizeof(T), size_t Offset = 0,
1154  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
1155  return this->memcpy(Dest, Src, NumBytes, Offset, std::vector<event>{});
1156  }
1157 
1170  template <typename T, typename PropertyListT>
1171  event memcpy(
1172  void *Dest,
1174  size_t NumBytes, size_t Offset, const std::vector<event> &DepEvents,
1176  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
1177  if (sizeof(T) < Offset + NumBytes)
1179  "Copy from device_global is out of bounds.");
1180 
1182  // device_global is unregistered so we need a fallback. We let the handler
1183  // implement this fallback.
1184  return submit([&](handler &CGH) {
1185  CGH.depends_on(DepEvents);
1186  return CGH.memcpy(Dest, Src, NumBytes, Offset);
1187  });
1188  }
1189 
1190  constexpr bool IsDeviceImageScoped = PropertyListT::template has_property<
1192  return memcpyFromDeviceGlobal(Dest, &Src, IsDeviceImageScoped, NumBytes,
1193  Offset, DepEvents);
1194  }
1195 
1208  template <typename T, typename PropertyListT>
1209  event memcpy(
1210  void *Dest,
1212  size_t NumBytes, size_t Offset, event DepEvent,
1214  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
1215  return this->memcpy(Dest, Src, NumBytes, Offset,
1216  std::vector<event>{DepEvent});
1217  }
1218 
1229  template <typename T, typename PropertyListT>
1230  event memcpy(
1231  void *Dest,
1233  size_t NumBytes = sizeof(T), size_t Offset = 0,
1235  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
1236  return this->memcpy(Dest, Src, NumBytes, Offset, std::vector<event>{});
1237  }
1238 
1252  template <typename T, typename PropertyListT>
1253  event copy(
1254  const std::remove_all_extents_t<T> *Src,
1256  size_t Count, size_t StartIndex, const std::vector<event> &DepEvents,
1258  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
1259  return this->memcpy(Dest, Src, Count * sizeof(std::remove_all_extents_t<T>),
1260  StartIndex * sizeof(std::remove_all_extents_t<T>),
1261  DepEvents);
1262  }
1263 
1277  template <typename T, typename PropertyListT>
1278  event copy(
1279  const std::remove_all_extents_t<T> *Src,
1281  size_t Count, size_t StartIndex, event DepEvent,
1283  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
1284  return this->memcpy(Dest, Src, Count * sizeof(std::remove_all_extents_t<T>),
1285  StartIndex * sizeof(std::remove_all_extents_t<T>),
1286  DepEvent);
1287  }
1288 
1300  template <typename T, typename PropertyListT>
1301  event copy(
1302  const std::remove_all_extents_t<T> *Src,
1304  size_t Count = sizeof(T) / sizeof(std::remove_all_extents_t<T>),
1305  size_t StartIndex = 0,
1307  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
1308  return this->memcpy(Dest, Src, Count * sizeof(std::remove_all_extents_t<T>),
1309  StartIndex * sizeof(std::remove_all_extents_t<T>));
1310  }
1311 
1325  template <typename T, typename PropertyListT>
1326  event copy(
1328  std::remove_all_extents_t<T> *Dest, size_t Count, size_t StartIndex,
1329  const std::vector<event> &DepEvents,
1331  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
1332  return this->memcpy(Dest, Src, Count * sizeof(std::remove_all_extents_t<T>),
1333  StartIndex * sizeof(std::remove_all_extents_t<T>),
1334  DepEvents);
1335  }
1336 
1350  template <typename T, typename PropertyListT>
1351  event copy(
1353  std::remove_all_extents_t<T> *Dest, size_t Count, size_t StartIndex,
1354  event DepEvent,
1356  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
1357  return this->memcpy(Dest, Src, Count * sizeof(std::remove_all_extents_t<T>),
1358  StartIndex * sizeof(std::remove_all_extents_t<T>),
1359  DepEvent);
1360  }
1361 
1373  template <typename T, typename PropertyListT>
1374  event copy(
1376  std::remove_all_extents_t<T> *Dest,
1377  size_t Count = sizeof(T) / sizeof(std::remove_all_extents_t<T>),
1378  size_t StartIndex = 0,
1380  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
1381  return this->memcpy(Dest, Src, Count * sizeof(std::remove_all_extents_t<T>),
1382  StartIndex * sizeof(std::remove_all_extents_t<T>));
1383  }
1384 
1400  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
1401  return submit(
1402  [&](handler &CGH) { CGH.ext_oneapi_copy(Src, Dest, DestImgDesc); },
1403  CodeLoc);
1404  }
1405 
1426  void *Src, sycl::range<3> SrcOffset, sycl::range<3> SrcExtent,
1428  sycl::range<3> DestOffset,
1430  sycl::range<3> CopyExtent,
1432  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
1433  return submit(
1434  [&](handler &CGH) {
1435  CGH.ext_oneapi_copy(Src, SrcOffset, SrcExtent, Dest, DestOffset,
1436  DestImgDesc, CopyExtent);
1437  },
1438  CodeLoc);
1439  }
1440 
1456  event DepEvent,
1458  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
1459  return submit(
1460  [&](handler &CGH) {
1461  CGH.depends_on(DepEvent);
1462  CGH.ext_oneapi_copy(Src, Dest, DestImgDesc);
1463  },
1464  CodeLoc);
1465  }
1466 
1488  void *Src, sycl::range<3> SrcOffset, sycl::range<3> SrcExtent,
1490  sycl::range<3> DestOffset,
1492  sycl::range<3> CopyExtent, event DepEvent,
1494  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
1495  return submit(
1496  [&](handler &CGH) {
1497  CGH.depends_on(DepEvent);
1498  CGH.ext_oneapi_copy(Src, SrcOffset, SrcExtent, Dest, DestOffset,
1499  DestImgDesc, CopyExtent);
1500  },
1501  CodeLoc);
1502  }
1503 
1520  const std::vector<event> &DepEvents,
1522  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
1523  return submit(
1524  [&](handler &CGH) {
1525  CGH.depends_on(DepEvents);
1526  CGH.ext_oneapi_copy(Src, Dest, DestImgDesc);
1527  },
1528  CodeLoc);
1529  }
1530 
1553  void *Src, sycl::range<3> SrcOffset, sycl::range<3> SrcExtent,
1555  sycl::range<3> DestOffset,
1557  sycl::range<3> CopyExtent, const std::vector<event> &DepEvents,
1559  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
1560  return submit(
1561  [&](handler &CGH) {
1562  CGH.depends_on(DepEvents);
1563  CGH.ext_oneapi_copy(Src, SrcOffset, SrcExtent, Dest, DestOffset,
1564  DestImgDesc, CopyExtent);
1565  },
1566  CodeLoc);
1567  }
1568 
1583  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
1584  return submit(
1585  [&](handler &CGH) { CGH.ext_oneapi_copy(Src, Dest, SrcImgDesc); },
1586  CodeLoc);
1587  }
1588 
1612  const ext::oneapi::experimental::image_descriptor &SrcImgDesc, void *Dest,
1613  sycl::range<3> DestOffset, sycl::range<3> DestExtent,
1614  sycl::range<3> CopyExtent,
1616  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
1617  return submit(
1618  [&](handler &CGH) {
1619  CGH.ext_oneapi_copy(Src, SrcOffset, SrcImgDesc, Dest, DestOffset,
1620  DestExtent, CopyExtent);
1621  },
1622  CodeLoc);
1623  }
1624 
1639  event DepEvent,
1641  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
1642  return submit(
1643  [&](handler &CGH) {
1644  CGH.depends_on(DepEvent);
1645  CGH.ext_oneapi_copy(Src, Dest, SrcImgDesc);
1646  },
1647  CodeLoc);
1648  }
1649 
1674  const ext::oneapi::experimental::image_descriptor &SrcImgDesc, void *Dest,
1675  sycl::range<3> DestOffset, sycl::range<3> DestExtent,
1676  sycl::range<3> CopyExtent, event DepEvent,
1678  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
1679  return submit(
1680  [&](handler &CGH) {
1681  CGH.depends_on(DepEvent);
1682  CGH.ext_oneapi_copy(Src, SrcOffset, SrcImgDesc, Dest, DestOffset,
1683  DestExtent, CopyExtent);
1684  },
1685  CodeLoc);
1686  }
1687 
1703  const std::vector<event> &DepEvents,
1705  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
1706  return submit(
1707  [&](handler &CGH) {
1708  CGH.depends_on(DepEvents);
1709  CGH.ext_oneapi_copy(Src, Dest, SrcImgDesc);
1710  },
1711  CodeLoc);
1712  }
1713 
1739  const ext::oneapi::experimental::image_descriptor &SrcImgDesc, void *Dest,
1740  sycl::range<3> DestOffset, sycl::range<3> DestExtent,
1741  sycl::range<3> CopyExtent, const std::vector<event> &DepEvents,
1743  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
1744  return submit(
1745  [&](handler &CGH) {
1746  CGH.depends_on(DepEvents);
1747  CGH.ext_oneapi_copy(Src, SrcOffset, SrcImgDesc, Dest, DestOffset,
1748  DestExtent, CopyExtent);
1749  },
1750  CodeLoc);
1751  }
1752 
1765  void *Src, void *Dest,
1766  const ext::oneapi::experimental::image_descriptor &DeviceImgDesc,
1767  size_t DeviceRowPitch,
1769  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
1770  return submit(
1771  [&](handler &CGH) {
1772  CGH.ext_oneapi_copy(Src, Dest, DeviceImgDesc, DeviceRowPitch);
1773  },
1774  CodeLoc);
1775  }
1776 
1800  void *Src, sycl::range<3> SrcOffset, void *Dest,
1801  sycl::range<3> DestOffset,
1802  const ext::oneapi::experimental::image_descriptor &DeviceImgDesc,
1803  size_t DeviceRowPitch, sycl::range<3> HostExtent,
1804  sycl::range<3> CopyExtent,
1806  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
1807  return submit(
1808  [&](handler &CGH) {
1809  CGH.ext_oneapi_copy(Src, SrcOffset, Dest, DestOffset, DeviceImgDesc,
1810  DeviceRowPitch, HostExtent, CopyExtent);
1811  },
1812  CodeLoc);
1813  }
1814 
1828  void *Src, void *Dest,
1829  const ext::oneapi::experimental::image_descriptor &DeviceImgDesc,
1830  size_t DeviceRowPitch, event DepEvent,
1832  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
1833  return submit(
1834  [&](handler &CGH) {
1835  CGH.depends_on(DepEvent);
1836  CGH.ext_oneapi_copy(Src, Dest, DeviceImgDesc, DeviceRowPitch);
1837  },
1838  CodeLoc);
1839  }
1840 
1854  event DepEvent,
1856  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
1857  return submit(
1858  [&](handler &CGH) {
1859  CGH.depends_on(DepEvent);
1860  CGH.ext_oneapi_copy(Src, Dest, ImageDesc);
1861  },
1862  CodeLoc);
1863  }
1864 
1879  const std::vector<event> &DepEvents,
1881  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
1882  return submit(
1883  [&](handler &CGH) {
1884  CGH.depends_on(DepEvents);
1885  CGH.ext_oneapi_copy(Src, Dest, ImageDesc);
1886  },
1887  CodeLoc);
1888  }
1889 
1903  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
1904  return submit(
1905  [&](handler &CGH) { CGH.ext_oneapi_copy(Src, Dest, ImageDesc); },
1906  CodeLoc);
1907  }
1908 
1933  void *Src, sycl::range<3> SrcOffset, void *Dest,
1934  sycl::range<3> DestOffset,
1935  const ext::oneapi::experimental::image_descriptor &DeviceImgDesc,
1936  size_t DeviceRowPitch, sycl::range<3> HostExtent,
1937  sycl::range<3> CopyExtent, event DepEvent,
1939  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
1940  return submit(
1941  [&](handler &CGH) {
1942  CGH.depends_on(DepEvent);
1943  CGH.ext_oneapi_copy(Src, SrcOffset, Dest, DestOffset, DeviceImgDesc,
1944  DeviceRowPitch, HostExtent, CopyExtent);
1945  },
1946  CodeLoc);
1947  }
1948 
1963  void *Src, void *Dest,
1964  const ext::oneapi::experimental::image_descriptor &DeviceImgDesc,
1965  size_t DeviceRowPitch, const std::vector<event> &DepEvents,
1967  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
1968  return submit(
1969  [&](handler &CGH) {
1970  CGH.depends_on(DepEvents);
1971  CGH.ext_oneapi_copy(Src, Dest, DeviceImgDesc, DeviceRowPitch);
1972  },
1973  CodeLoc);
1974  }
1975 
2001  void *Src, sycl::range<3> SrcOffset, void *Dest,
2002  sycl::range<3> DestOffset,
2003  const ext::oneapi::experimental::image_descriptor &DeviceImgDesc,
2004  size_t DeviceRowPitch, sycl::range<3> HostExtent,
2005  sycl::range<3> CopyExtent, const std::vector<event> &DepEvents,
2007  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
2008  return submit(
2009  [&](handler &CGH) {
2010  CGH.depends_on(DepEvents);
2011  CGH.ext_oneapi_copy(Src, SrcOffset, Dest, DestOffset, DeviceImgDesc,
2012  DeviceRowPitch, HostExtent, CopyExtent);
2013  },
2014  CodeLoc);
2015  }
2016 
2025  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
2026  return submit(
2027  [&](handler &CGH) {
2028  CGH.ext_oneapi_wait_external_semaphore(SemaphoreHandle);
2029  },
2030  CodeLoc);
2031  }
2032 
2041  event DepEvent,
2043  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
2044  return submit(
2045  [&](handler &CGH) {
2046  CGH.depends_on(DepEvent);
2047  CGH.ext_oneapi_wait_external_semaphore(SemaphoreHandle);
2048  },
2049  CodeLoc);
2050  }
2051 
2061  const std::vector<event> &DepEvents,
2063  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
2064  return submit(
2065  [&](handler &CGH) {
2066  CGH.depends_on(DepEvents);
2067  CGH.ext_oneapi_wait_external_semaphore(SemaphoreHandle);
2068  },
2069  CodeLoc);
2070  }
2071 
2081  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
2082  return submit(
2083  [&](handler &CGH) {
2084  CGH.ext_oneapi_signal_external_semaphore(SemaphoreHandle);
2085  },
2086  CodeLoc);
2087  }
2088 
2098  event DepEvent,
2100  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
2101  return submit(
2102  [&](handler &CGH) {
2103  CGH.depends_on(DepEvent);
2104  CGH.ext_oneapi_signal_external_semaphore(SemaphoreHandle);
2105  },
2106  CodeLoc);
2107  }
2108 
2119  const std::vector<event> &DepEvents,
2121  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
2122  return submit(
2123  [&](handler &CGH) {
2124  CGH.depends_on(DepEvents);
2125  CGH.ext_oneapi_signal_external_semaphore(SemaphoreHandle);
2126  },
2127  CodeLoc);
2128  }
2129 
2135  template <typename KernelName = detail::auto_name, typename KernelType,
2136  typename PropertiesT>
2137  std::enable_if_t<
2140  PropertiesT Properties, _KERNELFUNCPARAM(KernelFunc),
2142  static_assert(
2143  (detail::check_fn_signature<std::remove_reference_t<KernelType>,
2144  void()>::value ||
2145  detail::check_fn_signature<std::remove_reference_t<KernelType>,
2146  void(kernel_handler)>::value),
2147  "sycl::queue.single_task() requires a kernel instead of command group. "
2148  "Use queue.submit() instead");
2149 
2150  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
2151  return submit(
2152  [&](handler &CGH) {
2153  CGH.template single_task<KernelName, KernelType, PropertiesT>(
2154  Properties, KernelFunc);
2155  },
2156  CodeLoc);
2157  }
2158 
2163  template <typename KernelName = detail::auto_name, typename KernelType>
2165  _KERNELFUNCPARAM(KernelFunc),
2167  return single_task<KernelName, KernelType>(
2168  ext::oneapi::experimental::empty_properties_t{}, KernelFunc, CodeLoc);
2169  }
2170 
2177  template <typename KernelName = detail::auto_name, typename KernelType,
2178  typename PropertiesT>
2179  std::enable_if_t<
2182  event DepEvent, PropertiesT Properties, _KERNELFUNCPARAM(KernelFunc),
2184  static_assert(
2185  (detail::check_fn_signature<std::remove_reference_t<KernelType>,
2186  void()>::value ||
2187  detail::check_fn_signature<std::remove_reference_t<KernelType>,
2188  void(kernel_handler)>::value),
2189  "sycl::queue.single_task() requires a kernel instead of command group. "
2190  "Use queue.submit() instead");
2191 
2192  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
2193  return submit(
2194  [&](handler &CGH) {
2195  CGH.depends_on(DepEvent);
2196  CGH.template single_task<KernelName, KernelType, PropertiesT>(
2197  Properties, KernelFunc);
2198  },
2199  CodeLoc);
2200  }
2201 
2207  template <typename KernelName = detail::auto_name, typename KernelType>
2209  event DepEvent, _KERNELFUNCPARAM(KernelFunc),
2211  return single_task<KernelName, KernelType>(
2212  DepEvent, ext::oneapi::experimental::empty_properties_t{}, KernelFunc,
2213  CodeLoc);
2214  }
2215 
2223  template <typename KernelName = detail::auto_name, typename KernelType,
2224  typename PropertiesT>
2225  std::enable_if_t<
2228  const std::vector<event> &DepEvents, PropertiesT Properties,
2229  _KERNELFUNCPARAM(KernelFunc),
2231  static_assert(
2232  (detail::check_fn_signature<std::remove_reference_t<KernelType>,
2233  void()>::value ||
2234  detail::check_fn_signature<std::remove_reference_t<KernelType>,
2235  void(kernel_handler)>::value),
2236  "sycl::queue.single_task() requires a kernel instead of command group. "
2237  "Use queue.submit() instead");
2238 
2239  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
2240  return submit(
2241  [&](handler &CGH) {
2242  CGH.depends_on(DepEvents);
2243  CGH.template single_task<KernelName, KernelType, PropertiesT>(
2244  Properties, KernelFunc);
2245  },
2246  CodeLoc);
2247  }
2248 
2255  template <typename KernelName = detail::auto_name, typename KernelType>
2257  const std::vector<event> &DepEvents, _KERNELFUNCPARAM(KernelFunc),
2259  return single_task<KernelName, KernelType>(
2260  DepEvents, ext::oneapi::experimental::empty_properties_t{}, KernelFunc,
2261  CodeLoc);
2262  }
2263 
2270  template <typename KernelName = detail::auto_name, typename... RestT>
2271  event parallel_for(range<1> Range, RestT &&...Rest) {
2272  return parallel_for_impl<KernelName>(Range, Rest...);
2273  }
2274 
2281  template <typename KernelName = detail::auto_name, typename... RestT>
2282  event parallel_for(range<2> Range, RestT &&...Rest) {
2283  return parallel_for_impl<KernelName>(Range, Rest...);
2284  }
2285 
2292  template <typename KernelName = detail::auto_name, typename... RestT>
2293  event parallel_for(range<3> Range, RestT &&...Rest) {
2294  return parallel_for_impl<KernelName>(Range, Rest...);
2295  }
2296 
2304  template <typename KernelName = detail::auto_name, typename... RestT>
2305  event parallel_for(range<1> Range, event DepEvent, RestT &&...Rest) {
2306  return parallel_for_impl<KernelName>(Range, DepEvent, Rest...);
2307  }
2308 
2316  template <typename KernelName = detail::auto_name, typename... RestT>
2317  event parallel_for(range<2> Range, event DepEvent, RestT &&...Rest) {
2318  return parallel_for_impl<KernelName>(Range, DepEvent, Rest...);
2319  }
2320 
2328  template <typename KernelName = detail::auto_name, typename... RestT>
2329  event parallel_for(range<3> Range, event DepEvent, RestT &&...Rest) {
2330  return parallel_for_impl<KernelName>(Range, DepEvent, Rest...);
2331  }
2332 
2341  template <typename KernelName = detail::auto_name, typename... RestT>
2342  event parallel_for(range<1> Range, const std::vector<event> &DepEvents,
2343  RestT &&...Rest) {
2344  return parallel_for_impl<KernelName>(Range, DepEvents, Rest...);
2345  }
2346 
2355  template <typename KernelName = detail::auto_name, typename... RestT>
2356  event parallel_for(range<2> Range, const std::vector<event> &DepEvents,
2357  RestT &&...Rest) {
2358  return parallel_for_impl<KernelName>(Range, DepEvents, Rest...);
2359  }
2360 
2369  template <typename KernelName = detail::auto_name, typename... RestT>
2370  event parallel_for(range<3> Range, const std::vector<event> &DepEvents,
2371  RestT &&...Rest) {
2372  return parallel_for_impl<KernelName>(Range, DepEvents, Rest...);
2373  }
2374 
2375  // While other shortcuts with offsets are able to go through parallel_for(...,
2376  // RestT &&...Rest), those that accept dependency events vector have to be
2377  // overloaded to allow implicit construction from an init-list.
2385  template <typename KernelName = detail::auto_name, typename KernelType,
2386  int Dim>
2387  event parallel_for(range<Dim> Range, id<Dim> WorkItemOffset,
2388  const std::vector<event> &DepEvents,
2389  _KERNELFUNCPARAM(KernelFunc)) {
2390  static_assert(1 <= Dim && Dim <= 3, "Invalid number of dimensions");
2391  return parallel_for_impl<KernelName>(Range, WorkItemOffset, DepEvents,
2392  KernelFunc);
2393  }
2394 
2402  template <typename KernelName = detail::auto_name, typename KernelType,
2403  int Dims>
2404  __SYCL2020_DEPRECATED("offsets are deprecated in SYCL 2020")
2405  event parallel_for_impl(range<Dims> Range, id<Dims> WorkItemOffset,
2406  _KERNELFUNCPARAM(KernelFunc)) {
2407  // Actual code location needs to be captured from KernelInfo object.
2408  const detail::code_location CodeLoc = {};
2409  return submit(
2410  [&](handler &CGH) {
2411  CGH.template parallel_for<KernelName>(Range, WorkItemOffset,
2412  KernelFunc);
2413  },
2414  CodeLoc);
2415  }
2416 
2425  template <typename KernelName = detail::auto_name, typename KernelType,
2426  int Dims>
2427  __SYCL2020_DEPRECATED("offsets are deprecated in SYCL 2020")
2428  event parallel_for_impl(range<Dims> Range, id<Dims> WorkItemOffset,
2429  event DepEvent, _KERNELFUNCPARAM(KernelFunc)) {
2430  // Actual code location needs to be captured from KernelInfo object.
2431  const detail::code_location CodeLoc = {};
2432  return submit(
2433  [&](handler &CGH) {
2434  CGH.depends_on(DepEvent);
2435  CGH.template parallel_for<KernelName>(Range, WorkItemOffset,
2436  KernelFunc);
2437  },
2438  CodeLoc);
2439  }
2440 
2450  template <typename KernelName = detail::auto_name, typename KernelType,
2451  int Dims>
2452  __SYCL2020_DEPRECATED("offsets are deprecated in SYCL 2020")
2453  event parallel_for_impl(range<Dims> Range, id<Dims> WorkItemOffset,
2454  const std::vector<event> &DepEvents,
2455  _KERNELFUNCPARAM(KernelFunc)) {
2456  // Actual code location needs to be captured from KernelInfo object.
2457  const detail::code_location CodeLoc = {};
2458  return submit(
2459  [&](handler &CGH) {
2460  CGH.depends_on(DepEvents);
2461  CGH.template parallel_for<KernelName>(Range, WorkItemOffset,
2462  KernelFunc);
2463  },
2464  CodeLoc);
2465  }
2466 
2474  template <typename KernelName = detail::auto_name, int Dims,
2475  typename PropertiesT, typename... RestT>
2476  std::enable_if_t<
2477  detail::AreAllButLastReductions<RestT...>::value &&
2479  event>
2480  parallel_for(nd_range<Dims> Range, PropertiesT Properties, RestT &&...Rest) {
2481  using KI = sycl::detail::KernelInfo<KernelName>;
2482  constexpr detail::code_location CodeLoc(
2483  KI::getFileName(), KI::getFunctionName(), KI::getLineNumber(),
2484  KI::getColumnNumber());
2485  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
2486  return submit(
2487  [&](handler &CGH) {
2488  CGH.template parallel_for<KernelName>(Range, Properties, Rest...);
2489  },
2490  CodeLoc);
2491  }
2492 
2499  template <typename KernelName = detail::auto_name, int Dims,
2500  typename... RestT>
2501  std::enable_if_t<detail::AreAllButLastReductions<RestT...>::value, event>
2502  parallel_for(nd_range<Dims> Range, RestT &&...Rest) {
2503  return parallel_for<KernelName>(
2505  }
2506 
2514  template <typename KernelName = detail::auto_name, int Dims,
2515  typename... RestT>
2516  event parallel_for(nd_range<Dims> Range, event DepEvent, RestT &&...Rest) {
2517  using KI = sycl::detail::KernelInfo<KernelName>;
2518  constexpr detail::code_location CodeLoc(
2519  KI::getFileName(), KI::getFunctionName(), KI::getLineNumber(),
2520  KI::getColumnNumber());
2521  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
2522  return submit(
2523  [&](handler &CGH) {
2524  CGH.depends_on(DepEvent);
2525  CGH.template parallel_for<KernelName>(Range, Rest...);
2526  },
2527  CodeLoc);
2528  }
2529 
2538  template <typename KernelName = detail::auto_name, int Dims,
2539  typename... RestT>
2540  event parallel_for(nd_range<Dims> Range, const std::vector<event> &DepEvents,
2541  RestT &&...Rest) {
2542  using KI = sycl::detail::KernelInfo<KernelName>;
2543  constexpr detail::code_location CodeLoc(
2544  KI::getFileName(), KI::getFunctionName(), KI::getLineNumber(),
2545  KI::getColumnNumber());
2546  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
2547  return submit(
2548  [&](handler &CGH) {
2549  CGH.depends_on(DepEvents);
2550  CGH.template parallel_for<KernelName>(Range, Rest...);
2551  },
2552  CodeLoc);
2553  }
2554 
2561  template <typename SrcT, int SrcDims, access_mode SrcMode, target SrcTgt,
2562  access::placeholder IsPlaceholder, typename DestT>
2563  event copy(
2565  std::shared_ptr<DestT> Dest,
2567  return submit(
2568  [&](handler &CGH) {
2569  CGH.require(Src);
2570  CGH.copy(Src, Dest);
2571  },
2572  CodeLoc);
2573  }
2574 
2581  template <typename SrcT, typename DestT, int DestDims, access_mode DestMode,
2583  event copy(
2584  std::shared_ptr<SrcT> Src,
2587  return submit(
2588  [&](handler &CGH) {
2589  CGH.require(Dest);
2590  CGH.copy(Src, Dest);
2591  },
2592  CodeLoc);
2593  }
2594 
2601  template <typename SrcT, int SrcDims, access_mode SrcMode, target SrcTgt,
2602  access::placeholder IsPlaceholder, typename DestT>
2603  event copy(
2606  return submit(
2607  [&](handler &CGH) {
2608  CGH.require(Src);
2609  CGH.copy(Src, Dest);
2610  },
2611  CodeLoc);
2612  }
2613 
2620  template <typename SrcT, typename DestT, int DestDims, access_mode DestMode,
2622  event copy(
2623  const SrcT *Src,
2626  return submit(
2627  [&](handler &CGH) {
2628  CGH.require(Dest);
2629  CGH.copy(Src, Dest);
2630  },
2631  CodeLoc);
2632  }
2633 
2640  template <typename SrcT, int SrcDims, access_mode SrcMode, target SrcTgt,
2641  access::placeholder IsSrcPlaceholder, typename DestT, int DestDims,
2642  access_mode DestMode, target DestTgt,
2643  access::placeholder IsDestPlaceholder>
2644  event copy(
2648  return submit(
2649  [&](handler &CGH) {
2650  CGH.require(Src);
2651  CGH.require(Dest);
2652  CGH.copy(Src, Dest);
2653  },
2654  CodeLoc);
2655  }
2656 
2662  template <typename T, int Dims, access_mode Mode, target Tgt,
2667  return submit(
2668  [&](handler &CGH) {
2669  CGH.require(Acc);
2670  CGH.update_host(Acc);
2671  },
2672  CodeLoc);
2673  }
2674 
2681  template <typename T, int Dims, access_mode Mode, target Tgt,
2683  event fill(
2686  return submit(
2687  [&](handler &CGH) {
2688  CGH.require(Dest);
2689  CGH.fill<T>(Dest, Src);
2690  },
2691  CodeLoc);
2692  }
2693 
2700  bool ext_codeplay_supports_fusion() const;
2701 
2702 // Clean KERNELFUNC macros.
2703 #undef _KERNELFUNCPARAM
2704 
2712  Graph,
2714  return submit([&](handler &CGH) { CGH.ext_oneapi_graph(Graph); }, CodeLoc);
2715  }
2716 
2726  Graph,
2727  event DepEvent,
2729  return submit(
2730  [&](handler &CGH) {
2731  CGH.depends_on(DepEvent);
2732  CGH.ext_oneapi_graph(Graph);
2733  },
2734  CodeLoc);
2735  }
2736 
2746  Graph,
2747  const std::vector<event> &DepEvents,
2749  return submit(
2750  [&](handler &CGH) {
2751  CGH.depends_on(DepEvents);
2752  CGH.ext_oneapi_graph(Graph);
2753  },
2754  CodeLoc);
2755  }
2756 
2760  bool is_in_order() const;
2761 
2765  backend get_backend() const noexcept;
2766 
2771  bool ext_oneapi_empty() const;
2772 
2773  pi_native_handle getNative(int32_t &NativeHandleDesc) const;
2774 
2775  event ext_oneapi_get_last_event() const;
2776 
2777  void ext_oneapi_set_external_event(const event &external_event);
2778 
2779 private:
2780  std::shared_ptr<detail::queue_impl> impl;
2781  queue(std::shared_ptr<detail::queue_impl> impl) : impl(impl) {}
2782 
2783  template <class Obj>
2784  friend decltype(Obj::impl) detail::getSyclObjImpl(const Obj &SyclObject);
2785  template <class T>
2786  friend T detail::createSyclObjFromImpl(decltype(T::impl) ImplObj);
2787 
2788  template <backend BackendName, class SyclObjectT>
2789  friend auto get_native(const SyclObjectT &Obj)
2791 
2792 #if __SYCL_USE_FALLBACK_ASSERT
2793  friend event detail::submitAssertCapture(queue &, event &, queue *,
2794  const detail::code_location &);
2795 #endif
2796 
2798  event submit_impl(std::function<void(handler &)> CGH,
2799  const detail::code_location &CodeLoc);
2801  event submit_impl(std::function<void(handler &)> CGH, queue secondQueue,
2802  const detail::code_location &CodeLoc);
2803 
2807  event discard_or_return(const event &Event);
2808 
2809  // Function to postprocess submitted command
2810  // Arguments:
2811  // bool IsKernel - true if the submitted command was kernel, false otherwise
2812  // bool KernelUsesAssert - true if submitted kernel uses assert, only
2813  // meaningful when IsKernel is true
2814  // event &Event - event after which post processing should be executed
2815  using SubmitPostProcessF = std::function<void(bool, bool, event &)>;
2816 
2822  event submit_impl_and_postprocess(std::function<void(handler &)> CGH,
2823  const detail::code_location &CodeLoc,
2824  const SubmitPostProcessF &PostProcess);
2831  event submit_impl_and_postprocess(std::function<void(handler &)> CGH,
2832  queue secondQueue,
2833  const detail::code_location &CodeLoc,
2834  const SubmitPostProcessF &PostProcess);
2835 
2842  template <typename KernelName, int Dims, typename PropertiesT,
2843  typename... RestT>
2844  std::enable_if_t<
2845  detail::AreAllButLastReductions<RestT...>::value &&
2847  event>
2848  parallel_for_impl(range<Dims> Range, PropertiesT Properties,
2849  RestT &&...Rest) {
2850  using KI = sycl::detail::KernelInfo<KernelName>;
2851  constexpr detail::code_location CodeLoc(
2852  KI::getFileName(), KI::getFunctionName(), KI::getLineNumber(),
2853  KI::getColumnNumber());
2854  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
2855  return submit(
2856  [&](handler &CGH) {
2857  CGH.template parallel_for<KernelName>(Range, Properties, Rest...);
2858  },
2859  CodeLoc);
2860  }
2861 
2867  template <typename KernelName, int Dims, typename... RestT>
2868  std::enable_if_t<detail::AreAllButLastReductions<RestT...>::value, event>
2869  parallel_for_impl(range<Dims> Range, RestT &&...Rest) {
2870  return parallel_for_impl<KernelName>(
2872  }
2873 
2881  template <typename KernelName, int Dims, typename PropertiesT,
2882  typename... RestT>
2883  std::enable_if_t<
2884  ext::oneapi::experimental::is_property_list<PropertiesT>::value, event>
2885  parallel_for_impl(range<Dims> Range, event DepEvent, PropertiesT Properties,
2886  RestT &&...Rest) {
2887  using KI = sycl::detail::KernelInfo<KernelName>;
2888  constexpr detail::code_location CodeLoc(
2889  KI::getFileName(), KI::getFunctionName(), KI::getLineNumber(),
2890  KI::getColumnNumber());
2891  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
2892  return submit(
2893  [&](handler &CGH) {
2894  CGH.depends_on(DepEvent);
2895  CGH.template parallel_for<KernelName>(Range, Properties, Rest...);
2896  },
2897  CodeLoc);
2898  }
2899 
2906  template <typename KernelName, int Dims, typename... RestT>
2907  event parallel_for_impl(range<Dims> Range, event DepEvent, RestT &&...Rest) {
2908  return parallel_for_impl<KernelName>(
2910  Rest...);
2911  }
2912 
2921  template <typename KernelName, int Dims, typename PropertiesT,
2922  typename... RestT>
2923  std::enable_if_t<
2924  ext::oneapi::experimental::is_property_list<PropertiesT>::value, event>
2925  parallel_for_impl(range<Dims> Range, const std::vector<event> &DepEvents,
2926  PropertiesT Properties, RestT &&...Rest) {
2927  using KI = sycl::detail::KernelInfo<KernelName>;
2928  constexpr detail::code_location CodeLoc(
2929  KI::getFileName(), KI::getFunctionName(), KI::getLineNumber(),
2930  KI::getColumnNumber());
2931  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
2932  return submit(
2933  [&](handler &CGH) {
2934  CGH.depends_on(DepEvents);
2935  CGH.template parallel_for<KernelName>(Range, Properties, Rest...);
2936  },
2937  CodeLoc);
2938  }
2939 
2947  template <typename KernelName, int Dims, typename... RestT>
2948  event parallel_for_impl(range<Dims> Range,
2949  const std::vector<event> &DepEvents,
2950  RestT &&...Rest) {
2951  return parallel_for_impl<KernelName>(
2953  Rest...);
2954  }
2955 
2956  event memcpyToDeviceGlobal(void *DeviceGlobalPtr, const void *Src,
2957  bool IsDeviceImageScope, size_t NumBytes,
2958  size_t Offset,
2959  const std::vector<event> &DepEvents);
2960  event memcpyFromDeviceGlobal(void *Dest, const void *DeviceGlobalPtr,
2961  bool IsDeviceImageScope, size_t NumBytes,
2962  size_t Offset,
2963  const std::vector<event> &DepEvents);
2964 };
2965 
2966 } // namespace _V1
2967 } // namespace sycl
2968 
2969 namespace std {
2970 template <> struct __SYCL_EXPORT hash<sycl::queue> {
2971  size_t operator()(const sycl::queue &Q) const;
2972 };
2973 } // namespace std
2974 
2975 #if __SYCL_USE_FALLBACK_ASSERT
2976 // Explicitly request format macros
2977 #ifndef __STDC_FORMAT_MACROS
2978 #define __STDC_FORMAT_MACROS 1
2979 #endif
2980 #include <cinttypes>
2981 
2982 namespace sycl {
2983 inline namespace _V1 {
2984 
2985 namespace detail {
2986 #define __SYCL_ASSERT_START 1
2987 
2988 namespace __sycl_service_kernel__ {
2989 class AssertInfoCopier;
2990 } // namespace __sycl_service_kernel__
2991 
3003 event submitAssertCapture(queue &Self, event &Event, queue *SecondaryQueue,
3004  const detail::code_location &CodeLoc) {
3005  buffer<detail::AssertHappened, 1> Buffer{1};
3006 
3007  event CopierEv, CheckerEv, PostCheckerEv;
3008  auto CopierCGF = [&](handler &CGH) {
3009  CGH.depends_on(Event);
3010 
3011  auto Acc = Buffer.get_access<access::mode::write>(CGH);
3012 
3013  CGH.single_task<__sycl_service_kernel__::AssertInfoCopier>([Acc] {
3014 #if defined(__SYCL_DEVICE_ONLY__) && !defined(__NVPTX__)
3015  __devicelib_assert_read(&Acc[0]);
3016 #else
3017  (void)Acc;
3018 #endif // defined(__SYCL_DEVICE_ONLY__) && !defined(__NVPTX__)
3019  });
3020  };
3021  auto CheckerCGF = [&CopierEv, &Buffer](handler &CGH) {
3022  CGH.depends_on(CopierEv);
3023  using mode = access::mode;
3024  using target = access::target;
3025 
3026  auto Acc = Buffer.get_access<mode::read, target::host_buffer>(CGH);
3027 
3028  CGH.host_task([=] {
3029  const detail::AssertHappened *AH = &Acc[0];
3030 
3031  // Don't use assert here as msvc will insert reference to __imp__wassert
3032  // which won't be properly resolved in separate compile use-case
3033 #ifndef NDEBUG
3034  if (AH->Flag == __SYCL_ASSERT_START)
3035  throw sycl::exception(
3037  "Internal Error. Invalid value in assert description.");
3038 #endif
3039 
3040  if (AH->Flag) {
3041  const char *Expr = AH->Expr[0] ? AH->Expr : "<unknown expr>";
3042  const char *File = AH->File[0] ? AH->File : "<unknown file>";
3043  const char *Func = AH->Func[0] ? AH->Func : "<unknown func>";
3044 
3045  fprintf(stderr,
3046  "%s:%d: %s: global id: [%" PRIu64 ",%" PRIu64 ",%" PRIu64
3047  "], local id: [%" PRIu64 ",%" PRIu64 ",%" PRIu64 "] "
3048  "Assertion `%s` failed.\n",
3049  File, AH->Line, Func, AH->GID0, AH->GID1, AH->GID2, AH->LID0,
3050  AH->LID1, AH->LID2, Expr);
3051  fflush(stderr);
3052  abort(); // no need to release memory as it's abort anyway
3053  }
3054  });
3055  };
3056 
3057  if (SecondaryQueue) {
3058  CopierEv = Self.submit_impl(CopierCGF, *SecondaryQueue, CodeLoc);
3059  CheckerEv = Self.submit_impl(CheckerCGF, *SecondaryQueue, CodeLoc);
3060  } else {
3061  CopierEv = Self.submit_impl(CopierCGF, CodeLoc);
3062  CheckerEv = Self.submit_impl(CheckerCGF, CodeLoc);
3063  }
3064 
3065  return CheckerEv;
3066 }
3067 #undef __SYCL_ASSERT_START
3068 } // namespace detail
3069 
3070 } // namespace _V1
3071 } // namespace sycl
3072 #endif // __SYCL_USE_FALLBACK_ASSERT
The file contains implementations of accessor class.
The context class represents a SYCL context on which kernel functions may be executed.
Definition: context.hpp:50
This class is the default KernelName template parameter type for kernel invocation APIs such as singl...
Definition: kernel.hpp:45
Data type that manages the code_location information in TLS.
Definition: common.hpp:129
The SYCL device class encapsulates a single SYCL device on which kernels may be executed.
Definition: device.hpp:64
An event object can be used to synchronize memory transfers, enqueues of kernels and signaling barrie...
Definition: event.hpp:44
Command group handler class.
Definition: handler.hpp:458
void depends_on(event Event)
Registers event dependencies on this command group.
Definition: handler.cpp:1427
void ext_oneapi_wait_external_semaphore(sycl::ext::oneapi::experimental::interop_semaphore_handle SemaphoreHandle)
Instruct the queue with a non-blocking wait on an external semaphore.
Definition: handler.cpp:1383
void copy(accessor< T_Src, Dims, AccessMode, AccessTarget, IsPlaceholder > Src, std::shared_ptr< T_Dst > Dst)
Copies the content of memory object accessed by Src into the memory pointed by Dst.
Definition: handler.hpp:2594
void ext_oneapi_graph(ext::oneapi::experimental::command_graph< ext::oneapi::experimental::graph_state::executable > Graph)
Executes a command_graph.
Definition: handler.cpp:1604
void memcpy(void *Dest, const void *Src, size_t Count)
Copies data from one memory region to another, each is either a host pointer or a pointer within USM ...
Definition: handler.cpp:946
void ext_oneapi_copy(void *Src, ext::oneapi::experimental::image_mem_handle Dest, const ext::oneapi::experimental::image_descriptor &DestImgDesc)
Copies data from one memory region to another, where Src is a USM pointer and Dest is an opaque image...
Definition: handler.cpp:1015
void require(accessor< DataT, Dims, AccMode, AccTarget, isPlaceholder > Acc)
Requires access to the memory object associated with the placeholder accessor.
Definition: handler.hpp:1905
void fill(accessor< T, Dims, AccessMode, AccessTarget, IsPlaceholder, PropertyListT > Dst, const T &Pattern)
Fills memory pointed by accessor with the pattern given.
Definition: handler.hpp:2823
void update_host(accessor< T, Dims, AccessMode, AccessTarget, IsPlaceholder > Acc)
Provides guarantees that the memory object accessed via Acc is updated on the host after command grou...
Definition: handler.hpp:2792
void ext_oneapi_memcpy2d(void *Dest, size_t DestPitch, const void *Src, size_t SrcPitch, size_t Width, size_t Height)
Copies data from one 2D memory region to another, both pointed by USM pointers.
Definition: memcpy2d.hpp:18
void prefetch(const void *Ptr, size_t Count)
Provides hints to the runtime library that data should be made available on a device earlier than Uni...
Definition: handler.cpp:963
void ext_oneapi_signal_external_semaphore(sycl::ext::oneapi::experimental::interop_semaphore_handle SemaphoreHandle)
Instruct the queue to signal the external semaphore once all previous commands have completed executi...
Definition: handler.cpp:1393
A unique identifier of an item in an index space.
Definition: id.hpp:36
Defines the iteration domain of both the work-groups and the overall dispatch.
Definition: nd_range.hpp:22
Objects of the property_list class are containers for the SYCL properties.
Encapsulates a single SYCL queue which schedules kernels on a SYCL device.
Definition: queue.hpp:111
event copy(accessor< SrcT, SrcDims, SrcMode, SrcTgt, IsPlaceholder > Src, DestT *Dest, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from a memory region pointed to by a placeholder accessor to another memory region pointe...
Definition: queue.hpp:2603
event copy(const std::remove_all_extents_t< T > *Src, ext::oneapi::experimental::device_global< T, PropertyListT > &Dest, size_t Count, size_t StartIndex, const std::vector< event > &DepEvents, const detail::code_location &CodeLoc=detail::code_location::current())
Copies elements of type std::remove_all_extents_t<T> from a USM memory region to a device_global.
Definition: queue.hpp:1253
void wait(const detail::code_location &CodeLoc=detail::code_location::current())
Performs a blocking wait for the completion of all enqueued tasks in the queue.
Definition: queue.hpp:435
event ext_oneapi_copy(void *Src, sycl::range< 3 > SrcOffset, void *Dest, sycl::range< 3 > DestOffset, const ext::oneapi::experimental::image_descriptor &DeviceImgDesc, size_t DeviceRowPitch, sycl::range< 3 > HostExtent, sycl::range< 3 > CopyExtent, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from one memory region to another, where Src and Dest are USM pointers.
Definition: queue.hpp:1799
event fill(void *Ptr, const T &Pattern, size_t Count, const std::vector< event > &DepEvents, const detail::code_location &CodeLoc=detail::code_location::current())
Fills the specified memory with the specified pattern.
Definition: queue.hpp:525
queue(const property_list &PropList={})
Constructs a SYCL queue instance using the device returned by an instance of default_selector.
Definition: queue.hpp:117
event fill(void *Ptr, const T &Pattern, size_t Count, event DepEvent, const detail::code_location &CodeLoc=detail::code_location::current())
Fills the specified memory with the specified pattern.
Definition: queue.hpp:503
event ext_oneapi_memcpy2d(void *Dest, size_t DestPitch, const void *Src, size_t SrcPitch, size_t Width, size_t Height, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from one 2D memory region to another, both pointed by USM pointers.
Definition: queue.hpp:815
queue & operator=(const queue &RHS)=default
event prefetch(const void *Ptr, size_t Count, const std::vector< event > &DepEvents, const detail::code_location &CodeLoc=detail::code_location::current())
Provides hints to the runtime library that data should be made available on a device earlier than Uni...
Definition: queue.hpp:783
std::enable_if_t< ext::oneapi::experimental::is_property_list< PropertiesT >::value, event > single_task(PropertiesT Properties, _KERNELFUNCPARAM(KernelFunc), const detail::code_location &CodeLoc=detail::code_location::current())
single_task version with a kernel represented as a lambda.
Definition: queue.hpp:2139
event ext_oneapi_copy(ext::oneapi::experimental::image_mem_handle Src, ext::oneapi::experimental::image_mem_handle Dest, const ext::oneapi::experimental::image_descriptor &ImageDesc, event DepEvent, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from device to device memory, where Src and Dest are opaque image memory handles.
Definition: queue.hpp:1850
event ext_oneapi_copy(ext::oneapi::experimental::image_mem_handle Src, ext::oneapi::experimental::image_mem_handle Dest, const ext::oneapi::experimental::image_descriptor &ImageDesc, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from device to device memory, where Src and Dest are opaque image memory handles.
Definition: queue.hpp:1898
event copy(const T *Src, T *Dest, size_t Count, const std::vector< event > &DepEvents, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from one memory region to another, each is either a host pointer or a pointer within USM ...
Definition: queue.hpp:684
queue(const context &syclContext, const DeviceSelector &deviceSelector, const async_handler &AsyncHandler, const property_list &propList={})
Constructs a SYCL queue instance using the device identified by the device selector provided.
Definition: queue.hpp:180
event parallel_for(range< 1 > Range, const std::vector< event > &DepEvents, RestT &&...Rest)
parallel_for version with a kernel represented as a lambda + range that specifies global size only.
Definition: queue.hpp:2342
event parallel_for(range< 3 > Range, const std::vector< event > &DepEvents, RestT &&...Rest)
parallel_for version with a kernel represented as a lambda + range that specifies global size only.
Definition: queue.hpp:2370
event ext_oneapi_copy(ext::oneapi::experimental::image_mem_handle Src, void *Dest, const ext::oneapi::experimental::image_descriptor &SrcImgDesc, event DepEvent, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from one memory region to another, where Src is an opaque image memory handle and Dest is...
Definition: queue.hpp:1636
event ext_oneapi_copy(ext::oneapi::experimental::image_mem_handle Src, sycl::range< 3 > SrcOffset, const ext::oneapi::experimental::image_descriptor &SrcImgDesc, void *Dest, sycl::range< 3 > DestOffset, sycl::range< 3 > DestExtent, sycl::range< 3 > CopyExtent, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from one memory region to another, where Src is an opaque image memory handle and Dest is...
Definition: queue.hpp:1610
std::enable_if_t< ext::oneapi::experimental::is_property_list< PropertiesT >::value, event > single_task(const std::vector< event > &DepEvents, PropertiesT Properties, _KERNELFUNCPARAM(KernelFunc), const detail::code_location &CodeLoc=detail::code_location::current())
single_task version with a kernel represented as a lambda.
Definition: queue.hpp:2227
event parallel_for(range< 1 > Range, RestT &&...Rest)
parallel_for version with a kernel represented as a lambda + range that specifies global size only.
Definition: queue.hpp:2271
event ext_oneapi_copy(void *Src, sycl::range< 3 > SrcOffset, void *Dest, sycl::range< 3 > DestOffset, const ext::oneapi::experimental::image_descriptor &DeviceImgDesc, size_t DeviceRowPitch, sycl::range< 3 > HostExtent, sycl::range< 3 > CopyExtent, event DepEvent, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from one memory region to another, where Src and Dest are USM pointers.
Definition: queue.hpp:1932
__SYCL2020_DEPRECATED("SYCL 1.2.1 device selectors are deprecated. Please " "use SYCL 2020 device selectors instead.") queue(const device_selector &DeviceSelector
Constructs a SYCL queue instance using the device returned by the DeviceSelector provided.
event parallel_for(range< Dim > Range, id< Dim > WorkItemOffset, const std::vector< event > &DepEvents, _KERNELFUNCPARAM(KernelFunc))
parallel_for version with a kernel represented as a lambda + range and offset that specify global siz...
Definition: queue.hpp:2387
event memcpy(ext::oneapi::experimental::device_global< T, PropertyListT > &Dest, const void *Src, size_t NumBytes=sizeof(T), size_t Offset=0, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from a USM memory region to a device_global.
Definition: queue.hpp:1150
event copy(std::shared_ptr< SrcT > Src, accessor< DestT, DestDims, DestMode, DestTgt, IsPlaceholder > Dest, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from a memory region pointed to by a shared_ptr to another memory region pointed to by a ...
Definition: queue.hpp:2583
queue(const context &syclContext, const DeviceSelector &deviceSelector, const property_list &propList={})
Constructs a SYCL queue instance using the device identified by the device selector provided.
Definition: queue.hpp:164
event ext_oneapi_graph(ext::oneapi::experimental::command_graph< ext::oneapi::experimental::graph_state::executable > Graph, const std::vector< event > &DepEvents, const detail::code_location &CodeLoc=detail::code_location::current())
Shortcut for executing a graph of commands with multiple dependencies.
Definition: queue.hpp:2743
event ext_oneapi_signal_external_semaphore(sycl::ext::oneapi::experimental::interop_semaphore_handle SemaphoreHandle, event DepEvent, const detail::code_location &CodeLoc=detail::code_location::current())
Instruct the queue to signal the external semaphore once all previous commands have completed executi...
Definition: queue.hpp:2096
event copy(accessor< SrcT, SrcDims, SrcMode, SrcTgt, IsSrcPlaceholder > Src, accessor< DestT, DestDims, DestMode, DestTgt, IsDestPlaceholder > Dest, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from one memory region to another, both pointed by placeholder accessors.
Definition: queue.hpp:2644
event copy(const T *Src, T *Dest, size_t Count, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from one memory region to another, each is either a host pointer or a pointer within USM ...
Definition: queue.hpp:642
event parallel_for(range< 1 > Range, event DepEvent, RestT &&...Rest)
parallel_for version with a kernel represented as a lambda + range that specifies global size only.
Definition: queue.hpp:2305
event ext_oneapi_copy(void *Src, ext::oneapi::experimental::image_mem_handle Dest, const ext::oneapi::experimental::image_descriptor &DestImgDesc, const std::vector< event > &DepEvents, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from one memory region to another, where Src is a USM pointer and Dest is an opaque image...
Definition: queue.hpp:1517
queue(queue &&RHS)=default
event ext_oneapi_wait_external_semaphore(sycl::ext::oneapi::experimental::interop_semaphore_handle SemaphoreHandle, const detail::code_location &CodeLoc=detail::code_location::current())
Instruct the queue with a non-blocking wait on an external semaphore.
Definition: queue.hpp:2022
event parallel_for(range< 3 > Range, event DepEvent, RestT &&...Rest)
parallel_for version with a kernel represented as a lambda + range that specifies global size only.
Definition: queue.hpp:2329
event parallel_for(range< 3 > Range, RestT &&...Rest)
parallel_for version with a kernel represented as a lambda + range that specifies global size only.
Definition: queue.hpp:2293
event update_host(accessor< T, Dims, Mode, Tgt, IsPlaceholder > Acc, const detail::code_location &CodeLoc=detail::code_location::current())
Provides guarantees that the memory object accessed via Acc is updated on the host after operation is...
Definition: queue.hpp:2664
event copy(const T *Src, T *Dest, size_t Count, event DepEvent, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from one memory region to another, each is either a host pointer or a pointer within USM ...
Definition: queue.hpp:663
event ext_oneapi_copy(void *Src, sycl::range< 3 > SrcOffset, void *Dest, sycl::range< 3 > DestOffset, const ext::oneapi::experimental::image_descriptor &DeviceImgDesc, size_t DeviceRowPitch, sycl::range< 3 > HostExtent, sycl::range< 3 > CopyExtent, const std::vector< event > &DepEvents, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from one memory region to another, where Src and Dest are USM pointers.
Definition: queue.hpp:2000
event ext_oneapi_copy(void *Src, ext::oneapi::experimental::image_mem_handle Dest, const ext::oneapi::experimental::image_descriptor &DestImgDesc, event DepEvent, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from one memory region to another, where Src is a USM pointer and Dest is an opaque image...
Definition: queue.hpp:1453
event ext_oneapi_wait_external_semaphore(sycl::ext::oneapi::experimental::interop_semaphore_handle SemaphoreHandle, event DepEvent, const detail::code_location &CodeLoc=detail::code_location::current())
Instruct the queue with a non-blocking wait on an external semaphore.
Definition: queue.hpp:2039
event ext_oneapi_copy(ext::oneapi::experimental::image_mem_handle Src, void *Dest, const ext::oneapi::experimental::image_descriptor &SrcImgDesc, const std::vector< event > &DepEvents, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from one memory region to another, where Src is an opaque image memory handle and Dest is...
Definition: queue.hpp:1700
event copy(const ext::oneapi::experimental::device_global< T, PropertyListT > &Src, std::remove_all_extents_t< T > *Dest, size_t Count=sizeof(T)/sizeof(std::remove_all_extents_t< T >), size_t StartIndex=0, const detail::code_location &CodeLoc=detail::code_location::current())
Copies elements of type std::remove_all_extents_t<T> from a device_global to a USM memory region.
Definition: queue.hpp:1374
event prefetch(const void *Ptr, size_t Count, event DepEvent, const detail::code_location &CodeLoc=detail::code_location::current())
Provides hints to the runtime library that data should be made available on a device earlier than Uni...
Definition: queue.hpp:762
event ext_oneapi_copy(ext::oneapi::experimental::image_mem_handle Src, sycl::range< 3 > SrcOffset, const ext::oneapi::experimental::image_descriptor &SrcImgDesc, void *Dest, sycl::range< 3 > DestOffset, sycl::range< 3 > DestExtent, sycl::range< 3 > CopyExtent, event DepEvent, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from one memory region to another, where Src is an opaque image memory handle and Dest is...
Definition: queue.hpp:1672
queue(const DeviceSelector &deviceSelector, const property_list &PropList={})
Constructs a SYCL queue instance using the device identified by the device selector provided.
Definition: queue.hpp:150
__SYCL2020_DEPRECATED("is_host() is deprecated as the host device is no longer supported.") bool is_host() const
event ext_oneapi_copy(void *Src, sycl::range< 3 > SrcOffset, sycl::range< 3 > SrcExtent, ext::oneapi::experimental::image_mem_handle Dest, sycl::range< 3 > DestOffset, const ext::oneapi::experimental::image_descriptor &DestImgDesc, sycl::range< 3 > CopyExtent, event DepEvent, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from one memory region to another, where Src is a USM pointer and Dest is an opaque image...
Definition: queue.hpp:1487
event memcpy(void *Dest, const ext::oneapi::experimental::device_global< T, PropertyListT > &Src, size_t NumBytes=sizeof(T), size_t Offset=0, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from a device_global to USM memory.
Definition: queue.hpp:1230
event copy(const ext::oneapi::experimental::device_global< T, PropertyListT > &Src, std::remove_all_extents_t< T > *Dest, size_t Count, size_t StartIndex, const std::vector< event > &DepEvents, const detail::code_location &CodeLoc=detail::code_location::current())
Copies elements of type std::remove_all_extents_t<T> from a device_global to a USM memory region.
Definition: queue.hpp:1326
event parallel_for(range< 2 > Range, RestT &&...Rest)
parallel_for version with a kernel represented as a lambda + range that specifies global size only.
Definition: queue.hpp:2282
queue(const async_handler &AsyncHandler, const property_list &PropList={})
Constructs a SYCL queue instance with an async_handler using the device returned by an instance of de...
Definition: queue.hpp:125
event parallel_for(nd_range< Dims > Range, const std::vector< event > &DepEvents, RestT &&...Rest)
parallel_for version with a kernel represented as a lambda + nd_range that specifies global,...
Definition: queue.hpp:2540
event ext_oneapi_copy(void *Src, sycl::range< 3 > SrcOffset, sycl::range< 3 > SrcExtent, ext::oneapi::experimental::image_mem_handle Dest, sycl::range< 3 > DestOffset, const ext::oneapi::experimental::image_descriptor &DestImgDesc, sycl::range< 3 > CopyExtent, const std::vector< event > &DepEvents, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from one memory region to another, where Src is a USM pointer and Dest is an opaque image...
Definition: queue.hpp:1552
event parallel_for(nd_range< Dims > Range, event DepEvent, RestT &&...Rest)
parallel_for version with a kernel represented as a lambda + nd_range that specifies global,...
Definition: queue.hpp:2516
queue & operator=(queue &&RHS)=default
event ext_oneapi_copy(void *Src, void *Dest, const ext::oneapi::experimental::image_descriptor &DeviceImgDesc, size_t DeviceRowPitch, const std::vector< event > &DepEvents, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from one memory region to another, where Src and Dest are USM pointers.
Definition: queue.hpp:1962
queue(const DeviceSelector &deviceSelector, const async_handler &AsyncHandler, const property_list &PropList={})
Constructs a SYCL queue instance using the device identified by the device selector provided.
Definition: queue.hpp:137
event ext_oneapi_graph(ext::oneapi::experimental::command_graph< ext::oneapi::experimental::graph_state::executable > Graph, const detail::code_location &CodeLoc=detail::code_location::current())
Shortcut for executing a graph of commands.
Definition: queue.hpp:2709
event ext_oneapi_signal_external_semaphore(sycl::ext::oneapi::experimental::interop_semaphore_handle SemaphoreHandle, const detail::code_location &CodeLoc=detail::code_location::current())
Instruct the queue to signal the external semaphore once all previous commands have completed executi...
Definition: queue.hpp:2078
__SYCL2020_DEPRECATED("SYCL 1.2.1 device selectors are deprecated. Please " "use SYCL 2020 device selectors instead.") queue(const context &SyclContext
Constructs a SYCL queue instance that is associated with the context provided, using the device retur...
event ext_oneapi_copy(void *Src, ext::oneapi::experimental::image_mem_handle Dest, const ext::oneapi::experimental::image_descriptor &DestImgDesc, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from one memory region to another, where Src is a USM pointer and Dest is an opaque image...
Definition: queue.hpp:1396
event single_task(event DepEvent, _KERNELFUNCPARAM(KernelFunc), const detail::code_location &CodeLoc=detail::code_location::current())
single_task version with a kernel represented as a lambda.
Definition: queue.hpp:2208
event ext_oneapi_wait_external_semaphore(sycl::ext::oneapi::experimental::interop_semaphore_handle SemaphoreHandle, const std::vector< event > &DepEvents, const detail::code_location &CodeLoc=detail::code_location::current())
Instruct the queue with a non-blocking wait on an external semaphore.
Definition: queue.hpp:2059
event single_task(_KERNELFUNCPARAM(KernelFunc), const detail::code_location &CodeLoc=detail::code_location::current())
single_task version with a kernel represented as a lambda.
Definition: queue.hpp:2164
event fill(accessor< T, Dims, Mode, Tgt, IsPlaceholder > Dest, const T &Src, const detail::code_location &CodeLoc=detail::code_location::current())
Fills the specified memory with the specified data.
Definition: queue.hpp:2683
bool operator==(const queue &RHS) const
Definition: queue.hpp:291
queue(const queue &RHS)=default
Constructs a SYCL queue with an optional async_handler from an OpenCL cl_command_queue.
event ext_oneapi_copy(ext::oneapi::experimental::image_mem_handle Src, ext::oneapi::experimental::image_mem_handle Dest, const ext::oneapi::experimental::image_descriptor &ImageDesc, const std::vector< event > &DepEvents, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from device to device memory, where Src and Dest are opaque image memory handles.
Definition: queue.hpp:1875
event memcpy(void *Dest, const ext::oneapi::experimental::device_global< T, PropertyListT > &Src, size_t NumBytes, size_t Offset, event DepEvent, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from a device_global to USM memory.
Definition: queue.hpp:1209
event memcpy(ext::oneapi::experimental::device_global< T, PropertyListT > &Dest, const void *Src, size_t NumBytes, size_t Offset, event DepEvent, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from a USM memory region to a device_global.
Definition: queue.hpp:1130
void wait_and_throw(const detail::code_location &CodeLoc=detail::code_location::current())
Performs a blocking wait for the completion of all enqueued tasks in the queue.
Definition: queue.hpp:449
event single_task(const std::vector< event > &DepEvents, _KERNELFUNCPARAM(KernelFunc), const detail::code_location &CodeLoc=detail::code_location::current())
single_task version with a kernel represented as a lambda.
Definition: queue.hpp:2256
event ext_oneapi_copy(void *Src, void *Dest, const ext::oneapi::experimental::image_descriptor &DeviceImgDesc, size_t DeviceRowPitch, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from one memory region to another, where Src and Dest are USM pointers.
Definition: queue.hpp:1764
event copy(const std::remove_all_extents_t< T > *Src, ext::oneapi::experimental::device_global< T, PropertyListT > &Dest, size_t Count=sizeof(T)/sizeof(std::remove_all_extents_t< T >), size_t StartIndex=0, const detail::code_location &CodeLoc=detail::code_location::current())
Copies elements of type std::remove_all_extents_t<T> from a USM memory region to a device_global.
Definition: queue.hpp:1301
event memcpy(ext::oneapi::experimental::device_global< T, PropertyListT > &Dest, const void *Src, size_t NumBytes, size_t Offset, const std::vector< event > &DepEvents, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from a USM memory region to a device_global.
Definition: queue.hpp:1090
event ext_oneapi_graph(ext::oneapi::experimental::command_graph< ext::oneapi::experimental::graph_state::executable > Graph, event DepEvent, const detail::code_location &CodeLoc=detail::code_location::current())
Shortcut for executing a graph of commands with a single dependency.
Definition: queue.hpp:2723
std::enable_if_t< detail::AreAllButLastReductions< RestT... >::value, event > parallel_for(nd_range< Dims > Range, RestT &&...Rest)
parallel_for version with a kernel represented as a lambda + nd_range that specifies global,...
Definition: queue.hpp:2502
event copy(const std::remove_all_extents_t< T > *Src, ext::oneapi::experimental::device_global< T, PropertyListT > &Dest, size_t Count, size_t StartIndex, event DepEvent, const detail::code_location &CodeLoc=detail::code_location::current())
Copies elements of type std::remove_all_extents_t<T> from a USM memory region to a device_global.
Definition: queue.hpp:1278
event copy(const SrcT *Src, accessor< DestT, DestDims, DestMode, DestTgt, IsPlaceholder > Dest, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from a memory region pointed to by a raw pointer to another memory region pointed to by a...
Definition: queue.hpp:2622
event copy(accessor< SrcT, SrcDims, SrcMode, SrcTgt, IsPlaceholder > Src, std::shared_ptr< DestT > Dest, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from a memory region pointed to by a placeholder accessor to another memory region pointe...
Definition: queue.hpp:2563
event ext_oneapi_copy(ext::oneapi::experimental::image_mem_handle Src, void *Dest, const ext::oneapi::experimental::image_descriptor &SrcImgDesc, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from one memory region to another, where Src is an opaque image memory handle and Dest is...
Definition: queue.hpp:1579
std::enable_if_t< ext::oneapi::experimental::is_property_list< PropertiesT >::value, event > single_task(event DepEvent, PropertiesT Properties, _KERNELFUNCPARAM(KernelFunc), const detail::code_location &CodeLoc=detail::code_location::current())
single_task version with a kernel represented as a lambda.
Definition: queue.hpp:2181
event ext_oneapi_copy(void *Src, void *Dest, const ext::oneapi::experimental::image_descriptor &DeviceImgDesc, size_t DeviceRowPitch, event DepEvent, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from one memory region to another, where Src and Dest are USM pointers.
Definition: queue.hpp:1827
event parallel_for(range< 2 > Range, event DepEvent, RestT &&...Rest)
parallel_for version with a kernel represented as a lambda + range that specifies global size only.
Definition: queue.hpp:2317
event ext_oneapi_copy(ext::oneapi::experimental::image_mem_handle Src, sycl::range< 3 > SrcOffset, const ext::oneapi::experimental::image_descriptor &SrcImgDesc, void *Dest, sycl::range< 3 > DestOffset, sycl::range< 3 > DestExtent, sycl::range< 3 > CopyExtent, const std::vector< event > &DepEvents, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from one memory region to another, where Src is an opaque image memory handle and Dest is...
Definition: queue.hpp:1737
event copy(const ext::oneapi::experimental::device_global< T, PropertyListT > &Src, std::remove_all_extents_t< T > *Dest, size_t Count, size_t StartIndex, event DepEvent, const detail::code_location &CodeLoc=detail::code_location::current())
Copies elements of type std::remove_all_extents_t<T> from a device_global to a USM memory region.
Definition: queue.hpp:1351
event ext_oneapi_signal_external_semaphore(sycl::ext::oneapi::experimental::interop_semaphore_handle SemaphoreHandle, const std::vector< event > &DepEvents, const detail::code_location &CodeLoc=detail::code_location::current())
Instruct the queue to signal the external semaphore once all previous commands have completed executi...
Definition: queue.hpp:2117
std::enable_if_t< std::is_invocable_r_v< void, T, handler & >, event > submit(T CGF, queue &SecondaryQueue, const detail::code_location &CodeLoc=detail::code_location::current())
Submits a command group function object to the queue, in order to be scheduled for execution on the d...
Definition: queue.hpp:381
event ext_oneapi_copy(void *Src, sycl::range< 3 > SrcOffset, sycl::range< 3 > SrcExtent, ext::oneapi::experimental::image_mem_handle Dest, sycl::range< 3 > DestOffset, const ext::oneapi::experimental::image_descriptor &DestImgDesc, sycl::range< 3 > CopyExtent, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from one memory region to another, where Src is a USM pointer and Dest is an opaque image...
Definition: queue.hpp:1425
event memcpy(void *Dest, const ext::oneapi::experimental::device_global< T, PropertyListT > &Src, size_t NumBytes, size_t Offset, const std::vector< event > &DepEvents, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from a device_global to USM memory.
Definition: queue.hpp:1171
event parallel_for(range< 2 > Range, const std::vector< event > &DepEvents, RestT &&...Rest)
parallel_for version with a kernel represented as a lambda + range that specifies global size only.
Definition: queue.hpp:2356
std::enable_if_t< detail::AreAllButLastReductions< RestT... >::value &&ext::oneapi::experimental::is_property_list< PropertiesT >::value, event > parallel_for(nd_range< Dims > Range, PropertiesT Properties, RestT &&...Rest)
parallel_for version with a kernel represented as a lambda + nd_range that specifies global,...
Definition: queue.hpp:2480
bool operator!=(const queue &RHS) const
Definition: queue.hpp:293
Defines the iteration domain of either a single work-group in a parallel dispatch,...
Definition: range.hpp:26
class __SYCL2020_DEPRECATED("Host device is no longer supported.") host_selector int default_selector_v(const device &dev)
Selects SYCL host device.
bool isDeviceGlobalUsedInKernel(const void *DeviceGlobalPtr)
Definition: handler.cpp:39
void defaultAsyncHandler(exception_list Exceptions)
decltype(Obj::impl) getSyclObjImpl(const Obj &SyclObject)
Definition: impl_utils.hpp:30
T createSyclObjFromImpl(decltype(T::impl) ImplObj)
Definition: impl_utils.hpp:48
device select_device(const DSelectorInvocableType &DeviceSelectorInvocable)
void mem_advise(handler &CGH, void *Ptr, size_t NumBytes, int Advice)
@ modifiable
In modifiable state, commands can be added to graph.
@ executable
In executable state, the graph is ready to execute.
static constexpr bool has_property()
constexpr device_has_key::value_t< Aspects... > device_has
Definition: properties.hpp:127
static constexpr auto get_property()
void submit(queue Q, CommandGroupFunc &&CGF)
void fill(sycl::handler &CGH, T *Ptr, const T &Pattern, size_t Count)
decltype(properties{}) empty_properties_t
Definition: properties.hpp:190
signed char __SYCL2020_DEPRECATED
Definition: aliases.hpp:94
pointer get() const
Definition: multi_ptr.hpp:544
auto get_native(const SyclObjectT &Obj) -> backend_return_t< BackendName, SyclObjectT >
Definition: backend.hpp:136
typename backend_traits< Backend >::template return_type< SyclType > backend_return_t
Definition: backend.hpp:87
class __SYCL_EBO __SYCL_SPECIAL_CLASS IsPlaceholder
std::function< void(sycl::exception_list)> async_handler
std::error_code make_error_code(sycl::errc E) noexcept
Constructs an error code using e and sycl_category()
Definition: exception.cpp:91
Definition: access.hpp:18
static device_ext & get_device(unsigned int id)
Util function to get a device by id.
Definition: device.hpp:777
uintptr_t pi_native_handle
Definition: pi.h:217
_pi_mem_advice
Definition: pi.h:599
#define _KERNELFUNCPARAM(a)
Definition: queue.hpp:63
_Abi const simd< _Tp, _Abi > & noexcept
Definition: simd.hpp:1324
Predicate returning true if all template type parameters except the last one are reductions.
static constexpr code_location current(const char *fileName=__CODELOC_FILE_NAME, const char *funcName=__CODELOC_FUNCTION, unsigned long lineNo=__CODELOC_LINE, unsigned long columnNo=__CODELOC_COLUMN) noexcept
Definition: common.hpp:68
A struct to describe the properties of an image.