DPC++ Runtime
Runtime libraries for oneAPI DPC++
queue.hpp
Go to the documentation of this file.
1 //==-------------------- queue.hpp - SYCL queue ----------------------------==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #pragma once
10 
11 #include <sycl/access/access.hpp> // for target, access...
12 #include <sycl/accessor.hpp> // for accessor
13 #include <sycl/aspects.hpp> // for aspect
14 #include <sycl/async_handler.hpp> // for async_handler
15 #include <sycl/backend_types.hpp> // for backend, backe...
16 #include <sycl/buffer.hpp> // for buffer
17 #include <sycl/context.hpp> // for context
18 #include <sycl/detail/assert_happened.hpp> // for AssertHappened
19 #include <sycl/detail/cg_types.hpp> // for check_fn_signa...
20 #include <sycl/detail/common.hpp> // for code_location
21 #include <sycl/detail/defines_elementary.hpp> // for __SYCL2020_DEP...
22 #include <sycl/detail/export.hpp> // for __SYCL_EXPORT
23 #include <sycl/detail/info_desc_helpers.hpp> // for is_queue_info_...
24 #include <sycl/detail/kernel_desc.hpp> // for KernelInfo
25 #include <sycl/detail/owner_less_base.hpp> // for OwnerLessBase
26 #include <sycl/detail/pi.h> // for pi_mem_advice
27 #include <sycl/device.hpp> // for device
28 #include <sycl/device_selector.hpp> // for device_selector
29 #include <sycl/event.hpp> // for event
30 #include <sycl/exception.hpp> // for make_error_code
31 #include <sycl/exception_list.hpp> // for defaultAsyncHa...
32 #include <sycl/ext/oneapi/bindless_images_descriptor.hpp> // for image_descriptor
33 #include <sycl/ext/oneapi/bindless_images_interop.hpp> // for interop_semaph...
34 #include <sycl/ext/oneapi/bindless_images_memory.hpp> // for image_mem_handle
35 #include <sycl/ext/oneapi/device_global/device_global.hpp> // for device_global
36 #include <sycl/ext/oneapi/device_global/properties.hpp> // for device_image_s...
37 #include <sycl/ext/oneapi/experimental/graph.hpp> // for command_graph...
38 #include <sycl/ext/oneapi/properties/properties.hpp> // for empty_properti...
39 #include <sycl/handler.hpp> // for handler, isDev...
40 #include <sycl/id.hpp> // for id
41 #include <sycl/kernel.hpp> // for auto_name
42 #include <sycl/kernel_handler.hpp> // for kernel_handler
43 #include <sycl/nd_range.hpp> // for nd_range
44 #include <sycl/property_list.hpp> // for property_list
45 #include <sycl/range.hpp> // for range
46 
47 #include <cstddef> // for size_t
48 #include <functional> // for function
49 #include <memory> // for shared_ptr, hash
50 #include <stdint.h> // for int32_t
51 #include <tuple> // for tuple
52 #include <type_traits> // for remove_all_ext...
53 #include <variant> // for hash
54 #include <vector> // for vector
55 
56 // having _TWO_ mid-param #ifdefs makes the functions very difficult to read.
57 // Here we simplify the KernelFunc param is simplified to be
58 // _KERNELFUNCPARAM(KernelFunc) Once the queue kernel functions are defined,
59 // these macros are #undef immediately.
60 // replace _KERNELFUNCPARAM(KernelFunc) with KernelType KernelFunc
61 // or const KernelType &KernelFunc
62 #ifdef __SYCL_NONCONST_FUNCTOR__
63 #define _KERNELFUNCPARAM(a) KernelType a
64 #else
65 #define _KERNELFUNCPARAM(a) const KernelType &a
66 #endif
67 
68 // Helper macro to identify if fallback assert is needed
69 // FIXME remove __NVPTX__ condition once devicelib supports CUDA
70 #if defined(SYCL_FALLBACK_ASSERT)
71 #define __SYCL_USE_FALLBACK_ASSERT SYCL_FALLBACK_ASSERT
72 #else
73 #define __SYCL_USE_FALLBACK_ASSERT 0
74 #endif
75 
76 namespace sycl {
77 inline namespace _V1 {
78 
79 // Forward declaration
80 class context;
81 class device;
82 class event;
83 class queue;
84 
85 template <backend BackendName, class SyclObjectT>
86 auto get_native(const SyclObjectT &Obj)
87  -> backend_return_t<BackendName, SyclObjectT>;
88 
89 namespace detail {
90 class queue_impl;
91 
92 #if __SYCL_USE_FALLBACK_ASSERT
93 inline event submitAssertCapture(queue &, event &, queue *,
94  const detail::code_location &);
95 #endif
96 } // namespace detail
97 
98 namespace ext {
99 namespace oneapi {
100 namespace experimental {
101 // State of a queue with regards to graph recording,
102 // returned by info::queue::state
104 } // namespace experimental
105 } // namespace oneapi
106 } // namespace ext
107 
119 class __SYCL_EXPORT queue : public detail::OwnerLessBase<queue> {
120 public:
125  explicit queue(const property_list &PropList = {})
127 
133  queue(const async_handler &AsyncHandler, const property_list &PropList = {})
134  : queue(default_selector_v, AsyncHandler, PropList) {}
135 
142  template <typename DeviceSelector,
143  typename =
144  detail::EnableIfSYCL2020DeviceSelectorInvocable<DeviceSelector>>
145  explicit queue(const DeviceSelector &deviceSelector,
146  const async_handler &AsyncHandler,
147  const property_list &PropList = {})
148  : queue(detail::select_device(deviceSelector), AsyncHandler, PropList) {}
149 
155  template <typename DeviceSelector,
156  typename =
157  detail::EnableIfSYCL2020DeviceSelectorInvocable<DeviceSelector>>
158  explicit queue(const DeviceSelector &deviceSelector,
159  const property_list &PropList = {})
160  : queue(detail::select_device(deviceSelector),
161  detail::defaultAsyncHandler, PropList) {}
162 
169  template <typename DeviceSelector,
170  typename =
171  detail::EnableIfSYCL2020DeviceSelectorInvocable<DeviceSelector>>
172  explicit queue(const context &syclContext,
173  const DeviceSelector &deviceSelector,
174  const property_list &propList = {})
175  : queue(syclContext, detail::select_device(deviceSelector, syclContext),
176  propList) {}
177 
185  template <typename DeviceSelector,
186  typename =
187  detail::EnableIfSYCL2020DeviceSelectorInvocable<DeviceSelector>>
188  explicit queue(const context &syclContext,
189  const DeviceSelector &deviceSelector,
190  const async_handler &AsyncHandler,
191  const property_list &propList = {})
192  : queue(syclContext, detail::select_device(deviceSelector, syclContext),
193  AsyncHandler, propList) {}
194 
200  __SYCL2020_DEPRECATED("SYCL 1.2.1 device selectors are deprecated. Please "
201  "use SYCL 2020 device selectors instead.")
202  queue(const device_selector &DeviceSelector,
203  const property_list &PropList = {})
204  : queue(DeviceSelector.select_device(), detail::defaultAsyncHandler,
205  PropList) {}
206 
213  __SYCL2020_DEPRECATED("SYCL 1.2.1 device selectors are deprecated. Please "
214  "use SYCL 2020 device selectors instead.")
215  queue(const device_selector &DeviceSelector,
216  const async_handler &AsyncHandler, const property_list &PropList = {})
217  : queue(DeviceSelector.select_device(), AsyncHandler, PropList) {}
218 
223  explicit queue(const device &SyclDevice, const property_list &PropList = {})
224  : queue(SyclDevice, detail::defaultAsyncHandler, PropList) {}
225 
232  explicit queue(const device &SyclDevice, const async_handler &AsyncHandler,
233  const property_list &PropList = {});
234 
241  __SYCL2020_DEPRECATED("SYCL 1.2.1 device selectors are deprecated. Please "
242  "use SYCL 2020 device selectors instead.")
243  queue(const context &SyclContext, const device_selector &DeviceSelector,
244  const property_list &PropList = {});
245 
254  __SYCL2020_DEPRECATED("SYCL 1.2.1 device selectors are deprecated. Please "
255  "use SYCL 2020 device selectors instead.")
256  queue(const context &SyclContext, const device_selector &DeviceSelector,
257  const async_handler &AsyncHandler, const property_list &PropList = {});
258 
265  queue(const context &SyclContext, const device &SyclDevice,
266  const property_list &PropList = {});
267 
275  queue(const context &SyclContext, const device &SyclDevice,
276  const async_handler &AsyncHandler, const property_list &PropList = {});
277 
286 #ifdef __SYCL_INTERNAL_API
287  queue(cl_command_queue ClQueue, const context &SyclContext,
288  const async_handler &AsyncHandler = {});
289 #endif
290 
291  queue(const queue &RHS) = default;
292 
293  queue(queue &&RHS) = default;
294 
295  queue &operator=(const queue &RHS) = default;
296 
297  queue &operator=(queue &&RHS) = default;
298 
299  bool operator==(const queue &RHS) const { return impl == RHS.impl; }
300 
301  bool operator!=(const queue &RHS) const { return !(*this == RHS); }
302 
305 #ifdef __SYCL_INTERNAL_API
306  cl_command_queue get() const;
307 #endif
308 
310  context get_context() const;
311 
313  device get_device() const;
314 
316  ext::oneapi::experimental::queue_state ext_oneapi_get_state() const;
317 
321  ext_oneapi_get_graph() const;
322 
325  "is_host() is deprecated as the host device is no longer supported.")
326  bool is_host() const;
327 
331  template <typename Param>
332  typename detail::is_queue_info_desc<Param>::return_type get_info() const;
333 
334 private:
335  // A shorthand for `get_device().has()' which is expected to be a bit quicker
336  // than the long version
337  bool device_has(aspect Aspect) const;
338 
339 public:
346  template <typename T>
347  std::enable_if_t<std::is_invocable_r_v<void, T, handler &>, event> submit(
348  T CGF,
349  const detail::code_location &CodeLoc = detail::code_location::current()) {
350  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
351 #if __SYCL_USE_FALLBACK_ASSERT
352  auto PostProcess = [this, &CodeLoc](bool IsKernel, bool KernelUsesAssert,
353  event &E) {
354  if (IsKernel && !device_has(aspect::ext_oneapi_native_assert) &&
355  KernelUsesAssert && !device_has(aspect::accelerator)) {
356  // __devicelib_assert_fail isn't supported by Device-side Runtime
357  // Linking against fallback impl of __devicelib_assert_fail is
358  // performed by program manager class
359  // Fallback assert isn't supported for FPGA
360  submitAssertCapture(*this, E, /* SecondaryQueue = */ nullptr, CodeLoc);
361  }
362  };
363 
364  return submit_impl_and_postprocess(CGF, CodeLoc, PostProcess);
365 #else
366  return submit_impl(CGF, CodeLoc);
367 #endif // __SYCL_USE_FALLBACK_ASSERT
368  }
369 
381  template <typename T>
382  std::enable_if_t<std::is_invocable_r_v<void, T, handler &>, event> submit(
383  T CGF, queue &SecondaryQueue,
385  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
386 #if __SYCL_USE_FALLBACK_ASSERT
387  auto PostProcess = [this, &SecondaryQueue, &CodeLoc](
388  bool IsKernel, bool KernelUsesAssert, event &E) {
389  if (IsKernel && !device_has(aspect::ext_oneapi_native_assert) &&
390  KernelUsesAssert && !device_has(aspect::accelerator)) {
391  // Only secondary queues on devices need to be added to the assert
392  // capture.
393  // __devicelib_assert_fail isn't supported by Device-side Runtime
394  // Linking against fallback impl of __devicelib_assert_fail is
395  // performed by program manager class
396  // Fallback assert isn't supported for FPGA
397  submitAssertCapture(*this, E, &SecondaryQueue, CodeLoc);
398  }
399  };
400 
401  return submit_impl_and_postprocess(CGF, SecondaryQueue, CodeLoc,
402  PostProcess);
403 #else
404  return submit_impl(CGF, SecondaryQueue, CodeLoc);
405 #endif // __SYCL_USE_FALLBACK_ASSERT
406  }
407 
415  event ext_oneapi_submit_barrier(
417 
427  event ext_oneapi_submit_barrier(
428  const std::vector<event> &WaitList,
430 
436  void wait(
438  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
439  wait_proxy(CodeLoc);
440  }
441 
452  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
453  wait_and_throw_proxy(CodeLoc);
454  }
455 
458  void wait_proxy(const detail::code_location &CodeLoc);
461  void wait_and_throw_proxy(const detail::code_location &CodeLoc);
462 
468  void throw_asynchronous();
469 
472  template <typename PropertyT> bool has_property() const noexcept;
473 
477  template <typename PropertyT> PropertyT get_property() const;
478 
486  template <typename T>
488  void *Ptr, const T &Pattern, size_t Count,
489  const detail::code_location &CodeLoc = detail::code_location::current()) {
490  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
491  return submit([&](handler &CGH) { CGH.fill<T>(Ptr, Pattern, Count); },
492  CodeLoc);
493  }
494 
503  template <typename T>
504  event fill(
505  void *Ptr, const T &Pattern, size_t Count, event DepEvent,
507  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
508  return submit(
509  [&](handler &CGH) {
510  CGH.depends_on(DepEvent);
511  CGH.fill<T>(Ptr, Pattern, Count);
512  },
513  CodeLoc);
514  }
515 
525  template <typename T>
526  event fill(
527  void *Ptr, const T &Pattern, size_t Count,
528  const std::vector<event> &DepEvents,
530  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
531  return submit(
532  [&](handler &CGH) {
533  CGH.depends_on(DepEvents);
534  CGH.fill<T>(Ptr, Pattern, Count);
535  },
536  CodeLoc);
537  }
538 
548  event memset(
549  void *Ptr, int Value, size_t Count,
551 
562  event memset(
563  void *Ptr, int Value, size_t Count, event DepEvent,
565 
577  event memset(
578  void *Ptr, int Value, size_t Count, const std::vector<event> &DepEvents,
580 
592  event memcpy(
593  void *Dest, const void *Src, size_t Count,
595 
608  event memcpy(
609  void *Dest, const void *Src, size_t Count, event DepEvent,
611 
625  event memcpy(
626  void *Dest, const void *Src, size_t Count,
627  const std::vector<event> &DepEvents,
629 
642  template <typename T>
643  event copy(
644  const T *Src, T *Dest, size_t Count,
646  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
647  return this->memcpy(Dest, Src, Count * sizeof(T));
648  }
649 
663  template <typename T>
664  event copy(
665  const T *Src, T *Dest, size_t Count, event DepEvent,
667  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
668  return this->memcpy(Dest, Src, Count * sizeof(T), DepEvent);
669  }
670 
684  template <typename T>
685  event copy(
686  const T *Src, T *Dest, size_t Count, const std::vector<event> &DepEvents,
688  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
689  return this->memcpy(Dest, Src, Count * sizeof(T), DepEvents);
690  }
691 
699  __SYCL2020_DEPRECATED("use the overload with int Advice instead")
700  event mem_advise(
701  const void *Ptr, size_t Length, pi_mem_advice Advice,
702  const detail::code_location &CodeLoc = detail::code_location::current());
703 
711  event mem_advise(
712  const void *Ptr, size_t Length, int Advice,
713  const detail::code_location &CodeLoc = detail::code_location::current());
714 
723  event mem_advise(
724  const void *Ptr, size_t Length, int Advice, event DepEvent,
725  const detail::code_location &CodeLoc = detail::code_location::current());
726 
736  event mem_advise(
737  const void *Ptr, size_t Length, int Advice,
738  const std::vector<event> &DepEvents,
739  const detail::code_location &CodeLoc = detail::code_location::current());
740 
749  const void *Ptr, size_t Count,
750  const detail::code_location &CodeLoc = detail::code_location::current()) {
751  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
752  return submit([=](handler &CGH) { CGH.prefetch(Ptr, Count); }, CodeLoc);
753  }
754 
763  event prefetch(
764  const void *Ptr, size_t Count, event DepEvent,
766  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
767  return submit(
768  [=](handler &CGH) {
769  CGH.depends_on(DepEvent);
770  CGH.prefetch(Ptr, Count);
771  },
772  CodeLoc);
773  }
774 
784  event prefetch(
785  const void *Ptr, size_t Count, const std::vector<event> &DepEvents,
787  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
788  return submit(
789  [=](handler &CGH) {
790  CGH.depends_on(DepEvents);
791  CGH.prefetch(Ptr, Count);
792  },
793  CodeLoc);
794  }
795 
814  template <typename T = unsigned char,
815  typename = std::enable_if_t<std::is_same_v<T, unsigned char>>>
817  void *Dest, size_t DestPitch, const void *Src, size_t SrcPitch,
818  size_t Width, size_t Height,
820  return submit(
821  [=](handler &CGH) {
822  CGH.ext_oneapi_memcpy2d<T>(Dest, DestPitch, Src, SrcPitch, Width,
823  Height);
824  },
825  CodeLoc);
826  }
827 
847  template <typename T = unsigned char,
848  typename = std::enable_if_t<std::is_same_v<T, unsigned char>>>
850  void *Dest, size_t DestPitch, const void *Src, size_t SrcPitch,
851  size_t Width, size_t Height, event DepEvent,
853  return submit(
854  [=](handler &CGH) {
855  CGH.depends_on(DepEvent);
856  CGH.ext_oneapi_memcpy2d<T>(Dest, DestPitch, Src, SrcPitch, Width,
857  Height);
858  },
859  CodeLoc);
860  }
861 
882  template <typename T = unsigned char,
883  typename = std::enable_if_t<std::is_same_v<T, unsigned char>>>
885  void *Dest, size_t DestPitch, const void *Src, size_t SrcPitch,
886  size_t Width, size_t Height, const std::vector<event> &DepEvents,
888  return submit(
889  [=](handler &CGH) {
890  CGH.depends_on(DepEvents);
891  CGH.ext_oneapi_memcpy2d<T>(Dest, DestPitch, Src, SrcPitch, Width,
892  Height);
893  },
894  CodeLoc);
895  }
896 
912  template <typename T>
914  const T *Src, size_t SrcPitch, T *Dest, size_t DestPitch, size_t Width,
915  size_t Height,
917  return submit(
918  [=](handler &CGH) {
919  CGH.ext_oneapi_copy2d<T>(Src, SrcPitch, Dest, DestPitch, Width,
920  Height);
921  },
922  CodeLoc);
923  }
924 
941  template <typename T>
943  const T *Src, size_t SrcPitch, T *Dest, size_t DestPitch, size_t Width,
944  size_t Height, event DepEvent,
946  return submit(
947  [=](handler &CGH) {
948  CGH.depends_on(DepEvent);
949  CGH.ext_oneapi_copy2d<T>(Src, SrcPitch, Dest, DestPitch, Width,
950  Height);
951  },
952  CodeLoc);
953  }
954 
972  template <typename T>
974  const T *Src, size_t SrcPitch, T *Dest, size_t DestPitch, size_t Width,
975  size_t Height, const std::vector<event> &DepEvents,
977  return submit(
978  [=](handler &CGH) {
979  CGH.depends_on(DepEvents);
980  CGH.ext_oneapi_copy2d<T>(Src, SrcPitch, Dest, DestPitch, Width,
981  Height);
982  },
983  CodeLoc);
984  }
985 
1002  template <typename T = unsigned char,
1003  typename = std::enable_if_t<std::is_same_v<T, unsigned char>>>
1005  void *Dest, size_t DestPitch, int Value, size_t Width, size_t Height,
1007  return submit(
1008  [=](handler &CGH) {
1009  CGH.ext_oneapi_memset2d<T>(Dest, DestPitch, Value, Width, Height);
1010  },
1011  CodeLoc);
1012  }
1013 
1031  template <typename T = unsigned char,
1032  typename = std::enable_if_t<std::is_same_v<T, unsigned char>>>
1034  void *Dest, size_t DestPitch, int Value, size_t Width, size_t Height,
1035  event DepEvent,
1037  return submit(
1038  [=](handler &CGH) {
1039  CGH.depends_on(DepEvent);
1040  CGH.ext_oneapi_memset2d<T>(Dest, DestPitch, Value, Width, Height);
1041  },
1042  CodeLoc);
1043  }
1044 
1063  template <typename T = unsigned char,
1064  typename = std::enable_if_t<std::is_same_v<T, unsigned char>>>
1066  void *Dest, size_t DestPitch, int Value, size_t Width, size_t Height,
1067  const std::vector<event> &DepEvents,
1069  return submit(
1070  [=](handler &CGH) {
1071  CGH.depends_on(DepEvents);
1072  CGH.ext_oneapi_memset2d<T>(Dest, DestPitch, Value, Width, Height);
1073  },
1074  CodeLoc);
1075  }
1076 
1090  template <typename T>
1092  void *Dest, size_t DestPitch, const T &Pattern, size_t Width,
1093  size_t Height,
1095  return submit(
1096  [=](handler &CGH) {
1097  CGH.ext_oneapi_fill2d<T>(Dest, DestPitch, Pattern, Width, Height);
1098  },
1099  CodeLoc);
1100  }
1101 
1116  template <typename T>
1118  void *Dest, size_t DestPitch, const T &Pattern, size_t Width,
1119  size_t Height, event DepEvent,
1121  return submit(
1122  [=](handler &CGH) {
1123  CGH.depends_on(DepEvent);
1124  CGH.ext_oneapi_fill2d<T>(Dest, DestPitch, Pattern, Width, Height);
1125  },
1126  CodeLoc);
1127  }
1128 
1144  template <typename T>
1146  void *Dest, size_t DestPitch, const T &Pattern, size_t Width,
1147  size_t Height, const std::vector<event> &DepEvents,
1149  return submit(
1150  [=](handler &CGH) {
1151  CGH.depends_on(DepEvents);
1152  CGH.ext_oneapi_fill2d<T>(Dest, DestPitch, Pattern, Width, Height);
1153  },
1154  CodeLoc);
1155  }
1156 
1169  template <typename T, typename PropertyListT>
1170  event memcpy(
1172  const void *Src, size_t NumBytes, size_t Offset,
1173  const std::vector<event> &DepEvents,
1175  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
1176  if (sizeof(T) < Offset + NumBytes)
1178  "Copy to device_global is out of bounds.");
1179 
1180  if (!detail::isDeviceGlobalUsedInKernel(&Dest)) {
1181  // device_global is unregistered so we need a fallback. We let the handler
1182  // implement this fallback.
1183  return submit(
1184  [&](handler &CGH) {
1185  CGH.depends_on(DepEvents);
1186  return CGH.memcpy(Dest, Src, NumBytes, Offset);
1187  },
1188  CodeLoc);
1189  }
1190 
1191  constexpr bool IsDeviceImageScoped = PropertyListT::template has_property<
1193  return memcpyToDeviceGlobal(&Dest, Src, IsDeviceImageScoped, NumBytes,
1194  Offset, DepEvents);
1195  }
1196 
1209  template <typename T, typename PropertyListT>
1210  event memcpy(
1212  const void *Src, size_t NumBytes, size_t Offset, event DepEvent,
1214  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
1215  return this->memcpy(Dest, Src, NumBytes, Offset,
1216  std::vector<event>{DepEvent});
1217  }
1218 
1229  template <typename T, typename PropertyListT>
1230  event memcpy(
1232  const void *Src, size_t NumBytes = sizeof(T), size_t Offset = 0,
1234  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
1235  return this->memcpy(Dest, Src, NumBytes, Offset, std::vector<event>{});
1236  }
1237 
1250  template <typename T, typename PropertyListT>
1251  event memcpy(
1252  void *Dest,
1254  size_t NumBytes, size_t Offset, const std::vector<event> &DepEvents,
1256  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
1257  if (sizeof(T) < Offset + NumBytes)
1259  "Copy from device_global is out of bounds.");
1260 
1262  // device_global is unregistered so we need a fallback. We let the handler
1263  // implement this fallback.
1264  return submit([&](handler &CGH) {
1265  CGH.depends_on(DepEvents);
1266  return CGH.memcpy(Dest, Src, NumBytes, Offset);
1267  });
1268  }
1269 
1270  constexpr bool IsDeviceImageScoped = PropertyListT::template has_property<
1272  return memcpyFromDeviceGlobal(Dest, &Src, IsDeviceImageScoped, NumBytes,
1273  Offset, DepEvents);
1274  }
1275 
1288  template <typename T, typename PropertyListT>
1289  event memcpy(
1290  void *Dest,
1292  size_t NumBytes, size_t Offset, event DepEvent,
1294  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
1295  return this->memcpy(Dest, Src, NumBytes, Offset,
1296  std::vector<event>{DepEvent});
1297  }
1298 
1309  template <typename T, typename PropertyListT>
1310  event memcpy(
1311  void *Dest,
1313  size_t NumBytes = sizeof(T), size_t Offset = 0,
1315  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
1316  return this->memcpy(Dest, Src, NumBytes, Offset, std::vector<event>{});
1317  }
1318 
1332  template <typename T, typename PropertyListT>
1333  event copy(
1334  const std::remove_all_extents_t<T> *Src,
1336  size_t Count, size_t StartIndex, const std::vector<event> &DepEvents,
1338  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
1339  return this->memcpy(Dest, Src, Count * sizeof(std::remove_all_extents_t<T>),
1340  StartIndex * sizeof(std::remove_all_extents_t<T>),
1341  DepEvents);
1342  }
1343 
1357  template <typename T, typename PropertyListT>
1358  event copy(
1359  const std::remove_all_extents_t<T> *Src,
1361  size_t Count, size_t StartIndex, event DepEvent,
1363  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
1364  return this->memcpy(Dest, Src, Count * sizeof(std::remove_all_extents_t<T>),
1365  StartIndex * sizeof(std::remove_all_extents_t<T>),
1366  DepEvent);
1367  }
1368 
1380  template <typename T, typename PropertyListT>
1381  event copy(
1382  const std::remove_all_extents_t<T> *Src,
1384  size_t Count = sizeof(T) / sizeof(std::remove_all_extents_t<T>),
1385  size_t StartIndex = 0,
1387  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
1388  return this->memcpy(Dest, Src, Count * sizeof(std::remove_all_extents_t<T>),
1389  StartIndex * sizeof(std::remove_all_extents_t<T>));
1390  }
1391 
1405  template <typename T, typename PropertyListT>
1406  event copy(
1408  std::remove_all_extents_t<T> *Dest, size_t Count, size_t StartIndex,
1409  const std::vector<event> &DepEvents,
1411  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
1412  return this->memcpy(Dest, Src, Count * sizeof(std::remove_all_extents_t<T>),
1413  StartIndex * sizeof(std::remove_all_extents_t<T>),
1414  DepEvents);
1415  }
1416 
1430  template <typename T, typename PropertyListT>
1431  event copy(
1433  std::remove_all_extents_t<T> *Dest, size_t Count, size_t StartIndex,
1434  event DepEvent,
1436  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
1437  return this->memcpy(Dest, Src, Count * sizeof(std::remove_all_extents_t<T>),
1438  StartIndex * sizeof(std::remove_all_extents_t<T>),
1439  DepEvent);
1440  }
1441 
1453  template <typename T, typename PropertyListT>
1454  event copy(
1456  std::remove_all_extents_t<T> *Dest,
1457  size_t Count = sizeof(T) / sizeof(std::remove_all_extents_t<T>),
1458  size_t StartIndex = 0,
1460  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
1461  return this->memcpy(Dest, Src, Count * sizeof(std::remove_all_extents_t<T>),
1462  StartIndex * sizeof(std::remove_all_extents_t<T>));
1463  }
1464 
1480  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
1481  return submit(
1482  [&](handler &CGH) { CGH.ext_oneapi_copy(Src, Dest, DestImgDesc); },
1483  CodeLoc);
1484  }
1485 
1506  void *Src, sycl::range<3> SrcOffset, sycl::range<3> SrcExtent,
1508  sycl::range<3> DestOffset,
1510  sycl::range<3> CopyExtent,
1512  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
1513  return submit(
1514  [&](handler &CGH) {
1515  CGH.ext_oneapi_copy(Src, SrcOffset, SrcExtent, Dest, DestOffset,
1516  DestImgDesc, CopyExtent);
1517  },
1518  CodeLoc);
1519  }
1520 
1536  event DepEvent,
1538  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
1539  return submit(
1540  [&](handler &CGH) {
1541  CGH.depends_on(DepEvent);
1542  CGH.ext_oneapi_copy(Src, Dest, DestImgDesc);
1543  },
1544  CodeLoc);
1545  }
1546 
1568  void *Src, sycl::range<3> SrcOffset, sycl::range<3> SrcExtent,
1570  sycl::range<3> DestOffset,
1572  sycl::range<3> CopyExtent, event DepEvent,
1574  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
1575  return submit(
1576  [&](handler &CGH) {
1577  CGH.depends_on(DepEvent);
1578  CGH.ext_oneapi_copy(Src, SrcOffset, SrcExtent, Dest, DestOffset,
1579  DestImgDesc, CopyExtent);
1580  },
1581  CodeLoc);
1582  }
1583 
1600  const std::vector<event> &DepEvents,
1602  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
1603  return submit(
1604  [&](handler &CGH) {
1605  CGH.depends_on(DepEvents);
1606  CGH.ext_oneapi_copy(Src, Dest, DestImgDesc);
1607  },
1608  CodeLoc);
1609  }
1610 
1633  void *Src, sycl::range<3> SrcOffset, sycl::range<3> SrcExtent,
1635  sycl::range<3> DestOffset,
1637  sycl::range<3> CopyExtent, const std::vector<event> &DepEvents,
1639  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
1640  return submit(
1641  [&](handler &CGH) {
1642  CGH.depends_on(DepEvents);
1643  CGH.ext_oneapi_copy(Src, SrcOffset, SrcExtent, Dest, DestOffset,
1644  DestImgDesc, CopyExtent);
1645  },
1646  CodeLoc);
1647  }
1648 
1663  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
1664  return submit(
1665  [&](handler &CGH) { CGH.ext_oneapi_copy(Src, Dest, SrcImgDesc); },
1666  CodeLoc);
1667  }
1668 
1692  const ext::oneapi::experimental::image_descriptor &SrcImgDesc, void *Dest,
1693  sycl::range<3> DestOffset, sycl::range<3> DestExtent,
1694  sycl::range<3> CopyExtent,
1696  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
1697  return submit(
1698  [&](handler &CGH) {
1699  CGH.ext_oneapi_copy(Src, SrcOffset, SrcImgDesc, Dest, DestOffset,
1700  DestExtent, CopyExtent);
1701  },
1702  CodeLoc);
1703  }
1704 
1719  event DepEvent,
1721  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
1722  return submit(
1723  [&](handler &CGH) {
1724  CGH.depends_on(DepEvent);
1725  CGH.ext_oneapi_copy(Src, Dest, SrcImgDesc);
1726  },
1727  CodeLoc);
1728  }
1729 
1754  const ext::oneapi::experimental::image_descriptor &SrcImgDesc, void *Dest,
1755  sycl::range<3> DestOffset, sycl::range<3> DestExtent,
1756  sycl::range<3> CopyExtent, event DepEvent,
1758  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
1759  return submit(
1760  [&](handler &CGH) {
1761  CGH.depends_on(DepEvent);
1762  CGH.ext_oneapi_copy(Src, SrcOffset, SrcImgDesc, Dest, DestOffset,
1763  DestExtent, CopyExtent);
1764  },
1765  CodeLoc);
1766  }
1767 
1783  const std::vector<event> &DepEvents,
1785  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
1786  return submit(
1787  [&](handler &CGH) {
1788  CGH.depends_on(DepEvents);
1789  CGH.ext_oneapi_copy(Src, Dest, SrcImgDesc);
1790  },
1791  CodeLoc);
1792  }
1793 
1819  const ext::oneapi::experimental::image_descriptor &SrcImgDesc, void *Dest,
1820  sycl::range<3> DestOffset, sycl::range<3> DestExtent,
1821  sycl::range<3> CopyExtent, const std::vector<event> &DepEvents,
1823  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
1824  return submit(
1825  [&](handler &CGH) {
1826  CGH.depends_on(DepEvents);
1827  CGH.ext_oneapi_copy(Src, SrcOffset, SrcImgDesc, Dest, DestOffset,
1828  DestExtent, CopyExtent);
1829  },
1830  CodeLoc);
1831  }
1832 
1845  void *Src, void *Dest,
1846  const ext::oneapi::experimental::image_descriptor &DeviceImgDesc,
1847  size_t DeviceRowPitch,
1849  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
1850  return submit(
1851  [&](handler &CGH) {
1852  CGH.ext_oneapi_copy(Src, Dest, DeviceImgDesc, DeviceRowPitch);
1853  },
1854  CodeLoc);
1855  }
1856 
1880  void *Src, sycl::range<3> SrcOffset, void *Dest,
1881  sycl::range<3> DestOffset,
1882  const ext::oneapi::experimental::image_descriptor &DeviceImgDesc,
1883  size_t DeviceRowPitch, sycl::range<3> HostExtent,
1884  sycl::range<3> CopyExtent,
1886  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
1887  return submit(
1888  [&](handler &CGH) {
1889  CGH.ext_oneapi_copy(Src, SrcOffset, Dest, DestOffset, DeviceImgDesc,
1890  DeviceRowPitch, HostExtent, CopyExtent);
1891  },
1892  CodeLoc);
1893  }
1894 
1908  void *Src, void *Dest,
1909  const ext::oneapi::experimental::image_descriptor &DeviceImgDesc,
1910  size_t DeviceRowPitch, event DepEvent,
1912  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
1913  return submit(
1914  [&](handler &CGH) {
1915  CGH.depends_on(DepEvent);
1916  CGH.ext_oneapi_copy(Src, Dest, DeviceImgDesc, DeviceRowPitch);
1917  },
1918  CodeLoc);
1919  }
1920 
1945  void *Src, sycl::range<3> SrcOffset, void *Dest,
1946  sycl::range<3> DestOffset,
1947  const ext::oneapi::experimental::image_descriptor &DeviceImgDesc,
1948  size_t DeviceRowPitch, sycl::range<3> HostExtent,
1949  sycl::range<3> CopyExtent, event DepEvent,
1951  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
1952  return submit(
1953  [&](handler &CGH) {
1954  CGH.depends_on(DepEvent);
1955  CGH.ext_oneapi_copy(Src, SrcOffset, Dest, DestOffset, DeviceImgDesc,
1956  DeviceRowPitch, HostExtent, CopyExtent);
1957  },
1958  CodeLoc);
1959  }
1960 
1975  void *Src, void *Dest,
1976  const ext::oneapi::experimental::image_descriptor &DeviceImgDesc,
1977  size_t DeviceRowPitch, const std::vector<event> &DepEvents,
1979  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
1980  return submit(
1981  [&](handler &CGH) {
1982  CGH.depends_on(DepEvents);
1983  CGH.ext_oneapi_copy(Src, Dest, DeviceImgDesc, DeviceRowPitch);
1984  },
1985  CodeLoc);
1986  }
1987 
2013  void *Src, sycl::range<3> SrcOffset, void *Dest,
2014  sycl::range<3> DestOffset,
2015  const ext::oneapi::experimental::image_descriptor &DeviceImgDesc,
2016  size_t DeviceRowPitch, sycl::range<3> HostExtent,
2017  sycl::range<3> CopyExtent, const std::vector<event> &DepEvents,
2019  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
2020  return submit(
2021  [&](handler &CGH) {
2022  CGH.depends_on(DepEvents);
2023  CGH.ext_oneapi_copy(Src, SrcOffset, Dest, DestOffset, DeviceImgDesc,
2024  DeviceRowPitch, HostExtent, CopyExtent);
2025  },
2026  CodeLoc);
2027  }
2028 
2037  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
2038  return submit(
2039  [&](handler &CGH) {
2040  CGH.ext_oneapi_wait_external_semaphore(SemaphoreHandle);
2041  },
2042  CodeLoc);
2043  }
2044 
2053  event DepEvent,
2055  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
2056  return submit(
2057  [&](handler &CGH) {
2058  CGH.depends_on(DepEvent);
2059  CGH.ext_oneapi_wait_external_semaphore(SemaphoreHandle);
2060  },
2061  CodeLoc);
2062  }
2063 
2073  const std::vector<event> &DepEvents,
2075  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
2076  return submit(
2077  [&](handler &CGH) {
2078  CGH.depends_on(DepEvents);
2079  CGH.ext_oneapi_wait_external_semaphore(SemaphoreHandle);
2080  },
2081  CodeLoc);
2082  }
2083 
2093  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
2094  return submit(
2095  [&](handler &CGH) {
2096  CGH.ext_oneapi_signal_external_semaphore(SemaphoreHandle);
2097  },
2098  CodeLoc);
2099  }
2100 
2110  event DepEvent,
2112  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
2113  return submit(
2114  [&](handler &CGH) {
2115  CGH.depends_on(DepEvent);
2116  CGH.ext_oneapi_signal_external_semaphore(SemaphoreHandle);
2117  },
2118  CodeLoc);
2119  }
2120 
2131  const std::vector<event> &DepEvents,
2133  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
2134  return submit(
2135  [&](handler &CGH) {
2136  CGH.depends_on(DepEvents);
2137  CGH.ext_oneapi_signal_external_semaphore(SemaphoreHandle);
2138  },
2139  CodeLoc);
2140  }
2141 
2147  template <typename KernelName = detail::auto_name, typename KernelType,
2148  typename PropertiesT>
2149  std::enable_if_t<
2152  PropertiesT Properties, _KERNELFUNCPARAM(KernelFunc),
2154  static_assert(
2155  (detail::check_fn_signature<std::remove_reference_t<KernelType>,
2156  void()>::value ||
2157  detail::check_fn_signature<std::remove_reference_t<KernelType>,
2158  void(kernel_handler)>::value),
2159  "sycl::queue.single_task() requires a kernel instead of command group. "
2160  "Use queue.submit() instead");
2161 
2162  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
2163  return submit(
2164  [&](handler &CGH) {
2165  CGH.template single_task<KernelName, KernelType, PropertiesT>(
2166  Properties, KernelFunc);
2167  },
2168  CodeLoc);
2169  }
2170 
2175  template <typename KernelName = detail::auto_name, typename KernelType>
2177  _KERNELFUNCPARAM(KernelFunc),
2179  return single_task<KernelName, KernelType>(
2180  ext::oneapi::experimental::empty_properties_t{}, KernelFunc, CodeLoc);
2181  }
2182 
2189  template <typename KernelName = detail::auto_name, typename KernelType,
2190  typename PropertiesT>
2191  std::enable_if_t<
2194  event DepEvent, PropertiesT Properties, _KERNELFUNCPARAM(KernelFunc),
2196  static_assert(
2197  (detail::check_fn_signature<std::remove_reference_t<KernelType>,
2198  void()>::value ||
2199  detail::check_fn_signature<std::remove_reference_t<KernelType>,
2200  void(kernel_handler)>::value),
2201  "sycl::queue.single_task() requires a kernel instead of command group. "
2202  "Use queue.submit() instead");
2203 
2204  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
2205  return submit(
2206  [&](handler &CGH) {
2207  CGH.depends_on(DepEvent);
2208  CGH.template single_task<KernelName, KernelType, PropertiesT>(
2209  Properties, KernelFunc);
2210  },
2211  CodeLoc);
2212  }
2213 
2219  template <typename KernelName = detail::auto_name, typename KernelType>
2221  event DepEvent, _KERNELFUNCPARAM(KernelFunc),
2223  return single_task<KernelName, KernelType>(
2224  DepEvent, ext::oneapi::experimental::empty_properties_t{}, KernelFunc,
2225  CodeLoc);
2226  }
2227 
2235  template <typename KernelName = detail::auto_name, typename KernelType,
2236  typename PropertiesT>
2237  std::enable_if_t<
2240  const std::vector<event> &DepEvents, PropertiesT Properties,
2241  _KERNELFUNCPARAM(KernelFunc),
2243  static_assert(
2244  (detail::check_fn_signature<std::remove_reference_t<KernelType>,
2245  void()>::value ||
2246  detail::check_fn_signature<std::remove_reference_t<KernelType>,
2247  void(kernel_handler)>::value),
2248  "sycl::queue.single_task() requires a kernel instead of command group. "
2249  "Use queue.submit() instead");
2250 
2251  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
2252  return submit(
2253  [&](handler &CGH) {
2254  CGH.depends_on(DepEvents);
2255  CGH.template single_task<KernelName, KernelType, PropertiesT>(
2256  Properties, KernelFunc);
2257  },
2258  CodeLoc);
2259  }
2260 
2267  template <typename KernelName = detail::auto_name, typename KernelType>
2269  const std::vector<event> &DepEvents, _KERNELFUNCPARAM(KernelFunc),
2271  return single_task<KernelName, KernelType>(
2272  DepEvents, ext::oneapi::experimental::empty_properties_t{}, KernelFunc,
2273  CodeLoc);
2274  }
2275 
2282  template <typename KernelName = detail::auto_name, typename... RestT>
2283  event parallel_for(range<1> Range, RestT &&...Rest) {
2284  return parallel_for_impl<KernelName>(Range, Rest...);
2285  }
2286 
2293  template <typename KernelName = detail::auto_name, typename... RestT>
2294  event parallel_for(range<2> Range, RestT &&...Rest) {
2295  return parallel_for_impl<KernelName>(Range, Rest...);
2296  }
2297 
2304  template <typename KernelName = detail::auto_name, typename... RestT>
2305  event parallel_for(range<3> Range, RestT &&...Rest) {
2306  return parallel_for_impl<KernelName>(Range, Rest...);
2307  }
2308 
2316  template <typename KernelName = detail::auto_name, typename... RestT>
2317  event parallel_for(range<1> Range, event DepEvent, RestT &&...Rest) {
2318  return parallel_for_impl<KernelName>(Range, DepEvent, Rest...);
2319  }
2320 
2328  template <typename KernelName = detail::auto_name, typename... RestT>
2329  event parallel_for(range<2> Range, event DepEvent, RestT &&...Rest) {
2330  return parallel_for_impl<KernelName>(Range, DepEvent, Rest...);
2331  }
2332 
2340  template <typename KernelName = detail::auto_name, typename... RestT>
2341  event parallel_for(range<3> Range, event DepEvent, RestT &&...Rest) {
2342  return parallel_for_impl<KernelName>(Range, DepEvent, Rest...);
2343  }
2344 
2353  template <typename KernelName = detail::auto_name, typename... RestT>
2354  event parallel_for(range<1> Range, const std::vector<event> &DepEvents,
2355  RestT &&...Rest) {
2356  return parallel_for_impl<KernelName>(Range, DepEvents, Rest...);
2357  }
2358 
2367  template <typename KernelName = detail::auto_name, typename... RestT>
2368  event parallel_for(range<2> Range, const std::vector<event> &DepEvents,
2369  RestT &&...Rest) {
2370  return parallel_for_impl<KernelName>(Range, DepEvents, Rest...);
2371  }
2372 
2381  template <typename KernelName = detail::auto_name, typename... RestT>
2382  event parallel_for(range<3> Range, const std::vector<event> &DepEvents,
2383  RestT &&...Rest) {
2384  return parallel_for_impl<KernelName>(Range, DepEvents, Rest...);
2385  }
2386 
2387  // While other shortcuts with offsets are able to go through parallel_for(...,
2388  // RestT &&...Rest), those that accept dependency events vector have to be
2389  // overloaded to allow implicit construction from an init-list.
2397  template <typename KernelName = detail::auto_name, typename KernelType,
2398  int Dim>
2399  event parallel_for(range<Dim> Range, id<Dim> WorkItemOffset,
2400  const std::vector<event> &DepEvents,
2401  _KERNELFUNCPARAM(KernelFunc)) {
2402  static_assert(1 <= Dim && Dim <= 3, "Invalid number of dimensions");
2403  return parallel_for_impl<KernelName>(Range, WorkItemOffset, DepEvents,
2404  KernelFunc);
2405  }
2406 
2414  template <typename KernelName = detail::auto_name, typename KernelType,
2415  int Dims>
2416  __SYCL2020_DEPRECATED("offsets are deprecated in SYCL 2020")
2417  event parallel_for_impl(range<Dims> Range, id<Dims> WorkItemOffset,
2418  _KERNELFUNCPARAM(KernelFunc)) {
2419  // Actual code location needs to be captured from KernelInfo object.
2420  const detail::code_location CodeLoc = {};
2421  return submit(
2422  [&](handler &CGH) {
2423  CGH.template parallel_for<KernelName>(Range, WorkItemOffset,
2424  KernelFunc);
2425  },
2426  CodeLoc);
2427  }
2428 
2437  template <typename KernelName = detail::auto_name, typename KernelType,
2438  int Dims>
2439  __SYCL2020_DEPRECATED("offsets are deprecated in SYCL 2020")
2440  event parallel_for_impl(range<Dims> Range, id<Dims> WorkItemOffset,
2441  event DepEvent, _KERNELFUNCPARAM(KernelFunc)) {
2442  // Actual code location needs to be captured from KernelInfo object.
2443  const detail::code_location CodeLoc = {};
2444  return submit(
2445  [&](handler &CGH) {
2446  CGH.depends_on(DepEvent);
2447  CGH.template parallel_for<KernelName>(Range, WorkItemOffset,
2448  KernelFunc);
2449  },
2450  CodeLoc);
2451  }
2452 
2462  template <typename KernelName = detail::auto_name, typename KernelType,
2463  int Dims>
2464  __SYCL2020_DEPRECATED("offsets are deprecated in SYCL 2020")
2465  event parallel_for_impl(range<Dims> Range, id<Dims> WorkItemOffset,
2466  const std::vector<event> &DepEvents,
2467  _KERNELFUNCPARAM(KernelFunc)) {
2468  // Actual code location needs to be captured from KernelInfo object.
2469  const detail::code_location CodeLoc = {};
2470  return submit(
2471  [&](handler &CGH) {
2472  CGH.depends_on(DepEvents);
2473  CGH.template parallel_for<KernelName>(Range, WorkItemOffset,
2474  KernelFunc);
2475  },
2476  CodeLoc);
2477  }
2478 
2486  template <typename KernelName = detail::auto_name, int Dims,
2487  typename PropertiesT, typename... RestT>
2488  std::enable_if_t<
2489  detail::AreAllButLastReductions<RestT...>::value &&
2491  event>
2492  parallel_for(nd_range<Dims> Range, PropertiesT Properties, RestT &&...Rest) {
2493  using KI = sycl::detail::KernelInfo<KernelName>;
2494  constexpr detail::code_location CodeLoc(
2495  KI::getFileName(), KI::getFunctionName(), KI::getLineNumber(),
2496  KI::getColumnNumber());
2497  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
2498  return submit(
2499  [&](handler &CGH) {
2500  CGH.template parallel_for<KernelName>(Range, Properties, Rest...);
2501  },
2502  CodeLoc);
2503  }
2504 
2511  template <typename KernelName = detail::auto_name, int Dims,
2512  typename... RestT>
2513  std::enable_if_t<detail::AreAllButLastReductions<RestT...>::value, event>
2514  parallel_for(nd_range<Dims> Range, RestT &&...Rest) {
2515  return parallel_for<KernelName>(
2517  }
2518 
2526  template <typename KernelName = detail::auto_name, int Dims,
2527  typename... RestT>
2528  event parallel_for(nd_range<Dims> Range, event DepEvent, RestT &&...Rest) {
2529  using KI = sycl::detail::KernelInfo<KernelName>;
2530  constexpr detail::code_location CodeLoc(
2531  KI::getFileName(), KI::getFunctionName(), KI::getLineNumber(),
2532  KI::getColumnNumber());
2533  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
2534  return submit(
2535  [&](handler &CGH) {
2536  CGH.depends_on(DepEvent);
2537  CGH.template parallel_for<KernelName>(Range, Rest...);
2538  },
2539  CodeLoc);
2540  }
2541 
2550  template <typename KernelName = detail::auto_name, int Dims,
2551  typename... RestT>
2552  event parallel_for(nd_range<Dims> Range, const std::vector<event> &DepEvents,
2553  RestT &&...Rest) {
2554  using KI = sycl::detail::KernelInfo<KernelName>;
2555  constexpr detail::code_location CodeLoc(
2556  KI::getFileName(), KI::getFunctionName(), KI::getLineNumber(),
2557  KI::getColumnNumber());
2558  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
2559  return submit(
2560  [&](handler &CGH) {
2561  CGH.depends_on(DepEvents);
2562  CGH.template parallel_for<KernelName>(Range, Rest...);
2563  },
2564  CodeLoc);
2565  }
2566 
2573  template <typename SrcT, int SrcDims, access_mode SrcMode, target SrcTgt,
2574  access::placeholder IsPlaceholder, typename DestT>
2575  event copy(
2577  std::shared_ptr<DestT> Dest,
2579  return submit(
2580  [&](handler &CGH) {
2581  CGH.require(Src);
2582  CGH.copy(Src, Dest);
2583  },
2584  CodeLoc);
2585  }
2586 
2593  template <typename SrcT, typename DestT, int DestDims, access_mode DestMode,
2595  event copy(
2596  std::shared_ptr<SrcT> Src,
2599  return submit(
2600  [&](handler &CGH) {
2601  CGH.require(Dest);
2602  CGH.copy(Src, Dest);
2603  },
2604  CodeLoc);
2605  }
2606 
2613  template <typename SrcT, int SrcDims, access_mode SrcMode, target SrcTgt,
2614  access::placeholder IsPlaceholder, typename DestT>
2615  event copy(
2618  return submit(
2619  [&](handler &CGH) {
2620  CGH.require(Src);
2621  CGH.copy(Src, Dest);
2622  },
2623  CodeLoc);
2624  }
2625 
2632  template <typename SrcT, typename DestT, int DestDims, access_mode DestMode,
2634  event copy(
2635  const SrcT *Src,
2638  return submit(
2639  [&](handler &CGH) {
2640  CGH.require(Dest);
2641  CGH.copy(Src, Dest);
2642  },
2643  CodeLoc);
2644  }
2645 
2652  template <typename SrcT, int SrcDims, access_mode SrcMode, target SrcTgt,
2653  access::placeholder IsSrcPlaceholder, typename DestT, int DestDims,
2654  access_mode DestMode, target DestTgt,
2655  access::placeholder IsDestPlaceholder>
2656  event copy(
2660  return submit(
2661  [&](handler &CGH) {
2662  CGH.require(Src);
2663  CGH.require(Dest);
2664  CGH.copy(Src, Dest);
2665  },
2666  CodeLoc);
2667  }
2668 
2674  template <typename T, int Dims, access_mode Mode, target Tgt,
2679  return submit(
2680  [&](handler &CGH) {
2681  CGH.require(Acc);
2682  CGH.update_host(Acc);
2683  },
2684  CodeLoc);
2685  }
2686 
2693  template <typename T, int Dims, access_mode Mode, target Tgt,
2695  event fill(
2698  return submit(
2699  [&](handler &CGH) {
2700  CGH.require(Dest);
2701  CGH.fill<T>(Dest, Src);
2702  },
2703  CodeLoc);
2704  }
2705 
2712  bool ext_codeplay_supports_fusion() const;
2713 
2714 // Clean KERNELFUNC macros.
2715 #undef _KERNELFUNCPARAM
2716 
2724  Graph,
2726  return submit([&](handler &CGH) { CGH.ext_oneapi_graph(Graph); }, CodeLoc);
2727  }
2728 
2738  Graph,
2739  event DepEvent,
2741  return submit(
2742  [&](handler &CGH) {
2743  CGH.depends_on(DepEvent);
2744  CGH.ext_oneapi_graph(Graph);
2745  },
2746  CodeLoc);
2747  }
2748 
2758  Graph,
2759  const std::vector<event> &DepEvents,
2761  return submit(
2762  [&](handler &CGH) {
2763  CGH.depends_on(DepEvents);
2764  CGH.ext_oneapi_graph(Graph);
2765  },
2766  CodeLoc);
2767  }
2768 
2772  bool is_in_order() const;
2773 
2777  backend get_backend() const noexcept;
2778 
2783  bool ext_oneapi_empty() const;
2784 
2785  pi_native_handle getNative(int32_t &NativeHandleDesc) const;
2786 
2787  event ext_oneapi_get_last_event() const;
2788 
2789  void ext_oneapi_set_external_event(const event &external_event);
2790 
2791 private:
2792  std::shared_ptr<detail::queue_impl> impl;
2793  queue(std::shared_ptr<detail::queue_impl> impl) : impl(impl) {}
2794 
2795  template <class Obj>
2796  friend decltype(Obj::impl) detail::getSyclObjImpl(const Obj &SyclObject);
2797  template <class T>
2798  friend T detail::createSyclObjFromImpl(decltype(T::impl) ImplObj);
2799 
2800  template <backend BackendName, class SyclObjectT>
2801  friend auto get_native(const SyclObjectT &Obj)
2803 
2804 #if __SYCL_USE_FALLBACK_ASSERT
2805  friend event detail::submitAssertCapture(queue &, event &, queue *,
2806  const detail::code_location &);
2807 #endif
2808 
2810  event submit_impl(std::function<void(handler &)> CGH,
2811  const detail::code_location &CodeLoc);
2813  event submit_impl(std::function<void(handler &)> CGH, queue secondQueue,
2814  const detail::code_location &CodeLoc);
2815 
2819  event discard_or_return(const event &Event);
2820 
2821  // Function to postprocess submitted command
2822  // Arguments:
2823  // bool IsKernel - true if the submitted command was kernel, false otherwise
2824  // bool KernelUsesAssert - true if submitted kernel uses assert, only
2825  // meaningful when IsKernel is true
2826  // event &Event - event after which post processing should be executed
2827  using SubmitPostProcessF = std::function<void(bool, bool, event &)>;
2828 
2834  event submit_impl_and_postprocess(std::function<void(handler &)> CGH,
2835  const detail::code_location &CodeLoc,
2836  const SubmitPostProcessF &PostProcess);
2843  event submit_impl_and_postprocess(std::function<void(handler &)> CGH,
2844  queue secondQueue,
2845  const detail::code_location &CodeLoc,
2846  const SubmitPostProcessF &PostProcess);
2847 
2854  template <typename KernelName, int Dims, typename PropertiesT,
2855  typename... RestT>
2856  std::enable_if_t<
2857  detail::AreAllButLastReductions<RestT...>::value &&
2859  event>
2860  parallel_for_impl(range<Dims> Range, PropertiesT Properties,
2861  RestT &&...Rest) {
2862  using KI = sycl::detail::KernelInfo<KernelName>;
2863  constexpr detail::code_location CodeLoc(
2864  KI::getFileName(), KI::getFunctionName(), KI::getLineNumber(),
2865  KI::getColumnNumber());
2866  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
2867  return submit(
2868  [&](handler &CGH) {
2869  CGH.template parallel_for<KernelName>(Range, Properties, Rest...);
2870  },
2871  CodeLoc);
2872  }
2873 
2879  template <typename KernelName, int Dims, typename... RestT>
2880  std::enable_if_t<detail::AreAllButLastReductions<RestT...>::value, event>
2881  parallel_for_impl(range<Dims> Range, RestT &&...Rest) {
2882  return parallel_for_impl<KernelName>(
2884  }
2885 
2893  template <typename KernelName, int Dims, typename PropertiesT,
2894  typename... RestT>
2895  std::enable_if_t<
2896  ext::oneapi::experimental::is_property_list<PropertiesT>::value, event>
2897  parallel_for_impl(range<Dims> Range, event DepEvent, PropertiesT Properties,
2898  RestT &&...Rest) {
2899  using KI = sycl::detail::KernelInfo<KernelName>;
2900  constexpr detail::code_location CodeLoc(
2901  KI::getFileName(), KI::getFunctionName(), KI::getLineNumber(),
2902  KI::getColumnNumber());
2903  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
2904  return submit(
2905  [&](handler &CGH) {
2906  CGH.depends_on(DepEvent);
2907  CGH.template parallel_for<KernelName>(Range, Properties, Rest...);
2908  },
2909  CodeLoc);
2910  }
2911 
2918  template <typename KernelName, int Dims, typename... RestT>
2919  event parallel_for_impl(range<Dims> Range, event DepEvent, RestT &&...Rest) {
2920  return parallel_for_impl<KernelName>(
2922  Rest...);
2923  }
2924 
2933  template <typename KernelName, int Dims, typename PropertiesT,
2934  typename... RestT>
2935  std::enable_if_t<
2936  ext::oneapi::experimental::is_property_list<PropertiesT>::value, event>
2937  parallel_for_impl(range<Dims> Range, const std::vector<event> &DepEvents,
2938  PropertiesT Properties, RestT &&...Rest) {
2939  using KI = sycl::detail::KernelInfo<KernelName>;
2940  constexpr detail::code_location CodeLoc(
2941  KI::getFileName(), KI::getFunctionName(), KI::getLineNumber(),
2942  KI::getColumnNumber());
2943  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
2944  return submit(
2945  [&](handler &CGH) {
2946  CGH.depends_on(DepEvents);
2947  CGH.template parallel_for<KernelName>(Range, Properties, Rest...);
2948  },
2949  CodeLoc);
2950  }
2951 
2959  template <typename KernelName, int Dims, typename... RestT>
2960  event parallel_for_impl(range<Dims> Range,
2961  const std::vector<event> &DepEvents,
2962  RestT &&...Rest) {
2963  return parallel_for_impl<KernelName>(
2965  Rest...);
2966  }
2967 
2968 #ifndef __INTEL_PREVIEW_BREAKING_CHANGES
2969  buffer<detail::AssertHappened, 1> &getAssertHappenedBuffer();
2970 #endif
2971 
2972  event memcpyToDeviceGlobal(void *DeviceGlobalPtr, const void *Src,
2973  bool IsDeviceImageScope, size_t NumBytes,
2974  size_t Offset,
2975  const std::vector<event> &DepEvents);
2976  event memcpyFromDeviceGlobal(void *Dest, const void *DeviceGlobalPtr,
2977  bool IsDeviceImageScope, size_t NumBytes,
2978  size_t Offset,
2979  const std::vector<event> &DepEvents);
2980 };
2981 
2982 } // namespace _V1
2983 } // namespace sycl
2984 
2985 namespace std {
2986 template <> struct __SYCL_EXPORT hash<sycl::queue> {
2987  size_t operator()(const sycl::queue &Q) const;
2988 };
2989 } // namespace std
2990 
2991 #if __SYCL_USE_FALLBACK_ASSERT
2992 // Explicitly request format macros
2993 #ifndef __STDC_FORMAT_MACROS
2994 #define __STDC_FORMAT_MACROS 1
2995 #endif
2996 #include <cinttypes>
2997 
2998 namespace sycl {
2999 inline namespace _V1 {
3000 
3001 namespace detail {
3002 #define __SYCL_ASSERT_START 1
3003 
3004 namespace __sycl_service_kernel__ {
3005 class AssertInfoCopier;
3006 } // namespace __sycl_service_kernel__
3007 
3019 event submitAssertCapture(queue &Self, event &Event, queue *SecondaryQueue,
3020  const detail::code_location &CodeLoc) {
3021  buffer<detail::AssertHappened, 1> Buffer{1};
3022 
3023  event CopierEv, CheckerEv, PostCheckerEv;
3024  auto CopierCGF = [&](handler &CGH) {
3025  CGH.depends_on(Event);
3026 
3027  auto Acc = Buffer.get_access<access::mode::write>(CGH);
3028 
3029  CGH.single_task<__sycl_service_kernel__::AssertInfoCopier>([Acc] {
3030 #if defined(__SYCL_DEVICE_ONLY__) && !defined(__NVPTX__)
3031  __devicelib_assert_read(&Acc[0]);
3032 #else
3033  (void)Acc;
3034 #endif // defined(__SYCL_DEVICE_ONLY__) && !defined(__NVPTX__)
3035  });
3036  };
3037  auto CheckerCGF = [&CopierEv, &Buffer](handler &CGH) {
3038  CGH.depends_on(CopierEv);
3039  using mode = access::mode;
3040  using target = access::target;
3041 
3042  auto Acc = Buffer.get_access<mode::read, target::host_buffer>(CGH);
3043 
3044  CGH.host_task([=] {
3045  const detail::AssertHappened *AH = &Acc[0];
3046 
3047  // Don't use assert here as msvc will insert reference to __imp__wassert
3048  // which won't be properly resolved in separate compile use-case
3049 #ifndef NDEBUG
3050  if (AH->Flag == __SYCL_ASSERT_START)
3051  throw sycl::exception(
3053  "Internal Error. Invalid value in assert description.");
3054 #endif
3055 
3056  if (AH->Flag) {
3057  const char *Expr = AH->Expr[0] ? AH->Expr : "<unknown expr>";
3058  const char *File = AH->File[0] ? AH->File : "<unknown file>";
3059  const char *Func = AH->Func[0] ? AH->Func : "<unknown func>";
3060 
3061  fprintf(stderr,
3062  "%s:%d: %s: global id: [%" PRIu64 ",%" PRIu64 ",%" PRIu64
3063  "], local id: [%" PRIu64 ",%" PRIu64 ",%" PRIu64 "] "
3064  "Assertion `%s` failed.\n",
3065  File, AH->Line, Func, AH->GID0, AH->GID1, AH->GID2, AH->LID0,
3066  AH->LID1, AH->LID2, Expr);
3067  fflush(stderr);
3068  abort(); // no need to release memory as it's abort anyway
3069  }
3070  });
3071  };
3072 
3073  if (SecondaryQueue) {
3074  CopierEv = Self.submit_impl(CopierCGF, *SecondaryQueue, CodeLoc);
3075  CheckerEv = Self.submit_impl(CheckerCGF, *SecondaryQueue, CodeLoc);
3076  } else {
3077  CopierEv = Self.submit_impl(CopierCGF, CodeLoc);
3078  CheckerEv = Self.submit_impl(CheckerCGF, CodeLoc);
3079  }
3080 
3081  return CheckerEv;
3082 }
3083 #undef __SYCL_ASSERT_START
3084 } // namespace detail
3085 
3086 } // namespace _V1
3087 } // namespace sycl
3088 #endif // __SYCL_USE_FALLBACK_ASSERT
3089 
3090 #undef __SYCL_USE_FALLBACK_ASSERT
The file contains implementations of accessor class.
The context class represents a SYCL context on which kernel functions may be executed.
Definition: context.hpp:51
This class is the default KernelName template parameter type for kernel invocation APIs such as singl...
Definition: kernel.hpp:42
Data type that manages the code_location information in TLS.
Definition: common.hpp:129
The SYCL device class encapsulates a single SYCL device on which kernels may be executed.
Definition: device.hpp:65
An event object can be used to synchronize memory transfers, enqueues of kernels and signaling barrie...
Definition: event.hpp:44
Command group handler class.
Definition: handler.hpp:462
void depends_on(event Event)
Registers event dependencies on this command group.
Definition: handler.cpp:1335
void ext_oneapi_wait_external_semaphore(sycl::ext::oneapi::experimental::interop_semaphore_handle SemaphoreHandle)
Instruct the queue with a non-blocking wait on an external semaphore.
Definition: handler.cpp:1291
void copy(accessor< T_Src, Dims, AccessMode, AccessTarget, IsPlaceholder > Src, std::shared_ptr< T_Dst > Dst)
Copies the content of memory object accessed by Src into the memory pointed by Dst.
Definition: handler.hpp:2513
void ext_oneapi_graph(ext::oneapi::experimental::command_graph< ext::oneapi::experimental::graph_state::executable > Graph)
Executes a command_graph.
Definition: handler.cpp:1524
void ext_oneapi_memset2d(void *Dest, size_t DestPitch, int Value, size_t Width, size_t Height)
Fills the memory pointed by a USM pointer with the value specified.
Definition: handler.hpp:2992
void memcpy(void *Dest, const void *Src, size_t Count)
Copies data from one memory region to another, each is either a host pointer or a pointer within USM ...
Definition: handler.cpp:942
void ext_oneapi_fill2d(void *Dest, size_t DestPitch, const T &Pattern, size_t Width, size_t Height)
Fills the memory pointed by a USM pointer with the value specified.
Definition: handler.hpp:3028
void ext_oneapi_copy(void *Src, ext::oneapi::experimental::image_mem_handle Dest, const ext::oneapi::experimental::image_descriptor &DestImgDesc)
Copies data from one memory region to another, where Src is a USM pointer and Dest is an opaque image...
Definition: handler.cpp:1011
void require(accessor< DataT, Dims, AccMode, AccTarget, isPlaceholder > Acc)
Requires access to the memory object associated with the placeholder accessor.
Definition: handler.hpp:1858
void fill(accessor< T, Dims, AccessMode, AccessTarget, IsPlaceholder, PropertyListT > Dst, const T &Pattern)
Fills memory pointed by accessor with the pattern given.
Definition: handler.hpp:2742
void update_host(accessor< T, Dims, AccessMode, AccessTarget, IsPlaceholder > Acc)
Provides guarantees that the memory object accessed via Acc is updated on the host after command grou...
Definition: handler.hpp:2711
void ext_oneapi_copy2d(const T *Src, size_t SrcPitch, T *Dest, size_t DestPitch, size_t Width, size_t Height)
Copies data from one 2D memory region to another, both pointed by USM pointers.
Definition: handler.hpp:2938
void ext_oneapi_memcpy2d(void *Dest, size_t DestPitch, const void *Src, size_t SrcPitch, size_t Width, size_t Height)
Copies data from one 2D memory region to another, both pointed by USM pointers.
Definition: handler.hpp:2884
void prefetch(const void *Ptr, size_t Count)
Provides hints to the runtime library that data should be made available on a device earlier than Uni...
Definition: handler.cpp:959
void ext_oneapi_signal_external_semaphore(sycl::ext::oneapi::experimental::interop_semaphore_handle SemaphoreHandle)
Instruct the queue to signal the external semaphore once all previous commands have completed executi...
Definition: handler.cpp:1301
A unique identifier of an item in an index space.
Definition: id.hpp:36
Defines the iteration domain of both the work-groups and the overall dispatch.
Definition: nd_range.hpp:22
Objects of the property_list class are containers for the SYCL properties.
Encapsulates a single SYCL queue which schedules kernels on a SYCL device.
Definition: queue.hpp:119
event copy(accessor< SrcT, SrcDims, SrcMode, SrcTgt, IsPlaceholder > Src, DestT *Dest, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from a memory region pointed to by a placeholder accessor to another memory region pointe...
Definition: queue.hpp:2615
event copy(const std::remove_all_extents_t< T > *Src, ext::oneapi::experimental::device_global< T, PropertyListT > &Dest, size_t Count, size_t StartIndex, const std::vector< event > &DepEvents, const detail::code_location &CodeLoc=detail::code_location::current())
Copies elements of type std::remove_all_extents_t<T> from a USM memory region to a device_global.
Definition: queue.hpp:1333
void wait(const detail::code_location &CodeLoc=detail::code_location::current())
Performs a blocking wait for the completion of all enqueued tasks in the queue.
Definition: queue.hpp:436
event ext_oneapi_copy(void *Src, sycl::range< 3 > SrcOffset, void *Dest, sycl::range< 3 > DestOffset, const ext::oneapi::experimental::image_descriptor &DeviceImgDesc, size_t DeviceRowPitch, sycl::range< 3 > HostExtent, sycl::range< 3 > CopyExtent, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from one memory region to another, where Src and Dest are USM pointers.
Definition: queue.hpp:1879
event fill(void *Ptr, const T &Pattern, size_t Count, const std::vector< event > &DepEvents, const detail::code_location &CodeLoc=detail::code_location::current())
Fills the specified memory with the specified pattern.
Definition: queue.hpp:526
queue(const property_list &PropList={})
Constructs a SYCL queue instance using the device returned by an instance of default_selector.
Definition: queue.hpp:125
event ext_oneapi_copy2d(const T *Src, size_t SrcPitch, T *Dest, size_t DestPitch, size_t Width, size_t Height, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from one 2D memory region to another, both pointed by USM pointers.
Definition: queue.hpp:913
event ext_oneapi_fill2d(void *Dest, size_t DestPitch, const T &Pattern, size_t Width, size_t Height, event DepEvent, const detail::code_location &CodeLoc=detail::code_location::current())
Fills the memory pointed by a USM pointer with the value specified.
Definition: queue.hpp:1117
event ext_oneapi_memset2d(void *Dest, size_t DestPitch, int Value, size_t Width, size_t Height, const detail::code_location &CodeLoc=detail::code_location::current())
Fills the memory pointed by a USM pointer with the value specified.
Definition: queue.hpp:1004
event fill(void *Ptr, const T &Pattern, size_t Count, event DepEvent, const detail::code_location &CodeLoc=detail::code_location::current())
Fills the specified memory with the specified pattern.
Definition: queue.hpp:504
event ext_oneapi_memcpy2d(void *Dest, size_t DestPitch, const void *Src, size_t SrcPitch, size_t Width, size_t Height, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from one 2D memory region to another, both pointed by USM pointers.
Definition: queue.hpp:816
queue & operator=(const queue &RHS)=default
event prefetch(const void *Ptr, size_t Count, const std::vector< event > &DepEvents, const detail::code_location &CodeLoc=detail::code_location::current())
Provides hints to the runtime library that data should be made available on a device earlier than Uni...
Definition: queue.hpp:784
std::enable_if_t< ext::oneapi::experimental::is_property_list< PropertiesT >::value, event > single_task(PropertiesT Properties, _KERNELFUNCPARAM(KernelFunc), const detail::code_location &CodeLoc=detail::code_location::current())
single_task version with a kernel represented as a lambda.
Definition: queue.hpp:2151
event copy(const T *Src, T *Dest, size_t Count, const std::vector< event > &DepEvents, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from one memory region to another, each is either a host pointer or a pointer within USM ...
Definition: queue.hpp:685
event ext_oneapi_memcpy2d(void *Dest, size_t DestPitch, const void *Src, size_t SrcPitch, size_t Width, size_t Height, const std::vector< event > &DepEvents, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from one 2D memory region to another, both pointed by USM pointers.
Definition: queue.hpp:884
queue(const context &syclContext, const DeviceSelector &deviceSelector, const async_handler &AsyncHandler, const property_list &propList={})
Constructs a SYCL queue instance using the device identified by the device selector provided.
Definition: queue.hpp:188
event parallel_for(range< 1 > Range, const std::vector< event > &DepEvents, RestT &&...Rest)
parallel_for version with a kernel represented as a lambda + range that specifies global size only.
Definition: queue.hpp:2354
event parallel_for(range< 3 > Range, const std::vector< event > &DepEvents, RestT &&...Rest)
parallel_for version with a kernel represented as a lambda + range that specifies global size only.
Definition: queue.hpp:2382
event ext_oneapi_copy(ext::oneapi::experimental::image_mem_handle Src, void *Dest, const ext::oneapi::experimental::image_descriptor &SrcImgDesc, event DepEvent, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from one memory region to another, where Src is an opaque image memory handle and Dest is...
Definition: queue.hpp:1716
event ext_oneapi_copy(ext::oneapi::experimental::image_mem_handle Src, sycl::range< 3 > SrcOffset, const ext::oneapi::experimental::image_descriptor &SrcImgDesc, void *Dest, sycl::range< 3 > DestOffset, sycl::range< 3 > DestExtent, sycl::range< 3 > CopyExtent, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from one memory region to another, where Src is an opaque image memory handle and Dest is...
Definition: queue.hpp:1690
std::enable_if_t< ext::oneapi::experimental::is_property_list< PropertiesT >::value, event > single_task(const std::vector< event > &DepEvents, PropertiesT Properties, _KERNELFUNCPARAM(KernelFunc), const detail::code_location &CodeLoc=detail::code_location::current())
single_task version with a kernel represented as a lambda.
Definition: queue.hpp:2239
event parallel_for(range< 1 > Range, RestT &&...Rest)
parallel_for version with a kernel represented as a lambda + range that specifies global size only.
Definition: queue.hpp:2283
event ext_oneapi_copy(void *Src, sycl::range< 3 > SrcOffset, void *Dest, sycl::range< 3 > DestOffset, const ext::oneapi::experimental::image_descriptor &DeviceImgDesc, size_t DeviceRowPitch, sycl::range< 3 > HostExtent, sycl::range< 3 > CopyExtent, event DepEvent, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from one memory region to another, where Src and Dest are USM pointers.
Definition: queue.hpp:1944
__SYCL2020_DEPRECATED("SYCL 1.2.1 device selectors are deprecated. Please " "use SYCL 2020 device selectors instead.") queue(const device_selector &DeviceSelector
Constructs a SYCL queue instance using the device returned by the DeviceSelector provided.
event parallel_for(range< Dim > Range, id< Dim > WorkItemOffset, const std::vector< event > &DepEvents, _KERNELFUNCPARAM(KernelFunc))
parallel_for version with a kernel represented as a lambda + range and offset that specify global siz...
Definition: queue.hpp:2399
event memcpy(ext::oneapi::experimental::device_global< T, PropertyListT > &Dest, const void *Src, size_t NumBytes=sizeof(T), size_t Offset=0, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from a USM memory region to a device_global.
Definition: queue.hpp:1230
event copy(std::shared_ptr< SrcT > Src, accessor< DestT, DestDims, DestMode, DestTgt, IsPlaceholder > Dest, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from a memory region pointed to by a shared_ptr to another memory region pointed to by a ...
Definition: queue.hpp:2595
queue(const context &syclContext, const DeviceSelector &deviceSelector, const property_list &propList={})
Constructs a SYCL queue instance using the device identified by the device selector provided.
Definition: queue.hpp:172
event ext_oneapi_graph(ext::oneapi::experimental::command_graph< ext::oneapi::experimental::graph_state::executable > Graph, const std::vector< event > &DepEvents, const detail::code_location &CodeLoc=detail::code_location::current())
Shortcut for executing a graph of commands with multiple dependencies.
Definition: queue.hpp:2755
event ext_oneapi_signal_external_semaphore(sycl::ext::oneapi::experimental::interop_semaphore_handle SemaphoreHandle, event DepEvent, const detail::code_location &CodeLoc=detail::code_location::current())
Instruct the queue to signal the external semaphore once all previous commands have completed executi...
Definition: queue.hpp:2108
event copy(accessor< SrcT, SrcDims, SrcMode, SrcTgt, IsSrcPlaceholder > Src, accessor< DestT, DestDims, DestMode, DestTgt, IsDestPlaceholder > Dest, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from one memory region to another, both pointed by placeholder accessors.
Definition: queue.hpp:2656
event copy(const T *Src, T *Dest, size_t Count, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from one memory region to another, each is either a host pointer or a pointer within USM ...
Definition: queue.hpp:643
event parallel_for(range< 1 > Range, event DepEvent, RestT &&...Rest)
parallel_for version with a kernel represented as a lambda + range that specifies global size only.
Definition: queue.hpp:2317
event ext_oneapi_copy(void *Src, ext::oneapi::experimental::image_mem_handle Dest, const ext::oneapi::experimental::image_descriptor &DestImgDesc, const std::vector< event > &DepEvents, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from one memory region to another, where Src is a USM pointer and Dest is an opaque image...
Definition: queue.hpp:1597
queue(queue &&RHS)=default
event ext_oneapi_wait_external_semaphore(sycl::ext::oneapi::experimental::interop_semaphore_handle SemaphoreHandle, const detail::code_location &CodeLoc=detail::code_location::current())
Instruct the queue with a non-blocking wait on an external semaphore.
Definition: queue.hpp:2034
event parallel_for(range< 3 > Range, event DepEvent, RestT &&...Rest)
parallel_for version with a kernel represented as a lambda + range that specifies global size only.
Definition: queue.hpp:2341
event parallel_for(range< 3 > Range, RestT &&...Rest)
parallel_for version with a kernel represented as a lambda + range that specifies global size only.
Definition: queue.hpp:2305
event update_host(accessor< T, Dims, Mode, Tgt, IsPlaceholder > Acc, const detail::code_location &CodeLoc=detail::code_location::current())
Provides guarantees that the memory object accessed via Acc is updated on the host after operation is...
Definition: queue.hpp:2676
event copy(const T *Src, T *Dest, size_t Count, event DepEvent, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from one memory region to another, each is either a host pointer or a pointer within USM ...
Definition: queue.hpp:664
event ext_oneapi_copy(void *Src, sycl::range< 3 > SrcOffset, void *Dest, sycl::range< 3 > DestOffset, const ext::oneapi::experimental::image_descriptor &DeviceImgDesc, size_t DeviceRowPitch, sycl::range< 3 > HostExtent, sycl::range< 3 > CopyExtent, const std::vector< event > &DepEvents, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from one memory region to another, where Src and Dest are USM pointers.
Definition: queue.hpp:2012
event ext_oneapi_copy(void *Src, ext::oneapi::experimental::image_mem_handle Dest, const ext::oneapi::experimental::image_descriptor &DestImgDesc, event DepEvent, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from one memory region to another, where Src is a USM pointer and Dest is an opaque image...
Definition: queue.hpp:1533
event ext_oneapi_wait_external_semaphore(sycl::ext::oneapi::experimental::interop_semaphore_handle SemaphoreHandle, event DepEvent, const detail::code_location &CodeLoc=detail::code_location::current())
Instruct the queue with a non-blocking wait on an external semaphore.
Definition: queue.hpp:2051
event ext_oneapi_copy(ext::oneapi::experimental::image_mem_handle Src, void *Dest, const ext::oneapi::experimental::image_descriptor &SrcImgDesc, const std::vector< event > &DepEvents, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from one memory region to another, where Src is an opaque image memory handle and Dest is...
Definition: queue.hpp:1780
event copy(const ext::oneapi::experimental::device_global< T, PropertyListT > &Src, std::remove_all_extents_t< T > *Dest, size_t Count=sizeof(T)/sizeof(std::remove_all_extents_t< T >), size_t StartIndex=0, const detail::code_location &CodeLoc=detail::code_location::current())
Copies elements of type std::remove_all_extents_t<T> from a device_global to a USM memory region.
Definition: queue.hpp:1454
event prefetch(const void *Ptr, size_t Count, event DepEvent, const detail::code_location &CodeLoc=detail::code_location::current())
Provides hints to the runtime library that data should be made available on a device earlier than Uni...
Definition: queue.hpp:763
event ext_oneapi_copy(ext::oneapi::experimental::image_mem_handle Src, sycl::range< 3 > SrcOffset, const ext::oneapi::experimental::image_descriptor &SrcImgDesc, void *Dest, sycl::range< 3 > DestOffset, sycl::range< 3 > DestExtent, sycl::range< 3 > CopyExtent, event DepEvent, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from one memory region to another, where Src is an opaque image memory handle and Dest is...
Definition: queue.hpp:1752
event ext_oneapi_copy2d(const T *Src, size_t SrcPitch, T *Dest, size_t DestPitch, size_t Width, size_t Height, event DepEvent, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from one 2D memory region to another, both pointed by USM pointers.
Definition: queue.hpp:942
queue(const DeviceSelector &deviceSelector, const property_list &PropList={})
Constructs a SYCL queue instance using the device identified by the device selector provided.
Definition: queue.hpp:158
__SYCL2020_DEPRECATED("is_host() is deprecated as the host device is no longer supported.") bool is_host() const
event ext_oneapi_copy(void *Src, sycl::range< 3 > SrcOffset, sycl::range< 3 > SrcExtent, ext::oneapi::experimental::image_mem_handle Dest, sycl::range< 3 > DestOffset, const ext::oneapi::experimental::image_descriptor &DestImgDesc, sycl::range< 3 > CopyExtent, event DepEvent, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from one memory region to another, where Src is a USM pointer and Dest is an opaque image...
Definition: queue.hpp:1567
event memcpy(void *Dest, const ext::oneapi::experimental::device_global< T, PropertyListT > &Src, size_t NumBytes=sizeof(T), size_t Offset=0, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from a device_global to USM memory.
Definition: queue.hpp:1310
event copy(const ext::oneapi::experimental::device_global< T, PropertyListT > &Src, std::remove_all_extents_t< T > *Dest, size_t Count, size_t StartIndex, const std::vector< event > &DepEvents, const detail::code_location &CodeLoc=detail::code_location::current())
Copies elements of type std::remove_all_extents_t<T> from a device_global to a USM memory region.
Definition: queue.hpp:1406
event parallel_for(range< 2 > Range, RestT &&...Rest)
parallel_for version with a kernel represented as a lambda + range that specifies global size only.
Definition: queue.hpp:2294
queue(const async_handler &AsyncHandler, const property_list &PropList={})
Constructs a SYCL queue instance with an async_handler using the device returned by an instance of de...
Definition: queue.hpp:133
event parallel_for(nd_range< Dims > Range, const std::vector< event > &DepEvents, RestT &&...Rest)
parallel_for version with a kernel represented as a lambda + nd_range that specifies global,...
Definition: queue.hpp:2552
event ext_oneapi_copy(void *Src, sycl::range< 3 > SrcOffset, sycl::range< 3 > SrcExtent, ext::oneapi::experimental::image_mem_handle Dest, sycl::range< 3 > DestOffset, const ext::oneapi::experimental::image_descriptor &DestImgDesc, sycl::range< 3 > CopyExtent, const std::vector< event > &DepEvents, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from one memory region to another, where Src is a USM pointer and Dest is an opaque image...
Definition: queue.hpp:1632
event parallel_for(nd_range< Dims > Range, event DepEvent, RestT &&...Rest)
parallel_for version with a kernel represented as a lambda + nd_range that specifies global,...
Definition: queue.hpp:2528
queue & operator=(queue &&RHS)=default
event ext_oneapi_memset2d(void *Dest, size_t DestPitch, int Value, size_t Width, size_t Height, event DepEvent, const detail::code_location &CodeLoc=detail::code_location::current())
Fills the memory pointed by a USM pointer with the value specified.
Definition: queue.hpp:1033
event ext_oneapi_copy(void *Src, void *Dest, const ext::oneapi::experimental::image_descriptor &DeviceImgDesc, size_t DeviceRowPitch, const std::vector< event > &DepEvents, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from one memory region to another, where Src and Dest are USM pointers.
Definition: queue.hpp:1974
queue(const DeviceSelector &deviceSelector, const async_handler &AsyncHandler, const property_list &PropList={})
Constructs a SYCL queue instance using the device identified by the device selector provided.
Definition: queue.hpp:145
event ext_oneapi_graph(ext::oneapi::experimental::command_graph< ext::oneapi::experimental::graph_state::executable > Graph, const detail::code_location &CodeLoc=detail::code_location::current())
Shortcut for executing a graph of commands.
Definition: queue.hpp:2721
event ext_oneapi_fill2d(void *Dest, size_t DestPitch, const T &Pattern, size_t Width, size_t Height, const detail::code_location &CodeLoc=detail::code_location::current())
Fills the memory pointed by a USM pointer with the value specified.
Definition: queue.hpp:1091
event ext_oneapi_signal_external_semaphore(sycl::ext::oneapi::experimental::interop_semaphore_handle SemaphoreHandle, const detail::code_location &CodeLoc=detail::code_location::current())
Instruct the queue to signal the external semaphore once all previous commands have completed executi...
Definition: queue.hpp:2090
__SYCL2020_DEPRECATED("SYCL 1.2.1 device selectors are deprecated. Please " "use SYCL 2020 device selectors instead.") queue(const context &SyclContext
Constructs a SYCL queue instance that is associated with the context provided, using the device retur...
event ext_oneapi_copy(void *Src, ext::oneapi::experimental::image_mem_handle Dest, const ext::oneapi::experimental::image_descriptor &DestImgDesc, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from one memory region to another, where Src is a USM pointer and Dest is an opaque image...
Definition: queue.hpp:1476
event single_task(event DepEvent, _KERNELFUNCPARAM(KernelFunc), const detail::code_location &CodeLoc=detail::code_location::current())
single_task version with a kernel represented as a lambda.
Definition: queue.hpp:2220
event ext_oneapi_memcpy2d(void *Dest, size_t DestPitch, const void *Src, size_t SrcPitch, size_t Width, size_t Height, event DepEvent, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from one 2D memory region to another, both pointed by USM pointers.
Definition: queue.hpp:849
event ext_oneapi_wait_external_semaphore(sycl::ext::oneapi::experimental::interop_semaphore_handle SemaphoreHandle, const std::vector< event > &DepEvents, const detail::code_location &CodeLoc=detail::code_location::current())
Instruct the queue with a non-blocking wait on an external semaphore.
Definition: queue.hpp:2071
event single_task(_KERNELFUNCPARAM(KernelFunc), const detail::code_location &CodeLoc=detail::code_location::current())
single_task version with a kernel represented as a lambda.
Definition: queue.hpp:2176
event fill(accessor< T, Dims, Mode, Tgt, IsPlaceholder > Dest, const T &Src, const detail::code_location &CodeLoc=detail::code_location::current())
Fills the specified memory with the specified data.
Definition: queue.hpp:2695
bool operator==(const queue &RHS) const
Definition: queue.hpp:299
queue(const queue &RHS)=default
Constructs a SYCL queue with an optional async_handler from an OpenCL cl_command_queue.
event memcpy(void *Dest, const ext::oneapi::experimental::device_global< T, PropertyListT > &Src, size_t NumBytes, size_t Offset, event DepEvent, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from a device_global to USM memory.
Definition: queue.hpp:1289
event memcpy(ext::oneapi::experimental::device_global< T, PropertyListT > &Dest, const void *Src, size_t NumBytes, size_t Offset, event DepEvent, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from a USM memory region to a device_global.
Definition: queue.hpp:1210
void wait_and_throw(const detail::code_location &CodeLoc=detail::code_location::current())
Performs a blocking wait for the completion of all enqueued tasks in the queue.
Definition: queue.hpp:450
event ext_oneapi_memset2d(void *Dest, size_t DestPitch, int Value, size_t Width, size_t Height, const std::vector< event > &DepEvents, const detail::code_location &CodeLoc=detail::code_location::current())
Fills the memory pointed by a USM pointer with the value specified.
Definition: queue.hpp:1065
event single_task(const std::vector< event > &DepEvents, _KERNELFUNCPARAM(KernelFunc), const detail::code_location &CodeLoc=detail::code_location::current())
single_task version with a kernel represented as a lambda.
Definition: queue.hpp:2268
event ext_oneapi_copy(void *Src, void *Dest, const ext::oneapi::experimental::image_descriptor &DeviceImgDesc, size_t DeviceRowPitch, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from one memory region to another, where Src and Dest are USM pointers.
Definition: queue.hpp:1844
event copy(const std::remove_all_extents_t< T > *Src, ext::oneapi::experimental::device_global< T, PropertyListT > &Dest, size_t Count=sizeof(T)/sizeof(std::remove_all_extents_t< T >), size_t StartIndex=0, const detail::code_location &CodeLoc=detail::code_location::current())
Copies elements of type std::remove_all_extents_t<T> from a USM memory region to a device_global.
Definition: queue.hpp:1381
event memcpy(ext::oneapi::experimental::device_global< T, PropertyListT > &Dest, const void *Src, size_t NumBytes, size_t Offset, const std::vector< event > &DepEvents, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from a USM memory region to a device_global.
Definition: queue.hpp:1170
event ext_oneapi_graph(ext::oneapi::experimental::command_graph< ext::oneapi::experimental::graph_state::executable > Graph, event DepEvent, const detail::code_location &CodeLoc=detail::code_location::current())
Shortcut for executing a graph of commands with a single dependency.
Definition: queue.hpp:2735
std::enable_if_t< detail::AreAllButLastReductions< RestT... >::value, event > parallel_for(nd_range< Dims > Range, RestT &&...Rest)
parallel_for version with a kernel represented as a lambda + nd_range that specifies global,...
Definition: queue.hpp:2514
event copy(const std::remove_all_extents_t< T > *Src, ext::oneapi::experimental::device_global< T, PropertyListT > &Dest, size_t Count, size_t StartIndex, event DepEvent, const detail::code_location &CodeLoc=detail::code_location::current())
Copies elements of type std::remove_all_extents_t<T> from a USM memory region to a device_global.
Definition: queue.hpp:1358
event copy(const SrcT *Src, accessor< DestT, DestDims, DestMode, DestTgt, IsPlaceholder > Dest, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from a memory region pointed to by a raw pointer to another memory region pointed to by a...
Definition: queue.hpp:2634
event copy(accessor< SrcT, SrcDims, SrcMode, SrcTgt, IsPlaceholder > Src, std::shared_ptr< DestT > Dest, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from a memory region pointed to by a placeholder accessor to another memory region pointe...
Definition: queue.hpp:2575
event ext_oneapi_copy(ext::oneapi::experimental::image_mem_handle Src, void *Dest, const ext::oneapi::experimental::image_descriptor &SrcImgDesc, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from one memory region to another, where Src is an opaque image memory handle and Dest is...
Definition: queue.hpp:1659
std::enable_if_t< ext::oneapi::experimental::is_property_list< PropertiesT >::value, event > single_task(event DepEvent, PropertiesT Properties, _KERNELFUNCPARAM(KernelFunc), const detail::code_location &CodeLoc=detail::code_location::current())
single_task version with a kernel represented as a lambda.
Definition: queue.hpp:2193
event ext_oneapi_copy(void *Src, void *Dest, const ext::oneapi::experimental::image_descriptor &DeviceImgDesc, size_t DeviceRowPitch, event DepEvent, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from one memory region to another, where Src and Dest are USM pointers.
Definition: queue.hpp:1907
event parallel_for(range< 2 > Range, event DepEvent, RestT &&...Rest)
parallel_for version with a kernel represented as a lambda + range that specifies global size only.
Definition: queue.hpp:2329
event ext_oneapi_fill2d(void *Dest, size_t DestPitch, const T &Pattern, size_t Width, size_t Height, const std::vector< event > &DepEvents, const detail::code_location &CodeLoc=detail::code_location::current())
Fills the memory pointed by a USM pointer with the value specified.
Definition: queue.hpp:1145
event ext_oneapi_copy(ext::oneapi::experimental::image_mem_handle Src, sycl::range< 3 > SrcOffset, const ext::oneapi::experimental::image_descriptor &SrcImgDesc, void *Dest, sycl::range< 3 > DestOffset, sycl::range< 3 > DestExtent, sycl::range< 3 > CopyExtent, const std::vector< event > &DepEvents, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from one memory region to another, where Src is an opaque image memory handle and Dest is...
Definition: queue.hpp:1817
event copy(const ext::oneapi::experimental::device_global< T, PropertyListT > &Src, std::remove_all_extents_t< T > *Dest, size_t Count, size_t StartIndex, event DepEvent, const detail::code_location &CodeLoc=detail::code_location::current())
Copies elements of type std::remove_all_extents_t<T> from a device_global to a USM memory region.
Definition: queue.hpp:1431
event ext_oneapi_signal_external_semaphore(sycl::ext::oneapi::experimental::interop_semaphore_handle SemaphoreHandle, const std::vector< event > &DepEvents, const detail::code_location &CodeLoc=detail::code_location::current())
Instruct the queue to signal the external semaphore once all previous commands have completed executi...
Definition: queue.hpp:2129
std::enable_if_t< std::is_invocable_r_v< void, T, handler & >, event > submit(T CGF, queue &SecondaryQueue, const detail::code_location &CodeLoc=detail::code_location::current())
Submits a command group function object to the queue, in order to be scheduled for execution on the d...
Definition: queue.hpp:382
event ext_oneapi_copy(void *Src, sycl::range< 3 > SrcOffset, sycl::range< 3 > SrcExtent, ext::oneapi::experimental::image_mem_handle Dest, sycl::range< 3 > DestOffset, const ext::oneapi::experimental::image_descriptor &DestImgDesc, sycl::range< 3 > CopyExtent, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from one memory region to another, where Src is a USM pointer and Dest is an opaque image...
Definition: queue.hpp:1505
event memcpy(void *Dest, const ext::oneapi::experimental::device_global< T, PropertyListT > &Src, size_t NumBytes, size_t Offset, const std::vector< event > &DepEvents, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from a device_global to USM memory.
Definition: queue.hpp:1251
event ext_oneapi_copy2d(const T *Src, size_t SrcPitch, T *Dest, size_t DestPitch, size_t Width, size_t Height, const std::vector< event > &DepEvents, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from one 2D memory region to another, both pointed by USM pointers.
Definition: queue.hpp:973
event parallel_for(range< 2 > Range, const std::vector< event > &DepEvents, RestT &&...Rest)
parallel_for version with a kernel represented as a lambda + range that specifies global size only.
Definition: queue.hpp:2368
std::enable_if_t< detail::AreAllButLastReductions< RestT... >::value &&ext::oneapi::experimental::is_property_list< PropertiesT >::value, event > parallel_for(nd_range< Dims > Range, PropertiesT Properties, RestT &&...Rest)
parallel_for version with a kernel represented as a lambda + nd_range that specifies global,...
Definition: queue.hpp:2492
bool operator!=(const queue &RHS) const
Definition: queue.hpp:301
Defines the iteration domain of either a single work-group in a parallel dispatch,...
Definition: range.hpp:26
class __SYCL2020_DEPRECATED("Host device is no longer supported.") host_selector int default_selector_v(const device &dev)
Selects SYCL host device.
bool isDeviceGlobalUsedInKernel(const void *DeviceGlobalPtr)
Definition: handler.cpp:37
void defaultAsyncHandler(exception_list Exceptions)
decltype(Obj::impl) getSyclObjImpl(const Obj &SyclObject)
Definition: impl_utils.hpp:30
T createSyclObjFromImpl(decltype(T::impl) ImplObj)
Definition: impl_utils.hpp:48
device select_device(const DSelectorInvocableType &DeviceSelectorInvocable)
@ modifiable
In modifiable state, commands can be added to graph.
@ executable
In executable state, the graph is ready to execute.
static constexpr bool has_property()
constexpr device_has_key::value_t< Aspects... > device_has
Definition: properties.hpp:127
static constexpr auto get_property()
decltype(properties{}) empty_properties_t
Definition: properties.hpp:190
signed char __SYCL2020_DEPRECATED
Definition: aliases.hpp:94
pointer_t get() const
Definition: multi_ptr.hpp:974
auto get_native(const SyclObjectT &Obj) -> backend_return_t< BackendName, SyclObjectT >
Definition: backend.hpp:136
typename backend_traits< Backend >::template return_type< SyclType > backend_return_t
Definition: backend.hpp:87
class __SYCL_EBO __SYCL_SPECIAL_CLASS __SYCL_TYPE(local_accessor) local_accessor class __SYCL_EBO __SYCL_SPECIAL_CLASS IsPlaceholder
Definition: accessor.hpp:3234
std::function< void(sycl::exception_list)> async_handler
std::error_code make_error_code(sycl::errc E) noexcept
Constructs an error code using e and sycl_category()
Definition: exception.cpp:107
void prefetch(size_t NumElements) const
Definition: multi_ptr.hpp:1076
Definition: access.hpp:18
static sycl::event fill(sycl::queue q, void *dev_ptr, const T &pattern, size_t count)
Set pattern to the first count elements of type T starting from dev_ptr.
Definition: memory.hpp:172
static device_ext & get_device(unsigned int id)
Util function to get a device by id.
Definition: device.hpp:554
uintptr_t pi_native_handle
Definition: pi.h:206
_pi_mem_advice
Definition: pi.h:579
#define _KERNELFUNCPARAM(a)
Definition: queue.hpp:63
_Abi const simd< _Tp, _Abi > & noexcept
Definition: simd.hpp:1324
Predicate returning true if all template type parameters except the last one are reductions.
Definition: reduction.hpp:77
static constexpr code_location current(const char *fileName=__CODELOC_FILE_NAME, const char *funcName=__CODELOC_FUNCTION, unsigned long lineNo=__CODELOC_LINE, unsigned long columnNo=__CODELOC_COLUMN) noexcept
Definition: common.hpp:68
A struct to describe the properties of an image.