DPC++ Runtime
Runtime libraries for oneAPI DPC++
queue.hpp
Go to the documentation of this file.
1 //==-------------------- queue.hpp - SYCL queue ----------------------------==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #pragma once
10 
11 #include <sycl/access/access.hpp> // for target, access...
12 #include <sycl/accessor.hpp> // for accessor
13 #include <sycl/aspects.hpp> // for aspect
14 #include <sycl/async_handler.hpp> // for async_handler
15 #include <sycl/backend_types.hpp> // for backend, backe...
16 #include <sycl/buffer.hpp> // for buffer
17 #include <sycl/context.hpp> // for context
18 #include <sycl/detail/assert_happened.hpp> // for AssertHappened
19 #include <sycl/detail/cg_types.hpp> // for check_fn_signa...
20 #include <sycl/detail/common.hpp> // for code_location
21 #include <sycl/detail/defines_elementary.hpp> // for __SYCL2020_DEP...
22 #include <sycl/detail/export.hpp> // for __SYCL_EXPORT
23 #include <sycl/detail/info_desc_helpers.hpp> // for is_queue_info_...
24 #include <sycl/detail/kernel_desc.hpp> // for KernelInfo
25 #include <sycl/detail/owner_less_base.hpp> // for OwnerLessBase
26 #include <sycl/device.hpp> // for device
27 #include <sycl/device_selector.hpp> // for device_selector
28 #include <sycl/event.hpp> // for event
29 #include <sycl/exception.hpp> // for make_error_code
30 #include <sycl/exception_list.hpp> // for defaultAsyncHa...
31 #include <sycl/ext/oneapi/device_global/device_global.hpp> // for device_global
32 #include <sycl/ext/oneapi/device_global/properties.hpp> // for device_image_s...
33 #include <sycl/ext/oneapi/experimental/graph.hpp> // for command_graph...
34 #include <sycl/ext/oneapi/properties/properties.hpp> // for empty_properti...
35 #include <sycl/handler.hpp> // for handler, isDev...
36 #include <sycl/id.hpp> // for id
37 #include <sycl/kernel.hpp> // for auto_name
38 #include <sycl/kernel_handler.hpp> // for kernel_handler
39 #include <sycl/nd_range.hpp> // for nd_range
40 #include <sycl/property_list.hpp> // for property_list
41 #include <sycl/range.hpp> // for range
42 
43 #include <cstddef> // for size_t
44 #include <functional> // for function
45 #include <memory> // for shared_ptr, hash
46 #include <stdint.h> // for int32_t
47 #include <tuple> // for tuple
48 #include <type_traits> // for remove_all_ext...
49 #include <variant> // for hash
50 #include <vector> // for vector
51 
52 // having _TWO_ mid-param #ifdefs makes the functions very difficult to read.
53 // Here we simplify the KernelFunc param is simplified to be
54 // _KERNELFUNCPARAM(KernelFunc) Once the queue kernel functions are defined,
55 // these macros are #undef immediately.
56 // replace _KERNELFUNCPARAM(KernelFunc) with KernelType KernelFunc
57 // or const KernelType &KernelFunc
58 #ifdef __SYCL_NONCONST_FUNCTOR__
59 #define _KERNELFUNCPARAM(a) KernelType a
60 #else
61 #define _KERNELFUNCPARAM(a) const KernelType &a
62 #endif
63 
64 namespace sycl {
65 inline namespace _V1 {
66 
67 // Forward declaration
68 class context;
69 class device;
70 class event;
71 class queue;
72 
73 template <backend BackendName, class SyclObjectT>
74 auto get_native(const SyclObjectT &Obj)
75  -> backend_return_t<BackendName, SyclObjectT>;
76 
77 namespace detail {
78 class queue_impl;
79 
80 #if __SYCL_USE_FALLBACK_ASSERT
81 inline event submitAssertCapture(queue &, event &, queue *,
82  const detail::code_location &);
83 #endif
84 } // namespace detail
85 
86 namespace ext ::oneapi ::experimental {
87 // State of a queue with regards to graph recording,
88 // returned by info::queue::state
90 struct image_descriptor;
91 
92 namespace detail {
93 template <typename CommandGroupFunc>
94 void submit_impl(queue &Q, CommandGroupFunc &&CGF,
95  const sycl::detail::code_location &CodeLoc);
96 } // namespace detail
97 } // namespace ext::oneapi::experimental
98 
110 class __SYCL_EXPORT queue : public detail::OwnerLessBase<queue> {
111 public:
116  explicit queue(const property_list &PropList = {})
118 
124  queue(const async_handler &AsyncHandler, const property_list &PropList = {})
125  : queue(default_selector_v, AsyncHandler, PropList) {}
126 
133  template <typename DeviceSelector,
134  typename =
135  detail::EnableIfSYCL2020DeviceSelectorInvocable<DeviceSelector>>
136  explicit queue(const DeviceSelector &deviceSelector,
137  const async_handler &AsyncHandler,
138  const property_list &PropList = {})
139  : queue(detail::select_device(deviceSelector), AsyncHandler, PropList) {}
140 
146  template <typename DeviceSelector,
147  typename =
148  detail::EnableIfSYCL2020DeviceSelectorInvocable<DeviceSelector>>
149  explicit queue(const DeviceSelector &deviceSelector,
150  const property_list &PropList = {})
151  : queue(detail::select_device(deviceSelector),
152  detail::defaultAsyncHandler, PropList) {}
153 
160  template <typename DeviceSelector,
161  typename =
162  detail::EnableIfSYCL2020DeviceSelectorInvocable<DeviceSelector>>
163  explicit queue(const context &syclContext,
164  const DeviceSelector &deviceSelector,
165  const property_list &propList = {})
166  : queue(syclContext, detail::select_device(deviceSelector, syclContext),
167  propList) {}
168 
176  template <typename DeviceSelector,
177  typename =
178  detail::EnableIfSYCL2020DeviceSelectorInvocable<DeviceSelector>>
179  explicit queue(const context &syclContext,
180  const DeviceSelector &deviceSelector,
181  const async_handler &AsyncHandler,
182  const property_list &propList = {})
183  : queue(syclContext, detail::select_device(deviceSelector, syclContext),
184  AsyncHandler, propList) {}
185 
191  __SYCL2020_DEPRECATED("SYCL 1.2.1 device selectors are deprecated. Please "
192  "use SYCL 2020 device selectors instead.")
193  queue(const device_selector &DeviceSelector,
194  const property_list &PropList = {})
195  : queue(DeviceSelector.select_device(), detail::defaultAsyncHandler,
196  PropList) {}
197 
204  __SYCL2020_DEPRECATED("SYCL 1.2.1 device selectors are deprecated. Please "
205  "use SYCL 2020 device selectors instead.")
206  queue(const device_selector &DeviceSelector,
207  const async_handler &AsyncHandler, const property_list &PropList = {})
208  : queue(DeviceSelector.select_device(), AsyncHandler, PropList) {}
209 
214  explicit queue(const device &SyclDevice, const property_list &PropList = {})
215  : queue(SyclDevice, detail::defaultAsyncHandler, PropList) {}
216 
223  explicit queue(const device &SyclDevice, const async_handler &AsyncHandler,
224  const property_list &PropList = {});
225 
232  __SYCL2020_DEPRECATED("SYCL 1.2.1 device selectors are deprecated. Please "
233  "use SYCL 2020 device selectors instead.")
234  queue(const context &SyclContext, const device_selector &DeviceSelector,
235  const property_list &PropList = {});
236 
245  __SYCL2020_DEPRECATED("SYCL 1.2.1 device selectors are deprecated. Please "
246  "use SYCL 2020 device selectors instead.")
247  queue(const context &SyclContext, const device_selector &DeviceSelector,
248  const async_handler &AsyncHandler, const property_list &PropList = {});
249 
256  queue(const context &SyclContext, const device &SyclDevice,
257  const property_list &PropList = {});
258 
266  queue(const context &SyclContext, const device &SyclDevice,
267  const async_handler &AsyncHandler, const property_list &PropList = {});
268 
277 #ifdef __SYCL_INTERNAL_API
278  queue(cl_command_queue ClQueue, const context &SyclContext,
279  const async_handler &AsyncHandler = {});
280 #endif
281 
282  queue(const queue &RHS) = default;
283 
284  queue(queue &&RHS) = default;
285 
286  queue &operator=(const queue &RHS) = default;
287 
288  queue &operator=(queue &&RHS) = default;
289 
290  bool operator==(const queue &RHS) const { return impl == RHS.impl; }
291 
292  bool operator!=(const queue &RHS) const { return !(*this == RHS); }
293 
296 #ifdef __SYCL_INTERNAL_API
297  cl_command_queue get() const;
298 #endif
299 
301  context get_context() const;
302 
304  device get_device() const;
305 
307  ext::oneapi::experimental::queue_state ext_oneapi_get_state() const;
308 
312  ext_oneapi_get_graph() const;
313 
317  template <typename Param>
318  typename detail::is_queue_info_desc<Param>::return_type get_info() const;
319 
323  template <typename Param>
325  get_backend_info() const;
326 
327 private:
328  // A shorthand for `get_device().has()' which is expected to be a bit quicker
329  // than the long version
330  bool device_has(aspect Aspect) const;
331 
332 public:
339  template <typename T>
340  std::enable_if_t<std::is_invocable_r_v<void, T, handler &>, event> submit(
341  T CGF,
343  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
344 #if __SYCL_USE_FALLBACK_ASSERT
345  auto PostProcess = [this, &CodeLoc](bool IsKernel, bool KernelUsesAssert,
346  event &E) {
347  if (IsKernel && !device_has(aspect::ext_oneapi_native_assert) &&
348  KernelUsesAssert && !device_has(aspect::accelerator)) {
349  // __devicelib_assert_fail isn't supported by Device-side Runtime
350  // Linking against fallback impl of __devicelib_assert_fail is
351  // performed by program manager class
352  // Fallback assert isn't supported for FPGA
353  submitAssertCapture(*this, E, /* SecondaryQueue = */ nullptr, CodeLoc);
354  }
355  };
356 
357  return submit_impl_and_postprocess(CGF, CodeLoc, PostProcess);
358 #else
359  return submit_impl(CGF, CodeLoc);
360 #endif // __SYCL_USE_FALLBACK_ASSERT
361  }
362 
374  template <typename T>
375  std::enable_if_t<std::is_invocable_r_v<void, T, handler &>, event> submit(
376  T CGF, queue &SecondaryQueue,
378  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
379 #if __SYCL_USE_FALLBACK_ASSERT
380  auto PostProcess = [this, &SecondaryQueue, &CodeLoc](
381  bool IsKernel, bool KernelUsesAssert, event &E) {
382  if (IsKernel && !device_has(aspect::ext_oneapi_native_assert) &&
383  KernelUsesAssert && !device_has(aspect::accelerator)) {
384  // Only secondary queues on devices need to be added to the assert
385  // capture.
386  // __devicelib_assert_fail isn't supported by Device-side Runtime
387  // Linking against fallback impl of __devicelib_assert_fail is
388  // performed by program manager class
389  // Fallback assert isn't supported for FPGA
390  submitAssertCapture(*this, E, &SecondaryQueue, CodeLoc);
391  }
392  };
393 
394  return submit_impl_and_postprocess(CGF, SecondaryQueue, CodeLoc,
395  PostProcess);
396 #else
397  return submit_impl(CGF, SecondaryQueue, CodeLoc);
398 #endif // __SYCL_USE_FALLBACK_ASSERT
399  }
400 
408  event ext_oneapi_submit_barrier(
410 
420  event ext_oneapi_submit_barrier(
421  const std::vector<event> &WaitList,
423 
429  void wait(
431  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
432  wait_proxy(CodeLoc);
433  }
434 
445  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
446  wait_and_throw_proxy(CodeLoc);
447  }
448 
451  void wait_proxy(const detail::code_location &CodeLoc);
454  void wait_and_throw_proxy(const detail::code_location &CodeLoc);
455 
461  void throw_asynchronous();
462 
465  template <typename PropertyT> bool has_property() const noexcept {
466  return getPropList().template has_property<PropertyT>();
467  }
468 
472  template <typename PropertyT> PropertyT get_property() const {
473  return getPropList().template get_property<PropertyT>();
474  }
475 
483  template <typename T>
484  event fill(
485  void *Ptr, const T &Pattern, size_t Count,
487  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
488  return submit([&](handler &CGH) { CGH.fill<T>(Ptr, Pattern, Count); },
489  CodeLoc);
490  }
491 
500  template <typename T>
501  event fill(
502  void *Ptr, const T &Pattern, size_t Count, event DepEvent,
504  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
505  return submit(
506  [&](handler &CGH) {
507  CGH.depends_on(DepEvent);
508  CGH.fill<T>(Ptr, Pattern, Count);
509  },
510  CodeLoc);
511  }
512 
522  template <typename T>
523  event fill(
524  void *Ptr, const T &Pattern, size_t Count,
525  const std::vector<event> &DepEvents,
527  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
528  return submit(
529  [&](handler &CGH) {
530  CGH.depends_on(DepEvents);
531  CGH.fill<T>(Ptr, Pattern, Count);
532  },
533  CodeLoc);
534  }
535 
545  event memset(
546  void *Ptr, int Value, size_t Count,
548 
559  event memset(
560  void *Ptr, int Value, size_t Count, event DepEvent,
562 
574  event memset(
575  void *Ptr, int Value, size_t Count, const std::vector<event> &DepEvents,
577 
589  event memcpy(
590  void *Dest, const void *Src, size_t Count,
592 
605  event memcpy(
606  void *Dest, const void *Src, size_t Count, event DepEvent,
608 
622  event memcpy(
623  void *Dest, const void *Src, size_t Count,
624  const std::vector<event> &DepEvents,
626 
639  template <typename T>
640  event copy(
641  const T *Src, T *Dest, size_t Count,
643  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
644  return this->memcpy(Dest, Src, Count * sizeof(T));
645  }
646 
660  template <typename T>
661  event copy(
662  const T *Src, T *Dest, size_t Count, event DepEvent,
664  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
665  return this->memcpy(Dest, Src, Count * sizeof(T), DepEvent);
666  }
667 
681  template <typename T>
682  event copy(
683  const T *Src, T *Dest, size_t Count, const std::vector<event> &DepEvents,
685  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
686  return this->memcpy(Dest, Src, Count * sizeof(T), DepEvents);
687  }
688 
696  event mem_advise(
697  const void *Ptr, size_t Length, int Advice,
699 
708  event mem_advise(
709  const void *Ptr, size_t Length, int Advice, event DepEvent,
711 
721  event mem_advise(
722  const void *Ptr, size_t Length, int Advice,
723  const std::vector<event> &DepEvents,
725 
733  event prefetch(
734  const void *Ptr, size_t Count,
736  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
737  return submit([=](handler &CGH) { CGH.prefetch(Ptr, Count); }, CodeLoc);
738  }
739 
748  event prefetch(
749  const void *Ptr, size_t Count, event DepEvent,
751  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
752  return submit(
753  [=](handler &CGH) {
754  CGH.depends_on(DepEvent);
755  CGH.prefetch(Ptr, Count);
756  },
757  CodeLoc);
758  }
759 
769  event prefetch(
770  const void *Ptr, size_t Count, const std::vector<event> &DepEvents,
772  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
773  return submit(
774  [=](handler &CGH) {
775  CGH.depends_on(DepEvents);
776  CGH.prefetch(Ptr, Count);
777  },
778  CodeLoc);
779  }
780 
799  template <typename T = unsigned char,
800  typename = std::enable_if_t<std::is_same_v<T, unsigned char>>>
802  void *Dest, size_t DestPitch, const void *Src, size_t SrcPitch,
803  size_t Width, size_t Height,
805  return submit(
806  [=](handler &CGH) {
807  CGH.ext_oneapi_memcpy2d<T>(Dest, DestPitch, Src, SrcPitch, Width,
808  Height);
809  },
810  CodeLoc);
811  }
812 
832  template <typename T = unsigned char,
833  typename = std::enable_if_t<std::is_same_v<T, unsigned char>>>
834  event ext_oneapi_memcpy2d(
835  void *Dest, size_t DestPitch, const void *Src, size_t SrcPitch,
836  size_t Width, size_t Height, event DepEvent,
838 
859  template <typename T = unsigned char,
860  typename = std::enable_if_t<std::is_same_v<T, unsigned char>>>
861  event ext_oneapi_memcpy2d(
862  void *Dest, size_t DestPitch, const void *Src, size_t SrcPitch,
863  size_t Width, size_t Height, const std::vector<event> &DepEvents,
865 
881  template <typename T>
882  event ext_oneapi_copy2d(
883  const T *Src, size_t SrcPitch, T *Dest, size_t DestPitch, size_t Width,
884  size_t Height,
886 
903  template <typename T>
904  event ext_oneapi_copy2d(
905  const T *Src, size_t SrcPitch, T *Dest, size_t DestPitch, size_t Width,
906  size_t Height, event DepEvent,
908 
926  template <typename T>
927  event ext_oneapi_copy2d(
928  const T *Src, size_t SrcPitch, T *Dest, size_t DestPitch, size_t Width,
929  size_t Height, const std::vector<event> &DepEvents,
931 
948  template <typename T = unsigned char,
949  typename = std::enable_if_t<std::is_same_v<T, unsigned char>>>
950  event ext_oneapi_memset2d(
951  void *Dest, size_t DestPitch, int Value, size_t Width, size_t Height,
953 
971  template <typename T = unsigned char,
972  typename = std::enable_if_t<std::is_same_v<T, unsigned char>>>
973  event ext_oneapi_memset2d(
974  void *Dest, size_t DestPitch, int Value, size_t Width, size_t Height,
975  event DepEvent,
977 
996  template <typename T = unsigned char,
997  typename = std::enable_if_t<std::is_same_v<T, unsigned char>>>
998  event ext_oneapi_memset2d(
999  void *Dest, size_t DestPitch, int Value, size_t Width, size_t Height,
1000  const std::vector<event> &DepEvents,
1002 
1016  template <typename T>
1017  event ext_oneapi_fill2d(
1018  void *Dest, size_t DestPitch, const T &Pattern, size_t Width,
1019  size_t Height,
1021 
1036  template <typename T>
1037  event ext_oneapi_fill2d(
1038  void *Dest, size_t DestPitch, const T &Pattern, size_t Width,
1039  size_t Height, event DepEvent,
1041 
1057  template <typename T>
1058  event ext_oneapi_fill2d(
1059  void *Dest, size_t DestPitch, const T &Pattern, size_t Width,
1060  size_t Height, const std::vector<event> &DepEvents,
1062 
1075  template <typename T, typename PropertyListT>
1076  event memcpy(
1078  const void *Src, size_t NumBytes, size_t Offset,
1079  const std::vector<event> &DepEvents,
1081  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
1082  if (sizeof(T) < Offset + NumBytes)
1084  "Copy to device_global is out of bounds.");
1085 
1086  if (!detail::isDeviceGlobalUsedInKernel(&Dest)) {
1087  // device_global is unregistered so we need a fallback. We let the handler
1088  // implement this fallback.
1089  return submit(
1090  [&](handler &CGH) {
1091  CGH.depends_on(DepEvents);
1092  return CGH.memcpy(Dest, Src, NumBytes, Offset);
1093  },
1094  CodeLoc);
1095  }
1096 
1097  constexpr bool IsDeviceImageScoped = PropertyListT::template has_property<
1099  return memcpyToDeviceGlobal(&Dest, Src, IsDeviceImageScoped, NumBytes,
1100  Offset, DepEvents);
1101  }
1102 
1115  template <typename T, typename PropertyListT>
1116  event memcpy(
1118  const void *Src, size_t NumBytes, size_t Offset, event DepEvent,
1120  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
1121  return this->memcpy(Dest, Src, NumBytes, Offset,
1122  std::vector<event>{DepEvent});
1123  }
1124 
1135  template <typename T, typename PropertyListT>
1136  event memcpy(
1138  const void *Src, size_t NumBytes = sizeof(T), size_t Offset = 0,
1140  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
1141  return this->memcpy(Dest, Src, NumBytes, Offset, std::vector<event>{});
1142  }
1143 
1156  template <typename T, typename PropertyListT>
1157  event memcpy(
1158  void *Dest,
1160  size_t NumBytes, size_t Offset, const std::vector<event> &DepEvents,
1162  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
1163  if (sizeof(T) < Offset + NumBytes)
1165  "Copy from device_global is out of bounds.");
1166 
1168  // device_global is unregistered so we need a fallback. We let the handler
1169  // implement this fallback.
1170  return submit([&](handler &CGH) {
1171  CGH.depends_on(DepEvents);
1172  return CGH.memcpy(Dest, Src, NumBytes, Offset);
1173  });
1174  }
1175 
1176  constexpr bool IsDeviceImageScoped = PropertyListT::template has_property<
1178  return memcpyFromDeviceGlobal(Dest, &Src, IsDeviceImageScoped, NumBytes,
1179  Offset, DepEvents);
1180  }
1181 
1194  template <typename T, typename PropertyListT>
1195  event memcpy(
1196  void *Dest,
1198  size_t NumBytes, size_t Offset, event DepEvent,
1200  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
1201  return this->memcpy(Dest, Src, NumBytes, Offset,
1202  std::vector<event>{DepEvent});
1203  }
1204 
1215  template <typename T, typename PropertyListT>
1216  event memcpy(
1217  void *Dest,
1219  size_t NumBytes = sizeof(T), size_t Offset = 0,
1221  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
1222  return this->memcpy(Dest, Src, NumBytes, Offset, std::vector<event>{});
1223  }
1224 
1238  template <typename T, typename PropertyListT>
1239  event copy(
1240  const std::remove_all_extents_t<T> *Src,
1242  size_t Count, size_t StartIndex, const std::vector<event> &DepEvents,
1244  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
1245  return this->memcpy(Dest, Src, Count * sizeof(std::remove_all_extents_t<T>),
1246  StartIndex * sizeof(std::remove_all_extents_t<T>),
1247  DepEvents);
1248  }
1249 
1263  template <typename T, typename PropertyListT>
1264  event copy(
1265  const std::remove_all_extents_t<T> *Src,
1267  size_t Count, size_t StartIndex, event DepEvent,
1269  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
1270  return this->memcpy(Dest, Src, Count * sizeof(std::remove_all_extents_t<T>),
1271  StartIndex * sizeof(std::remove_all_extents_t<T>),
1272  DepEvent);
1273  }
1274 
1286  template <typename T, typename PropertyListT>
1287  event copy(
1288  const std::remove_all_extents_t<T> *Src,
1290  size_t Count = sizeof(T) / sizeof(std::remove_all_extents_t<T>),
1291  size_t StartIndex = 0,
1293  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
1294  return this->memcpy(Dest, Src, Count * sizeof(std::remove_all_extents_t<T>),
1295  StartIndex * sizeof(std::remove_all_extents_t<T>));
1296  }
1297 
1311  template <typename T, typename PropertyListT>
1312  event copy(
1314  std::remove_all_extents_t<T> *Dest, size_t Count, size_t StartIndex,
1315  const std::vector<event> &DepEvents,
1317  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
1318  return this->memcpy(Dest, Src, Count * sizeof(std::remove_all_extents_t<T>),
1319  StartIndex * sizeof(std::remove_all_extents_t<T>),
1320  DepEvents);
1321  }
1322 
1336  template <typename T, typename PropertyListT>
1337  event copy(
1339  std::remove_all_extents_t<T> *Dest, size_t Count, size_t StartIndex,
1340  event DepEvent,
1342  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
1343  return this->memcpy(Dest, Src, Count * sizeof(std::remove_all_extents_t<T>),
1344  StartIndex * sizeof(std::remove_all_extents_t<T>),
1345  DepEvent);
1346  }
1347 
1359  template <typename T, typename PropertyListT>
1360  event copy(
1362  std::remove_all_extents_t<T> *Dest,
1363  size_t Count = sizeof(T) / sizeof(std::remove_all_extents_t<T>),
1364  size_t StartIndex = 0,
1366  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
1367  return this->memcpy(Dest, Src, Count * sizeof(std::remove_all_extents_t<T>),
1368  StartIndex * sizeof(std::remove_all_extents_t<T>));
1369  }
1370 
1382  event ext_oneapi_copy(
1383  const void *Src, ext::oneapi::experimental::image_mem_handle Dest,
1386 
1406  event ext_oneapi_copy(
1407  const void *Src, sycl::range<3> SrcOffset, sycl::range<3> SrcExtent,
1409  sycl::range<3> DestOffset,
1411  sycl::range<3> CopyExtent,
1413 
1426  event ext_oneapi_copy(
1427  const void *Src, ext::oneapi::experimental::image_mem_handle Dest,
1429  event DepEvent,
1431 
1452  event ext_oneapi_copy(
1453  const void *Src, sycl::range<3> SrcOffset, sycl::range<3> SrcExtent,
1455  sycl::range<3> DestOffset,
1457  sycl::range<3> CopyExtent, event DepEvent,
1459 
1473  event ext_oneapi_copy(
1474  const void *Src, ext::oneapi::experimental::image_mem_handle Dest,
1476  const std::vector<event> &DepEvents,
1478 
1500  event ext_oneapi_copy(
1501  const void *Src, sycl::range<3> SrcOffset, sycl::range<3> SrcExtent,
1503  sycl::range<3> DestOffset,
1505  sycl::range<3> CopyExtent, const std::vector<event> &DepEvents,
1507 
1518  event ext_oneapi_copy(
1519  const ext::oneapi::experimental::image_mem_handle Src, void *Dest,
1522 
1544  event ext_oneapi_copy(
1546  sycl::range<3> SrcOffset,
1547  const ext::oneapi::experimental::image_descriptor &SrcImgDesc, void *Dest,
1548  sycl::range<3> DestOffset, sycl::range<3> DestExtent,
1549  sycl::range<3> CopyExtent,
1551 
1563  event ext_oneapi_copy(
1564  const ext::oneapi::experimental::image_mem_handle Src, void *Dest,
1566  event DepEvent,
1568 
1591  event ext_oneapi_copy(
1593  sycl::range<3> SrcOffset,
1594  const ext::oneapi::experimental::image_descriptor &SrcImgDesc, void *Dest,
1595  sycl::range<3> DestOffset, sycl::range<3> DestExtent,
1596  sycl::range<3> CopyExtent, event DepEvent,
1598 
1611  event ext_oneapi_copy(
1612  const ext::oneapi::experimental::image_mem_handle Src, void *Dest,
1614  const std::vector<event> &DepEvents,
1616 
1640  event ext_oneapi_copy(
1642  sycl::range<3> SrcOffset,
1643  const ext::oneapi::experimental::image_descriptor &SrcImgDesc, void *Dest,
1644  sycl::range<3> DestOffset, sycl::range<3> DestExtent,
1645  sycl::range<3> CopyExtent, const std::vector<event> &DepEvents,
1647 
1659  event ext_oneapi_copy(
1660  const void *Src, void *Dest,
1661  const ext::oneapi::experimental::image_descriptor &DeviceImgDesc,
1662  size_t DeviceRowPitch,
1664 
1687  event ext_oneapi_copy(
1688  const void *Src, sycl::range<3> SrcOffset, void *Dest,
1689  sycl::range<3> DestOffset,
1690  const ext::oneapi::experimental::image_descriptor &DeviceImgDesc,
1691  size_t DeviceRowPitch, sycl::range<3> HostExtent,
1692  sycl::range<3> CopyExtent,
1694 
1707  event ext_oneapi_copy(
1708  const void *Src, void *Dest,
1709  const ext::oneapi::experimental::image_descriptor &DeviceImgDesc,
1710  size_t DeviceRowPitch, event DepEvent,
1712 
1722  event ext_oneapi_copy(
1726  event DepEvent,
1728 
1739  event ext_oneapi_copy(
1743  const std::vector<event> &DepEvents,
1745 
1754  event ext_oneapi_copy(
1759 
1783  event ext_oneapi_copy(
1784  const void *Src, sycl::range<3> SrcOffset, void *Dest,
1785  sycl::range<3> DestOffset,
1786  const ext::oneapi::experimental::image_descriptor &DeviceImgDesc,
1787  size_t DeviceRowPitch, sycl::range<3> HostExtent,
1788  sycl::range<3> CopyExtent, event DepEvent,
1790 
1804  event ext_oneapi_copy(
1805  const void *Src, void *Dest,
1806  const ext::oneapi::experimental::image_descriptor &DeviceImgDesc,
1807  size_t DeviceRowPitch, const std::vector<event> &DepEvents,
1809 
1834  event ext_oneapi_copy(
1835  const void *Src, sycl::range<3> SrcOffset, void *Dest,
1836  sycl::range<3> DestOffset,
1837  const ext::oneapi::experimental::image_descriptor &DeviceImgDesc,
1838  size_t DeviceRowPitch, sycl::range<3> HostExtent,
1839  sycl::range<3> CopyExtent, const std::vector<event> &DepEvents,
1841 
1851  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
1852  return submit(
1853  [&](handler &CGH) {
1854  CGH.ext_oneapi_wait_external_semaphore(SemaphoreHandle);
1855  },
1856  CodeLoc);
1857  }
1858 
1866  event ext_oneapi_wait_external_semaphore(
1868  event DepEvent,
1870 
1879  event ext_oneapi_wait_external_semaphore(
1881  const std::vector<event> &DepEvents,
1883 
1892  event ext_oneapi_wait_external_semaphore(
1894  uint64_t WaitValue,
1896 
1906  event ext_oneapi_wait_external_semaphore(
1908  uint64_t WaitValue, event DepEvent,
1910 
1921  event ext_oneapi_wait_external_semaphore(
1923  uint64_t WaitValue, const std::vector<event> &DepEvents,
1925 
1933  event ext_oneapi_signal_external_semaphore(
1936 
1945  event ext_oneapi_signal_external_semaphore(
1947  event DepEvent,
1949 
1959  event ext_oneapi_signal_external_semaphore(
1961  const std::vector<event> &DepEvents,
1963 
1973  event ext_oneapi_signal_external_semaphore(
1975  uint64_t SignalValue,
1977 
1988  event ext_oneapi_signal_external_semaphore(
1990  uint64_t SignalValue, event DepEvent,
1992 
2004  event ext_oneapi_signal_external_semaphore(
2006  uint64_t SignalValue, const std::vector<event> &DepEvents,
2008 
2014  template <typename KernelName = detail::auto_name, typename KernelType,
2015  typename PropertiesT>
2016  std::enable_if_t<
2019  PropertiesT Properties, _KERNELFUNCPARAM(KernelFunc),
2021  static_assert(
2022  (detail::check_fn_signature<std::remove_reference_t<KernelType>,
2023  void()>::value ||
2024  detail::check_fn_signature<std::remove_reference_t<KernelType>,
2025  void(kernel_handler)>::value),
2026  "sycl::queue.single_task() requires a kernel instead of command group. "
2027  "Use queue.submit() instead");
2028 
2029  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
2030  return submit(
2031  [&](handler &CGH) {
2032  CGH.template single_task<KernelName, KernelType, PropertiesT>(
2033  Properties, KernelFunc);
2034  },
2035  CodeLoc);
2036  }
2037 
2042  template <typename KernelName = detail::auto_name, typename KernelType>
2044  _KERNELFUNCPARAM(KernelFunc),
2046  return single_task<KernelName, KernelType>(
2047  ext::oneapi::experimental::empty_properties_t{}, KernelFunc, CodeLoc);
2048  }
2049 
2056  template <typename KernelName = detail::auto_name, typename KernelType,
2057  typename PropertiesT>
2058  std::enable_if_t<
2061  event DepEvent, PropertiesT Properties, _KERNELFUNCPARAM(KernelFunc),
2063  static_assert(
2064  (detail::check_fn_signature<std::remove_reference_t<KernelType>,
2065  void()>::value ||
2066  detail::check_fn_signature<std::remove_reference_t<KernelType>,
2067  void(kernel_handler)>::value),
2068  "sycl::queue.single_task() requires a kernel instead of command group. "
2069  "Use queue.submit() instead");
2070 
2071  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
2072  return submit(
2073  [&](handler &CGH) {
2074  CGH.depends_on(DepEvent);
2075  CGH.template single_task<KernelName, KernelType, PropertiesT>(
2076  Properties, KernelFunc);
2077  },
2078  CodeLoc);
2079  }
2080 
2086  template <typename KernelName = detail::auto_name, typename KernelType>
2088  event DepEvent, _KERNELFUNCPARAM(KernelFunc),
2090  return single_task<KernelName, KernelType>(
2091  DepEvent, ext::oneapi::experimental::empty_properties_t{}, KernelFunc,
2092  CodeLoc);
2093  }
2094 
2102  template <typename KernelName = detail::auto_name, typename KernelType,
2103  typename PropertiesT>
2104  std::enable_if_t<
2107  const std::vector<event> &DepEvents, PropertiesT Properties,
2108  _KERNELFUNCPARAM(KernelFunc),
2110  static_assert(
2111  (detail::check_fn_signature<std::remove_reference_t<KernelType>,
2112  void()>::value ||
2113  detail::check_fn_signature<std::remove_reference_t<KernelType>,
2114  void(kernel_handler)>::value),
2115  "sycl::queue.single_task() requires a kernel instead of command group. "
2116  "Use queue.submit() instead");
2117 
2118  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
2119  return submit(
2120  [&](handler &CGH) {
2121  CGH.depends_on(DepEvents);
2122  CGH.template single_task<KernelName, KernelType, PropertiesT>(
2123  Properties, KernelFunc);
2124  },
2125  CodeLoc);
2126  }
2127 
2134  template <typename KernelName = detail::auto_name, typename KernelType>
2136  const std::vector<event> &DepEvents, _KERNELFUNCPARAM(KernelFunc),
2138  return single_task<KernelName, KernelType>(
2139  DepEvents, ext::oneapi::experimental::empty_properties_t{}, KernelFunc,
2140  CodeLoc);
2141  }
2142 
2149  template <typename KernelName = detail::auto_name, typename... RestT>
2150  event parallel_for(range<1> Range, RestT &&...Rest) {
2151  return parallel_for_impl<KernelName>(Range, Rest...);
2152  }
2153 
2160  template <typename KernelName = detail::auto_name, typename... RestT>
2161  event parallel_for(range<2> Range, RestT &&...Rest) {
2162  return parallel_for_impl<KernelName>(Range, Rest...);
2163  }
2164 
2171  template <typename KernelName = detail::auto_name, typename... RestT>
2172  event parallel_for(range<3> Range, RestT &&...Rest) {
2173  return parallel_for_impl<KernelName>(Range, Rest...);
2174  }
2175 
2183  template <typename KernelName = detail::auto_name, typename... RestT>
2184  event parallel_for(range<1> Range, event DepEvent, RestT &&...Rest) {
2185  return parallel_for_impl<KernelName>(Range, DepEvent, Rest...);
2186  }
2187 
2195  template <typename KernelName = detail::auto_name, typename... RestT>
2196  event parallel_for(range<2> Range, event DepEvent, RestT &&...Rest) {
2197  return parallel_for_impl<KernelName>(Range, DepEvent, Rest...);
2198  }
2199 
2207  template <typename KernelName = detail::auto_name, typename... RestT>
2208  event parallel_for(range<3> Range, event DepEvent, RestT &&...Rest) {
2209  return parallel_for_impl<KernelName>(Range, DepEvent, Rest...);
2210  }
2211 
2220  template <typename KernelName = detail::auto_name, typename... RestT>
2221  event parallel_for(range<1> Range, const std::vector<event> &DepEvents,
2222  RestT &&...Rest) {
2223  return parallel_for_impl<KernelName>(Range, DepEvents, Rest...);
2224  }
2225 
2234  template <typename KernelName = detail::auto_name, typename... RestT>
2235  event parallel_for(range<2> Range, const std::vector<event> &DepEvents,
2236  RestT &&...Rest) {
2237  return parallel_for_impl<KernelName>(Range, DepEvents, Rest...);
2238  }
2239 
2248  template <typename KernelName = detail::auto_name, typename... RestT>
2249  event parallel_for(range<3> Range, const std::vector<event> &DepEvents,
2250  RestT &&...Rest) {
2251  return parallel_for_impl<KernelName>(Range, DepEvents, Rest...);
2252  }
2253 
2254  // While other shortcuts with offsets are able to go through parallel_for(...,
2255  // RestT &&...Rest), those that accept dependency events vector have to be
2256  // overloaded to allow implicit construction from an init-list.
2264  template <typename KernelName = detail::auto_name, typename KernelType,
2265  int Dim>
2266  event parallel_for(range<Dim> Range, id<Dim> WorkItemOffset,
2267  const std::vector<event> &DepEvents,
2268  _KERNELFUNCPARAM(KernelFunc)) {
2269  static_assert(1 <= Dim && Dim <= 3, "Invalid number of dimensions");
2270  return parallel_for_impl<KernelName>(Range, WorkItemOffset, DepEvents,
2271  KernelFunc);
2272  }
2273 
2281  template <typename KernelName = detail::auto_name, typename KernelType,
2282  int Dims>
2283  __SYCL2020_DEPRECATED("offsets are deprecated in SYCL 2020")
2284  event parallel_for_impl(range<Dims> Range, id<Dims> WorkItemOffset,
2285  _KERNELFUNCPARAM(KernelFunc)) {
2286  // Actual code location needs to be captured from KernelInfo object.
2287  const detail::code_location CodeLoc = {};
2288  return submit(
2289  [&](handler &CGH) {
2290  CGH.template parallel_for<KernelName>(Range, WorkItemOffset,
2291  KernelFunc);
2292  },
2293  CodeLoc);
2294  }
2295 
2304  template <typename KernelName = detail::auto_name, typename KernelType,
2305  int Dims>
2306  __SYCL2020_DEPRECATED("offsets are deprecated in SYCL 2020")
2307  event parallel_for_impl(range<Dims> Range, id<Dims> WorkItemOffset,
2308  event DepEvent, _KERNELFUNCPARAM(KernelFunc)) {
2309  // Actual code location needs to be captured from KernelInfo object.
2310  const detail::code_location CodeLoc = {};
2311  return submit(
2312  [&](handler &CGH) {
2313  CGH.depends_on(DepEvent);
2314  CGH.template parallel_for<KernelName>(Range, WorkItemOffset,
2315  KernelFunc);
2316  },
2317  CodeLoc);
2318  }
2319 
2329  template <typename KernelName = detail::auto_name, typename KernelType,
2330  int Dims>
2331  __SYCL2020_DEPRECATED("offsets are deprecated in SYCL 2020")
2332  event parallel_for_impl(range<Dims> Range, id<Dims> WorkItemOffset,
2333  const std::vector<event> &DepEvents,
2334  _KERNELFUNCPARAM(KernelFunc)) {
2335  // Actual code location needs to be captured from KernelInfo object.
2336  const detail::code_location CodeLoc = {};
2337  return submit(
2338  [&](handler &CGH) {
2339  CGH.depends_on(DepEvents);
2340  CGH.template parallel_for<KernelName>(Range, WorkItemOffset,
2341  KernelFunc);
2342  },
2343  CodeLoc);
2344  }
2345 
2353  template <typename KernelName = detail::auto_name, int Dims,
2354  typename PropertiesT, typename... RestT>
2355  std::enable_if_t<
2356  detail::AreAllButLastReductions<RestT...>::value &&
2358  event>
2359  parallel_for(nd_range<Dims> Range, PropertiesT Properties, RestT &&...Rest) {
2360  using KI = sycl::detail::KernelInfo<KernelName>;
2361  constexpr detail::code_location CodeLoc(
2362  KI::getFileName(), KI::getFunctionName(), KI::getLineNumber(),
2363  KI::getColumnNumber());
2364  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
2365  return submit(
2366  [&](handler &CGH) {
2367  CGH.template parallel_for<KernelName>(Range, Properties, Rest...);
2368  },
2369  CodeLoc);
2370  }
2371 
2378  template <typename KernelName = detail::auto_name, int Dims,
2379  typename... RestT>
2380  std::enable_if_t<detail::AreAllButLastReductions<RestT...>::value, event>
2381  parallel_for(nd_range<Dims> Range, RestT &&...Rest) {
2382  return parallel_for<KernelName>(
2384  }
2385 
2393  template <typename KernelName = detail::auto_name, int Dims,
2394  typename... RestT>
2395  event parallel_for(nd_range<Dims> Range, event DepEvent, RestT &&...Rest) {
2396  using KI = sycl::detail::KernelInfo<KernelName>;
2397  constexpr detail::code_location CodeLoc(
2398  KI::getFileName(), KI::getFunctionName(), KI::getLineNumber(),
2399  KI::getColumnNumber());
2400  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
2401  return submit(
2402  [&](handler &CGH) {
2403  CGH.depends_on(DepEvent);
2404  CGH.template parallel_for<KernelName>(Range, Rest...);
2405  },
2406  CodeLoc);
2407  }
2408 
2417  template <typename KernelName = detail::auto_name, int Dims,
2418  typename... RestT>
2419  event parallel_for(nd_range<Dims> Range, const std::vector<event> &DepEvents,
2420  RestT &&...Rest) {
2421  using KI = sycl::detail::KernelInfo<KernelName>;
2422  constexpr detail::code_location CodeLoc(
2423  KI::getFileName(), KI::getFunctionName(), KI::getLineNumber(),
2424  KI::getColumnNumber());
2425  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
2426  return submit(
2427  [&](handler &CGH) {
2428  CGH.depends_on(DepEvents);
2429  CGH.template parallel_for<KernelName>(Range, Rest...);
2430  },
2431  CodeLoc);
2432  }
2433 
2440  template <typename SrcT, int SrcDims, access_mode SrcMode, target SrcTgt,
2441  access::placeholder IsPlaceholder, typename DestT>
2442  event copy(
2444  std::shared_ptr<DestT> Dest,
2446  return submit(
2447  [&](handler &CGH) {
2448  CGH.require(Src);
2449  CGH.copy(Src, Dest);
2450  },
2451  CodeLoc);
2452  }
2453 
2460  template <typename SrcT, typename DestT, int DestDims, access_mode DestMode,
2462  event copy(
2463  std::shared_ptr<SrcT> Src,
2466  return submit(
2467  [&](handler &CGH) {
2468  CGH.require(Dest);
2469  CGH.copy(Src, Dest);
2470  },
2471  CodeLoc);
2472  }
2473 
2480  template <typename SrcT, int SrcDims, access_mode SrcMode, target SrcTgt,
2481  access::placeholder IsPlaceholder, typename DestT>
2482  event copy(
2485  return submit(
2486  [&](handler &CGH) {
2487  CGH.require(Src);
2488  CGH.copy(Src, Dest);
2489  },
2490  CodeLoc);
2491  }
2492 
2499  template <typename SrcT, typename DestT, int DestDims, access_mode DestMode,
2501  event copy(
2502  const SrcT *Src,
2505  return submit(
2506  [&](handler &CGH) {
2507  CGH.require(Dest);
2508  CGH.copy(Src, Dest);
2509  },
2510  CodeLoc);
2511  }
2512 
2519  template <typename SrcT, int SrcDims, access_mode SrcMode, target SrcTgt,
2520  access::placeholder IsSrcPlaceholder, typename DestT, int DestDims,
2521  access_mode DestMode, target DestTgt,
2522  access::placeholder IsDestPlaceholder>
2523  event copy(
2527  return submit(
2528  [&](handler &CGH) {
2529  CGH.require(Src);
2530  CGH.require(Dest);
2531  CGH.copy(Src, Dest);
2532  },
2533  CodeLoc);
2534  }
2535 
2541  template <typename T, int Dims, access_mode Mode, target Tgt,
2546  return submit(
2547  [&](handler &CGH) {
2548  CGH.require(Acc);
2549  CGH.update_host(Acc);
2550  },
2551  CodeLoc);
2552  }
2553 
2560  template <typename T, int Dims, access_mode Mode, target Tgt,
2562  event fill(
2565  return submit(
2566  [&](handler &CGH) {
2567  CGH.require(Dest);
2568  CGH.fill<T>(Dest, Src);
2569  },
2570  CodeLoc);
2571  }
2572 
2579  bool ext_codeplay_supports_fusion() const;
2580 
2581 // Clean KERNELFUNC macros.
2582 #undef _KERNELFUNCPARAM
2583 
2591  Graph,
2593  return submit([&](handler &CGH) { CGH.ext_oneapi_graph(Graph); }, CodeLoc);
2594  }
2595 
2605  Graph,
2606  event DepEvent,
2608  return submit(
2609  [&](handler &CGH) {
2610  CGH.depends_on(DepEvent);
2611  CGH.ext_oneapi_graph(Graph);
2612  },
2613  CodeLoc);
2614  }
2615 
2625  Graph,
2626  const std::vector<event> &DepEvents,
2628  return submit(
2629  [&](handler &CGH) {
2630  CGH.depends_on(DepEvents);
2631  CGH.ext_oneapi_graph(Graph);
2632  },
2633  CodeLoc);
2634  }
2635 
2639  void ext_oneapi_prod();
2640 
2644  bool is_in_order() const;
2645 
2649  backend get_backend() const noexcept;
2650 
2655  bool ext_oneapi_empty() const;
2656 
2657  pi_native_handle getNative(int32_t &NativeHandleDesc) const;
2658 
2659  event ext_oneapi_get_last_event() const;
2660 
2661  void ext_oneapi_set_external_event(const event &external_event);
2662 
2663 private:
2664  std::shared_ptr<detail::queue_impl> impl;
2665  queue(std::shared_ptr<detail::queue_impl> impl) : impl(impl) {}
2666 
2667  template <class Obj>
2668  friend const decltype(Obj::impl)& detail::getSyclObjImpl(const Obj &SyclObject);
2669  template <class T>
2670  friend T detail::createSyclObjFromImpl(decltype(T::impl) ImplObj);
2671 
2672  template <backend BackendName, class SyclObjectT>
2673  friend auto get_native(const SyclObjectT &Obj)
2675 
2676 #if __SYCL_USE_FALLBACK_ASSERT
2677  friend event detail::submitAssertCapture(queue &, event &, queue *,
2678  const detail::code_location &);
2679 #endif
2680 
2681  template <typename CommandGroupFunc>
2683  queue &Q, CommandGroupFunc &&CGF,
2684  const sycl::detail::code_location &CodeLoc);
2685 
2687  event submit_impl(std::function<void(handler &)> CGH,
2688  const detail::code_location &CodeLoc);
2690  event submit_impl(std::function<void(handler &)> CGH, queue secondQueue,
2691  const detail::code_location &CodeLoc);
2692 
2694  void submit_without_event_impl(std::function<void(handler &)> CGH,
2695  const detail::code_location &CodeLoc);
2696 
2702  template <typename T>
2703  std::enable_if_t<std::is_invocable_r_v<void, T, handler &>, void>
2704  submit_without_event(T CGF, const detail::code_location &CodeLoc) {
2705  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
2706 #if __SYCL_USE_FALLBACK_ASSERT
2707  // If post-processing is needed, fall back to the regular submit.
2708  // TODO: Revisit whether we can avoid this.
2709  submit(CGF, CodeLoc);
2710 #else
2711  submit_without_event_impl(CGF, CodeLoc);
2712 #endif // __SYCL_USE_FALLBACK_ASSERT
2713  }
2714 
2715  // Function to postprocess submitted command
2716  // Arguments:
2717  // bool IsKernel - true if the submitted command was kernel, false otherwise
2718  // bool KernelUsesAssert - true if submitted kernel uses assert, only
2719  // meaningful when IsKernel is true
2720  // event &Event - event after which post processing should be executed
2721  using SubmitPostProcessF = std::function<void(bool, bool, event &)>;
2722 
2728  event submit_impl_and_postprocess(std::function<void(handler &)> CGH,
2729  const detail::code_location &CodeLoc,
2730  const SubmitPostProcessF &PostProcess);
2737  event submit_impl_and_postprocess(std::function<void(handler &)> CGH,
2738  queue secondQueue,
2739  const detail::code_location &CodeLoc,
2740  const SubmitPostProcessF &PostProcess);
2741 
2748  template <typename KernelName, int Dims, typename PropertiesT,
2749  typename... RestT>
2750  std::enable_if_t<
2751  detail::AreAllButLastReductions<RestT...>::value &&
2753  event>
2754  parallel_for_impl(range<Dims> Range, PropertiesT Properties,
2755  RestT &&...Rest) {
2756  using KI = sycl::detail::KernelInfo<KernelName>;
2757  constexpr detail::code_location CodeLoc(
2758  KI::getFileName(), KI::getFunctionName(), KI::getLineNumber(),
2759  KI::getColumnNumber());
2760  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
2761  return submit(
2762  [&](handler &CGH) {
2763  CGH.template parallel_for<KernelName>(Range, Properties, Rest...);
2764  },
2765  CodeLoc);
2766  }
2767 
2773  template <typename KernelName, int Dims, typename... RestT>
2774  std::enable_if_t<detail::AreAllButLastReductions<RestT...>::value, event>
2775  parallel_for_impl(range<Dims> Range, RestT &&...Rest) {
2776  return parallel_for_impl<KernelName>(
2778  }
2779 
2787  template <typename KernelName, int Dims, typename PropertiesT,
2788  typename... RestT>
2789  std::enable_if_t<
2790  ext::oneapi::experimental::is_property_list<PropertiesT>::value, event>
2791  parallel_for_impl(range<Dims> Range, event DepEvent, PropertiesT Properties,
2792  RestT &&...Rest) {
2793  using KI = sycl::detail::KernelInfo<KernelName>;
2794  constexpr detail::code_location CodeLoc(
2795  KI::getFileName(), KI::getFunctionName(), KI::getLineNumber(),
2796  KI::getColumnNumber());
2797  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
2798  return submit(
2799  [&](handler &CGH) {
2800  CGH.depends_on(DepEvent);
2801  CGH.template parallel_for<KernelName>(Range, Properties, Rest...);
2802  },
2803  CodeLoc);
2804  }
2805 
2812  template <typename KernelName, int Dims, typename... RestT>
2813  event parallel_for_impl(range<Dims> Range, event DepEvent, RestT &&...Rest) {
2814  return parallel_for_impl<KernelName>(
2816  Rest...);
2817  }
2818 
2827  template <typename KernelName, int Dims, typename PropertiesT,
2828  typename... RestT>
2829  std::enable_if_t<
2830  ext::oneapi::experimental::is_property_list<PropertiesT>::value, event>
2831  parallel_for_impl(range<Dims> Range, const std::vector<event> &DepEvents,
2832  PropertiesT Properties, RestT &&...Rest) {
2833  using KI = sycl::detail::KernelInfo<KernelName>;
2834  constexpr detail::code_location CodeLoc(
2835  KI::getFileName(), KI::getFunctionName(), KI::getLineNumber(),
2836  KI::getColumnNumber());
2837  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
2838  return submit(
2839  [&](handler &CGH) {
2840  CGH.depends_on(DepEvents);
2841  CGH.template parallel_for<KernelName>(Range, Properties, Rest...);
2842  },
2843  CodeLoc);
2844  }
2845 
2853  template <typename KernelName, int Dims, typename... RestT>
2854  event parallel_for_impl(range<Dims> Range,
2855  const std::vector<event> &DepEvents,
2856  RestT &&...Rest) {
2857  return parallel_for_impl<KernelName>(
2859  Rest...);
2860  }
2861 
2862  event memcpyToDeviceGlobal(void *DeviceGlobalPtr, const void *Src,
2863  bool IsDeviceImageScope, size_t NumBytes,
2864  size_t Offset,
2865  const std::vector<event> &DepEvents);
2866  event memcpyFromDeviceGlobal(void *Dest, const void *DeviceGlobalPtr,
2867  bool IsDeviceImageScope, size_t NumBytes,
2868  size_t Offset,
2869  const std::vector<event> &DepEvents);
2870  const property_list &getPropList() const;
2871 };
2872 
2873 } // namespace _V1
2874 } // namespace sycl
2875 
2876 namespace std {
2877 template <> struct __SYCL_EXPORT hash<sycl::queue> {
2878  size_t operator()(const sycl::queue &Q) const;
2879 };
2880 } // namespace std
2881 
2882 #if __SYCL_USE_FALLBACK_ASSERT
2883 // Explicitly request format macros
2884 #ifndef __STDC_FORMAT_MACROS
2885 #define __STDC_FORMAT_MACROS 1
2886 #endif
2887 #include <cinttypes>
2888 
2889 namespace sycl {
2890 inline namespace _V1 {
2891 
2892 namespace detail {
2893 #define __SYCL_ASSERT_START 1
2894 
2895 namespace __sycl_service_kernel__ {
2896 class AssertInfoCopier;
2897 } // namespace __sycl_service_kernel__
2898 
2910 event submitAssertCapture(queue &Self, event &Event, queue *SecondaryQueue,
2911  const detail::code_location &CodeLoc) {
2912  buffer<detail::AssertHappened, 1> Buffer{1};
2913 
2914  event CopierEv, CheckerEv, PostCheckerEv;
2915  auto CopierCGF = [&](handler &CGH) {
2916  CGH.depends_on(Event);
2917 
2918  auto Acc = Buffer.get_access<access::mode::write>(CGH);
2919 
2920  CGH.single_task<__sycl_service_kernel__::AssertInfoCopier>([Acc] {
2921 #if defined(__SYCL_DEVICE_ONLY__) && !defined(__NVPTX__)
2922  __devicelib_assert_read(&Acc[0]);
2923 #else
2924  (void)Acc;
2925 #endif // defined(__SYCL_DEVICE_ONLY__) && !defined(__NVPTX__)
2926  });
2927  };
2928  auto CheckerCGF = [&CopierEv, &Buffer](handler &CGH) {
2929  CGH.depends_on(CopierEv);
2930  using mode = access::mode;
2931  using target = access::target;
2932 
2933  auto Acc = Buffer.get_access<mode::read, target::host_buffer>(CGH);
2934 
2935  CGH.host_task([=] {
2936  const detail::AssertHappened *AH = &Acc[0];
2937 
2938  // Don't use assert here as msvc will insert reference to __imp__wassert
2939  // which won't be properly resolved in separate compile use-case
2940 #ifndef NDEBUG
2941  if (AH->Flag == __SYCL_ASSERT_START)
2942  throw sycl::exception(
2944  "Internal Error. Invalid value in assert description.");
2945 #endif
2946 
2947  if (AH->Flag) {
2948  const char *Expr = AH->Expr[0] ? AH->Expr : "<unknown expr>";
2949  const char *File = AH->File[0] ? AH->File : "<unknown file>";
2950  const char *Func = AH->Func[0] ? AH->Func : "<unknown func>";
2951 
2952  fprintf(stderr,
2953  "%s:%d: %s: global id: [%" PRIu64 ",%" PRIu64 ",%" PRIu64
2954  "], local id: [%" PRIu64 ",%" PRIu64 ",%" PRIu64 "] "
2955  "Assertion `%s` failed.\n",
2956  File, AH->Line, Func, AH->GID0, AH->GID1, AH->GID2, AH->LID0,
2957  AH->LID1, AH->LID2, Expr);
2958  fflush(stderr);
2959  abort(); // no need to release memory as it's abort anyway
2960  }
2961  });
2962  };
2963 
2964  if (SecondaryQueue) {
2965  CopierEv = Self.submit_impl(CopierCGF, *SecondaryQueue, CodeLoc);
2966  CheckerEv = Self.submit_impl(CheckerCGF, *SecondaryQueue, CodeLoc);
2967  } else {
2968  CopierEv = Self.submit_impl(CopierCGF, CodeLoc);
2969  CheckerEv = Self.submit_impl(CheckerCGF, CodeLoc);
2970  }
2971 
2972  return CheckerEv;
2973 }
2974 #undef __SYCL_ASSERT_START
2975 } // namespace detail
2976 
2977 } // namespace _V1
2978 } // namespace sycl
2979 #endif // __SYCL_USE_FALLBACK_ASSERT
The file contains implementations of accessor class.
The context class represents a SYCL context on which kernel functions may be executed.
Definition: context.hpp:50
This class is the default KernelName template parameter type for kernel invocation APIs such as singl...
Definition: kernel.hpp:44
Data type that manages the code_location information in TLS.
Definition: common.hpp:129
The SYCL device class encapsulates a single SYCL device on which kernels may be executed.
Definition: device.hpp:64
An event object can be used to synchronize memory transfers, enqueues of kernels and signaling barrie...
Definition: event.hpp:44
Command group handler class.
Definition: handler.hpp:468
void depends_on(event Event)
Registers event dependencies on this command group.
Definition: handler.cpp:1533
void ext_oneapi_wait_external_semaphore(ext::oneapi::experimental::interop_semaphore_handle SemaphoreHandle)
Submit a non-blocking device-side wait on an external.
Definition: handler.cpp:1423
void copy(accessor< T_Src, Dims, AccessMode, AccessTarget, IsPlaceholder > Src, std::shared_ptr< T_Dst > Dst)
Copies the content of memory object accessed by Src into the memory pointed by Dst.
Definition: handler.hpp:2616
void ext_oneapi_graph(ext::oneapi::experimental::command_graph< ext::oneapi::experimental::graph_state::executable > Graph)
Executes a command_graph.
Definition: handler.cpp:1793
void memcpy(void *Dest, const void *Src, size_t Count)
Copies data from one memory region to another, each is either a host pointer or a pointer within USM ...
Definition: handler.cpp:934
void require(accessor< DataT, Dims, AccMode, AccTarget, isPlaceholder > Acc)
Requires access to the memory object associated with the placeholder accessor.
Definition: handler.hpp:1911
void fill(accessor< T, Dims, AccessMode, AccessTarget, IsPlaceholder, PropertyListT > Dst, const T &Pattern)
Fills memory pointed by accessor with the pattern given.
Definition: handler.hpp:2825
void update_host(accessor< T, Dims, AccessMode, AccessTarget, IsPlaceholder > Acc)
Provides guarantees that the memory object accessed via Acc is updated on the host after command grou...
Definition: handler.hpp:2795
void ext_oneapi_memcpy2d(void *Dest, size_t DestPitch, const void *Src, size_t SrcPitch, size_t Width, size_t Height)
Copies data from one 2D memory region to another, both pointed by USM pointers.
Definition: memcpy2d.hpp:18
void prefetch(const void *Ptr, size_t Count)
Provides hints to the runtime library that data should be made available on a device earlier than Uni...
Definition: handler.cpp:951
A unique identifier of an item in an index space.
Definition: id.hpp:36
Defines the iteration domain of both the work-groups and the overall dispatch.
Definition: nd_range.hpp:22
Objects of the property_list class are containers for the SYCL properties.
Encapsulates a single SYCL queue which schedules kernels on a SYCL device.
Definition: queue.hpp:110
event copy(accessor< SrcT, SrcDims, SrcMode, SrcTgt, IsPlaceholder > Src, DestT *Dest, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from a memory region pointed to by a placeholder accessor to another memory region pointe...
Definition: queue.hpp:2482
event copy(const std::remove_all_extents_t< T > *Src, ext::oneapi::experimental::device_global< T, PropertyListT > &Dest, size_t Count, size_t StartIndex, const std::vector< event > &DepEvents, const detail::code_location &CodeLoc=detail::code_location::current())
Copies elements of type std::remove_all_extents_t<T> from a USM memory region to a device_global.
Definition: queue.hpp:1239
void wait(const detail::code_location &CodeLoc=detail::code_location::current())
Performs a blocking wait for the completion of all enqueued tasks in the queue.
Definition: queue.hpp:429
event fill(void *Ptr, const T &Pattern, size_t Count, const std::vector< event > &DepEvents, const detail::code_location &CodeLoc=detail::code_location::current())
Fills the specified memory with the specified pattern.
Definition: queue.hpp:523
event fill(void *Ptr, const T &Pattern, size_t Count, const detail::code_location &CodeLoc=detail::code_location::current())
Fills the specified memory with the specified pattern.
Definition: queue.hpp:484
queue(const property_list &PropList={})
Constructs a SYCL queue instance using the device returned by an instance of default_selector.
Definition: queue.hpp:116
event fill(void *Ptr, const T &Pattern, size_t Count, event DepEvent, const detail::code_location &CodeLoc=detail::code_location::current())
Fills the specified memory with the specified pattern.
Definition: queue.hpp:501
event ext_oneapi_memcpy2d(void *Dest, size_t DestPitch, const void *Src, size_t SrcPitch, size_t Width, size_t Height, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from one 2D memory region to another, both pointed by USM pointers.
Definition: queue.hpp:801
queue & operator=(const queue &RHS)=default
event prefetch(const void *Ptr, size_t Count, const std::vector< event > &DepEvents, const detail::code_location &CodeLoc=detail::code_location::current())
Provides hints to the runtime library that data should be made available on a device earlier than Uni...
Definition: queue.hpp:769
std::enable_if_t< ext::oneapi::experimental::is_property_list< PropertiesT >::value, event > single_task(PropertiesT Properties, _KERNELFUNCPARAM(KernelFunc), const detail::code_location &CodeLoc=detail::code_location::current())
single_task version with a kernel represented as a lambda.
Definition: queue.hpp:2018
event copy(const T *Src, T *Dest, size_t Count, const std::vector< event > &DepEvents, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from one memory region to another, each is either a host pointer or a pointer within USM ...
Definition: queue.hpp:682
queue(const context &syclContext, const DeviceSelector &deviceSelector, const async_handler &AsyncHandler, const property_list &propList={})
Constructs a SYCL queue instance using the device identified by the device selector provided.
Definition: queue.hpp:179
event parallel_for(range< 1 > Range, const std::vector< event > &DepEvents, RestT &&...Rest)
parallel_for version with a kernel represented as a lambda + range that specifies global size only.
Definition: queue.hpp:2221
event parallel_for(range< 3 > Range, const std::vector< event > &DepEvents, RestT &&...Rest)
parallel_for version with a kernel represented as a lambda + range that specifies global size only.
Definition: queue.hpp:2249
std::enable_if_t< ext::oneapi::experimental::is_property_list< PropertiesT >::value, event > single_task(const std::vector< event > &DepEvents, PropertiesT Properties, _KERNELFUNCPARAM(KernelFunc), const detail::code_location &CodeLoc=detail::code_location::current())
single_task version with a kernel represented as a lambda.
Definition: queue.hpp:2106
event parallel_for(range< 1 > Range, RestT &&...Rest)
parallel_for version with a kernel represented as a lambda + range that specifies global size only.
Definition: queue.hpp:2150
__SYCL2020_DEPRECATED("SYCL 1.2.1 device selectors are deprecated. Please " "use SYCL 2020 device selectors instead.") queue(const device_selector &DeviceSelector
Constructs a SYCL queue instance using the device returned by the DeviceSelector provided.
event parallel_for(range< Dim > Range, id< Dim > WorkItemOffset, const std::vector< event > &DepEvents, _KERNELFUNCPARAM(KernelFunc))
parallel_for version with a kernel represented as a lambda + range and offset that specify global siz...
Definition: queue.hpp:2266
event memcpy(ext::oneapi::experimental::device_global< T, PropertyListT > &Dest, const void *Src, size_t NumBytes=sizeof(T), size_t Offset=0, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from a USM memory region to a device_global.
Definition: queue.hpp:1136
event copy(std::shared_ptr< SrcT > Src, accessor< DestT, DestDims, DestMode, DestTgt, IsPlaceholder > Dest, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from a memory region pointed to by a shared_ptr to another memory region pointed to by a ...
Definition: queue.hpp:2462
queue(const context &syclContext, const DeviceSelector &deviceSelector, const property_list &propList={})
Constructs a SYCL queue instance using the device identified by the device selector provided.
Definition: queue.hpp:163
event ext_oneapi_graph(ext::oneapi::experimental::command_graph< ext::oneapi::experimental::graph_state::executable > Graph, const std::vector< event > &DepEvents, const detail::code_location &CodeLoc=detail::code_location::current())
Shortcut for executing a graph of commands with multiple dependencies.
Definition: queue.hpp:2622
event copy(accessor< SrcT, SrcDims, SrcMode, SrcTgt, IsSrcPlaceholder > Src, accessor< DestT, DestDims, DestMode, DestTgt, IsDestPlaceholder > Dest, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from one memory region to another, both pointed by placeholder accessors.
Definition: queue.hpp:2523
event copy(const T *Src, T *Dest, size_t Count, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from one memory region to another, each is either a host pointer or a pointer within USM ...
Definition: queue.hpp:640
event parallel_for(range< 1 > Range, event DepEvent, RestT &&...Rest)
parallel_for version with a kernel represented as a lambda + range that specifies global size only.
Definition: queue.hpp:2184
queue(queue &&RHS)=default
event ext_oneapi_wait_external_semaphore(sycl::ext::oneapi::experimental::interop_semaphore_handle SemaphoreHandle, const detail::code_location &CodeLoc=detail::code_location::current())
Instruct the queue with a non-blocking wait on an external semaphore.
Definition: queue.hpp:1848
event parallel_for(range< 3 > Range, event DepEvent, RestT &&...Rest)
parallel_for version with a kernel represented as a lambda + range that specifies global size only.
Definition: queue.hpp:2208
event parallel_for(range< 3 > Range, RestT &&...Rest)
parallel_for version with a kernel represented as a lambda + range that specifies global size only.
Definition: queue.hpp:2172
event update_host(accessor< T, Dims, Mode, Tgt, IsPlaceholder > Acc, const detail::code_location &CodeLoc=detail::code_location::current())
Provides guarantees that the memory object accessed via Acc is updated on the host after operation is...
Definition: queue.hpp:2543
event copy(const T *Src, T *Dest, size_t Count, event DepEvent, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from one memory region to another, each is either a host pointer or a pointer within USM ...
Definition: queue.hpp:661
event copy(const ext::oneapi::experimental::device_global< T, PropertyListT > &Src, std::remove_all_extents_t< T > *Dest, size_t Count=sizeof(T)/sizeof(std::remove_all_extents_t< T >), size_t StartIndex=0, const detail::code_location &CodeLoc=detail::code_location::current())
Copies elements of type std::remove_all_extents_t<T> from a device_global to a USM memory region.
Definition: queue.hpp:1360
event prefetch(const void *Ptr, size_t Count, event DepEvent, const detail::code_location &CodeLoc=detail::code_location::current())
Provides hints to the runtime library that data should be made available on a device earlier than Uni...
Definition: queue.hpp:748
queue(const DeviceSelector &deviceSelector, const property_list &PropList={})
Constructs a SYCL queue instance using the device identified by the device selector provided.
Definition: queue.hpp:149
event memcpy(void *Dest, const ext::oneapi::experimental::device_global< T, PropertyListT > &Src, size_t NumBytes=sizeof(T), size_t Offset=0, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from a device_global to USM memory.
Definition: queue.hpp:1216
event copy(const ext::oneapi::experimental::device_global< T, PropertyListT > &Src, std::remove_all_extents_t< T > *Dest, size_t Count, size_t StartIndex, const std::vector< event > &DepEvents, const detail::code_location &CodeLoc=detail::code_location::current())
Copies elements of type std::remove_all_extents_t<T> from a device_global to a USM memory region.
Definition: queue.hpp:1312
event parallel_for(range< 2 > Range, RestT &&...Rest)
parallel_for version with a kernel represented as a lambda + range that specifies global size only.
Definition: queue.hpp:2161
queue(const async_handler &AsyncHandler, const property_list &PropList={})
Constructs a SYCL queue instance with an async_handler using the device returned by an instance of de...
Definition: queue.hpp:124
event parallel_for(nd_range< Dims > Range, const std::vector< event > &DepEvents, RestT &&...Rest)
parallel_for version with a kernel represented as a lambda + nd_range that specifies global,...
Definition: queue.hpp:2419
event parallel_for(nd_range< Dims > Range, event DepEvent, RestT &&...Rest)
parallel_for version with a kernel represented as a lambda + nd_range that specifies global,...
Definition: queue.hpp:2395
queue & operator=(queue &&RHS)=default
queue(const DeviceSelector &deviceSelector, const async_handler &AsyncHandler, const property_list &PropList={})
Constructs a SYCL queue instance using the device identified by the device selector provided.
Definition: queue.hpp:136
event ext_oneapi_graph(ext::oneapi::experimental::command_graph< ext::oneapi::experimental::graph_state::executable > Graph, const detail::code_location &CodeLoc=detail::code_location::current())
Shortcut for executing a graph of commands.
Definition: queue.hpp:2588
__SYCL2020_DEPRECATED("SYCL 1.2.1 device selectors are deprecated. Please " "use SYCL 2020 device selectors instead.") queue(const context &SyclContext
Constructs a SYCL queue instance that is associated with the context provided, using the device retur...
event single_task(event DepEvent, _KERNELFUNCPARAM(KernelFunc), const detail::code_location &CodeLoc=detail::code_location::current())
single_task version with a kernel represented as a lambda.
Definition: queue.hpp:2087
event single_task(_KERNELFUNCPARAM(KernelFunc), const detail::code_location &CodeLoc=detail::code_location::current())
single_task version with a kernel represented as a lambda.
Definition: queue.hpp:2043
event fill(accessor< T, Dims, Mode, Tgt, IsPlaceholder > Dest, const T &Src, const detail::code_location &CodeLoc=detail::code_location::current())
Fills the specified memory with the specified data.
Definition: queue.hpp:2562
bool has_property() const noexcept
Definition: queue.hpp:465
bool operator==(const queue &RHS) const
Definition: queue.hpp:290
queue(const queue &RHS)=default
Constructs a SYCL queue with an optional async_handler from an OpenCL cl_command_queue.
event memcpy(void *Dest, const ext::oneapi::experimental::device_global< T, PropertyListT > &Src, size_t NumBytes, size_t Offset, event DepEvent, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from a device_global to USM memory.
Definition: queue.hpp:1195
event memcpy(ext::oneapi::experimental::device_global< T, PropertyListT > &Dest, const void *Src, size_t NumBytes, size_t Offset, event DepEvent, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from a USM memory region to a device_global.
Definition: queue.hpp:1116
void wait_and_throw(const detail::code_location &CodeLoc=detail::code_location::current())
Performs a blocking wait for the completion of all enqueued tasks in the queue.
Definition: queue.hpp:443
event single_task(const std::vector< event > &DepEvents, _KERNELFUNCPARAM(KernelFunc), const detail::code_location &CodeLoc=detail::code_location::current())
single_task version with a kernel represented as a lambda.
Definition: queue.hpp:2135
event copy(const std::remove_all_extents_t< T > *Src, ext::oneapi::experimental::device_global< T, PropertyListT > &Dest, size_t Count=sizeof(T)/sizeof(std::remove_all_extents_t< T >), size_t StartIndex=0, const detail::code_location &CodeLoc=detail::code_location::current())
Copies elements of type std::remove_all_extents_t<T> from a USM memory region to a device_global.
Definition: queue.hpp:1287
event memcpy(ext::oneapi::experimental::device_global< T, PropertyListT > &Dest, const void *Src, size_t NumBytes, size_t Offset, const std::vector< event > &DepEvents, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from a USM memory region to a device_global.
Definition: queue.hpp:1076
event ext_oneapi_graph(ext::oneapi::experimental::command_graph< ext::oneapi::experimental::graph_state::executable > Graph, event DepEvent, const detail::code_location &CodeLoc=detail::code_location::current())
Shortcut for executing a graph of commands with a single dependency.
Definition: queue.hpp:2602
std::enable_if_t< detail::AreAllButLastReductions< RestT... >::value, event > parallel_for(nd_range< Dims > Range, RestT &&...Rest)
parallel_for version with a kernel represented as a lambda + nd_range that specifies global,...
Definition: queue.hpp:2381
event copy(const std::remove_all_extents_t< T > *Src, ext::oneapi::experimental::device_global< T, PropertyListT > &Dest, size_t Count, size_t StartIndex, event DepEvent, const detail::code_location &CodeLoc=detail::code_location::current())
Copies elements of type std::remove_all_extents_t<T> from a USM memory region to a device_global.
Definition: queue.hpp:1264
event copy(const SrcT *Src, accessor< DestT, DestDims, DestMode, DestTgt, IsPlaceholder > Dest, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from a memory region pointed to by a raw pointer to another memory region pointed to by a...
Definition: queue.hpp:2501
event copy(accessor< SrcT, SrcDims, SrcMode, SrcTgt, IsPlaceholder > Src, std::shared_ptr< DestT > Dest, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from a memory region pointed to by a placeholder accessor to another memory region pointe...
Definition: queue.hpp:2442
event prefetch(const void *Ptr, size_t Count, const detail::code_location &CodeLoc=detail::code_location::current())
Provides hints to the runtime library that data should be made available on a device earlier than Uni...
Definition: queue.hpp:733
std::enable_if_t< ext::oneapi::experimental::is_property_list< PropertiesT >::value, event > single_task(event DepEvent, PropertiesT Properties, _KERNELFUNCPARAM(KernelFunc), const detail::code_location &CodeLoc=detail::code_location::current())
single_task version with a kernel represented as a lambda.
Definition: queue.hpp:2060
event parallel_for(range< 2 > Range, event DepEvent, RestT &&...Rest)
parallel_for version with a kernel represented as a lambda + range that specifies global size only.
Definition: queue.hpp:2196
std::enable_if_t< std::is_invocable_r_v< void, T, handler & >, event > submit(T CGF, const detail::code_location &CodeLoc=detail::code_location::current())
Submits a command group function object to the queue, in order to be scheduled for execution on the d...
Definition: queue.hpp:340
event copy(const ext::oneapi::experimental::device_global< T, PropertyListT > &Src, std::remove_all_extents_t< T > *Dest, size_t Count, size_t StartIndex, event DepEvent, const detail::code_location &CodeLoc=detail::code_location::current())
Copies elements of type std::remove_all_extents_t<T> from a device_global to a USM memory region.
Definition: queue.hpp:1337
PropertyT get_property() const
Definition: queue.hpp:472
std::enable_if_t< std::is_invocable_r_v< void, T, handler & >, event > submit(T CGF, queue &SecondaryQueue, const detail::code_location &CodeLoc=detail::code_location::current())
Submits a command group function object to the queue, in order to be scheduled for execution on the d...
Definition: queue.hpp:375
event memcpy(void *Dest, const ext::oneapi::experimental::device_global< T, PropertyListT > &Src, size_t NumBytes, size_t Offset, const std::vector< event > &DepEvents, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from a device_global to USM memory.
Definition: queue.hpp:1157
event parallel_for(range< 2 > Range, const std::vector< event > &DepEvents, RestT &&...Rest)
parallel_for version with a kernel represented as a lambda + range that specifies global size only.
Definition: queue.hpp:2235
std::enable_if_t< detail::AreAllButLastReductions< RestT... >::value &&ext::oneapi::experimental::is_property_list< PropertiesT >::value, event > parallel_for(nd_range< Dims > Range, PropertiesT Properties, RestT &&...Rest)
parallel_for version with a kernel represented as a lambda + nd_range that specifies global,...
Definition: queue.hpp:2359
bool operator!=(const queue &RHS) const
Definition: queue.hpp:292
Defines the iteration domain of either a single work-group in a parallel dispatch,...
Definition: range.hpp:26
decltype(Obj::impl) const & getSyclObjImpl(const Obj &SyclObject)
Definition: impl_utils.hpp:31
bool isDeviceGlobalUsedInKernel(const void *DeviceGlobalPtr)
Definition: handler.cpp:42
void defaultAsyncHandler(exception_list Exceptions)
T createSyclObjFromImpl(decltype(T::impl) ImplObj)
Definition: impl_utils.hpp:40
device select_device(const DSelectorInvocableType &DeviceSelectorInvocable)
void submit_impl(queue &Q, CommandGroupFunc &&CGF, const sycl::detail::code_location &CodeLoc)
void submit_impl(queue &Q, CommandGroupFunc &&CGF, const sycl::detail::code_location &CodeLoc)
void mem_advise(handler &CGH, void *Ptr, size_t NumBytes, int Advice)
void submit(queue Q, CommandGroupFunc &&CGF, const sycl::detail::code_location &CodeLoc=sycl::detail::code_location::current())
@ modifiable
In modifiable state, commands can be added to graph.
@ executable
In executable state, the graph is ready to execute.
static constexpr bool has_property()
constexpr device_has_key::value_t< Aspects... > device_has
Definition: properties.hpp:152
properties< std::tuple<> > empty_properties_t
Definition: properties.hpp:234
signed char __SYCL2020_DEPRECATED
Definition: aliases.hpp:94
pointer get() const
Definition: multi_ptr.hpp:544
auto get_native(const SyclObjectT &Obj) -> backend_return_t< BackendName, SyclObjectT >
Definition: backend.hpp:130
typename backend_traits< Backend >::template return_type< SyclType > backend_return_t
Definition: backend.hpp:81
int default_selector_v(const device &dev)
class __SYCL_EBO __SYCL_SPECIAL_CLASS IsPlaceholder
std::function< void(sycl::exception_list)> async_handler
std::error_code make_error_code(sycl::errc E) noexcept
Constructs an error code using e and sycl_category()
Definition: exception.cpp:64
Definition: access.hpp:18
static device_ext & get_device(unsigned int id)
Util function to get a device by id.
Definition: device.hpp:905
uintptr_t pi_native_handle
Definition: pi.h:267
#define _KERNELFUNCPARAM(a)
Definition: queue.hpp:59
_Abi const simd< _Tp, _Abi > & noexcept
Definition: simd.hpp:1324
Predicate returning true if all template type parameters except the last one are reductions.
static constexpr code_location current(const char *fileName=__CODELOC_FILE_NAME, const char *funcName=__CODELOC_FUNCTION, unsigned long lineNo=__CODELOC_LINE, unsigned long columnNo=__CODELOC_COLUMN) noexcept
Definition: common.hpp:68
A struct to describe the properties of an image.