DPC++ Runtime
Runtime libraries for oneAPI DPC++
queue.hpp
Go to the documentation of this file.
1 //==-------------------- queue.hpp - SYCL queue ----------------------------==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #pragma once
10 
11 #include <sycl/access/access.hpp> // for target, access...
12 #include <sycl/accessor.hpp> // for accessor
13 #include <sycl/aspects.hpp> // for aspect
14 #include <sycl/async_handler.hpp> // for async_handler
15 #include <sycl/backend_types.hpp> // for backend, backe...
16 #include <sycl/buffer.hpp> // for buffer
17 #include <sycl/context.hpp> // for context
18 #include <sycl/detail/assert_happened.hpp> // for AssertHappened
19 #include <sycl/detail/cg_types.hpp> // for check_fn_signa...
20 #include <sycl/detail/common.hpp> // for code_location
21 #include <sycl/detail/defines_elementary.hpp> // for __SYCL2020_DEP...
22 #include <sycl/detail/export.hpp> // for __SYCL_EXPORT
23 #include <sycl/detail/info_desc_helpers.hpp> // for is_queue_info_...
24 #include <sycl/detail/kernel_desc.hpp> // for KernelInfo
25 #include <sycl/detail/owner_less_base.hpp> // for OwnerLessBase
26 #include <sycl/detail/pi.h> // for pi_mem_advice
27 #include <sycl/device.hpp> // for device
28 #include <sycl/device_selector.hpp> // for device_selector
29 #include <sycl/event.hpp> // for event
30 #include <sycl/exception.hpp> // for make_error_code
31 #include <sycl/exception_list.hpp> // for defaultAsyncHa...
32 #include <sycl/ext/oneapi/device_global/device_global.hpp> // for device_global
33 #include <sycl/ext/oneapi/device_global/properties.hpp> // for device_image_s...
34 #include <sycl/ext/oneapi/experimental/graph.hpp> // for command_graph...
35 #include <sycl/ext/oneapi/properties/properties.hpp> // for empty_properti...
36 #include <sycl/handler.hpp> // for handler, isDev...
37 #include <sycl/id.hpp> // for id
38 #include <sycl/kernel.hpp> // for auto_name
39 #include <sycl/kernel_handler.hpp> // for kernel_handler
40 #include <sycl/nd_range.hpp> // for nd_range
41 #include <sycl/property_list.hpp> // for property_list
42 #include <sycl/range.hpp> // for range
43 
44 #if __SYCL_USE_FALLBACK_ASSERT
45 // TODO: maybe we can move detail::submitAssertCapture into the shared library
46 // instead.
48 #endif
49 
50 #include <cstddef> // for size_t
51 #include <functional> // for function
52 #include <memory> // for shared_ptr, hash
53 #include <stdint.h> // for int32_t
54 #include <tuple> // for tuple
55 #include <type_traits> // for remove_all_ext...
56 #include <variant> // for hash
57 #include <vector> // for vector
58 
59 // having _TWO_ mid-param #ifdefs makes the functions very difficult to read.
60 // Here we simplify the KernelFunc param is simplified to be
61 // _KERNELFUNCPARAM(KernelFunc) Once the queue kernel functions are defined,
62 // these macros are #undef immediately.
63 // replace _KERNELFUNCPARAM(KernelFunc) with KernelType KernelFunc
64 // or const KernelType &KernelFunc
65 #ifdef __SYCL_NONCONST_FUNCTOR__
66 #define _KERNELFUNCPARAM(a) KernelType a
67 #else
68 #define _KERNELFUNCPARAM(a) const KernelType &a
69 #endif
70 
71 namespace sycl {
72 inline namespace _V1 {
73 
74 // Forward declaration
75 class context;
76 class device;
77 class event;
78 class queue;
79 
80 template <backend BackendName, class SyclObjectT>
81 auto get_native(const SyclObjectT &Obj)
82  -> backend_return_t<BackendName, SyclObjectT>;
83 
84 namespace detail {
85 class queue_impl;
86 
87 #if __SYCL_USE_FALLBACK_ASSERT
88 inline event submitAssertCapture(queue &, event &, queue *,
89  const detail::code_location &);
90 #endif
91 } // namespace detail
92 
93 namespace ext ::oneapi ::experimental {
94 // State of a queue with regards to graph recording,
95 // returned by info::queue::state
97 struct image_descriptor;
98 } // namespace ext::oneapi::experimental
99 
111 class __SYCL_EXPORT queue : public detail::OwnerLessBase<queue> {
112 public:
117  explicit queue(const property_list &PropList = {})
119 
125  queue(const async_handler &AsyncHandler, const property_list &PropList = {})
126  : queue(default_selector_v, AsyncHandler, PropList) {}
127 
134  template <typename DeviceSelector,
135  typename =
136  detail::EnableIfSYCL2020DeviceSelectorInvocable<DeviceSelector>>
137  explicit queue(const DeviceSelector &deviceSelector,
138  const async_handler &AsyncHandler,
139  const property_list &PropList = {})
140  : queue(detail::select_device(deviceSelector), AsyncHandler, PropList) {}
141 
147  template <typename DeviceSelector,
148  typename =
149  detail::EnableIfSYCL2020DeviceSelectorInvocable<DeviceSelector>>
150  explicit queue(const DeviceSelector &deviceSelector,
151  const property_list &PropList = {})
152  : queue(detail::select_device(deviceSelector),
153  detail::defaultAsyncHandler, PropList) {}
154 
161  template <typename DeviceSelector,
162  typename =
163  detail::EnableIfSYCL2020DeviceSelectorInvocable<DeviceSelector>>
164  explicit queue(const context &syclContext,
165  const DeviceSelector &deviceSelector,
166  const property_list &propList = {})
167  : queue(syclContext, detail::select_device(deviceSelector, syclContext),
168  propList) {}
169 
177  template <typename DeviceSelector,
178  typename =
179  detail::EnableIfSYCL2020DeviceSelectorInvocable<DeviceSelector>>
180  explicit queue(const context &syclContext,
181  const DeviceSelector &deviceSelector,
182  const async_handler &AsyncHandler,
183  const property_list &propList = {})
184  : queue(syclContext, detail::select_device(deviceSelector, syclContext),
185  AsyncHandler, propList) {}
186 
192  __SYCL2020_DEPRECATED("SYCL 1.2.1 device selectors are deprecated. Please "
193  "use SYCL 2020 device selectors instead.")
194  queue(const device_selector &DeviceSelector,
195  const property_list &PropList = {})
196  : queue(DeviceSelector.select_device(), detail::defaultAsyncHandler,
197  PropList) {}
198 
205  __SYCL2020_DEPRECATED("SYCL 1.2.1 device selectors are deprecated. Please "
206  "use SYCL 2020 device selectors instead.")
207  queue(const device_selector &DeviceSelector,
208  const async_handler &AsyncHandler, const property_list &PropList = {})
209  : queue(DeviceSelector.select_device(), AsyncHandler, PropList) {}
210 
215  explicit queue(const device &SyclDevice, const property_list &PropList = {})
216  : queue(SyclDevice, detail::defaultAsyncHandler, PropList) {}
217 
224  explicit queue(const device &SyclDevice, const async_handler &AsyncHandler,
225  const property_list &PropList = {});
226 
233  __SYCL2020_DEPRECATED("SYCL 1.2.1 device selectors are deprecated. Please "
234  "use SYCL 2020 device selectors instead.")
235  queue(const context &SyclContext, const device_selector &DeviceSelector,
236  const property_list &PropList = {});
237 
246  __SYCL2020_DEPRECATED("SYCL 1.2.1 device selectors are deprecated. Please "
247  "use SYCL 2020 device selectors instead.")
248  queue(const context &SyclContext, const device_selector &DeviceSelector,
249  const async_handler &AsyncHandler, const property_list &PropList = {});
250 
257  queue(const context &SyclContext, const device &SyclDevice,
258  const property_list &PropList = {});
259 
267  queue(const context &SyclContext, const device &SyclDevice,
268  const async_handler &AsyncHandler, const property_list &PropList = {});
269 
278 #ifdef __SYCL_INTERNAL_API
279  queue(cl_command_queue ClQueue, const context &SyclContext,
280  const async_handler &AsyncHandler = {});
281 #endif
282 
283  queue(const queue &RHS) = default;
284 
285  queue(queue &&RHS) = default;
286 
287  queue &operator=(const queue &RHS) = default;
288 
289  queue &operator=(queue &&RHS) = default;
290 
291  bool operator==(const queue &RHS) const { return impl == RHS.impl; }
292 
293  bool operator!=(const queue &RHS) const { return !(*this == RHS); }
294 
297 #ifdef __SYCL_INTERNAL_API
298  cl_command_queue get() const;
299 #endif
300 
302  context get_context() const;
303 
305  device get_device() const;
306 
308  ext::oneapi::experimental::queue_state ext_oneapi_get_state() const;
309 
313  ext_oneapi_get_graph() const;
314 
317  "is_host() is deprecated as the host device is no longer supported.")
318  bool is_host() const;
319 
323  template <typename Param>
324  typename detail::is_queue_info_desc<Param>::return_type get_info() const;
325 
329  template <typename Param>
330  typename detail::is_backend_info_desc<Param>::return_type
331  get_backend_info() const;
332 
333 private:
334  // A shorthand for `get_device().has()' which is expected to be a bit quicker
335  // than the long version
336  bool device_has(aspect Aspect) const;
337 
338 public:
345  template <typename T>
346  std::enable_if_t<std::is_invocable_r_v<void, T, handler &>, event> submit(
347  T CGF,
348  const detail::code_location &CodeLoc = detail::code_location::current()) {
349  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
350 #if __SYCL_USE_FALLBACK_ASSERT
351  auto PostProcess = [this, &CodeLoc](bool IsKernel, bool KernelUsesAssert,
352  event &E) {
353  if (IsKernel && !device_has(aspect::ext_oneapi_native_assert) &&
354  KernelUsesAssert && !device_has(aspect::accelerator)) {
355  // __devicelib_assert_fail isn't supported by Device-side Runtime
356  // Linking against fallback impl of __devicelib_assert_fail is
357  // performed by program manager class
358  // Fallback assert isn't supported for FPGA
359  submitAssertCapture(*this, E, /* SecondaryQueue = */ nullptr, CodeLoc);
360  }
361  };
362 
363  return submit_impl_and_postprocess(CGF, CodeLoc, PostProcess);
364 #else
365  return submit_impl(CGF, CodeLoc);
366 #endif // __SYCL_USE_FALLBACK_ASSERT
367  }
368 
380  template <typename T>
381  std::enable_if_t<std::is_invocable_r_v<void, T, handler &>, event> submit(
382  T CGF, queue &SecondaryQueue,
384  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
385 #if __SYCL_USE_FALLBACK_ASSERT
386  auto PostProcess = [this, &SecondaryQueue, &CodeLoc](
387  bool IsKernel, bool KernelUsesAssert, event &E) {
388  if (IsKernel && !device_has(aspect::ext_oneapi_native_assert) &&
389  KernelUsesAssert && !device_has(aspect::accelerator)) {
390  // Only secondary queues on devices need to be added to the assert
391  // capture.
392  // __devicelib_assert_fail isn't supported by Device-side Runtime
393  // Linking against fallback impl of __devicelib_assert_fail is
394  // performed by program manager class
395  // Fallback assert isn't supported for FPGA
396  submitAssertCapture(*this, E, &SecondaryQueue, CodeLoc);
397  }
398  };
399 
400  return submit_impl_and_postprocess(CGF, SecondaryQueue, CodeLoc,
401  PostProcess);
402 #else
403  return submit_impl(CGF, SecondaryQueue, CodeLoc);
404 #endif // __SYCL_USE_FALLBACK_ASSERT
405  }
406 
414  event ext_oneapi_submit_barrier(
416 
426  event ext_oneapi_submit_barrier(
427  const std::vector<event> &WaitList,
429 
435  void wait(
437  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
438  wait_proxy(CodeLoc);
439  }
440 
451  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
452  wait_and_throw_proxy(CodeLoc);
453  }
454 
457  void wait_proxy(const detail::code_location &CodeLoc);
460  void wait_and_throw_proxy(const detail::code_location &CodeLoc);
461 
467  void throw_asynchronous();
468 
471  template <typename PropertyT> bool has_property() const noexcept;
472 
476  template <typename PropertyT> PropertyT get_property() const;
477 
485  template <typename T>
487  void *Ptr, const T &Pattern, size_t Count,
488  const detail::code_location &CodeLoc = detail::code_location::current()) {
489  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
490  return submit([&](handler &CGH) { CGH.fill<T>(Ptr, Pattern, Count); },
491  CodeLoc);
492  }
493 
502  template <typename T>
503  event fill(
504  void *Ptr, const T &Pattern, size_t Count, event DepEvent,
506  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
507  return submit(
508  [&](handler &CGH) {
509  CGH.depends_on(DepEvent);
510  CGH.fill<T>(Ptr, Pattern, Count);
511  },
512  CodeLoc);
513  }
514 
524  template <typename T>
525  event fill(
526  void *Ptr, const T &Pattern, size_t Count,
527  const std::vector<event> &DepEvents,
529  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
530  return submit(
531  [&](handler &CGH) {
532  CGH.depends_on(DepEvents);
533  CGH.fill<T>(Ptr, Pattern, Count);
534  },
535  CodeLoc);
536  }
537 
547  event memset(
548  void *Ptr, int Value, size_t Count,
550 
561  event memset(
562  void *Ptr, int Value, size_t Count, event DepEvent,
564 
576  event memset(
577  void *Ptr, int Value, size_t Count, const std::vector<event> &DepEvents,
579 
591  event memcpy(
592  void *Dest, const void *Src, size_t Count,
594 
607  event memcpy(
608  void *Dest, const void *Src, size_t Count, event DepEvent,
610 
624  event memcpy(
625  void *Dest, const void *Src, size_t Count,
626  const std::vector<event> &DepEvents,
628 
641  template <typename T>
642  event copy(
643  const T *Src, T *Dest, size_t Count,
645  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
646  return this->memcpy(Dest, Src, Count * sizeof(T));
647  }
648 
662  template <typename T>
663  event copy(
664  const T *Src, T *Dest, size_t Count, event DepEvent,
666  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
667  return this->memcpy(Dest, Src, Count * sizeof(T), DepEvent);
668  }
669 
683  template <typename T>
684  event copy(
685  const T *Src, T *Dest, size_t Count, const std::vector<event> &DepEvents,
687  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
688  return this->memcpy(Dest, Src, Count * sizeof(T), DepEvents);
689  }
690 
698  __SYCL2020_DEPRECATED("use the overload with int Advice instead")
700  const void *Ptr, size_t Length, pi_mem_advice Advice,
701  const detail::code_location &CodeLoc = detail::code_location::current());
702 
711  const void *Ptr, size_t Length, int Advice,
712  const detail::code_location &CodeLoc = detail::code_location::current());
713 
723  const void *Ptr, size_t Length, int Advice, event DepEvent,
724  const detail::code_location &CodeLoc = detail::code_location::current());
725 
736  const void *Ptr, size_t Length, int Advice,
737  const std::vector<event> &DepEvents,
738  const detail::code_location &CodeLoc = detail::code_location::current());
739 
747  event prefetch(
748  const void *Ptr, size_t Count,
749  const detail::code_location &CodeLoc = detail::code_location::current()) {
750  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
751  return submit([=](handler &CGH) { CGH.prefetch(Ptr, Count); }, CodeLoc);
752  }
753 
762  event prefetch(
763  const void *Ptr, size_t Count, event DepEvent,
765  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
766  return submit(
767  [=](handler &CGH) {
768  CGH.depends_on(DepEvent);
769  CGH.prefetch(Ptr, Count);
770  },
771  CodeLoc);
772  }
773 
783  event prefetch(
784  const void *Ptr, size_t Count, const std::vector<event> &DepEvents,
786  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
787  return submit(
788  [=](handler &CGH) {
789  CGH.depends_on(DepEvents);
790  CGH.prefetch(Ptr, Count);
791  },
792  CodeLoc);
793  }
794 
813  template <typename T = unsigned char,
814  typename = std::enable_if_t<std::is_same_v<T, unsigned char>>>
816  void *Dest, size_t DestPitch, const void *Src, size_t SrcPitch,
817  size_t Width, size_t Height,
819  return submit(
820  [=](handler &CGH) {
821  CGH.ext_oneapi_memcpy2d<T>(Dest, DestPitch, Src, SrcPitch, Width,
822  Height);
823  },
824  CodeLoc);
825  }
826 
846  template <typename T = unsigned char,
847  typename = std::enable_if_t<std::is_same_v<T, unsigned char>>>
848  event ext_oneapi_memcpy2d(
849  void *Dest, size_t DestPitch, const void *Src, size_t SrcPitch,
850  size_t Width, size_t Height, event DepEvent,
852 
873  template <typename T = unsigned char,
874  typename = std::enable_if_t<std::is_same_v<T, unsigned char>>>
875  event ext_oneapi_memcpy2d(
876  void *Dest, size_t DestPitch, const void *Src, size_t SrcPitch,
877  size_t Width, size_t Height, const std::vector<event> &DepEvents,
879 
895  template <typename T>
896  event ext_oneapi_copy2d(
897  const T *Src, size_t SrcPitch, T *Dest, size_t DestPitch, size_t Width,
898  size_t Height,
900 
917  template <typename T>
918  event ext_oneapi_copy2d(
919  const T *Src, size_t SrcPitch, T *Dest, size_t DestPitch, size_t Width,
920  size_t Height, event DepEvent,
922 
940  template <typename T>
941  event ext_oneapi_copy2d(
942  const T *Src, size_t SrcPitch, T *Dest, size_t DestPitch, size_t Width,
943  size_t Height, const std::vector<event> &DepEvents,
945 
962  template <typename T = unsigned char,
963  typename = std::enable_if_t<std::is_same_v<T, unsigned char>>>
964  event ext_oneapi_memset2d(
965  void *Dest, size_t DestPitch, int Value, size_t Width, size_t Height,
967 
985  template <typename T = unsigned char,
986  typename = std::enable_if_t<std::is_same_v<T, unsigned char>>>
987  event ext_oneapi_memset2d(
988  void *Dest, size_t DestPitch, int Value, size_t Width, size_t Height,
989  event DepEvent,
991 
1010  template <typename T = unsigned char,
1011  typename = std::enable_if_t<std::is_same_v<T, unsigned char>>>
1012  event ext_oneapi_memset2d(
1013  void *Dest, size_t DestPitch, int Value, size_t Width, size_t Height,
1014  const std::vector<event> &DepEvents,
1016 
1030  template <typename T>
1031  event ext_oneapi_fill2d(
1032  void *Dest, size_t DestPitch, const T &Pattern, size_t Width,
1033  size_t Height,
1035 
1050  template <typename T>
1051  event ext_oneapi_fill2d(
1052  void *Dest, size_t DestPitch, const T &Pattern, size_t Width,
1053  size_t Height, event DepEvent,
1055 
1071  template <typename T>
1072  event ext_oneapi_fill2d(
1073  void *Dest, size_t DestPitch, const T &Pattern, size_t Width,
1074  size_t Height, const std::vector<event> &DepEvents,
1076 
1089  template <typename T, typename PropertyListT>
1090  event memcpy(
1092  const void *Src, size_t NumBytes, size_t Offset,
1093  const std::vector<event> &DepEvents,
1095  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
1096  if (sizeof(T) < Offset + NumBytes)
1098  "Copy to device_global is out of bounds.");
1099 
1100  if (!detail::isDeviceGlobalUsedInKernel(&Dest)) {
1101  // device_global is unregistered so we need a fallback. We let the handler
1102  // implement this fallback.
1103  return submit(
1104  [&](handler &CGH) {
1105  CGH.depends_on(DepEvents);
1106  return CGH.memcpy(Dest, Src, NumBytes, Offset);
1107  },
1108  CodeLoc);
1109  }
1110 
1111  constexpr bool IsDeviceImageScoped = PropertyListT::template has_property<
1113  return memcpyToDeviceGlobal(&Dest, Src, IsDeviceImageScoped, NumBytes,
1114  Offset, DepEvents);
1115  }
1116 
1129  template <typename T, typename PropertyListT>
1130  event memcpy(
1132  const void *Src, size_t NumBytes, size_t Offset, event DepEvent,
1134  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
1135  return this->memcpy(Dest, Src, NumBytes, Offset,
1136  std::vector<event>{DepEvent});
1137  }
1138 
1149  template <typename T, typename PropertyListT>
1150  event memcpy(
1152  const void *Src, size_t NumBytes = sizeof(T), size_t Offset = 0,
1154  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
1155  return this->memcpy(Dest, Src, NumBytes, Offset, std::vector<event>{});
1156  }
1157 
1170  template <typename T, typename PropertyListT>
1171  event memcpy(
1172  void *Dest,
1174  size_t NumBytes, size_t Offset, const std::vector<event> &DepEvents,
1176  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
1177  if (sizeof(T) < Offset + NumBytes)
1179  "Copy from device_global is out of bounds.");
1180 
1182  // device_global is unregistered so we need a fallback. We let the handler
1183  // implement this fallback.
1184  return submit([&](handler &CGH) {
1185  CGH.depends_on(DepEvents);
1186  return CGH.memcpy(Dest, Src, NumBytes, Offset);
1187  });
1188  }
1189 
1190  constexpr bool IsDeviceImageScoped = PropertyListT::template has_property<
1192  return memcpyFromDeviceGlobal(Dest, &Src, IsDeviceImageScoped, NumBytes,
1193  Offset, DepEvents);
1194  }
1195 
1208  template <typename T, typename PropertyListT>
1209  event memcpy(
1210  void *Dest,
1212  size_t NumBytes, size_t Offset, event DepEvent,
1214  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
1215  return this->memcpy(Dest, Src, NumBytes, Offset,
1216  std::vector<event>{DepEvent});
1217  }
1218 
1229  template <typename T, typename PropertyListT>
1230  event memcpy(
1231  void *Dest,
1233  size_t NumBytes = sizeof(T), size_t Offset = 0,
1235  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
1236  return this->memcpy(Dest, Src, NumBytes, Offset, std::vector<event>{});
1237  }
1238 
1252  template <typename T, typename PropertyListT>
1253  event copy(
1254  const std::remove_all_extents_t<T> *Src,
1256  size_t Count, size_t StartIndex, const std::vector<event> &DepEvents,
1258  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
1259  return this->memcpy(Dest, Src, Count * sizeof(std::remove_all_extents_t<T>),
1260  StartIndex * sizeof(std::remove_all_extents_t<T>),
1261  DepEvents);
1262  }
1263 
1277  template <typename T, typename PropertyListT>
1278  event copy(
1279  const std::remove_all_extents_t<T> *Src,
1281  size_t Count, size_t StartIndex, event DepEvent,
1283  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
1284  return this->memcpy(Dest, Src, Count * sizeof(std::remove_all_extents_t<T>),
1285  StartIndex * sizeof(std::remove_all_extents_t<T>),
1286  DepEvent);
1287  }
1288 
1300  template <typename T, typename PropertyListT>
1301  event copy(
1302  const std::remove_all_extents_t<T> *Src,
1304  size_t Count = sizeof(T) / sizeof(std::remove_all_extents_t<T>),
1305  size_t StartIndex = 0,
1307  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
1308  return this->memcpy(Dest, Src, Count * sizeof(std::remove_all_extents_t<T>),
1309  StartIndex * sizeof(std::remove_all_extents_t<T>));
1310  }
1311 
1325  template <typename T, typename PropertyListT>
1326  event copy(
1328  std::remove_all_extents_t<T> *Dest, size_t Count, size_t StartIndex,
1329  const std::vector<event> &DepEvents,
1331  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
1332  return this->memcpy(Dest, Src, Count * sizeof(std::remove_all_extents_t<T>),
1333  StartIndex * sizeof(std::remove_all_extents_t<T>),
1334  DepEvents);
1335  }
1336 
1350  template <typename T, typename PropertyListT>
1351  event copy(
1353  std::remove_all_extents_t<T> *Dest, size_t Count, size_t StartIndex,
1354  event DepEvent,
1356  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
1357  return this->memcpy(Dest, Src, Count * sizeof(std::remove_all_extents_t<T>),
1358  StartIndex * sizeof(std::remove_all_extents_t<T>),
1359  DepEvent);
1360  }
1361 
1373  template <typename T, typename PropertyListT>
1374  event copy(
1376  std::remove_all_extents_t<T> *Dest,
1377  size_t Count = sizeof(T) / sizeof(std::remove_all_extents_t<T>),
1378  size_t StartIndex = 0,
1380  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
1381  return this->memcpy(Dest, Src, Count * sizeof(std::remove_all_extents_t<T>),
1382  StartIndex * sizeof(std::remove_all_extents_t<T>));
1383  }
1384 
1396  event ext_oneapi_copy(
1400 
1420  event ext_oneapi_copy(
1421  void *Src, sycl::range<3> SrcOffset, sycl::range<3> SrcExtent,
1423  sycl::range<3> DestOffset,
1425  sycl::range<3> CopyExtent,
1427 
1440  event ext_oneapi_copy(
1443  event DepEvent,
1445 
1466  event ext_oneapi_copy(
1467  void *Src, sycl::range<3> SrcOffset, sycl::range<3> SrcExtent,
1469  sycl::range<3> DestOffset,
1471  sycl::range<3> CopyExtent, event DepEvent,
1473 
1487  event ext_oneapi_copy(
1490  const std::vector<event> &DepEvents,
1492 
1514  event ext_oneapi_copy(
1515  void *Src, sycl::range<3> SrcOffset, sycl::range<3> SrcExtent,
1517  sycl::range<3> DestOffset,
1519  sycl::range<3> CopyExtent, const std::vector<event> &DepEvents,
1521 
1532  event ext_oneapi_copy(
1536 
1558  event ext_oneapi_copy(
1560  const ext::oneapi::experimental::image_descriptor &SrcImgDesc, void *Dest,
1561  sycl::range<3> DestOffset, sycl::range<3> DestExtent,
1562  sycl::range<3> CopyExtent,
1564 
1576  event ext_oneapi_copy(
1579  event DepEvent,
1581 
1604  event ext_oneapi_copy(
1606  const ext::oneapi::experimental::image_descriptor &SrcImgDesc, void *Dest,
1607  sycl::range<3> DestOffset, sycl::range<3> DestExtent,
1608  sycl::range<3> CopyExtent, event DepEvent,
1610 
1623  event ext_oneapi_copy(
1626  const std::vector<event> &DepEvents,
1628 
1652  event ext_oneapi_copy(
1654  const ext::oneapi::experimental::image_descriptor &SrcImgDesc, void *Dest,
1655  sycl::range<3> DestOffset, sycl::range<3> DestExtent,
1656  sycl::range<3> CopyExtent, const std::vector<event> &DepEvents,
1658 
1670  event ext_oneapi_copy(
1671  void *Src, void *Dest,
1672  const ext::oneapi::experimental::image_descriptor &DeviceImgDesc,
1673  size_t DeviceRowPitch,
1675 
1698  event ext_oneapi_copy(
1699  void *Src, sycl::range<3> SrcOffset, void *Dest,
1700  sycl::range<3> DestOffset,
1701  const ext::oneapi::experimental::image_descriptor &DeviceImgDesc,
1702  size_t DeviceRowPitch, sycl::range<3> HostExtent,
1703  sycl::range<3> CopyExtent,
1705 
1718  event ext_oneapi_copy(
1719  void *Src, void *Dest,
1720  const ext::oneapi::experimental::image_descriptor &DeviceImgDesc,
1721  size_t DeviceRowPitch, event DepEvent,
1723 
1733  event ext_oneapi_copy(
1737  event DepEvent,
1739 
1750  event ext_oneapi_copy(
1754  const std::vector<event> &DepEvents,
1756 
1765  event ext_oneapi_copy(
1770 
1794  event ext_oneapi_copy(
1795  void *Src, sycl::range<3> SrcOffset, void *Dest,
1796  sycl::range<3> DestOffset,
1797  const ext::oneapi::experimental::image_descriptor &DeviceImgDesc,
1798  size_t DeviceRowPitch, sycl::range<3> HostExtent,
1799  sycl::range<3> CopyExtent, event DepEvent,
1801 
1815  event ext_oneapi_copy(
1816  void *Src, void *Dest,
1817  const ext::oneapi::experimental::image_descriptor &DeviceImgDesc,
1818  size_t DeviceRowPitch, const std::vector<event> &DepEvents,
1820 
1845  event ext_oneapi_copy(
1846  void *Src, sycl::range<3> SrcOffset, void *Dest,
1847  sycl::range<3> DestOffset,
1848  const ext::oneapi::experimental::image_descriptor &DeviceImgDesc,
1849  size_t DeviceRowPitch, sycl::range<3> HostExtent,
1850  sycl::range<3> CopyExtent, const std::vector<event> &DepEvents,
1852 
1862  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
1863  return submit(
1864  [&](handler &CGH) {
1865  CGH.ext_oneapi_wait_external_semaphore(SemaphoreHandle);
1866  },
1867  CodeLoc);
1868  }
1869 
1877  event ext_oneapi_wait_external_semaphore(
1879  event DepEvent,
1881 
1890  event ext_oneapi_wait_external_semaphore(
1892  const std::vector<event> &DepEvents,
1894 
1903  event ext_oneapi_wait_external_semaphore(
1905  uint64_t WaitValue,
1907 
1917  event ext_oneapi_wait_external_semaphore(
1919  uint64_t WaitValue, event DepEvent,
1921 
1932  event ext_oneapi_wait_external_semaphore(
1934  uint64_t WaitValue, const std::vector<event> &DepEvents,
1936 
1944  event ext_oneapi_signal_external_semaphore(
1947 
1956  event ext_oneapi_signal_external_semaphore(
1958  event DepEvent,
1960 
1970  event ext_oneapi_signal_external_semaphore(
1972  const std::vector<event> &DepEvents,
1974 
1984  event ext_oneapi_signal_external_semaphore(
1986  uint64_t SignalValue,
1988 
1999  event ext_oneapi_signal_external_semaphore(
2001  uint64_t SignalValue, event DepEvent,
2003 
2015  event ext_oneapi_signal_external_semaphore(
2017  uint64_t SignalValue, const std::vector<event> &DepEvents,
2019 
2025  template <typename KernelName = detail::auto_name, typename KernelType,
2026  typename PropertiesT>
2027  std::enable_if_t<
2030  PropertiesT Properties, _KERNELFUNCPARAM(KernelFunc),
2032  static_assert(
2033  (detail::check_fn_signature<std::remove_reference_t<KernelType>,
2034  void()>::value ||
2035  detail::check_fn_signature<std::remove_reference_t<KernelType>,
2036  void(kernel_handler)>::value),
2037  "sycl::queue.single_task() requires a kernel instead of command group. "
2038  "Use queue.submit() instead");
2039 
2040  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
2041  return submit(
2042  [&](handler &CGH) {
2043  CGH.template single_task<KernelName, KernelType, PropertiesT>(
2044  Properties, KernelFunc);
2045  },
2046  CodeLoc);
2047  }
2048 
2053  template <typename KernelName = detail::auto_name, typename KernelType>
2055  _KERNELFUNCPARAM(KernelFunc),
2057  return single_task<KernelName, KernelType>(
2058  ext::oneapi::experimental::empty_properties_t{}, KernelFunc, CodeLoc);
2059  }
2060 
2067  template <typename KernelName = detail::auto_name, typename KernelType,
2068  typename PropertiesT>
2069  std::enable_if_t<
2072  event DepEvent, PropertiesT Properties, _KERNELFUNCPARAM(KernelFunc),
2074  static_assert(
2075  (detail::check_fn_signature<std::remove_reference_t<KernelType>,
2076  void()>::value ||
2077  detail::check_fn_signature<std::remove_reference_t<KernelType>,
2078  void(kernel_handler)>::value),
2079  "sycl::queue.single_task() requires a kernel instead of command group. "
2080  "Use queue.submit() instead");
2081 
2082  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
2083  return submit(
2084  [&](handler &CGH) {
2085  CGH.depends_on(DepEvent);
2086  CGH.template single_task<KernelName, KernelType, PropertiesT>(
2087  Properties, KernelFunc);
2088  },
2089  CodeLoc);
2090  }
2091 
2097  template <typename KernelName = detail::auto_name, typename KernelType>
2099  event DepEvent, _KERNELFUNCPARAM(KernelFunc),
2101  return single_task<KernelName, KernelType>(
2102  DepEvent, ext::oneapi::experimental::empty_properties_t{}, KernelFunc,
2103  CodeLoc);
2104  }
2105 
2113  template <typename KernelName = detail::auto_name, typename KernelType,
2114  typename PropertiesT>
2115  std::enable_if_t<
2118  const std::vector<event> &DepEvents, PropertiesT Properties,
2119  _KERNELFUNCPARAM(KernelFunc),
2121  static_assert(
2122  (detail::check_fn_signature<std::remove_reference_t<KernelType>,
2123  void()>::value ||
2124  detail::check_fn_signature<std::remove_reference_t<KernelType>,
2125  void(kernel_handler)>::value),
2126  "sycl::queue.single_task() requires a kernel instead of command group. "
2127  "Use queue.submit() instead");
2128 
2129  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
2130  return submit(
2131  [&](handler &CGH) {
2132  CGH.depends_on(DepEvents);
2133  CGH.template single_task<KernelName, KernelType, PropertiesT>(
2134  Properties, KernelFunc);
2135  },
2136  CodeLoc);
2137  }
2138 
2145  template <typename KernelName = detail::auto_name, typename KernelType>
2147  const std::vector<event> &DepEvents, _KERNELFUNCPARAM(KernelFunc),
2149  return single_task<KernelName, KernelType>(
2150  DepEvents, ext::oneapi::experimental::empty_properties_t{}, KernelFunc,
2151  CodeLoc);
2152  }
2153 
2160  template <typename KernelName = detail::auto_name, typename... RestT>
2161  event parallel_for(range<1> Range, RestT &&...Rest) {
2162  return parallel_for_impl<KernelName>(Range, Rest...);
2163  }
2164 
2171  template <typename KernelName = detail::auto_name, typename... RestT>
2172  event parallel_for(range<2> Range, RestT &&...Rest) {
2173  return parallel_for_impl<KernelName>(Range, Rest...);
2174  }
2175 
2182  template <typename KernelName = detail::auto_name, typename... RestT>
2183  event parallel_for(range<3> Range, RestT &&...Rest) {
2184  return parallel_for_impl<KernelName>(Range, Rest...);
2185  }
2186 
2194  template <typename KernelName = detail::auto_name, typename... RestT>
2195  event parallel_for(range<1> Range, event DepEvent, RestT &&...Rest) {
2196  return parallel_for_impl<KernelName>(Range, DepEvent, Rest...);
2197  }
2198 
2206  template <typename KernelName = detail::auto_name, typename... RestT>
2207  event parallel_for(range<2> Range, event DepEvent, RestT &&...Rest) {
2208  return parallel_for_impl<KernelName>(Range, DepEvent, Rest...);
2209  }
2210 
2218  template <typename KernelName = detail::auto_name, typename... RestT>
2219  event parallel_for(range<3> Range, event DepEvent, RestT &&...Rest) {
2220  return parallel_for_impl<KernelName>(Range, DepEvent, Rest...);
2221  }
2222 
2231  template <typename KernelName = detail::auto_name, typename... RestT>
2232  event parallel_for(range<1> Range, const std::vector<event> &DepEvents,
2233  RestT &&...Rest) {
2234  return parallel_for_impl<KernelName>(Range, DepEvents, Rest...);
2235  }
2236 
2245  template <typename KernelName = detail::auto_name, typename... RestT>
2246  event parallel_for(range<2> Range, const std::vector<event> &DepEvents,
2247  RestT &&...Rest) {
2248  return parallel_for_impl<KernelName>(Range, DepEvents, Rest...);
2249  }
2250 
2259  template <typename KernelName = detail::auto_name, typename... RestT>
2260  event parallel_for(range<3> Range, const std::vector<event> &DepEvents,
2261  RestT &&...Rest) {
2262  return parallel_for_impl<KernelName>(Range, DepEvents, Rest...);
2263  }
2264 
2265  // While other shortcuts with offsets are able to go through parallel_for(...,
2266  // RestT &&...Rest), those that accept dependency events vector have to be
2267  // overloaded to allow implicit construction from an init-list.
2275  template <typename KernelName = detail::auto_name, typename KernelType,
2276  int Dim>
2277  event parallel_for(range<Dim> Range, id<Dim> WorkItemOffset,
2278  const std::vector<event> &DepEvents,
2279  _KERNELFUNCPARAM(KernelFunc)) {
2280  static_assert(1 <= Dim && Dim <= 3, "Invalid number of dimensions");
2281  return parallel_for_impl<KernelName>(Range, WorkItemOffset, DepEvents,
2282  KernelFunc);
2283  }
2284 
2292  template <typename KernelName = detail::auto_name, typename KernelType,
2293  int Dims>
2294  __SYCL2020_DEPRECATED("offsets are deprecated in SYCL 2020")
2295  event parallel_for_impl(range<Dims> Range, id<Dims> WorkItemOffset,
2296  _KERNELFUNCPARAM(KernelFunc)) {
2297  // Actual code location needs to be captured from KernelInfo object.
2298  const detail::code_location CodeLoc = {};
2299  return submit(
2300  [&](handler &CGH) {
2301  CGH.template parallel_for<KernelName>(Range, WorkItemOffset,
2302  KernelFunc);
2303  },
2304  CodeLoc);
2305  }
2306 
2315  template <typename KernelName = detail::auto_name, typename KernelType,
2316  int Dims>
2317  __SYCL2020_DEPRECATED("offsets are deprecated in SYCL 2020")
2318  event parallel_for_impl(range<Dims> Range, id<Dims> WorkItemOffset,
2319  event DepEvent, _KERNELFUNCPARAM(KernelFunc)) {
2320  // Actual code location needs to be captured from KernelInfo object.
2321  const detail::code_location CodeLoc = {};
2322  return submit(
2323  [&](handler &CGH) {
2324  CGH.depends_on(DepEvent);
2325  CGH.template parallel_for<KernelName>(Range, WorkItemOffset,
2326  KernelFunc);
2327  },
2328  CodeLoc);
2329  }
2330 
2340  template <typename KernelName = detail::auto_name, typename KernelType,
2341  int Dims>
2342  __SYCL2020_DEPRECATED("offsets are deprecated in SYCL 2020")
2343  event parallel_for_impl(range<Dims> Range, id<Dims> WorkItemOffset,
2344  const std::vector<event> &DepEvents,
2345  _KERNELFUNCPARAM(KernelFunc)) {
2346  // Actual code location needs to be captured from KernelInfo object.
2347  const detail::code_location CodeLoc = {};
2348  return submit(
2349  [&](handler &CGH) {
2350  CGH.depends_on(DepEvents);
2351  CGH.template parallel_for<KernelName>(Range, WorkItemOffset,
2352  KernelFunc);
2353  },
2354  CodeLoc);
2355  }
2356 
2364  template <typename KernelName = detail::auto_name, int Dims,
2365  typename PropertiesT, typename... RestT>
2366  std::enable_if_t<
2367  detail::AreAllButLastReductions<RestT...>::value &&
2369  event>
2370  parallel_for(nd_range<Dims> Range, PropertiesT Properties, RestT &&...Rest) {
2371  using KI = sycl::detail::KernelInfo<KernelName>;
2372  constexpr detail::code_location CodeLoc(
2373  KI::getFileName(), KI::getFunctionName(), KI::getLineNumber(),
2374  KI::getColumnNumber());
2375  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
2376  return submit(
2377  [&](handler &CGH) {
2378  CGH.template parallel_for<KernelName>(Range, Properties, Rest...);
2379  },
2380  CodeLoc);
2381  }
2382 
2389  template <typename KernelName = detail::auto_name, int Dims,
2390  typename... RestT>
2391  std::enable_if_t<detail::AreAllButLastReductions<RestT...>::value, event>
2392  parallel_for(nd_range<Dims> Range, RestT &&...Rest) {
2393  return parallel_for<KernelName>(
2395  }
2396 
2404  template <typename KernelName = detail::auto_name, int Dims,
2405  typename... RestT>
2406  event parallel_for(nd_range<Dims> Range, event DepEvent, RestT &&...Rest) {
2407  using KI = sycl::detail::KernelInfo<KernelName>;
2408  constexpr detail::code_location CodeLoc(
2409  KI::getFileName(), KI::getFunctionName(), KI::getLineNumber(),
2410  KI::getColumnNumber());
2411  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
2412  return submit(
2413  [&](handler &CGH) {
2414  CGH.depends_on(DepEvent);
2415  CGH.template parallel_for<KernelName>(Range, Rest...);
2416  },
2417  CodeLoc);
2418  }
2419 
2428  template <typename KernelName = detail::auto_name, int Dims,
2429  typename... RestT>
2430  event parallel_for(nd_range<Dims> Range, const std::vector<event> &DepEvents,
2431  RestT &&...Rest) {
2432  using KI = sycl::detail::KernelInfo<KernelName>;
2433  constexpr detail::code_location CodeLoc(
2434  KI::getFileName(), KI::getFunctionName(), KI::getLineNumber(),
2435  KI::getColumnNumber());
2436  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
2437  return submit(
2438  [&](handler &CGH) {
2439  CGH.depends_on(DepEvents);
2440  CGH.template parallel_for<KernelName>(Range, Rest...);
2441  },
2442  CodeLoc);
2443  }
2444 
2451  template <typename SrcT, int SrcDims, access_mode SrcMode, target SrcTgt,
2452  access::placeholder IsPlaceholder, typename DestT>
2453  event copy(
2455  std::shared_ptr<DestT> Dest,
2457  return submit(
2458  [&](handler &CGH) {
2459  CGH.require(Src);
2460  CGH.copy(Src, Dest);
2461  },
2462  CodeLoc);
2463  }
2464 
2471  template <typename SrcT, typename DestT, int DestDims, access_mode DestMode,
2473  event copy(
2474  std::shared_ptr<SrcT> Src,
2477  return submit(
2478  [&](handler &CGH) {
2479  CGH.require(Dest);
2480  CGH.copy(Src, Dest);
2481  },
2482  CodeLoc);
2483  }
2484 
2491  template <typename SrcT, int SrcDims, access_mode SrcMode, target SrcTgt,
2492  access::placeholder IsPlaceholder, typename DestT>
2493  event copy(
2496  return submit(
2497  [&](handler &CGH) {
2498  CGH.require(Src);
2499  CGH.copy(Src, Dest);
2500  },
2501  CodeLoc);
2502  }
2503 
2510  template <typename SrcT, typename DestT, int DestDims, access_mode DestMode,
2512  event copy(
2513  const SrcT *Src,
2516  return submit(
2517  [&](handler &CGH) {
2518  CGH.require(Dest);
2519  CGH.copy(Src, Dest);
2520  },
2521  CodeLoc);
2522  }
2523 
2530  template <typename SrcT, int SrcDims, access_mode SrcMode, target SrcTgt,
2531  access::placeholder IsSrcPlaceholder, typename DestT, int DestDims,
2532  access_mode DestMode, target DestTgt,
2533  access::placeholder IsDestPlaceholder>
2534  event copy(
2538  return submit(
2539  [&](handler &CGH) {
2540  CGH.require(Src);
2541  CGH.require(Dest);
2542  CGH.copy(Src, Dest);
2543  },
2544  CodeLoc);
2545  }
2546 
2552  template <typename T, int Dims, access_mode Mode, target Tgt,
2557  return submit(
2558  [&](handler &CGH) {
2559  CGH.require(Acc);
2560  CGH.update_host(Acc);
2561  },
2562  CodeLoc);
2563  }
2564 
2571  template <typename T, int Dims, access_mode Mode, target Tgt,
2573  event fill(
2576  return submit(
2577  [&](handler &CGH) {
2578  CGH.require(Dest);
2579  CGH.fill<T>(Dest, Src);
2580  },
2581  CodeLoc);
2582  }
2583 
2590  bool ext_codeplay_supports_fusion() const;
2591 
2592 // Clean KERNELFUNC macros.
2593 #undef _KERNELFUNCPARAM
2594 
2602  Graph,
2604  return submit([&](handler &CGH) { CGH.ext_oneapi_graph(Graph); }, CodeLoc);
2605  }
2606 
2616  Graph,
2617  event DepEvent,
2619  return submit(
2620  [&](handler &CGH) {
2621  CGH.depends_on(DepEvent);
2622  CGH.ext_oneapi_graph(Graph);
2623  },
2624  CodeLoc);
2625  }
2626 
2636  Graph,
2637  const std::vector<event> &DepEvents,
2639  return submit(
2640  [&](handler &CGH) {
2641  CGH.depends_on(DepEvents);
2642  CGH.ext_oneapi_graph(Graph);
2643  },
2644  CodeLoc);
2645  }
2646 
2650  void ext_oneapi_prod();
2651 
2655  bool is_in_order() const;
2656 
2660  backend get_backend() const noexcept;
2661 
2666  bool ext_oneapi_empty() const;
2667 
2668  pi_native_handle getNative(int32_t &NativeHandleDesc) const;
2669 
2670  event ext_oneapi_get_last_event() const;
2671 
2672  void ext_oneapi_set_external_event(const event &external_event);
2673 
2674 private:
2675  std::shared_ptr<detail::queue_impl> impl;
2676  queue(std::shared_ptr<detail::queue_impl> impl) : impl(impl) {}
2677 
2678  template <class Obj>
2679  friend decltype(Obj::impl) detail::getSyclObjImpl(const Obj &SyclObject);
2680  template <class T>
2681  friend T detail::createSyclObjFromImpl(decltype(T::impl) ImplObj);
2682 
2683  template <backend BackendName, class SyclObjectT>
2684  friend auto get_native(const SyclObjectT &Obj)
2686 
2687 #if __SYCL_USE_FALLBACK_ASSERT
2688  friend event detail::submitAssertCapture(queue &, event &, queue *,
2689  const detail::code_location &);
2690 #endif
2691 
2693  event submit_impl(std::function<void(handler &)> CGH,
2694  const detail::code_location &CodeLoc);
2696  event submit_impl(std::function<void(handler &)> CGH, queue secondQueue,
2697  const detail::code_location &CodeLoc);
2698 
2702  event discard_or_return(const event &Event);
2703 
2704  // Function to postprocess submitted command
2705  // Arguments:
2706  // bool IsKernel - true if the submitted command was kernel, false otherwise
2707  // bool KernelUsesAssert - true if submitted kernel uses assert, only
2708  // meaningful when IsKernel is true
2709  // event &Event - event after which post processing should be executed
2710  using SubmitPostProcessF = std::function<void(bool, bool, event &)>;
2711 
2717  event submit_impl_and_postprocess(std::function<void(handler &)> CGH,
2718  const detail::code_location &CodeLoc,
2719  const SubmitPostProcessF &PostProcess);
2726  event submit_impl_and_postprocess(std::function<void(handler &)> CGH,
2727  queue secondQueue,
2728  const detail::code_location &CodeLoc,
2729  const SubmitPostProcessF &PostProcess);
2730 
2737  template <typename KernelName, int Dims, typename PropertiesT,
2738  typename... RestT>
2739  std::enable_if_t<
2740  detail::AreAllButLastReductions<RestT...>::value &&
2742  event>
2743  parallel_for_impl(range<Dims> Range, PropertiesT Properties,
2744  RestT &&...Rest) {
2745  using KI = sycl::detail::KernelInfo<KernelName>;
2746  constexpr detail::code_location CodeLoc(
2747  KI::getFileName(), KI::getFunctionName(), KI::getLineNumber(),
2748  KI::getColumnNumber());
2749  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
2750  return submit(
2751  [&](handler &CGH) {
2752  CGH.template parallel_for<KernelName>(Range, Properties, Rest...);
2753  },
2754  CodeLoc);
2755  }
2756 
2762  template <typename KernelName, int Dims, typename... RestT>
2763  std::enable_if_t<detail::AreAllButLastReductions<RestT...>::value, event>
2764  parallel_for_impl(range<Dims> Range, RestT &&...Rest) {
2765  return parallel_for_impl<KernelName>(
2767  }
2768 
2776  template <typename KernelName, int Dims, typename PropertiesT,
2777  typename... RestT>
2778  std::enable_if_t<
2779  ext::oneapi::experimental::is_property_list<PropertiesT>::value, event>
2780  parallel_for_impl(range<Dims> Range, event DepEvent, PropertiesT Properties,
2781  RestT &&...Rest) {
2782  using KI = sycl::detail::KernelInfo<KernelName>;
2783  constexpr detail::code_location CodeLoc(
2784  KI::getFileName(), KI::getFunctionName(), KI::getLineNumber(),
2785  KI::getColumnNumber());
2786  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
2787  return submit(
2788  [&](handler &CGH) {
2789  CGH.depends_on(DepEvent);
2790  CGH.template parallel_for<KernelName>(Range, Properties, Rest...);
2791  },
2792  CodeLoc);
2793  }
2794 
2801  template <typename KernelName, int Dims, typename... RestT>
2802  event parallel_for_impl(range<Dims> Range, event DepEvent, RestT &&...Rest) {
2803  return parallel_for_impl<KernelName>(
2805  Rest...);
2806  }
2807 
2816  template <typename KernelName, int Dims, typename PropertiesT,
2817  typename... RestT>
2818  std::enable_if_t<
2819  ext::oneapi::experimental::is_property_list<PropertiesT>::value, event>
2820  parallel_for_impl(range<Dims> Range, const std::vector<event> &DepEvents,
2821  PropertiesT Properties, RestT &&...Rest) {
2822  using KI = sycl::detail::KernelInfo<KernelName>;
2823  constexpr detail::code_location CodeLoc(
2824  KI::getFileName(), KI::getFunctionName(), KI::getLineNumber(),
2825  KI::getColumnNumber());
2826  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
2827  return submit(
2828  [&](handler &CGH) {
2829  CGH.depends_on(DepEvents);
2830  CGH.template parallel_for<KernelName>(Range, Properties, Rest...);
2831  },
2832  CodeLoc);
2833  }
2834 
2842  template <typename KernelName, int Dims, typename... RestT>
2843  event parallel_for_impl(range<Dims> Range,
2844  const std::vector<event> &DepEvents,
2845  RestT &&...Rest) {
2846  return parallel_for_impl<KernelName>(
2848  Rest...);
2849  }
2850 
2851  event memcpyToDeviceGlobal(void *DeviceGlobalPtr, const void *Src,
2852  bool IsDeviceImageScope, size_t NumBytes,
2853  size_t Offset,
2854  const std::vector<event> &DepEvents);
2855  event memcpyFromDeviceGlobal(void *Dest, const void *DeviceGlobalPtr,
2856  bool IsDeviceImageScope, size_t NumBytes,
2857  size_t Offset,
2858  const std::vector<event> &DepEvents);
2859 };
2860 
2861 } // namespace _V1
2862 } // namespace sycl
2863 
2864 namespace std {
2865 template <> struct __SYCL_EXPORT hash<sycl::queue> {
2866  size_t operator()(const sycl::queue &Q) const;
2867 };
2868 } // namespace std
2869 
2870 #if __SYCL_USE_FALLBACK_ASSERT
2871 // Explicitly request format macros
2872 #ifndef __STDC_FORMAT_MACROS
2873 #define __STDC_FORMAT_MACROS 1
2874 #endif
2875 #include <cinttypes>
2876 
2877 namespace sycl {
2878 inline namespace _V1 {
2879 
2880 namespace detail {
2881 #define __SYCL_ASSERT_START 1
2882 
2883 namespace __sycl_service_kernel__ {
2884 class AssertInfoCopier;
2885 } // namespace __sycl_service_kernel__
2886 
2898 event submitAssertCapture(queue &Self, event &Event, queue *SecondaryQueue,
2899  const detail::code_location &CodeLoc) {
2900  buffer<detail::AssertHappened, 1> Buffer{1};
2901 
2902  event CopierEv, CheckerEv, PostCheckerEv;
2903  auto CopierCGF = [&](handler &CGH) {
2904  CGH.depends_on(Event);
2905 
2906  auto Acc = Buffer.get_access<access::mode::write>(CGH);
2907 
2908  CGH.single_task<__sycl_service_kernel__::AssertInfoCopier>([Acc] {
2909 #if defined(__SYCL_DEVICE_ONLY__) && !defined(__NVPTX__)
2910  __devicelib_assert_read(&Acc[0]);
2911 #else
2912  (void)Acc;
2913 #endif // defined(__SYCL_DEVICE_ONLY__) && !defined(__NVPTX__)
2914  });
2915  };
2916  auto CheckerCGF = [&CopierEv, &Buffer](handler &CGH) {
2917  CGH.depends_on(CopierEv);
2918  using mode = access::mode;
2919  using target = access::target;
2920 
2921  auto Acc = Buffer.get_access<mode::read, target::host_buffer>(CGH);
2922 
2923  CGH.host_task([=] {
2924  const detail::AssertHappened *AH = &Acc[0];
2925 
2926  // Don't use assert here as msvc will insert reference to __imp__wassert
2927  // which won't be properly resolved in separate compile use-case
2928 #ifndef NDEBUG
2929  if (AH->Flag == __SYCL_ASSERT_START)
2930  throw sycl::exception(
2932  "Internal Error. Invalid value in assert description.");
2933 #endif
2934 
2935  if (AH->Flag) {
2936  const char *Expr = AH->Expr[0] ? AH->Expr : "<unknown expr>";
2937  const char *File = AH->File[0] ? AH->File : "<unknown file>";
2938  const char *Func = AH->Func[0] ? AH->Func : "<unknown func>";
2939 
2940  fprintf(stderr,
2941  "%s:%d: %s: global id: [%" PRIu64 ",%" PRIu64 ",%" PRIu64
2942  "], local id: [%" PRIu64 ",%" PRIu64 ",%" PRIu64 "] "
2943  "Assertion `%s` failed.\n",
2944  File, AH->Line, Func, AH->GID0, AH->GID1, AH->GID2, AH->LID0,
2945  AH->LID1, AH->LID2, Expr);
2946  fflush(stderr);
2947  abort(); // no need to release memory as it's abort anyway
2948  }
2949  });
2950  };
2951 
2952  if (SecondaryQueue) {
2953  CopierEv = Self.submit_impl(CopierCGF, *SecondaryQueue, CodeLoc);
2954  CheckerEv = Self.submit_impl(CheckerCGF, *SecondaryQueue, CodeLoc);
2955  } else {
2956  CopierEv = Self.submit_impl(CopierCGF, CodeLoc);
2957  CheckerEv = Self.submit_impl(CheckerCGF, CodeLoc);
2958  }
2959 
2960  return CheckerEv;
2961 }
2962 #undef __SYCL_ASSERT_START
2963 } // namespace detail
2964 
2965 } // namespace _V1
2966 } // namespace sycl
2967 #endif // __SYCL_USE_FALLBACK_ASSERT
The file contains implementations of accessor class.
The context class represents a SYCL context on which kernel functions may be executed.
Definition: context.hpp:50
This class is the default KernelName template parameter type for kernel invocation APIs such as singl...
Definition: kernel.hpp:44
Data type that manages the code_location information in TLS.
Definition: common.hpp:129
The SYCL device class encapsulates a single SYCL device on which kernels may be executed.
Definition: device.hpp:64
An event object can be used to synchronize memory transfers, enqueues of kernels and signaling barrie...
Definition: event.hpp:44
Command group handler class.
Definition: handler.hpp:462
void depends_on(event Event)
Registers event dependencies on this command group.
Definition: handler.cpp:1512
void ext_oneapi_wait_external_semaphore(ext::oneapi::experimental::interop_semaphore_handle SemaphoreHandle)
Submit a non-blocking device-side wait on an external.
Definition: handler.cpp:1404
void copy(accessor< T_Src, Dims, AccessMode, AccessTarget, IsPlaceholder > Src, std::shared_ptr< T_Dst > Dst)
Copies the content of memory object accessed by Src into the memory pointed by Dst.
Definition: handler.hpp:2624
void ext_oneapi_graph(ext::oneapi::experimental::command_graph< ext::oneapi::experimental::graph_state::executable > Graph)
Executes a command_graph.
Definition: handler.cpp:1748
void memcpy(void *Dest, const void *Src, size_t Count)
Copies data from one memory region to another, each is either a host pointer or a pointer within USM ...
Definition: handler.cpp:960
void require(accessor< DataT, Dims, AccMode, AccTarget, isPlaceholder > Acc)
Requires access to the memory object associated with the placeholder accessor.
Definition: handler.hpp:1935
void fill(accessor< T, Dims, AccessMode, AccessTarget, IsPlaceholder, PropertyListT > Dst, const T &Pattern)
Fills memory pointed by accessor with the pattern given.
Definition: handler.hpp:2853
void update_host(accessor< T, Dims, AccessMode, AccessTarget, IsPlaceholder > Acc)
Provides guarantees that the memory object accessed via Acc is updated on the host after command grou...
Definition: handler.hpp:2822
void ext_oneapi_memcpy2d(void *Dest, size_t DestPitch, const void *Src, size_t SrcPitch, size_t Width, size_t Height)
Copies data from one 2D memory region to another, both pointed by USM pointers.
Definition: memcpy2d.hpp:19
void prefetch(const void *Ptr, size_t Count)
Provides hints to the runtime library that data should be made available on a device earlier than Uni...
Definition: handler.cpp:977
A unique identifier of an item in an index space.
Definition: id.hpp:36
Defines the iteration domain of both the work-groups and the overall dispatch.
Definition: nd_range.hpp:22
Objects of the property_list class are containers for the SYCL properties.
Encapsulates a single SYCL queue which schedules kernels on a SYCL device.
Definition: queue.hpp:111
event copy(accessor< SrcT, SrcDims, SrcMode, SrcTgt, IsPlaceholder > Src, DestT *Dest, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from a memory region pointed to by a placeholder accessor to another memory region pointe...
Definition: queue.hpp:2493
event copy(const std::remove_all_extents_t< T > *Src, ext::oneapi::experimental::device_global< T, PropertyListT > &Dest, size_t Count, size_t StartIndex, const std::vector< event > &DepEvents, const detail::code_location &CodeLoc=detail::code_location::current())
Copies elements of type std::remove_all_extents_t<T> from a USM memory region to a device_global.
Definition: queue.hpp:1253
void wait(const detail::code_location &CodeLoc=detail::code_location::current())
Performs a blocking wait for the completion of all enqueued tasks in the queue.
Definition: queue.hpp:435
event fill(void *Ptr, const T &Pattern, size_t Count, const std::vector< event > &DepEvents, const detail::code_location &CodeLoc=detail::code_location::current())
Fills the specified memory with the specified pattern.
Definition: queue.hpp:525
queue(const property_list &PropList={})
Constructs a SYCL queue instance using the device returned by an instance of default_selector.
Definition: queue.hpp:117
event fill(void *Ptr, const T &Pattern, size_t Count, event DepEvent, const detail::code_location &CodeLoc=detail::code_location::current())
Fills the specified memory with the specified pattern.
Definition: queue.hpp:503
event ext_oneapi_memcpy2d(void *Dest, size_t DestPitch, const void *Src, size_t SrcPitch, size_t Width, size_t Height, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from one 2D memory region to another, both pointed by USM pointers.
Definition: queue.hpp:815
queue & operator=(const queue &RHS)=default
event prefetch(const void *Ptr, size_t Count, const std::vector< event > &DepEvents, const detail::code_location &CodeLoc=detail::code_location::current())
Provides hints to the runtime library that data should be made available on a device earlier than Uni...
Definition: queue.hpp:783
std::enable_if_t< ext::oneapi::experimental::is_property_list< PropertiesT >::value, event > single_task(PropertiesT Properties, _KERNELFUNCPARAM(KernelFunc), const detail::code_location &CodeLoc=detail::code_location::current())
single_task version with a kernel represented as a lambda.
Definition: queue.hpp:2029
event copy(const T *Src, T *Dest, size_t Count, const std::vector< event > &DepEvents, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from one memory region to another, each is either a host pointer or a pointer within USM ...
Definition: queue.hpp:684
queue(const context &syclContext, const DeviceSelector &deviceSelector, const async_handler &AsyncHandler, const property_list &propList={})
Constructs a SYCL queue instance using the device identified by the device selector provided.
Definition: queue.hpp:180
event parallel_for(range< 1 > Range, const std::vector< event > &DepEvents, RestT &&...Rest)
parallel_for version with a kernel represented as a lambda + range that specifies global size only.
Definition: queue.hpp:2232
event parallel_for(range< 3 > Range, const std::vector< event > &DepEvents, RestT &&...Rest)
parallel_for version with a kernel represented as a lambda + range that specifies global size only.
Definition: queue.hpp:2260
std::enable_if_t< ext::oneapi::experimental::is_property_list< PropertiesT >::value, event > single_task(const std::vector< event > &DepEvents, PropertiesT Properties, _KERNELFUNCPARAM(KernelFunc), const detail::code_location &CodeLoc=detail::code_location::current())
single_task version with a kernel represented as a lambda.
Definition: queue.hpp:2117
event parallel_for(range< 1 > Range, RestT &&...Rest)
parallel_for version with a kernel represented as a lambda + range that specifies global size only.
Definition: queue.hpp:2161
__SYCL2020_DEPRECATED("SYCL 1.2.1 device selectors are deprecated. Please " "use SYCL 2020 device selectors instead.") queue(const device_selector &DeviceSelector
Constructs a SYCL queue instance using the device returned by the DeviceSelector provided.
event parallel_for(range< Dim > Range, id< Dim > WorkItemOffset, const std::vector< event > &DepEvents, _KERNELFUNCPARAM(KernelFunc))
parallel_for version with a kernel represented as a lambda + range and offset that specify global siz...
Definition: queue.hpp:2277
event memcpy(ext::oneapi::experimental::device_global< T, PropertyListT > &Dest, const void *Src, size_t NumBytes=sizeof(T), size_t Offset=0, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from a USM memory region to a device_global.
Definition: queue.hpp:1150
event copy(std::shared_ptr< SrcT > Src, accessor< DestT, DestDims, DestMode, DestTgt, IsPlaceholder > Dest, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from a memory region pointed to by a shared_ptr to another memory region pointed to by a ...
Definition: queue.hpp:2473
queue(const context &syclContext, const DeviceSelector &deviceSelector, const property_list &propList={})
Constructs a SYCL queue instance using the device identified by the device selector provided.
Definition: queue.hpp:164
event ext_oneapi_graph(ext::oneapi::experimental::command_graph< ext::oneapi::experimental::graph_state::executable > Graph, const std::vector< event > &DepEvents, const detail::code_location &CodeLoc=detail::code_location::current())
Shortcut for executing a graph of commands with multiple dependencies.
Definition: queue.hpp:2633
event copy(accessor< SrcT, SrcDims, SrcMode, SrcTgt, IsSrcPlaceholder > Src, accessor< DestT, DestDims, DestMode, DestTgt, IsDestPlaceholder > Dest, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from one memory region to another, both pointed by placeholder accessors.
Definition: queue.hpp:2534
event copy(const T *Src, T *Dest, size_t Count, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from one memory region to another, each is either a host pointer or a pointer within USM ...
Definition: queue.hpp:642
event parallel_for(range< 1 > Range, event DepEvent, RestT &&...Rest)
parallel_for version with a kernel represented as a lambda + range that specifies global size only.
Definition: queue.hpp:2195
queue(queue &&RHS)=default
event ext_oneapi_wait_external_semaphore(sycl::ext::oneapi::experimental::interop_semaphore_handle SemaphoreHandle, const detail::code_location &CodeLoc=detail::code_location::current())
Instruct the queue with a non-blocking wait on an external semaphore.
Definition: queue.hpp:1859
event parallel_for(range< 3 > Range, event DepEvent, RestT &&...Rest)
parallel_for version with a kernel represented as a lambda + range that specifies global size only.
Definition: queue.hpp:2219
event parallel_for(range< 3 > Range, RestT &&...Rest)
parallel_for version with a kernel represented as a lambda + range that specifies global size only.
Definition: queue.hpp:2183
event update_host(accessor< T, Dims, Mode, Tgt, IsPlaceholder > Acc, const detail::code_location &CodeLoc=detail::code_location::current())
Provides guarantees that the memory object accessed via Acc is updated on the host after operation is...
Definition: queue.hpp:2554
event copy(const T *Src, T *Dest, size_t Count, event DepEvent, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from one memory region to another, each is either a host pointer or a pointer within USM ...
Definition: queue.hpp:663
event copy(const ext::oneapi::experimental::device_global< T, PropertyListT > &Src, std::remove_all_extents_t< T > *Dest, size_t Count=sizeof(T)/sizeof(std::remove_all_extents_t< T >), size_t StartIndex=0, const detail::code_location &CodeLoc=detail::code_location::current())
Copies elements of type std::remove_all_extents_t<T> from a device_global to a USM memory region.
Definition: queue.hpp:1374
event prefetch(const void *Ptr, size_t Count, event DepEvent, const detail::code_location &CodeLoc=detail::code_location::current())
Provides hints to the runtime library that data should be made available on a device earlier than Uni...
Definition: queue.hpp:762
queue(const DeviceSelector &deviceSelector, const property_list &PropList={})
Constructs a SYCL queue instance using the device identified by the device selector provided.
Definition: queue.hpp:150
__SYCL2020_DEPRECATED("is_host() is deprecated as the host device is no longer supported.") bool is_host() const
event memcpy(void *Dest, const ext::oneapi::experimental::device_global< T, PropertyListT > &Src, size_t NumBytes=sizeof(T), size_t Offset=0, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from a device_global to USM memory.
Definition: queue.hpp:1230
event copy(const ext::oneapi::experimental::device_global< T, PropertyListT > &Src, std::remove_all_extents_t< T > *Dest, size_t Count, size_t StartIndex, const std::vector< event > &DepEvents, const detail::code_location &CodeLoc=detail::code_location::current())
Copies elements of type std::remove_all_extents_t<T> from a device_global to a USM memory region.
Definition: queue.hpp:1326
event parallel_for(range< 2 > Range, RestT &&...Rest)
parallel_for version with a kernel represented as a lambda + range that specifies global size only.
Definition: queue.hpp:2172
queue(const async_handler &AsyncHandler, const property_list &PropList={})
Constructs a SYCL queue instance with an async_handler using the device returned by an instance of de...
Definition: queue.hpp:125
event parallel_for(nd_range< Dims > Range, const std::vector< event > &DepEvents, RestT &&...Rest)
parallel_for version with a kernel represented as a lambda + nd_range that specifies global,...
Definition: queue.hpp:2430
event parallel_for(nd_range< Dims > Range, event DepEvent, RestT &&...Rest)
parallel_for version with a kernel represented as a lambda + nd_range that specifies global,...
Definition: queue.hpp:2406
queue & operator=(queue &&RHS)=default
queue(const DeviceSelector &deviceSelector, const async_handler &AsyncHandler, const property_list &PropList={})
Constructs a SYCL queue instance using the device identified by the device selector provided.
Definition: queue.hpp:137
event ext_oneapi_graph(ext::oneapi::experimental::command_graph< ext::oneapi::experimental::graph_state::executable > Graph, const detail::code_location &CodeLoc=detail::code_location::current())
Shortcut for executing a graph of commands.
Definition: queue.hpp:2599
__SYCL2020_DEPRECATED("SYCL 1.2.1 device selectors are deprecated. Please " "use SYCL 2020 device selectors instead.") queue(const context &SyclContext
Constructs a SYCL queue instance that is associated with the context provided, using the device retur...
event single_task(event DepEvent, _KERNELFUNCPARAM(KernelFunc), const detail::code_location &CodeLoc=detail::code_location::current())
single_task version with a kernel represented as a lambda.
Definition: queue.hpp:2098
event single_task(_KERNELFUNCPARAM(KernelFunc), const detail::code_location &CodeLoc=detail::code_location::current())
single_task version with a kernel represented as a lambda.
Definition: queue.hpp:2054
event fill(accessor< T, Dims, Mode, Tgt, IsPlaceholder > Dest, const T &Src, const detail::code_location &CodeLoc=detail::code_location::current())
Fills the specified memory with the specified data.
Definition: queue.hpp:2573
bool operator==(const queue &RHS) const
Definition: queue.hpp:291
queue(const queue &RHS)=default
Constructs a SYCL queue with an optional async_handler from an OpenCL cl_command_queue.
event memcpy(void *Dest, const ext::oneapi::experimental::device_global< T, PropertyListT > &Src, size_t NumBytes, size_t Offset, event DepEvent, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from a device_global to USM memory.
Definition: queue.hpp:1209
event memcpy(ext::oneapi::experimental::device_global< T, PropertyListT > &Dest, const void *Src, size_t NumBytes, size_t Offset, event DepEvent, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from a USM memory region to a device_global.
Definition: queue.hpp:1130
void wait_and_throw(const detail::code_location &CodeLoc=detail::code_location::current())
Performs a blocking wait for the completion of all enqueued tasks in the queue.
Definition: queue.hpp:449
event single_task(const std::vector< event > &DepEvents, _KERNELFUNCPARAM(KernelFunc), const detail::code_location &CodeLoc=detail::code_location::current())
single_task version with a kernel represented as a lambda.
Definition: queue.hpp:2146
event copy(const std::remove_all_extents_t< T > *Src, ext::oneapi::experimental::device_global< T, PropertyListT > &Dest, size_t Count=sizeof(T)/sizeof(std::remove_all_extents_t< T >), size_t StartIndex=0, const detail::code_location &CodeLoc=detail::code_location::current())
Copies elements of type std::remove_all_extents_t<T> from a USM memory region to a device_global.
Definition: queue.hpp:1301
event memcpy(ext::oneapi::experimental::device_global< T, PropertyListT > &Dest, const void *Src, size_t NumBytes, size_t Offset, const std::vector< event > &DepEvents, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from a USM memory region to a device_global.
Definition: queue.hpp:1090
event ext_oneapi_graph(ext::oneapi::experimental::command_graph< ext::oneapi::experimental::graph_state::executable > Graph, event DepEvent, const detail::code_location &CodeLoc=detail::code_location::current())
Shortcut for executing a graph of commands with a single dependency.
Definition: queue.hpp:2613
std::enable_if_t< detail::AreAllButLastReductions< RestT... >::value, event > parallel_for(nd_range< Dims > Range, RestT &&...Rest)
parallel_for version with a kernel represented as a lambda + nd_range that specifies global,...
Definition: queue.hpp:2392
event copy(const std::remove_all_extents_t< T > *Src, ext::oneapi::experimental::device_global< T, PropertyListT > &Dest, size_t Count, size_t StartIndex, event DepEvent, const detail::code_location &CodeLoc=detail::code_location::current())
Copies elements of type std::remove_all_extents_t<T> from a USM memory region to a device_global.
Definition: queue.hpp:1278
event copy(const SrcT *Src, accessor< DestT, DestDims, DestMode, DestTgt, IsPlaceholder > Dest, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from a memory region pointed to by a raw pointer to another memory region pointed to by a...
Definition: queue.hpp:2512
event copy(accessor< SrcT, SrcDims, SrcMode, SrcTgt, IsPlaceholder > Src, std::shared_ptr< DestT > Dest, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from a memory region pointed to by a placeholder accessor to another memory region pointe...
Definition: queue.hpp:2453
std::enable_if_t< ext::oneapi::experimental::is_property_list< PropertiesT >::value, event > single_task(event DepEvent, PropertiesT Properties, _KERNELFUNCPARAM(KernelFunc), const detail::code_location &CodeLoc=detail::code_location::current())
single_task version with a kernel represented as a lambda.
Definition: queue.hpp:2071
event parallel_for(range< 2 > Range, event DepEvent, RestT &&...Rest)
parallel_for version with a kernel represented as a lambda + range that specifies global size only.
Definition: queue.hpp:2207
event copy(const ext::oneapi::experimental::device_global< T, PropertyListT > &Src, std::remove_all_extents_t< T > *Dest, size_t Count, size_t StartIndex, event DepEvent, const detail::code_location &CodeLoc=detail::code_location::current())
Copies elements of type std::remove_all_extents_t<T> from a device_global to a USM memory region.
Definition: queue.hpp:1351
std::enable_if_t< std::is_invocable_r_v< void, T, handler & >, event > submit(T CGF, queue &SecondaryQueue, const detail::code_location &CodeLoc=detail::code_location::current())
Submits a command group function object to the queue, in order to be scheduled for execution on the d...
Definition: queue.hpp:381
event memcpy(void *Dest, const ext::oneapi::experimental::device_global< T, PropertyListT > &Src, size_t NumBytes, size_t Offset, const std::vector< event > &DepEvents, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from a device_global to USM memory.
Definition: queue.hpp:1171
event parallel_for(range< 2 > Range, const std::vector< event > &DepEvents, RestT &&...Rest)
parallel_for version with a kernel represented as a lambda + range that specifies global size only.
Definition: queue.hpp:2246
std::enable_if_t< detail::AreAllButLastReductions< RestT... >::value &&ext::oneapi::experimental::is_property_list< PropertiesT >::value, event > parallel_for(nd_range< Dims > Range, PropertiesT Properties, RestT &&...Rest)
parallel_for version with a kernel represented as a lambda + nd_range that specifies global,...
Definition: queue.hpp:2370
bool operator!=(const queue &RHS) const
Definition: queue.hpp:293
Defines the iteration domain of either a single work-group in a parallel dispatch,...
Definition: range.hpp:26
class __SYCL2020_DEPRECATED("Host device is no longer supported.") host_selector int default_selector_v(const device &dev)
Selects SYCL host device.
bool isDeviceGlobalUsedInKernel(const void *DeviceGlobalPtr)
Definition: handler.cpp:40
void defaultAsyncHandler(exception_list Exceptions)
decltype(Obj::impl) getSyclObjImpl(const Obj &SyclObject)
Definition: impl_utils.hpp:30
T createSyclObjFromImpl(decltype(T::impl) ImplObj)
Definition: impl_utils.hpp:48
device select_device(const DSelectorInvocableType &DeviceSelectorInvocable)
void mem_advise(handler &CGH, void *Ptr, size_t NumBytes, int Advice)
void submit(queue Q, CommandGroupFunc &&CGF, const sycl::detail::code_location &CodeLoc=sycl::detail::code_location::current())
@ modifiable
In modifiable state, commands can be added to graph.
@ executable
In executable state, the graph is ready to execute.
static constexpr bool has_property()
constexpr device_has_key::value_t< Aspects... > device_has
Definition: properties.hpp:152
static constexpr auto get_property()
properties< std::tuple<> > empty_properties_t
Definition: properties.hpp:234
void fill(sycl::handler &CGH, T *Ptr, const T &Pattern, size_t Count)
signed char __SYCL2020_DEPRECATED
Definition: aliases.hpp:94
pointer get() const
Definition: multi_ptr.hpp:544
auto get_native(const SyclObjectT &Obj) -> backend_return_t< BackendName, SyclObjectT >
Definition: backend.hpp:136
typename backend_traits< Backend >::template return_type< SyclType > backend_return_t
Definition: backend.hpp:87
class __SYCL_EBO __SYCL_SPECIAL_CLASS IsPlaceholder
std::function< void(sycl::exception_list)> async_handler
std::error_code make_error_code(sycl::errc E) noexcept
Constructs an error code using e and sycl_category()
Definition: exception.cpp:93
Definition: access.hpp:18
static device_ext & get_device(unsigned int id)
Util function to get a device by id.
Definition: device.hpp:897
uintptr_t pi_native_handle
Definition: pi.h:243
_pi_mem_advice
Definition: pi.h:641
#define _KERNELFUNCPARAM(a)
Definition: queue.hpp:66
_Abi const simd< _Tp, _Abi > & noexcept
Definition: simd.hpp:1324
Predicate returning true if all template type parameters except the last one are reductions.
static constexpr code_location current(const char *fileName=__CODELOC_FILE_NAME, const char *funcName=__CODELOC_FUNCTION, unsigned long lineNo=__CODELOC_LINE, unsigned long columnNo=__CODELOC_COLUMN) noexcept
Definition: common.hpp:68
A struct to describe the properties of an image.