DPC++ Runtime
Runtime libraries for oneAPI DPC++
queue.hpp
Go to the documentation of this file.
1 //==-------------------- queue.hpp - SYCL queue ----------------------------==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #pragma once
10 
13 #include <sycl/detail/common.hpp>
14 #include <sycl/detail/export.hpp>
18 #include <sycl/device.hpp>
19 #include <sycl/device_selector.hpp>
20 #include <sycl/event.hpp>
21 #include <sycl/exception_list.hpp>
24 #include <sycl/handler.hpp>
25 #include <sycl/info/info_desc.hpp>
26 #include <sycl/property_list.hpp>
27 #include <sycl/stl.hpp>
28 
29 // Explicitly request format macros
30 #ifndef __STDC_FORMAT_MACROS
31 #define __STDC_FORMAT_MACROS 1
32 #endif
33 #include <cinttypes>
34 #include <type_traits>
35 #include <utility>
36 
37 // having _TWO_ mid-param #ifdefs makes the functions very difficult to read.
38 // Here we simplify the KernelFunc param is simplified to be
39 // _KERNELFUNCPARAM(KernelFunc) Once the queue kernel functions are defined,
40 // these macros are #undef immediately.
41 // replace _KERNELFUNCPARAM(KernelFunc) with KernelType KernelFunc
42 // or const KernelType &KernelFunc
43 #ifdef __SYCL_NONCONST_FUNCTOR__
44 #define _KERNELFUNCPARAM(a) KernelType a
45 #else
46 #define _KERNELFUNCPARAM(a) const KernelType &a
47 #endif
48 
49 // Helper macro to identify if fallback assert is needed
50 // FIXME remove __NVPTX__ condition once devicelib supports CUDA
51 #if defined(SYCL_FALLBACK_ASSERT)
52 #define __SYCL_USE_FALLBACK_ASSERT SYCL_FALLBACK_ASSERT
53 #else
54 #define __SYCL_USE_FALLBACK_ASSERT 0
55 #endif
56 
57 namespace sycl {
59 
60 // Forward declaration
61 class context;
62 class device;
63 class queue;
64 
65 template <backend BackendName, class SyclObjectT>
66 auto get_native(const SyclObjectT &Obj)
67  -> backend_return_t<BackendName, SyclObjectT>;
68 
69 namespace detail {
70 class queue_impl;
71 
72 #if __SYCL_USE_FALLBACK_ASSERT
73 static event submitAssertCapture(queue &, event &, queue *,
74  const detail::code_location &);
75 #endif
76 } // namespace detail
77 
89 class __SYCL_EXPORT queue : public detail::OwnerLessBase<queue> {
90 public:
95  explicit queue(const property_list &PropList = {})
96  : queue(default_selector(), detail::defaultAsyncHandler, PropList) {}
97 
103  queue(const async_handler &AsyncHandler, const property_list &PropList = {})
104  : queue(default_selector(), AsyncHandler, PropList) {}
105 
112  template <typename DeviceSelector,
113  typename =
114  detail::EnableIfSYCL2020DeviceSelectorInvocable<DeviceSelector>>
115  explicit queue(const DeviceSelector &deviceSelector,
116  const async_handler &AsyncHandler,
117  const property_list &PropList = {})
118  : queue(detail::select_device(deviceSelector), AsyncHandler, PropList) {}
119 
125  template <typename DeviceSelector,
126  typename =
127  detail::EnableIfSYCL2020DeviceSelectorInvocable<DeviceSelector>>
128  explicit queue(const DeviceSelector &deviceSelector,
129  const property_list &PropList = {})
130  : queue(detail::select_device(deviceSelector),
131  detail::defaultAsyncHandler, PropList) {}
132 
139  template <typename DeviceSelector,
140  typename =
141  detail::EnableIfSYCL2020DeviceSelectorInvocable<DeviceSelector>>
142  explicit queue(const context &syclContext,
143  const DeviceSelector &deviceSelector,
144  const property_list &propList = {})
145  : queue(syclContext, detail::select_device(deviceSelector, syclContext),
146  propList) {}
147 
155  template <typename DeviceSelector,
156  typename =
157  detail::EnableIfSYCL2020DeviceSelectorInvocable<DeviceSelector>>
158  explicit queue(const context &syclContext,
159  const DeviceSelector &deviceSelector,
160  const async_handler &AsyncHandler,
161  const property_list &propList = {})
162  : queue(syclContext, detail::select_device(deviceSelector, syclContext),
163  AsyncHandler, propList) {}
164 
170  __SYCL2020_DEPRECATED("SYCL 1.2.1 device selectors are deprecated. Please "
171  "use SYCL 2020 device selectors instead.")
172  queue(const device_selector &DeviceSelector,
173  const property_list &PropList = {})
174  : queue(DeviceSelector.select_device(), detail::defaultAsyncHandler,
175  PropList) {}
176 
183  __SYCL2020_DEPRECATED("SYCL 1.2.1 device selectors are deprecated. Please "
184  "use SYCL 2020 device selectors instead.")
185  queue(const device_selector &DeviceSelector,
186  const async_handler &AsyncHandler, const property_list &PropList = {})
187  : queue(DeviceSelector.select_device(), AsyncHandler, PropList) {}
188 
193  explicit queue(const device &SyclDevice, const property_list &PropList = {})
194  : queue(SyclDevice, detail::defaultAsyncHandler, PropList) {}
195 
202  explicit queue(const device &SyclDevice, const async_handler &AsyncHandler,
203  const property_list &PropList = {});
204 
211  __SYCL2020_DEPRECATED("SYCL 1.2.1 device selectors are deprecated. Please "
212  "use SYCL 2020 device selectors instead.")
213  queue(const context &SyclContext, const device_selector &DeviceSelector,
214  const property_list &PropList = {});
215 
224  __SYCL2020_DEPRECATED("SYCL 1.2.1 device selectors are deprecated. Please "
225  "use SYCL 2020 device selectors instead.")
226  queue(const context &SyclContext, const device_selector &DeviceSelector,
227  const async_handler &AsyncHandler, const property_list &PropList = {});
228 
235  queue(const context &SyclContext, const device &SyclDevice,
236  const property_list &PropList = {});
237 
245  queue(const context &SyclContext, const device &SyclDevice,
246  const async_handler &AsyncHandler, const property_list &PropList = {});
247 
256 #ifdef __SYCL_INTERNAL_API
257  queue(cl_command_queue ClQueue, const context &SyclContext,
258  const async_handler &AsyncHandler = {});
259 #endif
260 
261  queue(const queue &RHS) = default;
262 
263  queue(queue &&RHS) = default;
264 
265  queue &operator=(const queue &RHS) = default;
266 
267  queue &operator=(queue &&RHS) = default;
268 
269  bool operator==(const queue &RHS) const { return impl == RHS.impl; }
270 
271  bool operator!=(const queue &RHS) const { return !(*this == RHS); }
272 
275 #ifdef __SYCL_INTERNAL_API
276  cl_command_queue get() const;
277 #endif
278 
280  context get_context() const;
281 
283  device get_device() const;
284 
287  "is_host() is deprecated as the host device is no longer supported.")
288  bool is_host() const;
289 
293  template <typename Param>
294  typename detail::is_queue_info_desc<Param>::return_type get_info() const;
295 
296 private:
297  // A shorthand for `get_device().has()' which is expected to be a bit quicker
298  // than the long version
299  bool device_has(aspect Aspect) const;
300 
301 public:
308  template <typename T> event submit(T CGF _CODELOCPARAM(&CodeLoc)) {
309  _CODELOCARG(&CodeLoc);
310  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
311 #if __SYCL_USE_FALLBACK_ASSERT
312  auto PostProcess = [this, &CodeLoc](bool IsKernel, bool KernelUsesAssert,
313  event &E) {
314  if (IsKernel && !device_has(aspect::ext_oneapi_native_assert) &&
315  KernelUsesAssert && !device_has(aspect::accelerator)) {
316  // __devicelib_assert_fail isn't supported by Device-side Runtime
317  // Linking against fallback impl of __devicelib_assert_fail is
318  // performed by program manager class
319  // Fallback assert isn't supported for FPGA
320  submitAssertCapture(*this, E, /* SecondaryQueue = */ nullptr, CodeLoc);
321  }
322  };
323 
324  auto Event = submit_impl_and_postprocess(CGF, CodeLoc, PostProcess);
325  return discard_or_return(Event);
326 #else
327  auto Event = submit_impl(CGF, CodeLoc);
328  return discard_or_return(Event);
329 #endif // __SYCL_USE_FALLBACK_ASSERT
330  }
331 
343  template <typename T>
344  event submit(T CGF, queue &SecondaryQueue _CODELOCPARAM(&CodeLoc)) {
345  _CODELOCARG(&CodeLoc);
346  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
347 #if __SYCL_USE_FALLBACK_ASSERT
348  auto PostProcess = [this, &SecondaryQueue, &CodeLoc](
349  bool IsKernel, bool KernelUsesAssert, event &E) {
350  if (IsKernel && !device_has(aspect::ext_oneapi_native_assert) &&
351  KernelUsesAssert && !device_has(aspect::accelerator)) {
352  // Only secondary queues on devices need to be added to the assert
353  // capture.
354  // __devicelib_assert_fail isn't supported by Device-side Runtime
355  // Linking against fallback impl of __devicelib_assert_fail is
356  // performed by program manager class
357  // Fallback assert isn't supported for FPGA
358  submitAssertCapture(*this, E, &SecondaryQueue, CodeLoc);
359  }
360  };
361 
362  auto Event =
363  submit_impl_and_postprocess(CGF, SecondaryQueue, CodeLoc, PostProcess);
364  return discard_or_return(Event);
365 #else
366  auto Event = submit_impl(CGF, SecondaryQueue, CodeLoc);
367  return discard_or_return(Event);
368 #endif // __SYCL_USE_FALLBACK_ASSERT
369  }
370 
379  return submit(
380  [=](handler &CGH) { CGH.ext_oneapi_barrier(); } _CODELOCFW(CodeLoc));
381  }
382 
390  __SYCL2020_DEPRECATED("use 'ext_oneapi_submit_barrier' instead")
391  event submit_barrier(_CODELOCONLYPARAM(&CodeLoc)) {
392  _CODELOCARG(&CodeLoc);
393  return ext_oneapi_submit_barrier(CodeLoc);
394  }
395 
406  const std::vector<event> &WaitList _CODELOCPARAM(&CodeLoc)) {
407  return submit([=](handler &CGH) {
408  CGH.ext_oneapi_barrier(WaitList);
409  } _CODELOCFW(CodeLoc));
410  }
411 
421  __SYCL2020_DEPRECATED("use 'ext_oneapi_submit_barrier' instead")
422  event
423  submit_barrier(const std::vector<event> &WaitList _CODELOCPARAM(&CodeLoc)) {
424  _CODELOCARG(&CodeLoc);
425  return ext_oneapi_submit_barrier(WaitList, CodeLoc);
426  }
427 
433  void wait(_CODELOCONLYPARAM(&CodeLoc)) {
434  _CODELOCARG(&CodeLoc);
435  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
436  wait_proxy(CodeLoc);
437  }
438 
448  _CODELOCARG(&CodeLoc);
449  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
450  wait_and_throw_proxy(CodeLoc);
451  }
452 
455  void wait_proxy(const detail::code_location &CodeLoc);
458  void wait_and_throw_proxy(const detail::code_location &CodeLoc);
459 
465  void throw_asynchronous();
466 
469  template <typename PropertyT> bool has_property() const noexcept;
470 
474  template <typename PropertyT> PropertyT get_property() const;
475 
483  template <typename T> event fill(void *Ptr, const T &Pattern, size_t Count) {
484  // TODO: to add code location as parameter when ABI break is permitted
485  const detail::code_location CodeLoc("sycl/queue.hpp", "fill", 0, 0);
486  return submit([&](handler &CGH) { CGH.fill<T>(Ptr, Pattern, Count); },
487  CodeLoc);
488  }
489 
498  template <typename T>
499  event fill(void *Ptr, const T &Pattern, size_t Count, event DepEvent) {
500  return submit([&](handler &CGH) {
501  CGH.depends_on(DepEvent);
502  CGH.fill<T>(Ptr, Pattern, Count);
503  });
504  }
505 
515  template <typename T>
516  event fill(void *Ptr, const T &Pattern, size_t Count,
517  const std::vector<event> &DepEvents) {
518  return submit([&](handler &CGH) {
519  CGH.depends_on(DepEvents);
520  CGH.fill<T>(Ptr, Pattern, Count);
521  });
522  }
523 
533  event memset(void *Ptr, int Value, size_t Count);
534 
545  event memset(void *Ptr, int Value, size_t Count, event DepEvent);
546 
558  event memset(void *Ptr, int Value, size_t Count,
559  const std::vector<event> &DepEvents);
560 
572  event memcpy(void *Dest, const void *Src, size_t Count);
573 
586  event memcpy(void *Dest, const void *Src, size_t Count, event DepEvent);
587 
601  event memcpy(void *Dest, const void *Src, size_t Count,
602  const std::vector<event> &DepEvents);
603 
616  template <typename T>
617  event copy(const T *Src, T *Dest, size_t Count _CODELOCPARAM(&CodeLoc)) {
618  _CODELOCARG(&CodeLoc);
619  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
620  return this->memcpy(Dest, Src, Count * sizeof(T));
621  }
622 
636  template <typename T>
637  event copy(const T *Src, T *Dest, size_t Count,
638  event DepEvent _CODELOCPARAM(&CodeLoc)) {
639  _CODELOCARG(&CodeLoc);
640  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
641  return this->memcpy(Dest, Src, Count * sizeof(T), DepEvent);
642  }
643 
657  template <typename T>
658  event copy(const T *Src, T *Dest, size_t Count,
659  const std::vector<event> &DepEvents _CODELOCPARAM(&CodeLoc)) {
660  _CODELOCARG(&CodeLoc);
661  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
662  return this->memcpy(Dest, Src, Count * sizeof(T), DepEvents);
663  }
664 
672  __SYCL2020_DEPRECATED("use the overload with int Advice instead")
673  event mem_advise(const void *Ptr, size_t Length, pi_mem_advice Advice);
674 
682  event mem_advise(const void *Ptr, size_t Length, int Advice);
683 
692  event mem_advise(const void *Ptr, size_t Length, int Advice, event DepEvent);
693 
703  event mem_advise(const void *Ptr, size_t Length, int Advice,
704  const std::vector<event> &DepEvents);
705 
713  event prefetch(const void *Ptr, size_t Count) {
714  // TODO: to add code location as parameter when ABI break is permitted
715  const detail::code_location CodeLoc("sycl/queue.hpp", "prefetch", 0, 0);
716  return submit([=](handler &CGH) { CGH.prefetch(Ptr, Count); }, CodeLoc);
717  }
718 
727  event prefetch(const void *Ptr, size_t Count, event DepEvent) {
728  return submit([=](handler &CGH) {
729  CGH.depends_on(DepEvent);
730  CGH.prefetch(Ptr, Count);
731  });
732  }
733 
743  event prefetch(const void *Ptr, size_t Count,
744  const std::vector<event> &DepEvents) {
745  return submit([=](handler &CGH) {
746  CGH.depends_on(DepEvents);
747  CGH.prefetch(Ptr, Count);
748  });
749  }
750 
769  template <typename T = unsigned char,
770  typename = std::enable_if_t<std::is_same_v<T, unsigned char>>>
771  event ext_oneapi_memcpy2d(void *Dest, size_t DestPitch, const void *Src,
772  size_t SrcPitch, size_t Width,
773  size_t Height _CODELOCPARAM(&CodeLoc)) {
774  return submit([=](handler &CGH) {
775  CGH.ext_oneapi_memcpy2d<T>(Dest, DestPitch, Src, SrcPitch, Width, Height);
776  } _CODELOCFW(CodeLoc));
777  }
778 
798  template <typename T = unsigned char,
799  typename = std::enable_if_t<std::is_same_v<T, unsigned char>>>
800  event ext_oneapi_memcpy2d(void *Dest, size_t DestPitch, const void *Src,
801  size_t SrcPitch, size_t Width, size_t Height,
802  event DepEvent _CODELOCPARAM(&CodeLoc)) {
803  return submit([=](handler &CGH) {
804  CGH.depends_on(DepEvent);
805  CGH.ext_oneapi_memcpy2d<T>(Dest, DestPitch, Src, SrcPitch, Width, Height);
806  } _CODELOCFW(CodeLoc));
807  }
808 
829  template <typename T = unsigned char,
830  typename = std::enable_if_t<std::is_same_v<T, unsigned char>>>
831  event ext_oneapi_memcpy2d(void *Dest, size_t DestPitch, const void *Src,
832  size_t SrcPitch, size_t Width, size_t Height,
833  const std::vector<event> &DepEvents
834  _CODELOCPARAM(&CodeLoc)) {
835  return submit([=](handler &CGH) {
836  CGH.depends_on(DepEvents);
837  CGH.ext_oneapi_memcpy2d<T>(Dest, DestPitch, Src, SrcPitch, Width, Height);
838  } _CODELOCFW(CodeLoc));
839  }
840 
856  template <typename T>
857  event ext_oneapi_copy2d(const T *Src, size_t SrcPitch, T *Dest,
858  size_t DestPitch, size_t Width,
859  size_t Height _CODELOCPARAM(&CodeLoc)) {
860  return submit([=](handler &CGH) {
861  CGH.ext_oneapi_copy2d<T>(Src, SrcPitch, Dest, DestPitch, Width, Height);
862  } _CODELOCFW(CodeLoc));
863  }
864 
881  template <typename T>
882  event ext_oneapi_copy2d(const T *Src, size_t SrcPitch, T *Dest,
883  size_t DestPitch, size_t Width, size_t Height,
884  event DepEvent _CODELOCPARAM(&CodeLoc)) {
885  return submit([=](handler &CGH) {
886  CGH.depends_on(DepEvent);
887  CGH.ext_oneapi_copy2d<T>(Src, SrcPitch, Dest, DestPitch, Width, Height);
888  } _CODELOCFW(CodeLoc));
889  }
890 
908  template <typename T>
909  event ext_oneapi_copy2d(const T *Src, size_t SrcPitch, T *Dest,
910  size_t DestPitch, size_t Width, size_t Height,
911  const std::vector<event> &DepEvents
912  _CODELOCPARAM(&CodeLoc)) {
913  return submit([=](handler &CGH) {
914  CGH.depends_on(DepEvents);
915  CGH.ext_oneapi_copy2d<T>(Src, SrcPitch, Dest, DestPitch, Width, Height);
916  } _CODELOCFW(CodeLoc));
917  }
918 
935  template <typename T = unsigned char,
936  typename = std::enable_if_t<std::is_same_v<T, unsigned char>>>
937  event ext_oneapi_memset2d(void *Dest, size_t DestPitch, int Value,
938  size_t Width,
939  size_t Height _CODELOCPARAM(&CodeLoc)) {
940  return submit([=](handler &CGH) {
941  CGH.ext_oneapi_memset2d<T>(Dest, DestPitch, Value, Width, Height);
942  } _CODELOCFW(CodeLoc));
943  }
944 
962  template <typename T = unsigned char,
963  typename = std::enable_if_t<std::is_same_v<T, unsigned char>>>
964  event ext_oneapi_memset2d(void *Dest, size_t DestPitch, int Value,
965  size_t Width, size_t Height,
966  event DepEvent _CODELOCPARAM(&CodeLoc)) {
967  return submit([=](handler &CGH) {
968  CGH.depends_on(DepEvent);
969  CGH.ext_oneapi_memset2d<T>(Dest, DestPitch, Value, Width, Height);
970  } _CODELOCFW(CodeLoc));
971  }
972 
991  template <typename T = unsigned char,
992  typename = std::enable_if_t<std::is_same_v<T, unsigned char>>>
994  void *Dest, size_t DestPitch, int Value, size_t Width, size_t Height,
995  const std::vector<event> &DepEvents _CODELOCPARAM(&CodeLoc)) {
996  return submit([=](handler &CGH) {
997  CGH.depends_on(DepEvents);
998  CGH.ext_oneapi_memset2d<T>(Dest, DestPitch, Value, Width, Height);
999  } _CODELOCFW(CodeLoc));
1000  }
1001 
1015  template <typename T>
1016  event ext_oneapi_fill2d(void *Dest, size_t DestPitch, const T &Pattern,
1017  size_t Width, size_t Height _CODELOCPARAM(&CodeLoc)) {
1018  return submit([=](handler &CGH) {
1019  CGH.ext_oneapi_fill2d<T>(Dest, DestPitch, Pattern, Width, Height);
1020  } _CODELOCFW(CodeLoc));
1021  }
1022 
1037  template <typename T>
1038  event ext_oneapi_fill2d(void *Dest, size_t DestPitch, const T &Pattern,
1039  size_t Width, size_t Height,
1040  event DepEvent _CODELOCPARAM(&CodeLoc)) {
1041  return submit([=](handler &CGH) {
1042  CGH.depends_on(DepEvent);
1043  CGH.ext_oneapi_fill2d<T>(Dest, DestPitch, Pattern, Width, Height);
1044  } _CODELOCFW(CodeLoc));
1045  }
1046 
1062  template <typename T>
1063  event ext_oneapi_fill2d(void *Dest, size_t DestPitch, const T &Pattern,
1064  size_t Width, size_t Height,
1065  const std::vector<event> &DepEvents
1066  _CODELOCPARAM(&CodeLoc)) {
1067  return submit([=](handler &CGH) {
1068  CGH.depends_on(DepEvents);
1069  CGH.ext_oneapi_fill2d<T>(Dest, DestPitch, Pattern, Width, Height);
1070  } _CODELOCFW(CodeLoc));
1071  }
1072 
1085  template <typename T, typename PropertyListT>
1087  const void *Src, size_t NumBytes, size_t Offset,
1088  const std::vector<event> &DepEvents) {
1089  if (sizeof(T) < Offset + NumBytes)
1090  throw sycl::exception(make_error_code(errc::invalid),
1091  "Copy to device_global is out of bounds.");
1092 
1093  constexpr bool IsDeviceImageScoped = PropertyListT::template has_property<
1095  return memcpyToDeviceGlobal(&Dest, Src, IsDeviceImageScoped, NumBytes,
1096  Offset, DepEvents);
1097  }
1098 
1111  template <typename T, typename PropertyListT>
1113  const void *Src, size_t NumBytes, size_t Offset,
1114  event DepEvent) {
1115  return this->memcpy(Dest, Src, NumBytes, Offset,
1116  std::vector<event>{DepEvent});
1117  }
1118 
1129  template <typename T, typename PropertyListT>
1131  const void *Src, size_t NumBytes = sizeof(T),
1132  size_t Offset = 0) {
1133  return this->memcpy(Dest, Src, NumBytes, Offset, std::vector<event>{});
1134  }
1135 
1148  template <typename T, typename PropertyListT>
1149  event
1150  memcpy(void *Dest,
1152  size_t NumBytes, size_t Offset, const std::vector<event> &DepEvents) {
1153  if (sizeof(T) < Offset + NumBytes)
1154  throw sycl::exception(make_error_code(errc::invalid),
1155  "Copy from device_global is out of bounds.");
1156 
1157  constexpr bool IsDeviceImageScoped = PropertyListT::template has_property<
1159  return memcpyFromDeviceGlobal(Dest, &Src, IsDeviceImageScoped, NumBytes,
1160  Offset, DepEvents);
1161  }
1162 
1175  template <typename T, typename PropertyListT>
1176  event
1177  memcpy(void *Dest,
1179  size_t NumBytes, size_t Offset, event DepEvent) {
1180  return this->memcpy(Dest, Src, NumBytes, Offset,
1181  std::vector<event>{DepEvent});
1182  }
1183 
1194  template <typename T, typename PropertyListT>
1195  event
1196  memcpy(void *Dest,
1198  size_t NumBytes = sizeof(T), size_t Offset = 0) {
1199  return this->memcpy(Dest, Src, NumBytes, Offset, std::vector<event>{});
1200  }
1201 
1215  template <typename T, typename PropertyListT>
1216  event copy(const std::remove_all_extents_t<T> *Src,
1218  size_t Count, size_t StartIndex,
1219  const std::vector<event> &DepEvents) {
1220  return this->memcpy(Dest, Src, Count * sizeof(std::remove_all_extents_t<T>),
1221  StartIndex * sizeof(std::remove_all_extents_t<T>),
1222  DepEvents);
1223  }
1224 
1238  template <typename T, typename PropertyListT>
1239  event copy(const std::remove_all_extents_t<T> *Src,
1241  size_t Count, size_t StartIndex, event DepEvent) {
1242  return this->memcpy(Dest, Src, Count * sizeof(std::remove_all_extents_t<T>),
1243  StartIndex * sizeof(std::remove_all_extents_t<T>),
1244  DepEvent);
1245  }
1246 
1258  template <typename T, typename PropertyListT>
1259  event copy(const std::remove_all_extents_t<T> *Src,
1261  size_t Count = sizeof(T) / sizeof(std::remove_all_extents_t<T>),
1262  size_t StartIndex = 0) {
1263  return this->memcpy(Dest, Src, Count * sizeof(std::remove_all_extents_t<T>),
1264  StartIndex * sizeof(std::remove_all_extents_t<T>));
1265  }
1266 
1280  template <typename T, typename PropertyListT>
1281  event
1283  std::remove_all_extents_t<T> *Dest, size_t Count, size_t StartIndex,
1284  const std::vector<event> &DepEvents) {
1285  return this->memcpy(Dest, Src, Count * sizeof(std::remove_all_extents_t<T>),
1286  StartIndex * sizeof(std::remove_all_extents_t<T>),
1287  DepEvents);
1288  }
1289 
1303  template <typename T, typename PropertyListT>
1304  event
1306  std::remove_all_extents_t<T> *Dest, size_t Count, size_t StartIndex,
1307  event DepEvent) {
1308  return this->memcpy(Dest, Src, Count * sizeof(std::remove_all_extents_t<T>),
1309  StartIndex * sizeof(std::remove_all_extents_t<T>),
1310  DepEvent);
1311  }
1312 
1324  template <typename T, typename PropertyListT>
1325  event
1327  std::remove_all_extents_t<T> *Dest,
1328  size_t Count = sizeof(T) / sizeof(std::remove_all_extents_t<T>),
1329  size_t StartIndex = 0) {
1330  return this->memcpy(Dest, Src, Count * sizeof(std::remove_all_extents_t<T>),
1331  StartIndex * sizeof(std::remove_all_extents_t<T>));
1332  }
1333 
1339  template <typename KernelName = detail::auto_name, typename KernelType,
1340  typename PropertiesT>
1343  single_task(PropertiesT Properties,
1345  static_assert(
1347  void()>::value ||
1349  void(kernel_handler)>::value),
1350  "sycl::queue.single_task() requires a kernel instead of command group. "
1351  "Use queue.submit() instead");
1352  _CODELOCARG(&CodeLoc);
1353  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
1354  return submit(
1355  [&](handler &CGH) {
1356  CGH.template single_task<KernelName, KernelType, PropertiesT>(
1357  Properties, KernelFunc);
1358  },
1359  CodeLoc);
1360  }
1361 
1366  template <typename KernelName = detail::auto_name, typename KernelType>
1368  return single_task<KernelName, KernelType>(
1370  KernelFunc _CODELOCFW(CodeLoc));
1371  }
1372 
1379  template <typename KernelName = detail::auto_name, typename KernelType,
1380  typename PropertiesT>
1383  single_task(event DepEvent, PropertiesT Properties,
1385  static_assert(
1387  void()>::value ||
1389  void(kernel_handler)>::value),
1390  "sycl::queue.single_task() requires a kernel instead of command group. "
1391  "Use queue.submit() instead");
1392  _CODELOCARG(&CodeLoc);
1393  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
1394  return submit(
1395  [&](handler &CGH) {
1396  CGH.depends_on(DepEvent);
1397  CGH.template single_task<KernelName, KernelType, PropertiesT>(
1398  Properties, KernelFunc);
1399  },
1400  CodeLoc);
1401  }
1402 
1408  template <typename KernelName = detail::auto_name, typename KernelType>
1409  event single_task(event DepEvent,
1411  return single_task<KernelName, KernelType>(
1413  KernelFunc _CODELOCFW(CodeLoc));
1414  }
1415 
1423  template <typename KernelName = detail::auto_name, typename KernelType,
1424  typename PropertiesT>
1427  single_task(const std::vector<event> &DepEvents, PropertiesT Properties,
1429  static_assert(
1431  void()>::value ||
1433  void(kernel_handler)>::value),
1434  "sycl::queue.single_task() requires a kernel instead of command group. "
1435  "Use queue.submit() instead");
1436  _CODELOCARG(&CodeLoc);
1437  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
1438  return submit(
1439  [&](handler &CGH) {
1440  CGH.depends_on(DepEvents);
1441  CGH.template single_task<KernelName, KernelType, PropertiesT>(
1442  Properties, KernelFunc);
1443  },
1444  CodeLoc);
1445  }
1446 
1453  template <typename KernelName = detail::auto_name, typename KernelType>
1454  event single_task(const std::vector<event> &DepEvents,
1456  return single_task<KernelName, KernelType>(
1458  KernelFunc _CODELOCFW(CodeLoc));
1459  }
1460 
1467  template <typename KernelName = detail::auto_name, typename... RestT>
1468  event parallel_for(range<1> Range, RestT &&...Rest) {
1469  return parallel_for_impl<KernelName>(Range, Rest...);
1470  }
1471 
1478  template <typename KernelName = detail::auto_name, typename... RestT>
1479  event parallel_for(range<2> Range, RestT &&...Rest) {
1480  return parallel_for_impl<KernelName>(Range, Rest...);
1481  }
1482 
1489  template <typename KernelName = detail::auto_name, typename... RestT>
1490  event parallel_for(range<3> Range, RestT &&...Rest) {
1491  return parallel_for_impl<KernelName>(Range, Rest...);
1492  }
1493 
1501  template <typename KernelName = detail::auto_name, typename... RestT>
1502  event parallel_for(range<1> Range, event DepEvent, RestT &&...Rest) {
1503  return parallel_for_impl<KernelName>(Range, DepEvent, Rest...);
1504  }
1505 
1513  template <typename KernelName = detail::auto_name, typename... RestT>
1514  event parallel_for(range<2> Range, event DepEvent, RestT &&...Rest) {
1515  return parallel_for_impl<KernelName>(Range, DepEvent, Rest...);
1516  }
1517 
1525  template <typename KernelName = detail::auto_name, typename... RestT>
1526  event parallel_for(range<3> Range, event DepEvent, RestT &&...Rest) {
1527  return parallel_for_impl<KernelName>(Range, DepEvent, Rest...);
1528  }
1529 
1538  template <typename KernelName = detail::auto_name, typename... RestT>
1539  event parallel_for(range<1> Range, const std::vector<event> &DepEvents,
1540  RestT &&...Rest) {
1541  return parallel_for_impl<KernelName>(Range, DepEvents, Rest...);
1542  }
1543 
1552  template <typename KernelName = detail::auto_name, typename... RestT>
1553  event parallel_for(range<2> Range, const std::vector<event> &DepEvents,
1554  RestT &&...Rest) {
1555  return parallel_for_impl<KernelName>(Range, DepEvents, Rest...);
1556  }
1557 
1566  template <typename KernelName = detail::auto_name, typename... RestT>
1567  event parallel_for(range<3> Range, const std::vector<event> &DepEvents,
1568  RestT &&...Rest) {
1569  return parallel_for_impl<KernelName>(Range, DepEvents, Rest...);
1570  }
1571 
1572  // While other shortcuts with offsets are able to go through parallel_for(...,
1573  // RestT &&...Rest), those that accept dependency events vector have to be
1574  // overloaded to allow implicit construction from an init-list.
1582  template <typename KernelName = detail::auto_name, typename KernelType,
1583  int Dim>
1584  event parallel_for(range<Dim> Range, id<Dim> WorkItemOffset,
1585  const std::vector<event> &DepEvents,
1587  static_assert(1 <= Dim && Dim <= 3, "Invalid number of dimensions");
1588  return parallel_for_impl<KernelName>(Range, WorkItemOffset, DepEvents,
1589  KernelFunc);
1590  }
1591 
1599  template <typename KernelName = detail::auto_name, typename KernelType,
1600  int Dims>
1601  __SYCL2020_DEPRECATED("offsets are deprecated in SYCL 2020")
1602  event parallel_for_impl(range<Dims> Range, id<Dims> WorkItemOffset,
1604  // Actual code location needs to be captured from KernelInfo object.
1605  const detail::code_location CodeLoc = {};
1606  return submit(
1607  [&](handler &CGH) {
1608  CGH.template parallel_for<KernelName>(Range, WorkItemOffset,
1609  KernelFunc);
1610  },
1611  CodeLoc);
1612  }
1613 
1622  template <typename KernelName = detail::auto_name, typename KernelType,
1623  int Dims>
1624  __SYCL2020_DEPRECATED("offsets are deprecated in SYCL 2020")
1625  event parallel_for_impl(range<Dims> Range, id<Dims> WorkItemOffset,
1626  event DepEvent, _KERNELFUNCPARAM(KernelFunc)) {
1627  // Actual code location needs to be captured from KernelInfo object.
1628  const detail::code_location CodeLoc = {};
1629  return submit(
1630  [&](handler &CGH) {
1631  CGH.depends_on(DepEvent);
1632  CGH.template parallel_for<KernelName>(Range, WorkItemOffset,
1633  KernelFunc);
1634  },
1635  CodeLoc);
1636  }
1637 
1647  template <typename KernelName = detail::auto_name, typename KernelType,
1648  int Dims>
1649  __SYCL2020_DEPRECATED("offsets are deprecated in SYCL 2020")
1650  event parallel_for_impl(range<Dims> Range, id<Dims> WorkItemOffset,
1651  const std::vector<event> &DepEvents,
1653  // Actual code location needs to be captured from KernelInfo object.
1654  const detail::code_location CodeLoc = {};
1655  return submit(
1656  [&](handler &CGH) {
1657  CGH.depends_on(DepEvents);
1658  CGH.template parallel_for<KernelName>(Range, WorkItemOffset,
1659  KernelFunc);
1660  },
1661  CodeLoc);
1662  }
1663 
1671  template <typename KernelName = detail::auto_name, int Dims,
1672  typename PropertiesT, typename... RestT>
1674  detail::AreAllButLastReductions<RestT...>::value &&
1676  event>
1677  parallel_for(nd_range<Dims> Range, PropertiesT Properties, RestT &&...Rest) {
1678  using KI = sycl::detail::KernelInfo<KernelName>;
1679  constexpr detail::code_location CodeLoc(
1680  KI::getFileName(), KI::getFunctionName(), KI::getLineNumber(),
1681  KI::getColumnNumber());
1682  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
1683  return submit(
1684  [&](handler &CGH) {
1685  CGH.template parallel_for<KernelName>(Range, Properties, Rest...);
1686  },
1687  CodeLoc);
1688  }
1689 
1696  template <typename KernelName = detail::auto_name, int Dims,
1697  typename... RestT>
1698  std::enable_if_t<detail::AreAllButLastReductions<RestT...>::value, event>
1699  parallel_for(nd_range<Dims> Range, RestT &&...Rest) {
1700  return parallel_for<KernelName>(
1702  Rest...);
1703  }
1704 
1712  template <typename KernelName = detail::auto_name, int Dims,
1713  typename... RestT>
1714  event parallel_for(nd_range<Dims> Range, event DepEvent, RestT &&...Rest) {
1715  using KI = sycl::detail::KernelInfo<KernelName>;
1716  constexpr detail::code_location CodeLoc(
1717  KI::getFileName(), KI::getFunctionName(), KI::getLineNumber(),
1718  KI::getColumnNumber());
1719  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
1720  return submit(
1721  [&](handler &CGH) {
1722  CGH.depends_on(DepEvent);
1723  CGH.template parallel_for<KernelName>(Range, Rest...);
1724  },
1725  CodeLoc);
1726  }
1727 
1736  template <typename KernelName = detail::auto_name, int Dims,
1737  typename... RestT>
1738  event parallel_for(nd_range<Dims> Range, const std::vector<event> &DepEvents,
1739  RestT &&...Rest) {
1740  using KI = sycl::detail::KernelInfo<KernelName>;
1741  constexpr detail::code_location CodeLoc(
1742  KI::getFileName(), KI::getFunctionName(), KI::getLineNumber(),
1743  KI::getColumnNumber());
1744  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
1745  return submit(
1746  [&](handler &CGH) {
1747  CGH.depends_on(DepEvents);
1748  CGH.template parallel_for<KernelName>(Range, Rest...);
1749  },
1750  CodeLoc);
1751  }
1752 
1759  template <typename SrcT, int SrcDims, access_mode SrcMode, target SrcTgt,
1760  access::placeholder IsPlaceholder, typename DestT>
1762  std::shared_ptr<DestT> Dest _CODELOCPARAM(&CodeLoc)) {
1763  return submit([&](handler &CGH) {
1764  CGH.require(Src);
1765  CGH.copy(Src, Dest);
1766  } _CODELOCFW(CodeLoc));
1767  }
1768 
1775  template <typename SrcT, typename DestT, int DestDims, access_mode DestMode,
1777  event copy(std::shared_ptr<SrcT> Src,
1779  _CODELOCPARAM(&CodeLoc)) {
1780  return submit([&](handler &CGH) {
1781  CGH.require(Dest);
1782  CGH.copy(Src, Dest);
1783  } _CODELOCFW(CodeLoc));
1784  }
1785 
1792  template <typename SrcT, int SrcDims, access_mode SrcMode, target SrcTgt,
1793  access::placeholder IsPlaceholder, typename DestT>
1795  DestT *Dest _CODELOCPARAM(&CodeLoc)) {
1796  return submit([&](handler &CGH) {
1797  CGH.require(Src);
1798  CGH.copy(Src, Dest);
1799  } _CODELOCFW(CodeLoc));
1800  }
1801 
1808  template <typename SrcT, typename DestT, int DestDims, access_mode DestMode,
1810  event copy(const SrcT *Src,
1812  _CODELOCPARAM(&CodeLoc)) {
1813  return submit([&](handler &CGH) {
1814  CGH.require(Dest);
1815  CGH.copy(Src, Dest);
1816  } _CODELOCFW(CodeLoc));
1817  }
1818 
1825  template <typename SrcT, int SrcDims, access_mode SrcMode, target SrcTgt,
1826  access::placeholder IsSrcPlaceholder, typename DestT, int DestDims,
1827  access_mode DestMode, target DestTgt,
1828  access::placeholder IsDestPlaceholder>
1829  event
1832  _CODELOCPARAM(&CodeLoc)) {
1833  return submit([&](handler &CGH) {
1834  CGH.require(Src);
1835  CGH.require(Dest);
1836  CGH.copy(Src, Dest);
1837  } _CODELOCFW(CodeLoc));
1838  }
1839 
1845  template <typename T, int Dims, access_mode Mode, target Tgt,
1849  return submit([&](handler &CGH) {
1850  CGH.require(Acc);
1851  CGH.update_host(Acc);
1852  } _CODELOCFW(CodeLoc));
1853  }
1854 
1861  template <typename T, int Dims, access_mode Mode, target Tgt,
1864  const T &Src _CODELOCPARAM(&CodeLoc)) {
1865  return submit([&](handler &CGH) {
1866  CGH.require(Dest);
1867  CGH.fill<T>(Dest, Src);
1868  } _CODELOCFW(CodeLoc));
1869  }
1870 
1877  bool ext_codeplay_supports_fusion() const;
1878 
1879 // Clean KERNELFUNC macros.
1880 #undef _KERNELFUNCPARAM
1881 
1885  bool is_in_order() const;
1886 
1890  backend get_backend() const noexcept;
1891 
1896  bool ext_oneapi_empty() const;
1897 
1898 private:
1899  pi_native_handle getNative() const;
1900 
1901  std::shared_ptr<detail::queue_impl> impl;
1902  queue(std::shared_ptr<detail::queue_impl> impl) : impl(impl) {}
1903 
1904  template <class Obj>
1905  friend decltype(Obj::impl) detail::getSyclObjImpl(const Obj &SyclObject);
1906  template <class T>
1907  friend T detail::createSyclObjFromImpl(decltype(T::impl) ImplObj);
1908 
1909  template <backend BackendName, class SyclObjectT>
1910  friend auto get_native(const SyclObjectT &Obj)
1911  -> backend_return_t<BackendName, SyclObjectT>;
1912 
1913 #if __SYCL_USE_FALLBACK_ASSERT
1914  friend event detail::submitAssertCapture(queue &, event &, queue *,
1915  const detail::code_location &);
1916 #endif
1917 
1919  event submit_impl(std::function<void(handler &)> CGH,
1920  const detail::code_location &CodeLoc);
1922  event submit_impl(std::function<void(handler &)> CGH, queue secondQueue,
1923  const detail::code_location &CodeLoc);
1924 
1927  event discard_or_return(const event &Event);
1928 
1929  // Function to postprocess submitted command
1930  // Arguments:
1931  // bool IsKernel - true if the submitted command was kernel, false otherwise
1932  // bool KernelUsesAssert - true if submitted kernel uses assert, only
1933  // meaningful when IsKernel is true
1934  // event &Event - event after which post processing should be executed
1935  using SubmitPostProcessF = std::function<void(bool, bool, event &)>;
1936 
1942  event submit_impl_and_postprocess(std::function<void(handler &)> CGH,
1943  const detail::code_location &CodeLoc,
1944  const SubmitPostProcessF &PostProcess);
1951  event submit_impl_and_postprocess(std::function<void(handler &)> CGH,
1952  queue secondQueue,
1953  const detail::code_location &CodeLoc,
1954  const SubmitPostProcessF &PostProcess);
1955 
1962  template <typename KernelName, int Dims, typename PropertiesT,
1963  typename... RestT>
1965  detail::AreAllButLastReductions<RestT...>::value &&
1966  ext::oneapi::experimental::is_property_list<PropertiesT>::value,
1967  event>
1968  parallel_for_impl(range<Dims> Range, PropertiesT Properties,
1969  RestT &&...Rest) {
1970  using KI = sycl::detail::KernelInfo<KernelName>;
1971  constexpr detail::code_location CodeLoc(
1972  KI::getFileName(), KI::getFunctionName(), KI::getLineNumber(),
1973  KI::getColumnNumber());
1974  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
1975  return submit(
1976  [&](handler &CGH) {
1977  CGH.template parallel_for<KernelName>(Range, Properties, Rest...);
1978  },
1979  CodeLoc);
1980  }
1981 
1987  template <typename KernelName, int Dims, typename... RestT>
1988  std::enable_if_t<detail::AreAllButLastReductions<RestT...>::value, event>
1989  parallel_for_impl(range<Dims> Range, RestT &&...Rest) {
1990  return parallel_for_impl<KernelName>(
1992  Rest...);
1993  }
1994 
2002  template <typename KernelName, int Dims, typename PropertiesT,
2003  typename... RestT>
2005  ext::oneapi::experimental::is_property_list<PropertiesT>::value, event>
2006  parallel_for_impl(range<Dims> Range, event DepEvent, PropertiesT Properties,
2007  RestT &&...Rest) {
2008  using KI = sycl::detail::KernelInfo<KernelName>;
2009  constexpr detail::code_location CodeLoc(
2010  KI::getFileName(), KI::getFunctionName(), KI::getLineNumber(),
2011  KI::getColumnNumber());
2012  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
2013  return submit(
2014  [&](handler &CGH) {
2015  CGH.depends_on(DepEvent);
2016  CGH.template parallel_for<KernelName>(Range, Properties, Rest...);
2017  },
2018  CodeLoc);
2019  }
2020 
2027  template <typename KernelName, int Dims, typename... RestT>
2028  event parallel_for_impl(range<Dims> Range, event DepEvent, RestT &&...Rest) {
2029  return parallel_for_impl<KernelName>(
2030  Range, DepEvent,
2032  }
2033 
2042  template <typename KernelName, int Dims, typename PropertiesT,
2043  typename... RestT>
2045  ext::oneapi::experimental::is_property_list<PropertiesT>::value, event>
2046  parallel_for_impl(range<Dims> Range, const std::vector<event> &DepEvents,
2047  PropertiesT Properties, RestT &&...Rest) {
2048  using KI = sycl::detail::KernelInfo<KernelName>;
2049  constexpr detail::code_location CodeLoc(
2050  KI::getFileName(), KI::getFunctionName(), KI::getLineNumber(),
2051  KI::getColumnNumber());
2052  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
2053  return submit(
2054  [&](handler &CGH) {
2055  CGH.depends_on(DepEvents);
2056  CGH.template parallel_for<KernelName>(Range, Properties, Rest...);
2057  },
2058  CodeLoc);
2059  }
2060 
2068  template <typename KernelName, int Dims, typename... RestT>
2069  event parallel_for_impl(range<Dims> Range,
2070  const std::vector<event> &DepEvents,
2071  RestT &&...Rest) {
2072  return parallel_for_impl<KernelName>(
2073  Range, DepEvents,
2075  }
2076 
2077  buffer<detail::AssertHappened, 1> &getAssertHappenedBuffer();
2078 
2079  event memcpyToDeviceGlobal(void *DeviceGlobalPtr, const void *Src,
2080  bool IsDeviceImageScope, size_t NumBytes,
2081  size_t Offset,
2082  const std::vector<event> &DepEvents);
2083  event memcpyFromDeviceGlobal(void *Dest, const void *DeviceGlobalPtr,
2084  bool IsDeviceImageScope, size_t NumBytes,
2085  size_t Offset,
2086  const std::vector<event> &DepEvents);
2087 };
2088 
2089 namespace detail {
2090 #if __SYCL_USE_FALLBACK_ASSERT
2091 #define __SYCL_ASSERT_START 1
2092 
2103 event submitAssertCapture(queue &Self, event &Event, queue *SecondaryQueue,
2104  const detail::code_location &CodeLoc) {
2105  using AHBufT = buffer<detail::AssertHappened, 1>;
2106 
2107  AHBufT &Buffer = Self.getAssertHappenedBuffer();
2108 
2109  event CopierEv, CheckerEv, PostCheckerEv;
2110  auto CopierCGF = [&](handler &CGH) {
2111  CGH.depends_on(Event);
2112 
2113  auto Acc = Buffer.get_access<access::mode::write>(CGH);
2114 
2115  CGH.single_task<__sycl_service_kernel__::AssertInfoCopier>([Acc] {
2116 #if defined(__SYCL_DEVICE_ONLY__) && !defined(__NVPTX__)
2117  __devicelib_assert_read(&Acc[0]);
2118 #else
2119  (void)Acc;
2120 #endif // defined(__SYCL_DEVICE_ONLY__) && !defined(__NVPTX__)
2121  });
2122  };
2123  auto CheckerCGF = [&CopierEv, &Buffer](handler &CGH) {
2124  CGH.depends_on(CopierEv);
2125  using mode = access::mode;
2126  using target = access::target;
2127 
2128  auto Acc = Buffer.get_access<mode::read, target::host_buffer>(CGH);
2129 
2130  CGH.host_task([=] {
2131  const detail::AssertHappened *AH = &Acc[0];
2132 
2133  // Don't use assert here as msvc will insert reference to __imp__wassert
2134  // which won't be properly resolved in separate compile use-case
2135 #ifndef NDEBUG
2136  if (AH->Flag == __SYCL_ASSERT_START)
2137  throw sycl::runtime_error(
2138  "Internal Error. Invalid value in assert description.",
2139  PI_ERROR_INVALID_VALUE);
2140 #endif
2141 
2142  if (AH->Flag) {
2143  const char *Expr = AH->Expr[0] ? AH->Expr : "<unknown expr>";
2144  const char *File = AH->File[0] ? AH->File : "<unknown file>";
2145  const char *Func = AH->Func[0] ? AH->Func : "<unknown func>";
2146 
2147  fprintf(stderr,
2148  "%s:%d: %s: global id: [%" PRIu64 ",%" PRIu64 ",%" PRIu64
2149  "], local id: [%" PRIu64 ",%" PRIu64 ",%" PRIu64 "] "
2150  "Assertion `%s` failed.\n",
2151  File, AH->Line, Func, AH->GID0, AH->GID1, AH->GID2, AH->LID0,
2152  AH->LID1, AH->LID2, Expr);
2153  fflush(stderr);
2154  abort(); // no need to release memory as it's abort anyway
2155  }
2156  });
2157  };
2158 
2159  if (SecondaryQueue) {
2160  CopierEv = Self.submit_impl(CopierCGF, *SecondaryQueue, CodeLoc);
2161  CheckerEv = Self.submit_impl(CheckerCGF, *SecondaryQueue, CodeLoc);
2162  } else {
2163  CopierEv = Self.submit_impl(CopierCGF, CodeLoc);
2164  CheckerEv = Self.submit_impl(CheckerCGF, CodeLoc);
2165  }
2166 
2167  return CheckerEv;
2168 }
2169 #undef __SYCL_ASSERT_START
2170 #endif // __SYCL_USE_FALLBACK_ASSERT
2171 } // namespace detail
2172 
2173 } // __SYCL_INLINE_VER_NAMESPACE(_V1)
2174 } // namespace sycl
2175 
2176 namespace std {
2177 template <> struct hash<sycl::queue> {
2178  size_t operator()(const sycl::queue &Q) const {
2179  return std::hash<std::shared_ptr<sycl::detail::queue_impl>>()(
2181  }
2182 };
2183 } // namespace std
2184 
2185 #undef __SYCL_USE_FALLBACK_ASSERT
sycl::_V1::handler::copy
void copy(accessor< T_Src, Dims, AccessMode, AccessTarget, IsPlaceholder > Src, std::shared_ptr< T_Dst > Dst)
Copies the content of memory object accessed by Src into the memory pointed by Dst.
Definition: handler.hpp:2160
sycl::_V1::IsPlaceholder
class __SYCL_EBO __SYCL_SPECIAL_CLASS __SYCL_TYPE(local_accessor) local_accessor class __SYCL_EBO __SYCL_SPECIAL_CLASS IsPlaceholder
Definition: accessor.hpp:2855
sycl::_V1::queue::ext_oneapi_memcpy2d
event ext_oneapi_memcpy2d(void *Dest, size_t DestPitch, const void *Src, size_t SrcPitch, size_t Width, size_t Height _CODELOCPARAM(&CodeLoc))
Copies data from one 2D memory region to another, both pointed by USM pointers.
Definition: queue.hpp:771
sycl::_V1::queue::prefetch
event prefetch(const void *Ptr, size_t Count, event DepEvent)
Provides hints to the runtime library that data should be made available on a device earlier than Uni...
Definition: queue.hpp:727
sycl::_V1::queue::copy
event copy(const std::remove_all_extents_t< T > *Src, ext::oneapi::experimental::device_global< T, PropertyListT > &Dest, size_t Count=sizeof(T)/sizeof(std::remove_all_extents_t< T >), size_t StartIndex=0)
Copies elements of type std::remove_all_extents_t<T> from a USM memory region to a device_global.
Definition: queue.hpp:1259
sycl::_V1::queue::parallel_for
event parallel_for(range< 1 > Range, event DepEvent, RestT &&...Rest)
parallel_for version with a kernel represented as a lambda + range that specifies global size only.
Definition: queue.hpp:1502
sycl::_V1::property_list
Objects of the property_list class are containers for the SYCL properties.
Definition: property_list.hpp:24
sycl::_V1::queue::parallel_for
event parallel_for(range< 1 > Range, const std::vector< event > &DepEvents, RestT &&...Rest)
parallel_for version with a kernel represented as a lambda + range that specifies global size only.
Definition: queue.hpp:1539
sycl::_V1::__SYCL2020_DEPRECATED
signed char __SYCL2020_DEPRECATED
Definition: aliases.hpp:96
property_list.hpp
sycl::_V1::handler::ext_oneapi_barrier
void ext_oneapi_barrier()
Prevents any commands submitted afterward to this queue from executing until all commands previously ...
Definition: handler.hpp:2438
sycl::_V1::access::mode
mode
Definition: access.hpp:30
sycl::_V1::queue::update_host
event update_host(accessor< T, Dims, Mode, Tgt, IsPlaceholder > Acc _CODELOCPARAM(&CodeLoc))
Provides guarantees that the memory object accessed via Acc is updated on the host after operation is...
Definition: queue.hpp:1847
sycl::_V1::backend
backend
Definition: backend_types.hpp:21
sycl::_V1::handler::require
void require(accessor< DataT, Dims, AccMode, AccTarget, isPlaceholder > Acc)
Requires access to the memory object associated with the placeholder accessor.
Definition: handler.hpp:1523
T
sycl::_V1::queue::memcpy
event memcpy(ext::oneapi::experimental::device_global< T, PropertyListT > &Dest, const void *Src, size_t NumBytes, size_t Offset, event DepEvent)
Copies data from a USM memory region to a device_global.
Definition: queue.hpp:1112
sycl::_V1::queue::parallel_for
event parallel_for(range< 1 > Range, RestT &&...Rest)
parallel_for version with a kernel represented as a lambda + range that specifies global size only.
Definition: queue.hpp:1468
sycl::_V1::make_error_code
std::error_code make_error_code(sycl::errc E) noexcept
Constructs an error code using e and sycl_category()
Definition: exception.cpp:92
sycl::_V1::detail::auto_name
This class is the default KernelName template parameter type for kernel invocation APIs such as singl...
Definition: kernel.hpp:39
sycl::_V1::queue::prefetch
event prefetch(const void *Ptr, size_t Count, const std::vector< event > &DepEvents)
Provides hints to the runtime library that data should be made available on a device earlier than Uni...
Definition: queue.hpp:743
stl.hpp
device_selector.hpp
sycl::_V1::queue::queue
queue(const property_list &PropList={})
Constructs a SYCL queue instance using the device returned by an instance of default_selector.
Definition: queue.hpp:95
sycl::_V1::queue::memcpy
event memcpy(ext::oneapi::experimental::device_global< T, PropertyListT > &Dest, const void *Src, size_t NumBytes, size_t Offset, const std::vector< event > &DepEvents)
Copies data from a USM memory region to a device_global.
Definition: queue.hpp:1086
sycl::_V1::queue::ext_oneapi_memcpy2d
event ext_oneapi_memcpy2d(void *Dest, size_t DestPitch, const void *Src, size_t SrcPitch, size_t Width, size_t Height, event DepEvent _CODELOCPARAM(&CodeLoc))
Copies data from one 2D memory region to another, both pointed by USM pointers.
Definition: queue.hpp:800
sycl::_V1::queue::parallel_for
event parallel_for(range< 3 > Range, const std::vector< event > &DepEvents, RestT &&...Rest)
parallel_for version with a kernel represented as a lambda + range that specifies global size only.
Definition: queue.hpp:1567
device.hpp
sycl::_V1::ext::oneapi::experimental::device_has
constexpr device_has_key::value_t< Aspects... > device_has
Definition: properties.hpp:120
__SYCL_INLINE_VER_NAMESPACE
#define __SYCL_INLINE_VER_NAMESPACE(X)
Definition: defines_elementary.hpp:11
_pi_mem_advice
_pi_mem_advice
Definition: pi.h:465
sycl::_V1::queue::single_task
event single_task(event DepEvent, _KERNELFUNCPARAM(KernelFunc) _CODELOCPARAM(&CodeLoc))
single_task version with a kernel represented as a lambda.
Definition: queue.hpp:1409
sycl::_V1::ext::oneapi::experimental::properties
Definition: properties.hpp:126
sycl::_V1::detail::memcpy
void memcpy(void *Dst, const void *Src, size_t Size)
Definition: memcpy.hpp:16
sycl::_V1::queue::copy
event copy(const std::remove_all_extents_t< T > *Src, ext::oneapi::experimental::device_global< T, PropertyListT > &Dest, size_t Count, size_t StartIndex, const std::vector< event > &DepEvents)
Copies elements of type std::remove_all_extents_t<T> from a USM memory region to a device_global.
Definition: queue.hpp:1216
sycl::_V1::handler::ext_oneapi_memcpy2d
void ext_oneapi_memcpy2d(void *Dest, size_t DestPitch, const void *Src, size_t SrcPitch, size_t Width, size_t Height)
Copies data from one 2D memory region to another, both pointed by USM pointers.
Definition: handler.hpp:2537
event.hpp
sycl::_V1::queue::fill
event fill(accessor< T, Dims, Mode, Tgt, IsPlaceholder > Dest, const T &Src _CODELOCPARAM(&CodeLoc))
Fills the specified memory with the specified data.
Definition: queue.hpp:1863
sycl::_V1::queue::single_task
event single_task(const std::vector< event > &DepEvents, _KERNELFUNCPARAM(KernelFunc) _CODELOCPARAM(&CodeLoc))
single_task version with a kernel represented as a lambda.
Definition: queue.hpp:1454
sycl::_V1::queue::queue
queue(const DeviceSelector &deviceSelector, const property_list &PropList={})
Constructs a SYCL queue instance using the device identified by the device selector provided.
Definition: queue.hpp:128
service_kernel_names.hpp
sycl::_V1::queue::ext_oneapi_memset2d
event ext_oneapi_memset2d(void *Dest, size_t DestPitch, int Value, size_t Width, size_t Height _CODELOCPARAM(&CodeLoc))
Fills the memory pointed by a USM pointer with the value specified.
Definition: queue.hpp:937
sycl::_V1::handler::ext_oneapi_memset2d
void ext_oneapi_memset2d(void *Dest, size_t DestPitch, int Value, size_t Width, size_t Height)
Fills the memory pointed by a USM pointer with the value specified.
Definition: handler.hpp:2643
sycl
---— Error handling, matching OpenCL plugin semantics.
Definition: access.hpp:14
sycl::_V1::event
An event object can be used to synchronize memory transfers, enqueues of kernels and signaling barrie...
Definition: event.hpp:40
sycl::_V1::queue::copy
event copy(const ext::oneapi::experimental::device_global< T, PropertyListT > &Src, std::remove_all_extents_t< T > *Dest, size_t Count, size_t StartIndex, const std::vector< event > &DepEvents)
Copies elements of type std::remove_all_extents_t<T> from a device_global to a USM memory region.
Definition: queue.hpp:1282
sycl::_V1::queue::memcpy
event memcpy(void *Dest, const ext::oneapi::experimental::device_global< T, PropertyListT > &Src, size_t NumBytes=sizeof(T), size_t Offset=0)
Copies data from a device_global to USM memory.
Definition: queue.hpp:1196
sycl::_V1::id
A unique identifier of an item in an index space.
Definition: array.hpp:17
backend_traits.hpp
sycl::_V1::queue::memcpy
event memcpy(void *Dest, const ext::oneapi::experimental::device_global< T, PropertyListT > &Src, size_t NumBytes, size_t Offset, event DepEvent)
Copies data from a device_global to USM memory.
Definition: queue.hpp:1177
sycl::_V1::queue::parallel_for
event parallel_for(nd_range< Dims > Range, const std::vector< event > &DepEvents, RestT &&...Rest)
parallel_for version with a kernel represented as a lambda + nd_range that specifies global,...
Definition: queue.hpp:1738
owner_less_base.hpp
sycl::_V1::detail::check_fn_signature
Definition: cg_types.hpp:126
sycl::_V1::queue::queue
queue(const async_handler &AsyncHandler, const property_list &PropList={})
Constructs a SYCL queue instance with an async_handler using the device returned by an instance of de...
Definition: queue.hpp:103
sycl::_V1::queue::single_task
std::enable_if_t< ext::oneapi::experimental::is_property_list< PropertiesT >::value, event > single_task(PropertiesT Properties, _KERNELFUNCPARAM(KernelFunc) _CODELOCPARAM(&CodeLoc))
single_task version with a kernel represented as a lambda.
Definition: queue.hpp:1343
sycl::_V1::detail::enable_if_t
typename std::enable_if< B, T >::type enable_if_t
Definition: stl_type_traits.hpp:24
sycl::_V1::detail::write
void write(GlobalBufAccessorT &GlobalFlushBuf, size_t FlushBufferSize, unsigned WIOffset, const char *Str, unsigned Len, unsigned Padding=0)
Definition: stream.hpp:111
sycl::_V1::range< 1 >
sycl::_V1::queue::single_task
std::enable_if_t< ext::oneapi::experimental::is_property_list< PropertiesT >::value, event > single_task(const std::vector< event > &DepEvents, PropertiesT Properties, _KERNELFUNCPARAM(KernelFunc) _CODELOCPARAM(&CodeLoc))
single_task version with a kernel represented as a lambda.
Definition: queue.hpp:1427
sycl::_V1::queue::copy
event copy(std::shared_ptr< SrcT > Src, accessor< DestT, DestDims, DestMode, DestTgt, IsPlaceholder > Dest _CODELOCPARAM(&CodeLoc))
Copies data from a memory region pointed to by a shared_ptr to another memory region pointed to by a ...
Definition: queue.hpp:1777
export.hpp
sycl::_V1::access::placeholder
placeholder
Definition: access.hpp:45
sycl::_V1::queue::memcpy
event memcpy(void *Dest, const ext::oneapi::experimental::device_global< T, PropertyListT > &Src, size_t NumBytes, size_t Offset, const std::vector< event > &DepEvents)
Copies data from a device_global to USM memory.
Definition: queue.hpp:1150
sycl::_V1::ext::oneapi::experimental::has_property
static constexpr bool has_property()
Definition: annotated_arg.hpp:162
std::get
constexpr tuple_element< I, tuple< Types... > >::type & get(sycl::detail::tuple< Types... > &Arg) noexcept
Definition: tuple.hpp:199
sycl::_V1::queue::parallel_for
event parallel_for(range< 3 > Range, event DepEvent, RestT &&...Rest)
parallel_for version with a kernel represented as a lambda + range that specifies global size only.
Definition: queue.hpp:1526
_CODELOCPARAM
#define _CODELOCPARAM(a)
Definition: common.hpp:108
sycl::_V1::queue::queue
queue(const context &syclContext, const DeviceSelector &deviceSelector, const property_list &propList={})
Constructs a SYCL queue instance using the device identified by the device selector provided.
Definition: queue.hpp:142
sycl::_V1::queue::fill
event fill(void *Ptr, const T &Pattern, size_t Count, event DepEvent)
Fills the specified memory with the specified pattern.
Definition: queue.hpp:499
sycl::_V1::handler::ext_oneapi_fill2d
void ext_oneapi_fill2d(void *Dest, size_t DestPitch, const T &Pattern, size_t Width, size_t Height)
Fills the memory pointed by a USM pointer with the value specified.
Definition: handler.hpp:2679
sycl::_V1::queue::parallel_for
std::enable_if_t< detail::AreAllButLastReductions< RestT... >::value &&ext::oneapi::experimental::is_property_list< PropertiesT >::value, event > parallel_for(nd_range< Dims > Range, PropertiesT Properties, RestT &&...Rest)
parallel_for version with a kernel represented as a lambda + nd_range that specifies global,...
Definition: queue.hpp:1677
sycl::_V1::queue::ext_oneapi_copy2d
event ext_oneapi_copy2d(const T *Src, size_t SrcPitch, T *Dest, size_t DestPitch, size_t Width, size_t Height _CODELOCPARAM(&CodeLoc))
Copies data from one 2D memory region to another, both pointed by USM pointers.
Definition: queue.hpp:857
sycl::_V1::queue::memcpy
event memcpy(ext::oneapi::experimental::device_global< T, PropertyListT > &Dest, const void *Src, size_t NumBytes=sizeof(T), size_t Offset=0)
Copies data from a USM memory region to a device_global.
Definition: queue.hpp:1130
sycl::_V1::queue::parallel_for
event parallel_for(range< 2 > Range, RestT &&...Rest)
parallel_for version with a kernel represented as a lambda + range that specifies global size only.
Definition: queue.hpp:1479
sycl::_V1::queue
Encapsulates a single SYCL queue which schedules kernels on a SYCL device.
Definition: queue.hpp:89
_CODELOCARG
#define _CODELOCARG(a)
Definition: common.hpp:112
sycl::_V1::ext::oneapi::experimental::operator=
annotated_arg & operator=(annotated_arg &)=default
sycl::_V1::queue::ext_oneapi_memset2d
event ext_oneapi_memset2d(void *Dest, size_t DestPitch, int Value, size_t Width, size_t Height, event DepEvent _CODELOCPARAM(&CodeLoc))
Fills the memory pointed by a USM pointer with the value specified.
Definition: queue.hpp:964
device_selector
sycl::_V1::queue::copy
event copy(const std::remove_all_extents_t< T > *Src, ext::oneapi::experimental::device_global< T, PropertyListT > &Dest, size_t Count, size_t StartIndex, event DepEvent)
Copies elements of type std::remove_all_extents_t<T> from a USM memory region to a device_global.
Definition: queue.hpp:1239
sycl::_V1::queue::ext_oneapi_memset2d
event ext_oneapi_memset2d(void *Dest, size_t DestPitch, int Value, size_t Width, size_t Height, const std::vector< event > &DepEvents _CODELOCPARAM(&CodeLoc))
Fills the memory pointed by a USM pointer with the value specified.
Definition: queue.hpp:993
sycl::_V1::detail::tls_code_loc_t
Data type that manages the code_location information in TLS.
Definition: common.hpp:152
sycl::_V1::queue::copy
event copy(const T *Src, T *Dest, size_t Count, event DepEvent _CODELOCPARAM(&CodeLoc))
Copies data from one memory region to another, each is either a host pointer or a pointer within USM ...
Definition: queue.hpp:637
sycl::_V1::handler::ext_oneapi_copy2d
void ext_oneapi_copy2d(const T *Src, size_t SrcPitch, T *Dest, size_t DestPitch, size_t Width, size_t Height)
Copies data from one 2D memory region to another, both pointed by USM pointers.
Definition: handler.hpp:2589
sycl::_V1::detail::OwnerLessBase
Definition: owner_less_base.hpp:21
sycl::_V1::queue::copy
event copy(const T *Src, T *Dest, size_t Count _CODELOCPARAM(&CodeLoc))
Copies data from one memory region to another, each is either a host pointer or a pointer within USM ...
Definition: queue.hpp:617
sycl::_V1::queue::wait
void wait(_CODELOCONLYPARAM(&CodeLoc))
Performs a blocking wait for the completion of all enqueued tasks in the queue.
Definition: queue.hpp:433
sycl::_V1::queue::copy
event copy(accessor< SrcT, SrcDims, SrcMode, SrcTgt, IsSrcPlaceholder > Src, accessor< DestT, DestDims, DestMode, DestTgt, IsDestPlaceholder > Dest _CODELOCPARAM(&CodeLoc))
Copies data from one memory region to another, both pointed by placeholder accessors.
Definition: queue.hpp:1830
sycl::_V1::detail::select_device
device select_device(const DSelectorInvocableType &DeviceSelectorInvocable)
Definition: device_selector.cpp:136
sycl::_V1::handler
Command group handler class.
Definition: handler.hpp:315
sycl::_V1::ext::oneapi::experimental::detail::empty_properties_t
properties< std::tuple<> > empty_properties_t
Definition: properties.hpp:204
sycl::_V1::queue::operator!=
bool operator!=(const queue &RHS) const
Definition: queue.hpp:271
common.hpp
_CODELOCFW
#define _CODELOCFW(a)
Definition: common.hpp:113
sycl::_V1::queue::ext_oneapi_copy2d
event ext_oneapi_copy2d(const T *Src, size_t SrcPitch, T *Dest, size_t DestPitch, size_t Width, size_t Height, const std::vector< event > &DepEvents _CODELOCPARAM(&CodeLoc))
Copies data from one 2D memory region to another, both pointed by USM pointers.
Definition: queue.hpp:909
_KERNELFUNCPARAM
#define _KERNELFUNCPARAM(a)
Definition: queue.hpp:44
sycl::_V1::queue::ext_oneapi_fill2d
event ext_oneapi_fill2d(void *Dest, size_t DestPitch, const T &Pattern, size_t Width, size_t Height _CODELOCPARAM(&CodeLoc))
Fills the memory pointed by a USM pointer with the value specified.
Definition: queue.hpp:1016
sycl::_V1::nd_range
Defines the iteration domain of both the work-groups and the overall dispatch.
Definition: uniform.hpp:36
sycl::_V1::device
The SYCL device class encapsulates a single SYCL device on which kernels may be executed.
Definition: device.hpp:49
sycl::_V1::detail::createSyclObjFromImpl
T createSyclObjFromImpl(decltype(T::impl) ImplObj)
Definition: common.hpp:318
sycl::_V1::queue::copy
event copy(const T *Src, T *Dest, size_t Count, const std::vector< event > &DepEvents _CODELOCPARAM(&CodeLoc))
Copies data from one memory region to another, each is either a host pointer or a pointer within USM ...
Definition: queue.hpp:658
sycl::_V1::access::target
target
Definition: access.hpp:18
sycl::_V1::queue::parallel_for
event parallel_for(range< 2 > Range, const std::vector< event > &DepEvents, RestT &&...Rest)
parallel_for version with a kernel represented as a lambda + range that specifies global size only.
Definition: queue.hpp:1553
sycl::_V1::queue::queue
queue(const DeviceSelector &deviceSelector, const async_handler &AsyncHandler, const property_list &PropList={})
Constructs a SYCL queue instance using the device identified by the device selector provided.
Definition: queue.hpp:115
sycl::_V1::queue::parallel_for
event parallel_for(range< 3 > Range, RestT &&...Rest)
parallel_for version with a kernel represented as a lambda + range that specifies global size only.
Definition: queue.hpp:1490
sycl::_V1::ext::oneapi::experimental::is_property_list
Definition: properties.hpp:190
sycl::_V1::ext::oneapi::experimental::get_property
static constexpr auto get_property()
Definition: annotated_arg.hpp:166
pi_native_handle
uintptr_t pi_native_handle
Definition: pi.h:133
sycl::_V1::queue::fill
event fill(void *Ptr, const T &Pattern, size_t Count, const std::vector< event > &DepEvents)
Fills the specified memory with the specified pattern.
Definition: queue.hpp:516
sycl::_V1::accessor
Definition: accessor.hpp:225
sycl::_V1::detail::remove_reference_t
typename std::remove_reference< T >::type remove_reference_t
Definition: stl_type_traits.hpp:35
sycl::_V1::queue::copy
event copy(const SrcT *Src, accessor< DestT, DestDims, DestMode, DestTgt, IsPlaceholder > Dest _CODELOCPARAM(&CodeLoc))
Copies data from a memory region pointed to by a raw pointer to another memory region pointed to by a...
Definition: queue.hpp:1810
sycl::_V1::queue::copy
event copy(const ext::oneapi::experimental::device_global< T, PropertyListT > &Src, std::remove_all_extents_t< T > *Dest, size_t Count=sizeof(T)/sizeof(std::remove_all_extents_t< T >), size_t StartIndex=0)
Copies elements of type std::remove_all_extents_t<T> from a device_global to a USM memory region.
Definition: queue.hpp:1326
std::hash< sycl::queue >::operator()
size_t operator()(const sycl::queue &Q) const
Definition: queue.hpp:2178
sycl::_V1::queue::copy
event copy(const ext::oneapi::experimental::device_global< T, PropertyListT > &Src, std::remove_all_extents_t< T > *Dest, size_t Count, size_t StartIndex, event DepEvent)
Copies elements of type std::remove_all_extents_t<T> from a device_global to a USM memory region.
Definition: queue.hpp:1305
KernelFunc
std::function< void(const sycl::nd_item< NDims > &)> KernelFunc
Definition: pi_esimd_emulator.cpp:173
sycl::_V1::detail::defaultAsyncHandler
void defaultAsyncHandler(exception_list Exceptions)
Definition: exception_list.hpp:59
sycl::_V1::ext::oneapi::experimental::device_image_scope_key
Definition: properties.hpp:20
sycl::_V1::detail::code_location
Definition: common.hpp:66
sycl::_V1::handler::fill
void fill(accessor< T, Dims, AccessMode, AccessTarget, IsPlaceholder, PropertyListT > Dst, const T &Pattern)
Fills memory pointed by accessor with the pattern given.
Definition: handler.hpp:2384
sycl::_V1::handler::prefetch
void prefetch(const void *Ptr, size_t Count)
Provides hints to the runtime library that data should be made available on a device earlier than Uni...
Definition: handler.cpp:710
handler.hpp
_CODELOCONLYPARAM
#define _CODELOCONLYPARAM(a)
Definition: common.hpp:106
sycl::_V1::queue::submit
event submit(T CGF, queue &SecondaryQueue _CODELOCPARAM(&CodeLoc))
Submits a command group function object to the queue, in order to be scheduled for execution on the d...
Definition: queue.hpp:344
sycl::_V1::queue::ext_oneapi_fill2d
event ext_oneapi_fill2d(void *Dest, size_t DestPitch, const T &Pattern, size_t Width, size_t Height, const std::vector< event > &DepEvents _CODELOCPARAM(&CodeLoc))
Fills the memory pointed by a USM pointer with the value specified.
Definition: queue.hpp:1063
std
Definition: accessor.hpp:3230
info_desc_helpers.hpp
device_global.hpp
sycl::_V1::queue::parallel_for
std::enable_if_t< detail::AreAllButLastReductions< RestT... >::value, event > parallel_for(nd_range< Dims > Range, RestT &&...Rest)
parallel_for version with a kernel represented as a lambda + nd_range that specifies global,...
Definition: queue.hpp:1699
sycl::_V1::queue::single_task
std::enable_if_t< ext::oneapi::experimental::is_property_list< PropertiesT >::value, event > single_task(event DepEvent, PropertiesT Properties, _KERNELFUNCPARAM(KernelFunc) _CODELOCPARAM(&CodeLoc))
single_task version with a kernel represented as a lambda.
Definition: queue.hpp:1383
sycl::_V1::detail::AreAllButLastReductions
Predicate returning true if all template type parameters except the last one are reductions.
Definition: reduction.hpp:43
sycl::_V1::queue::wait_and_throw
void wait_and_throw(_CODELOCONLYPARAM(&CodeLoc))
Performs a blocking wait for the completion of all enqueued tasks in the queue.
Definition: queue.hpp:447
sycl::_V1::async_handler
std::function< void(sycl::exception_list)> async_handler
Definition: exception_list.hpp:54
sycl::_V1::queue::ext_oneapi_fill2d
event ext_oneapi_fill2d(void *Dest, size_t DestPitch, const T &Pattern, size_t Width, size_t Height, event DepEvent _CODELOCPARAM(&CodeLoc))
Fills the memory pointed by a USM pointer with the value specified.
Definition: queue.hpp:1038
sycl::_V1::queue::parallel_for
event parallel_for(nd_range< Dims > Range, event DepEvent, RestT &&...Rest)
parallel_for version with a kernel represented as a lambda + nd_range that specifies global,...
Definition: queue.hpp:1714
sycl::_V1::queue::parallel_for
event parallel_for(range< 2 > Range, event DepEvent, RestT &&...Rest)
parallel_for version with a kernel represented as a lambda + range that specifies global size only.
Definition: queue.hpp:1514
sycl::_V1::queue::operator==
bool operator==(const queue &RHS) const
Definition: queue.hpp:269
sycl::_V1::handler::update_host
void update_host(accessor< T, Dims, AccessMode, AccessTarget, IsPlaceholder > Acc)
Provides guarantees that the memory object accessed via Acc is updated on the host after command grou...
Definition: handler.hpp:2354
exception_list.hpp
sycl::_V1::queue::copy
event copy(accessor< SrcT, SrcDims, SrcMode, SrcTgt, IsPlaceholder > Src, DestT *Dest _CODELOCPARAM(&CodeLoc))
Copies data from a memory region pointed to by a placeholder accessor to another memory region pointe...
Definition: queue.hpp:1794
info_desc.hpp
sycl::_V1::queue::parallel_for
event parallel_for(range< Dim > Range, id< Dim > WorkItemOffset, const std::vector< event > &DepEvents, _KERNELFUNCPARAM(KernelFunc))
parallel_for version with a kernel represented as a lambda + range and offset that specify global siz...
Definition: queue.hpp:1584
sycl::_V1::queue::ext_oneapi_memcpy2d
event ext_oneapi_memcpy2d(void *Dest, size_t DestPitch, const void *Src, size_t SrcPitch, size_t Width, size_t Height, const std::vector< event > &DepEvents _CODELOCPARAM(&CodeLoc))
Copies data from one 2D memory region to another, both pointed by USM pointers.
Definition: queue.hpp:831
assert_happened.hpp
sycl::_V1::queue::ext_oneapi_submit_barrier
event ext_oneapi_submit_barrier(_CODELOCONLYPARAM(&CodeLoc))
Prevents any commands submitted afterward to this queue from executing until all commands previously ...
Definition: queue.hpp:378
weak_object_base.hpp
sycl::_V1::handler::depends_on
void depends_on(event Event)
Registers event dependencies on this command group.
Definition: handler.cpp:786
sycl::_V1::queue::ext_oneapi_copy2d
event ext_oneapi_copy2d(const T *Src, size_t SrcPitch, T *Dest, size_t DestPitch, size_t Width, size_t Height, event DepEvent _CODELOCPARAM(&CodeLoc))
Copies data from one 2D memory region to another, both pointed by USM pointers.
Definition: queue.hpp:882
sycl::_V1::queue::queue
queue(const context &syclContext, const DeviceSelector &deviceSelector, const async_handler &AsyncHandler, const property_list &propList={})
Constructs a SYCL queue instance using the device identified by the device selector provided.
Definition: queue.hpp:158
sycl::_V1::ext::oneapi::experimental::device_global
Definition: device_global.hpp:105
sycl::_V1::get_native
auto get_native(const SyclObjectT &Obj) -> backend_return_t< BackendName, SyclObjectT >
Definition: backend.hpp:123
__SYCL2020_DEPRECATED
#define __SYCL2020_DEPRECATED(message)
Definition: defines_elementary.hpp:57
sycl::_V1::queue::copy
event copy(accessor< SrcT, SrcDims, SrcMode, SrcTgt, IsPlaceholder > Src, std::shared_ptr< DestT > Dest _CODELOCPARAM(&CodeLoc))
Copies data from a memory region pointed to by a placeholder accessor to another memory region pointe...
Definition: queue.hpp:1761
sycl::_V1::detail::getSyclObjImpl
decltype(Obj::impl) getSyclObjImpl(const Obj &SyclObject)
Definition: common.hpp:300
sycl::_V1::context
The context class represents a SYCL context on which kernel functions may be executed.
Definition: context.hpp:41
sycl::_V1::queue::single_task
event single_task(_KERNELFUNCPARAM(KernelFunc) _CODELOCPARAM(&CodeLoc))
single_task version with a kernel represented as a lambda.
Definition: queue.hpp:1367
sycl::_V1::queue::ext_oneapi_submit_barrier
event ext_oneapi_submit_barrier(const std::vector< event > &WaitList _CODELOCPARAM(&CodeLoc))
Prevents any commands submitted afterward to this queue from executing until all events in WaitList h...
Definition: queue.hpp:405