48 #include <type_traits>
58 #ifdef __SYCL_NONCONST_FUNCTOR__
59 #define _KERNELFUNCPARAM(a) KernelType a
61 #define _KERNELFUNCPARAM(a) const KernelType &a
65 inline namespace _V1 {
73 template <backend BackendName,
class SyclObjectT>
75 -> backend_return_t<BackendName, SyclObjectT>;
80 #if __SYCL_USE_FALLBACK_ASSERT
81 inline event submitAssertCapture(queue &, event &, queue *,
82 const detail::code_location &);
86 namespace ext ::oneapi ::experimental {
90 struct image_descriptor;
93 template <
typename CommandGroupFunc>
95 const sycl::detail::code_location &CodeLoc);
133 template <
typename DeviceSelector,
135 detail::EnableIfSYCL2020DeviceSelectorInvocable<DeviceSelector>>
136 explicit queue(
const DeviceSelector &deviceSelector,
146 template <
typename DeviceSelector,
148 detail::EnableIfSYCL2020DeviceSelectorInvocable<DeviceSelector>>
149 explicit queue(
const DeviceSelector &deviceSelector,
160 template <
typename DeviceSelector,
162 detail::EnableIfSYCL2020DeviceSelectorInvocable<DeviceSelector>>
164 const DeviceSelector &deviceSelector,
176 template <
typename DeviceSelector,
178 detail::EnableIfSYCL2020DeviceSelectorInvocable<DeviceSelector>>
180 const DeviceSelector &deviceSelector,
184 AsyncHandler, propList) {}
192 "use SYCL 2020 device selectors instead.")
205 "use SYCL 2020 device selectors instead.")
208 : queue(DeviceSelector.select_device(), AsyncHandler, PropList) {}
214 explicit queue(
const device &SyclDevice,
const property_list &PropList = {})
223 explicit queue(
const device &SyclDevice,
const async_handler &AsyncHandler,
224 const property_list &PropList = {});
233 "use SYCL 2020 device selectors instead.")
246 "use SYCL 2020 device selectors instead.")
277 #ifdef __SYCL_INTERNAL_API
278 queue(cl_command_queue ClQueue,
const context &SyclContext,
296 #ifdef __SYCL_INTERNAL_API
297 cl_command_queue
get()
const;
312 ext_oneapi_get_graph()
const;
317 template <
typename Param>
323 template <
typename Param>
325 get_backend_info()
const;
339 template <
typename T>
340 std::enable_if_t<std::is_invocable_r_v<void, T, handler &>,
event>
submit(
344 #if __SYCL_USE_FALLBACK_ASSERT
345 auto PostProcess = [
this, &CodeLoc](
bool IsKernel,
bool KernelUsesAssert,
347 if (IsKernel && !
device_has(aspect::ext_oneapi_native_assert) &&
348 KernelUsesAssert && !
device_has(aspect::accelerator)) {
353 submitAssertCapture(*
this, E,
nullptr, CodeLoc);
357 return submit_impl_and_postprocess(CGF, CodeLoc, PostProcess);
374 template <
typename T>
375 std::enable_if_t<std::is_invocable_r_v<void, T, handler &>,
event>
submit(
376 T CGF,
queue &SecondaryQueue,
379 #if __SYCL_USE_FALLBACK_ASSERT
380 auto PostProcess = [
this, &SecondaryQueue, &CodeLoc](
381 bool IsKernel,
bool KernelUsesAssert,
event &E) {
382 if (IsKernel && !
device_has(aspect::ext_oneapi_native_assert) &&
383 KernelUsesAssert && !
device_has(aspect::accelerator)) {
390 submitAssertCapture(*
this, E, &SecondaryQueue, CodeLoc);
394 return submit_impl_and_postprocess(CGF, SecondaryQueue, CodeLoc,
408 event ext_oneapi_submit_barrier(
420 event ext_oneapi_submit_barrier(
421 const std::vector<event> &WaitList,
446 wait_and_throw_proxy(CodeLoc);
461 void throw_asynchronous();
466 return getPropList().template has_property<PropertyT>();
473 return getPropList().template get_property<PropertyT>();
483 template <
typename T>
485 void *Ptr,
const T &Pattern,
size_t Count,
500 template <
typename T>
502 void *Ptr,
const T &Pattern,
size_t Count,
event DepEvent,
508 CGH.
fill<T>(Ptr, Pattern, Count);
522 template <
typename T>
524 void *Ptr,
const T &Pattern,
size_t Count,
525 const std::vector<event> &DepEvents,
531 CGH.
fill<T>(Ptr, Pattern, Count);
546 void *Ptr,
int Value,
size_t Count,
560 void *Ptr,
int Value,
size_t Count,
event DepEvent,
575 void *Ptr,
int Value,
size_t Count,
const std::vector<event> &DepEvents,
590 void *Dest,
const void *Src,
size_t Count,
606 void *Dest,
const void *Src,
size_t Count,
event DepEvent,
623 void *Dest,
const void *Src,
size_t Count,
624 const std::vector<event> &DepEvents,
639 template <
typename T>
641 const T *Src, T *Dest,
size_t Count,
644 return this->memcpy(Dest, Src, Count *
sizeof(T));
660 template <
typename T>
662 const T *Src, T *Dest,
size_t Count,
event DepEvent,
665 return this->memcpy(Dest, Src, Count *
sizeof(T), DepEvent);
681 template <
typename T>
683 const T *Src, T *Dest,
size_t Count,
const std::vector<event> &DepEvents,
686 return this->memcpy(Dest, Src, Count *
sizeof(T), DepEvents);
697 const void *Ptr,
size_t Length,
int Advice,
709 const void *Ptr,
size_t Length,
int Advice,
event DepEvent,
722 const void *Ptr,
size_t Length,
int Advice,
723 const std::vector<event> &DepEvents,
734 const void *Ptr,
size_t Count,
749 const void *Ptr,
size_t Count,
event DepEvent,
770 const void *Ptr,
size_t Count,
const std::vector<event> &DepEvents,
799 template <
typename T =
unsigned char,
800 typename = std::enable_if_t<std::is_same_v<T, unsigned char>>>
802 void *Dest,
size_t DestPitch,
const void *Src,
size_t SrcPitch,
803 size_t Width,
size_t Height,
832 template <
typename T =
unsigned char,
833 typename = std::enable_if_t<std::is_same_v<T, unsigned char>>>
834 event ext_oneapi_memcpy2d(
835 void *Dest,
size_t DestPitch,
const void *Src,
size_t SrcPitch,
836 size_t Width,
size_t Height,
event DepEvent,
859 template <
typename T =
unsigned char,
860 typename = std::enable_if_t<std::is_same_v<T, unsigned char>>>
861 event ext_oneapi_memcpy2d(
862 void *Dest,
size_t DestPitch,
const void *Src,
size_t SrcPitch,
863 size_t Width,
size_t Height,
const std::vector<event> &DepEvents,
881 template <
typename T>
882 event ext_oneapi_copy2d(
883 const T *Src,
size_t SrcPitch, T *Dest,
size_t DestPitch,
size_t Width,
903 template <
typename T>
904 event ext_oneapi_copy2d(
905 const T *Src,
size_t SrcPitch, T *Dest,
size_t DestPitch,
size_t Width,
906 size_t Height,
event DepEvent,
926 template <
typename T>
927 event ext_oneapi_copy2d(
928 const T *Src,
size_t SrcPitch, T *Dest,
size_t DestPitch,
size_t Width,
929 size_t Height,
const std::vector<event> &DepEvents,
948 template <
typename T =
unsigned char,
949 typename = std::enable_if_t<std::is_same_v<T, unsigned char>>>
950 event ext_oneapi_memset2d(
951 void *Dest,
size_t DestPitch,
int Value,
size_t Width,
size_t Height,
971 template <
typename T =
unsigned char,
972 typename = std::enable_if_t<std::is_same_v<T, unsigned char>>>
973 event ext_oneapi_memset2d(
974 void *Dest,
size_t DestPitch,
int Value,
size_t Width,
size_t Height,
996 template <
typename T =
unsigned char,
997 typename = std::enable_if_t<std::is_same_v<T, unsigned char>>>
998 event ext_oneapi_memset2d(
999 void *Dest,
size_t DestPitch,
int Value,
size_t Width,
size_t Height,
1000 const std::vector<event> &DepEvents,
1016 template <
typename T>
1017 event ext_oneapi_fill2d(
1018 void *Dest,
size_t DestPitch,
const T &Pattern,
size_t Width,
1036 template <
typename T>
1037 event ext_oneapi_fill2d(
1038 void *Dest,
size_t DestPitch,
const T &Pattern,
size_t Width,
1039 size_t Height,
event DepEvent,
1057 template <
typename T>
1058 event ext_oneapi_fill2d(
1059 void *Dest,
size_t DestPitch,
const T &Pattern,
size_t Width,
1060 size_t Height,
const std::vector<event> &DepEvents,
1075 template <
typename T,
typename PropertyListT>
1078 const void *Src,
size_t NumBytes,
size_t Offset,
1079 const std::vector<event> &DepEvents,
1082 if (
sizeof(T) < Offset + NumBytes)
1084 "Copy to device_global is out of bounds.");
1092 return CGH.
memcpy(Dest, Src, NumBytes, Offset);
1097 constexpr
bool IsDeviceImageScoped = PropertyListT::template
has_property<
1099 return memcpyToDeviceGlobal(&Dest, Src, IsDeviceImageScoped, NumBytes,
1115 template <
typename T,
typename PropertyListT>
1118 const void *Src,
size_t NumBytes,
size_t Offset,
event DepEvent,
1121 return this->memcpy(Dest, Src, NumBytes, Offset,
1122 std::vector<event>{DepEvent});
1135 template <
typename T,
typename PropertyListT>
1138 const void *Src,
size_t NumBytes =
sizeof(T),
size_t Offset = 0,
1141 return this->memcpy(Dest, Src, NumBytes, Offset, std::vector<event>{});
1156 template <
typename T,
typename PropertyListT>
1160 size_t NumBytes,
size_t Offset,
const std::vector<event> &DepEvents,
1163 if (
sizeof(T) < Offset + NumBytes)
1165 "Copy from device_global is out of bounds.");
1172 return CGH.
memcpy(Dest, Src, NumBytes, Offset);
1176 constexpr
bool IsDeviceImageScoped = PropertyListT::template
has_property<
1178 return memcpyFromDeviceGlobal(Dest, &Src, IsDeviceImageScoped, NumBytes,
1194 template <
typename T,
typename PropertyListT>
1198 size_t NumBytes,
size_t Offset,
event DepEvent,
1201 return this->memcpy(Dest, Src, NumBytes, Offset,
1202 std::vector<event>{DepEvent});
1215 template <
typename T,
typename PropertyListT>
1219 size_t NumBytes =
sizeof(T),
size_t Offset = 0,
1222 return this->memcpy(Dest, Src, NumBytes, Offset, std::vector<event>{});
1238 template <
typename T,
typename PropertyListT>
1240 const std::remove_all_extents_t<T> *Src,
1242 size_t Count,
size_t StartIndex,
const std::vector<event> &DepEvents,
1245 return this->memcpy(Dest, Src, Count *
sizeof(std::remove_all_extents_t<T>),
1246 StartIndex *
sizeof(std::remove_all_extents_t<T>),
1263 template <
typename T,
typename PropertyListT>
1265 const std::remove_all_extents_t<T> *Src,
1267 size_t Count,
size_t StartIndex,
event DepEvent,
1270 return this->memcpy(Dest, Src, Count *
sizeof(std::remove_all_extents_t<T>),
1271 StartIndex *
sizeof(std::remove_all_extents_t<T>),
1286 template <
typename T,
typename PropertyListT>
1288 const std::remove_all_extents_t<T> *Src,
1290 size_t Count =
sizeof(T) /
sizeof(std::remove_all_extents_t<T>),
1291 size_t StartIndex = 0,
1294 return this->memcpy(Dest, Src, Count *
sizeof(std::remove_all_extents_t<T>),
1295 StartIndex *
sizeof(std::remove_all_extents_t<T>));
1311 template <
typename T,
typename PropertyListT>
1314 std::remove_all_extents_t<T> *Dest,
size_t Count,
size_t StartIndex,
1315 const std::vector<event> &DepEvents,
1318 return this->memcpy(Dest, Src, Count *
sizeof(std::remove_all_extents_t<T>),
1319 StartIndex *
sizeof(std::remove_all_extents_t<T>),
1336 template <
typename T,
typename PropertyListT>
1339 std::remove_all_extents_t<T> *Dest,
size_t Count,
size_t StartIndex,
1343 return this->memcpy(Dest, Src, Count *
sizeof(std::remove_all_extents_t<T>),
1344 StartIndex *
sizeof(std::remove_all_extents_t<T>),
1359 template <
typename T,
typename PropertyListT>
1362 std::remove_all_extents_t<T> *Dest,
1363 size_t Count =
sizeof(T) /
sizeof(std::remove_all_extents_t<T>),
1364 size_t StartIndex = 0,
1367 return this->memcpy(Dest, Src, Count *
sizeof(std::remove_all_extents_t<T>),
1368 StartIndex *
sizeof(std::remove_all_extents_t<T>));
1382 event ext_oneapi_copy(
1406 event ext_oneapi_copy(
1426 event ext_oneapi_copy(
1452 event ext_oneapi_copy(
1473 event ext_oneapi_copy(
1476 const std::vector<event> &DepEvents,
1500 event ext_oneapi_copy(
1518 event ext_oneapi_copy(
1544 event ext_oneapi_copy(
1563 event ext_oneapi_copy(
1591 event ext_oneapi_copy(
1611 event ext_oneapi_copy(
1614 const std::vector<event> &DepEvents,
1640 event ext_oneapi_copy(
1659 event ext_oneapi_copy(
1660 const void *Src,
void *Dest,
1662 size_t DeviceRowPitch,
1687 event ext_oneapi_copy(
1707 event ext_oneapi_copy(
1708 const void *Src,
void *Dest,
1710 size_t DeviceRowPitch,
event DepEvent,
1722 event ext_oneapi_copy(
1739 event ext_oneapi_copy(
1743 const std::vector<event> &DepEvents,
1754 event ext_oneapi_copy(
1783 event ext_oneapi_copy(
1804 event ext_oneapi_copy(
1805 const void *Src,
void *Dest,
1807 size_t DeviceRowPitch,
const std::vector<event> &DepEvents,
1834 event ext_oneapi_copy(
1866 event ext_oneapi_wait_external_semaphore(
1879 event ext_oneapi_wait_external_semaphore(
1881 const std::vector<event> &DepEvents,
1892 event ext_oneapi_wait_external_semaphore(
1906 event ext_oneapi_wait_external_semaphore(
1908 uint64_t WaitValue,
event DepEvent,
1921 event ext_oneapi_wait_external_semaphore(
1923 uint64_t WaitValue,
const std::vector<event> &DepEvents,
1933 event ext_oneapi_signal_external_semaphore(
1945 event ext_oneapi_signal_external_semaphore(
1959 event ext_oneapi_signal_external_semaphore(
1961 const std::vector<event> &DepEvents,
1973 event ext_oneapi_signal_external_semaphore(
1975 uint64_t SignalValue,
1988 event ext_oneapi_signal_external_semaphore(
1990 uint64_t SignalValue,
event DepEvent,
2004 event ext_oneapi_signal_external_semaphore(
2006 uint64_t SignalValue,
const std::vector<event> &DepEvents,
2015 typename PropertiesT>
2025 void(kernel_handler)>::value),
2026 "sycl::queue.single_task() requires a kernel instead of command group. "
2027 "Use queue.submit() instead");
2032 CGH.template single_task<KernelName, KernelType, PropertiesT>(
2033 Properties, KernelFunc);
2042 template <
typename KernelName = detail::auto_name,
typename KernelType>
2046 return single_task<KernelName, KernelType>(
2057 typename PropertiesT>
2067 void(kernel_handler)>::value),
2068 "sycl::queue.single_task() requires a kernel instead of command group. "
2069 "Use queue.submit() instead");
2075 CGH.template single_task<KernelName, KernelType, PropertiesT>(
2076 Properties, KernelFunc);
2086 template <
typename KernelName = detail::auto_name,
typename KernelType>
2090 return single_task<KernelName, KernelType>(
2103 typename PropertiesT>
2107 const std::vector<event> &DepEvents, PropertiesT Properties,
2114 void(kernel_handler)>::value),
2115 "sycl::queue.single_task() requires a kernel instead of command group. "
2116 "Use queue.submit() instead");
2122 CGH.template single_task<KernelName, KernelType, PropertiesT>(
2123 Properties, KernelFunc);
2134 template <
typename KernelName = detail::auto_name,
typename KernelType>
2138 return single_task<KernelName, KernelType>(
2151 return parallel_for_impl<KernelName>(Range, Rest...);
2162 return parallel_for_impl<KernelName>(Range, Rest...);
2173 return parallel_for_impl<KernelName>(Range, Rest...);
2185 return parallel_for_impl<KernelName>(Range, DepEvent, Rest...);
2197 return parallel_for_impl<KernelName>(Range, DepEvent, Rest...);
2209 return parallel_for_impl<KernelName>(Range, DepEvent, Rest...);
2223 return parallel_for_impl<KernelName>(Range, DepEvents, Rest...);
2237 return parallel_for_impl<KernelName>(Range, DepEvents, Rest...);
2251 return parallel_for_impl<KernelName>(Range, DepEvents, Rest...);
2267 const std::vector<event> &DepEvents,
2269 static_assert(1 <= Dim && Dim <= 3,
"Invalid number of dimensions");
2270 return parallel_for_impl<KernelName>(Range, WorkItemOffset, DepEvents,
2284 event parallel_for_impl(
range<Dims> Range,
id<Dims> WorkItemOffset,
2290 CGH.template parallel_for<KernelName>(Range, WorkItemOffset,
2307 event parallel_for_impl(
range<Dims> Range,
id<Dims> WorkItemOffset,
2314 CGH.template parallel_for<KernelName>(Range, WorkItemOffset,
2332 event parallel_for_impl(
range<Dims> Range,
id<Dims> WorkItemOffset,
2333 const
std::vector<
event> &DepEvents,
2340 CGH.template parallel_for<KernelName>(Range, WorkItemOffset,
2354 typename PropertiesT,
typename... RestT>
2360 using KI = sycl::detail::KernelInfo<KernelName>;
2362 KI::getFileName(), KI::getFunctionName(), KI::getLineNumber(),
2363 KI::getColumnNumber());
2367 CGH.template parallel_for<KernelName>(Range, Properties, Rest...);
2382 return parallel_for<KernelName>(
2396 using KI = sycl::detail::KernelInfo<KernelName>;
2398 KI::getFileName(), KI::getFunctionName(), KI::getLineNumber(),
2399 KI::getColumnNumber());
2404 CGH.template parallel_for<KernelName>(Range, Rest...);
2421 using KI = sycl::detail::KernelInfo<KernelName>;
2423 KI::getFileName(), KI::getFunctionName(), KI::getLineNumber(),
2424 KI::getColumnNumber());
2429 CGH.template parallel_for<KernelName>(Range, Rest...);
2444 std::shared_ptr<DestT> Dest,
2449 CGH.
copy(Src, Dest);
2460 template <
typename SrcT,
typename DestT,
int DestDims,
access_mode DestMode,
2463 std::shared_ptr<SrcT> Src,
2469 CGH.
copy(Src, Dest);
2488 CGH.
copy(Src, Dest);
2499 template <
typename SrcT,
typename DestT,
int DestDims,
access_mode DestMode,
2508 CGH.
copy(Src, Dest);
2531 CGH.
copy(Src, Dest);
2568 CGH.
fill<T>(Dest, Src);
2579 bool ext_codeplay_supports_fusion()
const;
2582 #undef _KERNELFUNCPARAM
2626 const std::vector<event> &DepEvents,
2639 void ext_oneapi_prod();
2644 bool is_in_order()
const;
2655 bool ext_oneapi_empty() const;
2659 event ext_oneapi_get_last_event() const;
2661 void ext_oneapi_set_external_event(const
event &external_event);
2664 std::shared_ptr<detail::queue_impl> impl;
2665 queue(
std::shared_ptr<detail::queue_impl> impl) : impl(impl) {}
2667 template <
class Obj>
2672 template <backend BackendName,
class SyclObjectT>
2673 friend auto get_native(
const SyclObjectT &Obj)
2676 #if __SYCL_USE_FALLBACK_ASSERT
2681 template <
typename CommandGroupFunc>
2683 queue &Q, CommandGroupFunc &&CGF,
2684 const sycl::detail::code_location &CodeLoc);
2694 void submit_without_event_impl(std::function<
void(
handler &)> CGH,
2702 template <
typename T>
2703 std::enable_if_t<std::is_invocable_r_v<void, T, handler &>,
void>
2706 #if __SYCL_USE_FALLBACK_ASSERT
2711 submit_without_event_impl(CGF, CodeLoc);
2721 using SubmitPostProcessF = std::function<void(
bool,
bool,
event &)>;
2728 event submit_impl_and_postprocess(std::function<
void(
handler &)> CGH,
2730 const SubmitPostProcessF &PostProcess);
2737 event submit_impl_and_postprocess(std::function<
void(
handler &)> CGH,
2740 const SubmitPostProcessF &PostProcess);
2748 template <
typename KernelName,
int Dims,
typename PropertiesT,
2754 parallel_for_impl(
range<Dims> Range, PropertiesT Properties,
2756 using KI = sycl::detail::KernelInfo<KernelName>;
2758 KI::getFileName(), KI::getFunctionName(), KI::getLineNumber(),
2759 KI::getColumnNumber());
2763 CGH.template parallel_for<KernelName>(Range, Properties, Rest...);
2773 template <
typename KernelName,
int Dims,
typename... RestT>
2774 std::enable_if_t<detail::AreAllButLastReductions<RestT...>::value,
event>
2775 parallel_for_impl(range<Dims> Range, RestT &&...Rest) {
2776 return parallel_for_impl<KernelName>(
2787 template <
typename KernelName,
int Dims,
typename PropertiesT,
2790 ext::oneapi::experimental::is_property_list<PropertiesT>::value,
event>
2791 parallel_for_impl(range<Dims> Range, event DepEvent, PropertiesT Properties,
2793 using KI = sycl::detail::KernelInfo<KernelName>;
2794 constexpr detail::code_location CodeLoc(
2795 KI::getFileName(), KI::getFunctionName(), KI::getLineNumber(),
2796 KI::getColumnNumber());
2797 detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
2800 CGH.depends_on(DepEvent);
2801 CGH.template parallel_for<KernelName>(Range, Properties, Rest...);
2812 template <
typename KernelName,
int Dims,
typename... RestT>
2813 event parallel_for_impl(range<Dims> Range, event DepEvent, RestT &&...Rest) {
2814 return parallel_for_impl<KernelName>(
2827 template <
typename KernelName,
int Dims,
typename PropertiesT,
2830 ext::oneapi::experimental::is_property_list<PropertiesT>::value,
event>
2831 parallel_for_impl(range<Dims> Range,
const std::vector<event> &DepEvents,
2832 PropertiesT Properties, RestT &&...Rest) {
2833 using KI = sycl::detail::KernelInfo<KernelName>;
2834 constexpr detail::code_location CodeLoc(
2835 KI::getFileName(), KI::getFunctionName(), KI::getLineNumber(),
2836 KI::getColumnNumber());
2837 detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
2840 CGH.depends_on(DepEvents);
2841 CGH.template parallel_for<KernelName>(Range, Properties, Rest...);
2853 template <
typename KernelName,
int Dims,
typename... RestT>
2854 event parallel_for_impl(range<Dims> Range,
2855 const std::vector<event> &DepEvents,
2857 return parallel_for_impl<KernelName>(
2862 event memcpyToDeviceGlobal(
void *DeviceGlobalPtr,
const void *Src,
2863 bool IsDeviceImageScope,
size_t NumBytes,
2865 const std::vector<event> &DepEvents);
2866 event memcpyFromDeviceGlobal(
void *Dest,
const void *DeviceGlobalPtr,
2867 bool IsDeviceImageScope,
size_t NumBytes,
2869 const std::vector<event> &DepEvents);
2870 const property_list &getPropList()
const;
2877 template <>
struct __SYCL_EXPORT hash<
sycl::queue> {
2882 #if __SYCL_USE_FALLBACK_ASSERT
2884 #ifndef __STDC_FORMAT_MACROS
2885 #define __STDC_FORMAT_MACROS 1
2887 #include <cinttypes>
2890 inline namespace _V1 {
2893 #define __SYCL_ASSERT_START 1
2895 namespace __sycl_service_kernel__ {
2896 class AssertInfoCopier;
2910 event submitAssertCapture(queue &Self, event &Event, queue *SecondaryQueue,
2911 const detail::code_location &CodeLoc) {
2912 buffer<detail::AssertHappened, 1> Buffer{1};
2914 event CopierEv, CheckerEv, PostCheckerEv;
2915 auto CopierCGF = [&](handler &CGH) {
2916 CGH.depends_on(Event);
2920 CGH.single_task<__sycl_service_kernel__::AssertInfoCopier>([Acc] {
2921 #if defined(__SYCL_DEVICE_ONLY__) && !defined(__NVPTX__)
2922 __devicelib_assert_read(&Acc[0]);
2928 auto CheckerCGF = [&CopierEv, &Buffer](handler &CGH) {
2929 CGH.depends_on(CopierEv);
2933 auto Acc = Buffer.get_access<mode::read, target::host_buffer>(CGH);
2936 const detail::AssertHappened *AH = &Acc[0];
2941 if (AH->Flag == __SYCL_ASSERT_START)
2944 "Internal Error. Invalid value in assert description.");
2948 const char *Expr = AH->Expr[0] ? AH->Expr :
"<unknown expr>";
2949 const char *File = AH->File[0] ? AH->File :
"<unknown file>";
2950 const char *Func = AH->Func[0] ? AH->Func :
"<unknown func>";
2953 "%s:%d: %s: global id: [%" PRIu64
",%" PRIu64
",%" PRIu64
2954 "], local id: [%" PRIu64
",%" PRIu64
",%" PRIu64
"] "
2955 "Assertion `%s` failed.\n",
2956 File, AH->Line, Func, AH->GID0, AH->GID1, AH->GID2, AH->LID0,
2957 AH->LID1, AH->LID2, Expr);
2964 if (SecondaryQueue) {
2965 CopierEv = Self.submit_impl(CopierCGF, *SecondaryQueue, CodeLoc);
2966 CheckerEv = Self.submit_impl(CheckerCGF, *SecondaryQueue, CodeLoc);
2968 CopierEv = Self.submit_impl(CopierCGF, CodeLoc);
2969 CheckerEv = Self.submit_impl(CheckerCGF, CodeLoc);
2974 #undef __SYCL_ASSERT_START
The file contains implementations of accessor class.
The context class represents a SYCL context on which kernel functions may be executed.
This class is the default KernelName template parameter type for kernel invocation APIs such as singl...
Data type that manages the code_location information in TLS.
The SYCL device class encapsulates a single SYCL device on which kernels may be executed.
An event object can be used to synchronize memory transfers, enqueues of kernels and signaling barrie...
Graph in the modifiable state.
Command group handler class.
void depends_on(event Event)
Registers event dependencies on this command group.
void ext_oneapi_wait_external_semaphore(ext::oneapi::experimental::interop_semaphore_handle SemaphoreHandle)
Submit a non-blocking device-side wait on an external.
void copy(accessor< T_Src, Dims, AccessMode, AccessTarget, IsPlaceholder > Src, std::shared_ptr< T_Dst > Dst)
Copies the content of memory object accessed by Src into the memory pointed by Dst.
void ext_oneapi_graph(ext::oneapi::experimental::command_graph< ext::oneapi::experimental::graph_state::executable > Graph)
Executes a command_graph.
void memcpy(void *Dest, const void *Src, size_t Count)
Copies data from one memory region to another, each is either a host pointer or a pointer within USM ...
void require(accessor< DataT, Dims, AccMode, AccTarget, isPlaceholder > Acc)
Requires access to the memory object associated with the placeholder accessor.
void fill(accessor< T, Dims, AccessMode, AccessTarget, IsPlaceholder, PropertyListT > Dst, const T &Pattern)
Fills memory pointed by accessor with the pattern given.
void update_host(accessor< T, Dims, AccessMode, AccessTarget, IsPlaceholder > Acc)
Provides guarantees that the memory object accessed via Acc is updated on the host after command grou...
void ext_oneapi_memcpy2d(void *Dest, size_t DestPitch, const void *Src, size_t SrcPitch, size_t Width, size_t Height)
Copies data from one 2D memory region to another, both pointed by USM pointers.
void prefetch(const void *Ptr, size_t Count)
Provides hints to the runtime library that data should be made available on a device earlier than Uni...
A unique identifier of an item in an index space.
Defines the iteration domain of both the work-groups and the overall dispatch.
Objects of the property_list class are containers for the SYCL properties.
Encapsulates a single SYCL queue which schedules kernels on a SYCL device.
event copy(accessor< SrcT, SrcDims, SrcMode, SrcTgt, IsPlaceholder > Src, DestT *Dest, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from a memory region pointed to by a placeholder accessor to another memory region pointe...
event copy(const std::remove_all_extents_t< T > *Src, ext::oneapi::experimental::device_global< T, PropertyListT > &Dest, size_t Count, size_t StartIndex, const std::vector< event > &DepEvents, const detail::code_location &CodeLoc=detail::code_location::current())
Copies elements of type std::remove_all_extents_t<T> from a USM memory region to a device_global.
void wait(const detail::code_location &CodeLoc=detail::code_location::current())
Performs a blocking wait for the completion of all enqueued tasks in the queue.
event fill(void *Ptr, const T &Pattern, size_t Count, const std::vector< event > &DepEvents, const detail::code_location &CodeLoc=detail::code_location::current())
Fills the specified memory with the specified pattern.
event fill(void *Ptr, const T &Pattern, size_t Count, const detail::code_location &CodeLoc=detail::code_location::current())
Fills the specified memory with the specified pattern.
queue(const property_list &PropList={})
Constructs a SYCL queue instance using the device returned by an instance of default_selector.
event fill(void *Ptr, const T &Pattern, size_t Count, event DepEvent, const detail::code_location &CodeLoc=detail::code_location::current())
Fills the specified memory with the specified pattern.
event ext_oneapi_memcpy2d(void *Dest, size_t DestPitch, const void *Src, size_t SrcPitch, size_t Width, size_t Height, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from one 2D memory region to another, both pointed by USM pointers.
queue & operator=(const queue &RHS)=default
event prefetch(const void *Ptr, size_t Count, const std::vector< event > &DepEvents, const detail::code_location &CodeLoc=detail::code_location::current())
Provides hints to the runtime library that data should be made available on a device earlier than Uni...
std::enable_if_t< ext::oneapi::experimental::is_property_list< PropertiesT >::value, event > single_task(PropertiesT Properties, _KERNELFUNCPARAM(KernelFunc), const detail::code_location &CodeLoc=detail::code_location::current())
single_task version with a kernel represented as a lambda.
event copy(const T *Src, T *Dest, size_t Count, const std::vector< event > &DepEvents, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from one memory region to another, each is either a host pointer or a pointer within USM ...
queue(const context &syclContext, const DeviceSelector &deviceSelector, const async_handler &AsyncHandler, const property_list &propList={})
Constructs a SYCL queue instance using the device identified by the device selector provided.
event parallel_for(range< 1 > Range, const std::vector< event > &DepEvents, RestT &&...Rest)
parallel_for version with a kernel represented as a lambda + range that specifies global size only.
event parallel_for(range< 3 > Range, const std::vector< event > &DepEvents, RestT &&...Rest)
parallel_for version with a kernel represented as a lambda + range that specifies global size only.
std::enable_if_t< ext::oneapi::experimental::is_property_list< PropertiesT >::value, event > single_task(const std::vector< event > &DepEvents, PropertiesT Properties, _KERNELFUNCPARAM(KernelFunc), const detail::code_location &CodeLoc=detail::code_location::current())
single_task version with a kernel represented as a lambda.
event parallel_for(range< 1 > Range, RestT &&...Rest)
parallel_for version with a kernel represented as a lambda + range that specifies global size only.
__SYCL2020_DEPRECATED("SYCL 1.2.1 device selectors are deprecated. Please " "use SYCL 2020 device selectors instead.") queue(const device_selector &DeviceSelector
Constructs a SYCL queue instance using the device returned by the DeviceSelector provided.
event parallel_for(range< Dim > Range, id< Dim > WorkItemOffset, const std::vector< event > &DepEvents, _KERNELFUNCPARAM(KernelFunc))
parallel_for version with a kernel represented as a lambda + range and offset that specify global siz...
event memcpy(ext::oneapi::experimental::device_global< T, PropertyListT > &Dest, const void *Src, size_t NumBytes=sizeof(T), size_t Offset=0, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from a USM memory region to a device_global.
event copy(std::shared_ptr< SrcT > Src, accessor< DestT, DestDims, DestMode, DestTgt, IsPlaceholder > Dest, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from a memory region pointed to by a shared_ptr to another memory region pointed to by a ...
queue(const context &syclContext, const DeviceSelector &deviceSelector, const property_list &propList={})
Constructs a SYCL queue instance using the device identified by the device selector provided.
event ext_oneapi_graph(ext::oneapi::experimental::command_graph< ext::oneapi::experimental::graph_state::executable > Graph, const std::vector< event > &DepEvents, const detail::code_location &CodeLoc=detail::code_location::current())
Shortcut for executing a graph of commands with multiple dependencies.
event copy(accessor< SrcT, SrcDims, SrcMode, SrcTgt, IsSrcPlaceholder > Src, accessor< DestT, DestDims, DestMode, DestTgt, IsDestPlaceholder > Dest, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from one memory region to another, both pointed by placeholder accessors.
event copy(const T *Src, T *Dest, size_t Count, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from one memory region to another, each is either a host pointer or a pointer within USM ...
event parallel_for(range< 1 > Range, event DepEvent, RestT &&...Rest)
parallel_for version with a kernel represented as a lambda + range that specifies global size only.
queue(queue &&RHS)=default
event ext_oneapi_wait_external_semaphore(sycl::ext::oneapi::experimental::interop_semaphore_handle SemaphoreHandle, const detail::code_location &CodeLoc=detail::code_location::current())
Instruct the queue with a non-blocking wait on an external semaphore.
event parallel_for(range< 3 > Range, event DepEvent, RestT &&...Rest)
parallel_for version with a kernel represented as a lambda + range that specifies global size only.
event parallel_for(range< 3 > Range, RestT &&...Rest)
parallel_for version with a kernel represented as a lambda + range that specifies global size only.
event update_host(accessor< T, Dims, Mode, Tgt, IsPlaceholder > Acc, const detail::code_location &CodeLoc=detail::code_location::current())
Provides guarantees that the memory object accessed via Acc is updated on the host after operation is...
event copy(const T *Src, T *Dest, size_t Count, event DepEvent, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from one memory region to another, each is either a host pointer or a pointer within USM ...
event copy(const ext::oneapi::experimental::device_global< T, PropertyListT > &Src, std::remove_all_extents_t< T > *Dest, size_t Count=sizeof(T)/sizeof(std::remove_all_extents_t< T >), size_t StartIndex=0, const detail::code_location &CodeLoc=detail::code_location::current())
Copies elements of type std::remove_all_extents_t<T> from a device_global to a USM memory region.
event prefetch(const void *Ptr, size_t Count, event DepEvent, const detail::code_location &CodeLoc=detail::code_location::current())
Provides hints to the runtime library that data should be made available on a device earlier than Uni...
queue(const DeviceSelector &deviceSelector, const property_list &PropList={})
Constructs a SYCL queue instance using the device identified by the device selector provided.
event memcpy(void *Dest, const ext::oneapi::experimental::device_global< T, PropertyListT > &Src, size_t NumBytes=sizeof(T), size_t Offset=0, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from a device_global to USM memory.
event copy(const ext::oneapi::experimental::device_global< T, PropertyListT > &Src, std::remove_all_extents_t< T > *Dest, size_t Count, size_t StartIndex, const std::vector< event > &DepEvents, const detail::code_location &CodeLoc=detail::code_location::current())
Copies elements of type std::remove_all_extents_t<T> from a device_global to a USM memory region.
event parallel_for(range< 2 > Range, RestT &&...Rest)
parallel_for version with a kernel represented as a lambda + range that specifies global size only.
queue(const async_handler &AsyncHandler, const property_list &PropList={})
Constructs a SYCL queue instance with an async_handler using the device returned by an instance of de...
event parallel_for(nd_range< Dims > Range, const std::vector< event > &DepEvents, RestT &&...Rest)
parallel_for version with a kernel represented as a lambda + nd_range that specifies global,...
event parallel_for(nd_range< Dims > Range, event DepEvent, RestT &&...Rest)
parallel_for version with a kernel represented as a lambda + nd_range that specifies global,...
queue & operator=(queue &&RHS)=default
queue(const DeviceSelector &deviceSelector, const async_handler &AsyncHandler, const property_list &PropList={})
Constructs a SYCL queue instance using the device identified by the device selector provided.
event ext_oneapi_graph(ext::oneapi::experimental::command_graph< ext::oneapi::experimental::graph_state::executable > Graph, const detail::code_location &CodeLoc=detail::code_location::current())
Shortcut for executing a graph of commands.
__SYCL2020_DEPRECATED("SYCL 1.2.1 device selectors are deprecated. Please " "use SYCL 2020 device selectors instead.") queue(const context &SyclContext
Constructs a SYCL queue instance that is associated with the context provided, using the device retur...
event single_task(event DepEvent, _KERNELFUNCPARAM(KernelFunc), const detail::code_location &CodeLoc=detail::code_location::current())
single_task version with a kernel represented as a lambda.
event single_task(_KERNELFUNCPARAM(KernelFunc), const detail::code_location &CodeLoc=detail::code_location::current())
single_task version with a kernel represented as a lambda.
event fill(accessor< T, Dims, Mode, Tgt, IsPlaceholder > Dest, const T &Src, const detail::code_location &CodeLoc=detail::code_location::current())
Fills the specified memory with the specified data.
bool has_property() const noexcept
bool operator==(const queue &RHS) const
queue(const queue &RHS)=default
Constructs a SYCL queue with an optional async_handler from an OpenCL cl_command_queue.
event memcpy(void *Dest, const ext::oneapi::experimental::device_global< T, PropertyListT > &Src, size_t NumBytes, size_t Offset, event DepEvent, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from a device_global to USM memory.
event memcpy(ext::oneapi::experimental::device_global< T, PropertyListT > &Dest, const void *Src, size_t NumBytes, size_t Offset, event DepEvent, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from a USM memory region to a device_global.
void wait_and_throw(const detail::code_location &CodeLoc=detail::code_location::current())
Performs a blocking wait for the completion of all enqueued tasks in the queue.
event single_task(const std::vector< event > &DepEvents, _KERNELFUNCPARAM(KernelFunc), const detail::code_location &CodeLoc=detail::code_location::current())
single_task version with a kernel represented as a lambda.
event copy(const std::remove_all_extents_t< T > *Src, ext::oneapi::experimental::device_global< T, PropertyListT > &Dest, size_t Count=sizeof(T)/sizeof(std::remove_all_extents_t< T >), size_t StartIndex=0, const detail::code_location &CodeLoc=detail::code_location::current())
Copies elements of type std::remove_all_extents_t<T> from a USM memory region to a device_global.
event memcpy(ext::oneapi::experimental::device_global< T, PropertyListT > &Dest, const void *Src, size_t NumBytes, size_t Offset, const std::vector< event > &DepEvents, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from a USM memory region to a device_global.
event ext_oneapi_graph(ext::oneapi::experimental::command_graph< ext::oneapi::experimental::graph_state::executable > Graph, event DepEvent, const detail::code_location &CodeLoc=detail::code_location::current())
Shortcut for executing a graph of commands with a single dependency.
std::enable_if_t< detail::AreAllButLastReductions< RestT... >::value, event > parallel_for(nd_range< Dims > Range, RestT &&...Rest)
parallel_for version with a kernel represented as a lambda + nd_range that specifies global,...
event copy(const std::remove_all_extents_t< T > *Src, ext::oneapi::experimental::device_global< T, PropertyListT > &Dest, size_t Count, size_t StartIndex, event DepEvent, const detail::code_location &CodeLoc=detail::code_location::current())
Copies elements of type std::remove_all_extents_t<T> from a USM memory region to a device_global.
event copy(const SrcT *Src, accessor< DestT, DestDims, DestMode, DestTgt, IsPlaceholder > Dest, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from a memory region pointed to by a raw pointer to another memory region pointed to by a...
event copy(accessor< SrcT, SrcDims, SrcMode, SrcTgt, IsPlaceholder > Src, std::shared_ptr< DestT > Dest, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from a memory region pointed to by a placeholder accessor to another memory region pointe...
event prefetch(const void *Ptr, size_t Count, const detail::code_location &CodeLoc=detail::code_location::current())
Provides hints to the runtime library that data should be made available on a device earlier than Uni...
std::enable_if_t< ext::oneapi::experimental::is_property_list< PropertiesT >::value, event > single_task(event DepEvent, PropertiesT Properties, _KERNELFUNCPARAM(KernelFunc), const detail::code_location &CodeLoc=detail::code_location::current())
single_task version with a kernel represented as a lambda.
event parallel_for(range< 2 > Range, event DepEvent, RestT &&...Rest)
parallel_for version with a kernel represented as a lambda + range that specifies global size only.
std::enable_if_t< std::is_invocable_r_v< void, T, handler & >, event > submit(T CGF, const detail::code_location &CodeLoc=detail::code_location::current())
Submits a command group function object to the queue, in order to be scheduled for execution on the d...
event copy(const ext::oneapi::experimental::device_global< T, PropertyListT > &Src, std::remove_all_extents_t< T > *Dest, size_t Count, size_t StartIndex, event DepEvent, const detail::code_location &CodeLoc=detail::code_location::current())
Copies elements of type std::remove_all_extents_t<T> from a device_global to a USM memory region.
PropertyT get_property() const
std::enable_if_t< std::is_invocable_r_v< void, T, handler & >, event > submit(T CGF, queue &SecondaryQueue, const detail::code_location &CodeLoc=detail::code_location::current())
Submits a command group function object to the queue, in order to be scheduled for execution on the d...
event memcpy(void *Dest, const ext::oneapi::experimental::device_global< T, PropertyListT > &Src, size_t NumBytes, size_t Offset, const std::vector< event > &DepEvents, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from a device_global to USM memory.
event parallel_for(range< 2 > Range, const std::vector< event > &DepEvents, RestT &&...Rest)
parallel_for version with a kernel represented as a lambda + range that specifies global size only.
std::enable_if_t< detail::AreAllButLastReductions< RestT... >::value &&ext::oneapi::experimental::is_property_list< PropertiesT >::value, event > parallel_for(nd_range< Dims > Range, PropertiesT Properties, RestT &&...Rest)
parallel_for version with a kernel represented as a lambda + nd_range that specifies global,...
bool operator!=(const queue &RHS) const
Defines the iteration domain of either a single work-group in a parallel dispatch,...
decltype(Obj::impl) const & getSyclObjImpl(const Obj &SyclObject)
bool isDeviceGlobalUsedInKernel(const void *DeviceGlobalPtr)
void defaultAsyncHandler(exception_list Exceptions)
T createSyclObjFromImpl(decltype(T::impl) ImplObj)
device select_device(const DSelectorInvocableType &DeviceSelectorInvocable)
void submit_impl(queue &Q, CommandGroupFunc &&CGF, const sycl::detail::code_location &CodeLoc)
void submit_impl(queue &Q, CommandGroupFunc &&CGF, const sycl::detail::code_location &CodeLoc)
void mem_advise(handler &CGH, void *Ptr, size_t NumBytes, int Advice)
void submit(queue Q, CommandGroupFunc &&CGF, const sycl::detail::code_location &CodeLoc=sycl::detail::code_location::current())
@ modifiable
In modifiable state, commands can be added to graph.
@ executable
In executable state, the graph is ready to execute.
static constexpr bool has_property()
constexpr device_has_key::value_t< Aspects... > device_has
properties< std::tuple<> > empty_properties_t
signed char __SYCL2020_DEPRECATED
auto get_native(const SyclObjectT &Obj) -> backend_return_t< BackendName, SyclObjectT >
typename backend_traits< Backend >::template return_type< SyclType > backend_return_t
int default_selector_v(const device &dev)
class __SYCL_EBO __SYCL_SPECIAL_CLASS IsPlaceholder
std::function< void(sycl::exception_list)> async_handler
std::error_code make_error_code(sycl::errc E) noexcept
Constructs an error code using e and sycl_category()
static device_ext & get_device(unsigned int id)
Util function to get a device by id.
uintptr_t pi_native_handle
#define _KERNELFUNCPARAM(a)
_Abi const simd< _Tp, _Abi > & noexcept
Predicate returning true if all template type parameters except the last one are reductions.
static constexpr code_location current(const char *fileName=__CODELOC_FILE_NAME, const char *funcName=__CODELOC_FUNCTION, unsigned long lineNo=__CODELOC_LINE, unsigned long columnNo=__CODELOC_COLUMN) noexcept
A struct to describe the properties of an image.
Opaque image memory handle type.
Opaque interop semaphore handle type.