52 #include <type_traits>
62 #ifdef __SYCL_NONCONST_FUNCTOR__
63 #define _KERNELFUNCPARAM(a) KernelType a
65 #define _KERNELFUNCPARAM(a) const KernelType &a
69 inline namespace _V1 {
77 template <backend BackendName,
class SyclObjectT>
79 -> backend_return_t<BackendName, SyclObjectT>;
84 #if __SYCL_USE_FALLBACK_ASSERT
85 inline event submitAssertCapture(queue &, event &, queue *,
86 const detail::code_location &);
92 namespace experimental {
134 template <
typename DeviceSelector,
136 detail::EnableIfSYCL2020DeviceSelectorInvocable<DeviceSelector>>
137 explicit queue(
const DeviceSelector &deviceSelector,
147 template <
typename DeviceSelector,
149 detail::EnableIfSYCL2020DeviceSelectorInvocable<DeviceSelector>>
150 explicit queue(
const DeviceSelector &deviceSelector,
161 template <
typename DeviceSelector,
163 detail::EnableIfSYCL2020DeviceSelectorInvocable<DeviceSelector>>
165 const DeviceSelector &deviceSelector,
177 template <
typename DeviceSelector,
179 detail::EnableIfSYCL2020DeviceSelectorInvocable<DeviceSelector>>
181 const DeviceSelector &deviceSelector,
185 AsyncHandler, propList) {}
193 "use SYCL 2020 device selectors instead.")
206 "use SYCL 2020 device selectors instead.")
209 : queue(DeviceSelector.select_device(), AsyncHandler, PropList) {}
215 explicit queue(
const device &SyclDevice,
const property_list &PropList = {})
224 explicit queue(
const device &SyclDevice,
const async_handler &AsyncHandler,
225 const property_list &PropList = {});
234 "use SYCL 2020 device selectors instead.")
247 "use SYCL 2020 device selectors instead.")
278 #ifdef __SYCL_INTERNAL_API
279 queue(cl_command_queue ClQueue,
const context &SyclContext,
297 #ifdef __SYCL_INTERNAL_API
298 cl_command_queue
get()
const;
313 ext_oneapi_get_graph()
const;
317 "is_host() is deprecated as the host device is no longer supported.")
318 bool is_host() const;
323 template <typename Param>
324 typename detail::is_queue_info_desc<Param>::return_type get_info() const;
329 template <typename Param>
330 typename detail::is_backend_info_desc<Param>::return_type
331 get_backend_info() const;
345 template <typename T>
348 const detail::code_location &CodeLoc = detail::code_location::current()) {
350 #if __SYCL_USE_FALLBACK_ASSERT
351 auto PostProcess = [
this, &CodeLoc](
bool IsKernel,
bool KernelUsesAssert,
353 if (IsKernel && !
device_has(aspect::ext_oneapi_native_assert) &&
354 KernelUsesAssert && !
device_has(aspect::accelerator)) {
359 submitAssertCapture(*
this, E,
nullptr, CodeLoc);
363 return submit_impl_and_postprocess(CGF, CodeLoc, PostProcess);
365 return submit_impl(CGF, CodeLoc);
380 template <
typename T>
381 std::enable_if_t<std::is_invocable_r_v<void, T, handler &>,
event>
submit(
382 T CGF,
queue &SecondaryQueue,
385 #if __SYCL_USE_FALLBACK_ASSERT
386 auto PostProcess = [
this, &SecondaryQueue, &CodeLoc](
387 bool IsKernel,
bool KernelUsesAssert,
event &E) {
388 if (IsKernel && !
device_has(aspect::ext_oneapi_native_assert) &&
389 KernelUsesAssert && !
device_has(aspect::accelerator)) {
396 submitAssertCapture(*
this, E, &SecondaryQueue, CodeLoc);
400 return submit_impl_and_postprocess(CGF, SecondaryQueue, CodeLoc,
403 return submit_impl(CGF, SecondaryQueue, CodeLoc);
414 event ext_oneapi_submit_barrier(
426 event ext_oneapi_submit_barrier(
427 const std::vector<event> &WaitList,
452 wait_and_throw_proxy(CodeLoc);
467 void throw_asynchronous();
476 template <typename PropertyT> PropertyT
get_property() const;
485 template <typename T>
487 void *Ptr, const T &Pattern,
size_t Count,
488 const detail::code_location &CodeLoc = detail::code_location::current()) {
502 template <
typename T>
504 void *Ptr,
const T &Pattern,
size_t Count,
event DepEvent,
510 CGH.
fill<T>(Ptr, Pattern, Count);
524 template <
typename T>
526 void *Ptr,
const T &Pattern,
size_t Count,
527 const std::vector<event> &DepEvents,
533 CGH.
fill<T>(Ptr, Pattern, Count);
548 void *Ptr,
int Value,
size_t Count,
562 void *Ptr,
int Value,
size_t Count,
event DepEvent,
577 void *Ptr,
int Value,
size_t Count,
const std::vector<event> &DepEvents,
592 void *Dest,
const void *Src,
size_t Count,
608 void *Dest,
const void *Src,
size_t Count,
event DepEvent,
625 void *Dest,
const void *Src,
size_t Count,
626 const std::vector<event> &DepEvents,
641 template <
typename T>
643 const T *Src, T *Dest,
size_t Count,
646 return this->memcpy(Dest, Src, Count *
sizeof(T));
662 template <
typename T>
664 const T *Src, T *Dest,
size_t Count,
event DepEvent,
667 return this->memcpy(Dest, Src, Count *
sizeof(T), DepEvent);
683 template <
typename T>
685 const T *Src, T *Dest,
size_t Count,
const std::vector<event> &DepEvents,
688 return this->memcpy(Dest, Src, Count *
sizeof(T), DepEvents);
701 const detail::code_location &CodeLoc = detail::code_location::current());
711 const
void *Ptr,
size_t Length,
int Advice,
712 const detail::code_location &CodeLoc = detail::code_location::current());
723 const
void *Ptr,
size_t Length,
int Advice,
event DepEvent,
724 const detail::code_location &CodeLoc = detail::code_location::current());
736 const
void *Ptr,
size_t Length,
int Advice,
737 const
std::vector<
event> &DepEvents,
738 const detail::code_location &CodeLoc = detail::code_location::current());
748 const
void *Ptr,
size_t Count,
749 const detail::code_location &CodeLoc = detail::code_location::current()) {
763 const void *Ptr,
size_t Count,
event DepEvent,
784 const void *Ptr,
size_t Count,
const std::vector<event> &DepEvents,
813 template <
typename T =
unsigned char,
814 typename = std::enable_if_t<std::is_same_v<T, unsigned char>>>
816 void *Dest,
size_t DestPitch,
const void *Src,
size_t SrcPitch,
817 size_t Width,
size_t Height,
846 template <
typename T =
unsigned char,
847 typename = std::enable_if_t<std::is_same_v<T, unsigned char>>>
848 event ext_oneapi_memcpy2d(
849 void *Dest,
size_t DestPitch,
const void *Src,
size_t SrcPitch,
850 size_t Width,
size_t Height,
event DepEvent,
873 template <
typename T =
unsigned char,
874 typename = std::enable_if_t<std::is_same_v<T, unsigned char>>>
875 event ext_oneapi_memcpy2d(
876 void *Dest,
size_t DestPitch,
const void *Src,
size_t SrcPitch,
877 size_t Width,
size_t Height,
const std::vector<event> &DepEvents,
895 template <
typename T>
896 event ext_oneapi_copy2d(
897 const T *Src,
size_t SrcPitch, T *Dest,
size_t DestPitch,
size_t Width,
917 template <
typename T>
918 event ext_oneapi_copy2d(
919 const T *Src,
size_t SrcPitch, T *Dest,
size_t DestPitch,
size_t Width,
920 size_t Height,
event DepEvent,
940 template <
typename T>
941 event ext_oneapi_copy2d(
942 const T *Src,
size_t SrcPitch, T *Dest,
size_t DestPitch,
size_t Width,
943 size_t Height,
const std::vector<event> &DepEvents,
962 template <
typename T =
unsigned char,
963 typename = std::enable_if_t<std::is_same_v<T, unsigned char>>>
964 event ext_oneapi_memset2d(
965 void *Dest,
size_t DestPitch,
int Value,
size_t Width,
size_t Height,
985 template <
typename T =
unsigned char,
986 typename = std::enable_if_t<std::is_same_v<T, unsigned char>>>
987 event ext_oneapi_memset2d(
988 void *Dest,
size_t DestPitch,
int Value,
size_t Width,
size_t Height,
1010 template <
typename T =
unsigned char,
1011 typename = std::enable_if_t<std::is_same_v<T, unsigned char>>>
1012 event ext_oneapi_memset2d(
1013 void *Dest,
size_t DestPitch,
int Value,
size_t Width,
size_t Height,
1014 const std::vector<event> &DepEvents,
1030 template <
typename T>
1031 event ext_oneapi_fill2d(
1032 void *Dest,
size_t DestPitch,
const T &Pattern,
size_t Width,
1050 template <
typename T>
1051 event ext_oneapi_fill2d(
1052 void *Dest,
size_t DestPitch,
const T &Pattern,
size_t Width,
1053 size_t Height,
event DepEvent,
1071 template <
typename T>
1072 event ext_oneapi_fill2d(
1073 void *Dest,
size_t DestPitch,
const T &Pattern,
size_t Width,
1074 size_t Height,
const std::vector<event> &DepEvents,
1089 template <
typename T,
typename PropertyListT>
1092 const void *Src,
size_t NumBytes,
size_t Offset,
1093 const std::vector<event> &DepEvents,
1096 if (
sizeof(T) < Offset + NumBytes)
1098 "Copy to device_global is out of bounds.");
1106 return CGH.
memcpy(Dest, Src, NumBytes, Offset);
1111 constexpr
bool IsDeviceImageScoped = PropertyListT::template
has_property<
1113 return memcpyToDeviceGlobal(&Dest, Src, IsDeviceImageScoped, NumBytes,
1129 template <
typename T,
typename PropertyListT>
1132 const void *Src,
size_t NumBytes,
size_t Offset,
event DepEvent,
1135 return this->memcpy(Dest, Src, NumBytes, Offset,
1136 std::vector<event>{DepEvent});
1149 template <
typename T,
typename PropertyListT>
1152 const void *Src,
size_t NumBytes =
sizeof(T),
size_t Offset = 0,
1155 return this->memcpy(Dest, Src, NumBytes, Offset, std::vector<event>{});
1170 template <
typename T,
typename PropertyListT>
1174 size_t NumBytes,
size_t Offset,
const std::vector<event> &DepEvents,
1177 if (
sizeof(T) < Offset + NumBytes)
1179 "Copy from device_global is out of bounds.");
1186 return CGH.
memcpy(Dest, Src, NumBytes, Offset);
1190 constexpr
bool IsDeviceImageScoped = PropertyListT::template
has_property<
1192 return memcpyFromDeviceGlobal(Dest, &Src, IsDeviceImageScoped, NumBytes,
1208 template <
typename T,
typename PropertyListT>
1212 size_t NumBytes,
size_t Offset,
event DepEvent,
1215 return this->memcpy(Dest, Src, NumBytes, Offset,
1216 std::vector<event>{DepEvent});
1229 template <
typename T,
typename PropertyListT>
1233 size_t NumBytes =
sizeof(T),
size_t Offset = 0,
1236 return this->memcpy(Dest, Src, NumBytes, Offset, std::vector<event>{});
1252 template <
typename T,
typename PropertyListT>
1254 const std::remove_all_extents_t<T> *Src,
1256 size_t Count,
size_t StartIndex,
const std::vector<event> &DepEvents,
1259 return this->memcpy(Dest, Src, Count *
sizeof(std::remove_all_extents_t<T>),
1260 StartIndex *
sizeof(std::remove_all_extents_t<T>),
1277 template <
typename T,
typename PropertyListT>
1279 const std::remove_all_extents_t<T> *Src,
1281 size_t Count,
size_t StartIndex,
event DepEvent,
1284 return this->memcpy(Dest, Src, Count *
sizeof(std::remove_all_extents_t<T>),
1285 StartIndex *
sizeof(std::remove_all_extents_t<T>),
1300 template <
typename T,
typename PropertyListT>
1302 const std::remove_all_extents_t<T> *Src,
1304 size_t Count =
sizeof(T) /
sizeof(std::remove_all_extents_t<T>),
1305 size_t StartIndex = 0,
1308 return this->memcpy(Dest, Src, Count *
sizeof(std::remove_all_extents_t<T>),
1309 StartIndex *
sizeof(std::remove_all_extents_t<T>));
1325 template <
typename T,
typename PropertyListT>
1328 std::remove_all_extents_t<T> *Dest,
size_t Count,
size_t StartIndex,
1329 const std::vector<event> &DepEvents,
1332 return this->memcpy(Dest, Src, Count *
sizeof(std::remove_all_extents_t<T>),
1333 StartIndex *
sizeof(std::remove_all_extents_t<T>),
1350 template <
typename T,
typename PropertyListT>
1353 std::remove_all_extents_t<T> *Dest,
size_t Count,
size_t StartIndex,
1357 return this->memcpy(Dest, Src, Count *
sizeof(std::remove_all_extents_t<T>),
1358 StartIndex *
sizeof(std::remove_all_extents_t<T>),
1373 template <
typename T,
typename PropertyListT>
1376 std::remove_all_extents_t<T> *Dest,
1377 size_t Count =
sizeof(T) /
sizeof(std::remove_all_extents_t<T>),
1378 size_t StartIndex = 0,
1381 return this->memcpy(Dest, Src, Count *
sizeof(std::remove_all_extents_t<T>),
1382 StartIndex *
sizeof(std::remove_all_extents_t<T>));
1436 DestImgDesc, CopyExtent);
1499 DestImgDesc, CopyExtent);
1520 const std::vector<event> &DepEvents,
1564 DestImgDesc, CopyExtent);
1620 DestExtent, CopyExtent);
1683 DestExtent, CopyExtent);
1703 const std::vector<event> &DepEvents,
1748 DestExtent, CopyExtent);
1765 void *Src,
void *Dest,
1767 size_t DeviceRowPitch,
1810 DeviceRowPitch, HostExtent, CopyExtent);
1828 void *Src,
void *Dest,
1830 size_t DeviceRowPitch,
event DepEvent,
1879 const std::vector<event> &DepEvents,
1944 DeviceRowPitch, HostExtent, CopyExtent);
1963 void *Src,
void *Dest,
1965 size_t DeviceRowPitch,
const std::vector<event> &DepEvents,
2012 DeviceRowPitch, HostExtent, CopyExtent);
2061 const std::vector<event> &DepEvents,
2119 const std::vector<event> &DepEvents,
2136 typename PropertiesT>
2146 void(kernel_handler)>::value),
2147 "sycl::queue.single_task() requires a kernel instead of command group. "
2148 "Use queue.submit() instead");
2153 CGH.template single_task<KernelName, KernelType, PropertiesT>(
2154 Properties, KernelFunc);
2163 template <
typename KernelName = detail::auto_name,
typename KernelType>
2167 return single_task<KernelName, KernelType>(
2178 typename PropertiesT>
2188 void(kernel_handler)>::value),
2189 "sycl::queue.single_task() requires a kernel instead of command group. "
2190 "Use queue.submit() instead");
2196 CGH.template single_task<KernelName, KernelType, PropertiesT>(
2197 Properties, KernelFunc);
2207 template <
typename KernelName = detail::auto_name,
typename KernelType>
2211 return single_task<KernelName, KernelType>(
2224 typename PropertiesT>
2228 const std::vector<event> &DepEvents, PropertiesT Properties,
2235 void(kernel_handler)>::value),
2236 "sycl::queue.single_task() requires a kernel instead of command group. "
2237 "Use queue.submit() instead");
2243 CGH.template single_task<KernelName, KernelType, PropertiesT>(
2244 Properties, KernelFunc);
2255 template <
typename KernelName = detail::auto_name,
typename KernelType>
2259 return single_task<KernelName, KernelType>(
2272 return parallel_for_impl<KernelName>(Range, Rest...);
2283 return parallel_for_impl<KernelName>(Range, Rest...);
2294 return parallel_for_impl<KernelName>(Range, Rest...);
2306 return parallel_for_impl<KernelName>(Range, DepEvent, Rest...);
2318 return parallel_for_impl<KernelName>(Range, DepEvent, Rest...);
2330 return parallel_for_impl<KernelName>(Range, DepEvent, Rest...);
2344 return parallel_for_impl<KernelName>(Range, DepEvents, Rest...);
2358 return parallel_for_impl<KernelName>(Range, DepEvents, Rest...);
2372 return parallel_for_impl<KernelName>(Range, DepEvents, Rest...);
2388 const std::vector<event> &DepEvents,
2390 static_assert(1 <= Dim && Dim <= 3,
"Invalid number of dimensions");
2391 return parallel_for_impl<KernelName>(Range, WorkItemOffset, DepEvents,
2405 event parallel_for_impl(
range<Dims> Range,
id<Dims> WorkItemOffset,
2411 CGH.template parallel_for<KernelName>(Range, WorkItemOffset,
2428 event parallel_for_impl(
range<Dims> Range,
id<Dims> WorkItemOffset,
2435 CGH.template parallel_for<KernelName>(Range, WorkItemOffset,
2453 event parallel_for_impl(
range<Dims> Range,
id<Dims> WorkItemOffset,
2454 const
std::vector<
event> &DepEvents,
2461 CGH.template parallel_for<KernelName>(Range, WorkItemOffset,
2475 typename PropertiesT,
typename... RestT>
2481 using KI = sycl::detail::KernelInfo<KernelName>;
2483 KI::getFileName(), KI::getFunctionName(), KI::getLineNumber(),
2484 KI::getColumnNumber());
2488 CGH.template parallel_for<KernelName>(Range, Properties, Rest...);
2503 return parallel_for<KernelName>(
2517 using KI = sycl::detail::KernelInfo<KernelName>;
2519 KI::getFileName(), KI::getFunctionName(), KI::getLineNumber(),
2520 KI::getColumnNumber());
2525 CGH.template parallel_for<KernelName>(Range, Rest...);
2542 using KI = sycl::detail::KernelInfo<KernelName>;
2544 KI::getFileName(), KI::getFunctionName(), KI::getLineNumber(),
2545 KI::getColumnNumber());
2550 CGH.template parallel_for<KernelName>(Range, Rest...);
2565 std::shared_ptr<DestT> Dest,
2570 CGH.
copy(Src, Dest);
2581 template <
typename SrcT,
typename DestT,
int DestDims,
access_mode DestMode,
2584 std::shared_ptr<SrcT> Src,
2590 CGH.
copy(Src, Dest);
2609 CGH.
copy(Src, Dest);
2620 template <
typename SrcT,
typename DestT,
int DestDims,
access_mode DestMode,
2629 CGH.
copy(Src, Dest);
2652 CGH.
copy(Src, Dest);
2689 CGH.
fill<T>(Dest, Src);
2700 bool ext_codeplay_supports_fusion()
const;
2703 #undef _KERNELFUNCPARAM
2747 const std::vector<event> &DepEvents,
2760 bool is_in_order()
const;
2771 bool ext_oneapi_empty() const;
2775 event ext_oneapi_get_last_event() const;
2777 void ext_oneapi_set_external_event(const
event &external_event);
2780 std::shared_ptr<detail::queue_impl> impl;
2781 queue(
std::shared_ptr<detail::queue_impl> impl) : impl(impl) {}
2783 template <
class Obj>
2788 template <backend BackendName,
class SyclObjectT>
2789 friend auto get_native(
const SyclObjectT &Obj)
2792 #if __SYCL_USE_FALLBACK_ASSERT
2798 event submit_impl(std::function<
void(
handler &)> CGH,
2801 event submit_impl(std::function<
void(
handler &)> CGH,
queue secondQueue,
2807 event discard_or_return(
const event &Event);
2815 using SubmitPostProcessF = std::function<void(
bool,
bool,
event &)>;
2822 event submit_impl_and_postprocess(std::function<
void(
handler &)> CGH,
2824 const SubmitPostProcessF &PostProcess);
2831 event submit_impl_and_postprocess(std::function<
void(
handler &)> CGH,
2834 const SubmitPostProcessF &PostProcess);
2842 template <
typename KernelName,
int Dims,
typename PropertiesT,
2848 parallel_for_impl(
range<Dims> Range, PropertiesT Properties,
2850 using KI = sycl::detail::KernelInfo<KernelName>;
2852 KI::getFileName(), KI::getFunctionName(), KI::getLineNumber(),
2853 KI::getColumnNumber());
2857 CGH.template parallel_for<KernelName>(Range, Properties, Rest...);
2867 template <
typename KernelName,
int Dims,
typename... RestT>
2868 std::enable_if_t<detail::AreAllButLastReductions<RestT...>::value,
event>
2869 parallel_for_impl(
range<Dims> Range, RestT &&...Rest) {
2870 return parallel_for_impl<KernelName>(
2881 template <
typename KernelName,
int Dims,
typename PropertiesT,
2884 ext::oneapi::experimental::is_property_list<PropertiesT>::value,
event>
2885 parallel_for_impl(range<Dims> Range, event DepEvent, PropertiesT Properties,
2887 using KI = sycl::detail::KernelInfo<KernelName>;
2888 constexpr detail::code_location CodeLoc(
2889 KI::getFileName(), KI::getFunctionName(), KI::getLineNumber(),
2890 KI::getColumnNumber());
2891 detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
2894 CGH.depends_on(DepEvent);
2895 CGH.template parallel_for<KernelName>(Range, Properties, Rest...);
2906 template <
typename KernelName,
int Dims,
typename... RestT>
2907 event parallel_for_impl(range<Dims> Range, event DepEvent, RestT &&...Rest) {
2908 return parallel_for_impl<KernelName>(
2921 template <
typename KernelName,
int Dims,
typename PropertiesT,
2924 ext::oneapi::experimental::is_property_list<PropertiesT>::value,
event>
2925 parallel_for_impl(range<Dims> Range,
const std::vector<event> &DepEvents,
2926 PropertiesT Properties, RestT &&...Rest) {
2927 using KI = sycl::detail::KernelInfo<KernelName>;
2928 constexpr detail::code_location CodeLoc(
2929 KI::getFileName(), KI::getFunctionName(), KI::getLineNumber(),
2930 KI::getColumnNumber());
2931 detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
2934 CGH.depends_on(DepEvents);
2935 CGH.template parallel_for<KernelName>(Range, Properties, Rest...);
2947 template <
typename KernelName,
int Dims,
typename... RestT>
2948 event parallel_for_impl(range<Dims> Range,
2949 const std::vector<event> &DepEvents,
2951 return parallel_for_impl<KernelName>(
2956 event memcpyToDeviceGlobal(
void *DeviceGlobalPtr,
const void *Src,
2957 bool IsDeviceImageScope,
size_t NumBytes,
2959 const std::vector<event> &DepEvents);
2960 event memcpyFromDeviceGlobal(
void *Dest,
const void *DeviceGlobalPtr,
2961 bool IsDeviceImageScope,
size_t NumBytes,
2963 const std::vector<event> &DepEvents);
2970 template <>
struct __SYCL_EXPORT hash<
sycl::queue> {
2975 #if __SYCL_USE_FALLBACK_ASSERT
2977 #ifndef __STDC_FORMAT_MACROS
2978 #define __STDC_FORMAT_MACROS 1
2980 #include <cinttypes>
2983 inline namespace _V1 {
2986 #define __SYCL_ASSERT_START 1
2988 namespace __sycl_service_kernel__ {
2989 class AssertInfoCopier;
3003 event submitAssertCapture(queue &Self, event &Event, queue *SecondaryQueue,
3004 const detail::code_location &CodeLoc) {
3005 buffer<detail::AssertHappened, 1> Buffer{1};
3007 event CopierEv, CheckerEv, PostCheckerEv;
3008 auto CopierCGF = [&](handler &CGH) {
3009 CGH.depends_on(Event);
3013 CGH.single_task<__sycl_service_kernel__::AssertInfoCopier>([Acc] {
3014 #if defined(__SYCL_DEVICE_ONLY__) && !defined(__NVPTX__)
3015 __devicelib_assert_read(&Acc[0]);
3021 auto CheckerCGF = [&CopierEv, &Buffer](handler &CGH) {
3022 CGH.depends_on(CopierEv);
3026 auto Acc = Buffer.get_access<mode::read, target::host_buffer>(CGH);
3029 const detail::AssertHappened *AH = &Acc[0];
3034 if (AH->Flag == __SYCL_ASSERT_START)
3037 "Internal Error. Invalid value in assert description.");
3041 const char *Expr = AH->Expr[0] ? AH->Expr :
"<unknown expr>";
3042 const char *File = AH->File[0] ? AH->File :
"<unknown file>";
3043 const char *Func = AH->Func[0] ? AH->Func :
"<unknown func>";
3046 "%s:%d: %s: global id: [%" PRIu64
",%" PRIu64
",%" PRIu64
3047 "], local id: [%" PRIu64
",%" PRIu64
",%" PRIu64
"] "
3048 "Assertion `%s` failed.\n",
3049 File, AH->Line, Func, AH->GID0, AH->GID1, AH->GID2, AH->LID0,
3050 AH->LID1, AH->LID2, Expr);
3057 if (SecondaryQueue) {
3058 CopierEv = Self.submit_impl(CopierCGF, *SecondaryQueue, CodeLoc);
3059 CheckerEv = Self.submit_impl(CheckerCGF, *SecondaryQueue, CodeLoc);
3061 CopierEv = Self.submit_impl(CopierCGF, CodeLoc);
3062 CheckerEv = Self.submit_impl(CheckerCGF, CodeLoc);
3067 #undef __SYCL_ASSERT_START
The file contains implementations of accessor class.
The context class represents a SYCL context on which kernel functions may be executed.
This class is the default KernelName template parameter type for kernel invocation APIs such as single_task.
Data type that manages the code_location information in TLS.
The SYCL device class encapsulates a single SYCL device on which kernels may be executed.
An event object can be used to synchronize memory transfers, enqueues of kernels and signaling barriers.
Graph in the modifiable state.
Command group handler class.
void depends_on(event Event)
Registers event dependencies on this command group.
void ext_oneapi_wait_external_semaphore(sycl::ext::oneapi::experimental::interop_semaphore_handle SemaphoreHandle)
Instruct the queue with a non-blocking wait on an external semaphore.
void copy(accessor< T_Src, Dims, AccessMode, AccessTarget, IsPlaceholder > Src, std::shared_ptr< T_Dst > Dst)
Copies the content of memory object accessed by Src into the memory pointed by Dst.
void ext_oneapi_graph(ext::oneapi::experimental::command_graph< ext::oneapi::experimental::graph_state::executable > Graph)
Executes a command_graph.
void memcpy(void *Dest, const void *Src, size_t Count)
Copies data from one memory region to another, each is either a host pointer or a pointer within a USM allocation.
void ext_oneapi_copy(void *Src, ext::oneapi::experimental::image_mem_handle Dest, const ext::oneapi::experimental::image_descriptor &DestImgDesc)
Copies data from one memory region to another, where Src is a USM pointer and Dest is an opaque image memory handle.
void require(accessor< DataT, Dims, AccMode, AccTarget, isPlaceholder > Acc)
Requires access to the memory object associated with the placeholder accessor.
void fill(accessor< T, Dims, AccessMode, AccessTarget, IsPlaceholder, PropertyListT > Dst, const T &Pattern)
Fills memory pointed by accessor with the pattern given.
void update_host(accessor< T, Dims, AccessMode, AccessTarget, IsPlaceholder > Acc)
Provides guarantees that the memory object accessed via Acc is updated on the host after command group object execution is complete.
void ext_oneapi_memcpy2d(void *Dest, size_t DestPitch, const void *Src, size_t SrcPitch, size_t Width, size_t Height)
Copies data from one 2D memory region to another, both pointed by USM pointers.
void prefetch(const void *Ptr, size_t Count)
Provides hints to the runtime library that data should be made available on a device earlier than Unified Shared Memory would normally require it to be available.
void ext_oneapi_signal_external_semaphore(sycl::ext::oneapi::experimental::interop_semaphore_handle SemaphoreHandle)
Instruct the queue to signal the external semaphore once all previous commands have completed execution.
A unique identifier of an item in an index space.
Defines the iteration domain of both the work-groups and the overall dispatch.
Objects of the property_list class are containers for the SYCL properties.
Encapsulates a single SYCL queue which schedules kernels on a SYCL device.
event copy(accessor< SrcT, SrcDims, SrcMode, SrcTgt, IsPlaceholder > Src, DestT *Dest, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from a memory region pointed to by a placeholder accessor to another memory region pointed to by a raw pointer.
event copy(const std::remove_all_extents_t< T > *Src, ext::oneapi::experimental::device_global< T, PropertyListT > &Dest, size_t Count, size_t StartIndex, const std::vector< event > &DepEvents, const detail::code_location &CodeLoc=detail::code_location::current())
Copies elements of type std::remove_all_extents_t<T> from a USM memory region to a device_global.
void wait(const detail::code_location &CodeLoc=detail::code_location::current())
Performs a blocking wait for the completion of all enqueued tasks in the queue.
event ext_oneapi_copy(void *Src, sycl::range< 3 > SrcOffset, void *Dest, sycl::range< 3 > DestOffset, const ext::oneapi::experimental::image_descriptor &DeviceImgDesc, size_t DeviceRowPitch, sycl::range< 3 > HostExtent, sycl::range< 3 > CopyExtent, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from one memory region to another, where Src and Dest are USM pointers.
event fill(void *Ptr, const T &Pattern, size_t Count, const std::vector< event > &DepEvents, const detail::code_location &CodeLoc=detail::code_location::current())
Fills the specified memory with the specified pattern.
queue(const property_list &PropList={})
Constructs a SYCL queue instance using the device returned by an instance of default_selector.
event fill(void *Ptr, const T &Pattern, size_t Count, event DepEvent, const detail::code_location &CodeLoc=detail::code_location::current())
Fills the specified memory with the specified pattern.
event ext_oneapi_memcpy2d(void *Dest, size_t DestPitch, const void *Src, size_t SrcPitch, size_t Width, size_t Height, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from one 2D memory region to another, both pointed to by USM pointers.
queue & operator=(const queue &RHS)=default
event prefetch(const void *Ptr, size_t Count, const std::vector< event > &DepEvents, const detail::code_location &CodeLoc=detail::code_location::current())
Provides hints to the runtime library that data should be made available on a device earlier than Uni...
std::enable_if_t< ext::oneapi::experimental::is_property_list< PropertiesT >::value, event > single_task(PropertiesT Properties, _KERNELFUNCPARAM(KernelFunc), const detail::code_location &CodeLoc=detail::code_location::current())
single_task version with a kernel represented as a lambda.
event ext_oneapi_copy(ext::oneapi::experimental::image_mem_handle Src, ext::oneapi::experimental::image_mem_handle Dest, const ext::oneapi::experimental::image_descriptor &ImageDesc, event DepEvent, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from device to device memory, where Src and Dest are opaque image memory handles.
event ext_oneapi_copy(ext::oneapi::experimental::image_mem_handle Src, ext::oneapi::experimental::image_mem_handle Dest, const ext::oneapi::experimental::image_descriptor &ImageDesc, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from device to device memory, where Src and Dest are opaque image memory handles.
event copy(const T *Src, T *Dest, size_t Count, const std::vector< event > &DepEvents, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from one memory region to another, each is either a host pointer or a pointer within USM ...
queue(const context &syclContext, const DeviceSelector &deviceSelector, const async_handler &AsyncHandler, const property_list &propList={})
Constructs a SYCL queue instance using the device identified by the device selector provided.
event parallel_for(range< 1 > Range, const std::vector< event > &DepEvents, RestT &&...Rest)
parallel_for version with a kernel represented as a lambda + range that specifies global size only.
event parallel_for(range< 3 > Range, const std::vector< event > &DepEvents, RestT &&...Rest)
parallel_for version with a kernel represented as a lambda + range that specifies global size only.
event ext_oneapi_copy(ext::oneapi::experimental::image_mem_handle Src, void *Dest, const ext::oneapi::experimental::image_descriptor &SrcImgDesc, event DepEvent, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from one memory region to another, where Src is an opaque image memory handle and Dest is...
event ext_oneapi_copy(ext::oneapi::experimental::image_mem_handle Src, sycl::range< 3 > SrcOffset, const ext::oneapi::experimental::image_descriptor &SrcImgDesc, void *Dest, sycl::range< 3 > DestOffset, sycl::range< 3 > DestExtent, sycl::range< 3 > CopyExtent, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from one memory region to another, where Src is an opaque image memory handle and Dest is...
std::enable_if_t< ext::oneapi::experimental::is_property_list< PropertiesT >::value, event > single_task(const std::vector< event > &DepEvents, PropertiesT Properties, _KERNELFUNCPARAM(KernelFunc), const detail::code_location &CodeLoc=detail::code_location::current())
single_task version with a kernel represented as a lambda.
event parallel_for(range< 1 > Range, RestT &&...Rest)
parallel_for version with a kernel represented as a lambda + range that specifies global size only.
event ext_oneapi_copy(void *Src, sycl::range< 3 > SrcOffset, void *Dest, sycl::range< 3 > DestOffset, const ext::oneapi::experimental::image_descriptor &DeviceImgDesc, size_t DeviceRowPitch, sycl::range< 3 > HostExtent, sycl::range< 3 > CopyExtent, event DepEvent, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from one memory region to another, where Src and Dest are USM pointers.
__SYCL2020_DEPRECATED("SYCL 1.2.1 device selectors are deprecated. Please " "use SYCL 2020 device selectors instead.") queue(const device_selector &DeviceSelector
Constructs a SYCL queue instance using the device returned by the DeviceSelector provided.
event parallel_for(range< Dim > Range, id< Dim > WorkItemOffset, const std::vector< event > &DepEvents, _KERNELFUNCPARAM(KernelFunc))
parallel_for version with a kernel represented as a lambda + range and offset that specify global siz...
event memcpy(ext::oneapi::experimental::device_global< T, PropertyListT > &Dest, const void *Src, size_t NumBytes=sizeof(T), size_t Offset=0, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from a USM memory region to a device_global.
event copy(std::shared_ptr< SrcT > Src, accessor< DestT, DestDims, DestMode, DestTgt, IsPlaceholder > Dest, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from a memory region pointed to by a shared_ptr to another memory region pointed to by a ...
queue(const context &syclContext, const DeviceSelector &deviceSelector, const property_list &propList={})
Constructs a SYCL queue instance using the device identified by the device selector provided.
event ext_oneapi_graph(ext::oneapi::experimental::command_graph< ext::oneapi::experimental::graph_state::executable > Graph, const std::vector< event > &DepEvents, const detail::code_location &CodeLoc=detail::code_location::current())
Shortcut for executing a graph of commands with multiple dependencies.
event ext_oneapi_signal_external_semaphore(sycl::ext::oneapi::experimental::interop_semaphore_handle SemaphoreHandle, event DepEvent, const detail::code_location &CodeLoc=detail::code_location::current())
Instructs the queue to signal the external semaphore once all previous commands have completed execution.
event copy(accessor< SrcT, SrcDims, SrcMode, SrcTgt, IsSrcPlaceholder > Src, accessor< DestT, DestDims, DestMode, DestTgt, IsDestPlaceholder > Dest, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from one memory region to another, both pointed to by placeholder accessors.
event copy(const T *Src, T *Dest, size_t Count, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from one memory region to another, each is either a host pointer or a pointer within USM ...
event parallel_for(range< 1 > Range, event DepEvent, RestT &&...Rest)
parallel_for version with a kernel represented as a lambda + range that specifies global size only.
event ext_oneapi_copy(void *Src, ext::oneapi::experimental::image_mem_handle Dest, const ext::oneapi::experimental::image_descriptor &DestImgDesc, const std::vector< event > &DepEvents, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from one memory region to another, where Src is a USM pointer and Dest is an opaque image...
queue(queue &&RHS)=default
event ext_oneapi_wait_external_semaphore(sycl::ext::oneapi::experimental::interop_semaphore_handle SemaphoreHandle, const detail::code_location &CodeLoc=detail::code_location::current())
Instructs the queue to perform a non-blocking wait on an external semaphore.
event parallel_for(range< 3 > Range, event DepEvent, RestT &&...Rest)
parallel_for version with a kernel represented as a lambda + range that specifies global size only.
event parallel_for(range< 3 > Range, RestT &&...Rest)
parallel_for version with a kernel represented as a lambda + range that specifies global size only.
event update_host(accessor< T, Dims, Mode, Tgt, IsPlaceholder > Acc, const detail::code_location &CodeLoc=detail::code_location::current())
Provides guarantees that the memory object accessed via Acc is updated on the host after operation is...
event copy(const T *Src, T *Dest, size_t Count, event DepEvent, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from one memory region to another, each is either a host pointer or a pointer within USM ...
event ext_oneapi_copy(void *Src, sycl::range< 3 > SrcOffset, void *Dest, sycl::range< 3 > DestOffset, const ext::oneapi::experimental::image_descriptor &DeviceImgDesc, size_t DeviceRowPitch, sycl::range< 3 > HostExtent, sycl::range< 3 > CopyExtent, const std::vector< event > &DepEvents, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from one memory region to another, where Src and Dest are USM pointers.
event ext_oneapi_copy(void *Src, ext::oneapi::experimental::image_mem_handle Dest, const ext::oneapi::experimental::image_descriptor &DestImgDesc, event DepEvent, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from one memory region to another, where Src is a USM pointer and Dest is an opaque image...
event ext_oneapi_wait_external_semaphore(sycl::ext::oneapi::experimental::interop_semaphore_handle SemaphoreHandle, event DepEvent, const detail::code_location &CodeLoc=detail::code_location::current())
Instructs the queue to perform a non-blocking wait on an external semaphore.
event ext_oneapi_copy(ext::oneapi::experimental::image_mem_handle Src, void *Dest, const ext::oneapi::experimental::image_descriptor &SrcImgDesc, const std::vector< event > &DepEvents, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from one memory region to another, where Src is an opaque image memory handle and Dest is...
event copy(const ext::oneapi::experimental::device_global< T, PropertyListT > &Src, std::remove_all_extents_t< T > *Dest, size_t Count=sizeof(T)/sizeof(std::remove_all_extents_t< T >), size_t StartIndex=0, const detail::code_location &CodeLoc=detail::code_location::current())
Copies elements of type std::remove_all_extents_t<T> from a device_global to a USM memory region.
event prefetch(const void *Ptr, size_t Count, event DepEvent, const detail::code_location &CodeLoc=detail::code_location::current())
Provides hints to the runtime library that data should be made available on a device earlier than Uni...
event ext_oneapi_copy(ext::oneapi::experimental::image_mem_handle Src, sycl::range< 3 > SrcOffset, const ext::oneapi::experimental::image_descriptor &SrcImgDesc, void *Dest, sycl::range< 3 > DestOffset, sycl::range< 3 > DestExtent, sycl::range< 3 > CopyExtent, event DepEvent, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from one memory region to another, where Src is an opaque image memory handle and Dest is...
queue(const DeviceSelector &deviceSelector, const property_list &PropList={})
Constructs a SYCL queue instance using the device identified by the device selector provided.
__SYCL2020_DEPRECATED("is_host() is deprecated as the host device is no longer supported.") bool is_host() const
event ext_oneapi_copy(void *Src, sycl::range< 3 > SrcOffset, sycl::range< 3 > SrcExtent, ext::oneapi::experimental::image_mem_handle Dest, sycl::range< 3 > DestOffset, const ext::oneapi::experimental::image_descriptor &DestImgDesc, sycl::range< 3 > CopyExtent, event DepEvent, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from one memory region to another, where Src is a USM pointer and Dest is an opaque image...
event memcpy(void *Dest, const ext::oneapi::experimental::device_global< T, PropertyListT > &Src, size_t NumBytes=sizeof(T), size_t Offset=0, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from a device_global to USM memory.
event copy(const ext::oneapi::experimental::device_global< T, PropertyListT > &Src, std::remove_all_extents_t< T > *Dest, size_t Count, size_t StartIndex, const std::vector< event > &DepEvents, const detail::code_location &CodeLoc=detail::code_location::current())
Copies elements of type std::remove_all_extents_t<T> from a device_global to a USM memory region.
event parallel_for(range< 2 > Range, RestT &&...Rest)
parallel_for version with a kernel represented as a lambda + range that specifies global size only.
queue(const async_handler &AsyncHandler, const property_list &PropList={})
Constructs a SYCL queue instance with an async_handler using the device returned by an instance of de...
event parallel_for(nd_range< Dims > Range, const std::vector< event > &DepEvents, RestT &&...Rest)
parallel_for version with a kernel represented as a lambda + nd_range that specifies global,...
event ext_oneapi_copy(void *Src, sycl::range< 3 > SrcOffset, sycl::range< 3 > SrcExtent, ext::oneapi::experimental::image_mem_handle Dest, sycl::range< 3 > DestOffset, const ext::oneapi::experimental::image_descriptor &DestImgDesc, sycl::range< 3 > CopyExtent, const std::vector< event > &DepEvents, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from one memory region to another, where Src is a USM pointer and Dest is an opaque image...
event parallel_for(nd_range< Dims > Range, event DepEvent, RestT &&...Rest)
parallel_for version with a kernel represented as a lambda + nd_range that specifies global,...
queue & operator=(queue &&RHS)=default
event ext_oneapi_copy(void *Src, void *Dest, const ext::oneapi::experimental::image_descriptor &DeviceImgDesc, size_t DeviceRowPitch, const std::vector< event > &DepEvents, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from one memory region to another, where Src and Dest are USM pointers.
queue(const DeviceSelector &deviceSelector, const async_handler &AsyncHandler, const property_list &PropList={})
Constructs a SYCL queue instance using the device identified by the device selector provided.
event ext_oneapi_graph(ext::oneapi::experimental::command_graph< ext::oneapi::experimental::graph_state::executable > Graph, const detail::code_location &CodeLoc=detail::code_location::current())
Shortcut for executing a graph of commands.
event ext_oneapi_signal_external_semaphore(sycl::ext::oneapi::experimental::interop_semaphore_handle SemaphoreHandle, const detail::code_location &CodeLoc=detail::code_location::current())
Instructs the queue to signal the external semaphore once all previous commands have completed execution.
__SYCL2020_DEPRECATED("SYCL 1.2.1 device selectors are deprecated. Please " "use SYCL 2020 device selectors instead.") queue(const context &SyclContext
Constructs a SYCL queue instance that is associated with the context provided, using the device retur...
event ext_oneapi_copy(void *Src, ext::oneapi::experimental::image_mem_handle Dest, const ext::oneapi::experimental::image_descriptor &DestImgDesc, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from one memory region to another, where Src is a USM pointer and Dest is an opaque image...
event single_task(event DepEvent, _KERNELFUNCPARAM(KernelFunc), const detail::code_location &CodeLoc=detail::code_location::current())
single_task version with a kernel represented as a lambda.
event ext_oneapi_wait_external_semaphore(sycl::ext::oneapi::experimental::interop_semaphore_handle SemaphoreHandle, const std::vector< event > &DepEvents, const detail::code_location &CodeLoc=detail::code_location::current())
Instructs the queue to perform a non-blocking wait on an external semaphore.
event single_task(_KERNELFUNCPARAM(KernelFunc), const detail::code_location &CodeLoc=detail::code_location::current())
single_task version with a kernel represented as a lambda.
event fill(accessor< T, Dims, Mode, Tgt, IsPlaceholder > Dest, const T &Src, const detail::code_location &CodeLoc=detail::code_location::current())
Fills the specified memory with the specified data.
bool operator==(const queue &RHS) const
queue(const queue &RHS)=default
Constructs a SYCL queue with an optional async_handler from an OpenCL cl_command_queue.
event ext_oneapi_copy(ext::oneapi::experimental::image_mem_handle Src, ext::oneapi::experimental::image_mem_handle Dest, const ext::oneapi::experimental::image_descriptor &ImageDesc, const std::vector< event > &DepEvents, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from device to device memory, where Src and Dest are opaque image memory handles.
event memcpy(void *Dest, const ext::oneapi::experimental::device_global< T, PropertyListT > &Src, size_t NumBytes, size_t Offset, event DepEvent, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from a device_global to USM memory.
event memcpy(ext::oneapi::experimental::device_global< T, PropertyListT > &Dest, const void *Src, size_t NumBytes, size_t Offset, event DepEvent, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from a USM memory region to a device_global.
void wait_and_throw(const detail::code_location &CodeLoc=detail::code_location::current())
Performs a blocking wait for the completion of all enqueued tasks in the queue.
event single_task(const std::vector< event > &DepEvents, _KERNELFUNCPARAM(KernelFunc), const detail::code_location &CodeLoc=detail::code_location::current())
single_task version with a kernel represented as a lambda.
event ext_oneapi_copy(void *Src, void *Dest, const ext::oneapi::experimental::image_descriptor &DeviceImgDesc, size_t DeviceRowPitch, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from one memory region to another, where Src and Dest are USM pointers.
event copy(const std::remove_all_extents_t< T > *Src, ext::oneapi::experimental::device_global< T, PropertyListT > &Dest, size_t Count=sizeof(T)/sizeof(std::remove_all_extents_t< T >), size_t StartIndex=0, const detail::code_location &CodeLoc=detail::code_location::current())
Copies elements of type std::remove_all_extents_t<T> from a USM memory region to a device_global.
event memcpy(ext::oneapi::experimental::device_global< T, PropertyListT > &Dest, const void *Src, size_t NumBytes, size_t Offset, const std::vector< event > &DepEvents, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from a USM memory region to a device_global.
event ext_oneapi_graph(ext::oneapi::experimental::command_graph< ext::oneapi::experimental::graph_state::executable > Graph, event DepEvent, const detail::code_location &CodeLoc=detail::code_location::current())
Shortcut for executing a graph of commands with a single dependency.
std::enable_if_t< detail::AreAllButLastReductions< RestT... >::value, event > parallel_for(nd_range< Dims > Range, RestT &&...Rest)
parallel_for version with a kernel represented as a lambda + nd_range that specifies global,...
event copy(const std::remove_all_extents_t< T > *Src, ext::oneapi::experimental::device_global< T, PropertyListT > &Dest, size_t Count, size_t StartIndex, event DepEvent, const detail::code_location &CodeLoc=detail::code_location::current())
Copies elements of type std::remove_all_extents_t<T> from a USM memory region to a device_global.
event copy(const SrcT *Src, accessor< DestT, DestDims, DestMode, DestTgt, IsPlaceholder > Dest, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from a memory region pointed to by a raw pointer to another memory region pointed to by a...
event copy(accessor< SrcT, SrcDims, SrcMode, SrcTgt, IsPlaceholder > Src, std::shared_ptr< DestT > Dest, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from a memory region pointed to by a placeholder accessor to another memory region pointe...
event ext_oneapi_copy(ext::oneapi::experimental::image_mem_handle Src, void *Dest, const ext::oneapi::experimental::image_descriptor &SrcImgDesc, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from one memory region to another, where Src is an opaque image memory handle and Dest is...
std::enable_if_t< ext::oneapi::experimental::is_property_list< PropertiesT >::value, event > single_task(event DepEvent, PropertiesT Properties, _KERNELFUNCPARAM(KernelFunc), const detail::code_location &CodeLoc=detail::code_location::current())
single_task version with a kernel represented as a lambda.
event ext_oneapi_copy(void *Src, void *Dest, const ext::oneapi::experimental::image_descriptor &DeviceImgDesc, size_t DeviceRowPitch, event DepEvent, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from one memory region to another, where Src and Dest are USM pointers.
event parallel_for(range< 2 > Range, event DepEvent, RestT &&...Rest)
parallel_for version with a kernel represented as a lambda + range that specifies global size only.
event ext_oneapi_copy(ext::oneapi::experimental::image_mem_handle Src, sycl::range< 3 > SrcOffset, const ext::oneapi::experimental::image_descriptor &SrcImgDesc, void *Dest, sycl::range< 3 > DestOffset, sycl::range< 3 > DestExtent, sycl::range< 3 > CopyExtent, const std::vector< event > &DepEvents, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from one memory region to another, where Src is an opaque image memory handle and Dest is...
event copy(const ext::oneapi::experimental::device_global< T, PropertyListT > &Src, std::remove_all_extents_t< T > *Dest, size_t Count, size_t StartIndex, event DepEvent, const detail::code_location &CodeLoc=detail::code_location::current())
Copies elements of type std::remove_all_extents_t<T> from a device_global to a USM memory region.
event ext_oneapi_signal_external_semaphore(sycl::ext::oneapi::experimental::interop_semaphore_handle SemaphoreHandle, const std::vector< event > &DepEvents, const detail::code_location &CodeLoc=detail::code_location::current())
Instructs the queue to signal the external semaphore once all previous commands have completed execution.
std::enable_if_t< std::is_invocable_r_v< void, T, handler & >, event > submit(T CGF, queue &SecondaryQueue, const detail::code_location &CodeLoc=detail::code_location::current())
Submits a command group function object to the queue, in order to be scheduled for execution on the d...
event ext_oneapi_copy(void *Src, sycl::range< 3 > SrcOffset, sycl::range< 3 > SrcExtent, ext::oneapi::experimental::image_mem_handle Dest, sycl::range< 3 > DestOffset, const ext::oneapi::experimental::image_descriptor &DestImgDesc, sycl::range< 3 > CopyExtent, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from one memory region to another, where Src is a USM pointer and Dest is an opaque image...
event memcpy(void *Dest, const ext::oneapi::experimental::device_global< T, PropertyListT > &Src, size_t NumBytes, size_t Offset, const std::vector< event > &DepEvents, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from a device_global to USM memory.
event parallel_for(range< 2 > Range, const std::vector< event > &DepEvents, RestT &&...Rest)
parallel_for version with a kernel represented as a lambda + range that specifies global size only.
std::enable_if_t< detail::AreAllButLastReductions< RestT... >::value &&ext::oneapi::experimental::is_property_list< PropertiesT >::value, event > parallel_for(nd_range< Dims > Range, PropertiesT Properties, RestT &&...Rest)
parallel_for version with a kernel represented as a lambda + nd_range that specifies global,...
bool operator!=(const queue &RHS) const
Defines the iteration domain of either a single work-group in a parallel dispatch,...
class __SYCL2020_DEPRECATED("Host device is no longer supported.") host_selector int default_selector_v(const device &dev)
Selects SYCL host device.
bool isDeviceGlobalUsedInKernel(const void *DeviceGlobalPtr)
void defaultAsyncHandler(exception_list Exceptions)
decltype(Obj::impl) getSyclObjImpl(const Obj &SyclObject)
T createSyclObjFromImpl(decltype(T::impl) ImplObj)
device select_device(const DSelectorInvocableType &DeviceSelectorInvocable)
void mem_advise(handler &CGH, void *Ptr, size_t NumBytes, int Advice)
@ modifiable
In modifiable state, commands can be added to graph.
@ executable
In executable state, the graph is ready to execute.
static constexpr bool has_property()
constexpr device_has_key::value_t< Aspects... > device_has
static constexpr auto get_property()
void submit(queue Q, CommandGroupFunc &&CGF)
void fill(sycl::handler &CGH, T *Ptr, const T &Pattern, size_t Count)
decltype(properties{}) empty_properties_t
signed char __SYCL2020_DEPRECATED
auto get_native(const SyclObjectT &Obj) -> backend_return_t< BackendName, SyclObjectT >
typename backend_traits< Backend >::template return_type< SyclType > backend_return_t
class __SYCL_EBO __SYCL_SPECIAL_CLASS IsPlaceholder
std::function< void(sycl::exception_list)> async_handler
std::error_code make_error_code(sycl::errc E) noexcept
Constructs an error code using E and sycl_category().
static device_ext & get_device(unsigned int id)
Util function to get a device by id.
uintptr_t pi_native_handle
#define _KERNELFUNCPARAM(a)
_Abi const simd< _Tp, _Abi > & noexcept
Predicate returning true if all template type parameters except the last one are reductions.
static constexpr code_location current(const char *fileName=__CODELOC_FILE_NAME, const char *funcName=__CODELOC_FUNCTION, unsigned long lineNo=__CODELOC_LINE, unsigned long columnNo=__CODELOC_COLUMN) noexcept
A struct to describe the properties of an image.
Opaque image memory handle type.
Opaque interop semaphore handle type.