26 #ifndef __STDC_FORMAT_MACROS
27 #define __STDC_FORMAT_MACROS 1
44 #ifndef DISABLE_SYCL_INSTRUMENTATION_METADATA
45 #define _CODELOCONLYPARAM(a) \
46 const detail::code_location a = detail::code_location::current()
47 #define _CODELOCPARAM(a) \
48 , const detail::code_location a = detail::code_location::current()
50 #define _CODELOCARG(a)
51 #define _CODELOCFW(a) , a
53 #define _CODELOCONLYPARAM(a)
54 #define _CODELOCPARAM(a)
56 #define _CODELOCARG(a) const detail::code_location a = {}
62 #ifdef __SYCL_NONCONST_FUNCTOR__
63 #define _KERNELFUNCPARAM(a) KernelType a
65 #define _KERNELFUNCPARAM(a) const KernelType &a
70 #if defined(SYCL_FALLBACK_ASSERT)
71 #define __SYCL_USE_FALLBACK_ASSERT SYCL_FALLBACK_ASSERT
73 #define __SYCL_USE_FALLBACK_ASSERT 0
86 #if __SYCL_USE_FALLBACK_ASSERT
88 const detail::code_location &);
153 const property_list &PropList = {});
161 queue(
const context &SyclContext,
const device_selector &DeviceSelector,
162 const property_list &PropList = {});
172 queue(
const context &SyclContext,
const device_selector &DeviceSelector,
173 const async_handler &AsyncHandler,
const property_list &PropList = {});
181 queue(
const context &SyclContext,
const device &SyclDevice,
182 const property_list &PropList = {});
191 queue(
const context &SyclContext,
const device &SyclDevice,
192 const async_handler &AsyncHandler,
const property_list &PropList = {});
202 #ifdef __SYCL_INTERNAL_API
203 queue(cl_command_queue ClQueue,
const context &SyclContext,
221 #ifdef __SYCL_INTERNAL_API
222 cl_command_queue
get()
const;
229 device get_device()
const;
232 bool is_host()
const;
237 template <info::queue param>
243 bool device_has(
aspect Aspect)
const;
255 #if __SYCL_USE_FALLBACK_ASSERT
257 auto PostProcess = [
this, &CodeLoc](
bool IsKernel,
bool KernelUsesAssert,
259 if (IsKernel && !device_has(aspect::ext_oneapi_native_assert) &&
260 KernelUsesAssert && !device_has(aspect::accelerator)) {
265 submitAssertCapture(*
this, E,
nullptr,
270 auto Event = submit_impl_and_postprocess(CGF, CodeLoc, PostProcess);
271 return discard_or_return(Event);
273 #endif // __SYCL_USE_FALLBACK_ASSERT
275 auto Event = submit_impl(CGF, CodeLoc);
276 return discard_or_return(Event);
291 template <
typename T>
295 #if __SYCL_USE_FALLBACK_ASSERT
297 auto PostProcess = [
this, &SecondaryQueue, &CodeLoc](
298 bool IsKernel,
bool KernelUsesAssert,
event &E) {
299 if (IsKernel && !device_has(aspect::ext_oneapi_native_assert) &&
300 KernelUsesAssert && !device_has(aspect::accelerator)) {
305 queue *DeviceSecondaryQueue =
306 SecondaryQueue.
is_host() ? nullptr : &SecondaryQueue;
311 submitAssertCapture(*
this, E, DeviceSecondaryQueue, CodeLoc);
315 auto Event = submit_impl_and_postprocess(CGF, SecondaryQueue, CodeLoc,
317 return discard_or_return(Event);
319 #endif // __SYCL_USE_FALLBACK_ASSERT
321 auto Event = submit_impl(CGF, SecondaryQueue, CodeLoc);
322 return discard_or_return(Event);
348 return ext_oneapi_submit_barrier(CodeLoc);
361 const std::vector<event> &WaitList
_CODELOCPARAM(&CodeLoc)) {
362 return submit([=](
handler &CGH) {
380 return ext_oneapi_submit_barrier(WaitList, CodeLoc);
405 wait_and_throw_proxy(CodeLoc);
420 void throw_asynchronous();
424 template <
typename PropertyT>
bool has_property()
const;
429 template <
typename PropertyT> PropertyT
get_property()
const;
438 template <
typename T>
event fill(
void *Ptr,
const T &Pattern,
size_t Count) {
439 return submit([&](
handler &CGH) { CGH.
fill<T>(Ptr, Pattern, Count); });
450 template <
typename T>
451 event fill(
void *Ptr,
const T &Pattern,
size_t Count,
event DepEvent) {
452 return submit([&](
handler &CGH) {
454 CGH.
fill<T>(Ptr, Pattern, Count);
467 template <
typename T>
468 event fill(
void *Ptr,
const T &Pattern,
size_t Count,
469 const std::vector<event> &DepEvents) {
470 return submit([&](
handler &CGH) {
472 CGH.
fill<T>(Ptr, Pattern, Count);
485 event memset(
void *Ptr,
int Value,
size_t Count);
497 event memset(
void *Ptr,
int Value,
size_t Count,
event DepEvent);
510 event memset(
void *Ptr,
int Value,
size_t Count,
511 const std::vector<event> &DepEvents);
523 event memcpy(
void *Dest,
const void *Src,
size_t Count);
536 event memcpy(
void *Dest,
const void *Src,
size_t Count,
event DepEvent);
550 event memcpy(
void *Dest,
const void *Src,
size_t Count,
551 const std::vector<event> &DepEvents);
563 template <
typename T>
event copy(
const T *Src, T *Dest,
size_t Count) {
564 return this->
memcpy(Dest, Src, Count *
sizeof(T));
578 template <
typename T>
579 event copy(
const T *Src, T *Dest,
size_t Count,
event DepEvent) {
580 return this->
memcpy(Dest, Src, Count *
sizeof(T), DepEvent);
594 template <
typename T>
595 event copy(
const T *Src, T *Dest,
size_t Count,
596 const std::vector<event> &DepEvents) {
597 return this->
memcpy(Dest, Src, Count *
sizeof(T), DepEvents);
617 event mem_advise(const
void *Ptr,
size_t Length,
int Advice);
627 event mem_advise(const
void *Ptr,
size_t Length,
int Advice,
event DepEvent);
638 event mem_advise(const
void *Ptr,
size_t Length,
int Advice,
639 const
std::vector<
event> &DepEvents);
648 event prefetch(const
void *Ptr,
size_t Count) {
661 return submit([=](
handler &CGH) {
677 const std::vector<event> &DepEvents) {
678 return submit([=](
handler &CGH) {
688 template <
typename KernelName = detail::auto_name,
typename KernelType>
695 "sycl::queue.single_task() requires a kernel instead of command group. "
696 "Use queue.submit() instead");
700 CGH.template single_task<KernelName, KernelType>(
KernelFunc);
710 template <
typename KernelName = detail::auto_name,
typename KernelType>
718 "sycl::queue.single_task() requires a kernel instead of command group. "
719 "Use queue.submit() instead");
724 CGH.template single_task<KernelName, KernelType>(
KernelFunc);
735 template <
typename KernelName = detail::auto_name,
typename KernelType>
743 "sycl::queue.single_task() requires a kernel instead of command group. "
744 "Use queue.submit() instead");
749 CGH.template single_task<KernelName, KernelType>(
KernelFunc);
762 return parallel_for_impl<KernelName>(Range, Rest...);
773 return parallel_for_impl<KernelName>(Range, Rest...);
784 return parallel_for_impl<KernelName>(Range, Rest...);
796 return parallel_for_impl<KernelName>(Range, DepEvent, Rest...);
808 return parallel_for_impl<KernelName>(Range, DepEvent, Rest...);
820 return parallel_for_impl<KernelName>(Range, DepEvent, Rest...);
834 return parallel_for_impl<KernelName>(Range, DepEvents, Rest...);
848 return parallel_for_impl<KernelName>(Range, DepEvents, Rest...);
862 return parallel_for_impl<KernelName>(Range, DepEvents, Rest...);
875 event parallel_for_impl(
range<Dims> Range,
id<Dims> WorkItemOffset,
881 CGH.template parallel_for<KernelName>(Range, WorkItemOffset,
898 event parallel_for_impl(
range<Dims> Range,
id<Dims> WorkItemOffset,
905 CGH.template parallel_for<KernelName>(Range, WorkItemOffset,
923 event parallel_for_impl(
range<Dims> Range,
id<Dims> WorkItemOffset,
924 const
std::vector<
event> &DepEvents,
931 CGH.template parallel_for<KernelName>(Range, WorkItemOffset,
952 CGH.template parallel_for<KernelName>(Range, Rest...);
972 CGH.template parallel_for<KernelName>(Range, Rest...);
994 CGH.template parallel_for<KernelName>(Range, Rest...);
1000 #undef _CODELOCPARAM
1001 #undef _CODELOCONLYPARAM
1004 #undef _KERNELFUNCPARAM
1009 bool is_in_order()
const;
1014 backend get_backend() const noexcept;
1028 std::shared_ptr<detail::queue_impl> impl;
1029 queue(std::shared_ptr<detail::queue_impl> impl) : impl(impl) {}
1031 template <
class Obj>
1036 #if __SYCL_USE_FALLBACK_ASSERT
1037 friend event detail::submitAssertCapture(
queue &, event &,
queue *,
1038 const detail::code_location &);
1042 event submit_impl(std::function<
void(handler &)> CGH,
1043 const detail::code_location &CodeLoc);
1045 event submit_impl(std::function<
void(handler &)> CGH,
queue secondQueue,
1046 const detail::code_location &CodeLoc);
1050 event discard_or_return(
const event &Event);
1058 using SubmitPostProcessF = std::function<void(
bool,
bool, event &)>;
1065 event submit_impl_and_postprocess(std::function<
void(handler &)> CGH,
1066 const detail::code_location &CodeLoc,
1067 const SubmitPostProcessF &PostProcess);
1074 event submit_impl_and_postprocess(std::function<
void(handler &)> CGH,
1076 const detail::code_location &CodeLoc,
1077 const SubmitPostProcessF &PostProcess);
1084 template <
typename KernelName,
int Dims,
typename... RestT>
1086 ext::oneapi::detail::AreAllButLastReductions<RestT...>::value,
event>
1087 parallel_for_impl(range<Dims> Range, RestT &&...Rest) {
1089 const detail::code_location CodeLoc = {};
1092 CGH.template parallel_for<KernelName>(Range, Rest...);
1104 template <
typename KernelName,
int Dims,
typename... RestT>
1105 event parallel_for_impl(range<Dims> Range, event DepEvent, RestT &&...Rest) {
1107 const detail::code_location CodeLoc = {};
1110 CGH.depends_on(DepEvent);
1111 CGH.template parallel_for<KernelName>(Range, Rest...);
1123 template <
typename KernelName,
int Dims,
typename... RestT>
1124 event parallel_for_impl(range<Dims> Range,
1125 const std::vector<event> &DepEvents,
1128 const detail::code_location CodeLoc = {};
1131 CGH.depends_on(DepEvents);
1132 CGH.template parallel_for<KernelName>(Range, Rest...);
1137 buffer<detail::AssertHappened, 1> &getAssertHappenedBuffer();
1141 #if __SYCL_USE_FALLBACK_ASSERT
1142 #define __SYCL_ASSERT_START 1
1154 event submitAssertCapture(
queue &Self, event &Event,
queue *SecondaryQueue,
1155 const detail::code_location &CodeLoc) {
1156 using AHBufT = buffer<detail::AssertHappened, 1>;
1158 AHBufT &Buffer = Self.getAssertHappenedBuffer();
1160 event CopierEv, CheckerEv, PostCheckerEv;
1161 auto CopierCGF = [&](handler &CGH) {
1162 CGH.depends_on(Event);
1166 CGH.single_task<__sycl_service_kernel__::AssertInfoCopier>([Acc] {
1167 #if defined(__SYCL_DEVICE_ONLY__) && !defined(__NVPTX__)
1168 __devicelib_assert_read(&Acc[0]);
1171 #endif // defined(__SYCL_DEVICE_ONLY__) && !defined(__NVPTX__)
1174 auto CheckerCGF = [&CopierEv, &Buffer](handler &CGH) {
1175 CGH.depends_on(CopierEv);
1179 auto Acc = Buffer.get_access<mode::read, target::host_buffer>(CGH);
1182 const detail::AssertHappened *AH = &Acc[0];
1187 if (AH->Flag == __SYCL_ASSERT_START)
1188 throw sycl::runtime_error(
1189 "Internal Error. Invalid value in assert description.",
1190 PI_ERROR_INVALID_VALUE);
1194 const char *Expr = AH->Expr[0] ? AH->Expr :
"<unknown expr>";
1195 const char *File = AH->File[0] ? AH->File :
"<unknown file>";
1196 const char *Func = AH->Func[0] ? AH->Func :
"<unknown func>";
1199 "%s:%d: %s: global id: [%" PRIu64
",%" PRIu64
",%" PRIu64
1200 "], local id: [%" PRIu64
",%" PRIu64
",%" PRIu64
"] "
1201 "Assertion `%s` failed.\n",
1202 File, AH->Line, Func, AH->GID0, AH->GID1, AH->GID2, AH->LID0,
1203 AH->LID1, AH->LID2, Expr);
1210 if (SecondaryQueue) {
1211 CopierEv = Self.submit_impl(CopierCGF, *SecondaryQueue, CodeLoc);
1212 CheckerEv = Self.submit_impl(CheckerCGF, *SecondaryQueue, CodeLoc);
1214 CopierEv = Self.submit_impl(CopierCGF, CodeLoc);
1215 CheckerEv = Self.submit_impl(CheckerCGF, CodeLoc);
1220 #undef __SYCL_ASSERT_START
1221 #endif // __SYCL_USE_FALLBACK_ASSERT
1230 return std::hash<std::shared_ptr<cl::sycl::detail::queue_impl>>()(
1236 #undef __SYCL_USE_FALLBACK_ASSERT