DPC++ Runtime
Runtime libraries for oneAPI DPC++
queue.hpp
1 //==-------------------- queue.hpp - SYCL queue ----------------------------==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #pragma once
10 
11 #include <CL/sycl/detail/assert_happened.hpp>
12 #include <CL/sycl/detail/backend_traits.hpp>
13 #include <CL/sycl/detail/common.hpp>
14 #include <CL/sycl/detail/export.hpp>
15 #include <CL/sycl/detail/service_kernel_names.hpp>
16 #include <CL/sycl/device.hpp>
17 #include <CL/sycl/device_selector.hpp>
18 #include <CL/sycl/event.hpp>
19 #include <CL/sycl/exception_list.hpp>
20 #include <CL/sycl/handler.hpp>
21 #include <CL/sycl/info/info_desc.hpp>
22 #include <CL/sycl/property_list.hpp>
23 #include <CL/sycl/stl.hpp>
24 
25 // Explicitly request format macros
26 #ifndef __STDC_FORMAT_MACROS
27 #define __STDC_FORMAT_MACROS 1
28 #endif
29 #include <cinttypes>
30 #include <utility>
31 
32 // having _TWO_ mid-param #ifdefs makes the functions very difficult to read.
33 // Here we simplify the &CodeLoc declaration to be _CODELOCPARAM(&CodeLoc) and
34 // _CODELOCARG(&CodeLoc) Similarly, the KernelFunc param is simplified to be
35 // _KERNELFUNCPARAM(KernelFunc) Once the queue kernel functions are defined,
36 // these macros are #undef immediately.
37 
38 // replace _CODELOCPARAM(&CodeLoc) with nothing
39 // or : , const detail::code_location &CodeLoc =
40 // detail::code_location::current()
41 // replace _CODELOCARG(&CodeLoc) with nothing
42 // or : const detail::code_location &CodeLoc = {}
43 
44 #ifndef DISABLE_SYCL_INSTRUMENTATION_METADATA
45 #define _CODELOCONLYPARAM(a) \
46  const detail::code_location a = detail::code_location::current()
47 #define _CODELOCPARAM(a) \
48  , const detail::code_location a = detail::code_location::current()
49 
50 #define _CODELOCARG(a)
51 #define _CODELOCFW(a) , a
52 #else
53 #define _CODELOCONLYPARAM(a)
54 #define _CODELOCPARAM(a)
55 
56 #define _CODELOCARG(a) const detail::code_location a = {}
57 #define _CODELOCFW(a)
58 #endif
59 
60 // replace _KERNELFUNCPARAM(KernelFunc) with KernelType KernelFunc
61 // or const KernelType &KernelFunc
62 #ifdef __SYCL_NONCONST_FUNCTOR__
63 #define _KERNELFUNCPARAM(a) KernelType a
64 #else
65 #define _KERNELFUNCPARAM(a) const KernelType &a
66 #endif
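// Illustration (not part of the original header): with instrumentation
// metadata enabled and __SYCL_NONCONST_FUNCTOR__ undefined, a declaration
// such as
//   event single_task(_KERNELFUNCPARAM(KernelFunc) _CODELOCPARAM(&CodeLoc));
// expands to roughly
//   event single_task(const KernelType &KernelFunc,
//                     const detail::code_location &CodeLoc =
//                         detail::code_location::current());
// while _CODELOCARG(&CodeLoc) expands to nothing in that configuration and
// to a local `const detail::code_location CodeLoc = {}` when
// DISABLE_SYCL_INSTRUMENTATION_METADATA is defined.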
67 
68 // Helper macro to identify if fallback assert is needed
69 // FIXME remove __NVPTX__ condition once devicelib supports CUDA
70 #if defined(SYCL_FALLBACK_ASSERT)
71 #define __SYCL_USE_FALLBACK_ASSERT SYCL_FALLBACK_ASSERT
72 #else
73 #define __SYCL_USE_FALLBACK_ASSERT 0
74 #endif
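// Illustration (not part of the original header): building with
// -DSYCL_FALLBACK_ASSERT=1 (or defining SYCL_FALLBACK_ASSERT before this
// header is included) makes __SYCL_USE_FALLBACK_ASSERT nonzero, which
// enables the assert-capture post-processing path in queue::submit() below.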
75 
76 __SYCL_INLINE_NAMESPACE(cl) {
77 namespace sycl {
78 
79 // Forward declaration
80 class context;
81 class device;
82 class queue;
83 
84 namespace detail {
85 class queue_impl;
86 #if __SYCL_USE_FALLBACK_ASSERT
87 static event submitAssertCapture(queue &, event &, queue *,
88  const detail::code_location &);
89 #endif
90 } // namespace detail
91 
103 class __SYCL_EXPORT queue {
104 public:
109  explicit queue(const property_list &PropList = {})
110  : queue(default_selector(), async_handler{}, PropList) {}
111 
117  queue(const async_handler &AsyncHandler, const property_list &PropList = {})
118  : queue(default_selector(), AsyncHandler, PropList) {}
119 
125  queue(const device_selector &DeviceSelector,
126  const property_list &PropList = {})
127  : queue(DeviceSelector.select_device(), async_handler{}, PropList) {}
128 
135  queue(const device_selector &DeviceSelector,
136  const async_handler &AsyncHandler, const property_list &PropList = {})
137  : queue(DeviceSelector.select_device(), AsyncHandler, PropList) {}
138 
143  explicit queue(const device &SyclDevice, const property_list &PropList = {})
144  : queue(SyclDevice, async_handler{}, PropList) {}
145 
152  explicit queue(const device &SyclDevice, const async_handler &AsyncHandler,
153  const property_list &PropList = {});
154 
161  queue(const context &SyclContext, const device_selector &DeviceSelector,
162  const property_list &PropList = {});
163 
172  queue(const context &SyclContext, const device_selector &DeviceSelector,
173  const async_handler &AsyncHandler, const property_list &PropList = {});
174 
181  queue(const context &SyclContext, const device &SyclDevice,
182  const property_list &PropList = {});
183 
191  queue(const context &SyclContext, const device &SyclDevice,
192  const async_handler &AsyncHandler, const property_list &PropList = {});
193 
202 #ifdef __SYCL_INTERNAL_API
203  queue(cl_command_queue ClQueue, const context &SyclContext,
204  const async_handler &AsyncHandler = {});
205 #endif
206 
207  queue(const queue &RHS) = default;
208 
209  queue(queue &&RHS) = default;
210 
211  queue &operator=(const queue &RHS) = default;
212 
213  queue &operator=(queue &&RHS) = default;
214 
215  bool operator==(const queue &RHS) const { return impl == RHS.impl; }
216 
217  bool operator!=(const queue &RHS) const { return !(*this == RHS); }
218 
221 #ifdef __SYCL_INTERNAL_API
222  cl_command_queue get() const;
223 #endif
224 
226  context get_context() const;
227 
229  device get_device() const;
230 
232  bool is_host() const;
233 
237  template <info::queue param>
238  typename info::param_traits<info::queue, param>::return_type get_info() const;
239 
240 private:
241  // A shorthand for `get_device().has()' which is expected to be a bit quicker
242  // than the long version
243  bool device_has(aspect Aspect) const;
244 
245 public:
252  template <typename T> event submit(T CGF _CODELOCPARAM(&CodeLoc)) {
253  _CODELOCARG(&CodeLoc);
254 
255 #if __SYCL_USE_FALLBACK_ASSERT
256  if (!is_host()) {
257  auto PostProcess = [this, &CodeLoc](bool IsKernel, bool KernelUsesAssert,
258  event &E) {
259  if (IsKernel && !device_has(aspect::ext_oneapi_native_assert) &&
260  KernelUsesAssert && !device_has(aspect::accelerator)) {
261  // __devicelib_assert_fail isn't supported by Device-side Runtime
262  // Linking against fallback impl of __devicelib_assert_fail is
263  // performed by program manager class
264  // Fallback assert isn't supported for FPGA
265  submitAssertCapture(*this, E, /* SecondaryQueue = */ nullptr,
266  CodeLoc);
267  }
268  };
269 
270  auto Event = submit_impl_and_postprocess(CGF, CodeLoc, PostProcess);
271  return discard_or_return(Event);
272  } else
273 #endif // __SYCL_USE_FALLBACK_ASSERT
274  {
275  auto Event = submit_impl(CGF, CodeLoc);
276  return discard_or_return(Event);
277  }
278  }
279 
291  template <typename T>
292  event submit(T CGF, queue &SecondaryQueue _CODELOCPARAM(&CodeLoc)) {
293  _CODELOCARG(&CodeLoc);
294 
295 #if __SYCL_USE_FALLBACK_ASSERT
296  if (!is_host()) {
297  auto PostProcess = [this, &SecondaryQueue, &CodeLoc](
298  bool IsKernel, bool KernelUsesAssert, event &E) {
299  if (IsKernel && !device_has(aspect::ext_oneapi_native_assert) &&
300  KernelUsesAssert && !device_has(aspect::accelerator)) {
301  // Only secondary queues on devices need to be added to the assert
302  // capture.
303  // TODO: Handle case where primary queue is host but the secondary
304  // queue is not.
305  queue *DeviceSecondaryQueue =
306  SecondaryQueue.is_host() ? nullptr : &SecondaryQueue;
307  // __devicelib_assert_fail isn't supported by Device-side Runtime
308  // Linking against fallback impl of __devicelib_assert_fail is
309  // performed by program manager class
310  // Fallback assert isn't supported for FPGA
311  submitAssertCapture(*this, E, DeviceSecondaryQueue, CodeLoc);
312  }
313  };
314 
315  auto Event = submit_impl_and_postprocess(CGF, SecondaryQueue, CodeLoc,
316  PostProcess);
317  return discard_or_return(Event);
318  } else
319 #endif // __SYCL_USE_FALLBACK_ASSERT
320  {
321  auto Event = submit_impl(CGF, SecondaryQueue, CodeLoc);
322  return discard_or_return(Event);
323  }
324  }
325 
333  event ext_oneapi_submit_barrier(_CODELOCONLYPARAM(&CodeLoc)) {
334  return submit(
335  [=](handler &CGH) { CGH.ext_oneapi_barrier(); } _CODELOCFW(CodeLoc));
336  }
337 
345  __SYCL2020_DEPRECATED("use 'ext_oneapi_submit_barrier' instead")
346  event submit_barrier(_CODELOCONLYPARAM(&CodeLoc)) {
347  _CODELOCARG(&CodeLoc);
348  return ext_oneapi_submit_barrier(CodeLoc);
349  }
350 
360  event ext_oneapi_submit_barrier(
361  const std::vector<event> &WaitList _CODELOCPARAM(&CodeLoc)) {
362  return submit([=](handler &CGH) {
363  CGH.ext_oneapi_barrier(WaitList);
364  } _CODELOCFW(CodeLoc));
365  }
366 
376  __SYCL2020_DEPRECATED("use 'ext_oneapi_submit_barrier' instead")
377  event
378  submit_barrier(const std::vector<event> &WaitList _CODELOCPARAM(&CodeLoc)) {
379  _CODELOCARG(&CodeLoc);
380  return ext_oneapi_submit_barrier(WaitList, CodeLoc);
381  }
382 
388  void wait(_CODELOCONLYPARAM(&CodeLoc)) {
389  _CODELOCARG(&CodeLoc);
390 
391  wait_proxy(CodeLoc);
392  }
393 
402  void wait_and_throw(_CODELOCONLYPARAM(&CodeLoc)) {
403  _CODELOCARG(&CodeLoc);
404 
405  wait_and_throw_proxy(CodeLoc);
406  }
407 
410  void wait_proxy(const detail::code_location &CodeLoc);
413  void wait_and_throw_proxy(const detail::code_location &CodeLoc);
414 
420  void throw_asynchronous();
421 
424  template <typename PropertyT> bool has_property() const;
425 
429  template <typename PropertyT> PropertyT get_property() const;
430 
438  template <typename T> event fill(void *Ptr, const T &Pattern, size_t Count) {
439  return submit([&](handler &CGH) { CGH.fill<T>(Ptr, Pattern, Count); });
440  }
441 
450  template <typename T>
451  event fill(void *Ptr, const T &Pattern, size_t Count, event DepEvent) {
452  return submit([&](handler &CGH) {
453  CGH.depends_on(DepEvent);
454  CGH.fill<T>(Ptr, Pattern, Count);
455  });
456  }
457 
467  template <typename T>
468  event fill(void *Ptr, const T &Pattern, size_t Count,
469  const std::vector<event> &DepEvents) {
470  return submit([&](handler &CGH) {
471  CGH.depends_on(DepEvents);
472  CGH.fill<T>(Ptr, Pattern, Count);
473  });
474  }
475 
485  event memset(void *Ptr, int Value, size_t Count);
486 
497  event memset(void *Ptr, int Value, size_t Count, event DepEvent);
498 
510  event memset(void *Ptr, int Value, size_t Count,
511  const std::vector<event> &DepEvents);
512 
523  event memcpy(void *Dest, const void *Src, size_t Count);
524 
536  event memcpy(void *Dest, const void *Src, size_t Count, event DepEvent);
537 
550  event memcpy(void *Dest, const void *Src, size_t Count,
551  const std::vector<event> &DepEvents);
552 
563  template <typename T> event copy(const T *Src, T *Dest, size_t Count) {
564  return this->memcpy(Dest, Src, Count * sizeof(T));
565  }
566 
578  template <typename T>
579  event copy(const T *Src, T *Dest, size_t Count, event DepEvent) {
580  return this->memcpy(Dest, Src, Count * sizeof(T), DepEvent);
581  }
582 
594  template <typename T>
595  event copy(const T *Src, T *Dest, size_t Count,
596  const std::vector<event> &DepEvents) {
597  return this->memcpy(Dest, Src, Count * sizeof(T), DepEvents);
598  }
599 
607  __SYCL2020_DEPRECATED("use the overload with int Advice instead")
608  event mem_advise(const void *Ptr, size_t Length, pi_mem_advice Advice);
609 
617  event mem_advise(const void *Ptr, size_t Length, int Advice);
618 
627  event mem_advise(const void *Ptr, size_t Length, int Advice, event DepEvent);
628 
638  event mem_advise(const void *Ptr, size_t Length, int Advice,
639  const std::vector<event> &DepEvents);
640 
648  event prefetch(const void *Ptr, size_t Count) {
649  return submit([=](handler &CGH) { CGH.prefetch(Ptr, Count); });
650  }
651 
660  event prefetch(const void *Ptr, size_t Count, event DepEvent) {
661  return submit([=](handler &CGH) {
662  CGH.depends_on(DepEvent);
663  CGH.prefetch(Ptr, Count);
664  });
665  }
666 
676  event prefetch(const void *Ptr, size_t Count,
677  const std::vector<event> &DepEvents) {
678  return submit([=](handler &CGH) {
679  CGH.depends_on(DepEvents);
680  CGH.prefetch(Ptr, Count);
681  });
682  }
683 
688  template <typename KernelName = detail::auto_name, typename KernelType>
689  event single_task(_KERNELFUNCPARAM(KernelFunc) _CODELOCPARAM(&CodeLoc)) {
690  static_assert(
691  (detail::check_fn_signature<detail::remove_reference_t<KernelType>,
692  void()>::value ||
693  detail::check_fn_signature<detail::remove_reference_t<KernelType>,
694  void(kernel_handler)>::value),
695  "sycl::queue.single_task() requires a kernel instead of command group. "
696  "Use queue.submit() instead");
697  _CODELOCARG(&CodeLoc);
698  return submit(
699  [&](handler &CGH) {
700  CGH.template single_task<KernelName, KernelType>(KernelFunc);
701  },
702  CodeLoc);
703  }
704 
710  template <typename KernelName = detail::auto_name, typename KernelType>
711  event single_task(event DepEvent,
712  _KERNELFUNCPARAM(KernelFunc) _CODELOCPARAM(&CodeLoc)) {
713  static_assert(
714  (detail::check_fn_signature<detail::remove_reference_t<KernelType>,
715  void()>::value ||
716  detail::check_fn_signature<detail::remove_reference_t<KernelType>,
717  void(kernel_handler)>::value),
718  "sycl::queue.single_task() requires a kernel instead of command group. "
719  "Use queue.submit() instead");
720  _CODELOCARG(&CodeLoc);
721  return submit(
722  [&](handler &CGH) {
723  CGH.depends_on(DepEvent);
724  CGH.template single_task<KernelName, KernelType>(KernelFunc);
725  },
726  CodeLoc);
727  }
728 
735  template <typename KernelName = detail::auto_name, typename KernelType>
736  event single_task(const std::vector<event> &DepEvents,
737  _KERNELFUNCPARAM(KernelFunc) _CODELOCPARAM(&CodeLoc)) {
738  static_assert(
739  (detail::check_fn_signature<detail::remove_reference_t<KernelType>,
740  void()>::value ||
741  detail::check_fn_signature<detail::remove_reference_t<KernelType>,
742  void(kernel_handler)>::value),
743  "sycl::queue.single_task() requires a kernel instead of command group. "
744  "Use queue.submit() instead");
745  _CODELOCARG(&CodeLoc);
746  return submit(
747  [&](handler &CGH) {
748  CGH.depends_on(DepEvents);
749  CGH.template single_task<KernelName, KernelType>(KernelFunc);
750  },
751  CodeLoc);
752  }
753 
760  template <typename KernelName = detail::auto_name, typename... RestT>
761  event parallel_for(range<1> Range, RestT &&...Rest) {
762  return parallel_for_impl<KernelName>(Range, Rest...);
763  }
764 
771  template <typename KernelName = detail::auto_name, typename... RestT>
772  event parallel_for(range<2> Range, RestT &&...Rest) {
773  return parallel_for_impl<KernelName>(Range, Rest...);
774  }
775 
782  template <typename KernelName = detail::auto_name, typename... RestT>
783  event parallel_for(range<3> Range, RestT &&...Rest) {
784  return parallel_for_impl<KernelName>(Range, Rest...);
785  }
786 
794  template <typename KernelName = detail::auto_name, typename... RestT>
795  event parallel_for(range<1> Range, event DepEvent, RestT &&...Rest) {
796  return parallel_for_impl<KernelName>(Range, DepEvent, Rest...);
797  }
798 
806  template <typename KernelName = detail::auto_name, typename... RestT>
807  event parallel_for(range<2> Range, event DepEvent, RestT &&...Rest) {
808  return parallel_for_impl<KernelName>(Range, DepEvent, Rest...);
809  }
810 
818  template <typename KernelName = detail::auto_name, typename... RestT>
819  event parallel_for(range<3> Range, event DepEvent, RestT &&...Rest) {
820  return parallel_for_impl<KernelName>(Range, DepEvent, Rest...);
821  }
822 
831  template <typename KernelName = detail::auto_name, typename... RestT>
832  event parallel_for(range<1> Range, const std::vector<event> &DepEvents,
833  RestT &&...Rest) {
834  return parallel_for_impl<KernelName>(Range, DepEvents, Rest...);
835  }
836 
845  template <typename KernelName = detail::auto_name, typename... RestT>
846  event parallel_for(range<2> Range, const std::vector<event> &DepEvents,
847  RestT &&...Rest) {
848  return parallel_for_impl<KernelName>(Range, DepEvents, Rest...);
849  }
850 
859  template <typename KernelName = detail::auto_name, typename... RestT>
860  event parallel_for(range<3> Range, const std::vector<event> &DepEvents,
861  RestT &&...Rest) {
862  return parallel_for_impl<KernelName>(Range, DepEvents, Rest...);
863  }
864 
872  template <typename KernelName = detail::auto_name, typename KernelType,
873  int Dims>
874  __SYCL2020_DEPRECATED("offsets are deprecated in SYCL 2020")
875  event parallel_for_impl(range<Dims> Range, id<Dims> WorkItemOffset,
876  _KERNELFUNCPARAM(KernelFunc)) {
877  // Actual code location needs to be captured from KernelInfo object.
878  const detail::code_location CodeLoc = {};
879  return submit(
880  [&](handler &CGH) {
881  CGH.template parallel_for<KernelName>(Range, WorkItemOffset,
882  KernelFunc);
883  },
884  CodeLoc);
885  }
886 
895  template <typename KernelName = detail::auto_name, typename KernelType,
896  int Dims>
897  __SYCL2020_DEPRECATED("offsets are deprecated in SYCL 2020")
898  event parallel_for_impl(range<Dims> Range, id<Dims> WorkItemOffset,
899  event DepEvent, _KERNELFUNCPARAM(KernelFunc)) {
900  // Actual code location needs to be captured from KernelInfo object.
901  const detail::code_location CodeLoc = {};
902  return submit(
903  [&](handler &CGH) {
904  CGH.depends_on(DepEvent);
905  CGH.template parallel_for<KernelName>(Range, WorkItemOffset,
906  KernelFunc);
907  },
908  CodeLoc);
909  }
910 
920  template <typename KernelName = detail::auto_name, typename KernelType,
921  int Dims>
922  __SYCL2020_DEPRECATED("offsets are deprecated in SYCL 2020")
923  event parallel_for_impl(range<Dims> Range, id<Dims> WorkItemOffset,
924  const std::vector<event> &DepEvents,
925  _KERNELFUNCPARAM(KernelFunc)) {
926  // Actual code location needs to be captured from KernelInfo object.
927  const detail::code_location CodeLoc = {};
928  return submit(
929  [&](handler &CGH) {
930  CGH.depends_on(DepEvents);
931  CGH.template parallel_for<KernelName>(Range, WorkItemOffset,
932  KernelFunc);
933  },
934  CodeLoc);
935  }
936 
943  template <typename KernelName = detail::auto_name, int Dims,
944  typename... RestT>
945  std::enable_if_t<
946  ext::oneapi::detail::AreAllButLastReductions<RestT...>::value, event>
947  parallel_for(nd_range<Dims> Range, RestT &&...Rest) {
948  // Actual code location needs to be captured from KernelInfo object.
949  const detail::code_location CodeLoc = {};
950  return submit(
951  [&](handler &CGH) {
952  CGH.template parallel_for<KernelName>(Range, Rest...);
953  },
954  CodeLoc);
955  }
956 
964  template <typename KernelName = detail::auto_name, int Dims,
965  typename... RestT>
966  event parallel_for(nd_range<Dims> Range, event DepEvent, RestT &&...Rest) {
967  // Actual code location needs to be captured from KernelInfo object.
968  const detail::code_location CodeLoc = {};
969  return submit(
970  [&](handler &CGH) {
971  CGH.depends_on(DepEvent);
972  CGH.template parallel_for<KernelName>(Range, Rest...);
973  },
974  CodeLoc);
975  }
976 
985  template <typename KernelName = detail::auto_name, int Dims,
986  typename... RestT>
987  event parallel_for(nd_range<Dims> Range, const std::vector<event> &DepEvents,
988  RestT &&...Rest) {
989  // Actual code location needs to be captured from KernelInfo object.
990  const detail::code_location CodeLoc = {};
991  return submit(
992  [&](handler &CGH) {
993  CGH.depends_on(DepEvents);
994  CGH.template parallel_for<KernelName>(Range, Rest...);
995  },
996  CodeLoc);
997  }
998 
999 // Clean up CODELOC and KERNELFUNC macros.
1000 #undef _CODELOCPARAM
1001 #undef _CODELOCONLYPARAM
1002 #undef _CODELOCARG
1003 #undef _CODELOCFW
1004 #undef _KERNELFUNCPARAM
1005 
1009  bool is_in_order() const;
1010 
1014  backend get_backend() const noexcept;
1015 
1019  template <backend Backend>
1020  __SYCL_DEPRECATED("Use SYCL 2020 sycl::get_native free function")
1021  backend_return_t<Backend, queue> get_native() const {
1022  return reinterpret_cast<backend_return_t<Backend, queue>>(getNative());
1023  }
1024 
1025 private:
1026  pi_native_handle getNative() const;
1027 
1028  std::shared_ptr<detail::queue_impl> impl;
1029  queue(std::shared_ptr<detail::queue_impl> impl) : impl(impl) {}
1030 
1031  template <class Obj>
1032  friend decltype(Obj::impl) detail::getSyclObjImpl(const Obj &SyclObject);
1033  template <class T>
1034  friend T detail::createSyclObjFromImpl(decltype(T::impl) ImplObj);
1035 
1036 #if __SYCL_USE_FALLBACK_ASSERT
1037  friend event detail::submitAssertCapture(queue &, event &, queue *,
1038  const detail::code_location &);
1039 #endif
1040 
1042  event submit_impl(std::function<void(handler &)> CGH,
1043  const detail::code_location &CodeLoc);
1045  event submit_impl(std::function<void(handler &)> CGH, queue secondQueue,
1046  const detail::code_location &CodeLoc);
1047 
1050  event discard_or_return(const event &Event);
1051 
1052  // Function to postprocess submitted command
1053  // Arguments:
1054  // bool IsKernel - true if the submitted command was kernel, false otherwise
1055  // bool KernelUsesAssert - true if submitted kernel uses assert, only
1056  // meaningful when IsKernel is true
1057  // event &Event - event after which post processing should be executed
1058  using SubmitPostProcessF = std::function<void(bool, bool, event &)>;
1059 
1065  event submit_impl_and_postprocess(std::function<void(handler &)> CGH,
1066  const detail::code_location &CodeLoc,
1067  const SubmitPostProcessF &PostProcess);
1074  event submit_impl_and_postprocess(std::function<void(handler &)> CGH,
1075  queue secondQueue,
1076  const detail::code_location &CodeLoc,
1077  const SubmitPostProcessF &PostProcess);
1078 
1084  template <typename KernelName, int Dims, typename... RestT>
1085  detail::enable_if_t<
1086  ext::oneapi::detail::AreAllButLastReductions<RestT...>::value, event>
1087  parallel_for_impl(range<Dims> Range, RestT &&...Rest) {
1088  // Actual code location needs to be captured from KernelInfo object.
1089  const detail::code_location CodeLoc = {};
1090  return submit(
1091  [&](handler &CGH) {
1092  CGH.template parallel_for<KernelName>(Range, Rest...);
1093  },
1094  CodeLoc);
1095  }
1096 
1104  template <typename KernelName, int Dims, typename... RestT>
1105  event parallel_for_impl(range<Dims> Range, event DepEvent, RestT &&...Rest) {
1106  // Actual code location needs to be captured from KernelInfo object.
1107  const detail::code_location CodeLoc = {};
1108  return submit(
1109  [&](handler &CGH) {
1110  CGH.depends_on(DepEvent);
1111  CGH.template parallel_for<KernelName>(Range, Rest...);
1112  },
1113  CodeLoc);
1114  }
1115 
1123  template <typename KernelName, int Dims, typename... RestT>
1124  event parallel_for_impl(range<Dims> Range,
1125  const std::vector<event> &DepEvents,
1126  RestT &&...Rest) {
1127  // Actual code location needs to be captured from KernelInfo object.
1128  const detail::code_location CodeLoc = {};
1129  return submit(
1130  [&](handler &CGH) {
1131  CGH.depends_on(DepEvents);
1132  CGH.template parallel_for<KernelName>(Range, Rest...);
1133  },
1134  CodeLoc);
1135  }
1136 
1137  buffer<detail::AssertHappened, 1> &getAssertHappenedBuffer();
1138 };
1139 
1140 namespace detail {
1141 #if __SYCL_USE_FALLBACK_ASSERT
1142 #define __SYCL_ASSERT_START 1
1143 
1154 event submitAssertCapture(queue &Self, event &Event, queue *SecondaryQueue,
1155  const detail::code_location &CodeLoc) {
1156  using AHBufT = buffer<detail::AssertHappened, 1>;
1157 
1158  AHBufT &Buffer = Self.getAssertHappenedBuffer();
1159 
1160  event CopierEv, CheckerEv, PostCheckerEv;
1161  auto CopierCGF = [&](handler &CGH) {
1162  CGH.depends_on(Event);
1163 
1164  auto Acc = Buffer.get_access<access::mode::write>(CGH);
1165 
1166  CGH.single_task<__sycl_service_kernel__::AssertInfoCopier>([Acc] {
1167 #if defined(__SYCL_DEVICE_ONLY__) && !defined(__NVPTX__)
1168  __devicelib_assert_read(&Acc[0]);
1169 #else
1170  (void)Acc;
1171 #endif // defined(__SYCL_DEVICE_ONLY__) && !defined(__NVPTX__)
1172  });
1173  };
1174  auto CheckerCGF = [&CopierEv, &Buffer](handler &CGH) {
1175  CGH.depends_on(CopierEv);
1176  using mode = access::mode;
1177  using target = access::target;
1178 
1179  auto Acc = Buffer.get_access<mode::read, target::host_buffer>(CGH);
1180 
1181  CGH.host_task([=] {
1182  const detail::AssertHappened *AH = &Acc[0];
1183 
1184  // Don't use assert here as msvc will insert reference to __imp__wassert
1185  // which won't be properly resolved in separate compile use-case
1186 #ifndef NDEBUG
1187  if (AH->Flag == __SYCL_ASSERT_START)
1188  throw sycl::runtime_error(
1189  "Internal Error. Invalid value in assert description.",
1190  PI_ERROR_INVALID_VALUE);
1191 #endif
1192 
1193  if (AH->Flag) {
1194  const char *Expr = AH->Expr[0] ? AH->Expr : "<unknown expr>";
1195  const char *File = AH->File[0] ? AH->File : "<unknown file>";
1196  const char *Func = AH->Func[0] ? AH->Func : "<unknown func>";
1197 
1198  fprintf(stderr,
1199  "%s:%d: %s: global id: [%" PRIu64 ",%" PRIu64 ",%" PRIu64
1200  "], local id: [%" PRIu64 ",%" PRIu64 ",%" PRIu64 "] "
1201  "Assertion `%s` failed.\n",
1202  File, AH->Line, Func, AH->GID0, AH->GID1, AH->GID2, AH->LID0,
1203  AH->LID1, AH->LID2, Expr);
1204  fflush(stderr);
1205  abort(); // no need to release memory as it's abort anyway
1206  }
1207  });
1208  };
1209 
1210  if (SecondaryQueue) {
1211  CopierEv = Self.submit_impl(CopierCGF, *SecondaryQueue, CodeLoc);
1212  CheckerEv = Self.submit_impl(CheckerCGF, *SecondaryQueue, CodeLoc);
1213  } else {
1214  CopierEv = Self.submit_impl(CopierCGF, CodeLoc);
1215  CheckerEv = Self.submit_impl(CheckerCGF, CodeLoc);
1216  }
1217 
1218  return CheckerEv;
1219 }
1220 #undef __SYCL_ASSERT_START
1221 #endif // __SYCL_USE_FALLBACK_ASSERT
1222 } // namespace detail
1223 
1224 } // namespace sycl
1225 } // __SYCL_INLINE_NAMESPACE(cl)
1226 
1227 namespace std {
1228 template <> struct hash<cl::sycl::queue> {
1229  size_t operator()(const cl::sycl::queue &Q) const {
1230  return std::hash<std::shared_ptr<cl::sycl::detail::queue_impl>>()(
1232  }
1233 };
1234 } // namespace std
1235 
1236 #undef __SYCL_USE_FALLBACK_ASSERT
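A minimal usage sketch (not part of the header) of the queue interface declared above: construct a queue, launch a kernel through the parallel_for shortcut, and copy results back with the USM memcpy member. The umbrella header <CL/sycl.hpp>, default_selector, and the USM malloc_shared/free helpers are assumed to come from the wider runtime rather than from this file.

#include <CL/sycl.hpp>
#include <cstdio>

int main() {
  // Queue on the device chosen by the default selector; an async_handler
  // and a property_list could be passed through the other constructors.
  cl::sycl::queue Q{cl::sycl::default_selector{}};

  constexpr size_t N = 16;
  // USM shared allocation visible to both host and device.
  int *Data = cl::sycl::malloc_shared<int>(N, Q);

  // Kernel launch via the range<1> parallel_for shortcut; returns an event.
  cl::sycl::event K = Q.parallel_for<class fill_iota>(
      cl::sycl::range<1>{N},
      [=](cl::sycl::id<1> I) { Data[I[0]] = static_cast<int>(I[0]); });

  // Copy back with the memcpy overload that takes a dependent event, then
  // block on the returned event.
  int Host[N] = {};
  Q.memcpy(Host, Data, N * sizeof(int), K).wait();

  std::printf("Host[5] = %d\n", Host[5]);
  cl::sycl::free(Data, Q);
  return 0;
}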
cl::sycl::nd_range
Defines the iteration domain of both the work-groups and the overall dispatch.
Definition: nd_range.hpp:23
cl::sycl::backend
backend
Definition: backend_types.hpp:21
property_list.hpp
cl::sycl::device_selector
The device_selector class provides ability to choose the best SYCL device based on heuristics specifi...
Definition: device_selector.hpp:28
cl::sycl::detail::check_fn_signature
Definition: cg_types.hpp:126
cl::sycl::backend_return_t
typename backend_traits< Backend >::template return_type< SyclType > backend_return_t
Definition: backend.hpp:72
cl::sycl::event
An event object can be used to synchronize memory transfers, enqueues of kernels and signaling barrie...
Definition: event.hpp:32
cl::sycl::queue::parallel_for
event parallel_for(range< 3 > Range, event DepEvent, RestT &&...Rest)
parallel_for version with a kernel represented as a lambda + range that specifies global size only.
Definition: queue.hpp:819
cl::sycl::device_selector::select_device
virtual device select_device() const
Definition: device_selector.cpp:53
cl::sycl::info::param_traits
Definition: info_desc.hpp:310
cl::sycl::queue::prefetch
event prefetch(const void *Ptr, size_t Count, event DepEvent)
Provides hints to the runtime library that data should be made available on a device earlier than Uni...
Definition: queue.hpp:660
__SYCL2020_DEPRECATED
#define __SYCL2020_DEPRECATED(message)
Definition: defines_elementary.hpp:56
cl::sycl::queue::fill
event fill(void *Ptr, const T &Pattern, size_t Count, event DepEvent)
Fills the specified memory with the specified pattern.
Definition: queue.hpp:451
_CODELOCPARAM
#define _CODELOCPARAM(a)
Definition: queue.hpp:47
cl::sycl::get_native
auto get_native(const SyclObjectT &Obj) -> backend_return_t< BackendName, SyclObjectT >
Definition: backend.hpp:123
cl::sycl::queue::fill
event fill(void *Ptr, const T &Pattern, size_t Count, const std::vector< event > &DepEvents)
Fills the specified memory with the specified pattern.
Definition: queue.hpp:468
stl.hpp
device_selector.hpp
cl::sycl::detail::createSyclObjFromImpl
T createSyclObjFromImpl(decltype(T::impl) ImplObj)
Definition: common.hpp:221
device.hpp
cl::sycl::queue::queue
queue(const property_list &PropList={})
Constructs a SYCL queue instance using the device returned by an instance of default_selector.
Definition: queue.hpp:109
cl::sycl::queue::parallel_for
event parallel_for(nd_range< Dims > Range, const std::vector< event > &DepEvents, RestT &&...Rest)
parallel_for version with a kernel represented as a lambda + nd_range that specifies global,...
Definition: queue.hpp:987
cl::sycl::queue::parallel_for
event parallel_for(nd_range< Dims > Range, event DepEvent, RestT &&...Rest)
parallel_for version with a kernel represented as a lambda + nd_range that specifies global,...
Definition: queue.hpp:966
cl::sycl::queue::fill
event fill(void *Ptr, const T &Pattern, size_t Count)
Fills the specified memory with the specified pattern.
Definition: queue.hpp:438
cl::sycl::queue::wait
void wait(_CODELOCONLYPARAM(&CodeLoc))
Performs a blocking wait for the completion of all enqueued tasks in the queue.
Definition: queue.hpp:388
__SYCL_DEPRECATED
#define __SYCL_DEPRECATED(message)
Definition: defines_elementary.hpp:47
_pi_mem_advice
_pi_mem_advice
Definition: pi.h:405
cl::sycl::queue::parallel_for
event parallel_for(range< 1 > Range, event DepEvent, RestT &&...Rest)
parallel_for version with a kernel represented as a lambda + range that specifies global size only.
Definition: queue.hpp:795
cl::sycl::queue::copy
event copy(const T *Src, T *Dest, size_t Count, const std::vector< event > &DepEvents)
Copies data from one memory region to another, both pointed by USM pointers.
Definition: queue.hpp:595
cl::sycl::queue::wait_and_throw
void wait_and_throw(_CODELOCONLYPARAM(&CodeLoc))
Performs a blocking wait for the completion of all enqueued tasks in the queue.
Definition: queue.hpp:402
event.hpp
service_kernel_names.hpp
sycl
Definition: invoke_simd.hpp:68
cl::sycl::property_list
Objects of the property_list class are containers for the SYCL properties.
Definition: property_list.hpp:26
cl::sycl::queue::single_task
event single_task(const std::vector< event > &DepEvents, _KERNELFUNCPARAM(KernelFunc) _CODELOCPARAM(&CodeLoc))
single_task version with a kernel represented as a lambda.
Definition: queue.hpp:736
cl::sycl::detail::code_location
Definition: common.hpp:54
cl::sycl::queue
Encapsulates a single SYCL queue which schedules kernels on a SYCL device.
Definition: queue.hpp:103
cl::sycl::default_selector
The default selector chooses the first available SYCL device.
Definition: device_selector.hpp:46
cl::sycl::detail::remove_reference_t
typename std::remove_reference< T >::type remove_reference_t
Definition: stl_type_traits.hpp:35
backend_traits.hpp
cl::sycl::detail::write
void write(GlobalBufAccessorT &GlobalFlushBuf, size_t FlushBufferSize, unsigned WIOffset, const char *Str, unsigned Len, unsigned Padding=0)
Definition: stream.hpp:110
cl::sycl::queue::parallel_for
event parallel_for(range< 2 > Range, RestT &&...Rest)
parallel_for version with a kernel represented as a lambda + range that specifies global size only.
Definition: queue.hpp:772
cl::sycl::range< 1 >
cl::sycl::queue::parallel_for
event parallel_for(range< 3 > Range, const std::vector< event > &DepEvents, RestT &&...Rest)
parallel_for version with a kernel represented as a lambda + range that specifies global size only.
Definition: queue.hpp:860
cl::sycl::info::event
event
Definition: info_desc.hpp:289
cl::sycl::info::queue
queue
Definition: info_desc.hpp:229
cl::sycl::handler::depends_on
void depends_on(event Event)
Registers event dependencies on this command group.
Definition: handler.cpp:800
cl::sycl::detail::memcpy
void memcpy(void *Dst, const void *Src, std::size_t Size)
cl::sycl::queue::queue
queue(const device_selector &DeviceSelector, const property_list &PropList={})
Constructs a SYCL queue instance using the device returned by the DeviceSelector provided.
Definition: queue.hpp:125
export.hpp
cl::sycl::queue::queue
queue(const device &SyclDevice, const property_list &PropList={})
Constructs a SYCL queue instance using the device provided.
Definition: queue.hpp:143
cl::sycl::device
The SYCL device class encapsulates a single SYCL device on which kernels may be executed.
Definition: device.hpp:36
cl::sycl::queue::ext_oneapi_submit_barrier
event ext_oneapi_submit_barrier(const std::vector< event > &WaitList _CODELOCPARAM(&CodeLoc))
Prevents any commands submitted afterward to this queue from executing until all events in WaitList h...
Definition: queue.hpp:360
cl::sycl::handler::prefetch
void prefetch(const void *Ptr, size_t Count)
Provides hints to the runtime library that data should be made available on a device earlier than Uni...
Definition: handler.cpp:748
cl::sycl::queue::parallel_for
event parallel_for(range< 2 > Range, event DepEvent, RestT &&...Rest)
parallel_for version with a kernel represented as a lambda + range that specifies global size only.
Definition: queue.hpp:807
cl::sycl::handler::ext_oneapi_barrier
void ext_oneapi_barrier()
Prevents any commands submitted afterward to this queue from executing until all commands previously ...
Definition: handler.hpp:2538
cl::sycl::ext::oneapi::detail::AreAllButLastReductions
Predicate returning true if all template type parameters except the last one are reductions.
Definition: handler.hpp:323
cl::sycl::queue::submit
event submit(T CGF, queue &SecondaryQueue _CODELOCPARAM(&CodeLoc))
Submits a command group function object to the queue, in order to be scheduled for execution on the d...
Definition: queue.hpp:292
cl::sycl::access::target
target
Definition: access.hpp:17
cl
We provide new interfaces for matrix multiply in this patch:
Definition: access.hpp:13
_KERNELFUNCPARAM
#define _KERNELFUNCPARAM(a)
Definition: queue.hpp:63
_CODELOCARG
#define _CODELOCARG(a)
Definition: queue.hpp:50
cl::sycl::aspect
aspect
Definition: aspects.hpp:15
cl::sycl::queue::parallel_for
std::enable_if_t< ext::oneapi::detail::AreAllButLastReductions< RestT... >::value, event > parallel_for(nd_range< Dims > Range, RestT &&...Rest)
parallel_for version with a kernel represented as a lambda + nd_range that specifies global,...
Definition: queue.hpp:947
cl::sycl::kernel_handler
Reading the value of a specialization constant.
Definition: kernel_handler.hpp:22
cl::sycl::queue::parallel_for
event parallel_for(range< 1 > Range, const std::vector< event > &DepEvents, RestT &&...Rest)
parallel_for version with a kernel represented as a lambda + range that specifies global size only.
Definition: queue.hpp:832
pi_native_handle
uintptr_t pi_native_handle
Definition: pi.h:98
cl::sycl::handler
Command group handler class.
Definition: handler.hpp:362
cl::sycl::info::context
context
Definition: info_desc.hpp:42
std::get
constexpr tuple_element< I, tuple< Types... > >::type & get(cl::sycl::detail::tuple< Types... > &Arg) noexcept
Definition: tuple.hpp:199
cl::sycl::queue::single_task
event single_task(event DepEvent, _KERNELFUNCPARAM(KernelFunc) _CODELOCPARAM(&CodeLoc))
single_task version with a kernel represented as a lambda.
Definition: queue.hpp:711
cl::sycl::queue::parallel_for
event parallel_for(range< 2 > Range, const std::vector< event > &DepEvents, RestT &&...Rest)
parallel_for version with a kernel represented as a lambda + range that specifies global size only.
Definition: queue.hpp:846
KernelFunc
std::function< void(const sycl::nd_item< NDims > &)> KernelFunc
Definition: pi_esimd_emulator.cpp:172
cl::sycl::queue::operator==
bool operator==(const queue &RHS) const
Definition: queue.hpp:215
cl::sycl::queue::operator!=
bool operator!=(const queue &RHS) const
Definition: queue.hpp:217
cl::sycl::detail::usm::free
void free(void *Ptr, const context &Ctxt, const code_location &CL)
Definition: usm_impl.cpp:220
cl::sycl::detail::auto_name
This class is the default KernelName template parameter type for kernel invocation APIs such as singl...
Definition: kernel.hpp:36
cl::sycl::queue::parallel_for
event parallel_for(range< 1 > Range, RestT &&...Rest)
parallel_for version with a kernel represented as a lambda + range that specifies global size only.
Definition: queue.hpp:761
cl::sycl::detail::getSyclObjImpl
decltype(Obj::impl) getSyclObjImpl(const Obj &SyclObject)
Definition: common.hpp:204
handler.hpp
cl::sycl::queue::parallel_for
event parallel_for(range< 3 > Range, RestT &&...Rest)
parallel_for version with a kernel represented as a lambda + range that specifies global size only.
Definition: queue.hpp:783
std
Definition: accessor.hpp:2617
_CODELOCONLYPARAM
#define _CODELOCONLYPARAM(a)
Definition: queue.hpp:45
cl::sycl::ext::oneapi::experimental::detail::get_property
static constexpr std::enable_if_t< HasProperty, typename FindCompileTimePropertyValueType< CTPropertyT, PropertiesT >::type > get_property()
Definition: properties.hpp:65
cl::sycl::queue::prefetch
event prefetch(const void *Ptr, size_t Count, const std::vector< event > &DepEvents)
Provides hints to the runtime library that data should be made available on a device earlier than Uni...
Definition: queue.hpp:676
cl::sycl::context
The context class represents a SYCL context on which kernel functions may be executed.
Definition: context.hpp:38
cl::sycl::queue::submit
event submit(T CGF _CODELOCPARAM(&CodeLoc))
Submits a command group function object to the queue, in order to be scheduled for execution on the d...
Definition: queue.hpp:252
cl::sycl::queue::single_task
event single_task(_KERNELFUNCPARAM(KernelFunc) _CODELOCPARAM(&CodeLoc))
single_task version with a kernel represented as a lambda.
Definition: queue.hpp:689
cl::sycl::queue::ext_oneapi_submit_barrier
event ext_oneapi_submit_barrier(_CODELOCONLYPARAM(&CodeLoc))
Prevents any commands submitted afterward to this queue from executing until all commands previously ...
Definition: queue.hpp:333
cl::sycl::queue::queue
queue(const device_selector &DeviceSelector, const async_handler &AsyncHandler, const property_list &PropList={})
Constructs a SYCL queue instance with an async_handler using the device returned by the DeviceSelecto...
Definition: queue.hpp:135
cl::sycl::queue::is_host
bool is_host() const
Definition: queue.cpp:77
exception_list.hpp
info_desc.hpp
common.hpp
cl::sycl::queue::copy
event copy(const T *Src, T *Dest, size_t Count)
Copies data from one memory region to another, both pointed by USM pointers.
Definition: queue.hpp:563
std::hash< cl::sycl::queue >::operator()
size_t operator()(const cl::sycl::queue &Q) const
Definition: queue.hpp:1229
cl::sycl::access::mode
mode
Definition: access.hpp:28
cl::sycl::handler::fill
void fill(accessor< T, Dims, AccessMode, AccessTarget, IsPlaceholder, PropertyListT > Dst, const T &Pattern)
Fills memory pointed by accessor with the pattern given.
Definition: handler.hpp:2486
cl::sycl::info::device
device
Definition: info_desc.hpp:53
assert_happened.hpp
cl::sycl::detail::enable_if_t
typename std::enable_if< B, T >::type enable_if_t
Definition: stl_type_traits.hpp:24
cl::sycl::queue::copy
event copy(const T *Src, T *Dest, size_t Count, event DepEvent)
Copies data from one memory region to another, both pointed by USM pointers.
Definition: queue.hpp:579
cl::sycl::async_handler
std::function< void(cl::sycl::exception_list)> async_handler
Definition: exception_list.hpp:53
_CODELOCFW
#define _CODELOCFW(a)
Definition: queue.hpp:51
cl::sycl::queue::queue
queue(const async_handler &AsyncHandler, const property_list &PropList={})
Constructs a SYCL queue instance with an async_handler using the device returned by an instance of de...
Definition: queue.hpp:117
__SYCL_INLINE_NAMESPACE
#define __SYCL_INLINE_NAMESPACE(X)
Definition: defines_elementary.hpp:12