DPC++ Runtime
Runtime libraries for oneAPI DPC++
queue.hpp
Go to the documentation of this file.
1 //==-------------------- queue.hpp - SYCL queue ----------------------------==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #pragma once
10 
13 #include <sycl/detail/common.hpp>
14 #include <sycl/detail/export.hpp>
17 #include <sycl/device.hpp>
18 #include <sycl/device_selector.hpp>
19 #include <sycl/event.hpp>
20 #include <sycl/exception_list.hpp>
21 #include <sycl/handler.hpp>
22 #include <sycl/info/info_desc.hpp>
23 #include <sycl/property_list.hpp>
24 #include <sycl/stl.hpp>
25 
26 // Explicitly request format macros
27 #ifndef __STDC_FORMAT_MACROS
28 #define __STDC_FORMAT_MACROS 1
29 #endif
30 #include <cinttypes>
31 #include <type_traits>
32 #include <utility>
33 
// having _TWO_ mid-param #ifdefs makes the functions very difficult to read.
// Here the KernelFunc param is simplified to _KERNELFUNCPARAM(KernelFunc).
// Once the queue kernel functions are defined, these macros are #undef'd
// immediately. _KERNELFUNCPARAM(KernelFunc) expands to either
// KernelType KernelFunc
// or const KernelType &KernelFunc
40 #ifdef __SYCL_NONCONST_FUNCTOR__
41 #define _KERNELFUNCPARAM(a) KernelType a
42 #else
43 #define _KERNELFUNCPARAM(a) const KernelType &a
44 #endif
45 
46 // Helper macro to identify if fallback assert is needed
47 // FIXME remove __NVPTX__ condition once devicelib supports CUDA
48 #if defined(SYCL_FALLBACK_ASSERT)
49 #define __SYCL_USE_FALLBACK_ASSERT SYCL_FALLBACK_ASSERT
50 #else
51 #define __SYCL_USE_FALLBACK_ASSERT 0
52 #endif
53 
54 namespace sycl {
56 
57 // Forward declaration
58 class context;
59 class device;
60 class queue;
61 
62 template <backend BackendName, class SyclObjectT>
63 auto get_native(const SyclObjectT &Obj)
64  -> backend_return_t<BackendName, SyclObjectT>;
65 
66 namespace detail {
67 class queue_impl;
68 
69 #if __SYCL_USE_FALLBACK_ASSERT
70 static event submitAssertCapture(queue &, event &, queue *,
71  const detail::code_location &);
72 #endif
73 } // namespace detail
74 
86 class __SYCL_EXPORT queue {
87 public:
92  explicit queue(const property_list &PropList = {})
93  : queue(default_selector(), async_handler{}, PropList) {}
94 
100  queue(const async_handler &AsyncHandler, const property_list &PropList = {})
101  : queue(default_selector(), AsyncHandler, PropList) {}
102 
103 #if __cplusplus >= 201703L
104  template <typename DeviceSelector,
111  typename = detail::EnableIfDeviceSelectorInvocable<DeviceSelector>>
112  explicit queue(const DeviceSelector &deviceSelector,
113  const async_handler &AsyncHandler,
114  const property_list &PropList = {})
115  : queue(detail::select_device(deviceSelector), AsyncHandler, PropList) {}
116 
122  template <typename DeviceSelector,
123  typename = detail::EnableIfDeviceSelectorInvocable<DeviceSelector>>
124  explicit queue(const DeviceSelector &deviceSelector,
125  const property_list &PropList = {})
126  : queue(detail::select_device(deviceSelector), async_handler{},
127  PropList) {}
128 
135  template <typename DeviceSelector,
136  typename = detail::EnableIfDeviceSelectorInvocable<DeviceSelector>>
137  explicit queue(const context &syclContext,
138  const DeviceSelector &deviceSelector,
139  const property_list &propList = {})
140  : queue(syclContext, detail::select_device(deviceSelector, syclContext),
141  propList) {}
142 
150  template <typename DeviceSelector,
151  typename = detail::EnableIfDeviceSelectorInvocable<DeviceSelector>>
152  explicit queue(const context &syclContext,
153  const DeviceSelector &deviceSelector,
154  const async_handler &AsyncHandler,
155  const property_list &propList = {})
156  : queue(syclContext, detail::select_device(deviceSelector, syclContext),
157  AsyncHandler, propList) {}
158 
159 #endif
160 
166  __SYCL2020_DEPRECATED("Use Callable device selectors instead of deprecated "
167  "device_selector subclasses.")
168  queue(const device_selector &DeviceSelector,
169  const property_list &PropList = {})
170  : queue(DeviceSelector.select_device(), async_handler{}, PropList) {}
171 
178  __SYCL2020_DEPRECATED("Use Callable device selectors instead of deprecated "
179  "device_selector subclasses.")
180  queue(const device_selector &DeviceSelector,
181  const async_handler &AsyncHandler, const property_list &PropList = {})
182  : queue(DeviceSelector.select_device(), AsyncHandler, PropList) {}
183 
188  explicit queue(const device &SyclDevice, const property_list &PropList = {})
189  : queue(SyclDevice, async_handler{}, PropList) {}
190 
197  explicit queue(const device &SyclDevice, const async_handler &AsyncHandler,
198  const property_list &PropList = {});
199 
206  __SYCL2020_DEPRECATED("Use Callable device selectors instead of deprecated "
207  "device_selector subclasses.")
208  queue(const context &SyclContext, const device_selector &DeviceSelector,
209  const property_list &PropList = {});
210 
219  __SYCL2020_DEPRECATED("Use Callable device selectors instead of deprecated "
220  "device_selector subclasses.")
221  queue(const context &SyclContext, const device_selector &DeviceSelector,
222  const async_handler &AsyncHandler, const property_list &PropList = {});
223 
230  queue(const context &SyclContext, const device &SyclDevice,
231  const property_list &PropList = {});
232 
240  queue(const context &SyclContext, const device &SyclDevice,
241  const async_handler &AsyncHandler, const property_list &PropList = {});
242 
251 #ifdef __SYCL_INTERNAL_API
252  queue(cl_command_queue ClQueue, const context &SyclContext,
253  const async_handler &AsyncHandler = {});
254 #endif
255 
256  queue(const queue &RHS) = default;
257 
258  queue(queue &&RHS) = default;
259 
260  queue &operator=(const queue &RHS) = default;
261 
262  queue &operator=(queue &&RHS) = default;
263 
264  bool operator==(const queue &RHS) const { return impl == RHS.impl; }
265 
266  bool operator!=(const queue &RHS) const { return !(*this == RHS); }
267 
270 #ifdef __SYCL_INTERNAL_API
271  cl_command_queue get() const;
272 #endif
273 
275  context get_context() const;
276 
278  device get_device() const;
279 
282  "is_host() is deprecated as the host device is no longer supported.")
283  bool is_host() const;
284 
288  template <typename Param>
289  typename detail::is_queue_info_desc<Param>::return_type get_info() const;
290 
291 private:
292  // A shorthand for `get_device().has()' which is expected to be a bit quicker
293  // than the long version
294  bool device_has(aspect Aspect) const;
295 
296 public:
303  template <typename T> event submit(T CGF _CODELOCPARAM(&CodeLoc)) {
304  _CODELOCARG(&CodeLoc);
305 
306 #if __SYCL_USE_FALLBACK_ASSERT
307  auto PostProcess = [this, &CodeLoc](bool IsKernel, bool KernelUsesAssert,
308  event &E) {
309  if (IsKernel && !device_has(aspect::ext_oneapi_native_assert) &&
310  KernelUsesAssert && !device_has(aspect::accelerator)) {
311  // __devicelib_assert_fail isn't supported by Device-side Runtime
312  // Linking against fallback impl of __devicelib_assert_fail is
313  // performed by program manager class
314  // Fallback assert isn't supported for FPGA
315  submitAssertCapture(*this, E, /* SecondaryQueue = */ nullptr, CodeLoc);
316  }
317  };
318 
319  auto Event = submit_impl_and_postprocess(CGF, CodeLoc, PostProcess);
320  return discard_or_return(Event);
321 #else
322  auto Event = submit_impl(CGF, CodeLoc);
323  return discard_or_return(Event);
324 #endif // __SYCL_USE_FALLBACK_ASSERT
325  }
326 
338  template <typename T>
339  event submit(T CGF, queue &SecondaryQueue _CODELOCPARAM(&CodeLoc)) {
340  _CODELOCARG(&CodeLoc);
341 
342 #if __SYCL_USE_FALLBACK_ASSERT
343  auto PostProcess = [this, &SecondaryQueue, &CodeLoc](
344  bool IsKernel, bool KernelUsesAssert, event &E) {
345  if (IsKernel && !device_has(aspect::ext_oneapi_native_assert) &&
346  KernelUsesAssert && !device_has(aspect::accelerator)) {
347  // Only secondary queues on devices need to be added to the assert
348  // capture.
349  // __devicelib_assert_fail isn't supported by Device-side Runtime
350  // Linking against fallback impl of __devicelib_assert_fail is
351  // performed by program manager class
352  // Fallback assert isn't supported for FPGA
353  submitAssertCapture(*this, E, &SecondaryQueue, CodeLoc);
354  }
355  };
356 
357  auto Event =
358  submit_impl_and_postprocess(CGF, SecondaryQueue, CodeLoc, PostProcess);
359  return discard_or_return(Event);
360 #else
361  auto Event = submit_impl(CGF, SecondaryQueue, CodeLoc);
362  return discard_or_return(Event);
363 #endif // __SYCL_USE_FALLBACK_ASSERT
364  }
365 
374  return submit(
375  [=](handler &CGH) { CGH.ext_oneapi_barrier(); } _CODELOCFW(CodeLoc));
376  }
377 
385  __SYCL2020_DEPRECATED("use 'ext_oneapi_submit_barrier' instead")
386  event submit_barrier(_CODELOCONLYPARAM(&CodeLoc)) {
387  _CODELOCARG(&CodeLoc);
388  return ext_oneapi_submit_barrier(CodeLoc);
389  }
390 
401  const std::vector<event> &WaitList _CODELOCPARAM(&CodeLoc)) {
402  return submit([=](handler &CGH) {
403  CGH.ext_oneapi_barrier(WaitList);
404  } _CODELOCFW(CodeLoc));
405  }
406 
416  __SYCL2020_DEPRECATED("use 'ext_oneapi_submit_barrier' instead")
417  event
418  submit_barrier(const std::vector<event> &WaitList _CODELOCPARAM(&CodeLoc)) {
419  _CODELOCARG(&CodeLoc);
420  return ext_oneapi_submit_barrier(WaitList, CodeLoc);
421  }
422 
428  void wait(_CODELOCONLYPARAM(&CodeLoc)) {
429  _CODELOCARG(&CodeLoc);
430 
431  wait_proxy(CodeLoc);
432  }
433 
443  _CODELOCARG(&CodeLoc);
444 
445  wait_and_throw_proxy(CodeLoc);
446  }
447 
450  void wait_proxy(const detail::code_location &CodeLoc);
453  void wait_and_throw_proxy(const detail::code_location &CodeLoc);
454 
460  void throw_asynchronous();
461 
464  template <typename PropertyT> bool has_property() const noexcept;
465 
469  template <typename PropertyT> PropertyT get_property() const;
470 
478  template <typename T> event fill(void *Ptr, const T &Pattern, size_t Count) {
479  return submit([&](handler &CGH) { CGH.fill<T>(Ptr, Pattern, Count); });
480  }
481 
490  template <typename T>
491  event fill(void *Ptr, const T &Pattern, size_t Count, event DepEvent) {
492  return submit([&](handler &CGH) {
493  CGH.depends_on(DepEvent);
494  CGH.fill<T>(Ptr, Pattern, Count);
495  });
496  }
497 
507  template <typename T>
508  event fill(void *Ptr, const T &Pattern, size_t Count,
509  const std::vector<event> &DepEvents) {
510  return submit([&](handler &CGH) {
511  CGH.depends_on(DepEvents);
512  CGH.fill<T>(Ptr, Pattern, Count);
513  });
514  }
515 
525  event memset(void *Ptr, int Value, size_t Count);
526 
537  event memset(void *Ptr, int Value, size_t Count, event DepEvent);
538 
550  event memset(void *Ptr, int Value, size_t Count,
551  const std::vector<event> &DepEvents);
552 
563  event memcpy(void *Dest, const void *Src, size_t Count);
564 
576  event memcpy(void *Dest, const void *Src, size_t Count, event DepEvent);
577 
590  event memcpy(void *Dest, const void *Src, size_t Count,
591  const std::vector<event> &DepEvents);
592 
603  template <typename T> event copy(const T *Src, T *Dest, size_t Count) {
604  return this->memcpy(Dest, Src, Count * sizeof(T));
605  }
606 
618  template <typename T>
619  event copy(const T *Src, T *Dest, size_t Count, event DepEvent) {
620  return this->memcpy(Dest, Src, Count * sizeof(T), DepEvent);
621  }
622 
634  template <typename T>
635  event copy(const T *Src, T *Dest, size_t Count,
636  const std::vector<event> &DepEvents) {
637  return this->memcpy(Dest, Src, Count * sizeof(T), DepEvents);
638  }
639 
647  __SYCL2020_DEPRECATED("use the overload with int Advice instead")
648  event mem_advise(const void *Ptr, size_t Length, pi_mem_advice Advice);
649 
657  event mem_advise(const void *Ptr, size_t Length, int Advice);
658 
667  event mem_advise(const void *Ptr, size_t Length, int Advice, event DepEvent);
668 
678  event mem_advise(const void *Ptr, size_t Length, int Advice,
679  const std::vector<event> &DepEvents);
680 
688  event prefetch(const void *Ptr, size_t Count) {
689  return submit([=](handler &CGH) { CGH.prefetch(Ptr, Count); });
690  }
691 
700  event prefetch(const void *Ptr, size_t Count, event DepEvent) {
701  return submit([=](handler &CGH) {
702  CGH.depends_on(DepEvent);
703  CGH.prefetch(Ptr, Count);
704  });
705  }
706 
716  event prefetch(const void *Ptr, size_t Count,
717  const std::vector<event> &DepEvents) {
718  return submit([=](handler &CGH) {
719  CGH.depends_on(DepEvents);
720  CGH.prefetch(Ptr, Count);
721  });
722  }
723 
728  template <typename KernelName = detail::auto_name, typename KernelType>
730  static_assert(
732  void()>::value ||
734  void(kernel_handler)>::value),
735  "sycl::queue.single_task() requires a kernel instead of command group. "
736  "Use queue.submit() instead");
737  _CODELOCARG(&CodeLoc);
738  return submit(
739  [&](handler &CGH) {
740  CGH.template single_task<KernelName, KernelType>(KernelFunc);
741  },
742  CodeLoc);
743  }
744 
750  template <typename KernelName = detail::auto_name, typename KernelType>
751  event single_task(event DepEvent,
753  static_assert(
755  void()>::value ||
757  void(kernel_handler)>::value),
758  "sycl::queue.single_task() requires a kernel instead of command group. "
759  "Use queue.submit() instead");
760  _CODELOCARG(&CodeLoc);
761  return submit(
762  [&](handler &CGH) {
763  CGH.depends_on(DepEvent);
764  CGH.template single_task<KernelName, KernelType>(KernelFunc);
765  },
766  CodeLoc);
767  }
768 
775  template <typename KernelName = detail::auto_name, typename KernelType>
776  event single_task(const std::vector<event> &DepEvents,
778  static_assert(
780  void()>::value ||
782  void(kernel_handler)>::value),
783  "sycl::queue.single_task() requires a kernel instead of command group. "
784  "Use queue.submit() instead");
785  _CODELOCARG(&CodeLoc);
786  return submit(
787  [&](handler &CGH) {
788  CGH.depends_on(DepEvents);
789  CGH.template single_task<KernelName, KernelType>(KernelFunc);
790  },
791  CodeLoc);
792  }
793 
800  template <typename KernelName = detail::auto_name, typename... RestT>
801  event parallel_for(range<1> Range, RestT &&...Rest) {
802  return parallel_for_impl<KernelName>(Range, Rest...);
803  }
804 
811  template <typename KernelName = detail::auto_name, typename... RestT>
812  event parallel_for(range<2> Range, RestT &&...Rest) {
813  return parallel_for_impl<KernelName>(Range, Rest...);
814  }
815 
822  template <typename KernelName = detail::auto_name, typename... RestT>
823  event parallel_for(range<3> Range, RestT &&...Rest) {
824  return parallel_for_impl<KernelName>(Range, Rest...);
825  }
826 
834  template <typename KernelName = detail::auto_name, typename... RestT>
835  event parallel_for(range<1> Range, event DepEvent, RestT &&...Rest) {
836  return parallel_for_impl<KernelName>(Range, DepEvent, Rest...);
837  }
838 
846  template <typename KernelName = detail::auto_name, typename... RestT>
847  event parallel_for(range<2> Range, event DepEvent, RestT &&...Rest) {
848  return parallel_for_impl<KernelName>(Range, DepEvent, Rest...);
849  }
850 
858  template <typename KernelName = detail::auto_name, typename... RestT>
859  event parallel_for(range<3> Range, event DepEvent, RestT &&...Rest) {
860  return parallel_for_impl<KernelName>(Range, DepEvent, Rest...);
861  }
862 
871  template <typename KernelName = detail::auto_name, typename... RestT>
872  event parallel_for(range<1> Range, const std::vector<event> &DepEvents,
873  RestT &&...Rest) {
874  return parallel_for_impl<KernelName>(Range, DepEvents, Rest...);
875  }
876 
885  template <typename KernelName = detail::auto_name, typename... RestT>
886  event parallel_for(range<2> Range, const std::vector<event> &DepEvents,
887  RestT &&...Rest) {
888  return parallel_for_impl<KernelName>(Range, DepEvents, Rest...);
889  }
890 
899  template <typename KernelName = detail::auto_name, typename... RestT>
900  event parallel_for(range<3> Range, const std::vector<event> &DepEvents,
901  RestT &&...Rest) {
902  return parallel_for_impl<KernelName>(Range, DepEvents, Rest...);
903  }
904 
905  // While other shortcuts with offsets are able to go through parallel_for(...,
906  // RestT &&...Rest), those that accept dependency events vector have to be
907  // overloaded to allow implicit construction from an init-list.
915  template <typename KernelName = detail::auto_name, typename KernelType,
916  int Dim>
917  event parallel_for(range<Dim> Range, id<Dim> WorkItemOffset,
918  const std::vector<event> &DepEvents,
920  static_assert(1 <= Dim && Dim <= 3, "Invalid number of dimensions");
921  return parallel_for_impl<KernelName>(Range, WorkItemOffset, DepEvents,
922  KernelFunc);
923  }
924 
932  template <typename KernelName = detail::auto_name, typename KernelType,
933  int Dims>
934  __SYCL2020_DEPRECATED("offsets are deprecated in SYCL 2020")
935  event parallel_for_impl(range<Dims> Range, id<Dims> WorkItemOffset,
937  // Actual code location needs to be captured from KernelInfo object.
938  const detail::code_location CodeLoc = {};
939  return submit(
940  [&](handler &CGH) {
941  CGH.template parallel_for<KernelName>(Range, WorkItemOffset,
942  KernelFunc);
943  },
944  CodeLoc);
945  }
946 
955  template <typename KernelName = detail::auto_name, typename KernelType,
956  int Dims>
957  __SYCL2020_DEPRECATED("offsets are deprecated in SYCL 2020")
958  event parallel_for_impl(range<Dims> Range, id<Dims> WorkItemOffset,
959  event DepEvent, _KERNELFUNCPARAM(KernelFunc)) {
960  // Actual code location needs to be captured from KernelInfo object.
961  const detail::code_location CodeLoc = {};
962  return submit(
963  [&](handler &CGH) {
964  CGH.depends_on(DepEvent);
965  CGH.template parallel_for<KernelName>(Range, WorkItemOffset,
966  KernelFunc);
967  },
968  CodeLoc);
969  }
970 
980  template <typename KernelName = detail::auto_name, typename KernelType,
981  int Dims>
982  __SYCL2020_DEPRECATED("offsets are deprecated in SYCL 2020")
983  event parallel_for_impl(range<Dims> Range, id<Dims> WorkItemOffset,
984  const std::vector<event> &DepEvents,
986  // Actual code location needs to be captured from KernelInfo object.
987  const detail::code_location CodeLoc = {};
988  return submit(
989  [&](handler &CGH) {
990  CGH.depends_on(DepEvents);
991  CGH.template parallel_for<KernelName>(Range, WorkItemOffset,
992  KernelFunc);
993  },
994  CodeLoc);
995  }
996 
1003  template <typename KernelName = detail::auto_name, int Dims,
1004  typename... RestT>
1005  std::enable_if_t<detail::AreAllButLastReductions<RestT...>::value, event>
1006  parallel_for(nd_range<Dims> Range, RestT &&...Rest) {
1007  // Actual code location needs to be captured from KernelInfo object.
1008  const detail::code_location CodeLoc = {};
1009  return submit(
1010  [&](handler &CGH) {
1011  CGH.template parallel_for<KernelName>(Range, Rest...);
1012  },
1013  CodeLoc);
1014  }
1015 
1023  template <typename KernelName = detail::auto_name, int Dims,
1024  typename... RestT>
1025  event parallel_for(nd_range<Dims> Range, event DepEvent, RestT &&...Rest) {
1026  // Actual code location needs to be captured from KernelInfo object.
1027  const detail::code_location CodeLoc = {};
1028  return submit(
1029  [&](handler &CGH) {
1030  CGH.depends_on(DepEvent);
1031  CGH.template parallel_for<KernelName>(Range, Rest...);
1032  },
1033  CodeLoc);
1034  }
1035 
1044  template <typename KernelName = detail::auto_name, int Dims,
1045  typename... RestT>
1046  event parallel_for(nd_range<Dims> Range, const std::vector<event> &DepEvents,
1047  RestT &&...Rest) {
1048  // Actual code location needs to be captured from KernelInfo object.
1049  const detail::code_location CodeLoc = {};
1050  return submit(
1051  [&](handler &CGH) {
1052  CGH.depends_on(DepEvents);
1053  CGH.template parallel_for<KernelName>(Range, Rest...);
1054  },
1055  CodeLoc);
1056  }
1057 
1058 // Clean KERNELFUNC macros.
1059 #undef _KERNELFUNCPARAM
1060 
1064  bool is_in_order() const;
1065 
1069  backend get_backend() const noexcept;
1070 
1071 private:
1072  pi_native_handle getNative() const;
1073 
1074  std::shared_ptr<detail::queue_impl> impl;
1075  queue(std::shared_ptr<detail::queue_impl> impl) : impl(impl) {}
1076 
1077  template <class Obj>
1078  friend decltype(Obj::impl) detail::getSyclObjImpl(const Obj &SyclObject);
1079  template <class T>
1080  friend T detail::createSyclObjFromImpl(decltype(T::impl) ImplObj);
1081 
1082  template <backend BackendName, class SyclObjectT>
1083  friend auto get_native(const SyclObjectT &Obj)
1084  -> backend_return_t<BackendName, SyclObjectT>;
1085 
1086 #if __SYCL_USE_FALLBACK_ASSERT
1087  friend event detail::submitAssertCapture(queue &, event &, queue *,
1088  const detail::code_location &);
1089 #endif
1090 
1092  event submit_impl(std::function<void(handler &)> CGH,
1093  const detail::code_location &CodeLoc);
1095  event submit_impl(std::function<void(handler &)> CGH, queue secondQueue,
1096  const detail::code_location &CodeLoc);
1097 
1100  event discard_or_return(const event &Event);
1101 
1102  // Function to postprocess submitted command
1103  // Arguments:
1104  // bool IsKernel - true if the submitted command was kernel, false otherwise
1105  // bool KernelUsesAssert - true if submitted kernel uses assert, only
1106  // meaningful when IsKernel is true
1107  // event &Event - event after which post processing should be executed
1108  using SubmitPostProcessF = std::function<void(bool, bool, event &)>;
1109 
1115  event submit_impl_and_postprocess(std::function<void(handler &)> CGH,
1116  const detail::code_location &CodeLoc,
1117  const SubmitPostProcessF &PostProcess);
1124  event submit_impl_and_postprocess(std::function<void(handler &)> CGH,
1125  queue secondQueue,
1126  const detail::code_location &CodeLoc,
1127  const SubmitPostProcessF &PostProcess);
1128 
1134  template <typename KernelName, int Dims, typename... RestT>
1135  std::enable_if_t<detail::AreAllButLastReductions<RestT...>::value, event>
1136  parallel_for_impl(range<Dims> Range, RestT &&...Rest) {
1137  // Actual code location needs to be captured from KernelInfo object.
1138  const detail::code_location CodeLoc = {};
1139  return submit(
1140  [&](handler &CGH) {
1141  CGH.template parallel_for<KernelName>(Range, Rest...);
1142  },
1143  CodeLoc);
1144  }
1145 
1153  template <typename KernelName, int Dims, typename... RestT>
1154  event parallel_for_impl(range<Dims> Range, event DepEvent, RestT &&...Rest) {
1155  // Actual code location needs to be captured from KernelInfo object.
1156  const detail::code_location CodeLoc = {};
1157  return submit(
1158  [&](handler &CGH) {
1159  CGH.depends_on(DepEvent);
1160  CGH.template parallel_for<KernelName>(Range, Rest...);
1161  },
1162  CodeLoc);
1163  }
1164 
1172  template <typename KernelName, int Dims, typename... RestT>
1173  event parallel_for_impl(range<Dims> Range,
1174  const std::vector<event> &DepEvents,
1175  RestT &&...Rest) {
1176  // Actual code location needs to be captured from KernelInfo object.
1177  const detail::code_location CodeLoc = {};
1178  return submit(
1179  [&](handler &CGH) {
1180  CGH.depends_on(DepEvents);
1181  CGH.template parallel_for<KernelName>(Range, Rest...);
1182  },
1183  CodeLoc);
1184  }
1185 
1186  buffer<detail::AssertHappened, 1> &getAssertHappenedBuffer();
1187 };
1188 
1189 namespace detail {
1190 #if __SYCL_USE_FALLBACK_ASSERT
1191 #define __SYCL_ASSERT_START 1
1192 
1203 event submitAssertCapture(queue &Self, event &Event, queue *SecondaryQueue,
1204  const detail::code_location &CodeLoc) {
1205  using AHBufT = buffer<detail::AssertHappened, 1>;
1206 
1207  AHBufT &Buffer = Self.getAssertHappenedBuffer();
1208 
1209  event CopierEv, CheckerEv, PostCheckerEv;
1210  auto CopierCGF = [&](handler &CGH) {
1211  CGH.depends_on(Event);
1212 
1213  auto Acc = Buffer.get_access<access::mode::write>(CGH);
1214 
1215  CGH.single_task<__sycl_service_kernel__::AssertInfoCopier>([Acc] {
1216 #if defined(__SYCL_DEVICE_ONLY__) && !defined(__NVPTX__)
1217  __devicelib_assert_read(&Acc[0]);
1218 #else
1219  (void)Acc;
1220 #endif // defined(__SYCL_DEVICE_ONLY__) && !defined(__NVPTX__)
1221  });
1222  };
1223  auto CheckerCGF = [&CopierEv, &Buffer](handler &CGH) {
1224  CGH.depends_on(CopierEv);
1225  using mode = access::mode;
1226  using target = access::target;
1227 
1228  auto Acc = Buffer.get_access<mode::read, target::host_buffer>(CGH);
1229 
1230  CGH.host_task([=] {
1231  const detail::AssertHappened *AH = &Acc[0];
1232 
1233  // Don't use assert here as msvc will insert reference to __imp__wassert
1234  // which won't be properly resolved in separate compile use-case
1235 #ifndef NDEBUG
1236  if (AH->Flag == __SYCL_ASSERT_START)
1237  throw sycl::runtime_error(
1238  "Internal Error. Invalid value in assert description.",
1239  PI_ERROR_INVALID_VALUE);
1240 #endif
1241 
1242  if (AH->Flag) {
1243  const char *Expr = AH->Expr[0] ? AH->Expr : "<unknown expr>";
1244  const char *File = AH->File[0] ? AH->File : "<unknown file>";
1245  const char *Func = AH->Func[0] ? AH->Func : "<unknown func>";
1246 
1247  fprintf(stderr,
1248  "%s:%d: %s: global id: [%" PRIu64 ",%" PRIu64 ",%" PRIu64
1249  "], local id: [%" PRIu64 ",%" PRIu64 ",%" PRIu64 "] "
1250  "Assertion `%s` failed.\n",
1251  File, AH->Line, Func, AH->GID0, AH->GID1, AH->GID2, AH->LID0,
1252  AH->LID1, AH->LID2, Expr);
1253  fflush(stderr);
1254  abort(); // no need to release memory as it's abort anyway
1255  }
1256  });
1257  };
1258 
1259  if (SecondaryQueue) {
1260  CopierEv = Self.submit_impl(CopierCGF, *SecondaryQueue, CodeLoc);
1261  CheckerEv = Self.submit_impl(CheckerCGF, *SecondaryQueue, CodeLoc);
1262  } else {
1263  CopierEv = Self.submit_impl(CopierCGF, CodeLoc);
1264  CheckerEv = Self.submit_impl(CheckerCGF, CodeLoc);
1265  }
1266 
1267  return CheckerEv;
1268 }
1269 #undef __SYCL_ASSERT_START
1270 #endif // __SYCL_USE_FALLBACK_ASSERT
1271 } // namespace detail
1272 
1273 } // __SYCL_INLINE_VER_NAMESPACE(_V1)
1274 } // namespace sycl
1275 
1276 namespace std {
1277 template <> struct hash<sycl::queue> {
1278  size_t operator()(const sycl::queue &Q) const {
1279  return std::hash<std::shared_ptr<sycl::detail::queue_impl>>()(
1281  }
1282 };
1283 } // namespace std
1284 
1285 #undef __SYCL_USE_FALLBACK_ASSERT
sycl::_V1::queue::prefetch
event prefetch(const void *Ptr, size_t Count, event DepEvent)
Provides hints to the runtime library that data should be made available on a device earlier than Uni...
Definition: queue.hpp:700
sycl::_V1::queue::parallel_for
event parallel_for(range< 1 > Range, event DepEvent, RestT &&...Rest)
parallel_for version with a kernel represented as a lambda + range that specifies global size only.
Definition: queue.hpp:835
sycl::_V1::property_list
Objects of the property_list class are containers for the SYCL properties.
Definition: property_list.hpp:26
sycl::_V1::queue::parallel_for
event parallel_for(range< 1 > Range, const std::vector< event > &DepEvents, RestT &&...Rest)
parallel_for version with a kernel represented as a lambda + range that specifies global size only.
Definition: queue.hpp:872
property_list.hpp
sycl::_V1::handler::ext_oneapi_barrier
void ext_oneapi_barrier()
Prevents any commands submitted afterward to this queue from executing until all commands previously ...
Definition: handler.hpp:2441
sycl::_V1::access::mode
mode
Definition: access.hpp:28
sycl::_V1::backend
backend
Definition: backend_types.hpp:21
sycl::_V1::queue::parallel_for
event parallel_for(range< 1 > Range, RestT &&...Rest)
parallel_for version with a kernel represented as a lambda + range that specifies global size only.
Definition: queue.hpp:801
sycl::_V1::detail::auto_name
This class is the default KernelName template parameter type for kernel invocation APIs such as singl...
Definition: kernel.hpp:37
sycl::_V1::queue::prefetch
event prefetch(const void *Ptr, size_t Count, const std::vector< event > &DepEvents)
Provides hints to the runtime library that data should be made available on a device earlier than Uni...
Definition: queue.hpp:716
stl.hpp
device_selector.hpp
sycl::_V1::queue::queue
queue(const property_list &PropList={})
Constructs a SYCL queue instance using the device returned by an instance of default_selector.
Definition: queue.hpp:92
sycl::_V1::queue::parallel_for
event parallel_for(range< 3 > Range, const std::vector< event > &DepEvents, RestT &&...Rest)
parallel_for version with a kernel represented as a lambda + range that specifies global size only.
Definition: queue.hpp:900
device.hpp
__SYCL_INLINE_VER_NAMESPACE
#define __SYCL_INLINE_VER_NAMESPACE(X)
Definition: defines_elementary.hpp:13
_pi_mem_advice
_pi_mem_advice
Definition: pi.h:411
sycl::_V1::queue::single_task
event single_task(event DepEvent, _KERNELFUNCPARAM(KernelFunc) _CODELOCPARAM(&CodeLoc))
single_task version with a kernel represented as a lambda.
Definition: queue.hpp:751
event.hpp
sycl::_V1::queue::single_task
event single_task(const std::vector< event > &DepEvents, _KERNELFUNCPARAM(KernelFunc) _CODELOCPARAM(&CodeLoc))
single_task version with a kernel represented as a lambda.
Definition: queue.hpp:776
service_kernel_names.hpp
sycl
---— Error handling, matching OpenCL plugin semantics.
Definition: access.hpp:13
sycl::_V1::event
An event object can be used to synchronize memory transfers, enqueues of kernels and signaling barrie...
Definition: event.hpp:38
sycl::_V1::detail::memcpy
void memcpy(void *Dst, const void *Src, std::size_t Size)
sycl::_V1::id
A unique identifier of an item in an index space.
Definition: array.hpp:17
backend_traits.hpp
sycl::_V1::queue::parallel_for
event parallel_for(nd_range< Dims > Range, const std::vector< event > &DepEvents, RestT &&...Rest)
parallel_for version with a kernel represented as a lambda + nd_range that specifies global,...
Definition: queue.hpp:1046
sycl::_V1::ext::oneapi::experimental::detail::get_property
static constexpr std::enable_if_t< HasProperty, typename FindCompileTimePropertyValueType< CTPropertyT, PropertiesT >::type > get_property()
Definition: properties.hpp:65
sycl::_V1::queue::copy
event copy(const T *Src, T *Dest, size_t Count)
Copies data from one memory region to another, both pointed by USM pointers.
Definition: queue.hpp:603
sycl::_V1::detail::check_fn_signature
Definition: cg_types.hpp:126
sycl::_V1::queue::queue
queue(const async_handler &AsyncHandler, const property_list &PropList={})
Constructs a SYCL queue instance with an async_handler using the device returned by an instance of de...
Definition: queue.hpp:100
sycl::_V1::detail::enable_if_t
typename std::enable_if< B, T >::type enable_if_t
Definition: stl_type_traits.hpp:24
sycl::_V1::detail::write
void write(GlobalBufAccessorT &GlobalFlushBuf, size_t FlushBufferSize, unsigned WIOffset, const char *Str, unsigned Len, unsigned Padding=0)
Definition: stream.hpp:109
sycl::_V1::range< 1 >
export.hpp
std::get
constexpr tuple_element< I, tuple< Types... > >::type & get(sycl::detail::tuple< Types... > &Arg) noexcept
Definition: tuple.hpp:199
sycl::_V1::queue::parallel_for
event parallel_for(range< 3 > Range, event DepEvent, RestT &&...Rest)
parallel_for version with a kernel represented as a lambda + range that specifies global size only.
Definition: queue.hpp:859
_CODELOCPARAM
#define _CODELOCPARAM(a)
Definition: common.hpp:108
sycl::_V1::queue::fill
event fill(void *Ptr, const T &Pattern, size_t Count, event DepEvent)
Fills the specified memory with the specified pattern.
Definition: queue.hpp:491
sycl::_V1::queue::parallel_for
event parallel_for(range< 2 > Range, RestT &&...Rest)
parallel_for version with a kernel represented as a lambda + range that specifies global size only.
Definition: queue.hpp:812
sycl::_V1::queue
Encapsulates a single SYCL queue which schedules kernels on a SYCL device.
Definition: queue.hpp:86
_CODELOCARG
#define _CODELOCARG(a)
Definition: common.hpp:112
device_selector
sycl::_V1::queue::wait
void wait(_CODELOCONLYPARAM(&CodeLoc))
Performs a blocking wait for the completion of all enqueued tasks in the queue.
Definition: queue.hpp:428
sycl::_V1::detail::select_device
device select_device(const DSelectorInvocableType &DeviceSelectorInvocable)
Definition: device_selector.cpp:115
sycl::_V1::handler
Command group handler class.
Definition: handler.hpp:352
sycl::_V1::queue::operator!=
bool operator!=(const queue &RHS) const
Definition: queue.hpp:266
common.hpp
_CODELOCFW
#define _CODELOCFW(a)
Definition: common.hpp:113
_KERNELFUNCPARAM
#define _KERNELFUNCPARAM(a)
Definition: queue.hpp:41
sycl::_V1::nd_range
Defines the iteration domain of both the work-groups and the overall dispatch.
Definition: uniform.hpp:38
sycl::_V1::device
The SYCL device class encapsulates a single SYCL device on which kernels may be executed.
Definition: device.hpp:47
sycl::_V1::detail::createSyclObjFromImpl
T createSyclObjFromImpl(decltype(T::impl) ImplObj)
Definition: common.hpp:258
sycl::_V1::access::target
target
Definition: access.hpp:17
sycl::_V1::queue::parallel_for
event parallel_for(range< 2 > Range, const std::vector< event > &DepEvents, RestT &&...Rest)
parallel_for version with a kernel represented as a lambda + range that specifies global size only.
Definition: queue.hpp:886
sycl::_V1::queue::parallel_for
event parallel_for(range< 3 > Range, RestT &&...Rest)
parallel_for version with a kernel represented as a lambda + range that specifies global size only.
Definition: queue.hpp:823
pi_native_handle
uintptr_t pi_native_handle
Definition: pi.h:103
sycl::_V1::queue::fill
event fill(void *Ptr, const T &Pattern, size_t Count, const std::vector< event > &DepEvents)
Fills the specified memory with the specified pattern.
Definition: queue.hpp:508
sycl::_V1::detail::remove_reference_t
typename std::remove_reference< T >::type remove_reference_t
Definition: stl_type_traits.hpp:35
std::hash< sycl::queue >::operator()
size_t operator()(const sycl::queue &Q) const
Definition: queue.hpp:1278
KernelFunc
std::function< void(const sycl::nd_item< NDims > &)> KernelFunc
Definition: pi_esimd_emulator.cpp:173
sycl::_V1::detail::code_location
Definition: common.hpp:66
sycl::_V1::handler::fill
void fill(accessor< T, Dims, AccessMode, AccessTarget, IsPlaceholder, PropertyListT > Dst, const T &Pattern)
Fills memory pointed by accessor with the pattern given.
Definition: handler.hpp:2390
sycl::_V1::handler::prefetch
void prefetch(const void *Ptr, size_t Count)
Provides hints to the runtime library that data should be made available on a device earlier than Unified Shared Memory would normally require it to be available.
Definition: handler.cpp:645
handler.hpp
_CODELOCONLYPARAM
#define _CODELOCONLYPARAM(a)
Definition: common.hpp:106
sycl::_V1::queue::submit
event submit(T CGF, queue &SecondaryQueue _CODELOCPARAM(&CodeLoc))
Submits a command group function object to the queue, in order to be scheduled for execution on the device.
Definition: queue.hpp:339
std
Definition: accessor.hpp:3071
info_desc_helpers.hpp
sycl::_V1::queue::parallel_for
std::enable_if_t< detail::AreAllButLastReductions< RestT... >::value, event > parallel_for(nd_range< Dims > Range, RestT &&...Rest)
parallel_for version with a kernel represented as a lambda + nd_range that specifies global, local sizes and offset.
Definition: queue.hpp:1006
sycl::_V1::detail::AreAllButLastReductions
Predicate returning true if all template type parameters except the last one are reductions.
Definition: handler.hpp:312
sycl::_V1::queue::wait_and_throw
void wait_and_throw(_CODELOCONLYPARAM(&CodeLoc))
Performs a blocking wait for the completion of all enqueued tasks in the queue.
Definition: queue.hpp:442
sycl::_V1::async_handler
std::function< void(sycl::exception_list)> async_handler
Definition: exception_list.hpp:53
sycl::_V1::queue::parallel_for
event parallel_for(nd_range< Dims > Range, event DepEvent, RestT &&...Rest)
parallel_for version with a kernel represented as a lambda + nd_range that specifies global, local sizes and offset.
Definition: queue.hpp:1025
sycl::_V1::queue::parallel_for
event parallel_for(range< 2 > Range, event DepEvent, RestT &&...Rest)
parallel_for version with a kernel represented as a lambda + range that specifies global size only.
Definition: queue.hpp:847
sycl::_V1::queue::operator==
bool operator==(const queue &RHS) const
Definition: queue.hpp:264
exception_list.hpp
info_desc.hpp
sycl::_V1::queue::parallel_for
event parallel_for(range< Dim > Range, id< Dim > WorkItemOffset, const std::vector< event > &DepEvents, _KERNELFUNCPARAM(KernelFunc))
parallel_for version with a kernel represented as a lambda + range and offset that specify global size and global offset correspondingly.
Definition: queue.hpp:917
assert_happened.hpp
sycl::_V1::queue::ext_oneapi_submit_barrier
event ext_oneapi_submit_barrier(_CODELOCONLYPARAM(&CodeLoc))
Prevents any commands submitted afterward to this queue from executing until all commands previously submitted to this queue have entered the complete state.
Definition: queue.hpp:373
sycl::_V1::queue::copy
event copy(const T *Src, T *Dest, size_t Count, const std::vector< event > &DepEvents)
Copies data from one memory region to another, both pointed by USM pointers.
Definition: queue.hpp:635
sycl::_V1::queue::copy
event copy(const T *Src, T *Dest, size_t Count, event DepEvent)
Copies data from one memory region to another, both pointed by USM pointers.
Definition: queue.hpp:619
sycl::_V1::handler::depends_on
void depends_on(event Event)
Registers event dependencies on this command group.
Definition: handler.cpp:684
sycl::_V1::get_native
auto get_native(const SyclObjectT &Obj) -> backend_return_t< BackendName, SyclObjectT >
Definition: backend.hpp:123
__SYCL2020_DEPRECATED
#define __SYCL2020_DEPRECATED(message)
Definition: defines_elementary.hpp:54
sycl::_V1::detail::getSyclObjImpl
decltype(Obj::impl) getSyclObjImpl(const Obj &SyclObject)
Definition: common.hpp:240
sycl::_V1::context
The context class represents a SYCL context on which kernel functions may be executed.
Definition: context.hpp:39
sycl::_V1::queue::single_task
event single_task(_KERNELFUNCPARAM(KernelFunc) _CODELOCPARAM(&CodeLoc))
single_task version with a kernel represented as a lambda.
Definition: queue.hpp:729
sycl::_V1::queue::ext_oneapi_submit_barrier
event ext_oneapi_submit_barrier(const std::vector< event > &WaitList _CODELOCPARAM(&CodeLoc))
Prevents any commands submitted afterward to this queue from executing until all events in WaitList have entered the complete state.
Definition: queue.hpp:400