DPC++ Runtime
Runtime libraries for oneAPI DPC++
handler.hpp
Go to the documentation of this file.
1 //==-------- handler.hpp --- SYCL command group handler --------------------==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #pragma once
10 
11 #include <sycl/access/access.hpp>
12 #include <sycl/accessor.hpp>
13 #include <sycl/context.hpp>
14 #include <sycl/detail/cg.hpp>
15 #include <sycl/detail/cg_types.hpp>
16 #include <sycl/detail/cl.h>
17 #include <sycl/detail/export.hpp>
19 #include <sycl/detail/os_util.hpp>
20 #include <sycl/event.hpp>
24 #include <sycl/id.hpp>
25 #include <sycl/interop_handle.hpp>
26 #include <sycl/item.hpp>
27 #include <sycl/kernel.hpp>
28 #include <sycl/kernel_bundle.hpp>
29 #include <sycl/kernel_handler.hpp>
30 #include <sycl/nd_item.hpp>
31 #include <sycl/nd_range.hpp>
32 #include <sycl/property_list.hpp>
34 #include <sycl/sampler.hpp>
35 #include <sycl/stl.hpp>
36 
37 #include <functional>
38 #include <limits>
39 #include <memory>
40 #include <tuple>
41 #include <type_traits>
42 
43 // SYCL_LANGUAGE_VERSION is 4 digit year followed by 2 digit revision
44 #if !SYCL_LANGUAGE_VERSION || SYCL_LANGUAGE_VERSION < 202001
45 #define __SYCL_NONCONST_FUNCTOR__
46 #endif
47 
48 // replace _KERNELFUNCPARAM(KernelFunc) with KernelType KernelFunc
49 // or const KernelType &KernelFunc
50 #ifdef __SYCL_NONCONST_FUNCTOR__
51 #define _KERNELFUNCPARAMTYPE KernelType
52 #else
53 #define _KERNELFUNCPARAMTYPE const KernelType &
54 #endif
55 #define _KERNELFUNCPARAM(a) _KERNELFUNCPARAMTYPE a
56 
57 template <typename DataT, int Dimensions, sycl::access::mode AccessMode,
58  sycl::access::target AccessTarget,
60 class __fill;
61 
62 template <typename T> class __usmfill;
63 template <typename T> class __usmfill2d;
64 template <typename T> class __usmmemcpy2d;
65 
66 template <typename T_Src, typename T_Dst, int Dims,
70 
71 template <typename T_Src, typename T_Dst, int Dims,
75 
76 template <typename T_Src, int Dims_Src, sycl::access::mode AccessMode_Src,
77  sycl::access::target AccessTarget_Src, typename T_Dst, int Dims_Dst,
78  sycl::access::mode AccessMode_Dst,
79  sycl::access::target AccessTarget_Dst,
80  sycl::access::placeholder IsPlaceholder_Src,
81  sycl::access::placeholder IsPlaceholder_Dst>
83 
84 // For unit testing purposes
85 class MockHandler;
86 
87 namespace sycl {
89 
90 // Forward declaration
91 
92 class handler;
93 template <typename T, int Dimensions, typename AllocatorT, typename Enable>
94 class buffer;
95 namespace detail {
96 
97 class handler_impl;
98 class kernel_impl;
99 class queue_impl;
100 class stream_impl;
101 template <typename DataT, int Dimensions, access::mode AccessMode,
103 class image_accessor;
104 template <typename RetType, typename Func, typename Arg>
105 static Arg member_ptr_helper(RetType (Func::*)(Arg) const);
106 
107 // Non-const version of the above template to match functors whose 'operator()'
108 // is declared w/o the 'const' qualifier.
109 template <typename RetType, typename Func, typename Arg>
110 static Arg member_ptr_helper(RetType (Func::*)(Arg));
111 
112 // template <typename RetType, typename Func>
113 // static void member_ptr_helper(RetType (Func::*)() const);
114 
115 // template <typename RetType, typename Func>
116 // static void member_ptr_helper(RetType (Func::*)());
117 
// Resolves the argument type of F::operator() when it can be deduced via
// member_ptr_helper (i.e. F has a single, non-template operator()).
template <typename F, typename SuggestedArgType>
decltype(member_ptr_helper(&F::operator())) argument_helper(int);

// Fallback overload selected when the argument type cannot be deduced
// (e.g. a generic lambda); yields the caller-suggested type instead.
template <typename F, typename SuggestedArgType>
SuggestedArgType argument_helper(...);

// The argument type kernel functor F is invoked with, or SuggestedArgType
// when it cannot be deduced from F::operator().
template <typename F, typename SuggestedArgType>
using lambda_arg_type = decltype(argument_helper<F, SuggestedArgType>(0));
126 
127 // Used when parallel_for range is rounded-up.
128 template <typename Name> class __pf_kernel_wrapper;
129 
130 template <typename Type> struct get_kernel_wrapper_name_t {
132 };
133 
134 __SYCL_EXPORT device getDeviceFromHandler(handler &);
135 
#if __SYCL_ID_QUERIES_FIT_IN_INT__
// Per-type diagnostic messages used when a range/offset component (or
// product/sum of components) exceeds the limits of a signed int while id
// queries are restricted to int.
template <typename T> struct NotIntMsg;

template <int Dims> struct NotIntMsg<range<Dims>> {
  constexpr static const char *Msg =
      "Provided range is out of integer limits. Pass "
      "`-fno-sycl-id-queries-fit-in-int' to disable range check.";
};

template <int Dims> struct NotIntMsg<id<Dims>> {
  constexpr static const char *Msg =
      "Provided offset is out of integer limits. Pass "
      "`-fno-sycl-id-queries-fit-in-int' to disable offset check.";
};
#endif
151 
152 // Helper for merging properties with ones defined in an optional kernel functor
153 // getter.
154 template <typename KernelType, typename PropertiesT, typename Cond = void>
156  using type = PropertiesT;
157 };
158 template <typename KernelType, typename PropertiesT>
160  KernelType, PropertiesT,
161  std::enable_if_t<ext::oneapi::experimental::detail::
162  HasKernelPropertiesGetMethod<KernelType>::value>> {
165  KernelType>::properties_t;
166  static_assert(
168  "get(sycl::ext::oneapi::experimental::properties_tag) member in kernel "
169  "functor class must return a valid property list.");
171  PropertiesT, get_method_properties>;
172 };
173 
174 #if __SYCL_ID_QUERIES_FIT_IN_INT__
175 template <typename T, typename ValT>
177  std::is_same<ValT, unsigned long long>::value>
178 checkValueRangeImpl(ValT V) {
179  static constexpr size_t Limit =
180  static_cast<size_t>((std::numeric_limits<int>::max)());
181  if (V > Limit)
182  throw runtime_error(NotIntMsg<T>::Msg, PI_ERROR_INVALID_VALUE);
183 }
184 #endif
185 
186 template <int Dims, typename T>
188  std::is_same<T, id<Dims>>::value>
189 checkValueRange(const T &V) {
190 #if __SYCL_ID_QUERIES_FIT_IN_INT__
191  for (size_t Dim = 0; Dim < Dims; ++Dim)
192  checkValueRangeImpl<T>(V[Dim]);
193 
194  {
195  unsigned long long Product = 1;
196  for (size_t Dim = 0; Dim < Dims; ++Dim) {
197  Product *= V[Dim];
198  // check value now to prevent product overflow in the end
199  checkValueRangeImpl<T>(Product);
200  }
201  }
202 #else
203  (void)V;
204 #endif
205 }
206 
207 template <int Dims>
208 void checkValueRange(const range<Dims> &R, const id<Dims> &O) {
209 #if __SYCL_ID_QUERIES_FIT_IN_INT__
210  checkValueRange<Dims>(R);
211  checkValueRange<Dims>(O);
212 
213  for (size_t Dim = 0; Dim < Dims; ++Dim) {
214  unsigned long long Sum = R[Dim] + O[Dim];
215 
216  checkValueRangeImpl<range<Dims>>(Sum);
217  }
218 #else
219  (void)R;
220  (void)O;
221 #endif
222 }
223 
224 template <int Dims, typename T>
226 checkValueRange(const T &V) {
227 #if __SYCL_ID_QUERIES_FIT_IN_INT__
228  checkValueRange<Dims>(V.get_global_range());
229  checkValueRange<Dims>(V.get_local_range());
230  checkValueRange<Dims>(V.get_offset());
231 
232  checkValueRange<Dims>(V.get_global_range(), V.get_offset());
233 #else
234  (void)V;
235 #endif
236 }
237 
238 template <typename TransformedArgType, int Dims, typename KernelType>
240 public:
241  RoundedRangeKernel(range<Dims> NumWorkItems, KernelType KernelFunc)
242  : NumWorkItems(NumWorkItems), KernelFunc(KernelFunc) {}
243 
244  void operator()(TransformedArgType Arg) const {
245  if (Arg[0] >= NumWorkItems[0])
246  return;
247  Arg.set_allowed_range(NumWorkItems);
248  KernelFunc(Arg);
249  }
250 
251 private:
252  range<Dims> NumWorkItems;
253  KernelType KernelFunc;
254 };
255 
256 template <typename TransformedArgType, int Dims, typename KernelType>
258 public:
260  : NumWorkItems(NumWorkItems), KernelFunc(KernelFunc) {}
261 
262  void operator()(TransformedArgType Arg, kernel_handler KH) const {
263  if (Arg[0] >= NumWorkItems[0])
264  return;
265  Arg.set_allowed_range(NumWorkItems);
266  KernelFunc(Arg, KH);
267  }
268 
269 private:
270  range<Dims> NumWorkItems;
271  KernelType KernelFunc;
272 };
273 
275 using sycl::detail::queue_impl;
276 
277 } // namespace detail
278 
312 class __SYCL_EXPORT handler {
313 private:
318  handler(std::shared_ptr<detail::queue_impl> Queue, bool IsHost);
319 
329  handler(std::shared_ptr<detail::queue_impl> Queue,
330  std::shared_ptr<detail::queue_impl> PrimaryQueue,
331  std::shared_ptr<detail::queue_impl> SecondaryQueue, bool IsHost);
332 
334  template <typename T, typename F = typename detail::remove_const_t<
336  F *storePlainArg(T &&Arg) {
337  MArgsStorage.emplace_back(sizeof(T));
338  auto Storage = reinterpret_cast<F *>(MArgsStorage.back().data());
339  *Storage = Arg;
340  return Storage;
341  }
342 
  // Records the command-group action type; a handler carries at most one
  // action (see throwIfActionIsCreated).
  void setType(detail::CG::CGTYPE Type) { MCGType = Type; }

  // Returns the action type currently recorded for this command group.
  detail::CG::CGTYPE getType() { return MCGType; }
346 
347  void throwIfActionIsCreated() {
348  if (detail::CG::None != getType())
349  throw sycl::runtime_error("Attempt to set multiple actions for the "
350  "command group. Command group must consist of "
351  "a single kernel or explicit memory operation.",
352  PI_ERROR_INVALID_OPERATION);
353  }
354 
357  void
358  extractArgsAndReqsFromLambda(char *LambdaPtr, size_t KernelArgsNum,
359  const detail::kernel_param_desc_t *KernelArgs,
360  bool IsESIMD);
361 
363  void extractArgsAndReqs();
364 
365  void processArg(void *Ptr, const detail::kernel_param_kind_t &Kind,
366  const int Size, const size_t Index, size_t &IndexShift,
367  bool IsKernelCreatedFromSource, bool IsESIMD);
368 
370  std::string getKernelName();
371 
  // Returns true when the lambda/functor name matches the pre-built kernel's
  // name. Requires MKernel to be set.
  template <typename LambdaNameT> bool lambdaAndKernelHaveEqualName() {
    // TODO It is unclear whether a kernel and a lambda/functor must have
    // equal names for parallel_for with sycl::kernel and lambda/functor
    // together.
    // For now, if the names are equal we extract arguments from the
    // lambda/functor for the kernel; otherwise set_arg(s) must be used to
    // resolve the order and values of arguments for the kernel.
    assert(MKernel && "MKernel is not initialized");
    const std::string LambdaName = detail::KernelInfo<LambdaNameT>::getName();
    const std::string KernelName = getKernelName();
    return LambdaName == KernelName;
  }
383 
386  void saveCodeLoc(detail::code_location CodeLoc) { MCodeLoc = CodeLoc; }
387 
394  event finalize();
395 
401  void addStream(const std::shared_ptr<detail::stream_impl> &Stream) {
402  MStreamStorage.push_back(Stream);
403  }
404 
410  void addReduction(const std::shared_ptr<const void> &ReduObj);
411 
  // Destruction is restricted: handler lifetime is managed by the runtime.
  ~handler() = default;

  // TODO: Private and unused. Remove when an ABI break is allowed.
  bool is_host() { return MIsHost; }
416 
417 #ifdef __SYCL_DEVICE_ONLY__
418  // In device compilation accessor isn't inherited from AccessorBaseHost, so
419  // can't detect by it. Since we don't expect it to be ever called in device
420  // execution, just use blind void *.
421  void associateWithHandler(void *AccBase, access::target AccTarget);
422 #else
424  access::target AccTarget);
425 #endif
426 
  // Recursively peels one argument off the pack and registers it via
  // set_arg() until the pack is fully processed.
  // NOTE(review): std::move is applied to forwarding references here, so
  // lvalue arguments are moved from as well — presumably fine because
  // set_arg copies the value, but confirm before reusing arguments after
  // the call.
  template <typename T, typename... Ts>
  void setArgsHelper(int ArgIndex, T &&Arg, Ts &&...Args) {
    set_arg(ArgIndex, std::move(Arg));
    setArgsHelper(++ArgIndex, std::move(Args)...);
  }

  // Recursion terminator: empty pack, nothing left to register.
  void setArgsHelper(int) {}
436 
  // Registers a local accessor as a kernel argument: keeps the accessor
  // implementation alive in MLocalAccStorage and records a raw pointer to
  // it in MArgs.
  void setLocalAccessorArgHelper(int ArgIndex,
                                 detail::LocalAccessorBaseHost &LocalAccBase) {
    detail::LocalAccessorImplPtr LocalAccImpl =
        detail::getSyclObjImpl(LocalAccBase);
    // Take the raw pointer before the smart pointer is moved into storage.
    detail::LocalAccessorImplHost *Req = LocalAccImpl.get();
    MLocalAccStorage.push_back(std::move(LocalAccImpl));
    MArgs.emplace_back(detail::kernel_param_kind_t::kind_accessor, Req,
                       static_cast<int>(access::target::local), ArgIndex);
  }
446 
447  // setArgHelper for local accessor argument (legacy accessor interface)
448  template <typename DataT, int Dims, access::mode AccessMode,
450  void setArgHelper(int ArgIndex,
451  accessor<DataT, Dims, AccessMode, access::target::local,
452  IsPlaceholder> &&Arg) {
453 #ifndef __SYCL_DEVICE_ONLY__
454  setLocalAccessorArgHelper(ArgIndex, Arg);
455 #endif
456  }
457 
  // setArgHelper for local accessor argument (up to date accessor interface).
  // In device compilation this is a no-op: local accessors are materialized
  // by the device compiler, so no host-side bookkeeping is required.
  template <typename DataT, int Dims>
  void setArgHelper(int ArgIndex, local_accessor<DataT, Dims> &&Arg) {
#ifndef __SYCL_DEVICE_ONLY__
    setLocalAccessorArgHelper(ArgIndex, Arg);
#endif
  }
465 
466  // setArgHelper for non local accessor argument.
467  template <typename DataT, int Dims, access::mode AccessMode,
470  setArgHelper(
471  int ArgIndex,
475  detail::AccessorImplHost *Req = AccImpl.get();
476  // Add accessor to the list of requirements.
477  MRequirements.push_back(Req);
478  // Store copy of the accessor.
479  MAccStorage.push_back(std::move(AccImpl));
480  // Add accessor to the list of arguments.
481  MArgs.emplace_back(detail::kernel_param_kind_t::kind_accessor, Req,
482  static_cast<int>(AccessTarget), ArgIndex);
483  }
484 
485  template <typename T> void setArgHelper(int ArgIndex, T &&Arg) {
486  auto StoredArg = static_cast<void *>(storePlainArg(Arg));
487 
488  if (!std::is_same<cl_mem, T>::value && std::is_pointer<T>::value) {
489  MArgs.emplace_back(detail::kernel_param_kind_t::kind_pointer, StoredArg,
490  sizeof(T), ArgIndex);
491  } else {
492  MArgs.emplace_back(detail::kernel_param_kind_t::kind_std_layout,
493  StoredArg, sizeof(T), ArgIndex);
494  }
495  }
496 
497  void setArgHelper(int ArgIndex, sampler &&Arg) {
498  auto StoredArg = static_cast<void *>(storePlainArg(Arg));
499  MArgs.emplace_back(detail::kernel_param_kind_t::kind_sampler, StoredArg,
500  sizeof(sampler), ArgIndex);
501  }
502 
503  // TODO: Unusued. Remove when ABI break is allowed.
504  void verifyKernelInvoc(const kernel &Kernel) {
505  std::ignore = Kernel;
506  return;
507  }
508 
509  /* The kernel passed to StoreLambda can take an id, an item or an nd_item as
510  * its argument. Since esimd plugin directly invokes the kernel (doesn’t use
511  * piKernelSetArg), the kernel argument type must be known to the plugin.
512  * However, passing kernel argument type to the plugin requires changing ABI
513  * in HostKernel class. To overcome this problem, helpers below wrap the
514  * “original” kernel with a functor that always takes an nd_item as argument.
515  * A functor is used instead of a lambda because extractArgsAndReqsFromLambda
516  * needs access to the “original” kernel and keeps references to its internal
517  * data, i.e. the kernel passed as argument cannot be local in scope. The
518  * functor itself is again encapsulated in a std::function since functor’s
519  * type is unknown to the plugin.
520  */
521 
  // For 'id, item w/wo offset, nd_item' kernel arguments.
  // Wraps KernelFunc in NormalizedKernelType (which always takes an
  // nd_item), type-erases that wrapper into a HostKernel stored in
  // MHostKernel, and returns a pointer to the copy of the user's functor
  // held inside the stored wrapper so its captures can later be read by
  // extractArgsAndReqsFromLambda().
  template <class KernelType, class NormalizedKernelType, int Dims>
  KernelType *ResetHostKernelHelper(const KernelType &KernelFunc) {
    NormalizedKernelType NormalizedKernel(KernelFunc);
    auto NormalizedKernelFunc =
        std::function<void(const sycl::nd_item<Dims> &)>(NormalizedKernel);
    auto HostKernelPtr =
        new detail::HostKernel<decltype(NormalizedKernelFunc),
                               sycl::nd_item<Dims>, Dims>(NormalizedKernelFunc);
    MHostKernel.reset(HostKernelPtr);
    // The std::function target holds the surviving copy of the wrapper; the
    // returned pointer refers to the user functor stored inside it, which
    // stays valid for as long as MHostKernel owns the HostKernel.
    return &HostKernelPtr->MKernel.template target<NormalizedKernelType>()
                ->MKernelFunc;
  }
535 
  // For 'sycl::id<Dims>' kernel argument: the wrapper extracts the global id
  // from the nd_item before invoking the user functor.
  template <class KernelType, typename ArgT, int Dims>
  typename std::enable_if<std::is_same<ArgT, sycl::id<Dims>>::value,
                          KernelType *>::type
  ResetHostKernel(const KernelType &KernelFunc) {
    struct NormalizedKernelType {
      KernelType MKernelFunc;
      NormalizedKernelType(const KernelType &KernelFunc)
          : MKernelFunc(KernelFunc) {}
      void operator()(const nd_item<Dims> &Arg) {
        detail::runKernelWithArg(MKernelFunc, Arg.get_global_id());
      }
    };
    return ResetHostKernelHelper<KernelType, struct NormalizedKernelType, Dims>(
        KernelFunc);
  }
552 
  // For 'sycl::nd_item<Dims>' kernel argument: the wrapper forwards the
  // nd_item to the user functor unchanged.
  template <class KernelType, typename ArgT, int Dims>
  typename std::enable_if<std::is_same<ArgT, sycl::nd_item<Dims>>::value,
                          KernelType *>::type
  ResetHostKernel(const KernelType &KernelFunc) {
    struct NormalizedKernelType {
      KernelType MKernelFunc;
      NormalizedKernelType(const KernelType &KernelFunc)
          : MKernelFunc(KernelFunc) {}
      void operator()(const nd_item<Dims> &Arg) {
        detail::runKernelWithArg(MKernelFunc, Arg);
      }
    };
    return ResetHostKernelHelper<KernelType, struct NormalizedKernelType, Dims>(
        KernelFunc);
  }
569 
  // For 'sycl::item<Dims, without_offset>' kernel argument: the wrapper
  // rebuilds an offset-less item from the nd_item's global range and global
  // id before invoking the user functor.
  template <class KernelType, typename ArgT, int Dims>
  typename std::enable_if<std::is_same<ArgT, sycl::item<Dims, false>>::value,
                          KernelType *>::type
  ResetHostKernel(const KernelType &KernelFunc) {
    struct NormalizedKernelType {
      KernelType MKernelFunc;
      NormalizedKernelType(const KernelType &KernelFunc)
          : MKernelFunc(KernelFunc) {}
      void operator()(const nd_item<Dims> &Arg) {
        sycl::item<Dims, false> Item = detail::Builder::createItem<Dims, false>(
            Arg.get_global_range(), Arg.get_global_id());
        detail::runKernelWithArg(MKernelFunc, Item);
      }
    };
    return ResetHostKernelHelper<KernelType, struct NormalizedKernelType, Dims>(
        KernelFunc);
  }
588 
  // For 'sycl::item<Dims, with_offset>' kernel argument: the wrapper
  // rebuilds an item carrying the offset from the nd_item's global range,
  // global id and offset before invoking the user functor.
  template <class KernelType, typename ArgT, int Dims>
  typename std::enable_if<std::is_same<ArgT, sycl::item<Dims, true>>::value,
                          KernelType *>::type
  ResetHostKernel(const KernelType &KernelFunc) {
    struct NormalizedKernelType {
      KernelType MKernelFunc;
      NormalizedKernelType(const KernelType &KernelFunc)
          : MKernelFunc(KernelFunc) {}
      void operator()(const nd_item<Dims> &Arg) {
        sycl::item<Dims, true> Item = detail::Builder::createItem<Dims, true>(
            Arg.get_global_range(), Arg.get_global_id(), Arg.get_offset());
        detail::runKernelWithArg(MKernelFunc, Item);
      }
    };
    return ResetHostKernelHelper<KernelType, struct NormalizedKernelType, Dims>(
        KernelFunc);
  }
607 
  // For 'void' kernel argument (single_task): the wrapper discards the
  // nd_item and invokes the user functor with no arguments.
  template <class KernelType, typename ArgT, int Dims>
  typename std::enable_if_t<std::is_same<ArgT, void>::value, KernelType *>
  ResetHostKernel(const KernelType &KernelFunc) {
    struct NormalizedKernelType {
      KernelType MKernelFunc;
      NormalizedKernelType(const KernelType &KernelFunc)
          : MKernelFunc(KernelFunc) {}
      void operator()(const nd_item<Dims> &Arg) {
        (void)Arg;
        detail::runKernelWithoutArg(MKernelFunc);
      }
    };
    return ResetHostKernelHelper<KernelType, struct NormalizedKernelType, Dims>(
        KernelFunc);
  }
624 
625  // For 'sycl::group<Dims>' kernel argument
626  // 'wrapper'-based approach using 'NormalizedKernelType' struct is not used
627  // for 'void(sycl::group<Dims>)' since 'void(sycl::group<Dims>)' is not
628  // supported in ESIMD.
629  template <class KernelType, typename ArgT, int Dims>
630  typename std::enable_if<std::is_same<ArgT, sycl::group<Dims>>::value,
631  KernelType *>::type
632  ResetHostKernel(const KernelType &KernelFunc) {
633  MHostKernel.reset(
635  return (KernelType *)(MHostKernel->getPtr());
636  }
637 
645  void verifyUsedKernelBundle(const std::string &KernelName);
646 
653  template <typename KernelName, typename KernelType, int Dims,
654  typename LambdaArgType>
655  void StoreLambda(KernelType KernelFunc) {
657 
658  constexpr bool IsCallableWithKernelHandler =
660  LambdaArgType>::value;
661 
662  if (IsCallableWithKernelHandler && MIsHost) {
663  throw sycl::feature_not_supported(
664  "kernel_handler is not yet supported by host device.",
665  PI_ERROR_INVALID_OPERATION);
666  }
667 
668  KernelType *KernelPtr =
669  ResetHostKernel<KernelType, LambdaArgType, Dims>(KernelFunc);
670 
671  using KI = sycl::detail::KernelInfo<KernelName>;
672  constexpr bool KernelHasName =
673  KI::getName() != nullptr && KI::getName()[0] != '\0';
674 
675  // Some host compilers may have different captures from Clang. Currently
676  // there is no stable way of handling this when extracting the captures, so
677  // a static assert is made to fail for incompatible kernel lambdas.
678  static_assert(
679  !KernelHasName || sizeof(KernelFunc) == KI::getKernelSize(),
680  "Unexpected kernel lambda size. This can be caused by an "
681  "external host compiler producing a lambda with an "
682  "unexpected layout. This is a limitation of the compiler."
683  "In many cases the difference is related to capturing constexpr "
684  "variables. In such cases removing constexpr specifier aligns the "
685  "captures between the host compiler and the device compiler."
686  "\n"
687  "In case of MSVC, passing "
688  "-fsycl-host-compiler-options='/std:c++latest' "
689  "might also help.");
690 
691  // Empty name indicates that the compilation happens without integration
692  // header, so don't perform things that require it.
693  if (KernelHasName) {
694  // TODO support ESIMD in no-integration-header case too.
695  MArgs.clear();
696  extractArgsAndReqsFromLambda(reinterpret_cast<char *>(KernelPtr),
697  KI::getNumParams(), &KI::getParamDesc(0),
698  KI::isESIMD());
699  MKernelName = KI::getName();
700  MOSModuleHandle = detail::OSUtil::getOSModuleHandle(KI::getName());
701  } else {
702  // In case w/o the integration header it is necessary to process
703  // accessors from the list(which are associated with this handler) as
704  // arguments.
705  MArgs = std::move(MAssociatedAccesors);
706  }
707 
708  // If the kernel lambda is callable with a kernel_handler argument, manifest
709  // the associated kernel handler.
710  if (IsCallableWithKernelHandler) {
711  getOrInsertHandlerKernelBundle(/*Insert=*/true);
712  }
713  }
714 
719  template <int Dims_Src, int Dims_Dst>
720  static bool IsCopyingRectRegionAvailable(const range<Dims_Src> Src,
721  const range<Dims_Dst> Dst) {
722  if (Dims_Src > Dims_Dst)
723  return false;
724  for (size_t I = 0; I < Dims_Src; ++I)
725  if (Src[I] > Dst[I])
726  return false;
727  return true;
728  }
729 
735  template <typename TSrc, int DimSrc, access::mode ModeSrc,
736  access::target TargetSrc, typename TDst, int DimDst,
737  access::mode ModeDst, access::target TargetDst,
738  access::placeholder IsPHSrc, access::placeholder IsPHDst>
739  detail::enable_if_t<(DimSrc > 0) && (DimDst > 0), bool>
742  if (!MIsHost &&
743  IsCopyingRectRegionAvailable(Src.get_range(), Dst.get_range()))
744  return false;
745 
746  range<1> LinearizedRange(Src.size());
747  parallel_for<
748  class __copyAcc2Acc<TSrc, DimSrc, ModeSrc, TargetSrc, TDst, DimDst,
749  ModeDst, TargetDst, IsPHSrc, IsPHDst>>(
750  LinearizedRange, [=](id<1> Id) {
751  size_t Index = Id[0];
752  id<DimSrc> SrcId = detail::getDelinearizedId(Src.get_range(), Index);
753  id<DimDst> DstId = detail::getDelinearizedId(Dst.get_range(), Index);
754  Dst[DstId] = Src[SrcId];
755  });
756  return true;
757  }
758 
766  template <typename TSrc, int DimSrc, access::mode ModeSrc,
767  access::target TargetSrc, typename TDst, int DimDst,
768  access::mode ModeDst, access::target TargetDst,
769  access::placeholder IsPHSrc, access::placeholder IsPHDst>
773  if (!MIsHost)
774  return false;
775 
776  single_task<
777  class __copyAcc2Acc<TSrc, DimSrc, ModeSrc, TargetSrc, TDst, DimDst,
778  ModeDst, TargetDst, IsPHSrc, IsPHDst>>(
779  [=]() { *(Dst.get_pointer()) = *(Src.get_pointer()); });
780  return true;
781  }
782 
783 #ifndef __SYCL_DEVICE_ONLY__
789  template <typename TSrc, typename TDst, int Dim, access::mode AccMode,
790  access::target AccTarget, access::placeholder IsPH>
791  detail::enable_if_t<(Dim > 0)>
793  TDst *Dst) {
794  range<Dim> Range = Src.get_range();
795  parallel_for<
796  class __copyAcc2Ptr<TSrc, TDst, Dim, AccMode, AccTarget, IsPH>>(
797  Range, [=](id<Dim> Index) {
798  const size_t LinearIndex = detail::getLinearIndex(Index, Range);
799  using TSrcNonConst = typename detail::remove_const_t<TSrc>;
800  (reinterpret_cast<TSrcNonConst *>(Dst))[LinearIndex] = Src[Index];
801  });
802  }
803 
809  template <typename TSrc, typename TDst, int Dim, access::mode AccMode,
810  access::target AccTarget, access::placeholder IsPH>
813  TDst *Dst) {
814  single_task<class __copyAcc2Ptr<TSrc, TDst, Dim, AccMode, AccTarget, IsPH>>(
815  [=]() {
816  using TSrcNonConst = typename detail::remove_const_t<TSrc>;
817  *(reinterpret_cast<TSrcNonConst *>(Dst)) = *(Src.get_pointer());
818  });
819  }
820 
825  template <typename TSrc, typename TDst, int Dim, access::mode AccMode,
826  access::target AccTarget, access::placeholder IsPH>
827  detail::enable_if_t<(Dim > 0)>
828  copyPtrToAccHost(TSrc *Src,
830  range<Dim> Range = Dst.get_range();
831  parallel_for<
832  class __copyPtr2Acc<TSrc, TDst, Dim, AccMode, AccTarget, IsPH>>(
833  Range, [=](id<Dim> Index) {
834  const size_t LinearIndex = detail::getLinearIndex(Index, Range);
835  Dst[Index] = (reinterpret_cast<const TDst *>(Src))[LinearIndex];
836  });
837  }
838 
844  template <typename TSrc, typename TDst, int Dim, access::mode AccMode,
845  access::target AccTarget, access::placeholder IsPH>
847  copyPtrToAccHost(TSrc *Src,
849  single_task<class __copyPtr2Acc<TSrc, TDst, Dim, AccMode, AccTarget, IsPH>>(
850  [=]() {
851  *(Dst.get_pointer()) = *(reinterpret_cast<const TDst *>(Src));
852  });
853  }
854 #endif // __SYCL_DEVICE_ONLY__
855 
856  constexpr static bool isConstOrGlobal(access::target AccessTarget) {
857  return AccessTarget == access::target::device ||
858  AccessTarget == access::target::constant_buffer;
859  }
860 
861  constexpr static bool isImageOrImageArray(access::target AccessTarget) {
862  return AccessTarget == access::target::image ||
863  AccessTarget == access::target::image_array;
864  }
865 
866  constexpr static bool
867  isValidTargetForExplicitOp(access::target AccessTarget) {
868  return isConstOrGlobal(AccessTarget) || isImageOrImageArray(AccessTarget);
869  }
870 
871  constexpr static bool isValidModeForSourceAccessor(access::mode AccessMode) {
872  return AccessMode == access::mode::read ||
874  }
875 
876  constexpr static bool
877  isValidModeForDestinationAccessor(access::mode AccessMode) {
878  return AccessMode == access::mode::write ||
880  AccessMode == access::mode::discard_write ||
881  AccessMode == access::mode::discard_read_write;
882  }
883 
884  template <int Dims, typename LambdaArgType> struct TransformUserItemType {
885  using type = typename std::conditional<
886  std::is_convertible<nd_item<Dims>, LambdaArgType>::value, nd_item<Dims>,
887  typename std::conditional<
888  std::is_convertible<item<Dims>, LambdaArgType>::value, item<Dims>,
889  LambdaArgType>::type>::type;
890  };
891 
903  template <typename KernelName, typename KernelType, int Dims,
904  typename PropertiesT =
906  void parallel_for_lambda_impl(range<Dims> NumWorkItems,
907  KernelType KernelFunc) {
908  throwIfActionIsCreated();
909  using LambdaArgType = sycl::detail::lambda_arg_type<KernelType, item<Dims>>;
910 
911  // If 1D kernel argument is an integral type, convert it to sycl::item<1>
912  // If user type is convertible from sycl::item/sycl::nd_item, use
913  // sycl::item/sycl::nd_item to transport item information
914  using TransformedArgType = typename std::conditional<
915  std::is_integral<LambdaArgType>::value && Dims == 1, item<Dims>,
916  typename TransformUserItemType<Dims, LambdaArgType>::type>::type;
917 
918  // TODO: Properties may change the kernel function, so in order to avoid
919  // conflicts they should be included in the name.
920  using NameT =
922 
923  verifyUsedKernelBundle(detail::KernelInfo<NameT>::getName());
924 
925  // Range rounding can be disabled by the user.
926  // Range rounding is not done on the host device.
927  // Range rounding is supported only for newer SYCL standards.
928 #if !defined(__SYCL_DISABLE_PARALLEL_FOR_RANGE_ROUNDING__) && \
929  !defined(DPCPP_HOST_DEVICE_OPENMP) && \
930  !defined(DPCPP_HOST_DEVICE_PERF_NATIVE) && SYCL_LANGUAGE_VERSION >= 202001
931  // Range should be a multiple of this for reasonable performance.
932  size_t MinFactorX = 16;
933  // Range should be a multiple of this for improved performance.
934  size_t GoodFactorX = 32;
935  // Range should be at least this to make rounding worthwhile.
936  size_t MinRangeX = 1024;
937 
938  // Check if rounding parameters have been set through environment:
939  // SYCL_PARALLEL_FOR_RANGE_ROUNDING_PARAMS=MinRound:PreferredRound:MinRange
940  this->GetRangeRoundingSettings(MinFactorX, GoodFactorX, MinRangeX);
941 
942  // Disable the rounding-up optimizations under these conditions:
943  // 1. The env var SYCL_DISABLE_PARALLEL_FOR_RANGE_ROUNDING is set.
944  // 2. The kernel is provided via an interoperability method.
945  // 3. The range is already a multiple of the rounding factor.
946  //
947  // Cases 2 and 3 could be supported with extra effort.
948  // As an optimization for the common case it is an
949  // implementation choice to not support those scenarios.
950  // Note that "this_item" is a free function, i.e. not tied to any
951  // specific id or item. When concurrent parallel_fors are executing
952  // on a device it is difficult to tell which parallel_for the call is
953  // being made from. One could replicate portions of the
954  // call-graph to make this_item calls kernel-specific but this is
955  // not considered worthwhile.
956 
957  // Get the kernel name to check condition 2.
958  std::string KName = typeid(NameT *).name();
960  bool DisableRounding =
961  this->DisableRangeRounding() ||
962  (KI::getName() == nullptr || KI::getName()[0] == '\0');
963 
964  // Perform range rounding if rounding-up is enabled
965  // and there are sufficient work-items to need rounding
966  // and the user-specified range is not a multiple of a "good" value.
967  if (!DisableRounding && (NumWorkItems[0] >= MinRangeX) &&
968  (NumWorkItems[0] % MinFactorX != 0)) {
969  // It is sufficient to round up just the first dimension.
970  // Multiplying the rounded-up value of the first dimension
971  // by the values of the remaining dimensions (if any)
972  // will yield a rounded-up value for the total range.
973  size_t NewValX =
974  ((NumWorkItems[0] + GoodFactorX - 1) / GoodFactorX) * GoodFactorX;
975  if (this->RangeRoundingTrace())
976  std::cout << "parallel_for range adjusted from " << NumWorkItems[0]
977  << " to " << NewValX << std::endl;
978 
979  using NameWT = typename detail::get_kernel_wrapper_name_t<NameT>::name;
980  auto Wrapper =
981  getRangeRoundedKernelLambda<NameWT, TransformedArgType, Dims>(
982  KernelFunc, NumWorkItems);
983 
984  using KName = std::conditional_t<std::is_same<KernelType, NameT>::value,
985  decltype(Wrapper), NameWT>;
986 
987  range<Dims> AdjustedRange = NumWorkItems;
988  AdjustedRange.set_range_dim0(NewValX);
989  kernel_parallel_for_wrapper<KName, TransformedArgType, decltype(Wrapper),
990  PropertiesT>(Wrapper);
991 #ifndef __SYCL_DEVICE_ONLY__
992  detail::checkValueRange<Dims>(AdjustedRange);
993  MNDRDesc.set(std::move(AdjustedRange));
994  StoreLambda<KName, decltype(Wrapper), Dims, TransformedArgType>(
995  std::move(Wrapper));
996  setType(detail::CG::Kernel);
997 #endif
998  } else
999 #endif // !__SYCL_DISABLE_PARALLEL_FOR_RANGE_ROUNDING__ &&
1000  // !DPCPP_HOST_DEVICE_OPENMP && !DPCPP_HOST_DEVICE_PERF_NATIVE &&
1001  // SYCL_LANGUAGE_VERSION >= 202001
1002  {
1003  (void)NumWorkItems;
1004  kernel_parallel_for_wrapper<NameT, TransformedArgType, KernelType,
1005  PropertiesT>(KernelFunc);
1006 #ifndef __SYCL_DEVICE_ONLY__
1007  detail::checkValueRange<Dims>(NumWorkItems);
1008  MNDRDesc.set(std::move(NumWorkItems));
1009  StoreLambda<NameT, KernelType, Dims, TransformedArgType>(
1010  std::move(KernelFunc));
1011  setType(detail::CG::Kernel);
1012 #endif
1013  }
1014  }
1015 
1029  template <typename KernelName, typename KernelType, int Dims,
1030  typename PropertiesT>
1031  void parallel_for_impl(nd_range<Dims> ExecutionRange, PropertiesT,
1033  throwIfActionIsCreated();
1034  // TODO: Properties may change the kernel function, so in order to avoid
1035  // conflicts they should be included in the name.
1036  using NameT =
1038  verifyUsedKernelBundle(detail::KernelInfo<NameT>::getName());
1039  using LambdaArgType =
1040  sycl::detail::lambda_arg_type<KernelType, nd_item<Dims>>;
1041  // If user type is convertible from sycl::item/sycl::nd_item, use
1042  // sycl::item/sycl::nd_item to transport item information
1043  using TransformedArgType =
1044  typename TransformUserItemType<Dims, LambdaArgType>::type;
1045  (void)ExecutionRange;
1046  kernel_parallel_for_wrapper<NameT, TransformedArgType, KernelType,
1047  PropertiesT>(KernelFunc);
1048 #ifndef __SYCL_DEVICE_ONLY__
1049  detail::checkValueRange<Dims>(ExecutionRange);
1050  MNDRDesc.set(std::move(ExecutionRange));
1051  StoreLambda<NameT, KernelType, Dims, TransformedArgType>(
1052  std::move(KernelFunc));
1053  setType(detail::CG::Kernel);
1054 #endif
1055  }
1056 
1064  template <int Dims>
1065  void parallel_for_impl(range<Dims> NumWorkItems, kernel Kernel) {
1066  throwIfActionIsCreated();
1067  MKernel = detail::getSyclObjImpl(std::move(Kernel));
1068  detail::checkValueRange<Dims>(NumWorkItems);
1069  MNDRDesc.set(std::move(NumWorkItems));
1070  setType(detail::CG::Kernel);
1071  extractArgsAndReqs();
1072  MKernelName = getKernelName();
1073  }
1074 
1085  template <typename KernelName, typename KernelType, int Dims,
1086  typename PropertiesT =
1088  void parallel_for_work_group_lambda_impl(range<Dims> NumWorkGroups,
1090  throwIfActionIsCreated();
1091  // TODO: Properties may change the kernel function, so in order to avoid
1092  // conflicts they should be included in the name.
1093  using NameT =
1095  verifyUsedKernelBundle(detail::KernelInfo<NameT>::getName());
1096  using LambdaArgType =
1097  sycl::detail::lambda_arg_type<KernelType, group<Dims>>;
1098  (void)NumWorkGroups;
1099  kernel_parallel_for_work_group_wrapper<NameT, LambdaArgType, KernelType,
1100  PropertiesT>(KernelFunc);
1101 #ifndef __SYCL_DEVICE_ONLY__
1102  detail::checkValueRange<Dims>(NumWorkGroups);
1103  MNDRDesc.setNumWorkGroups(NumWorkGroups);
1104  StoreLambda<NameT, KernelType, Dims, LambdaArgType>(std::move(KernelFunc));
1105  setType(detail::CG::Kernel);
1106 #endif // __SYCL_DEVICE_ONLY__
1107  }
1108 
1121  template <typename KernelName, typename KernelType, int Dims,
1122  typename PropertiesT =
1124  void parallel_for_work_group_lambda_impl(range<Dims> NumWorkGroups,
1127  throwIfActionIsCreated();
1128  // TODO: Properties may change the kernel function, so in order to avoid
1129  // conflicts they should be included in the name.
1130  using NameT =
1132  verifyUsedKernelBundle(detail::KernelInfo<NameT>::getName());
1133  using LambdaArgType =
1134  sycl::detail::lambda_arg_type<KernelType, group<Dims>>;
1135  (void)NumWorkGroups;
1136  (void)WorkGroupSize;
1137  kernel_parallel_for_work_group_wrapper<NameT, LambdaArgType, KernelType,
1138  PropertiesT>(KernelFunc);
1139 #ifndef __SYCL_DEVICE_ONLY__
1140  nd_range<Dims> ExecRange =
1141  nd_range<Dims>(NumWorkGroups * WorkGroupSize, WorkGroupSize);
1142  detail::checkValueRange<Dims>(ExecRange);
1143  MNDRDesc.set(std::move(ExecRange));
1144  StoreLambda<NameT, KernelType, Dims, LambdaArgType>(std::move(KernelFunc));
1145  setType(detail::CG::Kernel);
1146 #endif // __SYCL_DEVICE_ONLY__
1147  }
1148 
1149 #ifdef SYCL_LANGUAGE_VERSION
1150 #define __SYCL_KERNEL_ATTR__ [[clang::sycl_kernel]]
1151 #else
1152 #define __SYCL_KERNEL_ATTR__
1153 #endif
1154 
1155  // NOTE: the name of this function - "kernel_single_task" - is used by the
1156  // Front End to determine kernel invocation kind.
1157  template <typename KernelName, typename KernelType, typename... Props>
1158 #ifdef __SYCL_DEVICE_ONLY__
1159  [[__sycl_detail__::add_ir_attributes_function(
1160  ext::oneapi::experimental::detail::PropertyMetaInfo<Props>::name...,
1161  ext::oneapi::experimental::detail::PropertyMetaInfo<Props>::value...)]]
1162 #endif
1164  kernel_single_task(_KERNELFUNCPARAM(KernelFunc)) {
1165 #ifdef __SYCL_DEVICE_ONLY__
1166  KernelFunc();
1167 #else
1168  (void)KernelFunc;
1169 #endif
1170  }
1171 
1172  // NOTE: the name of this function - "kernel_single_task" - is used by the
1173  // Front End to determine kernel invocation kind.
1174  template <typename KernelName, typename KernelType, typename... Props>
1175 #ifdef __SYCL_DEVICE_ONLY__
1176  [[__sycl_detail__::add_ir_attributes_function(
1177  ext::oneapi::experimental::detail::PropertyMetaInfo<Props>::name...,
1178  ext::oneapi::experimental::detail::PropertyMetaInfo<Props>::value...)]]
1179 #endif
1181  kernel_single_task(_KERNELFUNCPARAM(KernelFunc), kernel_handler KH) {
1182 #ifdef __SYCL_DEVICE_ONLY__
1183  KernelFunc(KH);
1184 #else
1185  (void)KernelFunc;
1186  (void)KH;
1187 #endif
1188  }
1189 
1190  // NOTE: the name of these functions - "kernel_parallel_for" - are used by the
1191  // Front End to determine kernel invocation kind.
1192  template <typename KernelName, typename ElementType, typename KernelType,
1193  typename... Props>
1194 #ifdef __SYCL_DEVICE_ONLY__
1195  [[__sycl_detail__::add_ir_attributes_function(
1196  ext::oneapi::experimental::detail::PropertyMetaInfo<Props>::name...,
1197  ext::oneapi::experimental::detail::PropertyMetaInfo<Props>::value...)]]
1198 #endif
1200  kernel_parallel_for(_KERNELFUNCPARAM(KernelFunc)) {
1201 #ifdef __SYCL_DEVICE_ONLY__
1202  KernelFunc(detail::Builder::getElement(detail::declptr<ElementType>()));
1203 #else
1204  (void)KernelFunc;
1205 #endif
1206  }
1207 
1208  // NOTE: the name of these functions - "kernel_parallel_for" - are used by the
1209  // Front End to determine kernel invocation kind.
1210  template <typename KernelName, typename ElementType, typename KernelType,
1211  typename... Props>
1212 #ifdef __SYCL_DEVICE_ONLY__
1213  [[__sycl_detail__::add_ir_attributes_function(
1214  ext::oneapi::experimental::detail::PropertyMetaInfo<Props>::name...,
1215  ext::oneapi::experimental::detail::PropertyMetaInfo<Props>::value...)]]
1216 #endif
1218  kernel_parallel_for(_KERNELFUNCPARAM(KernelFunc), kernel_handler KH) {
1219 #ifdef __SYCL_DEVICE_ONLY__
1220  KernelFunc(detail::Builder::getElement(detail::declptr<ElementType>()), KH);
1221 #else
1222  (void)KernelFunc;
1223  (void)KH;
1224 #endif
1225  }
1226 
1227  // NOTE: the name of this function - "kernel_parallel_for_work_group" - is
1228  // used by the Front End to determine kernel invocation kind.
1229  template <typename KernelName, typename ElementType, typename KernelType,
1230  typename... Props>
1231 #ifdef __SYCL_DEVICE_ONLY__
1232  [[__sycl_detail__::add_ir_attributes_function(
1233  ext::oneapi::experimental::detail::PropertyMetaInfo<Props>::name...,
1234  ext::oneapi::experimental::detail::PropertyMetaInfo<Props>::value...)]]
1235 #endif
1237  kernel_parallel_for_work_group(_KERNELFUNCPARAM(KernelFunc)) {
1238 #ifdef __SYCL_DEVICE_ONLY__
1239  KernelFunc(detail::Builder::getElement(detail::declptr<ElementType>()));
1240 #else
1241  (void)KernelFunc;
1242 #endif
1243  }
1244 
1245  // NOTE: the name of this function - "kernel_parallel_for_work_group" - is
1246  // used by the Front End to determine kernel invocation kind.
1247  template <typename KernelName, typename ElementType, typename KernelType,
1248  typename... Props>
1249 #ifdef __SYCL_DEVICE_ONLY__
1250  [[__sycl_detail__::add_ir_attributes_function(
1251  ext::oneapi::experimental::detail::PropertyMetaInfo<Props>::name...,
1252  ext::oneapi::experimental::detail::PropertyMetaInfo<Props>::value...)]]
1253 #endif
1255  kernel_parallel_for_work_group(_KERNELFUNCPARAM(KernelFunc),
1256  kernel_handler KH) {
1257 #ifdef __SYCL_DEVICE_ONLY__
1258  KernelFunc(detail::Builder::getElement(detail::declptr<ElementType>()), KH);
1259 #else
1260  (void)KernelFunc;
1261  (void)KH;
1262 #endif
1263  }
1264 
1265  template <typename... Props> struct KernelPropertiesUnpackerImpl {
1266  // Just pass extra Props... as template parameters to the underlying
1267  // Caller->* member functions. Don't have reflection so try to use
1268  // templates as much as possible to reduce the amount of boilerplate code
1269  // needed. All the type checks are expected to be done at the Caller's
1270  // methods side.
1271 
1272  template <typename... TypesToForward, typename... ArgsTy>
1273  static void kernel_single_task_unpack(handler *h, ArgsTy... Args) {
1274  h->kernel_single_task<TypesToForward..., Props...>(Args...);
1275  }
1276 
1277  template <typename... TypesToForward, typename... ArgsTy>
1278  static void kernel_parallel_for_unpack(handler *h, ArgsTy... Args) {
1279  h->kernel_parallel_for<TypesToForward..., Props...>(Args...);
1280  }
1281 
1282  template <typename... TypesToForward, typename... ArgsTy>
1283  static void kernel_parallel_for_work_group_unpack(handler *h,
1284  ArgsTy... Args) {
1285  h->kernel_parallel_for_work_group<TypesToForward..., Props...>(Args...);
1286  }
1287  };
1288 
1289  template <typename PropertiesT>
1290  struct KernelPropertiesUnpacker : public KernelPropertiesUnpackerImpl<> {
1291  // This should always fail outside the specialization below but must be
1292  // dependent to avoid failing even if not instantiated.
1293  static_assert(
1294  ext::oneapi::experimental::is_property_list<PropertiesT>::value,
1295  "Template type is not a property list.");
1296  };
1297 
1298  template <typename... Props>
1299  struct KernelPropertiesUnpacker<
1301  : public KernelPropertiesUnpackerImpl<Props...> {};
1302 
1303  // Helper function to
1304  //
1305  // * Make use of the KernelPropertiesUnpacker above
1306  // * Decide if we need an extra kernel_handler parameter
1307  //
1308  // The interface uses a \p Lambda callback to propagate that information back
1309  // to the caller as we need the caller to communicate:
1310  //
1311  // * Name of the method to call
1312  // * Provide explicit template type parameters for the call
1313  //
1314  // Couldn't think of a better way to achieve both.
1315  template <typename KernelType, typename PropertiesT, bool HasKernelHandlerArg,
1316  typename FuncTy>
1317  void unpack(_KERNELFUNCPARAM(KernelFunc), FuncTy Lambda) {
1318 #ifdef __SYCL_DEVICE_ONLY__
1319  detail::CheckDeviceCopyable<KernelType>();
1320 #endif // __SYCL_DEVICE_ONLY__
1321  using MergedPropertiesT =
1322  typename detail::GetMergedKernelProperties<KernelType,
1323  PropertiesT>::type;
1324  using Unpacker = KernelPropertiesUnpacker<MergedPropertiesT>;
1325  if constexpr (HasKernelHandlerArg) {
1326  kernel_handler KH;
1327  Lambda(Unpacker{}, this, KernelFunc, KH);
1328  } else {
1329  Lambda(Unpacker{}, this, KernelFunc);
1330  }
1331  }
1332 
1333  // NOTE: to support kernel_handler argument in kernel lambdas, only
1334  // kernel_***_wrapper functions must be called in this code
1335 
1336  template <typename KernelName, typename KernelType,
1337  typename PropertiesT =
1339  void kernel_single_task_wrapper(_KERNELFUNCPARAM(KernelFunc)) {
1340  unpack<KernelType, PropertiesT,
1341  detail::KernelLambdaHasKernelHandlerArgT<KernelType>::value>(
1342  KernelFunc, [&](auto Unpacker, auto... args) {
1343  Unpacker.template kernel_single_task_unpack<KernelName, KernelType>(
1344  args...);
1345  });
1346  }
1347 
1348  template <typename KernelName, typename ElementType, typename KernelType,
1349  typename PropertiesT =
1351  void kernel_parallel_for_wrapper(_KERNELFUNCPARAM(KernelFunc)) {
1352  unpack<KernelType, PropertiesT,
1353  detail::KernelLambdaHasKernelHandlerArgT<KernelType,
1354  ElementType>::value>(
1355  KernelFunc, [&](auto Unpacker, auto... args) {
1356  Unpacker.template kernel_parallel_for_unpack<KernelName, ElementType,
1357  KernelType>(args...);
1358  });
1359  }
1360 
1361  template <typename KernelName, typename ElementType, typename KernelType,
1362  typename PropertiesT =
1364  void kernel_parallel_for_work_group_wrapper(_KERNELFUNCPARAM(KernelFunc)) {
1365  unpack<KernelType, PropertiesT,
1366  detail::KernelLambdaHasKernelHandlerArgT<KernelType,
1367  ElementType>::value>(
1368  KernelFunc, [&](auto Unpacker, auto... args) {
1369  Unpacker.template kernel_parallel_for_work_group_unpack<
1370  KernelName, ElementType, KernelType>(args...);
1371  });
1372  }
1373 
1381  template <typename KernelName, typename KernelType,
1382  typename PropertiesT =
1384  void single_task_lambda_impl(_KERNELFUNCPARAM(KernelFunc)) {
1385  throwIfActionIsCreated();
1386  // TODO: Properties may change the kernel function, so in order to avoid
1387  // conflicts they should be included in the name.
1388  using NameT =
1389  typename detail::get_kernel_name_t<KernelName, KernelType>::name;
1390  verifyUsedKernelBundle(detail::KernelInfo<NameT>::getName());
1391  kernel_single_task_wrapper<NameT, KernelType, PropertiesT>(KernelFunc);
1392 #ifndef __SYCL_DEVICE_ONLY__
1393  // No need to check if range is out of INT_MAX limits as it's compile-time
1394  // known constant.
1395  MNDRDesc.set(range<1>{1});
1396 
1397  StoreLambda<NameT, KernelType, /*Dims*/ 1, void>(KernelFunc);
1398  setType(detail::CG::Kernel);
1399 #endif
1400  }
1401 
1402  void setStateExplicitKernelBundle();
1403  void setStateSpecConstSet();
1404  bool isStateExplicitKernelBundle() const;
1405 
1406  std::shared_ptr<detail::kernel_bundle_impl>
1407  getOrInsertHandlerKernelBundle(bool Insert) const;
1408 
1409  void setHandlerKernelBundle(kernel Kernel);
1410 
1411  void setHandlerKernelBundle(
1412  const std::shared_ptr<detail::kernel_bundle_impl> &NewKernelBundleImpPtr);
1413 
1414  template <typename FuncT>
1416  detail::check_fn_signature<detail::remove_reference_t<FuncT>,
1417  void()>::value ||
1418  detail::check_fn_signature<detail::remove_reference_t<FuncT>,
1419  void(interop_handle)>::value>
1420  host_task_impl(FuncT &&Func) {
1421  throwIfActionIsCreated();
1422 
1423  MNDRDesc.set(range<1>(1));
1424  MArgs = std::move(MAssociatedAccesors);
1425 
1426  MHostTask.reset(new detail::HostTask(std::move(Func)));
1427 
1428  setType(detail::CG::CodeplayHostTask);
1429  }
1430 
1431 public:
1432  handler(const handler &) = delete;
1433  handler(handler &&) = delete;
1434  handler &operator=(const handler &) = delete;
1435  handler &operator=(handler &&) = delete;
1436 
1437  template <auto &SpecName>
1439  typename std::remove_reference_t<decltype(SpecName)>::value_type Value) {
1440 
1441  setStateSpecConstSet();
1442 
1443  std::shared_ptr<detail::kernel_bundle_impl> KernelBundleImplPtr =
1444  getOrInsertHandlerKernelBundle(/*Insert=*/true);
1445 
1446  detail::createSyclObjFromImpl<kernel_bundle<bundle_state::input>>(
1448  .set_specialization_constant<SpecName>(Value);
1449  }
1450 
1451  template <auto &SpecName>
1452  typename std::remove_reference_t<decltype(SpecName)>::value_type
1454 
1455  if (isStateExplicitKernelBundle())
1456  throw sycl::exception(make_error_code(errc::invalid),
1457  "Specialization constants cannot be read after "
1458  "explicitly setting the used kernel bundle");
1459 
1460  std::shared_ptr<detail::kernel_bundle_impl> KernelBundleImplPtr =
1461  getOrInsertHandlerKernelBundle(/*Insert=*/true);
1462 
1463  return detail::createSyclObjFromImpl<kernel_bundle<bundle_state::input>>(
1465  .get_specialization_constant<SpecName>();
1466  }
1467 
1468  void
1469  use_kernel_bundle(const kernel_bundle<bundle_state::executable> &ExecBundle);
1470 
1479  template <typename DataT, int Dims, access::mode AccMode,
1480  access::target AccTarget, access::placeholder isPlaceholder>
1482  if (Acc.is_placeholder())
1483  associateWithHandler(&Acc, AccTarget);
1484  }
1485 
1489  void depends_on(event Event);
1490 
1494  void depends_on(const std::vector<event> &Events);
1495 
1496  template <typename T>
1499 
1500  template <typename U, typename T>
1501  using is_same_type = std::is_same<remove_cv_ref_t<U>, remove_cv_ref_t<T>>;
1502 
1503  template <typename T> struct ShouldEnableSetArg {
1504  static constexpr bool value =
1505  std::is_trivially_copyable<detail::remove_reference_t<T>>::value
1506 #if SYCL_LANGUAGE_VERSION && SYCL_LANGUAGE_VERSION <= 201707
1507  && std::is_standard_layout<detail::remove_reference_t<T>>::value
1508 #endif
1509  || is_same_type<sampler, T>::value // Sampler
1511  std::is_pointer<remove_cv_ref_t<T>>::value) // USM
1512  || is_same_type<cl_mem, T>::value; // Interop
1513  };
1514 
1521  template <typename T>
1523  set_arg(int ArgIndex, T &&Arg) {
1524  setArgHelper(ArgIndex, std::move(Arg));
1525  }
1526 
1527  template <typename DataT, int Dims, access::mode AccessMode,
1529  void
1530  set_arg(int ArgIndex,
1532  setArgHelper(ArgIndex, std::move(Arg));
1533  }
1534 
1535  template <typename DataT, int Dims>
1536  void set_arg(int ArgIndex, local_accessor<DataT, Dims> Arg) {
1537  setArgHelper(ArgIndex, std::move(Arg));
1538  }
1539 
1545  template <typename... Ts> void set_args(Ts &&...Args) {
1546  setArgsHelper(0, std::move(Args)...);
1547  }
1548 
1556  template <typename KernelName = detail::auto_name, typename KernelType>
1558  single_task_lambda_impl<KernelName>(KernelFunc);
1559  }
1560 
1561  template <typename KernelName = detail::auto_name, typename KernelType>
1563  parallel_for_lambda_impl<KernelName>(NumWorkItems, std::move(KernelFunc));
1564  }
1565 
1566  template <typename KernelName = detail::auto_name, typename KernelType>
1568  parallel_for_lambda_impl<KernelName>(NumWorkItems, std::move(KernelFunc));
1569  }
1570 
1571  template <typename KernelName = detail::auto_name, typename KernelType>
1573  parallel_for_lambda_impl<KernelName>(NumWorkItems, std::move(KernelFunc));
1574  }
1575 
1580  template <typename FuncT>
1582  "run_on_host_intel() is deprecated, use host_task() instead")
1583  void run_on_host_intel(FuncT Func) {
1584  throwIfActionIsCreated();
1585  // No need to check if range is out of INT_MAX limits as it's compile-time
1586  // known constant
1587  MNDRDesc.set(range<1>{1});
1588 
1589  MArgs = std::move(MAssociatedAccesors);
1590  MHostKernel.reset(new detail::HostKernel<FuncT, void, 1>(std::move(Func)));
1591  setType(detail::CG::RunOnHostIntel);
1592  }
1593 
1595  template <typename FuncT>
1598  void()>::value ||
1600  void(interop_handle)>::value>
1601  host_task(FuncT &&Func) {
1602  host_task_impl(Func);
1603  }
1604 
1618  template <typename KernelName = detail::auto_name, typename KernelType,
1619  int Dims>
1620  __SYCL2020_DEPRECATED("offsets are deprecated in SYCL2020")
1621  void parallel_for(range<Dims> NumWorkItems, id<Dims> WorkItemOffset,
1623  throwIfActionIsCreated();
1624  using NameT =
1626  verifyUsedKernelBundle(detail::KernelInfo<NameT>::getName());
1627  using LambdaArgType = sycl::detail::lambda_arg_type<KernelType, item<Dims>>;
1628  (void)NumWorkItems;
1629  (void)WorkItemOffset;
1630  kernel_parallel_for_wrapper<NameT, LambdaArgType>(KernelFunc);
1631 #ifndef __SYCL_DEVICE_ONLY__
1632  detail::checkValueRange<Dims>(NumWorkItems, WorkItemOffset);
1633  MNDRDesc.set(std::move(NumWorkItems), std::move(WorkItemOffset));
1634  StoreLambda<NameT, KernelType, Dims, LambdaArgType>(std::move(KernelFunc));
1635  setType(detail::CG::Kernel);
1636 #endif
1637  }
1638 
1649  template <typename KernelName = detail::auto_name, typename KernelType,
1650  int Dims>
1653  parallel_for_work_group_lambda_impl<KernelName>(NumWorkGroups, KernelFunc);
1654  }
1655 
1668  template <typename KernelName = detail::auto_name, typename KernelType,
1669  int Dims>
1673  parallel_for_work_group_lambda_impl<KernelName>(NumWorkGroups,
1675  }
1676 
1683  void single_task(kernel Kernel) {
1684  throwIfActionIsCreated();
1685  // Ignore any set kernel bundles and use the one associated with the kernel
1686  setHandlerKernelBundle(Kernel);
1687  // No need to check if range is out of INT_MAX limits as it's compile-time
1688  // known constant
1689  MNDRDesc.set(range<1>{1});
1690  MKernel = detail::getSyclObjImpl(std::move(Kernel));
1691  setType(detail::CG::Kernel);
1692  extractArgsAndReqs();
1693  MKernelName = getKernelName();
1694  }
1695 
1696  void parallel_for(range<1> NumWorkItems, kernel Kernel) {
1697  parallel_for_impl(NumWorkItems, Kernel);
1698  }
1699 
1700  void parallel_for(range<2> NumWorkItems, kernel Kernel) {
1701  parallel_for_impl(NumWorkItems, Kernel);
1702  }
1703 
1704  void parallel_for(range<3> NumWorkItems, kernel Kernel) {
1705  parallel_for_impl(NumWorkItems, Kernel);
1706  }
1707 
1716  template <int Dims>
1717  __SYCL2020_DEPRECATED("offsets are deprecated in SYCL 2020")
1718  void parallel_for(range<Dims> NumWorkItems, id<Dims> WorkItemOffset,
1719  kernel Kernel) {
1720  throwIfActionIsCreated();
1721  MKernel = detail::getSyclObjImpl(std::move(Kernel));
1722  detail::checkValueRange<Dims>(NumWorkItems, WorkItemOffset);
1723  MNDRDesc.set(std::move(NumWorkItems), std::move(WorkItemOffset));
1724  setType(detail::CG::Kernel);
1725  extractArgsAndReqs();
1726  MKernelName = getKernelName();
1727  }
1728 
1737  template <int Dims> void parallel_for(nd_range<Dims> NDRange, kernel Kernel) {
1738  throwIfActionIsCreated();
1739  MKernel = detail::getSyclObjImpl(std::move(Kernel));
1740  detail::checkValueRange<Dims>(NDRange);
1741  MNDRDesc.set(std::move(NDRange));
1742  setType(detail::CG::Kernel);
1743  extractArgsAndReqs();
1744  MKernelName = getKernelName();
1745  }
1746 
1753  template <typename KernelName = detail::auto_name, typename KernelType>
1755  throwIfActionIsCreated();
1756  // Ignore any set kernel bundles and use the one associated with the kernel
1757  setHandlerKernelBundle(Kernel);
1758  using NameT =
1760  verifyUsedKernelBundle(detail::KernelInfo<NameT>::getName());
1761  (void)Kernel;
1762  kernel_single_task<NameT>(KernelFunc);
1763 #ifndef __SYCL_DEVICE_ONLY__
1764  // No need to check if range is out of INT_MAX limits as it's compile-time
1765  // known constant
1766  MNDRDesc.set(range<1>{1});
1767  MKernel = detail::getSyclObjImpl(std::move(Kernel));
1768  setType(detail::CG::Kernel);
1769  if (!MIsHost && !lambdaAndKernelHaveEqualName<NameT>()) {
1770  extractArgsAndReqs();
1771  MKernelName = getKernelName();
1772  } else
1773  StoreLambda<NameT, KernelType, /*Dims*/ 1, void>(std::move(KernelFunc));
1774 #else
1775  detail::CheckDeviceCopyable<KernelType>();
1776 #endif
1777  }
1778 
1782  template <typename FuncT>
1783  __SYCL_DEPRECATED("interop_task() is deprecated, use host_task() instead")
1784  void interop_task(FuncT Func) {
1785 
1786  MInteropTask.reset(new detail::InteropTask(std::move(Func)));
1787  setType(detail::CG::CodeplayInteropTask);
1788  }
1789 
1797  template <typename KernelName = detail::auto_name, typename KernelType,
1798  int Dims>
1799  void parallel_for(kernel Kernel, range<Dims> NumWorkItems,
1801  throwIfActionIsCreated();
1802  // Ignore any set kernel bundles and use the one associated with the kernel
1803  setHandlerKernelBundle(Kernel);
1804  using NameT =
1806  verifyUsedKernelBundle(detail::KernelInfo<NameT>::getName());
1807  using LambdaArgType = sycl::detail::lambda_arg_type<KernelType, item<Dims>>;
1808  (void)Kernel;
1809  (void)NumWorkItems;
1810  kernel_parallel_for_wrapper<NameT, LambdaArgType>(KernelFunc);
1811 #ifndef __SYCL_DEVICE_ONLY__
1812  detail::checkValueRange<Dims>(NumWorkItems);
1813  MNDRDesc.set(std::move(NumWorkItems));
1814  MKernel = detail::getSyclObjImpl(std::move(Kernel));
1815  setType(detail::CG::Kernel);
1816  if (!MIsHost && !lambdaAndKernelHaveEqualName<NameT>()) {
1817  extractArgsAndReqs();
1818  MKernelName = getKernelName();
1819  } else
1820  StoreLambda<NameT, KernelType, Dims, LambdaArgType>(
1821  std::move(KernelFunc));
1822 #endif
1823  }
1824 
1834  template <typename KernelName = detail::auto_name, typename KernelType,
1835  int Dims>
1836  __SYCL2020_DEPRECATED("offsets are deprecated in SYCL 2020")
1837  void parallel_for(kernel Kernel, range<Dims> NumWorkItems,
1838  id<Dims> WorkItemOffset, _KERNELFUNCPARAM(KernelFunc)) {
1839  throwIfActionIsCreated();
1840  // Ignore any set kernel bundles and use the one associated with the kernel
1841  setHandlerKernelBundle(Kernel);
1842  using NameT =
1844  verifyUsedKernelBundle(detail::KernelInfo<NameT>::getName());
1845  using LambdaArgType = sycl::detail::lambda_arg_type<KernelType, item<Dims>>;
1846  (void)Kernel;
1847  (void)NumWorkItems;
1848  (void)WorkItemOffset;
1849  kernel_parallel_for_wrapper<NameT, LambdaArgType>(KernelFunc);
1850 #ifndef __SYCL_DEVICE_ONLY__
1851  detail::checkValueRange<Dims>(NumWorkItems, WorkItemOffset);
1852  MNDRDesc.set(std::move(NumWorkItems), std::move(WorkItemOffset));
1853  MKernel = detail::getSyclObjImpl(std::move(Kernel));
1854  setType(detail::CG::Kernel);
1855  if (!MIsHost && !lambdaAndKernelHaveEqualName<NameT>()) {
1856  extractArgsAndReqs();
1857  MKernelName = getKernelName();
1858  } else
1859  StoreLambda<NameT, KernelType, Dims, LambdaArgType>(
1860  std::move(KernelFunc));
1861 #endif
1862  }
1863 
1873  template <typename KernelName = detail::auto_name, typename KernelType,
1874  int Dims>
1875  void parallel_for(kernel Kernel, nd_range<Dims> NDRange,
1877  throwIfActionIsCreated();
1878  // Ignore any set kernel bundles and use the one associated with the kernel
1879  setHandlerKernelBundle(Kernel);
1880  using NameT =
1882  verifyUsedKernelBundle(detail::KernelInfo<NameT>::getName());
1883  using LambdaArgType =
1884  sycl::detail::lambda_arg_type<KernelType, nd_item<Dims>>;
1885  (void)Kernel;
1886  (void)NDRange;
1887  kernel_parallel_for_wrapper<NameT, LambdaArgType>(KernelFunc);
1888 #ifndef __SYCL_DEVICE_ONLY__
1889  detail::checkValueRange<Dims>(NDRange);
1890  MNDRDesc.set(std::move(NDRange));
1891  MKernel = detail::getSyclObjImpl(std::move(Kernel));
1892  setType(detail::CG::Kernel);
1893  if (!MIsHost && !lambdaAndKernelHaveEqualName<NameT>()) {
1894  extractArgsAndReqs();
1895  MKernelName = getKernelName();
1896  } else
1897  StoreLambda<NameT, KernelType, Dims, LambdaArgType>(
1898  std::move(KernelFunc));
1899 #endif
1900  }
1901 
1915  template <typename KernelName = detail::auto_name, typename KernelType,
1916  int Dims>
1917  void parallel_for_work_group(kernel Kernel, range<Dims> NumWorkGroups,
1919  throwIfActionIsCreated();
1920  // Ignore any set kernel bundles and use the one associated with the kernel
1921  setHandlerKernelBundle(Kernel);
1922  using NameT =
1924  verifyUsedKernelBundle(detail::KernelInfo<NameT>::getName());
1925  using LambdaArgType =
1926  sycl::detail::lambda_arg_type<KernelType, group<Dims>>;
1927  (void)Kernel;
1928  (void)NumWorkGroups;
1929  kernel_parallel_for_work_group_wrapper<NameT, LambdaArgType>(KernelFunc);
1930 #ifndef __SYCL_DEVICE_ONLY__
1931  detail::checkValueRange<Dims>(NumWorkGroups);
1932  MNDRDesc.setNumWorkGroups(NumWorkGroups);
1933  MKernel = detail::getSyclObjImpl(std::move(Kernel));
1934  StoreLambda<NameT, KernelType, Dims, LambdaArgType>(std::move(KernelFunc));
1935  setType(detail::CG::Kernel);
1936 #endif // __SYCL_DEVICE_ONLY__
1937  }
1938 
1954  template <typename KernelName = detail::auto_name, typename KernelType,
1955  int Dims>
1956  void parallel_for_work_group(kernel Kernel, range<Dims> NumWorkGroups,
1959  throwIfActionIsCreated();
1960  // Ignore any set kernel bundles and use the one associated with the kernel
1961  setHandlerKernelBundle(Kernel);
1962  using NameT =
1964  verifyUsedKernelBundle(detail::KernelInfo<NameT>::getName());
1965  using LambdaArgType =
1966  sycl::detail::lambda_arg_type<KernelType, group<Dims>>;
1967  (void)Kernel;
1968  (void)NumWorkGroups;
1969  (void)WorkGroupSize;
1970  kernel_parallel_for_work_group_wrapper<NameT, LambdaArgType>(KernelFunc);
1971 #ifndef __SYCL_DEVICE_ONLY__
1972  nd_range<Dims> ExecRange =
1973  nd_range<Dims>(NumWorkGroups * WorkGroupSize, WorkGroupSize);
1974  detail::checkValueRange<Dims>(ExecRange);
1975  MNDRDesc.set(std::move(ExecRange));
1976  MKernel = detail::getSyclObjImpl(std::move(Kernel));
1977  StoreLambda<NameT, KernelType, Dims, LambdaArgType>(std::move(KernelFunc));
1978  setType(detail::CG::Kernel);
1979 #endif // __SYCL_DEVICE_ONLY__
1980  }
1981 
1982  template <typename KernelName = detail::auto_name, typename KernelType,
1983  typename PropertiesT>
1987  single_task_lambda_impl<KernelName, KernelType, PropertiesT>(KernelFunc);
1988  }
1989 
1990  template <typename KernelName = detail::auto_name, typename KernelType,
1991  typename PropertiesT>
1994  parallel_for(range<1> NumWorkItems, PropertiesT,
1996  parallel_for_lambda_impl<KernelName, KernelType, 1, PropertiesT>(
1997  NumWorkItems, std::move(KernelFunc));
1998  }
1999 
2000  template <typename KernelName = detail::auto_name, typename KernelType,
2001  typename PropertiesT>
2004  parallel_for(range<2> NumWorkItems, PropertiesT,
2006  parallel_for_lambda_impl<KernelName, KernelType, 2, PropertiesT>(
2007  NumWorkItems, std::move(KernelFunc));
2008  }
2009 
2010  template <typename KernelName = detail::auto_name, typename KernelType,
2011  typename PropertiesT>
2014  parallel_for(range<3> NumWorkItems, PropertiesT,
2016  parallel_for_lambda_impl<KernelName, KernelType, 3, PropertiesT>(
2017  NumWorkItems, std::move(KernelFunc));
2018  }
2019 
2020  template <typename KernelName = detail::auto_name, typename KernelType,
2021  typename PropertiesT, int Dims>
2024  parallel_for(nd_range<Dims> Range, PropertiesT Properties,
2026  parallel_for_impl<KernelName>(Range, Properties, std::move(KernelFunc));
2027  }
2028 
2030 
2031  template <typename KernelName = detail::auto_name, int Dims,
2032  typename PropertiesT, typename... RestT>
2034  (sizeof...(RestT) > 1) &&
2035  detail::AreAllButLastReductions<RestT...>::value &&
2037  parallel_for(range<Dims> Range, PropertiesT Properties, RestT &&...Rest) {
2038  detail::reduction_parallel_for<KernelName>(*this, Range, Properties,
2039  std::forward<RestT>(Rest)...);
2040  }
2041 
2042  template <typename KernelName = detail::auto_name, int Dims,
2043  typename... RestT>
2045  parallel_for(range<Dims> Range, RestT &&...Rest) {
2046  parallel_for<KernelName>(
2048  std::forward<RestT>(Rest)...);
2049  }
2050 
2051  template <typename KernelName = detail::auto_name, int Dims,
2052  typename PropertiesT, typename... RestT>
2054  (sizeof...(RestT) > 1) &&
2055  detail::AreAllButLastReductions<RestT...>::value &&
2057  parallel_for(nd_range<Dims> Range, PropertiesT Properties, RestT &&...Rest) {
2058  detail::reduction_parallel_for<KernelName>(*this, Range, Properties,
2059  std::forward<RestT>(Rest)...);
2060  }
2061 
2062  template <typename KernelName = detail::auto_name, int Dims,
2063  typename... RestT>
2065  parallel_for(nd_range<Dims> Range, RestT &&...Rest) {
2066  parallel_for<KernelName>(
2068  std::forward<RestT>(Rest)...);
2069  }
2070 
2072 
2073  template <typename KernelName = detail::auto_name, typename KernelType,
2074  int Dims, typename PropertiesT>
2075  void parallel_for_work_group(range<Dims> NumWorkGroups, PropertiesT,
2077  parallel_for_work_group_lambda_impl<KernelName, KernelType, Dims,
2078  PropertiesT>(NumWorkGroups, KernelFunc);
2079  }
2080 
2081  template <typename KernelName = detail::auto_name, typename KernelType,
2082  int Dims, typename PropertiesT>
2084  range<Dims> WorkGroupSize, PropertiesT,
2086  parallel_for_work_group_lambda_impl<KernelName, KernelType, Dims,
2087  PropertiesT>(NumWorkGroups,
2089  }
2090 
2091  // Clean up KERNELFUNC macro.
2092 #undef _KERNELFUNCPARAM
2093 
2094  // Explicit copy operations API
2095 
2103  template <typename T_Src, typename T_Dst, int Dims, access::mode AccessMode,
2104  access::target AccessTarget,
2105  access::placeholder IsPlaceholder = access::placeholder::false_t>
2107  std::shared_ptr<T_Dst> Dst) {
2108  throwIfActionIsCreated();
2109  static_assert(isValidTargetForExplicitOp(AccessTarget),
2110  "Invalid accessor target for the copy method.");
2111  static_assert(isValidModeForSourceAccessor(AccessMode),
2112  "Invalid accessor mode for the copy method.");
2113  // Make sure data shared_ptr points to is not released until we finish
2114  // work with it.
2115  MSharedPtrStorage.push_back(Dst);
2116  typename std::shared_ptr<T_Dst>::element_type *RawDstPtr = Dst.get();
2117  copy(Src, RawDstPtr);
2118  }
2119 
2127  template <typename T_Src, typename T_Dst, int Dims, access::mode AccessMode,
2128  access::target AccessTarget,
2129  access::placeholder IsPlaceholder = access::placeholder::false_t>
2130  void
2131  copy(std::shared_ptr<T_Src> Src,
2133  throwIfActionIsCreated();
2134  static_assert(isValidTargetForExplicitOp(AccessTarget),
2135  "Invalid accessor target for the copy method.");
2136  static_assert(isValidModeForDestinationAccessor(AccessMode),
2137  "Invalid accessor mode for the copy method.");
2138  // Make sure data shared_ptr points to is not released until we finish
2139  // work with it.
2140  MSharedPtrStorage.push_back(Src);
2141  typename std::shared_ptr<T_Src>::element_type *RawSrcPtr = Src.get();
2142  copy(RawSrcPtr, Dst);
2143  }
2144 
2152  template <typename T_Src, typename T_Dst, int Dims, access::mode AccessMode,
2153  access::target AccessTarget,
2154  access::placeholder IsPlaceholder = access::placeholder::false_t>
2156  T_Dst *Dst) {
2157  throwIfActionIsCreated();
2158  static_assert(isValidTargetForExplicitOp(AccessTarget),
2159  "Invalid accessor target for the copy method.");
2160  static_assert(isValidModeForSourceAccessor(AccessMode),
2161  "Invalid accessor mode for the copy method.");
2162 #ifndef __SYCL_DEVICE_ONLY__
2163  if (MIsHost) {
2164  // TODO: Temporary implementation for host. Should be handled by memory
2165  // manager.
2166  copyAccToPtrHost(Src, Dst);
2167  return;
2168  }
2169 #endif
2170  setType(detail::CG::CopyAccToPtr);
2171 
2173  detail::AccessorImplPtr AccImpl = detail::getSyclObjImpl(*AccBase);
2174 
2175  MRequirements.push_back(AccImpl.get());
2176  MSrcPtr = static_cast<void *>(AccImpl.get());
2177  MDstPtr = static_cast<void *>(Dst);
2178  // Store copy of accessor to the local storage to make sure it is alive
2179  // until we finish
2180  MAccStorage.push_back(std::move(AccImpl));
2181  }
2182 
2190  template <typename T_Src, typename T_Dst, int Dims, access::mode AccessMode,
2191  access::target AccessTarget,
2192  access::placeholder IsPlaceholder = access::placeholder::false_t>
2193  void
2194  copy(const T_Src *Src,
2196  throwIfActionIsCreated();
2197  static_assert(isValidTargetForExplicitOp(AccessTarget),
2198  "Invalid accessor target for the copy method.");
2199  static_assert(isValidModeForDestinationAccessor(AccessMode),
2200  "Invalid accessor mode for the copy method.");
2201 #ifndef __SYCL_DEVICE_ONLY__
2202  if (MIsHost) {
2203  // TODO: Temporary implementation for host. Should be handled by memory
2204  // manager.
2205  copyPtrToAccHost(Src, Dst);
2206  return;
2207  }
2208 #endif
2209  setType(detail::CG::CopyPtrToAcc);
2210 
2212  detail::AccessorImplPtr AccImpl = detail::getSyclObjImpl(*AccBase);
2213 
2214  MRequirements.push_back(AccImpl.get());
2215  MSrcPtr = const_cast<T_Src *>(Src);
2216  MDstPtr = static_cast<void *>(AccImpl.get());
2217  // Store copy of accessor to the local storage to make sure it is alive
2218  // until we finish
2219  MAccStorage.push_back(std::move(AccImpl));
2220  }
2221 
2229  template <
2230  typename T_Src, int Dims_Src, access::mode AccessMode_Src,
2231  access::target AccessTarget_Src, typename T_Dst, int Dims_Dst,
2232  access::mode AccessMode_Dst, access::target AccessTarget_Dst,
2233  access::placeholder IsPlaceholder_Src = access::placeholder::false_t,
2234  access::placeholder IsPlaceholder_Dst = access::placeholder::false_t>
2235  void copy(accessor<T_Src, Dims_Src, AccessMode_Src, AccessTarget_Src,
2236  IsPlaceholder_Src>
2237  Src,
2238  accessor<T_Dst, Dims_Dst, AccessMode_Dst, AccessTarget_Dst,
2239  IsPlaceholder_Dst>
2240  Dst) {
2241  throwIfActionIsCreated();
2242  static_assert(isValidTargetForExplicitOp(AccessTarget_Src),
2243  "Invalid source accessor target for the copy method.");
2244  static_assert(isValidTargetForExplicitOp(AccessTarget_Dst),
2245  "Invalid destination accessor target for the copy method.");
2246  static_assert(isValidModeForSourceAccessor(AccessMode_Src),
2247  "Invalid source accessor mode for the copy method.");
2248  static_assert(isValidModeForDestinationAccessor(AccessMode_Dst),
2249  "Invalid destination accessor mode for the copy method.");
2250  if (Dst.get_size() < Src.get_size())
2251  throw sycl::invalid_object_error(
2252  "The destination accessor size is too small to copy the memory into.",
2253  PI_ERROR_INVALID_OPERATION);
2254 
2255  if (copyAccToAccHelper(Src, Dst))
2256  return;
2257  setType(detail::CG::CopyAccToAcc);
2258 
2259  detail::AccessorBaseHost *AccBaseSrc = (detail::AccessorBaseHost *)&Src;
2260  detail::AccessorImplPtr AccImplSrc = detail::getSyclObjImpl(*AccBaseSrc);
2261 
2262  detail::AccessorBaseHost *AccBaseDst = (detail::AccessorBaseHost *)&Dst;
2263  detail::AccessorImplPtr AccImplDst = detail::getSyclObjImpl(*AccBaseDst);
2264 
2265  MRequirements.push_back(AccImplSrc.get());
2266  MRequirements.push_back(AccImplDst.get());
2267  MSrcPtr = AccImplSrc.get();
2268  MDstPtr = AccImplDst.get();
2269  // Store copy of accessor to the local storage to make sure it is alive
2270  // until we finish
2271  MAccStorage.push_back(std::move(AccImplSrc));
2272  MAccStorage.push_back(std::move(AccImplDst));
2273  }
2274 
2279  template <typename T, int Dims, access::mode AccessMode,
2280  access::target AccessTarget,
2281  access::placeholder IsPlaceholder = access::placeholder::false_t>
2282  void
2284  throwIfActionIsCreated();
2285  static_assert(isValidTargetForExplicitOp(AccessTarget),
2286  "Invalid accessor target for the update_host method.");
2287  setType(detail::CG::UpdateHost);
2288 
2290  detail::AccessorImplPtr AccImpl = detail::getSyclObjImpl(*AccBase);
2291 
2292  MDstPtr = static_cast<void *>(AccImpl.get());
2293  MRequirements.push_back(AccImpl.get());
2294  MAccStorage.push_back(std::move(AccImpl));
2295  }
2296 
2305  template <typename T, int Dims, access::mode AccessMode,
2306  access::target AccessTarget,
2307  access::placeholder IsPlaceholder = access::placeholder::false_t,
2308  typename PropertyListT = property_list>
2309  void
2311  Dst,
2312  const T &Pattern) {
2313  throwIfActionIsCreated();
2314  // TODO add check:T must be an integral scalar value or a SYCL vector type
2315  static_assert(isValidTargetForExplicitOp(AccessTarget),
2316  "Invalid accessor target for the fill method.");
2317  if (!MIsHost && (((Dims == 1) && isConstOrGlobal(AccessTarget)) ||
2318  isImageOrImageArray(AccessTarget))) {
2319  setType(detail::CG::Fill);
2320 
2322  detail::AccessorImplPtr AccImpl = detail::getSyclObjImpl(*AccBase);
2323 
2324  MDstPtr = static_cast<void *>(AccImpl.get());
2325  MRequirements.push_back(AccImpl.get());
2326  MAccStorage.push_back(std::move(AccImpl));
2327 
2328  MPattern.resize(sizeof(T));
2329  auto PatternPtr = reinterpret_cast<T *>(MPattern.data());
2330  *PatternPtr = Pattern;
2331  } else {
2332 
2333  // TODO: Temporary implementation for host. Should be handled by memory
2334  // manger.
2335  range<Dims> Range = Dst.get_range();
2336  parallel_for<
2337  class __fill<T, Dims, AccessMode, AccessTarget, IsPlaceholder>>(
2338  Range, [=](id<Dims> Index) { Dst[Index] = Pattern; });
2339  }
2340  }
2341 
2348  template <typename T> void fill(void *Ptr, const T &Pattern, size_t Count) {
2349  throwIfActionIsCreated();
2350  static_assert(std::is_trivially_copyable<T>::value,
2351  "Pattern must be trivially copyable");
2352  parallel_for<class __usmfill<T>>(range<1>(Count), [=](id<1> Index) {
2353  T *CastedPtr = static_cast<T *>(Ptr);
2354  CastedPtr[Index] = Pattern;
2355  });
2356  }
2357 
2362  throwIfActionIsCreated();
2363  setType(detail::CG::Barrier);
2364  }
2365 
// Deprecated alias kept for source compatibility; forwards to
// ext_oneapi_barrier().
 2369  __SYCL2020_DEPRECATED("use 'ext_oneapi_barrier' instead")
 2370  void barrier() { ext_oneapi_barrier(); }
2371 
2378  void ext_oneapi_barrier(const std::vector<event> &WaitList);
2379 
2386  __SYCL2020_DEPRECATED("use 'ext_oneapi_barrier' instead")
2387  void barrier(const std::vector<event> &WaitList);
2388 
2399  void memcpy(void *Dest, const void *Src, size_t Count);
2400 
2411  template <typename T> void copy(const T *Src, T *Dest, size_t Count) {
2412  this->memcpy(Dest, Src, Count * sizeof(T));
2413  }
2414 
2423  void memset(void *Dest, int Value, size_t Count);
2424 
2431  void prefetch(const void *Ptr, size_t Count);
2432 
2439  void mem_advise(const void *Ptr, size_t Length, int Advice);
2440 
2458  template <typename T = unsigned char,
2459  typename = std::enable_if_t<std::is_same_v<T, unsigned char>>>
2460  void ext_oneapi_memcpy2d(void *Dest, size_t DestPitch, const void *Src,
2461  size_t SrcPitch, size_t Width, size_t Height) {
2462  throwIfActionIsCreated();
2463  if (Width > DestPitch)
2464  throw sycl::exception(sycl::make_error_code(errc::invalid),
2465  "Destination pitch must be greater than or equal "
2466  "to the width specified in 'ext_oneapi_memcpy2d'");
2467  if (Width > SrcPitch)
2468  throw sycl::exception(sycl::make_error_code(errc::invalid),
2469  "Source pitch must be greater than or equal "
2470  "to the width specified in 'ext_oneapi_memcpy2d'");
2471  // If the backends supports 2D copy we use that. Otherwise we use a fallback
2472  // kernel.
2473  if (supportsUSMMemcpy2D())
2474  ext_oneapi_memcpy2d_impl(Dest, DestPitch, Src, SrcPitch, Width, Height);
2475  else
2476  commonUSMCopy2DFallbackKernel<T>(Src, SrcPitch, Dest, DestPitch, Width,
2477  Height);
2478  }
2479 
2494  template <typename T>
2495  void ext_oneapi_copy2d(const T *Src, size_t SrcPitch, T *Dest,
2496  size_t DestPitch, size_t Width, size_t Height) {
2497  if (Width > DestPitch)
2498  throw sycl::exception(sycl::make_error_code(errc::invalid),
2499  "Destination pitch must be greater than or equal "
2500  "to the width specified in 'ext_oneapi_copy2d'");
2501  if (Width > SrcPitch)
2502  throw sycl::exception(sycl::make_error_code(errc::invalid),
2503  "Source pitch must be greater than or equal "
2504  "to the width specified in 'ext_oneapi_copy2d'");
2505  // If the backends supports 2D copy we use that. Otherwise we use a fallback
2506  // kernel.
2507  if (supportsUSMMemcpy2D())
2508  ext_oneapi_memcpy2d_impl(Dest, DestPitch * sizeof(T), Src,
2509  SrcPitch * sizeof(T), Width * sizeof(T), Height);
2510  else
2511  commonUSMCopy2DFallbackKernel<T>(Src, SrcPitch, Dest, DestPitch, Width,
2512  Height);
2513  }
2514 
2530  template <typename T = unsigned char,
2531  typename = std::enable_if_t<std::is_same_v<T, unsigned char>>>
2532  void ext_oneapi_memset2d(void *Dest, size_t DestPitch, int Value,
2533  size_t Width, size_t Height) {
2534  throwIfActionIsCreated();
2535  if (Width > DestPitch)
2536  throw sycl::exception(sycl::make_error_code(errc::invalid),
2537  "Destination pitch must be greater than or equal "
2538  "to the width specified in 'ext_oneapi_memset2d'");
2539  T CharVal = static_cast<T>(Value);
2540  // If the backends supports 2D fill we use that. Otherwise we use a fallback
2541  // kernel.
2542  if (supportsUSMMemset2D())
2543  ext_oneapi_memset2d_impl(Dest, DestPitch, Value, Width, Height);
2544  else
2545  commonUSMFill2DFallbackKernel(Dest, DestPitch, CharVal, Width, Height);
2546  }
2547 
2560  template <typename T>
2561  void ext_oneapi_fill2d(void *Dest, size_t DestPitch, const T &Pattern,
2562  size_t Width, size_t Height) {
2563  throwIfActionIsCreated();
2564  static_assert(std::is_trivially_copyable<T>::value,
2565  "Pattern must be trivially copyable");
2566  if (Width > DestPitch)
2567  throw sycl::exception(sycl::make_error_code(errc::invalid),
2568  "Destination pitch must be greater than or equal "
2569  "to the width specified in 'ext_oneapi_fill2d'");
2570  // If the backends supports 2D fill we use that. Otherwise we use a fallback
2571  // kernel.
2572  if (supportsUSMFill2D())
2573  ext_oneapi_fill2d_impl(Dest, DestPitch, &Pattern, sizeof(T), Width,
2574  Height);
2575  else
2576  commonUSMFill2DFallbackKernel(Dest, DestPitch, Pattern, Width, Height);
2577  }
2578 
2579 private:
2580  std::shared_ptr<detail::handler_impl> MImpl;
2581  std::shared_ptr<detail::queue_impl> MQueue;
2586  std::vector<std::vector<char>> MArgsStorage;
2587  std::vector<detail::AccessorImplPtr> MAccStorage;
2588  std::vector<detail::LocalAccessorImplPtr> MLocalAccStorage;
2589  std::vector<std::shared_ptr<detail::stream_impl>> MStreamStorage;
2590  mutable std::vector<std::shared_ptr<const void>> MSharedPtrStorage;
2592  std::vector<detail::ArgDesc> MArgs;
2596  std::vector<detail::ArgDesc> MAssociatedAccesors;
2598  std::vector<detail::AccessorImplHost *> MRequirements;
2600  detail::NDRDescT MNDRDesc;
2601  std::string MKernelName;
2603  std::shared_ptr<detail::kernel_impl> MKernel;
2607  detail::CG::CGTYPE MCGType = detail::CG::None;
2609  void *MSrcPtr = nullptr;
2611  void *MDstPtr = nullptr;
2613  size_t MLength = 0;
2615  std::vector<char> MPattern;
2617  std::unique_ptr<detail::HostKernelBase> MHostKernel;
2619  std::unique_ptr<detail::HostTask> MHostTask;
2620  detail::OSModuleHandle MOSModuleHandle = detail::OSUtil::ExeModuleHandle;
2621  // Storage for a lambda or function when using InteropTasks
2622  std::unique_ptr<detail::InteropTask> MInteropTask;
2624  std::vector<detail::EventImplPtr> MEvents;
2627  std::vector<detail::EventImplPtr> MEventsWaitWithBarrier;
2628 
2629  bool MIsHost = false;
2630 
2631  detail::code_location MCodeLoc = {};
2632  bool MIsFinalized = false;
2633  event MLastEvent;
2634 
2635  // Make queue_impl class friend to be able to call finalize method.
2636  friend class detail::queue_impl;
2637  // Make accessor class friend to keep the list of associated accessors.
2638  template <typename DataT, int Dims, access::mode AccMode,
2639  access::target AccTarget, access::placeholder isPlaceholder,
2640  typename PropertyListT>
2641  friend class accessor;
2643 
2644  template <typename DataT, int Dimensions, access::mode AccessMode,
2647  // Make stream class friend to be able to keep the list of associated streams
2648  friend class stream;
2649  friend class detail::stream_impl;
2650  // Make reduction friends to store buffers and arrays created for it
2651  // in handler from reduction methods.
2652  template <typename T, class BinaryOperation, int Dims, size_t Extent,
2653  typename RedOutVar>
2655 
2657  template <class FunctorTy>
2658  friend void detail::reduction::withAuxHandler(handler &CGH, FunctorTy Func);
2659 
2660  template <typename KernelName, detail::reduction::strategy Strategy, int Dims,
2661  typename PropertiesT, typename... RestT>
2663  PropertiesT Properties,
2664  RestT... Rest);
2665 
2666  template <typename KernelName, detail::reduction::strategy Strategy, int Dims,
2667  typename PropertiesT, typename... RestT>
2668  friend void
2670  PropertiesT Properties, RestT... Rest);
2671 
2672 #ifndef __SYCL_DEVICE_ONLY__
2675  access::target);
2676 #endif
2677 
2678  friend class ::MockHandler;
2679  friend class detail::queue_impl;
2680 
2681  bool DisableRangeRounding();
2682 
2683  bool RangeRoundingTrace();
2684 
2685  void GetRangeRoundingSettings(size_t &MinFactor, size_t &GoodFactor,
2686  size_t &MinRange);
2687 
2688  template <typename WrapperT, typename TransformedArgType, int Dims,
2689  typename KernelType,
2691  KernelType, TransformedArgType>::value> * = nullptr>
2692  auto getRangeRoundedKernelLambda(KernelType KernelFunc,
2693  range<Dims> NumWorkItems) {
2694  return detail::RoundedRangeKernelWithKH<TransformedArgType, Dims,
2695  KernelType>(NumWorkItems,
2696  KernelFunc);
2697  }
2698 
2699  template <typename WrapperT, typename TransformedArgType, int Dims,
2700  typename KernelType,
2702  KernelType, TransformedArgType>::value> * = nullptr>
2703  auto getRangeRoundedKernelLambda(KernelType KernelFunc,
2704  range<Dims> NumWorkItems) {
2706  NumWorkItems, KernelFunc);
2707  }
2708 
2709  // Checks if 2D memory operations are supported by the underlying platform.
2710  bool supportsUSMMemcpy2D();
2711  bool supportsUSMFill2D();
2712  bool supportsUSMMemset2D();
2713 
2714  // Helper function for getting a loose bound on work-items.
2715  id<2> computeFallbackKernelBounds(size_t Width, size_t Height);
2716 
2717  // Common function for launching a 2D USM memcpy kernel to avoid redefinitions
2718  // of the kernel from copy and memcpy.
2719  template <typename T>
2720  void commonUSMCopy2DFallbackKernel(const void *Src, size_t SrcPitch,
2721  void *Dest, size_t DestPitch, size_t Width,
2722  size_t Height) {
2723  // Limit number of work items to be resistant to big copies.
2724  id<2> Chunk = computeFallbackKernelBounds(Height, Width);
2725  id<2> Iterations = (Chunk + id<2>{Height, Width} - 1) / Chunk;
2726  parallel_for<class __usmmemcpy2d<T>>(
2727  range<2>{Chunk[0], Chunk[1]}, [=](id<2> Index) {
2728  T *CastedDest = static_cast<T *>(Dest);
2729  const T *CastedSrc = static_cast<const T *>(Src);
2730  for (uint32_t I = 0; I < Iterations[0]; ++I) {
2731  for (uint32_t J = 0; J < Iterations[1]; ++J) {
2732  id<2> adjustedIndex = Index + Chunk * id<2>{I, J};
2733  if (adjustedIndex[0] < Height && adjustedIndex[1] < Width) {
2734  CastedDest[adjustedIndex[0] * DestPitch + adjustedIndex[1]] =
2735  CastedSrc[adjustedIndex[0] * SrcPitch + adjustedIndex[1]];
2736  }
2737  }
2738  }
2739  });
2740  }
2741 
2742  // Common function for launching a 2D USM fill kernel to avoid redefinitions
2743  // of the kernel from memset and fill.
2744  template <typename T>
2745  void commonUSMFill2DFallbackKernel(void *Dest, size_t DestPitch,
2746  const T &Pattern, size_t Width,
2747  size_t Height) {
2748  // Limit number of work items to be resistant to big fill operations.
2749  id<2> Chunk = computeFallbackKernelBounds(Height, Width);
2750  id<2> Iterations = (Chunk + id<2>{Height, Width} - 1) / Chunk;
2751  parallel_for<class __usmfill2d<T>>(
2752  range<2>{Chunk[0], Chunk[1]}, [=](id<2> Index) {
2753  T *CastedDest = static_cast<T *>(Dest);
2754  for (uint32_t I = 0; I < Iterations[0]; ++I) {
2755  for (uint32_t J = 0; J < Iterations[1]; ++J) {
2756  id<2> adjustedIndex = Index + Chunk * id<2>{I, J};
2757  if (adjustedIndex[0] < Height && adjustedIndex[1] < Width) {
2758  CastedDest[adjustedIndex[0] * DestPitch + adjustedIndex[1]] =
2759  Pattern;
2760  }
2761  }
2762  }
2763  });
2764  }
2765 
2766  // Implementation of ext_oneapi_memcpy2d using command for native 2D memcpy.
2767  void ext_oneapi_memcpy2d_impl(void *Dest, size_t DestPitch, const void *Src,
2768  size_t SrcPitch, size_t Width, size_t Height);
2769 
2770  // Untemplated version of ext_oneapi_fill2d using command for native 2D fill.
2771  void ext_oneapi_fill2d_impl(void *Dest, size_t DestPitch, const void *Value,
2772  size_t ValueSize, size_t Width, size_t Height);
2773 
2774  // Implementation of ext_oneapi_memset2d using command for native 2D memset.
2775  void ext_oneapi_memset2d_impl(void *Dest, size_t DestPitch, int Value,
2776  size_t Width, size_t Height);
2777 };
2778 } // __SYCL_INLINE_VER_NAMESPACE(_V1)
2779 } // namespace sycl
This file contains the implementation of the accessor class.
CGTYPE
Type of the command group.
Definition: cg.hpp:55
void operator()(TransformedArgType Arg, kernel_handler KH) const
Definition: handler.hpp:262
RoundedRangeKernelWithKH(range< Dims > NumWorkItems, KernelType KernelFunc)
Definition: handler.hpp:259
RoundedRangeKernel(range< Dims > NumWorkItems, KernelType KernelFunc)
Definition: handler.hpp:241
void operator()(TransformedArgType Arg) const
Definition: handler.hpp:244
This class is the default KernelName template parameter type for kernel invocation APIs such as singl...
Definition: kernel.hpp:39
The SYCL device class encapsulates a single SYCL device on which kernels may be executed.
Definition: device.hpp:49
An event object can be used to synchronize memory transfers, enqueues of kernels and signaling barrie...
Definition: event.hpp:40
Command group handler class.
Definition: handler.hpp:312
void fill(void *Ptr, const T &Pattern, size_t Count)
Fills the specified memory with the specified pattern.
Definition: handler.hpp:2348
void parallel_for(range< 2 > NumWorkItems, kernel Kernel)
Definition: handler.hpp:1700
void parallel_for(kernel Kernel, range< Dims > NumWorkItems, _KERNELFUNCPARAM(KernelFunc))
Defines and invokes a SYCL kernel function for the specified range.
Definition: handler.hpp:1799
void parallel_for_work_group(range< Dims > NumWorkGroups, PropertiesT, _KERNELFUNCPARAM(KernelFunc))
@}
Definition: handler.hpp:2075
void single_task(_KERNELFUNCPARAM(KernelFunc))
Defines and invokes a SYCL kernel function as a function object type.
Definition: handler.hpp:1557
void parallel_for(nd_range< Dims > NDRange, kernel Kernel)
Defines and invokes a SYCL kernel function for the specified range and offsets.
Definition: handler.hpp:1737
void parallel_for(range< 1 > NumWorkItems, _KERNELFUNCPARAM(KernelFunc))
Definition: handler.hpp:1562
detail::enable_if_t< ShouldEnableSetArg< T >::value, void > set_arg(int ArgIndex, T &&Arg)
Sets argument for OpenCL interoperability kernels.
Definition: handler.hpp:1523
void copy(accessor< T_Src, Dims, AccessMode, AccessTarget, IsPlaceholder > Src, std::shared_ptr< T_Dst > Dst)
Copies the content of memory object accessed by Src into the memory pointed by Dst.
Definition: handler.hpp:2106
std::enable_if_t<(sizeof...(RestT) > 1) &&detail::AreAllButLastReductions< RestT... >::value &&ext::oneapi::experimental::is_property_list< PropertiesT >::value > parallel_for(nd_range< Dims > Range, PropertiesT Properties, RestT &&...Rest)
Definition: handler.hpp:2057
void ext_oneapi_memset2d(void *Dest, size_t DestPitch, int Value, size_t Width, size_t Height)
Fills the memory pointed by a USM pointer with the value specified.
Definition: handler.hpp:2532
std::enable_if_t< ext::oneapi::experimental::is_property_list< PropertiesT >::value > parallel_for(range< 3 > NumWorkItems, PropertiesT, _KERNELFUNCPARAM(KernelFunc))
Definition: handler.hpp:2014
void parallel_for(range< 3 > NumWorkItems, _KERNELFUNCPARAM(KernelFunc))
Definition: handler.hpp:1572
handler & operator=(handler &&)=delete
handler(handler &&)=delete
void copy(accessor< T_Src, Dims_Src, AccessMode_Src, AccessTarget_Src, IsPlaceholder_Src > Src, accessor< T_Dst, Dims_Dst, AccessMode_Dst, AccessTarget_Dst, IsPlaceholder_Dst > Dst)
Copies the content of memory object accessed by Src to the memory object accessed by Dst.
Definition: handler.hpp:2235
void parallel_for_work_group(range< Dims > NumWorkGroups, range< Dims > WorkGroupSize, _KERNELFUNCPARAM(KernelFunc))
Hierarchical kernel invocation method of a kernel defined as a lambda encoding the body of each work-...
Definition: handler.hpp:1670
void parallel_for_work_group(range< Dims > NumWorkGroups, range< Dims > WorkGroupSize, PropertiesT, _KERNELFUNCPARAM(KernelFunc))
Definition: handler.hpp:2083
detail::enable_if_t< detail::check_fn_signature< detail::remove_reference_t< FuncT >, void()>::value||detail::check_fn_signature< detail::remove_reference_t< FuncT >, void(interop_handle)>::value > host_task(FuncT &&Func)
Enqueues a command to the SYCL runtime to invoke Func once.
Definition: handler.hpp:1601
std::enable_if_t< detail::AreAllButLastReductions< RestT... >::value > parallel_for(range< Dims > Range, RestT &&...Rest)
Definition: handler.hpp:2045
void ext_oneapi_fill2d(void *Dest, size_t DestPitch, const T &Pattern, size_t Width, size_t Height)
Fills the memory pointed by a USM pointer with the value specified.
Definition: handler.hpp:2561
void parallel_for(range< 3 > NumWorkItems, kernel Kernel)
Definition: handler.hpp:1704
void parallel_for(range< 1 > NumWorkItems, kernel Kernel)
Definition: handler.hpp:1696
void single_task(kernel Kernel, _KERNELFUNCPARAM(KernelFunc))
Defines and invokes a SYCL kernel function.
Definition: handler.hpp:1754
handler & operator=(const handler &)=delete
void parallel_for(kernel Kernel, nd_range< Dims > NDRange, _KERNELFUNCPARAM(KernelFunc))
Defines and invokes a SYCL kernel function for the specified range and offsets.
Definition: handler.hpp:1875
void parallel_for_work_group(kernel Kernel, range< Dims > NumWorkGroups, range< Dims > WorkGroupSize, _KERNELFUNCPARAM(KernelFunc))
Hierarchical kernel invocation method of a kernel.
Definition: handler.hpp:1956
std::enable_if_t< ext::oneapi::experimental::is_property_list< PropertiesT >::value > parallel_for(range< 2 > NumWorkItems, PropertiesT, _KERNELFUNCPARAM(KernelFunc))
Definition: handler.hpp:2004
void require(accessor< DataT, Dims, AccMode, AccTarget, isPlaceholder > Acc)
Requires access to the memory object associated with the placeholder accessor.
Definition: handler.hpp:1481
void fill(accessor< T, Dims, AccessMode, AccessTarget, IsPlaceholder, PropertyListT > Dst, const T &Pattern)
Fills memory pointed by accessor with the pattern given.
Definition: handler.hpp:2310
void update_host(accessor< T, Dims, AccessMode, AccessTarget, IsPlaceholder > Acc)
Provides guarantees that the memory object accessed via Acc is updated on the host after command grou...
Definition: handler.hpp:2283
void ext_oneapi_copy2d(const T *Src, size_t SrcPitch, T *Dest, size_t DestPitch, size_t Width, size_t Height)
Copies data from one 2D memory region to another, both pointed by USM pointers.
Definition: handler.hpp:2495
std::is_same< remove_cv_ref_t< U >, remove_cv_ref_t< T > > is_same_type
Definition: handler.hpp:1501
void ext_oneapi_memcpy2d(void *Dest, size_t DestPitch, const void *Src, size_t SrcPitch, size_t Width, size_t Height)
Copies data from one 2D memory region to another, both pointed by USM pointers.
Definition: handler.hpp:2460
void single_task(kernel Kernel)
Invokes a SYCL kernel.
Definition: handler.hpp:1683
std::enable_if_t< detail::AreAllButLastReductions< RestT... >::value > parallel_for(nd_range< Dims > Range, RestT &&...Rest)
Definition: handler.hpp:2065
typename detail::remove_cv_t< detail::remove_reference_t< T > > remove_cv_ref_t
Definition: handler.hpp:1498
void copy(const T_Src *Src, accessor< T_Dst, Dims, AccessMode, AccessTarget, IsPlaceholder > Dst)
Copies the content of memory pointed by Src into the memory object accessed by Dst.
Definition: handler.hpp:2194
void copy(std::shared_ptr< T_Src > Src, accessor< T_Dst, Dims, AccessMode, AccessTarget, IsPlaceholder > Dst)
Copies the content of memory pointed by Src into the memory object accessed by Dst.
Definition: handler.hpp:2131
handler(const handler &)=delete
void copy(accessor< T_Src, Dims, AccessMode, AccessTarget, IsPlaceholder > Src, T_Dst *Dst)
Copies the content of memory object accessed by Src into the memory pointed by Dst.
Definition: handler.hpp:2155
std::enable_if_t< ext::oneapi::experimental::is_property_list< PropertiesT >::value > parallel_for(nd_range< Dims > Range, PropertiesT Properties, _KERNELFUNCPARAM(KernelFunc))
Definition: handler.hpp:2024
void parallel_for_work_group(kernel Kernel, range< Dims > NumWorkGroups, _KERNELFUNCPARAM(KernelFunc))
Hierarchical kernel invocation method of a kernel.
Definition: handler.hpp:1917
void ext_oneapi_barrier()
Prevents any commands submitted afterward to this queue from executing until all commands previously ...
Definition: handler.hpp:2361
std::enable_if_t< ext::oneapi::experimental::is_property_list< PropertiesT >::value > parallel_for(range< 1 > NumWorkItems, PropertiesT, _KERNELFUNCPARAM(KernelFunc))
Definition: handler.hpp:1994
void parallel_for_work_group(range< Dims > NumWorkGroups, _KERNELFUNCPARAM(KernelFunc))
Hierarchical kernel invocation method of a kernel defined as a lambda encoding the body of each work-...
Definition: handler.hpp:1651
void set_specialization_constant(typename std::remove_reference_t< decltype(SpecName)>::value_type Value)
Definition: handler.hpp:1438
void set_args(Ts &&...Args)
Sets arguments for OpenCL interoperability kernels.
Definition: handler.hpp:1545
__SYCL_DEPRECATED("run_on_host_intel() is deprecated, use host_task() instead") void run_on_host_intel(FuncT Func)
Defines and invokes a SYCL kernel on host device.
Definition: handler.hpp:1581
std::enable_if_t< ext::oneapi::experimental::is_property_list< PropertiesT >::value > single_task(PropertiesT, _KERNELFUNCPARAM(KernelFunc))
Definition: handler.hpp:1986
void parallel_for(range< 2 > NumWorkItems, _KERNELFUNCPARAM(KernelFunc))
Definition: handler.hpp:1567
__SYCL_DEPRECATED("interop_task() is deprecated, use host_task() instead") void interop_task(FuncT Func)
Invokes a lambda on the host.
Definition: handler.hpp:1783
void set_arg(int ArgIndex, accessor< DataT, Dims, AccessMode, AccessTarget, IsPlaceholder > Arg)
Definition: handler.hpp:1530
std::remove_reference_t< decltype(SpecName)>::value_type get_specialization_constant() const
Definition: handler.hpp:1453
std::enable_if_t<(sizeof...(RestT) > 1) &&detail::AreAllButLastReductions< RestT... >::value &&ext::oneapi::experimental::is_property_list< PropertiesT >::value > parallel_for(range< Dims > Range, PropertiesT Properties, RestT &&...Rest)
Reductions.
Definition: handler.hpp:2037
void set_arg(int ArgIndex, local_accessor< DataT, Dims > Arg)
Definition: handler.hpp:1536
A unique identifier of an item in an index space.
Definition: id.hpp:32
Identifies an instance of the function object executing at each point in a range.
Definition: item.hpp:39
Provides an abstraction of a SYCL kernel.
Definition: kernel.hpp:71
Identifies an instance of the function object executing at each point in an nd_range.
Definition: nd_item.hpp:36
id< dimensions > get_global_id() const
Definition: nd_item.hpp:40
range< dimensions > get_global_range() const
Definition: nd_item.hpp:92
id< dimensions > get_offset() const
Definition: nd_item.hpp:105
Defines the iteration domain of both the work-groups and the overall dispatch.
Definition: nd_range.hpp:23
Objects of the property_list class are containers for the SYCL properties.
Defines the iteration domain of either a single work-group in a parallel dispatch,...
Definition: range.hpp:24
size_t size() const
Definition: range.hpp:50
#define __SYCL_INLINE_VER_NAMESPACE(X)
#define __SYCL2020_DEPRECATED(message)
__ESIMD_API void barrier()
Generic work-group barrier.
Definition: memory.hpp:1262
#define __SYCL_KERNEL_ATTR__
Definition: handler.hpp:1152
#define _KERNELFUNCPARAM(a)
Definition: handler.hpp:55
__SYCL_EXTERN_STREAM_ATTRS ostream cout
Linked to standard output.
void withAuxHandler(handler &CGH, FunctorTy Func)
Definition: reduction.hpp:809
void finalizeHandler(handler &CGH)
Definition: reduction.hpp:808
void reduction_parallel_for(handler &CGH, nd_range< Dims > NDRange, PropertiesT Properties, RestT... Rest)
Definition: reduction.hpp:2203
detail::enable_if_t< std::is_same< T, nd_range< Dims > >::value > checkValueRange(const T &V)
Definition: handler.hpp:226
device getDeviceFromHandler(handler &CommandGroupHandlerRef)
Definition: accessor.cpp:15
size_t getLinearIndex(const T< Dims > &Index, const U< Dims > &Range)
Definition: common.hpp:433
typename std::remove_cv< T >::type remove_cv_t
id< 1 > getDelinearizedId(const range< 1 > &, size_t Index)
Definition: id.hpp:323
static Arg member_ptr_helper(RetType(Func::*)(Arg) const)
decltype(Obj::impl) getSyclObjImpl(const Obj &SyclObject)
Definition: common.hpp:300
SuggestedArgType argument_helper(...)
std::enable_if_t< KernelLambdaHasKernelHandlerArgT< KernelType, ArgType >::value > runKernelWithArg(KernelType KernelName, ArgType Arg)
Definition: cg_types.hpp:197
std::enable_if_t< KernelLambdaHasKernelHandlerArgT< KernelType >::value > runKernelWithoutArg(KernelType KernelName)
Definition: cg_types.hpp:183
std::shared_ptr< LocalAccessorImplHost > LocalAccessorImplPtr
Definition: accessor.hpp:520
typename std::remove_const< T >::type remove_const_t
std::shared_ptr< detail::kernel_bundle_impl > KernelBundleImplPtr
void memcpy(void *Dst, const void *Src, std::size_t Size)
void associateWithHandler(handler &, AccessorBaseHost *, access::target)
typename std::remove_reference< T >::type remove_reference_t
typename std::enable_if< B, T >::type enable_if_t
void write(GlobalBufAccessorT &GlobalFlushBuf, size_t FlushBufferSize, unsigned WIOffset, const char *Str, unsigned Len, unsigned Padding=0)
Definition: stream.hpp:111
decltype(argument_helper< F, SuggestedArgType >(0)) lambda_arg_type
Definition: handler.hpp:125
std::shared_ptr< AccessorImplHost > AccessorImplPtr
Definition: accessor.hpp:465
intptr_t OSModuleHandle
Uniquely identifies an operating system module (executable or a dynamic library)
Definition: os_util.hpp:48
F * storePlainArg(std::vector< std::vector< char >> &ArgStorage, T &&Arg)
prefetch_impl< _B > prefetch
Definition: fpga_lsu.hpp:45
typename merged_properties< LHSPropertiesT, RHSPropertiesT >::type merged_properties_t
Definition: properties.hpp:222
properties< std::tuple< PropertyValueTs... > > properties_t
Definition: properties.hpp:209
properties< std::tuple<> > empty_properties_t
Definition: properties.hpp:204
class __SYCL_EBO __SYCL_SPECIAL_CLASS __SYCL_TYPE(local_accessor) local_accessor class __SYCL_EBO __SYCL_SPECIAL_CLASS Dimensions
Definition: accessor.hpp:2764
constexpr mode_tag_t< access_mode::read_write > read_write
Definition: access.hpp:73
class __SYCL_EBO __SYCL_SPECIAL_CLASS __SYCL_TYPE(local_accessor) local_accessor class __SYCL_EBO __SYCL_SPECIAL_CLASS IsPlaceholder
Definition: accessor.hpp:2765
class __SYCL_EBO __SYCL_SPECIAL_CLASS __SYCL_TYPE(local_accessor) local_accessor class __SYCL_EBO __SYCL_SPECIAL_CLASS AccessMode
Definition: accessor.hpp:2764
std::error_code make_error_code(sycl::errc E) noexcept
Constructs an error code using e and sycl_category()
Definition: exception.cpp:91
---— Error handling, matching OpenCL plugin semantics.
Definition: access.hpp:14
std::function< void(const sycl::nd_item< NDims > &)> KernelFunc
simd< _Tp, _Abi > max(const simd< _Tp, _Abi > &, const simd< _Tp, _Abi > &) noexcept
Predicate returning true if all template type parameters except the last one are reductions.
Definition: reduction.hpp:42