44 #include <type_traits>
47 #if !SYCL_LANGUAGE_VERSION || SYCL_LANGUAGE_VERSION < 202001
48 #define __SYCL_NONCONST_FUNCTOR__
53 #ifdef __SYCL_NONCONST_FUNCTOR__
54 #define _KERNELFUNCPARAMTYPE KernelType
56 #define _KERNELFUNCPARAMTYPE const KernelType &
58 #define _KERNELFUNCPARAM(a) _KERNELFUNCPARAMTYPE a
69 template <
typename T_Src,
typename T_Dst,
int Dims,
74 template <
typename T_Src,
typename T_Dst,
int Dims,
96 template <
typename T,
int Dimensions,
typename AllocatorT,
typename Enable>
99 namespace ext::intel::experimental {
100 template <
class _name,
class _dataT, int32_t _min_capacity,
class _propertiesT,
113 class image_accessor;
114 template <
typename RetType,
typename Func,
typename Arg>
119 template <
typename RetType,
typename Func,
typename Arg>
128 template <
typename F,
typename SuggestedArgType>
131 template <typename F, typename SuggestedArgType>
134 template <typename F, typename SuggestedArgType>
// Diagnostic message table used when id/range queries are required to fit in
// a signed int (-fsycl-id-queries-fit-in-int). Each supported geometry type
// gets a specialization carrying the message thrown on overflow.
// NOTE(review): this region looks truncated by extraction — the closing
// braces of the specializations are not visible here; confirm against the
// full header.
149 #if __SYCL_ID_QUERIES_FIT_IN_INT__
// Primary template: declaration only; unsupported types have no message.
150 template <
typename T>
struct NotIntMsg;
// Message for an out-of-int-range sycl::range<Dims>.
152 template <
int Dims>
struct NotIntMsg<
range<Dims>> {
153 constexpr
static const char *Msg =
154 "Provided range is out of integer limits. Pass "
155 "`-fno-sycl-id-queries-fit-in-int' to disable range check.";
// Message for an out-of-int-range sycl::id<Dims> (an offset).
158 template <
int Dims>
struct NotIntMsg<
id<Dims>> {
159 constexpr
static const char *Msg =
160 "Provided offset is out of integer limits. Pass "
161 "`-fno-sycl-id-queries-fit-in-int' to disable offset check.";
167 template <
typename KernelType,
typename PropertiesT,
typename Cond =
void>
171 template <
typename KernelType,
typename PropertiesT>
173 KernelType, PropertiesT,
174 std::enable_if_t<ext::oneapi::experimental::detail::
175 HasKernelPropertiesGetMethod<KernelType>::value>> {
181 "get(sycl::ext::oneapi::experimental::properties_tag) member in kernel "
182 "functor class must return a valid property list.");
// Validates a single dimension value (size_t / unsigned long long only, via
// SFINAE) against an integer limit; throws runtime_error with the message
// from NotIntMsg<T> when the value does not fit.
// NOTE(review): the initializer of `Limit` and the comparison guarding the
// throw are not visible in this extract — presumably Limit is INT_MAX and the
// throw fires when V > Limit; confirm against the full header.
187 #if __SYCL_ID_QUERIES_FIT_IN_INT__
188 template <
typename T,
typename ValT>
189 typename std::enable_if_t<std::is_same<ValT, size_t>::value ||
190 std::is_same<ValT, unsigned long long>::value>
191 checkValueRangeImpl(ValT V) {
192 static constexpr
size_t Limit =
195 throw runtime_error(NotIntMsg<T>::Msg, PI_ERROR_INVALID_VALUE);
// Range check for a whole sycl::range<Dims> or sycl::id<Dims>: every
// per-dimension value is validated, and (for the visible second half) the
// linear product of the dimensions is accumulated and validated as well, so
// the total linear size also fits.
// NOTE(review): the function header line, the product-accumulation statement
// inside the second loop, and the closing braces are missing from this
// extract.
199 template <
int Dims,
typename T>
200 typename std::enable_if_t<std::is_same_v<T, range<Dims>> ||
201 std::is_same_v<T, id<Dims>>>
// Only enforced when id queries must fit in int; otherwise a no-op.
203 #if __SYCL_ID_QUERIES_FIT_IN_INT__
// Check each dimension individually...
204 for (
size_t Dim = 0; Dim < Dims; ++Dim)
205 checkValueRangeImpl<T>(V[Dim]);
// ...then check the linearized total (accumulated in Product).
208 unsigned long long Product = 1;
209 for (
size_t Dim = 0; Dim < Dims; ++Dim) {
212 checkValueRangeImpl<T>(Product);
222 #if __SYCL_ID_QUERIES_FIT_IN_INT__
223 checkValueRange<Dims>(R);
224 checkValueRange<Dims>(O);
226 for (
size_t Dim = 0; Dim < Dims; ++Dim) {
227 unsigned long long Sum = R[Dim] + O[Dim];
229 checkValueRangeImpl<range<Dims>>(Sum);
// Range check for a sycl::nd_range<Dims>: validates the global range, the
// local range, the offset, and (last visible line) the combination of global
// range plus offset, each via the scalar/aggregate overloads above.
// NOTE(review): the function signature line and closing brace are missing
// from this extract.
237 template <
int Dims,
typename T>
238 typename std::enable_if_t<std::is_same_v<T, nd_range<Dims>>>
240 #if __SYCL_ID_QUERIES_FIT_IN_INT__
241 checkValueRange<Dims>(V.get_global_range());
242 checkValueRange<Dims>(V.get_local_range());
243 checkValueRange<Dims>(V.get_offset());
// Global range and offset are also checked together (their per-dim sums).
245 checkValueRange<Dims>(V.get_global_range(), V.get_offset());
251 template <
typename TransformedArgType,
int Dims,
typename KernelType>
258 if (Arg[0] >= NumWorkItems[0])
260 Arg.set_allowed_range(NumWorkItems);
269 template <
typename TransformedArgType,
int Dims,
typename KernelType>
275 void operator()(TransformedArgType Arg, kernel_handler KH)
const {
276 if (Arg[0] >= NumWorkItems[0])
278 Arg.set_allowed_range(NumWorkItems);
287 using std::enable_if_t;
288 using sycl::detail::queue_impl;
331 handler(std::shared_ptr<detail::queue_impl> Queue,
bool IsHost);
342 handler(std::shared_ptr<detail::queue_impl> Queue,
343 std::shared_ptr<detail::queue_impl> PrimaryQueue,
344 std::shared_ptr<detail::queue_impl> SecondaryQueue,
bool IsHost);
// Copies a plain (by-value) kernel argument into handler-owned byte storage
// (CGData.MArgsStorage) so it outlives the submitting scope, and returns a
// typed pointer into that storage. F is T stripped of reference and const.
// NOTE(review): the statement that actually copies Arg into Storage and the
// return statement are not visible in this extract.
347 template <
typename T,
typename F =
typename std::remove_const_t<
348 typename std::remove_reference_t<T>>>
349 F *storePlainArg(T &&Arg) {
// Reserve sizeof(T) bytes of owned storage for this argument.
350 CGData.MArgsStorage.emplace_back(
sizeof(T));
351 auto Storage =
reinterpret_cast<F *
>(CGData.MArgsStorage.back().data());
// Guard called at the start of every action-recording method: a command
// group may contain exactly one action (kernel or explicit memory
// operation), so if an action type has already been set, throw.
360 void throwIfActionIsCreated() {
361 if (detail::CG::None != getType())
362 throw sycl::runtime_error(
"Attempt to set multiple actions for the "
363 "command group. Command group must consist of "
364 "a single kernel or explicit memory operation.",
365 PI_ERROR_INVALID_OPERATION);
// Bitmask extracting the access::target bits from a kernel parameter
// descriptor's `info` field (see use below).
368 constexpr
static int AccessTargetMask = 0x7ff;
// Walks the compile-time parameter descriptors of the kernel named
// KernelName and throws if any parameter is a local accessor: local
// accessors are only meaningful with an nd_range launch, not with
// single_task or the simple range-based parallel_for.
// NOTE(review): the declarations of NameT, Kind and AccTarget are not
// visible in this extract; AccTarget presumably comes from the masked
// `info` expression on the line below it.
372 template <
typename KernelName,
typename KernelType>
373 void throwOnLocalAccessorMisuse()
const {
376 using KI = sycl::detail::KernelInfo<NameT>;
378 auto *KernelArgs = &KI::getParamDesc(0);
380 for (
unsigned I = 0; I < KI::getNumParams(); ++I) {
383 static_cast<access::target>(KernelArgs[I].info & AccessTargetMask);
384 if ((Kind == detail::kernel_param_kind_t::kind_accessor) &&
385 (AccTarget == target::local))
386 throw sycl::exception(
388 "A local accessor must not be used in a SYCL kernel function "
389 "that is invoked via single_task or via the simple form of "
390 "parallel_for that takes a range parameter.");
397 extractArgsAndReqsFromLambda(
char *LambdaPtr,
size_t KernelArgsNum,
402 void extractArgsAndReqs();
405 const int Size,
const size_t Index,
size_t &IndexShift,
406 bool IsKernelCreatedFromSource,
bool IsESIMD);
409 std::string getKernelName();
// Returns true when the compile-time name of the lambda kernel equals the
// name of the pre-created kernel object (MKernel) — used to decide whether
// lambda-extracted arguments can be reused for the kernel object.
// NOTE(review): the definition of LambdaName is not visible in this
// extract; presumably it is derived from LambdaNameT via KernelInfo.
411 template <
typename LambdaNameT>
bool lambdaAndKernelHaveEqualName() {
// Requires a kernel object to compare against.
417 assert(MKernel &&
"MKernel is not initialized");
419 const std::string KernelName = getKernelName();
420 return LambdaName == KernelName;
440 void addStream(
const std::shared_ptr<detail::stream_impl> &Stream) {
441 MStreamStorage.push_back(Stream);
449 void addReduction(
const std::shared_ptr<const void> &ReduObj);
// True when this handler was created for the host device (MIsHost member).
454 bool is_host() {
return MIsHost; }
456 #ifdef __SYCL_DEVICE_ONLY__
// Recursive variadic helper behind set_args(): forwards each argument to
// set_arg() with an increasing index, peeling one argument per recursion
// step.
468 template <
typename T,
typename... Ts>
469 void setArgsHelper(
int ArgIndex, T &&Arg, Ts &&...Args) {
470 set_arg(ArgIndex, std::move(Arg));
471 setArgsHelper(++ArgIndex, std::move(Args)...);
// Base case: no arguments left — recursion terminates here.
474 void setArgsHelper(
int) {}
476 void setLocalAccessorArgHelper(
int ArgIndex,
481 MLocalAccStorage.push_back(std::move(LocalAccImpl));
482 MArgs.emplace_back(detail::kernel_param_kind_t::kind_accessor, Req,
483 static_cast<int>(access::target::local), ArgIndex);
489 void setArgHelper(
int ArgIndex,
492 #ifndef __SYCL_DEVICE_ONLY__
493 setLocalAccessorArgHelper(ArgIndex, Arg);
498 template <
typename DataT,
int Dims>
500 #ifndef __SYCL_DEVICE_ONLY__
501 setLocalAccessorArgHelper(ArgIndex, Arg);
508 typename std::enable_if_t<AccessTarget != access::target::local, void>
516 CGData.MRequirements.push_back(Req);
518 CGData.MAccStorage.push_back(std::move(AccImpl));
520 MArgs.emplace_back(detail::kernel_param_kind_t::kind_accessor, Req,
521 static_cast<int>(AccessTarget), ArgIndex);
// Stores a plain (non-accessor, non-sampler) kernel argument: the value is
// copied into handler-owned storage via storePlainArg(), then recorded in
// MArgs either as a raw pointer argument (USM-style pointers, excluding
// cl_mem) or as a standard-layout byte blob.
// NOTE(review): the else keyword between the two emplace_back branches and
// the closing braces are not visible in this extract.
524 template <
typename T>
void setArgHelper(
int ArgIndex, T &&Arg) {
525 auto StoredArg =
static_cast<void *
>(storePlainArg(Arg));
// Pointers (other than cl_mem handles) are passed as pointer kind...
527 if (!std::is_same<cl_mem, T>::value && std::is_pointer<T>::value) {
528 MArgs.emplace_back(detail::kernel_param_kind_t::kind_pointer, StoredArg,
529 sizeof(T), ArgIndex);
// ...everything else is passed by value as a standard-layout blob.
531 MArgs.emplace_back(detail::kernel_param_kind_t::kind_std_layout,
532 StoredArg,
sizeof(T), ArgIndex);
536 void setArgHelper(
int ArgIndex, sampler &&Arg) {
537 auto StoredArg =
static_cast<void *
>(storePlainArg(Arg));
538 MArgs.emplace_back(detail::kernel_param_kind_t::kind_sampler, StoredArg,
539 sizeof(sampler), ArgIndex);
543 void verifyKernelInvoc(
const kernel &Kernel) {
544 std::ignore = Kernel;
// Host-execution plumbing: wraps the user's kernel functor in a
// NormalizedKernelType adapter (which presents a uniform
// nd_item<Dims>-taking call operator), type-erases it into std::function,
// stores it as this handler's host kernel (MHostKernel), and returns a
// pointer to the stored copy of the user functor so its captures can be
// inspected later (e.g. for argument extraction).
// NOTE(review): the construction of HostKernelPtr (line 569's template
// argument list is cut mid-expression) and part of the return expression
// are missing from this extract.
562 template <
class KernelType,
class NormalizedKernelType,
int Dims>
563 KernelType *ResetHostKernelHelper(
const KernelType &
KernelFunc) {
564 NormalizedKernelType NormalizedKernel(
KernelFunc);
565 auto NormalizedKernelFunc =
566 std::function<void(
const sycl::nd_item<Dims> &)>(NormalizedKernel);
569 sycl::nd_item<Dims>, Dims>(NormalizedKernelFunc);
570 MHostKernel.reset(HostKernelPtr);
571 return &HostKernelPtr->MKernel.template target<NormalizedKernelType>()
576 template <
class KernelType,
typename ArgT,
int Dims>
577 std::enable_if_t<std::is_same_v<ArgT, sycl::id<Dims>>, KernelType *>
578 ResetHostKernel(
const KernelType &
KernelFunc) {
579 struct NormalizedKernelType {
580 KernelType MKernelFunc;
581 NormalizedKernelType(
const KernelType &
KernelFunc)
587 return ResetHostKernelHelper<KernelType, struct NormalizedKernelType, Dims>(
592 template <
class KernelType,
typename ArgT,
int Dims>
593 std::enable_if_t<std::is_same_v<ArgT, sycl::nd_item<Dims>>, KernelType *>
594 ResetHostKernel(
const KernelType &
KernelFunc) {
595 struct NormalizedKernelType {
596 KernelType MKernelFunc;
597 NormalizedKernelType(
const KernelType &
KernelFunc)
603 return ResetHostKernelHelper<KernelType, struct NormalizedKernelType, Dims>(
608 template <
class KernelType,
typename ArgT,
int Dims>
609 std::enable_if_t<std::is_same_v<ArgT, sycl::item<Dims, false>>, KernelType *>
610 ResetHostKernel(
const KernelType &
KernelFunc) {
611 struct NormalizedKernelType {
612 KernelType MKernelFunc;
613 NormalizedKernelType(
const KernelType &
KernelFunc)
616 sycl::item<Dims, false> Item = detail::Builder::createItem<Dims, false>(
621 return ResetHostKernelHelper<KernelType, struct NormalizedKernelType, Dims>(
626 template <
class KernelType,
typename ArgT,
int Dims>
627 std::enable_if_t<std::is_same_v<ArgT, sycl::item<Dims, true>>, KernelType *>
628 ResetHostKernel(
const KernelType &
KernelFunc) {
629 struct NormalizedKernelType {
630 KernelType MKernelFunc;
631 NormalizedKernelType(
const KernelType &
KernelFunc)
634 sycl::item<Dims, true> Item = detail::Builder::createItem<Dims, true>(
639 return ResetHostKernelHelper<KernelType, struct NormalizedKernelType, Dims>(
644 template <
class KernelType,
typename ArgT,
int Dims>
645 typename std::enable_if_t<std::is_same_v<ArgT, void>, KernelType *>
646 ResetHostKernel(
const KernelType &
KernelFunc) {
647 struct NormalizedKernelType {
648 KernelType MKernelFunc;
649 NormalizedKernelType(
const KernelType &
KernelFunc)
656 return ResetHostKernelHelper<KernelType, struct NormalizedKernelType, Dims>(
664 template <
class KernelType,
typename ArgT,
int Dims>
665 std::enable_if_t<std::is_same_v<ArgT, sycl::group<Dims>>, KernelType *>
666 ResetHostKernel(
const KernelType &
KernelFunc) {
669 return (KernelType *)(MHostKernel->getPtr());
679 void verifyUsedKernelBundle(
const std::string &KernelName);
687 template <
typename KernelName,
typename KernelType,
int Dims,
688 typename LambdaArgType>
691 constexpr
bool IsCallableWithKernelHandler =
693 LambdaArgType>::value;
695 if (IsCallableWithKernelHandler && MIsHost) {
696 throw sycl::feature_not_supported(
697 "kernel_handler is not yet supported by host device.",
698 PI_ERROR_INVALID_OPERATION);
701 KernelType *KernelPtr =
702 ResetHostKernel<KernelType, LambdaArgType, Dims>(
KernelFunc);
704 constexpr
bool KernelHasName =
705 KI::getName() !=
nullptr && KI::getName()[0] !=
'\0';
711 !KernelHasName ||
sizeof(
KernelFunc) == KI::getKernelSize(),
712 "Unexpected kernel lambda size. This can be caused by an "
713 "external host compiler producing a lambda with an "
714 "unexpected layout. This is a limitation of the compiler."
715 "In many cases the difference is related to capturing constexpr "
716 "variables. In such cases removing constexpr specifier aligns the "
717 "captures between the host compiler and the device compiler."
719 "In case of MSVC, passing "
720 "-fsycl-host-compiler-options='/std:c++latest' "
728 extractArgsAndReqsFromLambda(
reinterpret_cast<char *
>(KernelPtr),
729 KI::getNumParams(), &KI::getParamDesc(0),
731 MKernelName = KI::getName();
736 MArgs = std::move(MAssociatedAccesors);
741 if (IsCallableWithKernelHandler) {
742 getOrInsertHandlerKernelBundle(
true);
749 template <
typename PropertiesT =
751 void processProperties(PropertiesT Props) {
754 "Template type is not a property list.");
773 template <
int Dims_Src,
int Dims_Dst>
776 if (Dims_Src > Dims_Dst)
778 for (
size_t I = 0; I < Dims_Src; ++I)
789 template <
typename TSrc,
int DimSrc,
access::mode ModeSrc,
793 std::enable_if_t<(DimSrc > 0) && (DimDst > 0),
bool>
797 IsCopyingRectRegionAvailable(Src.get_range(), Dst.get_range()))
800 range<1> LinearizedRange(Src.size());
802 class __copyAcc2Acc<TSrc, DimSrc, ModeSrc, TargetSrc, TDst, DimDst,
803 ModeDst, TargetDst, IsPHSrc, IsPHDst>>(
804 LinearizedRange, [=](
id<1> Id) {
805 size_t Index = Id[0];
808 Dst[DstId] = Src[SrcId];
820 template <
typename TSrc,
int DimSrc,
access::mode ModeSrc,
824 std::enable_if_t<DimSrc == 0 || DimDst == 0, bool>
831 class __copyAcc2Acc<TSrc, DimSrc, ModeSrc, TargetSrc, TDst, DimDst,
832 ModeDst, TargetDst, IsPHSrc, IsPHDst>>(
833 [=]() { *(Dst.get_pointer()) = *(Src.get_pointer()); });
837 #ifndef __SYCL_DEVICE_ONLY__
838 template <
typename TSrc,
typename TDst,
int Dim,
access::mode AccMode,
845 std::enable_if_t<(Dim > 0)>
850 class __copyAcc2Ptr<TSrc, TDst, Dim, AccMode, AccTarget, IsPH>>(
853 using TSrcNonConst =
typename std::remove_const_t<TSrc>;
854 (
reinterpret_cast<TSrcNonConst *
>(Dst))[LinearIndex] = Src[Index];
863 template <
typename TSrc,
typename TDst,
int Dim,
access::mode AccMode,
865 std::enable_if_t<Dim == 0>
868 single_task<class __copyAcc2Ptr<TSrc, TDst, Dim, AccMode, AccTarget, IsPH>>(
870 using TSrcNonConst =
typename std::remove_const_t<TSrc>;
871 *(
reinterpret_cast<TSrcNonConst *
>(Dst)) = *(Src.get_pointer());
879 template <
typename TSrc,
typename TDst,
int Dim,
access::mode AccMode,
881 std::enable_if_t<(Dim > 0)>
882 copyPtrToAccHost(TSrc *Src,
886 class __copyPtr2Acc<TSrc, TDst, Dim, AccMode, AccTarget, IsPH>>(
889 Dst[Index] = (
reinterpret_cast<const TDst *
>(Src))[LinearIndex];
898 template <
typename TSrc,
typename TDst,
int Dim,
access::mode AccMode,
900 std::enable_if_t<Dim == 0>
901 copyPtrToAccHost(TSrc *Src,
903 single_task<class __copyPtr2Acc<TSrc, TDst, Dim, AccMode, AccTarget, IsPH>>(
905 *(Dst.get_pointer()) = *(
reinterpret_cast<const TDst *
>(Src));
908 #endif // __SYCL_DEVICE_ONLY__
// Compile-time predicates classifying accessor targets/modes; used by the
// explicit memory operations (copy/fill) via static_assert to validate
// their accessor arguments.
// NOTE(review): the names of the two unnamed predicates (lines 920-935) and
// several closing braces are missing from this extract; the 930 one checks
// access modes and, by its visible body, accepts discard_read_write among
// others.
// Buffer-backed targets: device (global) or constant buffer.
910 constexpr
static bool isConstOrGlobal(
access::target AccessTarget) {
911 return AccessTarget == access::target::device ||
912 AccessTarget == access::target::constant_buffer;
// Image-backed targets: image or image array.
915 constexpr
static bool isImageOrImageArray(
access::target AccessTarget) {
916 return AccessTarget == access::target::image ||
917 AccessTarget == access::target::image_array;
// Valid target for explicit copy/fill: any buffer or image target.
920 constexpr
static bool
922 return isConstOrGlobal(AccessTarget) || isImageOrImageArray(AccessTarget);
930 constexpr
static bool
935 AccessMode == access::mode::discard_read_write;
// Fill sizes the backends can handle natively: powers of two, 1..128 bytes.
939 constexpr
static bool isBackendSupportedFillSize(
size_t Size) {
940 return Size == 1 || Size == 2 || Size == 4 || Size == 8 || Size == 16 ||
941 Size == 32 || Size == 64 || Size == 128;
944 template <
int Dims,
typename LambdaArgType>
struct TransformUserItemType {
945 using type = std::conditional_t<
946 std::is_convertible_v<nd_item<Dims>, LambdaArgType>,
nd_item<Dims>,
947 std::conditional_t<std::is_convertible_v<item<Dims>, LambdaArgType>,
962 template <
typename KernelName,
typename KernelType,
int Dims,
963 typename PropertiesT =
965 void parallel_for_lambda_impl(
range<Dims> NumWorkItems, PropertiesT Props,
967 throwIfActionIsCreated();
968 throwOnLocalAccessorMisuse<KernelName, KernelType>();
969 using LambdaArgType = sycl::detail::lambda_arg_type<KernelType, item<Dims>>;
974 using TransformedArgType = std::conditional_t<
975 std::is_integral<LambdaArgType>::value && Dims == 1,
item<Dims>,
976 typename TransformUserItemType<Dims, LambdaArgType>::type>;
988 #if !defined(__SYCL_DISABLE_PARALLEL_FOR_RANGE_ROUNDING__) && \
989 !defined(DPCPP_HOST_DEVICE_OPENMP) && \
990 !defined(DPCPP_HOST_DEVICE_PERF_NATIVE) && SYCL_LANGUAGE_VERSION >= 202001
992 size_t MinFactorX = 16;
994 size_t GoodFactorX = 32;
996 size_t MinRangeX = 1024;
1000 this->GetRangeRoundingSettings(MinFactorX, GoodFactorX, MinRangeX);
1018 std::string KName =
typeid(NameT *).name();
1020 bool DisableRounding =
1021 this->DisableRangeRounding() ||
1022 (KI::getName() ==
nullptr || KI::getName()[0] ==
'\0');
1027 if (!DisableRounding && (NumWorkItems[0] >= MinRangeX) &&
1028 (NumWorkItems[0] % MinFactorX != 0)) {
1034 ((NumWorkItems[0] + GoodFactorX - 1) / GoodFactorX) * GoodFactorX;
1035 if (this->RangeRoundingTrace())
1036 std::cout <<
"parallel_for range adjusted from " << NumWorkItems[0]
1037 <<
" to " << NewValX << std::endl;
1041 getRangeRoundedKernelLambda<NameWT, TransformedArgType, Dims>(
1044 using KName = std::conditional_t<std::is_same<KernelType, NameT>::value,
1045 decltype(Wrapper), NameWT>;
1048 AdjustedRange.set_range_dim0(NewValX);
1049 kernel_parallel_for_wrapper<KName, TransformedArgType, decltype(Wrapper),
1050 PropertiesT>(Wrapper);
1051 #ifndef __SYCL_DEVICE_ONLY__
1052 detail::checkValueRange<Dims>(AdjustedRange);
1053 MNDRDesc.set(std::move(AdjustedRange));
1054 StoreLambda<KName, decltype(Wrapper), Dims, TransformedArgType>(
1055 std::move(Wrapper));
1056 setType(detail::CG::Kernel);
1059 #endif // !__SYCL_DISABLE_PARALLEL_FOR_RANGE_ROUNDING__ &&
1064 kernel_parallel_for_wrapper<NameT, TransformedArgType, KernelType,
1066 #ifndef __SYCL_DEVICE_ONLY__
1067 processProperties<PropertiesT>(Props);
1068 detail::checkValueRange<Dims>(NumWorkItems);
1069 MNDRDesc.set(std::move(NumWorkItems));
1070 StoreLambda<NameT, KernelType, Dims, TransformedArgType>(
1072 setType(detail::CG::Kernel);
1090 template <
typename KernelName,
typename KernelType,
int Dims,
1091 typename PropertiesT>
1092 void parallel_for_impl(
nd_range<Dims> ExecutionRange, PropertiesT Props,
1094 throwIfActionIsCreated();
1100 using LambdaArgType =
1101 sycl::detail::lambda_arg_type<KernelType, nd_item<Dims>>;
1104 using TransformedArgType =
1105 typename TransformUserItemType<Dims, LambdaArgType>::type;
1106 (void)ExecutionRange;
1107 kernel_parallel_for_wrapper<NameT, TransformedArgType, KernelType,
1109 #ifndef __SYCL_DEVICE_ONLY__
1110 processProperties<PropertiesT>(Props);
1111 detail::checkValueRange<Dims>(ExecutionRange);
1112 MNDRDesc.set(std::move(ExecutionRange));
1113 StoreLambda<NameT, KernelType, Dims, TransformedArgType>(
1115 setType(detail::CG::Kernel);
1128 throwIfActionIsCreated();
1130 detail::checkValueRange<Dims>(NumWorkItems);
1131 MNDRDesc.set(std::move(NumWorkItems));
1132 setType(detail::CG::Kernel);
1133 extractArgsAndReqs();
1134 MKernelName = getKernelName();
1147 template <
typename KernelName,
typename KernelType,
int Dims,
1148 typename PropertiesT =
1150 void parallel_for_work_group_lambda_impl(
range<Dims> NumWorkGroups,
1153 throwIfActionIsCreated();
1159 using LambdaArgType =
1160 sycl::detail::lambda_arg_type<KernelType, group<Dims>>;
1161 (void)NumWorkGroups;
1162 kernel_parallel_for_work_group_wrapper<NameT, LambdaArgType, KernelType,
1164 #ifndef __SYCL_DEVICE_ONLY__
1165 processProperties<PropertiesT>(Props);
1166 detail::checkValueRange<Dims>(NumWorkGroups);
1167 MNDRDesc.setNumWorkGroups(NumWorkGroups);
1168 StoreLambda<NameT, KernelType, Dims, LambdaArgType>(std::move(
KernelFunc));
1169 setType(detail::CG::Kernel);
1170 #endif // __SYCL_DEVICE_ONLY__
1185 template <
typename KernelName,
typename KernelType,
int Dims,
1186 typename PropertiesT =
1188 void parallel_for_work_group_lambda_impl(
range<Dims> NumWorkGroups,
1192 throwIfActionIsCreated();
1198 using LambdaArgType =
1199 sycl::detail::lambda_arg_type<KernelType, group<Dims>>;
1200 (void)NumWorkGroups;
1202 kernel_parallel_for_work_group_wrapper<NameT, LambdaArgType, KernelType,
1204 #ifndef __SYCL_DEVICE_ONLY__
1205 processProperties<PropertiesT>(Props);
1208 detail::checkValueRange<Dims>(ExecRange);
1209 MNDRDesc.set(std::move(ExecRange));
1210 StoreLambda<NameT, KernelType, Dims, LambdaArgType>(std::move(
KernelFunc));
1211 setType(detail::CG::Kernel);
1212 #endif // __SYCL_DEVICE_ONLY__
1215 #ifdef SYCL_LANGUAGE_VERSION
1216 #define __SYCL_KERNEL_ATTR__ [[clang::sycl_kernel]]
1218 #define __SYCL_KERNEL_ATTR__
1223 template <
typename KernelName,
typename KernelType,
typename... Props>
1224 #ifdef __SYCL_DEVICE_ONLY__
1225 [[__sycl_detail__::add_ir_attributes_function(
1227 ext::oneapi::experimental::detail::PropertyMetaInfo<Props>::name...,
1229 ext::oneapi::experimental::detail::PropertyMetaInfo<Props>::value...)]]
1233 #ifdef __SYCL_DEVICE_ONLY__
1242 template <
typename KernelName,
typename KernelType,
typename... Props>
1243 #ifdef __SYCL_DEVICE_ONLY__
1244 [[__sycl_detail__::add_ir_attributes_function(
1246 ext::oneapi::experimental::detail::PropertyMetaInfo<Props>::name...,
1248 ext::oneapi::experimental::detail::PropertyMetaInfo<Props>::value...)]]
1252 #ifdef __SYCL_DEVICE_ONLY__
1262 template <
typename KernelName,
typename ElementType,
typename KernelType,
1264 #ifdef __SYCL_DEVICE_ONLY__
1265 [[__sycl_detail__::add_ir_attributes_function(
1266 ext::oneapi::experimental::detail::PropertyMetaInfo<Props>::name...,
1267 ext::oneapi::experimental::detail::PropertyMetaInfo<Props>::value...)]]
1271 #ifdef __SYCL_DEVICE_ONLY__
1272 KernelFunc(detail::Builder::getElement(detail::declptr<ElementType>()));
1280 template <
typename KernelName,
typename ElementType,
typename KernelType,
1282 #ifdef __SYCL_DEVICE_ONLY__
1283 [[__sycl_detail__::add_ir_attributes_function(
1284 ext::oneapi::experimental::detail::PropertyMetaInfo<Props>::name...,
1285 ext::oneapi::experimental::detail::PropertyMetaInfo<Props>::value...)]]
1289 #ifdef __SYCL_DEVICE_ONLY__
1290 KernelFunc(detail::Builder::getElement(detail::declptr<ElementType>()), KH);
1299 template <
typename KernelName,
typename ElementType,
typename KernelType,
1301 #ifdef __SYCL_DEVICE_ONLY__
1302 [[__sycl_detail__::add_ir_attributes_function(
1303 ext::oneapi::experimental::detail::PropertyMetaInfo<Props>::name...,
1304 ext::oneapi::experimental::detail::PropertyMetaInfo<Props>::value...)]]
1308 #ifdef __SYCL_DEVICE_ONLY__
1309 KernelFunc(detail::Builder::getElement(detail::declptr<ElementType>()));
1317 template <
typename KernelName,
typename ElementType,
typename KernelType,
1319 #ifdef __SYCL_DEVICE_ONLY__
1320 [[__sycl_detail__::add_ir_attributes_function(
1321 ext::oneapi::experimental::detail::PropertyMetaInfo<Props>::name...,
1322 ext::oneapi::experimental::detail::PropertyMetaInfo<Props>::value...)]]
1326 kernel_handler KH) {
1327 #ifdef __SYCL_DEVICE_ONLY__
1328 KernelFunc(detail::Builder::getElement(detail::declptr<ElementType>()), KH);
1335 template <
typename... Props>
struct KernelPropertiesUnpackerImpl {
1342 template <
typename... TypesToForward,
typename... ArgsTy>
1343 static void kernel_single_task_unpack(handler *h, ArgsTy... Args) {
1344 h->kernel_single_task<TypesToForward..., Props...>(Args...);
1347 template <
typename... TypesToForward,
typename... ArgsTy>
1348 static void kernel_parallel_for_unpack(handler *h, ArgsTy... Args) {
1349 h->kernel_parallel_for<TypesToForward..., Props...>(Args...);
1352 template <
typename... TypesToForward,
typename... ArgsTy>
1353 static void kernel_parallel_for_work_group_unpack(handler *h,
1355 h->kernel_parallel_for_work_group<TypesToForward..., Props...>(Args...);
1359 template <
typename PropertiesT>
1360 struct KernelPropertiesUnpacker :
public KernelPropertiesUnpackerImpl<> {
1364 ext::oneapi::experimental::is_property_list<PropertiesT>::value,
1365 "Template type is not a property list.");
1368 template <
typename... Props>
1369 struct KernelPropertiesUnpacker<
1371 :
public KernelPropertiesUnpackerImpl<Props...> {};
1385 template <
typename KernelType,
typename PropertiesT,
bool HasKernelHandlerArg,
1388 #ifdef __SYCL_DEVICE_ONLY__
1389 detail::CheckDeviceCopyable<KernelType>();
1390 #endif // __SYCL_DEVICE_ONLY__
1391 using MergedPropertiesT =
1392 typename detail::GetMergedKernelProperties<KernelType,
1394 using Unpacker = KernelPropertiesUnpacker<MergedPropertiesT>;
1396 if constexpr (ext::oneapi::experimental::detail::
1397 HasKernelPropertiesGetMethod<
1400 KernelFunc.get(ext::oneapi::experimental::properties_tag{}));
1402 if constexpr (HasKernelHandlerArg) {
1413 template <
typename KernelName,
typename KernelType,
1414 typename PropertiesT =
1417 unpack<KernelType, PropertiesT,
1418 detail::KernelLambdaHasKernelHandlerArgT<KernelType>::value>(
1419 KernelFunc, [&](
auto Unpacker,
auto... args) {
1420 Unpacker.template kernel_single_task_unpack<KernelName, KernelType>(
1425 template <
typename KernelName,
typename ElementType,
typename KernelType,
1426 typename PropertiesT =
1429 unpack<KernelType, PropertiesT,
1430 detail::KernelLambdaHasKernelHandlerArgT<KernelType,
1431 ElementType>::value>(
1432 KernelFunc, [&](
auto Unpacker,
auto... args) {
1433 Unpacker.template kernel_parallel_for_unpack<KernelName, ElementType,
1434 KernelType>(args...);
1438 template <
typename KernelName,
typename ElementType,
typename KernelType,
1439 typename PropertiesT =
1442 unpack<KernelType, PropertiesT,
1443 detail::KernelLambdaHasKernelHandlerArgT<KernelType,
1444 ElementType>::value>(
1445 KernelFunc, [&](
auto Unpacker,
auto... args) {
1446 Unpacker.template kernel_parallel_for_work_group_unpack<
1447 KernelName, ElementType, KernelType>(args...);
1458 template <
typename KernelName,
typename KernelType,
1459 typename PropertiesT =
1461 void single_task_lambda_impl(PropertiesT Props,
1463 throwIfActionIsCreated();
1464 throwOnLocalAccessorMisuse<KernelName, KernelType>();
1468 typename detail::get_kernel_name_t<KernelName, KernelType>::name;
1469 verifyUsedKernelBundle(detail::KernelInfo<NameT>::getName());
1470 kernel_single_task_wrapper<NameT, KernelType, PropertiesT>(
KernelFunc);
1471 #ifndef __SYCL_DEVICE_ONLY__
1474 MNDRDesc.set(range<1>{1});
1475 processProperties<PropertiesT>(Props);
1476 StoreLambda<NameT, KernelType, 1,
void>(
KernelFunc);
1477 setType(detail::CG::Kernel);
1481 void setStateExplicitKernelBundle();
1482 void setStateSpecConstSet();
1483 bool isStateExplicitKernelBundle()
const;
1485 std::shared_ptr<detail::kernel_bundle_impl>
1486 getOrInsertHandlerKernelBundle(
bool Insert)
const;
1488 void setHandlerKernelBundle(kernel Kernel);
1490 void setHandlerKernelBundle(
1491 const std::shared_ptr<detail::kernel_bundle_impl> &NewKernelBundleImpPtr);
// Records a host task: accepts a callable invocable either with no
// arguments or with an interop_handle (checked via check_fn_signature in
// the SFINAE return type), gives it a trivial 1-element NDR descriptor,
// adopts the accessors associated so far as its arguments, wraps the
// callable in detail::HostTask, and marks the command group as a Codeplay
// host task.
// NOTE(review): line 1494's enable_if condition is cut mid-expression in
// this extract (the void() signature of the first check is missing).
1493 template <
typename FuncT>
1494 std::enable_if_t<detail::check_fn_signature<std::remove_reference_t<FuncT>,
1496 detail::check_fn_signature<std::remove_reference_t<FuncT>,
1497 void(interop_handle)>::value>
1498 host_task_impl(FuncT &&Func) {
// Only one action per command group.
1499 throwIfActionIsCreated();
1501 MNDRDesc.set(range<1>(1));
// Hand over the accessors registered on this handler to the task.
1502 MArgs = std::move(MAssociatedAccesors);
1504 MHostTask.reset(
new detail::HostTask(std::move(Func)));
1506 setType(detail::CG::CodeplayHostTask);
// handler is neither copyable nor movable: its lifetime is controlled by
// the runtime for the duration of a single command-group submission.
1510 handler(
const handler &) =
delete;
1511 handler(handler &&) =
delete;
1512 handler &
operator=(
const handler &) =
delete;
1513 handler &
operator=(handler &&) =
delete;
1515 template <auto &SpecName>
1517 typename std::remove_reference_t<decltype(SpecName)>::value_type Value) {
1519 setStateSpecConstSet();
1522 getOrInsertHandlerKernelBundle(
true);
1524 detail::createSyclObjFromImpl<kernel_bundle<bundle_state::input>>(
1526 .set_specialization_constant<SpecName>(Value);
1529 template <auto &SpecName>
1530 typename std::remove_reference_t<decltype(SpecName)>::value_type
1533 if (isStateExplicitKernelBundle())
1535 "Specialization constants cannot be read after "
1536 "explicitly setting the used kernel bundle");
1539 getOrInsertHandlerKernelBundle(
true);
1541 return detail::createSyclObjFromImpl<kernel_bundle<bundle_state::input>>(
1543 .get_specialization_constant<SpecName>();
1557 template <
typename DataT,
int Dims,
access::mode AccMode,
1562 "require() cannot be called on empty accessors");
1563 if (Acc.is_placeholder())
1570 void depends_on(
event Event);
1575 void depends_on(
const std::vector<event> &Events);
1577 template <
typename T>
1580 template <
typename U,
typename T>
1584 static constexpr
bool value =
1585 std::is_trivially_copyable_v<std::remove_reference_t<T>>
1586 #if SYCL_LANGUAGE_VERSION && SYCL_LANGUAGE_VERSION <= 201707
1587 && std::is_standard_layout<std::remove_reference_t<T>>::value
1591 std::is_pointer_v<remove_cv_ref_t<T>>)
1601 template <
typename T>
1602 typename std::enable_if_t<ShouldEnableSetArg<T>::value,
void>
1604 setArgHelper(ArgIndex, std::move(Arg));
1612 setArgHelper(ArgIndex, std::move(Arg));
1615 template <
typename DataT,
int Dims>
1617 setArgHelper(ArgIndex, std::move(Arg));
1625 template <
typename... Ts>
void set_args(Ts &&...Args) {
1626 setArgsHelper(0, std::move(Args)...);
1636 template <
typename KernelName = detail::auto_name,
typename KernelType>
1638 single_task_lambda_impl<KernelName>(
1642 template <
typename KernelName = detail::auto_name,
typename KernelType>
1644 parallel_for_lambda_impl<KernelName>(
1649 template <
typename KernelName = detail::auto_name,
typename KernelType>
1651 parallel_for_lambda_impl<KernelName>(
1656 template <
typename KernelName = detail::auto_name,
typename KernelType>
1658 parallel_for_lambda_impl<KernelName>(
1667 template <
typename FuncT>
1669 "run_on_host_intel() is deprecated, use host_task() instead")
1670 void run_on_host_intel(FuncT Func) {
1671 throwIfActionIsCreated();
1676 MArgs = std::move(MAssociatedAccesors);
1678 setType(detail::CG::RunOnHostIntel);
1682 template <
typename FuncT>
1683 std::enable_if_t<detail::check_fn_signature<std::remove_reference_t<FuncT>,
1688 host_task_impl(Func);
1707 void parallel_for(
range<Dims> NumWorkItems,
id<Dims> WorkItemOffset,
1709 throwIfActionIsCreated();
1713 using LambdaArgType = sycl::detail::lambda_arg_type<KernelType, item<Dims>>;
1715 (void)WorkItemOffset;
1716 kernel_parallel_for_wrapper<NameT, LambdaArgType>(
KernelFunc);
1717 #ifndef __SYCL_DEVICE_ONLY__
1718 detail::checkValueRange<Dims>(NumWorkItems, WorkItemOffset);
1719 MNDRDesc.set(std::move(NumWorkItems), std::move(WorkItemOffset));
1720 StoreLambda<NameT, KernelType, Dims, LambdaArgType>(std::move(
KernelFunc));
1721 setType(detail::CG::Kernel);
1739 parallel_for_work_group_lambda_impl<KernelName>(
1761 parallel_for_work_group_lambda_impl<KernelName>(
1773 throwIfActionIsCreated();
1775 setHandlerKernelBundle(Kernel);
1780 setType(detail::CG::Kernel);
1781 extractArgsAndReqs();
1782 MKernelName = getKernelName();
1786 parallel_for_impl(NumWorkItems, Kernel);
1790 parallel_for_impl(NumWorkItems, Kernel);
1794 parallel_for_impl(NumWorkItems, Kernel);
1807 void parallel_for(
range<Dims> NumWorkItems,
id<Dims> WorkItemOffset,
1809 throwIfActionIsCreated();
1811 detail::checkValueRange<Dims>(NumWorkItems, WorkItemOffset);
1812 MNDRDesc.set(std::move(NumWorkItems), std::move(WorkItemOffset));
1813 setType(detail::CG::Kernel);
1814 extractArgsAndReqs();
1815 MKernelName = getKernelName();
1827 throwIfActionIsCreated();
1829 detail::checkValueRange<Dims>(NDRange);
1830 MNDRDesc.set(std::move(NDRange));
1831 setType(detail::CG::Kernel);
1832 extractArgsAndReqs();
1833 MKernelName = getKernelName();
1842 template <
typename KernelName = detail::auto_name,
typename KernelType>
1844 throwIfActionIsCreated();
1846 setHandlerKernelBundle(Kernel);
1852 #ifndef __SYCL_DEVICE_ONLY__
1857 setType(detail::CG::Kernel);
1858 if (!MIsHost && !lambdaAndKernelHaveEqualName<NameT>()) {
1859 extractArgsAndReqs();
1860 MKernelName = getKernelName();
1862 StoreLambda<NameT, KernelType, 1,
void>(std::move(
KernelFunc));
1864 detail::CheckDeviceCopyable<KernelType>();
1871 template <
typename FuncT>
1873 void interop_task(FuncT Func) {
1876 setType(detail::CG::CodeplayInteropTask);
1890 throwIfActionIsCreated();
1892 setHandlerKernelBundle(Kernel);
1896 using LambdaArgType = sycl::detail::lambda_arg_type<KernelType, item<Dims>>;
1899 kernel_parallel_for_wrapper<NameT, LambdaArgType>(
KernelFunc);
1900 #ifndef __SYCL_DEVICE_ONLY__
1901 detail::checkValueRange<Dims>(NumWorkItems);
1902 MNDRDesc.set(std::move(NumWorkItems));
1904 setType(detail::CG::Kernel);
1905 if (!MIsHost && !lambdaAndKernelHaveEqualName<NameT>()) {
1906 extractArgsAndReqs();
1907 MKernelName = getKernelName();
1909 StoreLambda<NameT, KernelType, Dims, LambdaArgType>(
1928 throwIfActionIsCreated();
1930 setHandlerKernelBundle(Kernel);
1934 using LambdaArgType = sycl::detail::lambda_arg_type<KernelType, item<Dims>>;
1937 (void)WorkItemOffset;
1938 kernel_parallel_for_wrapper<NameT, LambdaArgType>(
KernelFunc);
1939 #ifndef __SYCL_DEVICE_ONLY__
1940 detail::checkValueRange<Dims>(NumWorkItems, WorkItemOffset);
1941 MNDRDesc.set(std::move(NumWorkItems), std::move(WorkItemOffset));
1943 setType(detail::CG::Kernel);
1944 if (!MIsHost && !lambdaAndKernelHaveEqualName<NameT>()) {
1945 extractArgsAndReqs();
1946 MKernelName = getKernelName();
1948 StoreLambda<NameT, KernelType, Dims, LambdaArgType>(
1966 throwIfActionIsCreated();
1968 setHandlerKernelBundle(Kernel);
1972 using LambdaArgType =
1973 sycl::detail::lambda_arg_type<KernelType, nd_item<Dims>>;
1976 kernel_parallel_for_wrapper<NameT, LambdaArgType>(
KernelFunc);
1977 #ifndef __SYCL_DEVICE_ONLY__
1978 detail::checkValueRange<Dims>(NDRange);
1979 MNDRDesc.set(std::move(NDRange));
1981 setType(detail::CG::Kernel);
1982 if (!MIsHost && !lambdaAndKernelHaveEqualName<NameT>()) {
1983 extractArgsAndReqs();
1984 MKernelName = getKernelName();
1986 StoreLambda<NameT, KernelType, Dims, LambdaArgType>(
2008 throwIfActionIsCreated();
2010 setHandlerKernelBundle(Kernel);
2014 using LambdaArgType =
2015 sycl::detail::lambda_arg_type<KernelType, group<Dims>>;
2017 (void)NumWorkGroups;
2018 kernel_parallel_for_work_group_wrapper<NameT, LambdaArgType>(
KernelFunc);
2019 #ifndef __SYCL_DEVICE_ONLY__
2020 detail::checkValueRange<Dims>(NumWorkGroups);
2021 MNDRDesc.setNumWorkGroups(NumWorkGroups);
2023 StoreLambda<NameT, KernelType, Dims, LambdaArgType>(std::move(
KernelFunc));
2024 setType(detail::CG::Kernel);
2025 #endif // __SYCL_DEVICE_ONLY__
2048 throwIfActionIsCreated();
2050 setHandlerKernelBundle(Kernel);
2054 using LambdaArgType =
2055 sycl::detail::lambda_arg_type<KernelType, group<Dims>>;
2057 (void)NumWorkGroups;
2059 kernel_parallel_for_work_group_wrapper<NameT, LambdaArgType>(
KernelFunc);
2060 #ifndef __SYCL_DEVICE_ONLY__
2063 detail::checkValueRange<Dims>(ExecRange);
2064 MNDRDesc.set(std::move(ExecRange));
2066 StoreLambda<NameT, KernelType, Dims, LambdaArgType>(std::move(
KernelFunc));
2067 setType(detail::CG::Kernel);
2068 #endif // __SYCL_DEVICE_ONLY__
// -- property-list overload dispatchers (fragments) --
// NOTE(review): signatures dropped by extraction; each call below appears to
// be the body of a thin public overload forwarding to a *_lambda_impl helper.
2072 typename PropertiesT>
2076 single_task_lambda_impl<KernelName, KernelType, PropertiesT>(Props,
2081 typename PropertiesT>
// 1-, 2- and 3-dimensional parallel_for overloads forward to the shared
// lambda implementation with the dimensionality as a template argument.
2086 parallel_for_lambda_impl<KernelName, KernelType, 1, PropertiesT>(
2091 typename PropertiesT>
2096 parallel_for_lambda_impl<KernelName, KernelType, 2, PropertiesT>(
2101 typename PropertiesT>
2106 parallel_for_lambda_impl<KernelName, KernelType, 3, PropertiesT>(
2111 typename PropertiesT,
int Dims>
2116 parallel_for_impl<KernelName>(Range, Properties, std::move(
KernelFunc));
// Reduction variants: when more than one trailing argument is present
// (sizeof...(RestT) > 1), dispatch to detail::reduction_parallel_for.
2122 typename PropertiesT,
typename... RestT>
2124 (
sizeof...(RestT) > 1) &&
2128 detail::reduction_parallel_for<KernelName>(*
this, Range, Properties,
2129 std::forward<RestT>(Rest)...);
// Otherwise fall through to the plain parallel_for overload.
2136 parallel_for<KernelName>(
2138 std::forward<RestT>(Rest)...);
2142 typename PropertiesT,
typename... RestT>
2144 (
sizeof...(RestT) > 1) &&
2148 detail::reduction_parallel_for<KernelName>(*
this, Range, Properties,
2149 std::forward<RestT>(Rest)...);
2156 parallel_for<KernelName>(
2158 std::forward<RestT>(Rest)...);
// -- parallel_for_work_group overloads taking a property list (fragments) --
// Both forward to parallel_for_work_group_lambda_impl; the second overload's
// forwarded arguments are missing from this extraction.
2164 int Dims,
typename PropertiesT>
2167 parallel_for_work_group_lambda_impl<KernelName, KernelType, Dims,
2168 PropertiesT>(NumWorkGroups, Props,
2173 int Dims,
typename PropertiesT>
2177 parallel_for_work_group_lambda_impl<KernelName, KernelType, Dims,
// End of the kernel-invocation API section: retire the helper macro.
2183 #undef _KERNELFUNCPARAM
// -- copy() overloads between an accessor and a std::shared_ptr (fragments) --
// Each validates the accessor, keeps the shared_ptr alive for the duration of
// the command group by storing it in CGData, then delegates to the
// raw-pointer copy() overload.
2198 std::shared_ptr<T_Dst> Dst) {
2199 if (Src.is_placeholder())
2200 checkIfPlaceholderIsBoundToHandler(Src);
2202 throwIfActionIsCreated();
2203 static_assert(isValidTargetForExplicitOp(AccessTarget),
2204 "Invalid accessor target for the copy method.");
2205 static_assert(isValidModeForSourceAccessor(
AccessMode),
2206 "Invalid accessor mode for the copy method.");
// Pin the destination allocation until the command group completes.
2209 CGData.MSharedPtrStorage.push_back(Dst);
2210 typename std::shared_ptr<T_Dst>::element_type *RawDstPtr = Dst.get();
2211 copy(Src, RawDstPtr);
// Mirror overload: shared_ptr source -> accessor destination.
2227 if (Dst.is_placeholder())
2228 checkIfPlaceholderIsBoundToHandler(Dst);
2230 throwIfActionIsCreated();
2231 static_assert(isValidTargetForExplicitOp(AccessTarget),
2232 "Invalid accessor target for the copy method.");
2233 static_assert(isValidModeForDestinationAccessor(
AccessMode),
2234 "Invalid accessor mode for the copy method.");
2237 CGData.MSharedPtrStorage.push_back(Src);
2238 typename std::shared_ptr<T_Src>::element_type *RawSrcPtr = Src.get();
2239 copy(RawSrcPtr, Dst);
// -- copy(accessor Src, T_Dst *Dst) fragment --
// Validates the source accessor, then records a CopyAccToPtr command group:
// the accessor's implementation object becomes a requirement and MSrcPtr,
// while the raw destination pointer is stored in MDstPtr.
2254 if (Src.is_placeholder())
2255 checkIfPlaceholderIsBoundToHandler(Src);
2257 throwIfActionIsCreated();
2258 static_assert(isValidTargetForExplicitOp(AccessTarget),
2259 "Invalid accessor target for the copy method.");
2260 static_assert(isValidModeForSourceAccessor(
AccessMode),
2261 "Invalid accessor mode for the copy method.");
// On host, perform the copy directly instead of scheduling a command group.
2262 #ifndef __SYCL_DEVICE_ONLY__
2266 copyAccToPtrHost(Src, Dst);
2270 setType(detail::CG::CopyAccToPtr);
2275 CGData.MRequirements.push_back(AccImpl.get());
2276 MSrcPtr =
static_cast<void *
>(AccImpl.get());
2277 MDstPtr =
static_cast<void *
>(Dst);
// Keep the accessor implementation alive until the command group runs.
2280 CGData.MAccStorage.push_back(std::move(AccImpl));
// -- copy(const T_Src *Src, accessor Dst) fragment --
// Mirror of the accessor->pointer overload: records a CopyPtrToAcc command
// group with the raw source pointer in MSrcPtr and the accessor
// implementation in MDstPtr.
2296 if (Dst.is_placeholder())
2297 checkIfPlaceholderIsBoundToHandler(Dst);
2299 throwIfActionIsCreated();
2300 static_assert(isValidTargetForExplicitOp(AccessTarget),
2301 "Invalid accessor target for the copy method.");
2302 static_assert(isValidModeForDestinationAccessor(
AccessMode),
2303 "Invalid accessor mode for the copy method.");
// On host, do the copy immediately rather than scheduling it.
2304 #ifndef __SYCL_DEVICE_ONLY__
2308 copyPtrToAccHost(Src, Dst);
2312 setType(detail::CG::CopyPtrToAcc);
2317 CGData.MRequirements.push_back(AccImpl.get());
// const_cast: the scheduler stores both endpoints as mutable void*.
2318 MSrcPtr =
const_cast<T_Src *
>(Src);
2319 MDstPtr =
static_cast<void *
>(AccImpl.get());
2322 CGData.MAccStorage.push_back(std::move(AccImpl));
// -- copy(accessor Src, accessor Dst) fragment --
// Validates both accessors' targets and modes, checks the destination is
// large enough, then records a CopyAccToAcc command group with both accessor
// implementations as requirements.
2333 typename T_Src,
int Dims_Src,
access::mode AccessMode_Src,
2341 accessor<T_Dst, Dims_Dst, AccessMode_Dst, AccessTarget_Dst,
2344 if (Src.is_placeholder())
2345 checkIfPlaceholderIsBoundToHandler(Src);
2346 if (Dst.is_placeholder())
2347 checkIfPlaceholderIsBoundToHandler(Dst);
2349 throwIfActionIsCreated();
2350 static_assert(isValidTargetForExplicitOp(AccessTarget_Src),
2351 "Invalid source accessor target for the copy method.");
2352 static_assert(isValidTargetForExplicitOp(AccessTarget_Dst),
2353 "Invalid destination accessor target for the copy method.");
2354 static_assert(isValidModeForSourceAccessor(AccessMode_Src),
2355 "Invalid source accessor mode for the copy method.");
2356 static_assert(isValidModeForDestinationAccessor(AccessMode_Dst),
2357 "Invalid destination accessor mode for the copy method.");
// Runtime size check: destination byte size must cover the source.
2358 if (Dst.get_size() < Src.get_size())
2359 throw sycl::invalid_object_error(
2360 "The destination accessor size is too small to copy the memory into.",
2361 PI_ERROR_INVALID_OPERATION);
// copyAccToAccHelper presumably handles cases needing a fallback path —
// its contract is not visible in this fragment; confirm against the helper.
2363 if (copyAccToAccHelper(Src, Dst))
2365 setType(detail::CG::CopyAccToAcc);
2373 CGData.MRequirements.push_back(AccImplSrc.get());
2374 CGData.MRequirements.push_back(AccImplDst.get());
2375 MSrcPtr = AccImplSrc.get();
2376 MDstPtr = AccImplDst.get();
2379 CGData.MAccStorage.push_back(std::move(AccImplSrc));
2380 CGData.MAccStorage.push_back(std::move(AccImplDst));
// -- update_host(accessor) fragment --
// Records an UpdateHost command group that makes the accessor's data
// available on the host; the accessor implementation is both the
// destination pointer and a scheduling requirement.
2392 if (Acc.is_placeholder())
2393 checkIfPlaceholderIsBoundToHandler(Acc);
2395 throwIfActionIsCreated();
2396 static_assert(isValidTargetForExplicitOp(AccessTarget),
2397 "Invalid accessor target for the update_host method.");
2398 setType(detail::CG::UpdateHost);
2403 MDstPtr =
static_cast<void *
>(AccImpl.get());
2404 CGData.MRequirements.push_back(AccImpl.get());
2405 CGData.MAccStorage.push_back(std::move(AccImpl));
// -- fill(accessor Dst, const T &Pattern) fragment --
// Two paths: a native backend Fill command group when the pattern size and
// dimensionality are supported, otherwise (fragmentary here) a kernel-based
// fallback that writes the pattern per index.
2424 assert(!MIsHost &&
"fill() should no longer be callable on a host device.");
2426 if (Dst.is_placeholder())
2427 checkIfPlaceholderIsBoundToHandler(Dst);
2429 throwIfActionIsCreated();
2431 static_assert(isValidTargetForExplicitOp(AccessTarget),
2432 "Invalid accessor target for the fill method.");
// Native fill only for backend-supported pattern sizes and for <=1D
// buffers or image targets.
2433 if constexpr (isBackendSupportedFillSize(
sizeof(T)) &&
2434 (Dims <= 1 || isImageOrImageArray(AccessTarget))) {
2435 setType(detail::CG::Fill);
2440 MDstPtr =
static_cast<void *
>(AccImpl.get());
2441 CGData.MRequirements.push_back(AccImpl.get());
2442 CGData.MAccStorage.push_back(std::move(AccImpl));
// The pattern bytes are copied into MPattern so they outlive this call.
2444 MPattern.resize(
sizeof(T));
2445 auto PatternPtr =
reinterpret_cast<T *
>(MPattern.data());
2446 *PatternPtr = Pattern;
2447 }
else if constexpr (Dims == 0) {
// Kernel fallback (fragment): one work-item per element writes Pattern.
2456 Range, [=](
id<Dims> Index) { Dst[Index] = Pattern; });
// -- fill(void *Ptr, const T &Pattern, size_t Count) fragment --
// USM fill: launches a __usmfill<T> kernel where each work-item writes the
// pattern to one element. Closing braces are missing from this extraction.
2466 template <
typename T>
void fill(
void *Ptr,
const T &Pattern,
size_t Count) {
2467 throwIfActionIsCreated();
// The pattern is captured by value into the kernel lambda, hence the
// trivially-copyable requirement.
2468 static_assert(std::is_trivially_copyable<T>::value,
2469 "Pattern must be trivially copyable");
2470 parallel_for<class __usmfill<T>>(
range<1>(Count), [=](
id<1> Index) {
2471 T *CastedPtr =
static_cast<T *
>(Ptr);
2472 CastedPtr[Index] = Pattern;
// -- ext_oneapi_barrier() fragment: records a Barrier command group. --
2480 throwIfActionIsCreated();
2481 setType(detail::CG::Barrier);
// Barrier that waits only on the given list of events (declaration).
2496 void ext_oneapi_barrier(
const std::vector<event> &WaitList);
// USM memcpy of Count bytes from Src to Dest (declaration; defined
// out of line).
2517 void memcpy(
void *Dest, const
void *Src,
size_t Count);
// Typed USM copy: Count elements of T, implemented on top of memcpy.
// NOTE: argument order is (Src, Dest) here but (Dest, Src) for memcpy.
2529 template <typename T>
void copy(const T *Src, T *Dest,
size_t Count) {
2530 this->
memcpy(Dest, Src, Count *
sizeof(T));
// USM memset / prefetch / mem_advise declarations.
2541 void memset(
void *Dest,
int Value,
size_t Count);
2549 void prefetch(
const void *Ptr,
size_t Count);
2557 void mem_advise(
const void *Ptr,
size_t Length,
int Advice);
// -- ext_oneapi_memcpy2d fragment --
// Byte-wise 2D memcpy (T is pinned to unsigned char by the enable_if).
// Validates the pitches, then picks one of three strategies based on where
// the allocations live and on native 2D-memcpy support.
2576 template <
typename T =
unsigned char,
2577 typename = std::enable_if_t<std::is_same_v<T, unsigned char>>>
2579 size_t SrcPitch,
size_t Width,
size_t Height) {
2580 throwIfActionIsCreated();
// A row cannot be wider than the pitch of either allocation.
2581 if (Width > DestPitch)
2583 "Destination pitch must be greater than or equal "
2584 "to the width specified in 'ext_oneapi_memcpy2d'");
2585 if (Width > SrcPitch)
2587 "Source pitch must be greater than or equal "
2588 "to the width specified in 'ext_oneapi_memcpy2d'");
// Classify each pointer: 'unknown' alloc type is treated as host memory.
2591 context Ctx = detail::createSyclObjFromImpl<context>(getContextImplPtr());
2595 SrcAllocType == usm::alloc::unknown || SrcAllocType == usm::alloc::host;
2596 bool DestIsHost = DestAllocType == usm::alloc::unknown ||
2597 DestAllocType == usm::alloc::host;
// host->host: host-task fallback; any host endpoint or native support:
// the impl entry point; otherwise: kernel fallback.
2604 if (SrcIsHost && DestIsHost) {
2605 commonUSMCopy2DFallbackHostTask<T>(Src, SrcPitch, Dest, DestPitch, Width,
2607 }
else if (SrcIsHost || DestIsHost || supportsUSMMemcpy2D()) {
2608 ext_oneapi_memcpy2d_impl(Dest, DestPitch, Src, SrcPitch, Width, Height);
2610 commonUSMCopy2DFallbackKernel<T>(Src, SrcPitch, Dest, DestPitch, Width,
// -- ext_oneapi_copy2d fragment --
// Element-wise 2D copy: same strategy selection as ext_oneapi_memcpy2d, but
// pitches/width here are in elements of T, so the impl call scales them by
// sizeof(T) to get bytes.
2629 template <
typename T>
2631 size_t DestPitch,
size_t Width,
size_t Height) {
2632 if (Width > DestPitch)
2634 "Destination pitch must be greater than or equal "
2635 "to the width specified in 'ext_oneapi_copy2d'");
2636 if (Width > SrcPitch)
2638 "Source pitch must be greater than or equal "
2639 "to the width specified in 'ext_oneapi_copy2d'");
// Classify each pointer; 'unknown' alloc type counts as host memory.
2642 context Ctx = detail::createSyclObjFromImpl<context>(getContextImplPtr());
2646 SrcAllocType == usm::alloc::unknown || SrcAllocType == usm::alloc::host;
2647 bool DestIsHost = DestAllocType == usm::alloc::unknown ||
2648 DestAllocType == usm::alloc::host;
2655 if (SrcIsHost && DestIsHost) {
2656 commonUSMCopy2DFallbackHostTask<T>(Src, SrcPitch, Dest, DestPitch, Width,
2658 }
else if (SrcIsHost || DestIsHost || supportsUSMMemcpy2D()) {
// Convert element counts to byte counts for the byte-oriented impl.
2659 ext_oneapi_memcpy2d_impl(Dest, DestPitch *
sizeof(T), Src,
2660 SrcPitch *
sizeof(T), Width *
sizeof(T), Height);
2662 commonUSMCopy2DFallbackKernel<T>(Src, SrcPitch, Dest, DestPitch, Width,
// -- ext_oneapi_memset2d fragment --
// 2D memset (byte-wise; T pinned to unsigned char). Value is truncated to a
// single byte (CharVal) for the fallback paths, matching std::memset
// semantics; the native impl receives the original int.
2682 template <
typename T =
unsigned char,
2683 typename = std::enable_if_t<std::is_same_v<T, unsigned char>>>
2685 size_t Width,
size_t Height) {
2686 throwIfActionIsCreated();
2687 if (Width > DestPitch)
2689 "Destination pitch must be greater than or equal "
2690 "to the width specified in 'ext_oneapi_memset2d'");
2691 T CharVal =
static_cast<T
>(Value);
2693 context Ctx = detail::createSyclObjFromImpl<context>(getContextImplPtr());
// Host (or unknown) destination: host-task fallback; native support:
// the impl entry point; otherwise: kernel fallback.
2698 if (DestAllocType == usm::alloc::unknown ||
2699 DestAllocType == usm::alloc::host)
2700 commonUSMFill2DFallbackHostTask(Dest, DestPitch, CharVal, Width, Height);
2701 else if (supportsUSMMemset2D())
2702 ext_oneapi_memset2d_impl(Dest, DestPitch, Value, Width, Height);
2704 commonUSMFill2DFallbackKernel(Dest, DestPitch, CharVal, Width, Height);
// -- ext_oneapi_fill2d fragment --
// 2D fill with an arbitrary trivially-copyable pattern; pitch/width are in
// elements of T. Strategy selection mirrors ext_oneapi_memset2d.
2719 template <
typename T>
2721 size_t Width,
size_t Height) {
2722 throwIfActionIsCreated();
// Pattern is captured by value into the fallback kernel lambda.
2723 static_assert(std::is_trivially_copyable<T>::value,
2724 "Pattern must be trivially copyable");
2725 if (Width > DestPitch)
2727 "Destination pitch must be greater than or equal "
2728 "to the width specified in 'ext_oneapi_fill2d'");
2730 context Ctx = detail::createSyclObjFromImpl<context>(getContextImplPtr());
2735 if (DestAllocType == usm::alloc::unknown ||
2736 DestAllocType == usm::alloc::host)
2737 commonUSMFill2DFallbackHostTask(Dest, DestPitch, Pattern, Width, Height);
2738 else if (supportsUSMFill2D())
2739 ext_oneapi_fill2d_impl(Dest, DestPitch, &Pattern,
sizeof(T), Width,
2742 commonUSMFill2DFallbackKernel(Dest, DestPitch, Pattern, Width, Height);
// -- copy to/from device_global fragments --
// Bounds-checked byte copies into/out of a device_global<T>. The
// device_image_scope property (checked via PropertyListT) selects the
// host-only copy path versus the generic one.
2754 template <
typename T,
typename PropertyListT>
2756 const void *Src,
size_t NumBytes =
sizeof(T),
2757 size_t DestOffset = 0) {
// Reject copies that would write past the end of the T object.
2758 if (
sizeof(T) < DestOffset + NumBytes)
2760 "Copy to device_global is out of bounds.");
2762 constexpr
bool IsDeviceImageScoped = PropertyListT::template
has_property<
2768 memcpyToHostOnlyDeviceGlobal(&Dest, Src,
sizeof(T), IsDeviceImageScoped,
2769 NumBytes, DestOffset);
2773 memcpyToDeviceGlobal(&Dest, Src, IsDeviceImageScoped, NumBytes, DestOffset);
// Mirror direction: read NumBytes starting at SrcOffset out of the
// device_global into Dest.
2785 template <
typename T,
typename PropertyListT>
2789 size_t NumBytes =
sizeof(T),
size_t SrcOffset = 0) {
2790 if (
sizeof(T) < SrcOffset + NumBytes)
2792 "Copy from device_global is out of bounds.");
2794 constexpr
bool IsDeviceImageScoped = PropertyListT::template
has_property<
2800 memcpyFromHostOnlyDeviceGlobal(Dest, &Src, IsDeviceImageScoped, NumBytes,
2805 memcpyFromDeviceGlobal(Dest, &Src, IsDeviceImageScoped, NumBytes,
// -- element-wise copy overloads for (array-typed) device_global fragments --
// Count defaults to the total number of array elements
// (sizeof(T) / sizeof(element)); StartIndex is an element offset. Both
// overloads forward to this->memcpy with byte counts/offsets.
2819 template <
typename T,
typename PropertyListT>
2820 void copy(
const std::remove_all_extents_t<T> *Src,
2822 size_t Count =
sizeof(T) /
sizeof(std::remove_all_extents_t<T>),
2823 size_t StartIndex = 0) {
2824 this->
memcpy(Dest, Src, Count *
sizeof(std::remove_all_extents_t<T>),
2825 StartIndex *
sizeof(std::remove_all_extents_t<T>));
// Mirror direction: device_global source into a host pointer.
2838 template <
typename T,
typename PropertyListT>
2841 std::remove_all_extents_t<T> *Dest,
2842 size_t Count =
sizeof(T) /
sizeof(std::remove_all_extents_t<T>),
2843 size_t StartIndex = 0) {
2844 this->
memcpy(Dest, Src, Count *
sizeof(std::remove_all_extents_t<T>),
2845 StartIndex *
sizeof(std::remove_all_extents_t<T>));
// -- handler data members --
// Pimpl object carrying additional implementation state.
2849 std::shared_ptr<detail::handler_impl> MImpl;
// The queue this handler's command group will be submitted to.
2850 std::shared_ptr<detail::queue_impl> MQueue;
// Keep-alive storage for local accessors and streams used by the CG.
2856 std::vector<detail::LocalAccessorImplPtr> MLocalAccStorage;
2857 std::vector<std::shared_ptr<detail::stream_impl>> MStreamStorage;
// Kernel argument descriptors and accessors that must be require()'d.
2859 std::vector<detail::ArgDesc> MArgs;
2863 std::vector<detail::ArgDesc> MAssociatedAccesors;
// Name and implementation of the kernel, when one was set explicitly.
2866 std::string MKernelName;
2868 std::shared_ptr<detail::kernel_impl> MKernel;
// Source/destination endpoints for copy/fill/update_host operations.
2874 void *MSrcPtr =
nullptr;
2876 void *MDstPtr =
nullptr;
// Byte pattern for fill operations (see fill()).
2880 std::vector<char> MPattern;
// Owned host-side kernel / host-task / interop-task objects.
2882 std::unique_ptr<detail::HostKernelBase> MHostKernel;
2884 std::unique_ptr<detail::HostTask> MHostTask;
2886 std::unique_ptr<detail::InteropTask> MInteropTask;
// Events a barrier-with-wait-list must wait on.
2889 std::vector<detail::EventImplPtr> MEventsWaitWithBarrier;
// True when targeting the (deprecated) host device; set once finalized.
2891 bool MIsHost =
false;
2894 bool MIsFinalized =
false;
// -- friend declarations and private helper declarations (fragments) --
// NOTE(review): several friend-class lines were dropped by extraction; only
// the template parameter lists of some friends remain visible.
2900 template <
typename DataT,
int Dims,
access::mode AccMode,
2902 typename PropertyListT>
2910 friend class stream;
2914 template <
typename T,
class BinaryOperation,
int Dims,
size_t Extent,
2915 bool ExplicitIdentity,
typename RedOutVar>
2919 template <
class FunctorTy>
// Friends implementing reduction parallel_for (declared in detail).
2923 typename PropertiesT,
typename... RestT>
2925 PropertiesT Properties,
2929 typename PropertiesT,
typename... RestT>
2932 PropertiesT Properties, RestT... Rest);
2934 #ifndef __SYCL_DEVICE_ONLY__
// Test hook: unit tests poke handler internals through MockHandler.
2940 friend class ::MockHandler;
2945 template <
class _name,
class _dataT, int32_t _min_capacity,
2946 class _propertiesT,
class>
// Intel FPGA host-pipe read/write entry points (declarations).
2955 void ext_intel_read_host_pipe(
const std::string &Name,
void *Ptr,
size_t Size,
2956 bool Block =
false);
2964 void ext_intel_write_host_pipe(
const std::string &Name,
void *Ptr,
2965 size_t Size,
bool Block =
false);
// Range-rounding configuration queries (declarations).
2967 bool DisableRangeRounding();
2969 bool RangeRoundingTrace();
2971 void GetRangeRoundingSettings(
size_t &MinFactor,
size_t &GoodFactor,
// Two SFINAE-selected overloads wrapping a user kernel for rounded ranges;
// the enable_if conditions are partially missing from this extraction.
2974 template <
typename WrapperT,
typename TransformedArgType,
int Dims,
2975 typename KernelType,
2977 KernelType, TransformedArgType>::value> * =
nullptr>
2978 auto getRangeRoundedKernelLambda(KernelType
KernelFunc,
2981 KernelType>(NumWorkItems,
2985 template <
typename WrapperT,
typename TransformedArgType,
int Dims,
2986 typename KernelType,
2988 KernelType, TransformedArgType>::value> * =
nullptr>
2989 auto getRangeRoundedKernelLambda(KernelType
KernelFunc,
// Accessor for the context implementation backing this handler's queue.
2995 const std::shared_ptr<detail::context_impl> &getContextImplPtr()
const;
// Backend capability queries used to choose native vs fallback 2D USM ops.
2998 bool supportsUSMMemcpy2D();
2999 bool supportsUSMFill2D();
3000 bool supportsUSMMemset2D();
// Computes the per-launch chunk bounds used by the 2D fallback kernels.
3003 id<2> computeFallbackKernelBounds(
size_t Width,
size_t Height);
// -- commonUSMCopy2DFallbackKernel fragment --
// Kernel-based 2D copy fallback: launches a fixed-size chunk of work-items
// and has each one strided-loop over the full Height x Width region,
// guarding against out-of-bounds indices.
3007 template <
typename T>
3008 void commonUSMCopy2DFallbackKernel(
const void *Src,
size_t SrcPitch,
3009 void *Dest,
size_t DestPitch,
size_t Width,
// Chunk = bounded launch size; Iterations = strides needed to cover all.
3014 id<2> Chunk = computeFallbackKernelBounds(Height, Width);
3015 id<2> Iterations = (Chunk + id<2>{Height, Width} - 1) / Chunk;
3016 parallel_for<class __usmmemcpy2d<T>>(
3017 range<2>{Chunk[0], Chunk[1]}, [=](id<2> Index) {
3018 T *CastedDest =
static_cast<T *
>(Dest);
3019 const T *CastedSrc =
static_cast<const T *
>(Src);
3020 for (uint32_t I = 0; I < Iterations[0]; ++I) {
3021 for (uint32_t J = 0; J < Iterations[1]; ++J) {
3022 id<2> adjustedIndex = Index + Chunk * id<2>{I, J};
// Guard: the last stride may overshoot the region.
3023 if (adjustedIndex[0] < Height && adjustedIndex[1] < Width) {
3024 CastedDest[adjustedIndex[0] * DestPitch + adjustedIndex[1]] =
3025 CastedSrc[adjustedIndex[0] * SrcPitch + adjustedIndex[1]];
// -- commonUSMCopy2DFallbackHostTask fragment --
// Host-side 2D copy fallback: row-by-row std::copy with the given pitches
// (pitches and width are in elements of T).
3034 template <
typename T>
3035 void commonUSMCopy2DFallbackHostTask(
const void *Src,
size_t SrcPitch,
3036 void *Dest,
size_t DestPitch,
3037 size_t Width,
size_t Height) {
3041 const T *CastedSrc =
static_cast<const T *
>(Src);
3042 T *CastedDest =
static_cast<T *
>(Dest);
3043 for (
size_t I = 0; I < Height; ++I) {
3044 const T *SrcItBegin = CastedSrc + SrcPitch * I;
3045 T *DestItBegin = CastedDest + DestPitch * I;
3046 std::copy(SrcItBegin, SrcItBegin + Width, DestItBegin);
// -- commonUSMFill2DFallbackKernel fragment --
// Kernel-based 2D fill fallback: same chunk/stride scheme as the copy
// fallback, writing Pattern to each in-bounds element.
3053 template <
typename T>
3054 void commonUSMFill2DFallbackKernel(
void *Dest,
size_t DestPitch,
3055 const T &Pattern,
size_t Width,
3060 id<2> Chunk = computeFallbackKernelBounds(Height, Width);
3061 id<2> Iterations = (Chunk + id<2>{Height, Width} - 1) / Chunk;
3062 parallel_for<class __usmfill2d<T>>(
3063 range<2>{Chunk[0], Chunk[1]}, [=](id<2> Index) {
3064 T *CastedDest =
static_cast<T *
>(Dest);
3065 for (uint32_t I = 0; I < Iterations[0]; ++I) {
3066 for (uint32_t J = 0; J < Iterations[1]; ++J) {
3067 id<2> adjustedIndex = Index + Chunk * id<2>{I, J};
// Guard: the last stride may overshoot the region.
3068 if (adjustedIndex[0] < Height && adjustedIndex[1] < Width) {
3069 CastedDest[adjustedIndex[0] * DestPitch + adjustedIndex[1]] =
// -- commonUSMFill2DFallbackHostTask fragment --
// Host-side 2D fill fallback: row-by-row std::fill with the given pitch
// (pitch and width are in elements of T).
3079 template <
typename T>
3080 void commonUSMFill2DFallbackHostTask(
void *Dest,
size_t DestPitch,
3081 const T &Pattern,
size_t Width,
3086 T *CastedDest =
static_cast<T *
>(Dest);
3087 for (
size_t I = 0; I < Height; ++I) {
3088 T *ItBegin = CastedDest + DestPitch * I;
3089 std::fill(ItBegin, ItBegin + Width, Pattern);
// -- native-path implementation declarations (defined out of line) --
// Byte-oriented 2D memcpy between USM allocations.
3095 void ext_oneapi_memcpy2d_impl(
void *Dest,
size_t DestPitch,
const void *Src,
3096 size_t SrcPitch,
size_t Width,
size_t Height);
// 2D fill with a ValueSize-byte pattern.
3099 void ext_oneapi_fill2d_impl(
void *Dest,
size_t DestPitch,
const void *Value,
3100 size_t ValueSize,
size_t Width,
size_t Height);
// 2D memset with a single byte value.
3103 void ext_oneapi_memset2d_impl(
void *Dest,
size_t DestPitch,
int Value,
3104 size_t Width,
size_t Height);
// device_global transfers; the trailing offset parameters were dropped
// by extraction.
3107 void memcpyToDeviceGlobal(
const void *DeviceGlobalPtr,
const void *Src,
3108 bool IsDeviceImageScoped,
size_t NumBytes,
3112 void memcpyFromDeviceGlobal(
void *Dest,
const void *DeviceGlobalPtr,
3113 bool IsDeviceImageScoped,
size_t NumBytes,
// Host-only-visible device_global variants (see copy() overloads above
// that dispatch on the device_image_scope property).
3117 void memcpyToHostOnlyDeviceGlobal(
const void *DeviceGlobalPtr,
3118 const void *Src,
size_t DeviceGlobalTSize,
3119 bool IsDeviceImageScoped,
size_t NumBytes,
3123 void memcpyFromHostOnlyDeviceGlobal(
void *Dest,
const void *DeviceGlobalPtr,
3124 bool IsDeviceImageScoped,
size_t NumBytes,
3130 typename PropertyListT = property_list>
3131 void checkIfPlaceholderIsBoundToHandler(
3132 accessor<T, Dims, AccessMode, AccessTarget, IsPlaceholder, PropertyListT>
3134 auto *AccBase =
reinterpret_cast<detail::AccessorBaseHost *
>(&Acc);
3136 detail::AccessorImplHost *Req = AccImpl.get();
3137 if (std::find_if(MAssociatedAccesors.begin(), MAssociatedAccesors.end(),
3138 [&](
const detail::ArgDesc &AD) {
3140 detail::kernel_param_kind_t::kind_accessor &&
3142 AD.MSize == static_cast<int>(AccessTarget);
3143 }) == MAssociatedAccesors.end())
3145 "placeholder accessor must be bound by calling "
3146 "handler::require() before it can be used.");