#include <type_traits>

#if !SYCL_LANGUAGE_VERSION || SYCL_LANGUAGE_VERSION < 202001
#define __SYCL_NONCONST_FUNCTOR__
#endif
// Forward declarations of the kernel name types used by the explicit copy
// operations below.
template <typename T_Src, typename T_Dst, int Dims, access::mode AccessMode,
          access::target AccessTarget, access::placeholder IsPlaceholder>
class __copyAcc2Ptr;

template <typename T_Src, typename T_Dst, int Dims, access::mode AccessMode,
          access::target AccessTarget, access::placeholder IsPlaceholder>
class __copyPtr2Acc;
template <typename T, int Dimensions, typename AllocatorT, typename Enable>
class buffer;
template <typename RetType, typename Func, typename Arg>
static Arg member_ptr_helper(RetType (Func::*)(Arg) const);

// Non-const overload of the above template to match functors whose
// operator() is declared without the 'const' qualifier.
template <typename RetType, typename Func, typename Arg>
static Arg member_ptr_helper(RetType (Func::*)(Arg));
template <typename F, typename SuggestedArgType>
decltype(member_ptr_helper(&F::operator())) argument_helper(int);

template <typename F, typename SuggestedArgType>
SuggestedArgType argument_helper(...);

template <typename F, typename SuggestedArgType>
using lambda_arg_type = decltype(argument_helper<F, SuggestedArgType>(0));
#if __SYCL_ID_QUERIES_FIT_IN_INT__
template <typename T> struct NotIntMsg;

template <int Dims> struct NotIntMsg<range<Dims>> {
  constexpr static const char *Msg =
      "Provided range is out of integer limits. Pass "
      "`-fno-sycl-id-queries-fit-in-int' to disable range check.";
};

template <int Dims> struct NotIntMsg<id<Dims>> {
  constexpr static const char *Msg =
      "Provided offset is out of integer limits. Pass "
      "`-fno-sycl-id-queries-fit-in-int' to disable offset check.";
};
#endif
#if __SYCL_ID_QUERIES_FIT_IN_INT__
template <typename T, typename ValT>
typename detail::enable_if_t<std::is_same<ValT, size_t>::value ||
                             std::is_same<ValT, unsigned long long>::value>
checkValueRangeImpl(ValT V) {
  static constexpr size_t Limit =
      static_cast<size_t>((std::numeric_limits<int>::max)());
  if (V > Limit)
    throw runtime_error(NotIntMsg<T>::Msg, PI_ERROR_INVALID_VALUE);
}
#endif
template <int Dims, typename T>
typename detail::enable_if_t<std::is_same<T, range<Dims>>::value ||
                             std::is_same<T, id<Dims>>::value>
checkValueRange(const T &V) {
#if __SYCL_ID_QUERIES_FIT_IN_INT__
  for (size_t Dim = 0; Dim < Dims; ++Dim)
    checkValueRangeImpl<T>(V[Dim]);

  {
    unsigned long long Product = 1;
    for (size_t Dim = 0; Dim < Dims; ++Dim) {
      Product *= V[Dim];
      // Check the running product now to catch an overflow before the final
      // multiplication completes.
      checkValueRangeImpl<T>(Product);
    }
  }
#else
  (void)V;
#endif
}
template <int Dims>
void checkValueRange(const range<Dims> &R, const id<Dims> &O) {
#if __SYCL_ID_QUERIES_FIT_IN_INT__
  checkValueRange<Dims>(R);
  checkValueRange<Dims>(O);

  for (size_t Dim = 0; Dim < Dims; ++Dim) {
    unsigned long long Sum = R[Dim] + O[Dim];
    checkValueRangeImpl<range<Dims>>(Sum);
  }
#else
  (void)R;
  (void)O;
#endif
}
template <int Dims, typename T>
typename detail::enable_if_t<std::is_same<T, nd_range<Dims>>::value>
checkValueRange(const T &V) {
#if __SYCL_ID_QUERIES_FIT_IN_INT__
  checkValueRange<Dims>(V.get_global_range());
  checkValueRange<Dims>(V.get_local_range());
  checkValueRange<Dims>(V.get_offset());

  checkValueRange<Dims>(V.get_global_range(), V.get_offset());
#else
  (void)V;
#endif
}
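
// Illustrative note (not part of the original header): with
// __SYCL_ID_QUERIES_FIT_IN_INT__ enabled, each dimension and the running
// product of dimensions must fit in a signed 32-bit int. For example, a
// hypothetical range<2>{100000, 100000} passes the per-dimension checks, but
// its product (10'000'000'000) exceeds INT_MAX (2'147'483'647), so
// checkValueRangeImpl throws on the second iteration of the product loop.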
template <typename TransformedArgType, int Dims, typename KernelType>
class RoundedRangeKernel {
public:
  RoundedRangeKernel(range<Dims> NumWorkItems, KernelType KernelFunc)
      : NumWorkItems(NumWorkItems), KernelFunc(KernelFunc) {}

  void operator()(TransformedArgType Arg) const {
    if (Arg[0] >= NumWorkItems[0])
      return;
    Arg.set_allowed_range(NumWorkItems);
    KernelFunc(Arg);
  }

private:
  range<Dims> NumWorkItems;
  KernelType KernelFunc;
};

template <typename TransformedArgType, int Dims, typename KernelType>
class RoundedRangeKernelWithKH {
public:
  RoundedRangeKernelWithKH(range<Dims> NumWorkItems, KernelType KernelFunc)
      : NumWorkItems(NumWorkItems), KernelFunc(KernelFunc) {}

  void operator()(TransformedArgType Arg, kernel_handler KH) const {
    if (Arg[0] >= NumWorkItems[0])
      return;
    Arg.set_allowed_range(NumWorkItems);
    KernelFunc(Arg, KH);
  }

private:
  range<Dims> NumWorkItems;
  KernelType KernelFunc;
};
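
// Illustrative note (not part of the original header): these wrappers let the
// runtime launch a rounded-up range while preserving user-visible semantics.
// If a user asks for 1000 work-items and the range is rounded up to 1056, the
// wrapper is launched on 1056 items; the 56 extra items fail the
// `Arg[0] >= NumWorkItems[0]` guard and return immediately, so the user
// kernel only ever observes ids within the original range.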
// Forward declarations of the reduction implementation machinery.
template <typename T, class BinaryOperation, int Dims, size_t Extent,
          typename RedOutVar>
class reduction_impl_algo;

template <typename KernelName, typename KernelType, int Dims, class Reduction>
void reduCGFunc(handler &CGH, KernelType KernelFunc, const range<Dims> &Range,
                size_t MaxWGSize, uint32_t NumConcurrentWorkGroups,
                Reduction &Redu);
template <typename KernelName, typename KernelType, int Dims, class Reduction>
detail::enable_if_t<Reduction::has_fast_atomics>
reduCGFunc(handler &CGH, KernelType KernelFunc, const nd_range<Dims> &Range,
           Reduction &Redu);

template <typename KernelName, typename KernelType, int Dims, class Reduction>
detail::enable_if_t<!Reduction::has_fast_atomics>
reduCGFunc(handler &CGH, KernelType KernelFunc, const nd_range<Dims> &Range,
           Reduction &Redu);

template <typename KernelName, typename KernelType, int Dims, class Reduction>
detail::enable_if_t<Reduction::has_atomic_add_float64>
reduCGFuncAtomic64(handler &CGH, KernelType KernelFunc,
                   const nd_range<Dims> &Range, Reduction &Redu);

template <typename KernelName, typename KernelType, class Reduction>
size_t reduAuxCGFunc(handler &CGH, size_t NWorkItems, size_t MaxWGSize,
                     Reduction &Redu);
template <typename KernelName, typename KernelType, int Dims,
          typename... Reductions, size_t... Is>
void reduCGFunc(handler &CGH, KernelType KernelFunc,
                const nd_range<Dims> &Range,
                std::tuple<Reductions...> &ReduTuple,
                std::index_sequence<Is...>);
template <typename KernelName, typename KernelType, typename... Reductions,
          size_t... Is>
size_t reduAuxCGFunc(handler &CGH, size_t NWorkItems, size_t MaxWGSize,
                     std::tuple<Reductions...> &ReduTuple,
                     std::index_sequence<Is...>);
template <typename KernelName, class Reduction>
std::enable_if_t<!Reduction::is_usm>
reduSaveFinalResultToUserMem(handler &CGH, Reduction &Redu);

template <typename KernelName, class Reduction>
std::enable_if_t<Reduction::is_usm>
reduSaveFinalResultToUserMem(handler &CGH, Reduction &Redu);
template <typename... Reduction, size_t... Is>
std::shared_ptr<event>
reduSaveFinalResultToUserMem(std::shared_ptr<detail::queue_impl> Queue,
                             bool IsHost, std::tuple<Reduction...> &ReduTuple,
                             std::index_sequence<Is...>);
template <typename Reduction, typename... RestT>
std::enable_if_t<!Reduction::is_usm>
reduSaveFinalResultToUserMemHelper(std::vector<event> &Events,
                                   std::shared_ptr<detail::queue_impl> Queue,
                                   bool IsHost, Reduction &Redu,
                                   RestT... Rest);

__SYCL_EXPORT uint32_t
reduGetMaxNumConcurrentWorkGroups(std::shared_ptr<detail::queue_impl> Queue);

__SYCL_EXPORT size_t reduGetMaxWGSize(std::shared_ptr<detail::queue_impl> Queue,
                                      size_t LocalMemBytesPerWorkItem);
template <typename... ReductionT, size_t... Is>
size_t reduGetMemPerWorkItem(std::tuple<ReductionT...> &ReduTuple,
                             std::index_sequence<Is...>);

template <typename TupleT, std::size_t... Is>
std::tuple<std::tuple_element_t<Is, TupleT>...>
tuple_select_elements(TupleT Tuple, std::index_sequence<Is...>);
  /// Constructs a SYCL handler from a queue.
  handler(std::shared_ptr<detail::queue_impl> Queue, bool IsHost);

  /// Constructs a SYCL handler from the associated queue and the submission's
  /// primary and secondary queues.
  handler(std::shared_ptr<detail::queue_impl> Queue,
          std::shared_ptr<detail::queue_impl> PrimaryQueue,
          std::shared_ptr<detail::queue_impl> SecondaryQueue, bool IsHost);
  /// Stores a copy of Arg in MArgsStorage and returns a pointer to the copy.
  template <typename T, typename F = typename detail::remove_const_t<
                            typename detail::remove_reference_t<T>>>
  F *storePlainArg(T &&Arg) {
    MArgsStorage.emplace_back(sizeof(T));
    auto Storage = reinterpret_cast<F *>(MArgsStorage.back().data());
    *Storage = Arg;
    return Storage;
  }
  void throwIfActionIsCreated() {
    if (detail::CG::None != getType())
      throw sycl::runtime_error("Attempt to set multiple actions for the "
                                "command group. Command group must consist of "
                                "a single kernel or explicit memory operation.",
                                PI_ERROR_INVALID_OPERATION);
  }
  /// Extracts kernel arguments and requirements from the lambda using
  /// integration-header information.
  void
  extractArgsAndReqsFromLambda(char *LambdaPtr, size_t KernelArgsNum,
                               const detail::kernel_param_desc_t *KernelArgs);

  void
  extractArgsAndReqsFromLambda(char *LambdaPtr, size_t KernelArgsNum,
                               const detail::kernel_param_desc_t *KernelArgs,
                               bool IsESIMD);

  /// Extracts kernel arguments and requirements set via set_arg(s).
  void extractArgsAndReqs();
  void processArg(void *Ptr, const detail::kernel_param_kind_t &Kind,
                  const int Size, const size_t Index, size_t &IndexShift,
                  bool IsKernelCreatedFromSource);

  void processArg(void *Ptr, const detail::kernel_param_kind_t &Kind,
                  const int Size, const size_t Index, size_t &IndexShift,
                  bool IsKernelCreatedFromSource, bool IsESIMD);
  /// \return a string containing the name of the SYCL kernel.
  std::string getKernelName();

  template <typename LambdaNameT> bool lambdaAndKernelHaveEqualName() {
    // If the kernel and the lambda have the same name, the arguments can be
    // extracted from the lambda for the kernel; otherwise set_arg(s) must be
    // used to establish the order and values of the kernel arguments.
    assert(MKernel && "MKernel is not initialized");
    const std::string LambdaName = detail::KernelInfo<LambdaNameT>::getName();
    const std::string KernelName = getKernelName();
    return LambdaName == KernelName;
  }
  /// Saves a stream associated with this handler; streams are forwarded to
  /// the command group and flushed by the scheduler.
  void addStream(const std::shared_ptr<detail::stream_impl> &Stream) {
    MStreamStorage.push_back(Stream);
  }

  /// Saves a resource created while handling a reduction; it is kept alive
  /// until the command group finishes its work.
  void addReduction(const std::shared_ptr<const void> &ReduObj);
  bool is_host() { return MIsHost; }
  // Recursively calls itself until the argument pack is fully processed.
  template <typename T, typename... Ts>
  void setArgsHelper(int ArgIndex, T &&Arg, Ts &&... Args) {
    set_arg(ArgIndex, std::move(Arg));
    setArgsHelper(++ArgIndex, std::move(Args)...);
  }

  void setArgsHelper(int) {}
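
  // Illustrative note (not part of the original header): a call such as
  // `CGH.set_args(A, B, C)` unrolls through this helper into
  // `set_arg(0, A); set_arg(1, B); set_arg(2, C);`, with the zero-argument
  // overload terminating the recursion.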
  // setArgHelper for local accessor arguments.
  template <typename DataT, int Dims, access::mode AccessMode,
            access::placeholder IsPlaceholder>
  void setArgHelper(int ArgIndex,
                    accessor<DataT, Dims, AccessMode, access::target::local,
                             IsPlaceholder> &&Arg) {
    detail::LocalAccessorBaseHost *LocalAccBase =
        (detail::LocalAccessorBaseHost *)&Arg;
    detail::LocalAccessorImplPtr LocalAccImpl =
        detail::getSyclObjImpl(*LocalAccBase);
    detail::LocalAccessorImplHost *Req = LocalAccImpl.get();
    MLocalAccStorage.push_back(std::move(LocalAccImpl));
    MArgs.emplace_back(detail::kernel_param_kind_t::kind_accessor, Req,
                       static_cast<int>(access::target::local), ArgIndex);
  }
  // setArgHelper for non-local accessor arguments.
  template <typename DataT, int Dims, access::mode AccessMode,
            access::target AccessTarget, access::placeholder IsPlaceholder>
  typename detail::enable_if_t<AccessTarget != access::target::local, void>
  setArgHelper(
      int ArgIndex,
      accessor<DataT, Dims, AccessMode, AccessTarget, IsPlaceholder> &&Arg) {
    detail::AccessorBaseHost *AccBase = (detail::AccessorBaseHost *)&Arg;
    detail::AccessorImplPtr AccImpl = detail::getSyclObjImpl(*AccBase);
    detail::Requirement *Req = AccImpl.get();
    // Add the accessor to the list of requirements.
    MRequirements.push_back(Req);
    // Store a copy of the accessor.
    MAccStorage.push_back(std::move(AccImpl));
    // Add the accessor to the list of arguments.
    MArgs.emplace_back(detail::kernel_param_kind_t::kind_accessor, Req,
                       static_cast<int>(AccessTarget), ArgIndex);
  }
  template <typename T> void setArgHelper(int ArgIndex, T &&Arg) {
    auto StoredArg = static_cast<void *>(storePlainArg(Arg));

    if (!std::is_same<cl_mem, T>::value && std::is_pointer<T>::value) {
      MArgs.emplace_back(detail::kernel_param_kind_t::kind_pointer, StoredArg,
                         sizeof(T), ArgIndex);
    } else {
      MArgs.emplace_back(detail::kernel_param_kind_t::kind_std_layout,
                         StoredArg, sizeof(T), ArgIndex);
    }
  }
  void setArgHelper(int ArgIndex, sampler &&Arg) {
    auto StoredArg = static_cast<void *>(storePlainArg(Arg));
    MArgs.emplace_back(detail::kernel_param_kind_t::kind_sampler, StoredArg,
                       sizeof(sampler), ArgIndex);
  }
  void verifyKernelInvoc(const kernel &Kernel) {
    if (is_host()) {
      throw invalid_object_error(
          "This kernel invocation method cannot be used on the host",
          PI_ERROR_INVALID_DEVICE);
    }
    if (Kernel.is_host()) {
      throw invalid_object_error("Invalid kernel type, OpenCL expected",
                                 PI_ERROR_INVALID_KERNEL);
    }
  }
  /// Wraps the kernel functor in a normalized host kernel, stores it in
  /// MHostKernel, and returns a pointer to the stored functor.
  template <class KernelType, class NormalizedKernelType, int Dims>
  KernelType *ResetHostKernelHelper(const KernelType &KernelFunc) {
    NormalizedKernelType NormalizedKernel(KernelFunc);
    auto NormalizedKernelFunc =
        std::function<void(const sycl::nd_item<Dims> &)>(NormalizedKernel);
    auto HostKernelPtr =
        new detail::HostKernel<decltype(NormalizedKernelFunc),
                               sycl::nd_item<Dims>, Dims>(NormalizedKernelFunc);
    MHostKernel.reset(HostKernelPtr);
    return &HostKernelPtr->MKernel.template target<NormalizedKernelType>()
                ->MKernelFunc;
  }
  template <class KernelType, typename ArgT, int Dims>
  typename std::enable_if<std::is_same<ArgT, sycl::id<Dims>>::value,
                          KernelType *>::type
  ResetHostKernel(const KernelType &KernelFunc) {
    struct NormalizedKernelType {
      KernelType MKernelFunc;
      NormalizedKernelType(const KernelType &KernelFunc)
          : MKernelFunc(KernelFunc) {}
      void operator()(const nd_item<Dims> &Arg) {
        detail::runKernelWithArg(MKernelFunc, Arg.get_global_id());
      }
    };
    return ResetHostKernelHelper<KernelType, struct NormalizedKernelType, Dims>(
        KernelFunc);
  }
  template <class KernelType, typename ArgT, int Dims>
  typename std::enable_if<std::is_same<ArgT, sycl::nd_item<Dims>>::value,
                          KernelType *>::type
  ResetHostKernel(const KernelType &KernelFunc) {
    struct NormalizedKernelType {
      KernelType MKernelFunc;
      NormalizedKernelType(const KernelType &KernelFunc)
          : MKernelFunc(KernelFunc) {}
      void operator()(const nd_item<Dims> &Arg) {
        detail::runKernelWithArg(MKernelFunc, Arg);
      }
    };
    return ResetHostKernelHelper<KernelType, struct NormalizedKernelType, Dims>(
        KernelFunc);
  }
  template <class KernelType, typename ArgT, int Dims>
  typename std::enable_if<std::is_same<ArgT, sycl::item<Dims, false>>::value,
                          KernelType *>::type
  ResetHostKernel(const KernelType &KernelFunc) {
    struct NormalizedKernelType {
      KernelType MKernelFunc;
      NormalizedKernelType(const KernelType &KernelFunc)
          : MKernelFunc(KernelFunc) {}
      void operator()(const nd_item<Dims> &Arg) {
        sycl::item<Dims, false> Item = detail::Builder::createItem<Dims, false>(
            Arg.get_global_range(), Arg.get_global_id());
        detail::runKernelWithArg(MKernelFunc, Item);
      }
    };
    return ResetHostKernelHelper<KernelType, struct NormalizedKernelType, Dims>(
        KernelFunc);
  }
  template <class KernelType, typename ArgT, int Dims>
  typename std::enable_if<std::is_same<ArgT, sycl::item<Dims, true>>::value,
                          KernelType *>::type
  ResetHostKernel(const KernelType &KernelFunc) {
    struct NormalizedKernelType {
      KernelType MKernelFunc;
      NormalizedKernelType(const KernelType &KernelFunc)
          : MKernelFunc(KernelFunc) {}
      void operator()(const nd_item<Dims> &Arg) {
        sycl::item<Dims, true> Item = detail::Builder::createItem<Dims, true>(
            Arg.get_global_range(), Arg.get_global_id(), Arg.get_offset());
        detail::runKernelWithArg(MKernelFunc, Item);
      }
    };
    return ResetHostKernelHelper<KernelType, struct NormalizedKernelType, Dims>(
        KernelFunc);
  }
  template <class KernelType, typename ArgT, int Dims>
  typename std::enable_if_t<std::is_same<ArgT, void>::value, KernelType *>
  ResetHostKernel(const KernelType &KernelFunc) {
    struct NormalizedKernelType {
      KernelType MKernelFunc;
      NormalizedKernelType(const KernelType &KernelFunc)
          : MKernelFunc(KernelFunc) {}
      void operator()(const nd_item<Dims> &Arg) {
        (void)Arg;
        detail::runKernelWithoutArg(MKernelFunc);
      }
    };
    return ResetHostKernelHelper<KernelType, struct NormalizedKernelType, Dims>(
        KernelFunc);
  }
  // A kernel functor that takes a sycl::group<Dims> cannot be normalized, so
  // it is stored as-is.
  template <class KernelType, typename ArgT, int Dims>
  typename std::enable_if<std::is_same<ArgT, sycl::group<Dims>>::value,
                          KernelType *>::type
  ResetHostKernel(const KernelType &KernelFunc) {
    MHostKernel.reset(
        new detail::HostKernel<KernelType, ArgT, Dims>(KernelFunc));
    return (KernelType *)(MHostKernel->getPtr());
  }
  /// Throws if the currently used kernel bundle does not contain a suitable
  /// device image for the kernel with the given name.
  void verifyUsedKernelBundle(const std::string &KernelName);
  /// Stores the lambda in a template-free object and initializes the kernel
  /// name, argument list, and requirements from integration-header data.
  template <typename KernelName, typename KernelType, int Dims,
            typename LambdaArgType>
  void StoreLambda(KernelType KernelFunc) {
    using KI = detail::KernelInfo<KernelName>;
    constexpr bool IsCallableWithKernelHandler =
        detail::KernelLambdaHasKernelHandlerArgT<KernelType,
                                                 LambdaArgType>::value;

    if (IsCallableWithKernelHandler && MIsHost) {
      throw cl::sycl::feature_not_supported(
          "kernel_handler is not yet supported by host device.",
          PI_ERROR_INVALID_OPERATION);
    }
    KernelType *KernelPtr =
        ResetHostKernel<KernelType, LambdaArgType, Dims>(KernelFunc);

    if (KI::getName() != nullptr && KI::getName()[0] != '\0') {
      // An empty name indicates compilation without the integration header,
      // so skip the steps that require it.
      extractArgsAndReqsFromLambda(reinterpret_cast<char *>(KernelPtr),
                                   KI::getNumParams(), &KI::getParamDesc(0),
                                   KI::isESIMD());
      MKernelName = KI::getName();
      MOSModuleHandle = detail::OSUtil::getOSModuleHandle(KI::getName());
    } else {
      // Without the integration header, the accessors associated with this
      // handler must be processed as the kernel arguments.
      MArgs = std::move(MAssociatedAccesors);
    }

    // If the kernel lambda is callable with a kernel_handler argument, the
    // kernel bundle must be created to manifest the handler.
    if (IsCallableWithKernelHandler) {
      getOrInsertHandlerKernelBundle(/*Insert=*/true);
    }
  }
  /// Checks whether the source shape can be copied into the destination shape
  /// region-by-region rather than element-by-element.
  template <int Dims_Src, int Dims_Dst>
  static bool IsCopyingRectRegionAvailable(const range<Dims_Src> Src,
                                           const range<Dims_Dst> Dst) {
    if (Dims_Src > Dims_Dst)
      return false;
    for (size_t I = 0; I < Dims_Src; ++I)
      if (Src[I] > Dst[I])
        return false;
    return true;
  }
  static id<1> getDelinearizedIndex(const range<1> Range, const size_t Index) {
    return detail::getDelinearizedId(Range, Index);
  }
  static id<2> getDelinearizedIndex(const range<2> Range, const size_t Index) {
    return detail::getDelinearizedId(Range, Index);
  }
  static id<3> getDelinearizedIndex(const range<3> Range, const size_t Index) {
    return detail::getDelinearizedId(Range, Index);
  }
  /// Handles the accessor-to-accessor copy on the host; returns true if the
  /// copy was performed here.
  template <typename TSrc, int DimSrc, access::mode ModeSrc,
            access::target TargetSrc, typename TDst, int DimDst,
            access::mode ModeDst, access::target TargetDst,
            access::placeholder IsPHSrc, access::placeholder IsPHDst>
  detail::enable_if_t<(DimSrc > 0) && (DimDst > 0), bool>
  copyAccToAccHelper(accessor<TSrc, DimSrc, ModeSrc, TargetSrc, IsPHSrc> Src,
                     accessor<TDst, DimDst, ModeDst, TargetDst, IsPHDst> Dst) {
    if (!MIsHost &&
        IsCopyingRectRegionAvailable(Src.get_range(), Dst.get_range()))
      return false;

    range<1> LinearizedRange(Src.size());
    parallel_for<class __copyAcc2Acc<TSrc, DimSrc, ModeSrc, TargetSrc,
                                     TDst, DimDst, ModeDst, TargetDst,
                                     IsPHSrc, IsPHDst>>
        (LinearizedRange, [=](id<1> Id) {
          size_t Index = Id[0];
          id<DimSrc> SrcId = detail::getDelinearizedId(Src.get_range(), Index);
          id<DimDst> DstId = detail::getDelinearizedId(Dst.get_range(), Index);
          Dst[DstId] = Src[SrcId];
        });
    return true;
  }
  /// Handles the accessor-to-accessor copy for 0-dimensional accessors on the
  /// host; returns true if the copy was performed here.
  template <typename TSrc, int DimSrc, access::mode ModeSrc,
            access::target TargetSrc, typename TDst, int DimDst,
            access::mode ModeDst, access::target TargetDst,
            access::placeholder IsPHSrc, access::placeholder IsPHDst>
  detail::enable_if_t<DimSrc == 0 || DimDst == 0, bool>
  copyAccToAccHelper(accessor<TSrc, DimSrc, ModeSrc, TargetSrc, IsPHSrc> Src,
                     accessor<TDst, DimDst, ModeDst, TargetDst, IsPHDst> Dst) {
    if (!MIsHost)
      return false;

    single_task<class __copyAcc2Acc<TSrc, DimSrc, ModeSrc, TargetSrc,
                                    TDst, DimDst, ModeDst, TargetDst,
                                    IsPHSrc, IsPHDst>>([=]() {
      *(Dst.get_pointer()) = *(Src.get_pointer());
    });
    return true;
  }
#ifndef __SYCL_DEVICE_ONLY__
  /// Copies the contents of the memory object accessed by Src into the memory
  /// pointed to by Dst. Valid only for Dim > 0.
  template <typename TSrc, typename TDst, int Dim, access::mode AccMode,
            access::target AccTarget, access::placeholder IsPH>
  detail::enable_if_t<(Dim > 0)>
  copyAccToPtrHost(accessor<TSrc, Dim, AccMode, AccTarget, IsPH> Src,
                   TDst *Dst) {
    range<Dim> Range = Src.get_range();
    parallel_for<class __copyAcc2Ptr<TSrc, TDst, Dim, AccMode, AccTarget, IsPH>>
        (Range, [=](id<Dim> Index) {
          const size_t LinearIndex = detail::getLinearIndex(Index, Range);
          using TSrcNonConst = typename detail::remove_const_t<TSrc>;
          (reinterpret_cast<TSrcNonConst *>(Dst))[LinearIndex] = Src[Index];
        });
  }
  /// Copies the single element accessed by the 0-dimensional accessor Src
  /// into the memory pointed to by Dst.
  template <typename TSrc, typename TDst, int Dim, access::mode AccMode,
            access::target AccTarget, access::placeholder IsPH>
  detail::enable_if_t<Dim == 0>
  copyAccToPtrHost(accessor<TSrc, Dim, AccMode, AccTarget, IsPH> Src,
                   TDst *Dst) {
    single_task<class __copyAcc2Ptr<TSrc, TDst, Dim, AccMode, AccTarget, IsPH>>
        ([=]() {
          using TSrcNonConst = typename detail::remove_const_t<TSrc>;
          *(reinterpret_cast<TSrcNonConst *>(Dst)) = *(Src.get_pointer());
        });
  }
  /// Copies the memory pointed to by Src into the memory object accessed by
  /// Dst. Valid only for Dim > 0.
  template <typename TSrc, typename TDst, int Dim, access::mode AccMode,
            access::target AccTarget, access::placeholder IsPH>
  detail::enable_if_t<(Dim > 0)>
  copyPtrToAccHost(TSrc *Src,
                   accessor<TDst, Dim, AccMode, AccTarget, IsPH> Dst) {
    range<Dim> Range = Dst.get_range();
    parallel_for<class __copyPtr2Acc<TSrc, TDst, Dim, AccMode, AccTarget, IsPH>>
        (Range, [=](id<Dim> Index) {
          const size_t LinearIndex = detail::getLinearIndex(Index, Range);
          Dst[Index] = (reinterpret_cast<const TDst *>(Src))[LinearIndex];
        });
  }
  /// Copies the single element pointed to by Src into the memory object
  /// accessed by the 0-dimensional accessor Dst.
  template <typename TSrc, typename TDst, int Dim, access::mode AccMode,
            access::target AccTarget, access::placeholder IsPH>
  detail::enable_if_t<Dim == 0>
  copyPtrToAccHost(TSrc *Src,
                   accessor<TDst, Dim, AccMode, AccTarget, IsPH> Dst) {
    single_task<class __copyPtr2Acc<TSrc, TDst, Dim, AccMode, AccTarget, IsPH>>
        ([=]() {
          *(Dst.get_pointer()) = *(reinterpret_cast<const TDst *>(Src));
        });
  }
#endif // __SYCL_DEVICE_ONLY__
  constexpr static bool isConstOrGlobal(access::target AccessTarget) {
    return AccessTarget == access::target::device ||
           AccessTarget == access::target::constant_buffer;
  }

  constexpr static bool isImageOrImageArray(access::target AccessTarget) {
    return AccessTarget == access::target::image ||
           AccessTarget == access::target::image_array;
  }

  constexpr static bool
  isValidTargetForExplicitOp(access::target AccessTarget) {
    return isConstOrGlobal(AccessTarget) || isImageOrImageArray(AccessTarget);
  }

  constexpr static bool isValidModeForSourceAccessor(access::mode AccessMode) {
    return AccessMode == access::mode::read ||
           AccessMode == access::mode::read_write;
  }

  constexpr static bool
  isValidModeForDestinationAccessor(access::mode AccessMode) {
    return AccessMode == access::mode::write ||
           AccessMode == access::mode::read_write ||
           AccessMode == access::mode::discard_write ||
           AccessMode == access::mode::discard_read_write;
  }
  template <int Dims, typename LambdaArgType> struct TransformUserItemType {
    using type = typename std::conditional<
        std::is_convertible<nd_item<Dims>, LambdaArgType>::value,
        nd_item<Dims>,
        typename std::conditional<
            std::is_convertible<item<Dims>, LambdaArgType>::value, item<Dims>,
            LambdaArgType>::type>::type;
  };
  /// Defines and invokes a SYCL kernel function for the specified range.
  template <typename KernelName, typename KernelType, int Dims>
  void parallel_for_lambda_impl(range<Dims> NumWorkItems,
                                KernelType KernelFunc) {
    throwIfActionIsCreated();
    using NameT =
        typename detail::get_kernel_name_t<KernelName, KernelType>::name;
    using LambdaArgType =
        sycl::detail::lambda_arg_type<KernelType, item<Dims>>;

    // If a 1-D kernel argument is an integral type, treat it as item<1>;
    // otherwise transport the item information through a convertible user
    // type.
    using TransformedArgType = typename std::conditional<
        std::is_integral<LambdaArgType>::value && Dims == 1, item<Dims>,
        typename TransformUserItemType<Dims, LambdaArgType>::type>::type;

#if !defined(__SYCL_DISABLE_PARALLEL_FOR_RANGE_ROUNDING__) &&                  \
    !defined(DPCPP_HOST_DEVICE_OPENMP) &&                                      \
    !defined(DPCPP_HOST_DEVICE_PERF_NATIVE) && SYCL_LANGUAGE_VERSION >= 202001
    // The range should be a multiple of this for reasonable performance.
    size_t MinFactorX = 16;
    // The range should be a multiple of this for improved performance.
    size_t GoodFactorX = 32;
    // The range should be at least this large for rounding to be worthwhile.
    size_t MinRangeX = 1024;

    // Parse optional rounding parameters of the form
    // MinRound:PreferredRound:MinRange.
    this->GetRangeRoundingSettings(MinFactorX, GoodFactorX, MinRangeX);

    std::string KName = typeid(NameT *).name();
    using KI = detail::KernelInfo<KernelName>;
    bool DisableRounding =
        this->DisableRangeRounding() ||
        (KI::getName() == nullptr || KI::getName()[0] == '\0');

    // Round the range up if rounding is enabled, the range is large enough to
    // make rounding worthwhile, and it is not already a multiple of a "good"
    // factor.
    if (!DisableRounding && (NumWorkItems[0] >= MinRangeX) &&
        (NumWorkItems[0] % MinFactorX != 0)) {
      // Rounding up only the first dimension is sufficient: multiplying it by
      // the remaining dimensions yields a rounded-up total range.
      size_t NewValX =
          ((NumWorkItems[0] + GoodFactorX - 1) / GoodFactorX) * GoodFactorX;
      if (this->RangeRoundingTrace())
        std::cout << "parallel_for range adjusted from " << NumWorkItems[0]
                  << " to " << NewValX << std::endl;

      using NameWT = typename detail::get_kernel_wrapper_name_t<NameT>::name;
      auto Wrapper =
          getRangeRoundedKernelLambda<NameWT, TransformedArgType, Dims>(
              KernelFunc, NumWorkItems);

      using KName = std::conditional_t<std::is_same<KernelType, NameT>::value,
                                       decltype(Wrapper), NameWT>;

      range<Dims> AdjustedRange = NumWorkItems;
      AdjustedRange.set_range_dim0(NewValX);
      kernel_parallel_for_wrapper<KName, TransformedArgType>(Wrapper);
#ifndef __SYCL_DEVICE_ONLY__
      detail::checkValueRange<Dims>(AdjustedRange);
      MNDRDesc.set(std::move(AdjustedRange));
      StoreLambda<KName, decltype(Wrapper), Dims, TransformedArgType>(
          std::move(Wrapper));
      setType(detail::CG::Kernel);
#endif
    } else
#endif // !__SYCL_DISABLE_PARALLEL_FOR_RANGE_ROUNDING__ &&
       // !DPCPP_HOST_DEVICE_OPENMP && !DPCPP_HOST_DEVICE_PERF_NATIVE &&
       // SYCL_LANGUAGE_VERSION >= 202001
    {
      kernel_parallel_for_wrapper<NameT, TransformedArgType>(KernelFunc);
#ifndef __SYCL_DEVICE_ONLY__
      detail::checkValueRange<Dims>(NumWorkItems);
      MNDRDesc.set(std::move(NumWorkItems));
      StoreLambda<NameT, KernelType, Dims, TransformedArgType>(
          std::move(KernelFunc));
      setType(detail::CG::Kernel);
#endif
    }
  }
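
  // Illustrative note (not part of the original header): with the default
  // settings above (MinFactorX = 16, GoodFactorX = 32, MinRangeX = 1024), a
  // hypothetical 1-D launch of 1030 work-items qualifies for rounding
  // (1030 >= 1024 and 1030 % 16 != 0) and is adjusted to
  // ((1030 + 31) / 32) * 32 = 1056; the RoundedRangeKernel wrapper then masks
  // off the 26 extra work-items at run time.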
  /// Defines and invokes a SYCL kernel object for the specified range.
  template <int Dims>
  void parallel_for_impl(range<Dims> NumWorkItems, kernel Kernel) {
    throwIfActionIsCreated();
    verifyKernelInvoc(Kernel);
    MKernel = detail::getSyclObjImpl(std::move(Kernel));
    detail::checkValueRange<Dims>(NumWorkItems);
    MNDRDesc.set(std::move(NumWorkItems));
    setType(detail::CG::Kernel);
    extractArgsAndReqs();
    MKernelName = getKernelName();
  }
#ifdef SYCL_LANGUAGE_VERSION
#define __SYCL_KERNEL_ATTR__ [[clang::sycl_kernel]]
#else
#define __SYCL_KERNEL_ATTR__
#endif
  // NOTE: the name of this function - "kernel_single_task" - is used by the
  // front end to determine the kernel invocation kind.
  template <typename KernelName, typename KernelType>
  __SYCL_KERNEL_ATTR__ void
#ifdef __SYCL_NONCONST_FUNCTOR__
  kernel_single_task(KernelType KernelFunc) {
#else
  kernel_single_task(const KernelType &KernelFunc) {
#endif
#ifdef __SYCL_DEVICE_ONLY__
    KernelFunc();
#else
    (void)KernelFunc;
#endif
  }
  template <typename KernelName, typename KernelType>
  __SYCL_KERNEL_ATTR__ void
#ifdef __SYCL_NONCONST_FUNCTOR__
  kernel_single_task(KernelType KernelFunc, kernel_handler KH) {
#else
  kernel_single_task(const KernelType &KernelFunc, kernel_handler KH) {
#endif
#ifdef __SYCL_DEVICE_ONLY__
    KernelFunc(KH);
#else
    (void)KernelFunc;
    (void)KH;
#endif
  }
  // NOTE: the name of this function - "kernel_parallel_for" - is used by the
  // front end to determine the kernel invocation kind.
  template <typename KernelName, typename ElementType, typename KernelType>
  __SYCL_KERNEL_ATTR__ void
#ifdef __SYCL_NONCONST_FUNCTOR__
  kernel_parallel_for(KernelType KernelFunc) {
#else
  kernel_parallel_for(const KernelType &KernelFunc) {
#endif
#ifdef __SYCL_DEVICE_ONLY__
    KernelFunc(detail::Builder::getElement(detail::declptr<ElementType>()));
#else
    (void)KernelFunc;
#endif
  }
  template <typename KernelName, typename ElementType, typename KernelType>
  __SYCL_KERNEL_ATTR__ void
#ifdef __SYCL_NONCONST_FUNCTOR__
  kernel_parallel_for(KernelType KernelFunc, kernel_handler KH) {
#else
  kernel_parallel_for(const KernelType &KernelFunc, kernel_handler KH) {
#endif
#ifdef __SYCL_DEVICE_ONLY__
    KernelFunc(detail::Builder::getElement(detail::declptr<ElementType>()), KH);
#else
    (void)KernelFunc;
    (void)KH;
#endif
  }
  // NOTE: the name of this function - "kernel_parallel_for_work_group" - is
  // used by the front end to determine the kernel invocation kind.
  template <typename KernelName, typename ElementType, typename KernelType>
  __SYCL_KERNEL_ATTR__ void
#ifdef __SYCL_NONCONST_FUNCTOR__
  kernel_parallel_for_work_group(KernelType KernelFunc) {
#else
  kernel_parallel_for_work_group(const KernelType &KernelFunc) {
#endif
#ifdef __SYCL_DEVICE_ONLY__
    KernelFunc(detail::Builder::getElement(detail::declptr<ElementType>()));
#else
    (void)KernelFunc;
#endif
  }
  template <typename KernelName, typename ElementType, typename KernelType>
  __SYCL_KERNEL_ATTR__ void
#ifdef __SYCL_NONCONST_FUNCTOR__
  kernel_parallel_for_work_group(KernelType KernelFunc, kernel_handler KH) {
#else
  kernel_parallel_for_work_group(const KernelType &KernelFunc,
                                 kernel_handler KH) {
#endif
#ifdef __SYCL_DEVICE_ONLY__
    KernelFunc(detail::Builder::getElement(detail::declptr<ElementType>()), KH);
#else
    (void)KernelFunc;
    (void)KH;
#endif
  }
  // Wrappers for the kernel_* entry points above, with and without support
  // for an additional kernel_handler argument.
  template <typename KernelName, typename KernelType>
  std::enable_if_t<detail::KernelLambdaHasKernelHandlerArgT<KernelType>::value>
#ifdef __SYCL_NONCONST_FUNCTOR__
  kernel_single_task_wrapper(KernelType KernelFunc) {
#else
  kernel_single_task_wrapper(const KernelType &KernelFunc) {
#endif
#ifdef __SYCL_DEVICE_ONLY__
    detail::CheckDeviceCopyable<KernelType>();
#endif // __SYCL_DEVICE_ONLY__
    kernel_handler KH;
    kernel_single_task<KernelName>(KernelFunc, KH);
  }
  template <typename KernelName, typename KernelType>
  std::enable_if_t<!detail::KernelLambdaHasKernelHandlerArgT<KernelType>::value>
#ifdef __SYCL_NONCONST_FUNCTOR__
  kernel_single_task_wrapper(KernelType KernelFunc) {
#else
  kernel_single_task_wrapper(const KernelType &KernelFunc) {
#endif
#ifdef __SYCL_DEVICE_ONLY__
    detail::CheckDeviceCopyable<KernelType>();
#endif // __SYCL_DEVICE_ONLY__
    kernel_single_task<KernelName>(KernelFunc);
  }
  template <typename KernelName, typename ElementType, typename KernelType>
  std::enable_if_t<
      detail::KernelLambdaHasKernelHandlerArgT<KernelType, ElementType>::value>
#ifdef __SYCL_NONCONST_FUNCTOR__
  kernel_parallel_for_wrapper(KernelType KernelFunc) {
#else
  kernel_parallel_for_wrapper(const KernelType &KernelFunc) {
#endif
#ifdef __SYCL_DEVICE_ONLY__
    detail::CheckDeviceCopyable<KernelType>();
#endif // __SYCL_DEVICE_ONLY__
    kernel_handler KH;
    kernel_parallel_for<KernelName, ElementType>(KernelFunc, KH);
  }
  template <typename KernelName, typename ElementType, typename KernelType>
  std::enable_if_t<
      !detail::KernelLambdaHasKernelHandlerArgT<KernelType, ElementType>::value>
#ifdef __SYCL_NONCONST_FUNCTOR__
  kernel_parallel_for_wrapper(KernelType KernelFunc) {
#else
  kernel_parallel_for_wrapper(const KernelType &KernelFunc) {
#endif
#ifdef __SYCL_DEVICE_ONLY__
    detail::CheckDeviceCopyable<KernelType>();
#endif // __SYCL_DEVICE_ONLY__
    kernel_parallel_for<KernelName, ElementType>(KernelFunc);
  }
  template <typename KernelName, typename ElementType, typename KernelType>
  std::enable_if_t<
      detail::KernelLambdaHasKernelHandlerArgT<KernelType, ElementType>::value>
#ifdef __SYCL_NONCONST_FUNCTOR__
  kernel_parallel_for_work_group_wrapper(KernelType KernelFunc) {
#else
  kernel_parallel_for_work_group_wrapper(const KernelType &KernelFunc) {
#endif
#ifdef __SYCL_DEVICE_ONLY__
    detail::CheckDeviceCopyable<KernelType>();
#endif // __SYCL_DEVICE_ONLY__
    kernel_handler KH;
    kernel_parallel_for_work_group<KernelName, ElementType>(KernelFunc, KH);
  }
  template <typename KernelName, typename ElementType, typename KernelType>
  std::enable_if_t<
      !detail::KernelLambdaHasKernelHandlerArgT<KernelType, ElementType>::value>
#ifdef __SYCL_NONCONST_FUNCTOR__
  kernel_parallel_for_work_group_wrapper(KernelType KernelFunc) {
#else
  kernel_parallel_for_work_group_wrapper(const KernelType &KernelFunc) {
#endif
#ifdef __SYCL_DEVICE_ONLY__
    detail::CheckDeviceCopyable<KernelType>();
#endif // __SYCL_DEVICE_ONLY__
    kernel_parallel_for_work_group<KernelName, ElementType>(KernelFunc);
  }
  std::shared_ptr<detail::handler_impl> getHandlerImpl() const;
  std::shared_ptr<detail::handler_impl> evictHandlerImpl() const;

  void setStateExplicitKernelBundle();
  void setStateSpecConstSet();
  bool isStateExplicitKernelBundle() const;

  std::shared_ptr<detail::kernel_bundle_impl>
  getOrInsertHandlerKernelBundle(bool Insert) const;

  void setHandlerKernelBundle(
      const std::shared_ptr<detail::kernel_bundle_impl> &NewKernelBundleImpPtr);
  template <typename FuncT>
  detail::enable_if_t<
      detail::check_fn_signature<detail::remove_reference_t<FuncT>,
                                 void()>::value ||
      detail::check_fn_signature<detail::remove_reference_t<FuncT>,
                                 void(interop_handle)>::value>
  host_task_impl(FuncT &&Func) {
    throwIfActionIsCreated();

    MNDRDesc.set(range<1>(1));
    MArgs = std::move(MAssociatedAccesors);

    MHostTask.reset(new detail::HostTask(std::move(Func)));

    setType(detail::CG::CodeplayHostTask);
  }
public:
  handler(const handler &) = delete;
  handler(handler &&) = delete;
  handler &operator=(const handler &) = delete;
  handler &operator=(handler &&) = delete;
#if __cplusplus >= 201703L
  template <auto &SpecName>
  void set_specialization_constant(
      typename std::remove_reference_t<decltype(SpecName)>::value_type Value) {

    setStateSpecConstSet();

    std::shared_ptr<detail::kernel_bundle_impl> KernelBundleImplPtr =
        getOrInsertHandlerKernelBundle(/*Insert=*/true);

    detail::createSyclObjFromImpl<kernel_bundle<bundle_state::input>>(
        KernelBundleImplPtr)
        .set_specialization_constant<SpecName>(Value);
  }

  template <auto &SpecName>
  typename std::remove_reference_t<decltype(SpecName)>::value_type
  get_specialization_constant() const {

    if (isStateExplicitKernelBundle())
      throw sycl::exception(make_error_code(errc::invalid),
                            "Specialization constants cannot be read after "
                            "explicitly setting the used kernel bundle");

    std::shared_ptr<detail::kernel_bundle_impl> KernelBundleImplPtr =
        getOrInsertHandlerKernelBundle(/*Insert=*/true);

    return detail::createSyclObjFromImpl<kernel_bundle<bundle_state::input>>(
               KernelBundleImplPtr)
        .get_specialization_constant<SpecName>();
  }
#endif
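
  // Illustrative usage (not part of the original header): a minimal sketch
  // assuming a hypothetical specialization_id `HypotheticalSpecId` declared
  // at namespace scope and a sycl::queue `Q`.
  //
  //   constexpr specialization_id<int> HypotheticalSpecId{42};
  //
  //   Q.submit([&](handler &CGH) {
  //     CGH.set_specialization_constant<HypotheticalSpecId>(7);
  //     CGH.single_task<class SpecConstKernel>([=](kernel_handler KH) {
  //       int V = KH.get_specialization_constant<HypotheticalSpecId>();
  //       // V == 7 here; the value is baked in when the kernel is JIT-ed.
  //     });
  //   });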
  void
  use_kernel_bundle(const kernel_bundle<bundle_state::executable> &ExecBundle);
  /// Requires access to the memory object associated with the placeholder
  /// accessor.
  template <typename DataT, int Dims, access::mode AccMode,
            access::target AccTarget>
  void require(accessor<DataT, Dims, AccMode, AccTarget,
                        access::placeholder::true_t> Acc) {
#ifndef __SYCL_DEVICE_ONLY__
    associateWithHandler(&Acc, AccTarget);
#else
    (void)Acc;
#endif
  }
  /// Registers an event dependency for this command group.
  void depends_on(event Event);

  /// Registers event dependencies for this command group.
  void depends_on(const std::vector<event> &Events);
  template <typename T>
  using remove_cv_ref_t =
      typename detail::remove_cv_t<detail::remove_reference_t<T>>;

  template <typename U, typename T>
  using is_same_type = std::is_same<remove_cv_ref_t<U>, remove_cv_ref_t<T>>;

  template <typename T> struct ShouldEnableSetArg {
    static constexpr bool value =
        std::is_trivially_copyable<detail::remove_reference_t<T>>::value
#if SYCL_LANGUAGE_VERSION && SYCL_LANGUAGE_VERSION <= 201707
            && std::is_standard_layout<detail::remove_reference_t<T>>::value
#endif
        || is_same_type<sampler, T>::value // Sampler
        || (!is_same_type<cl_mem, T>::value &&
            std::is_pointer<remove_cv_ref_t<T>>::value) // USM
        || is_same_type<cl_mem, T>::value;              // Interop
  };
  /// Sets an argument for OpenCL interoperability kernels: registers Arg as
  /// argument number ArgIndex.
  template <typename T>
  typename std::enable_if_t<ShouldEnableSetArg<T>::value, void>
  set_arg(int ArgIndex, T &&Arg) {
    setArgHelper(ArgIndex, std::move(Arg));
  }

  template <typename DataT, int Dims, access::mode AccessMode,
            access::target AccessTarget, access::placeholder IsPlaceholder>
  void
  set_arg(int ArgIndex,
          accessor<DataT, Dims, AccessMode, AccessTarget, IsPlaceholder> Arg) {
    setArgHelper(ArgIndex, std::move(Arg));
  }
  /// Sets arguments for OpenCL interoperability kernels: registers the pack
  /// Args with indices starting from 0.
  template <typename... Ts> void set_args(Ts &&... Args) {
    setArgsHelper(0, std::move(Args)...);
  }
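
  // Illustrative usage (not part of the original header): a minimal sketch
  // assuming an interoperability kernel `K`, a buffer `Buf`, and a
  // sycl::queue `Q`.
  //
  //   Q.submit([&](handler &CGH) {
  //     auto Acc = Buf.get_access<access::mode::read_write>(CGH);
  //     CGH.set_args(Acc, 42);      // argument 0: accessor, argument 1: int
  //     CGH.parallel_for(range<1>{N}, K);
  //   });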
  /// Defines and invokes a SYCL kernel function as a lambda or a named
  /// function object type.
  template <typename KernelName = detail::auto_name, typename KernelType>
#ifdef __SYCL_NONCONST_FUNCTOR__
  void single_task(KernelType KernelFunc) {
#else
  void single_task(const KernelType &KernelFunc) {
#endif
    throwIfActionIsCreated();
    using NameT =
        typename detail::get_kernel_name_t<KernelName, KernelType>::name;
    kernel_single_task_wrapper<NameT>(KernelFunc);
#ifndef __SYCL_DEVICE_ONLY__
    // The range is a compile-time constant, so no INT_MAX range check is
    // needed.
    MNDRDesc.set(range<1>{1});
    StoreLambda<NameT, KernelType, 1, void>(KernelFunc);
    setType(detail::CG::Kernel);
#endif
  }
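
  // Illustrative usage (not part of the original header): a minimal sketch
  // assuming a sycl::queue `Q`.
  //
  //   Q.submit([&](handler &CGH) {
  //     CGH.single_task<class HypotheticalInit>([=]() {
  //       // One work-item executes this once.
  //     });
  //   });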
  template <typename KernelName = detail::auto_name, typename KernelType>
#ifdef __SYCL_NONCONST_FUNCTOR__
  void parallel_for(range<1> NumWorkItems, KernelType KernelFunc) {
#else
  void parallel_for(range<1> NumWorkItems, const KernelType &KernelFunc) {
#endif
    parallel_for_lambda_impl<KernelName>(NumWorkItems, std::move(KernelFunc));
  }

  template <typename KernelName = detail::auto_name, typename KernelType>
#ifdef __SYCL_NONCONST_FUNCTOR__
  void parallel_for(range<2> NumWorkItems, KernelType KernelFunc) {
#else
  void parallel_for(range<2> NumWorkItems, const KernelType &KernelFunc) {
#endif
    parallel_for_lambda_impl<KernelName>(NumWorkItems, std::move(KernelFunc));
  }

  template <typename KernelName = detail::auto_name, typename KernelType>
#ifdef __SYCL_NONCONST_FUNCTOR__
  void parallel_for(range<3> NumWorkItems, KernelType KernelFunc) {
#else
  void parallel_for(range<3> NumWorkItems, const KernelType &KernelFunc) {
#endif
    parallel_for_lambda_impl<KernelName>(NumWorkItems, std::move(KernelFunc));
  }
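
  // Illustrative usage (not part of the original header): a minimal sketch
  // assuming a sycl::queue `Q` and a buffer `Buf` of N floats.
  //
  //   Q.submit([&](handler &CGH) {
  //     auto A = Buf.get_access<access::mode::read_write>(CGH);
  //     CGH.parallel_for<class HypotheticalScale>(range<1>{N}, [=](id<1> I) {
  //       A[I] *= 2.0f;
  //     });
  //   });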
  /// Defines and invokes a SYCL kernel on the host device.
  template <typename FuncT>
  __SYCL_DEPRECATED(
      "run_on_host_intel() is deprecated, use host_task() instead")
  void run_on_host_intel(FuncT Func) {
    throwIfActionIsCreated();
    // The range is a compile-time constant, so no INT_MAX range check is
    // needed.
    MNDRDesc.set(range<1>{1});
    MArgs = std::move(MAssociatedAccesors);
    MHostKernel.reset(new detail::HostKernel<FuncT, void, 1>(std::move(Func)));
    setType(detail::CG::RunOnHostIntel);
  }
  /// Enqueues a command to invoke \p Func on the host once the command
  /// group's dependencies are satisfied.
  template <typename FuncT>
  detail::enable_if_t<
      detail::check_fn_signature<detail::remove_reference_t<FuncT>,
                                 void()>::value ||
      detail::check_fn_signature<detail::remove_reference_t<FuncT>,
                                 void(interop_handle)>::value>
  host_task(FuncT &&Func) {
    host_task_impl(Func);
  }
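
  // Illustrative usage (not part of the original header): a minimal sketch
  // assuming a sycl::queue `Q` and a buffer `Buf`.
  //
  //   Q.submit([&](handler &CGH) {
  //     auto A = Buf.get_access<access::mode::read>(CGH);
  //     CGH.host_task([=]() {
  //       // Runs on the host once the accessor's dependencies are met.
  //     });
  //   });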
// Replaces _KERNELFUNCPARAM(KernelFunc) with `KernelType KernelFunc` or
// `const KernelType &KernelFunc`, depending on the functor convention.
#ifdef __SYCL_NONCONST_FUNCTOR__
#define _KERNELFUNCPARAM(a) KernelType a
#else
#define _KERNELFUNCPARAM(a) const KernelType &a
#endif
  /// Defines and invokes a SYCL kernel function for the specified range and
  /// offset.
  template <typename KernelName = detail::auto_name, typename KernelType,
            int Dims>
  __SYCL2020_DEPRECATED("offsets are deprecated in SYCL 2020")
  void parallel_for(range<Dims> NumWorkItems, id<Dims> WorkItemOffset,
                    _KERNELFUNCPARAM(KernelFunc)) {
    throwIfActionIsCreated();
    using NameT =
        typename detail::get_kernel_name_t<KernelName, KernelType>::name;
    using LambdaArgType =
        sycl::detail::lambda_arg_type<KernelType, item<Dims>>;
    (void)NumWorkItems;
    (void)WorkItemOffset;
    kernel_parallel_for_wrapper<NameT, LambdaArgType>(KernelFunc);
#ifndef __SYCL_DEVICE_ONLY__
    detail::checkValueRange<Dims>(NumWorkItems, WorkItemOffset);
    MNDRDesc.set(std::move(NumWorkItems), std::move(WorkItemOffset));
    StoreLambda<NameT, KernelType, Dims, LambdaArgType>(std::move(KernelFunc));
    setType(detail::CG::Kernel);
#endif
  }
  /// Defines and invokes a SYCL kernel function for the specified nd_range.
  template <typename KernelName = detail::auto_name, typename KernelType,
            int Dims>
  void parallel_for(nd_range<Dims> ExecutionRange,
                    _KERNELFUNCPARAM(KernelFunc)) {
    throwIfActionIsCreated();
    using NameT =
        typename detail::get_kernel_name_t<KernelName, KernelType>::name;
    using LambdaArgType =
        sycl::detail::lambda_arg_type<KernelType, nd_item<Dims>>;
    // If the user-provided type is convertible from sycl::nd_item, use
    // sycl::nd_item to transport the item information.
    using TransformedArgType =
        typename TransformUserItemType<Dims, LambdaArgType>::type;
    (void)ExecutionRange;
    kernel_parallel_for_wrapper<NameT, TransformedArgType>(KernelFunc);
#ifndef __SYCL_DEVICE_ONLY__
    detail::checkValueRange<Dims>(ExecutionRange);
    MNDRDesc.set(std::move(ExecutionRange));
    StoreLambda<NameT, KernelType, Dims, TransformedArgType>(
        std::move(KernelFunc));
    setType(detail::CG::Kernel);
#endif
  }
  /// Implements a parallel_for() over a range with a single reduction.
  template <typename KernelName = detail::auto_name, typename KernelType,
            int Dims, typename Reduction>
  void parallel_for(range<Dims> Range, Reduction Redu,
                    _KERNELFUNCPARAM(KernelFunc)) {
    std::shared_ptr<detail::queue_impl> QueueCopy = MQueue;

    // Before running the kernels, check that the device has enough local
    // memory to hold the local arrays required by the tree-reduction
    // algorithm.
    constexpr bool IsTreeReduction =
        !Reduction::has_fast_reduce && !Reduction::has_fast_atomics;
    size_t OneElemSize =
        IsTreeReduction ? sizeof(typename Reduction::result_type) : 0;
    uint32_t NumConcurrentWorkGroups =
#ifdef __SYCL_REDUCTION_NUM_CONCURRENT_WORKGROUPS
        __SYCL_REDUCTION_NUM_CONCURRENT_WORKGROUPS;
#else
        ext::oneapi::detail::reduGetMaxNumConcurrentWorkGroups(MQueue);
#endif
    size_t MaxWGSize =
        ext::oneapi::detail::reduGetMaxWGSize(MQueue, OneElemSize);
    ext::oneapi::detail::reduCGFunc<KernelName>(
        *this, KernelFunc, Range, MaxWGSize, NumConcurrentWorkGroups, Redu);
    if (Reduction::is_usm ||
        (Reduction::has_fast_atomics && Redu.initializeToIdentity()) ||
        (!Reduction::has_fast_atomics && Redu.hasUserDiscardWriteAccessor())) {
      this->finalize();
      handler CopyHandler(QueueCopy, MIsHost);
      CopyHandler.saveCodeLoc(MCodeLoc);
      ext::oneapi::detail::reduSaveFinalResultToUserMem<KernelName>(CopyHandler,
                                                                    Redu);
      MLastEvent = CopyHandler.finalize();
    }
  }
  /// Implements a parallel_for() over an nd_range with a reduction that can
  /// use fast atomic operations.
  template <typename KernelName = detail::auto_name, typename KernelType,
            int Dims, typename Reduction>
  detail::enable_if_t<Reduction::has_fast_atomics>
  parallel_for(nd_range<Dims> Range, Reduction Redu,
               _KERNELFUNCPARAM(KernelFunc)) {
    std::shared_ptr<detail::queue_impl> QueueCopy = MQueue;
    ext::oneapi::detail::reduCGFunc<KernelName>(*this, KernelFunc, Range, Redu);

    if (Reduction::is_usm || Redu.initializeToIdentity()) {
      this->finalize();
      handler CopyHandler(QueueCopy, MIsHost);
      CopyHandler.saveCodeLoc(MCodeLoc);
      ext::oneapi::detail::reduSaveFinalResultToUserMem<KernelName>(CopyHandler,
                                                                    Redu);
      MLastEvent = CopyHandler.finalize();
    }
  }
  /// Implements a parallel_for() over an nd_range with a reduction over
  /// 64-bit floating-point data, using atomic64 operations where available.
  template <typename KernelName = detail::auto_name, typename KernelType,
            int Dims, typename Reduction>
  detail::enable_if_t<Reduction::has_atomic_add_float64>
  parallel_for(nd_range<Dims> Range, Reduction Redu,
               _KERNELFUNCPARAM(KernelFunc)) {
    std::shared_ptr<detail::queue_impl> QueueCopy = MQueue;
    device D = detail::getDeviceFromHandler(*this);

    if (D.has(aspect::atomic64)) {
      ext::oneapi::detail::reduCGFuncAtomic64<KernelName>(*this, KernelFunc,
                                                          Range, Redu);
      if (Reduction::is_usm || Redu.initializeToIdentity()) {
        this->finalize();
        handler CopyHandler(QueueCopy, MIsHost);
        CopyHandler.saveCodeLoc(MCodeLoc);
        ext::oneapi::detail::reduSaveFinalResultToUserMem<KernelName>(
            CopyHandler, Redu);
        MLastEvent = CopyHandler.finalize();
      }
    } else {
      // The device does not support atomic64; fall back to the generic
      // implementation.
      parallel_for_Impl<KernelName>(Range, Redu, KernelFunc);
    }
  }
  /// Implements a parallel_for() over an nd_range with a reduction that
  /// requires the generic (non-fast-atomics) implementation.
  template <typename KernelName = detail::auto_name, typename KernelType,
            int Dims, typename Reduction>
  detail::enable_if_t<!Reduction::has_fast_atomics &&
                      !Reduction::has_atomic_add_float64>
  parallel_for(nd_range<Dims> Range, Reduction Redu,
               _KERNELFUNCPARAM(KernelFunc)) {
    parallel_for_Impl<KernelName>(Range, Redu, KernelFunc);
  }
  template <typename KernelName, typename KernelType, int Dims,
            typename Reduction>
  void parallel_for_Impl(nd_range<Dims> Range, Reduction Redu,
                         KernelType KernelFunc) {
    // Before running the kernels, check that the device has enough local
    // memory to hold the local arrays needed by the tree-reduction algorithm.
    constexpr bool HFR = Reduction::has_fast_reduce;
    size_t OneElemSize = HFR ? 0 : sizeof(typename Reduction::result_type);
    size_t MaxWGSize =
        ext::oneapi::detail::reduGetMaxWGSize(MQueue, OneElemSize);
    if (Range.get_local_range().size() > MaxWGSize)
      throw sycl::runtime_error("The implementation handling parallel_for with"
                                " reduction requires work group size not bigger"
                                " than " +
                                    std::to_string(MaxWGSize),
                                PI_ERROR_INVALID_WORK_GROUP_SIZE);

    // 1. Run the kernel that computes partial sums for each work-group.
    ext::oneapi::detail::reduCGFunc<KernelName>(*this, KernelFunc, Range, Redu);
    std::shared_ptr<detail::queue_impl> QueueCopy = MQueue;
    this->finalize();

    // 2. Run the auxiliary kernel as many times as needed to reduce all
    // partial sums into one scalar.
    size_t NWorkItems = Range.get_group_range().size();
    if (MaxWGSize <= 1)
      throw sycl::runtime_error("The implementation handling parallel_for with "
                                "reduction requires the maximal work group "
                                "size to be greater than 1 to converge. "
                                "The maximal work group size depends on the "
                                "device and the size of the objects passed to "
                                "the reduction.",
                                PI_ERROR_INVALID_WORK_GROUP_SIZE);
    while (NWorkItems > 1) {
      handler AuxHandler(QueueCopy, MIsHost);
      AuxHandler.saveCodeLoc(MCodeLoc);

      NWorkItems = ext::oneapi::detail::reduAuxCGFunc<KernelName, KernelType>(
          AuxHandler, NWorkItems, MaxWGSize, Redu);
      MLastEvent = AuxHandler.finalize();
    } // end while (NWorkItems > 1)

    if (Reduction::is_usm || Redu.hasUserDiscardWriteAccessor()) {
      handler CopyHandler(QueueCopy, MIsHost);
      CopyHandler.saveCodeLoc(MCodeLoc);
      ext::oneapi::detail::reduSaveFinalResultToUserMem<KernelName>(CopyHandler,
                                                                    Redu);
      MLastEvent = CopyHandler.finalize();
    }
  }
  /// Implements a parallel_for() over an nd_range with multiple reductions.
  template <typename KernelName = detail::auto_name, typename KernelType,
            int Dims, typename... RestT>
  std::enable_if_t<
      (sizeof...(RestT) >= 3 &&
       ext::oneapi::detail::AreAllButLastReductions<RestT...>::value)>
  parallel_for(nd_range<Dims> Range, RestT... Rest) {
    std::tuple<RestT...> ArgsTuple(Rest...);
    constexpr size_t NumArgs = sizeof...(RestT);
    auto KernelFunc = std::get<NumArgs - 1>(ArgsTuple);
    auto ReduIndices = std::make_index_sequence<NumArgs - 1>();
    auto ReduTuple =
        ext::oneapi::detail::tuple_select_elements(ArgsTuple, ReduIndices);

    size_t LocalMemPerWorkItem =
        ext::oneapi::detail::reduGetMemPerWorkItem(ReduTuple, ReduIndices);
    size_t MaxWGSize =
        ext::oneapi::detail::reduGetMaxWGSize(MQueue, LocalMemPerWorkItem);
    if (Range.get_local_range().size() > MaxWGSize)
      throw sycl::runtime_error("The implementation handling parallel_for with"
                                " reduction requires work group size not bigger"
                                " than " +
                                    std::to_string(MaxWGSize),
                                PI_ERROR_INVALID_WORK_GROUP_SIZE);

    ext::oneapi::detail::reduCGFunc<KernelName>(*this, KernelFunc, Range,
                                                ReduTuple, ReduIndices);
    std::shared_ptr<detail::queue_impl> QueueCopy = MQueue;
    this->finalize();

    size_t NWorkItems = Range.get_group_range().size();
    while (NWorkItems > 1) {
      handler AuxHandler(QueueCopy, MIsHost);
      AuxHandler.saveCodeLoc(MCodeLoc);

      NWorkItems =
          ext::oneapi::detail::reduAuxCGFunc<KernelName, decltype(KernelFunc)>(
              AuxHandler, NWorkItems, MaxWGSize, ReduTuple, ReduIndices);
      MLastEvent = AuxHandler.finalize();
    } // end while (NWorkItems > 1)

    auto CopyEvent = ext::oneapi::detail::reduSaveFinalResultToUserMem(
        QueueCopy, MIsHost, ReduTuple, ReduIndices);
    if (CopyEvent)
      MLastEvent = *CopyEvent;
  }
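
  // Illustrative usage (not part of the original header): a minimal sketch
  // assuming a sycl::queue `Q`, a buffer `SumBuf` holding one int, and the
  // SYCL 2020 reduction interface.
  //
  //   Q.submit([&](handler &CGH) {
  //     auto SumRed = sycl::reduction(SumBuf, CGH, sycl::plus<int>());
  //     CGH.parallel_for<class HypotheticalSum>(
  //         nd_range<1>{range<1>{1024}, range<1>{64}}, SumRed,
  //         [=](nd_item<1> It, auto &Sum) { Sum += It.get_global_id(0); });
  //   });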
  /// Hierarchical kernel invocation method of a kernel defined as a lambda
  /// encoding the body of each work-group to launch.
  template <typename KernelName = detail::auto_name, typename KernelType,
            int Dims>
  void parallel_for_work_group(range<Dims> NumWorkGroups,
                               _KERNELFUNCPARAM(KernelFunc)) {
    throwIfActionIsCreated();
    using NameT =
        typename detail::get_kernel_name_t<KernelName, KernelType>::name;
    using LambdaArgType =
        sycl::detail::lambda_arg_type<KernelType, group<Dims>>;
    (void)NumWorkGroups;
    kernel_parallel_for_work_group_wrapper<NameT, LambdaArgType>(KernelFunc);
#ifndef __SYCL_DEVICE_ONLY__
    detail::checkValueRange<Dims>(NumWorkGroups);
    MNDRDesc.setNumWorkGroups(NumWorkGroups);
    StoreLambda<NameT, KernelType, Dims, LambdaArgType>(std::move(KernelFunc));
    setType(detail::CG::Kernel);
#endif // __SYCL_DEVICE_ONLY__
  }
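
  // Illustrative usage (not part of the original header): a minimal sketch
  // assuming a sycl::queue `Q`.
  //
  //   Q.submit([&](handler &CGH) {
  //     CGH.parallel_for_work_group<class HypotheticalHier>(
  //         range<1>{8}, range<1>{64}, [=](group<1> G) {
  //           G.parallel_for_work_item([&](h_item<1> It) {
  //             // Executed by each of the 64 work-items in each group.
  //           });
  //         });
  //   });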
  /// Hierarchical kernel invocation method with an explicit work-group size.
  template <typename KernelName = detail::auto_name, typename KernelType,
            int Dims>
  void parallel_for_work_group(range<Dims> NumWorkGroups,
                               range<Dims> WorkGroupSize,
                               _KERNELFUNCPARAM(KernelFunc)) {
    throwIfActionIsCreated();
    using NameT =
        typename detail::get_kernel_name_t<KernelName, KernelType>::name;
    using LambdaArgType =
        sycl::detail::lambda_arg_type<KernelType, group<Dims>>;
    (void)NumWorkGroups;
    (void)WorkGroupSize;
    kernel_parallel_for_work_group_wrapper<NameT, LambdaArgType>(KernelFunc);
#ifndef __SYCL_DEVICE_ONLY__
    nd_range<Dims> ExecRange =
        nd_range<Dims>(NumWorkGroups * WorkGroupSize, WorkGroupSize);
    detail::checkValueRange<Dims>(ExecRange);
    MNDRDesc.set(std::move(ExecRange));
    StoreLambda<NameT, KernelType, Dims, LambdaArgType>(std::move(KernelFunc));
    setType(detail::CG::Kernel);
#endif // __SYCL_DEVICE_ONLY__
  }
  /// Invokes a SYCL kernel constructed from a kernel object.
  void single_task(kernel Kernel) {
    throwIfActionIsCreated();
    verifyKernelInvoc(Kernel);
    // The range is a compile-time constant, so no INT_MAX range check is
    // needed.
    MNDRDesc.set(range<1>{1});
    MKernel = detail::getSyclObjImpl(std::move(Kernel));
    setType(detail::CG::Kernel);
    extractArgsAndReqs();
    MKernelName = getKernelName();
  }

  void parallel_for(range<1> NumWorkItems, kernel Kernel) {
    parallel_for_impl(NumWorkItems, Kernel);
  }

  void parallel_for(range<2> NumWorkItems, kernel Kernel) {
    parallel_for_impl(NumWorkItems, Kernel);
  }

  void parallel_for(range<3> NumWorkItems, kernel Kernel) {
    parallel_for_impl(NumWorkItems, Kernel);
  }
  /// Invokes a SYCL kernel object for the specified range and offset.
  template <int Dims>
  __SYCL2020_DEPRECATED("offsets are deprecated in SYCL 2020")
  void parallel_for(range<Dims> NumWorkItems, id<Dims> WorkItemOffset,
                    kernel Kernel) {
    throwIfActionIsCreated();
    verifyKernelInvoc(Kernel);
    MKernel = detail::getSyclObjImpl(std::move(Kernel));
    detail::checkValueRange<Dims>(NumWorkItems, WorkItemOffset);
    MNDRDesc.set(std::move(NumWorkItems), std::move(WorkItemOffset));
    setType(detail::CG::Kernel);
    extractArgsAndReqs();
    MKernelName = getKernelName();
  }
  /// Invokes a SYCL kernel object for the specified nd_range.
  template <int Dims>
  void parallel_for(nd_range<Dims> NDRange, kernel Kernel) {
    throwIfActionIsCreated();
    verifyKernelInvoc(Kernel);
    MKernel = detail::getSyclObjImpl(std::move(Kernel));
    detail::checkValueRange<Dims>(NDRange);
    MNDRDesc.set(std::move(NDRange));
    setType(detail::CG::Kernel);
    extractArgsAndReqs();
    MKernelName = getKernelName();
  }
  /// Defines and invokes a SYCL kernel function using the provided kernel
  /// object and lambda.
  template <typename KernelName = detail::auto_name, typename KernelType>
  void single_task(kernel Kernel, _KERNELFUNCPARAM(KernelFunc)) {
    throwIfActionIsCreated();
    using NameT =
        typename detail::get_kernel_name_t<KernelName, KernelType>::name;
    (void)Kernel;
    kernel_single_task<NameT>(KernelFunc);
#ifndef __SYCL_DEVICE_ONLY__
    // The range is a compile-time constant, so no INT_MAX range check is
    // needed.
    MNDRDesc.set(range<1>{1});
    MKernel = detail::getSyclObjImpl(std::move(Kernel));
    setType(detail::CG::Kernel);
    if (!MIsHost && !lambdaAndKernelHaveEqualName<NameT>()) {
      extractArgsAndReqs();
      MKernelName = getKernelName();
    } else
      StoreLambda<NameT, KernelType, 1, void>(std::move(KernelFunc));
#else
    detail::CheckDeviceCopyable<KernelType>();
#endif
  }
  /// Invokes a lambda on the host, with an interop_handle providing access to
  /// the underlying backend objects.
  template <typename FuncT>
  __SYCL_DEPRECATED("interop_task() is deprecated, use host_task() instead")
  void interop_task(FuncT Func) {
    MInteropTask.reset(new detail::InteropTask(std::move(Func)));
    setType(detail::CG::CodeplayInteropTask);
  }
  /// Defines and invokes a SYCL kernel function for the specified range,
  /// using the provided kernel object and lambda.
  template <typename KernelName = detail::auto_name, typename KernelType,
            int Dims>
  void parallel_for(kernel Kernel, range<Dims> NumWorkItems,
                    _KERNELFUNCPARAM(KernelFunc)) {
    throwIfActionIsCreated();
    using NameT =
        typename detail::get_kernel_name_t<KernelName, KernelType>::name;
    using LambdaArgType =
        sycl::detail::lambda_arg_type<KernelType, item<Dims>>;
    (void)Kernel;
    (void)NumWorkItems;
    kernel_parallel_for_wrapper<NameT, LambdaArgType>(KernelFunc);
#ifndef __SYCL_DEVICE_ONLY__
    detail::checkValueRange<Dims>(NumWorkItems);
    MNDRDesc.set(std::move(NumWorkItems));
    MKernel = detail::getSyclObjImpl(std::move(Kernel));
    setType(detail::CG::Kernel);
    if (!MIsHost && !lambdaAndKernelHaveEqualName<NameT>()) {
      extractArgsAndReqs();
      MKernelName = getKernelName();
    } else
      StoreLambda<NameT, KernelType, Dims, LambdaArgType>(
          std::move(KernelFunc));
#endif
  }
  /// Defines and invokes a SYCL kernel function for the specified range and
  /// offset, using the provided kernel object and lambda.
  template <typename KernelName = detail::auto_name, typename KernelType,
            int Dims>
  __SYCL2020_DEPRECATED("offsets are deprecated in SYCL 2020")
  void parallel_for(kernel Kernel, range<Dims> NumWorkItems,
                    id<Dims> WorkItemOffset, _KERNELFUNCPARAM(KernelFunc)) {
    throwIfActionIsCreated();
    using NameT =
        typename detail::get_kernel_name_t<KernelName, KernelType>::name;
    using LambdaArgType =
        sycl::detail::lambda_arg_type<KernelType, item<Dims>>;
    (void)Kernel;
    (void)NumWorkItems;
    (void)WorkItemOffset;
    kernel_parallel_for_wrapper<NameT, LambdaArgType>(KernelFunc);
#ifndef __SYCL_DEVICE_ONLY__
    detail::checkValueRange<Dims>(NumWorkItems, WorkItemOffset);
    MNDRDesc.set(std::move(NumWorkItems), std::move(WorkItemOffset));
    MKernel = detail::getSyclObjImpl(std::move(Kernel));
    setType(detail::CG::Kernel);
    if (!MIsHost && !lambdaAndKernelHaveEqualName<NameT>()) {
      extractArgsAndReqs();
      MKernelName = getKernelName();
    } else
      StoreLambda<NameT, KernelType, Dims, LambdaArgType>(
          std::move(KernelFunc));
#endif
  }
  /// Defines and invokes a SYCL kernel function for the specified nd_range,
  /// using the provided kernel object and lambda.
  template <typename KernelName = detail::auto_name, typename KernelType,
            int Dims>
  void parallel_for(kernel Kernel, nd_range<Dims> NDRange,
                    _KERNELFUNCPARAM(KernelFunc)) {
    throwIfActionIsCreated();
    using NameT =
        typename detail::get_kernel_name_t<KernelName, KernelType>::name;
    using LambdaArgType =
        sycl::detail::lambda_arg_type<KernelType, nd_item<Dims>>;
    (void)Kernel;
    (void)NDRange;
    kernel_parallel_for_wrapper<NameT, LambdaArgType>(KernelFunc);
#ifndef __SYCL_DEVICE_ONLY__
    detail::checkValueRange<Dims>(NDRange);
    MNDRDesc.set(std::move(NDRange));
    MKernel = detail::getSyclObjImpl(std::move(Kernel));
    setType(detail::CG::Kernel);
    if (!MIsHost && !lambdaAndKernelHaveEqualName<NameT>()) {
      extractArgsAndReqs();
      MKernelName = getKernelName();
    } else
      StoreLambda<NameT, KernelType, Dims, LambdaArgType>(
          std::move(KernelFunc));
#endif
  }
  /// Hierarchical kernel invocation method using the provided kernel object.
  template <typename KernelName = detail::auto_name, typename KernelType,
            int Dims>
  void parallel_for_work_group(kernel Kernel, range<Dims> NumWorkGroups,
                               _KERNELFUNCPARAM(KernelFunc)) {
    throwIfActionIsCreated();
    using NameT =
        typename detail::get_kernel_name_t<KernelName, KernelType>::name;
    using LambdaArgType =
        sycl::detail::lambda_arg_type<KernelType, group<Dims>>;
    (void)Kernel;
    (void)NumWorkGroups;
    kernel_parallel_for_work_group_wrapper<NameT, LambdaArgType>(KernelFunc);
#ifndef __SYCL_DEVICE_ONLY__
    detail::checkValueRange<Dims>(NumWorkGroups);
    MNDRDesc.setNumWorkGroups(NumWorkGroups);
    MKernel = detail::getSyclObjImpl(std::move(Kernel));
    StoreLambda<NameT, KernelType, Dims, LambdaArgType>(std::move(KernelFunc));
    setType(detail::CG::Kernel);
#endif // __SYCL_DEVICE_ONLY__
  }
  /// Hierarchical kernel invocation method using the provided kernel object
  /// and an explicit work-group size.
  template <typename KernelName = detail::auto_name, typename KernelType,
            int Dims>
  void parallel_for_work_group(kernel Kernel, range<Dims> NumWorkGroups,
                               range<Dims> WorkGroupSize,
                               _KERNELFUNCPARAM(KernelFunc)) {
    throwIfActionIsCreated();
    using NameT =
        typename detail::get_kernel_name_t<KernelName, KernelType>::name;
    using LambdaArgType =
        sycl::detail::lambda_arg_type<KernelType, group<Dims>>;
    (void)Kernel;
    (void)NumWorkGroups;
    (void)WorkGroupSize;
    kernel_parallel_for_work_group_wrapper<NameT, LambdaArgType>(KernelFunc);
#ifndef __SYCL_DEVICE_ONLY__
    nd_range<Dims> ExecRange =
        nd_range<Dims>(NumWorkGroups * WorkGroupSize, WorkGroupSize);
    detail::checkValueRange<Dims>(ExecRange);
    MNDRDesc.set(std::move(ExecRange));
    MKernel = detail::getSyclObjImpl(std::move(Kernel));
    StoreLambda<NameT, KernelType, Dims, LambdaArgType>(std::move(KernelFunc));
    setType(detail::CG::Kernel);
#endif // __SYCL_DEVICE_ONLY__
  }

#undef _KERNELFUNCPARAM
  /// Copies the contents of the memory object accessed by Src into the memory
  /// owned by Dst.
  template <typename T_Src, typename T_Dst, int Dims, access::mode AccessMode,
            access::target AccessTarget,
            access::placeholder IsPlaceholder = access::placeholder::false_t>
  void copy(accessor<T_Src, Dims, AccessMode, AccessTarget, IsPlaceholder> Src,
            std::shared_ptr<T_Dst> Dst) {
    throwIfActionIsCreated();
    static_assert(isValidTargetForExplicitOp(AccessTarget),
                  "Invalid accessor target for the copy method.");
    static_assert(isValidModeForSourceAccessor(AccessMode),
                  "Invalid accessor mode for the copy method.");
    // Make sure the shared_ptr stays alive as long as the command group does.
    MSharedPtrStorage.push_back(Dst);
    typename std::shared_ptr<T_Dst>::element_type *RawDstPtr = Dst.get();
    copy(Src, RawDstPtr);
  }
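
  // Illustrative usage (not part of the original header): a minimal sketch
  // assuming a sycl::queue `Q`, a buffer `Buf` of N floats, and a matching
  // std::shared_ptr<float> `HostData`.
  //
  //   Q.submit([&](handler &CGH) {
  //     auto Src = Buf.get_access<access::mode::read>(CGH);
  //     CGH.copy(Src, HostData); // the handler keeps HostData alive
  //   });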
  /// Copies the contents of the memory owned by Src into the memory object
  /// accessed by Dst.
  template <typename T_Src, typename T_Dst, int Dims, access::mode AccessMode,
            access::target AccessTarget,
            access::placeholder IsPlaceholder = access::placeholder::false_t>
  void copy(std::shared_ptr<T_Src> Src,
            accessor<T_Dst, Dims, AccessMode, AccessTarget, IsPlaceholder> Dst) {
    throwIfActionIsCreated();
    static_assert(isValidTargetForExplicitOp(AccessTarget),
                  "Invalid accessor target for the copy method.");
    static_assert(isValidModeForDestinationAccessor(AccessMode),
                  "Invalid accessor mode for the copy method.");
    // Make sure the shared_ptr stays alive as long as the command group does.
    MSharedPtrStorage.push_back(Src);
    typename std::shared_ptr<T_Src>::element_type *RawSrcPtr = Src.get();
    copy(RawSrcPtr, Dst);
  }
  /// Copies the contents of the memory object accessed by Src into the memory
  /// pointed to by Dst.
  template <typename T_Src, typename T_Dst, int Dims, access::mode AccessMode,
            access::target AccessTarget,
            access::placeholder IsPlaceholder = access::placeholder::false_t>
  void copy(accessor<T_Src, Dims, AccessMode, AccessTarget, IsPlaceholder> Src,
            T_Dst *Dst) {
    throwIfActionIsCreated();
    static_assert(isValidTargetForExplicitOp(AccessTarget),
                  "Invalid accessor target for the copy method.");
    static_assert(isValidModeForSourceAccessor(AccessMode),
                  "Invalid accessor mode for the copy method.");
#ifndef __SYCL_DEVICE_ONLY__
    if (MIsHost) {
      // On the host, perform the copy with a kernel.
      copyAccToPtrHost(Src, Dst);
      return;
    }
#endif
    setType(detail::CG::CopyAccToPtr);

    detail::AccessorBaseHost *AccBase = (detail::AccessorBaseHost *)&Src;
    detail::AccessorImplPtr AccImpl = detail::getSyclObjImpl(*AccBase);

    MRequirements.push_back(AccImpl.get());
    MSrcPtr = static_cast<void *>(AccImpl.get());
    MDstPtr = static_cast<void *>(Dst);
    // The accessor needs to stay alive as long as the command group does.
    MAccStorage.push_back(std::move(AccImpl));
  }
  /// Copies the contents of the memory pointed to by Src into the memory
  /// object accessed by Dst.
  template <typename T_Src, typename T_Dst, int Dims, access::mode AccessMode,
            access::target AccessTarget,
            access::placeholder IsPlaceholder = access::placeholder::false_t>
  void copy(const T_Src *Src,
            accessor<T_Dst, Dims, AccessMode, AccessTarget, IsPlaceholder> Dst) {
    throwIfActionIsCreated();
    static_assert(isValidTargetForExplicitOp(AccessTarget),
                  "Invalid accessor target for the copy method.");
    static_assert(isValidModeForDestinationAccessor(AccessMode),
                  "Invalid accessor mode for the copy method.");
#ifndef __SYCL_DEVICE_ONLY__
    if (MIsHost) {
      // On the host, perform the copy with a kernel.
      copyPtrToAccHost(Src, Dst);
      return;
    }
#endif
    setType(detail::CG::CopyPtrToAcc);

    detail::AccessorBaseHost *AccBase = (detail::AccessorBaseHost *)&Dst;
    detail::AccessorImplPtr AccImpl = detail::getSyclObjImpl(*AccBase);

    MRequirements.push_back(AccImpl.get());
    MSrcPtr = const_cast<T_Src *>(Src);
    MDstPtr = static_cast<void *>(AccImpl.get());
    // The accessor needs to stay alive as long as the command group does.
    MAccStorage.push_back(std::move(AccImpl));
  }
  /// Copies the contents of the memory object accessed by Src into the memory
  /// object accessed by Dst.
  template <typename T_Src, int Dims_Src, access::mode AccessMode_Src,
            access::target AccessTarget_Src, typename T_Dst, int Dims_Dst,
            access::mode AccessMode_Dst, access::target AccessTarget_Dst,
            access::placeholder IsPlaceholder_Src =
                access::placeholder::false_t,
            access::placeholder IsPlaceholder_Dst =
                access::placeholder::false_t>
  void copy(accessor<T_Src, Dims_Src, AccessMode_Src, AccessTarget_Src,
                     IsPlaceholder_Src>
                Src,
            accessor<T_Dst, Dims_Dst, AccessMode_Dst, AccessTarget_Dst,
                     IsPlaceholder_Dst>
                Dst) {
    throwIfActionIsCreated();
    static_assert(isValidTargetForExplicitOp(AccessTarget_Src),
                  "Invalid source accessor target for the copy method.");
    static_assert(isValidTargetForExplicitOp(AccessTarget_Dst),
                  "Invalid destination accessor target for the copy method.");
    static_assert(isValidModeForSourceAccessor(AccessMode_Src),
                  "Invalid source accessor mode for the copy method.");
    static_assert(isValidModeForDestinationAccessor(AccessMode_Dst),
                  "Invalid destination accessor mode for the copy method.");
    if (Dst.get_size() < Src.get_size())
      throw sycl::invalid_object_error(
          "The destination accessor size is too small to copy the memory into.",
          PI_ERROR_INVALID_OPERATION);

    if (copyAccToAccHelper(Src, Dst))
      return;
    setType(detail::CG::CopyAccToAcc);

    detail::AccessorBaseHost *AccBaseSrc = (detail::AccessorBaseHost *)&Src;
    detail::AccessorImplPtr AccImplSrc = detail::getSyclObjImpl(*AccBaseSrc);

    detail::AccessorBaseHost *AccBaseDst = (detail::AccessorBaseHost *)&Dst;
    detail::AccessorImplPtr AccImplDst = detail::getSyclObjImpl(*AccBaseDst);

    MRequirements.push_back(AccImplSrc.get());
    MRequirements.push_back(AccImplDst.get());
    MSrcPtr = AccImplSrc.get();
    MDstPtr = AccImplDst.get();
    // The accessors need to stay alive as long as the command group does.
    MAccStorage.push_back(std::move(AccImplSrc));
    MAccStorage.push_back(std::move(AccImplDst));
  }
  /// Provides a guarantee that the memory object accessed by Acc is updated
  /// on the host after this command group completes.
  template <typename T, int Dims, access::mode AccessMode,
            access::target AccessTarget,
            access::placeholder IsPlaceholder = access::placeholder::false_t>
  void
  update_host(accessor<T, Dims, AccessMode, AccessTarget, IsPlaceholder> Acc) {
    throwIfActionIsCreated();
    static_assert(isValidTargetForExplicitOp(AccessTarget),
                  "Invalid accessor target for the update_host method.");
    setType(detail::CG::UpdateHost);

    detail::AccessorBaseHost *AccBase = (detail::AccessorBaseHost *)&Acc;
    detail::AccessorImplPtr AccImpl = detail::getSyclObjImpl(*AccBase);

    MDstPtr = static_cast<void *>(AccImpl.get());
    MRequirements.push_back(AccImpl.get());
    MAccStorage.push_back(std::move(AccImpl));
  }
  /// Fills the memory accessed by Dst with the value Pattern.
  template <typename T, int Dims, access::mode AccessMode,
            access::target AccessTarget,
            access::placeholder IsPlaceholder = access::placeholder::false_t,
            typename PropertyListT = property_list>
  void fill(accessor<T, Dims, AccessMode, AccessTarget, IsPlaceholder,
                     PropertyListT>
                Dst,
            const T &Pattern) {
    throwIfActionIsCreated();
    static_assert(isValidTargetForExplicitOp(AccessTarget),
                  "Invalid accessor target for the fill method.");
    if (!MIsHost && (((Dims == 1) && isConstOrGlobal(AccessTarget)) ||
                     isImageOrImageArray(AccessTarget))) {
      setType(detail::CG::Fill);

      detail::AccessorBaseHost *AccBase = (detail::AccessorBaseHost *)&Dst;
      detail::AccessorImplPtr AccImpl = detail::getSyclObjImpl(*AccBase);

      MDstPtr = static_cast<void *>(AccImpl.get());
      MRequirements.push_back(AccImpl.get());
      MAccStorage.push_back(std::move(AccImpl));

      MPattern.resize(sizeof(T));
      auto PatternPtr = reinterpret_cast<T *>(MPattern.data());
      *PatternPtr = Pattern;
    } else {
      // Fall back to a kernel implementation for the remaining cases.
      range<Dims> Range = Dst.get_range();
      parallel_for<class __fill<T, Dims, AccessMode, AccessTarget,
                                IsPlaceholder>>(Range, [=](id<Dims> Index) {
        Dst[Index] = Pattern;
      });
    }
  }
  /// Fills the specified memory with the value Pattern, Count times.
  template <typename T> void fill(void *Ptr, const T &Pattern, size_t Count) {
    throwIfActionIsCreated();
    static_assert(std::is_trivially_copyable<T>::value,
                  "Pattern must be trivially copyable");
    parallel_for<class __usmfill<T>>(range<1>(Count), [=](id<1> Index) {
      T *CastedPtr = static_cast<T *>(Ptr);
      CastedPtr[Index] = Pattern;
    });
  }
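
  // Illustrative usage (not part of the original header): a minimal sketch
  // assuming a sycl::queue `Q` and a USM allocation `Ptr` of N ints.
  //
  //   Q.submit([&](handler &CGH) {
  //     CGH.fill(Ptr, 0, N); // writes the int pattern 0 to N elements
  //   });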
  /// Prevents any commands submitted to this queue afterward from executing
  /// until all commands previously submitted to this queue have completed.
  void ext_oneapi_barrier() {
    throwIfActionIsCreated();
    setType(detail::CG::Barrier);
  }

  /// Prevents any commands submitted to this queue afterward from executing
  /// until all events in WaitList have completed.
  void ext_oneapi_barrier(const std::vector<event> &WaitList);
  /// Copies data from one memory region to another; both must be host or USM
  /// addresses. Count is measured in bytes.
  void memcpy(void *Dest, const void *Src, size_t Count);

  /// Copies Count elements of type T from Src to Dest; both must be host or
  /// USM addresses.
  template <typename T> void copy(const T *Src, T *Dest, size_t Count) {
    this->memcpy(Dest, Src, Count * sizeof(T));
  }
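
  // Illustrative note (not part of the original header): copy() is a typed
  // convenience wrapper over memcpy(); copying N elements of type T issues a
  // memcpy of N * sizeof(T) bytes, e.g. copy(Src, Dest, 100) with T = float
  // moves 400 bytes.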
  /// Fills Count bytes of the memory pointed to by Dest with the byte value
  /// Value; Dest must be a host or USM address.
  void memset(void *Dest, int Value, size_t Count);

  /// Provides a hint to the runtime to prefetch Count bytes starting at Ptr
  /// into a device's memory.
  void prefetch(const void *Ptr, size_t Count);

  /// Provides additional information (Advice) about the memory range of
  /// Length bytes starting at Ptr to the underlying runtime.
  void mem_advise(const void *Ptr, size_t Length, int Advice);
private:
  std::shared_ptr<detail::queue_impl> MQueue;
  /// Storage for explicitly passed argument values (set_arg, require, and so
  /// on); copies must outlive the command group function.
  std::vector<std::vector<char>> MArgsStorage;
  std::vector<detail::AccessorImplPtr> MAccStorage;
  std::vector<detail::LocalAccessorImplPtr> MLocalAccStorage;
  std::vector<std::shared_ptr<detail::stream_impl>> MStreamStorage;
  mutable std::vector<std::shared_ptr<const void>> MSharedPtrStorage;
  /// The list of arguments for the kernel.
  std::vector<detail::ArgDesc> MArgs;
  /// The list of accessors associated with this handler, either created with
  /// it as an argument or required via the require() method.
  std::vector<detail::ArgDesc> MAssociatedAccesors;
  /// The list of requirements on memory objects, used for scheduling.
  std::vector<detail::Requirement *> MRequirements;
  /// Encodes the global size, local size, and offset of the launch.
  detail::NDRDescT MNDRDesc;
  /// The name of the kernel.
  std::string MKernelName;
  /// The kernel implementation.
  std::shared_ptr<detail::kernel_impl> MKernel;
  /// Pointer to the source host memory or accessor, depending on the command
  /// type.
  void *MSrcPtr = nullptr;
  /// Pointer to the destination host memory or accessor, depending on the
  /// command type.
  void *MDstPtr = nullptr;
  /// The pattern used by the fill command.
  std::vector<char> MPattern;
  /// Storage for the lambda or function object.
  std::unique_ptr<detail::HostKernelBase> MHostKernel;
  /// Storage for the lambda or function object used by host_task.
  std::unique_ptr<detail::HostTask> MHostTask;
  /// Storage for the lambda used by interop_task.
  std::unique_ptr<detail::InteropTask> MInteropTask;
  /// The list of events this command group depends on.
  std::vector<detail::EventImplPtr> MEvents;
  /// The list of events that must complete before a barrier command can run.
  std::vector<detail::EventImplPtr> MEventsWaitWithBarrier;

  bool MIsHost = false;

  detail::code_location MCodeLoc = {};
  bool MIsFinalized = false;
  event MLastEvent;
  // Friend declarations for components that need access to handler internals.
  template <typename DataT, int Dims, access::mode AccMode,
            access::target AccTarget, access::placeholder IsPlaceholder,
            typename PropertyListT>
  friend class accessor;

  template <typename T, class BinaryOperation, int Dims, size_t Extent,
            typename RedOutVar>
  friend class ext::oneapi::detail::reduction_impl_algo;

  template <typename Reduction, typename... RestT>
  friend std::enable_if_t<!Reduction::is_usm>
  ext::oneapi::detail::reduSaveFinalResultToUserMemHelper(
      std::vector<event> &Events, std::shared_ptr<detail::queue_impl> Queue,
      bool IsHost, Reduction &, RestT...);

  friend class ::MockHandler;
  bool DisableRangeRounding();

  bool RangeRoundingTrace();

  void GetRangeRoundingSettings(size_t &MinFactor, size_t &GoodFactor,
                                size_t &MinRange);
  template <typename WrapperT, typename TransformedArgType, int Dims,
            typename KernelType,
            std::enable_if_t<detail::KernelLambdaHasKernelHandlerArgT<
                KernelType, TransformedArgType>::value> * = nullptr>
  auto getRangeRoundedKernelLambda(KernelType KernelFunc,
                                   range<Dims> NumWorkItems) {
    return RoundedRangeKernelWithKH<TransformedArgType, Dims,
                                    KernelType>(NumWorkItems, KernelFunc);
  }

  template <typename WrapperT, typename TransformedArgType, int Dims,
            typename KernelType,
            std::enable_if_t<!detail::KernelLambdaHasKernelHandlerArgT<
                KernelType, TransformedArgType>::value> * = nullptr>
  auto getRangeRoundedKernelLambda(KernelType KernelFunc,
                                   range<Dims> NumWorkItems) {
    return RoundedRangeKernel<TransformedArgType, Dims, KernelType>(
        NumWorkItems, KernelFunc);
  }
};