41 #include <type_traits>
44 #if !SYCL_LANGUAGE_VERSION || SYCL_LANGUAGE_VERSION < 202001
45 #define __SYCL_NONCONST_FUNCTOR__
50 #ifdef __SYCL_NONCONST_FUNCTOR__
51 #define _KERNELFUNCPARAMTYPE KernelType
53 #define _KERNELFUNCPARAMTYPE const KernelType &
55 #define _KERNELFUNCPARAM(a) _KERNELFUNCPARAMTYPE a
66 template <
typename T_Src,
typename T_Dst,
int Dims,
71 template <
typename T_Src,
typename T_Dst,
int Dims,
93 template <
typename T,
int Dimensions,
typename AllocatorT,
typename Enable>
103 class image_accessor;
104 template <
typename RetType,
typename Func,
typename Arg>
109 template <
typename RetType,
typename Func,
typename Arg>
118 template <
typename F,
typename SuggestedArgType>
121 template <typename F, typename SuggestedArgType>
124 template <typename F, typename SuggestedArgType>
136 #if __SYCL_ID_QUERIES_FIT_IN_INT__
137 template <
typename T>
struct NotIntMsg;
139 template <
int Dims>
struct NotIntMsg<
range<Dims>> {
140 constexpr
static const char *Msg =
141 "Provided range is out of integer limits. Pass "
142 "`-fno-sycl-id-queries-fit-in-int' to disable range check.";
145 template <
int Dims>
struct NotIntMsg<
id<Dims>> {
146 constexpr
static const char *Msg =
147 "Provided offset is out of integer limits. Pass "
148 "`-fno-sycl-id-queries-fit-in-int' to disable offset check.";
154 template <
typename KernelType,
typename PropertiesT,
typename Cond =
void>
158 template <
typename KernelType,
typename PropertiesT>
160 KernelType, PropertiesT,
162 HasKernelPropertiesGetMethod<KernelType>::value>> {
168 "get(sycl::ext::oneapi::experimental::properties_tag) member in kernel "
169 "functor class must return a valid property list.");
174 #if __SYCL_ID_QUERIES_FIT_IN_INT__
175 template <
typename T,
typename ValT>
177 std::is_same<ValT, unsigned long long>::value>
178 checkValueRangeImpl(ValT V) {
179 static constexpr
size_t Limit =
182 throw runtime_error(NotIntMsg<T>::Msg, PI_ERROR_INVALID_VALUE);
186 template <
int Dims,
typename T>
188 std::is_same<T, id<Dims>>::value>
190 #if __SYCL_ID_QUERIES_FIT_IN_INT__
191 for (
size_t Dim = 0; Dim < Dims; ++Dim)
192 checkValueRangeImpl<T>(V[Dim]);
195 unsigned long long Product = 1;
196 for (
size_t Dim = 0; Dim < Dims; ++Dim) {
199 checkValueRangeImpl<T>(Product);
209 #if __SYCL_ID_QUERIES_FIT_IN_INT__
210 checkValueRange<Dims>(R);
211 checkValueRange<Dims>(O);
213 for (
size_t Dim = 0; Dim < Dims; ++Dim) {
214 unsigned long long Sum = R[Dim] + O[Dim];
216 checkValueRangeImpl<range<Dims>>(Sum);
224 template <
int Dims,
typename T>
227 #if __SYCL_ID_QUERIES_FIT_IN_INT__
228 checkValueRange<Dims>(V.get_global_range());
229 checkValueRange<Dims>(V.get_local_range());
230 checkValueRange<Dims>(V.get_offset());
232 checkValueRange<Dims>(V.get_global_range(), V.get_offset());
238 template <
typename TransformedArgType,
int Dims,
typename KernelType>
245 if (Arg[0] >= NumWorkItems[0])
247 Arg.set_allowed_range(NumWorkItems);
256 template <
typename TransformedArgType,
int Dims,
typename KernelType>
262 void operator()(TransformedArgType Arg, kernel_handler KH)
const {
263 if (Arg[0] >= NumWorkItems[0])
265 Arg.set_allowed_range(NumWorkItems);
275 using sycl::detail::queue_impl;
318 handler(std::shared_ptr<detail::queue_impl> Queue,
bool IsHost);
329 handler(std::shared_ptr<detail::queue_impl> Queue,
330 std::shared_ptr<detail::queue_impl> PrimaryQueue,
331 std::shared_ptr<detail::queue_impl> SecondaryQueue,
bool IsHost);
337 MArgsStorage.emplace_back(
sizeof(T));
338 auto Storage =
reinterpret_cast<F *
>(MArgsStorage.back().data());
347 void throwIfActionIsCreated() {
348 if (detail::CG::None != getType())
349 throw sycl::runtime_error(
"Attempt to set multiple actions for the "
350 "command group. Command group must consist of "
351 "a single kernel or explicit memory operation.",
352 PI_ERROR_INVALID_OPERATION);
358 extractArgsAndReqsFromLambda(
char *LambdaPtr,
size_t KernelArgsNum,
363 void extractArgsAndReqs();
366 const int Size,
const size_t Index,
size_t &IndexShift,
367 bool IsKernelCreatedFromSource,
bool IsESIMD);
370 std::string getKernelName();
372 template <
typename LambdaNameT>
bool lambdaAndKernelHaveEqualName() {
378 assert(MKernel &&
"MKernel is not initialized");
380 const std::string KernelName = getKernelName();
381 return LambdaName == KernelName;
401 void addStream(
const std::shared_ptr<detail::stream_impl> &Stream) {
402 MStreamStorage.push_back(Stream);
410 void addReduction(
const std::shared_ptr<const void> &ReduObj);
415 bool is_host() {
return MIsHost; }
417 #ifdef __SYCL_DEVICE_ONLY__
429 template <
typename T,
typename... Ts>
430 void setArgsHelper(
int ArgIndex, T &&Arg, Ts &&...Args) {
431 set_arg(ArgIndex, std::move(Arg));
432 setArgsHelper(++ArgIndex, std::move(Args)...);
435 void setArgsHelper(
int) {}
437 void setLocalAccessorArgHelper(
int ArgIndex,
442 MLocalAccStorage.push_back(std::move(LocalAccImpl));
443 MArgs.emplace_back(detail::kernel_param_kind_t::kind_accessor, Req,
444 static_cast<int>(access::target::local), ArgIndex);
450 void setArgHelper(
int ArgIndex,
453 #ifndef __SYCL_DEVICE_ONLY__
454 setLocalAccessorArgHelper(ArgIndex, Arg);
459 template <
typename DataT,
int Dims>
461 #ifndef __SYCL_DEVICE_ONLY__
462 setLocalAccessorArgHelper(ArgIndex, Arg);
477 MRequirements.push_back(Req);
479 MAccStorage.push_back(std::move(AccImpl));
481 MArgs.emplace_back(detail::kernel_param_kind_t::kind_accessor, Req,
482 static_cast<int>(AccessTarget), ArgIndex);
485 template <
typename T>
void setArgHelper(
int ArgIndex, T &&Arg) {
488 if (!std::is_same<cl_mem, T>::value && std::is_pointer<T>::value) {
489 MArgs.emplace_back(detail::kernel_param_kind_t::kind_pointer, StoredArg,
490 sizeof(T), ArgIndex);
492 MArgs.emplace_back(detail::kernel_param_kind_t::kind_std_layout,
493 StoredArg,
sizeof(T), ArgIndex);
497 void setArgHelper(
int ArgIndex, sampler &&Arg) {
499 MArgs.emplace_back(detail::kernel_param_kind_t::kind_sampler, StoredArg,
500 sizeof(sampler), ArgIndex);
504 void verifyKernelInvoc(
const kernel &Kernel) {
505 std::ignore = Kernel;
523 template <
class KernelType,
class NormalizedKernelType,
int Dims>
524 KernelType *ResetHostKernelHelper(
const KernelType &
KernelFunc) {
525 NormalizedKernelType NormalizedKernel(
KernelFunc);
526 auto NormalizedKernelFunc =
527 std::function<void(
const sycl::nd_item<Dims> &)>(NormalizedKernel);
530 sycl::nd_item<Dims>, Dims>(NormalizedKernelFunc);
531 MHostKernel.reset(HostKernelPtr);
532 return &HostKernelPtr->MKernel.template target<NormalizedKernelType>()
537 template <
class KernelType,
typename ArgT,
int Dims>
538 typename std::enable_if<std::is_same<ArgT, sycl::id<Dims>>::value,
540 ResetHostKernel(
const KernelType &
KernelFunc) {
541 struct NormalizedKernelType {
542 KernelType MKernelFunc;
543 NormalizedKernelType(
const KernelType &
KernelFunc)
549 return ResetHostKernelHelper<KernelType, struct NormalizedKernelType, Dims>(
554 template <
class KernelType,
typename ArgT,
int Dims>
555 typename std::enable_if<std::is_same<ArgT, sycl::nd_item<Dims>>::value,
557 ResetHostKernel(
const KernelType &
KernelFunc) {
558 struct NormalizedKernelType {
559 KernelType MKernelFunc;
560 NormalizedKernelType(
const KernelType &
KernelFunc)
566 return ResetHostKernelHelper<KernelType, struct NormalizedKernelType, Dims>(
571 template <
class KernelType,
typename ArgT,
int Dims>
572 typename std::enable_if<std::is_same<ArgT, sycl::item<Dims, false>>::value,
574 ResetHostKernel(
const KernelType &
KernelFunc) {
575 struct NormalizedKernelType {
576 KernelType MKernelFunc;
577 NormalizedKernelType(
const KernelType &
KernelFunc)
580 sycl::item<Dims, false> Item = detail::Builder::createItem<Dims, false>(
585 return ResetHostKernelHelper<KernelType, struct NormalizedKernelType, Dims>(
590 template <
class KernelType,
typename ArgT,
int Dims>
591 typename std::enable_if<std::is_same<ArgT, sycl::item<Dims, true>>::value,
593 ResetHostKernel(
const KernelType &
KernelFunc) {
594 struct NormalizedKernelType {
595 KernelType MKernelFunc;
596 NormalizedKernelType(
const KernelType &
KernelFunc)
599 sycl::item<Dims, true> Item = detail::Builder::createItem<Dims, true>(
604 return ResetHostKernelHelper<KernelType, struct NormalizedKernelType, Dims>(
609 template <
class KernelType,
typename ArgT,
int Dims>
610 typename std::enable_if_t<std::is_same<ArgT, void>::value, KernelType *>
611 ResetHostKernel(
const KernelType &
KernelFunc) {
612 struct NormalizedKernelType {
613 KernelType MKernelFunc;
614 NormalizedKernelType(
const KernelType &
KernelFunc)
621 return ResetHostKernelHelper<KernelType, struct NormalizedKernelType, Dims>(
629 template <
class KernelType,
typename ArgT,
int Dims>
630 typename std::enable_if<std::is_same<ArgT, sycl::group<Dims>>::value,
632 ResetHostKernel(
const KernelType &
KernelFunc) {
635 return (KernelType *)(MHostKernel->getPtr());
645 void verifyUsedKernelBundle(
const std::string &KernelName);
653 template <
typename KernelName,
typename KernelType,
int Dims,
654 typename LambdaArgType>
658 constexpr
bool IsCallableWithKernelHandler =
660 LambdaArgType>::value;
662 if (IsCallableWithKernelHandler && MIsHost) {
663 throw sycl::feature_not_supported(
664 "kernel_handler is not yet supported by host device.",
665 PI_ERROR_INVALID_OPERATION);
668 KernelType *KernelPtr =
669 ResetHostKernel<KernelType, LambdaArgType, Dims>(
KernelFunc);
671 using KI = sycl::detail::KernelInfo<KernelName>;
672 constexpr
bool KernelHasName =
673 KI::getName() !=
nullptr && KI::getName()[0] !=
'\0';
679 !KernelHasName ||
sizeof(
KernelFunc) == KI::getKernelSize(),
680 "Unexpected kernel lambda size. This can be caused by an "
681 "external host compiler producing a lambda with an "
682 "unexpected layout. This is a limitation of the compiler."
683 "In many cases the difference is related to capturing constexpr "
684 "variables. In such cases removing constexpr specifier aligns the "
685 "captures between the host compiler and the device compiler."
687 "In case of MSVC, passing "
688 "-fsycl-host-compiler-options='/std:c++latest' "
696 extractArgsAndReqsFromLambda(
reinterpret_cast<char *
>(KernelPtr),
697 KI::getNumParams(), &KI::getParamDesc(0),
699 MKernelName = KI::getName();
700 MOSModuleHandle = detail::OSUtil::getOSModuleHandle(KI::getName());
705 MArgs = std::move(MAssociatedAccesors);
710 if (IsCallableWithKernelHandler) {
711 getOrInsertHandlerKernelBundle(
true);
719 template <
int Dims_Src,
int Dims_Dst>
722 if (Dims_Src > Dims_Dst)
724 for (
size_t I = 0; I < Dims_Src; ++I)
735 template <
typename TSrc,
int DimSrc,
access::mode ModeSrc,
743 IsCopyingRectRegionAvailable(Src.get_range(), Dst.get_range()))
748 class __copyAcc2Acc<TSrc, DimSrc, ModeSrc, TargetSrc, TDst, DimDst,
749 ModeDst, TargetDst, IsPHSrc, IsPHDst>>(
750 LinearizedRange, [=](
id<1> Id) {
751 size_t Index = Id[0];
754 Dst[DstId] = Src[SrcId];
766 template <
typename TSrc,
int DimSrc,
access::mode ModeSrc,
777 class __copyAcc2Acc<TSrc, DimSrc, ModeSrc, TargetSrc, TDst, DimDst,
778 ModeDst, TargetDst, IsPHSrc, IsPHDst>>(
779 [=]() { *(Dst.get_pointer()) = *(Src.get_pointer()); });
783 #ifndef __SYCL_DEVICE_ONLY__
789 template <
typename TSrc,
typename TDst,
int Dim,
access::mode AccMode,
796 class __copyAcc2Ptr<TSrc, TDst, Dim, AccMode, AccTarget, IsPH>>(
800 (
reinterpret_cast<TSrcNonConst *
>(Dst))[LinearIndex] = Src[Index];
809 template <
typename TSrc,
typename TDst,
int Dim,
access::mode AccMode,
814 single_task<class __copyAcc2Ptr<TSrc, TDst, Dim, AccMode, AccTarget, IsPH>>(
817 *(
reinterpret_cast<TSrcNonConst *
>(Dst)) = *(Src.get_pointer());
825 template <
typename TSrc,
typename TDst,
int Dim,
access::mode AccMode,
828 copyPtrToAccHost(TSrc *Src,
832 class __copyPtr2Acc<TSrc, TDst, Dim, AccMode, AccTarget, IsPH>>(
835 Dst[Index] = (
reinterpret_cast<const TDst *
>(Src))[LinearIndex];
844 template <
typename TSrc,
typename TDst,
int Dim,
access::mode AccMode,
847 copyPtrToAccHost(TSrc *Src,
849 single_task<class __copyPtr2Acc<TSrc, TDst, Dim, AccMode, AccTarget, IsPH>>(
851 *(Dst.get_pointer()) = *(
reinterpret_cast<const TDst *
>(Src));
856 constexpr
static bool isConstOrGlobal(
access::target AccessTarget) {
857 return AccessTarget == access::target::device ||
858 AccessTarget == access::target::constant_buffer;
861 constexpr
static bool isImageOrImageArray(
access::target AccessTarget) {
862 return AccessTarget == access::target::image ||
863 AccessTarget == access::target::image_array;
866 constexpr
static bool
868 return isConstOrGlobal(AccessTarget) || isImageOrImageArray(AccessTarget);
876 constexpr
static bool
881 AccessMode == access::mode::discard_read_write;
884 template <
int Dims,
typename LambdaArgType>
struct TransformUserItemType {
885 using type =
typename std::conditional<
886 std::is_convertible<nd_item<Dims>, LambdaArgType>::value,
nd_item<Dims>,
887 typename std::conditional<
888 std::is_convertible<item<Dims>, LambdaArgType>::value,
item<Dims>,
889 LambdaArgType>::type>::type;
903 template <
typename KernelName,
typename KernelType,
int Dims,
904 typename PropertiesT =
906 void parallel_for_lambda_impl(
range<Dims> NumWorkItems,
908 throwIfActionIsCreated();
909 using LambdaArgType = sycl::detail::lambda_arg_type<KernelType, item<Dims>>;
914 using TransformedArgType =
typename std::conditional<
915 std::is_integral<LambdaArgType>::value && Dims == 1,
item<Dims>,
916 typename TransformUserItemType<Dims, LambdaArgType>::type>::type;
928 #if !defined(__SYCL_DISABLE_PARALLEL_FOR_RANGE_ROUNDING__) && \
929 !defined(DPCPP_HOST_DEVICE_OPENMP) && \
930 !defined(DPCPP_HOST_DEVICE_PERF_NATIVE) && SYCL_LANGUAGE_VERSION >= 202001
932 size_t MinFactorX = 16;
934 size_t GoodFactorX = 32;
936 size_t MinRangeX = 1024;
940 this->GetRangeRoundingSettings(MinFactorX, GoodFactorX, MinRangeX);
958 std::string KName =
typeid(NameT *).name();
960 bool DisableRounding =
961 this->DisableRangeRounding() ||
962 (KI::getName() ==
nullptr || KI::getName()[0] ==
'\0');
967 if (!DisableRounding && (NumWorkItems[0] >= MinRangeX) &&
968 (NumWorkItems[0] % MinFactorX != 0)) {
974 ((NumWorkItems[0] + GoodFactorX - 1) / GoodFactorX) * GoodFactorX;
975 if (this->RangeRoundingTrace())
976 std::cout <<
"parallel_for range adjusted from " << NumWorkItems[0]
977 <<
" to " << NewValX << std::endl;
981 getRangeRoundedKernelLambda<NameWT, TransformedArgType, Dims>(
984 using KName = std::conditional_t<std::is_same<KernelType, NameT>::value,
985 decltype(Wrapper), NameWT>;
988 AdjustedRange.set_range_dim0(NewValX);
989 kernel_parallel_for_wrapper<KName, TransformedArgType, decltype(Wrapper),
990 PropertiesT>(Wrapper);
991 #ifndef __SYCL_DEVICE_ONLY__
992 detail::checkValueRange<Dims>(AdjustedRange);
993 MNDRDesc.set(std::move(AdjustedRange));
994 StoreLambda<KName, decltype(Wrapper), Dims, TransformedArgType>(
996 setType(detail::CG::Kernel);
1004 kernel_parallel_for_wrapper<NameT, TransformedArgType, KernelType,
1006 #ifndef __SYCL_DEVICE_ONLY__
1007 detail::checkValueRange<Dims>(NumWorkItems);
1008 MNDRDesc.set(std::move(NumWorkItems));
1009 StoreLambda<NameT, KernelType, Dims, TransformedArgType>(
1011 setType(detail::CG::Kernel);
1029 template <
typename KernelName,
typename KernelType,
int Dims,
1030 typename PropertiesT>
1031 void parallel_for_impl(
nd_range<Dims> ExecutionRange, PropertiesT,
1033 throwIfActionIsCreated();
1039 using LambdaArgType =
1040 sycl::detail::lambda_arg_type<KernelType, nd_item<Dims>>;
1043 using TransformedArgType =
1044 typename TransformUserItemType<Dims, LambdaArgType>::type;
1045 (void)ExecutionRange;
1046 kernel_parallel_for_wrapper<NameT, TransformedArgType, KernelType,
1048 #ifndef __SYCL_DEVICE_ONLY__
1049 detail::checkValueRange<Dims>(ExecutionRange);
1050 MNDRDesc.set(std::move(ExecutionRange));
1051 StoreLambda<NameT, KernelType, Dims, TransformedArgType>(
1053 setType(detail::CG::Kernel);
1066 throwIfActionIsCreated();
1068 detail::checkValueRange<Dims>(NumWorkItems);
1069 MNDRDesc.set(std::move(NumWorkItems));
1070 setType(detail::CG::Kernel);
1071 extractArgsAndReqs();
1072 MKernelName = getKernelName();
1085 template <
typename KernelName,
typename KernelType,
int Dims,
1086 typename PropertiesT =
1088 void parallel_for_work_group_lambda_impl(
range<Dims> NumWorkGroups,
1090 throwIfActionIsCreated();
1096 using LambdaArgType =
1097 sycl::detail::lambda_arg_type<KernelType, group<Dims>>;
1098 (void)NumWorkGroups;
1099 kernel_parallel_for_work_group_wrapper<NameT, LambdaArgType, KernelType,
1101 #ifndef __SYCL_DEVICE_ONLY__
1102 detail::checkValueRange<Dims>(NumWorkGroups);
1103 MNDRDesc.setNumWorkGroups(NumWorkGroups);
1104 StoreLambda<NameT, KernelType, Dims, LambdaArgType>(std::move(
KernelFunc));
1105 setType(detail::CG::Kernel);
1121 template <
typename KernelName,
typename KernelType,
int Dims,
1122 typename PropertiesT =
1124 void parallel_for_work_group_lambda_impl(
range<Dims> NumWorkGroups,
1127 throwIfActionIsCreated();
1133 using LambdaArgType =
1134 sycl::detail::lambda_arg_type<KernelType, group<Dims>>;
1135 (void)NumWorkGroups;
1137 kernel_parallel_for_work_group_wrapper<NameT, LambdaArgType, KernelType,
1139 #ifndef __SYCL_DEVICE_ONLY__
1142 detail::checkValueRange<Dims>(ExecRange);
1143 MNDRDesc.set(std::move(ExecRange));
1144 StoreLambda<NameT, KernelType, Dims, LambdaArgType>(std::move(
KernelFunc));
1145 setType(detail::CG::Kernel);
1149 #ifdef SYCL_LANGUAGE_VERSION
1150 #define __SYCL_KERNEL_ATTR__ [[clang::sycl_kernel]]
1152 #define __SYCL_KERNEL_ATTR__
1157 template <
typename KernelName,
typename KernelType,
typename... Props>
1158 #ifdef __SYCL_DEVICE_ONLY__
1159 [[__sycl_detail__::add_ir_attributes_function(
1160 ext::oneapi::experimental::detail::PropertyMetaInfo<Props>::name...,
1161 ext::oneapi::experimental::detail::PropertyMetaInfo<Props>::value...)]]
1165 #ifdef __SYCL_DEVICE_ONLY__
1174 template <
typename KernelName,
typename KernelType,
typename... Props>
1175 #ifdef __SYCL_DEVICE_ONLY__
1176 [[__sycl_detail__::add_ir_attributes_function(
1177 ext::oneapi::experimental::detail::PropertyMetaInfo<Props>::name...,
1178 ext::oneapi::experimental::detail::PropertyMetaInfo<Props>::value...)]]
1182 #ifdef __SYCL_DEVICE_ONLY__
1192 template <
typename KernelName,
typename ElementType,
typename KernelType,
1194 #ifdef __SYCL_DEVICE_ONLY__
1195 [[__sycl_detail__::add_ir_attributes_function(
1196 ext::oneapi::experimental::detail::PropertyMetaInfo<Props>::name...,
1197 ext::oneapi::experimental::detail::PropertyMetaInfo<Props>::value...)]]
1201 #ifdef __SYCL_DEVICE_ONLY__
1202 KernelFunc(detail::Builder::getElement(detail::declptr<ElementType>()));
1210 template <
typename KernelName,
typename ElementType,
typename KernelType,
1212 #ifdef __SYCL_DEVICE_ONLY__
1213 [[__sycl_detail__::add_ir_attributes_function(
1214 ext::oneapi::experimental::detail::PropertyMetaInfo<Props>::name...,
1215 ext::oneapi::experimental::detail::PropertyMetaInfo<Props>::value...)]]
1219 #ifdef __SYCL_DEVICE_ONLY__
1220 KernelFunc(detail::Builder::getElement(detail::declptr<ElementType>()), KH);
1229 template <
typename KernelName,
typename ElementType,
typename KernelType,
1231 #ifdef __SYCL_DEVICE_ONLY__
1232 [[__sycl_detail__::add_ir_attributes_function(
1233 ext::oneapi::experimental::detail::PropertyMetaInfo<Props>::name...,
1234 ext::oneapi::experimental::detail::PropertyMetaInfo<Props>::value...)]]
1238 #ifdef __SYCL_DEVICE_ONLY__
1239 KernelFunc(detail::Builder::getElement(detail::declptr<ElementType>()));
1247 template <
typename KernelName,
typename ElementType,
typename KernelType,
1249 #ifdef __SYCL_DEVICE_ONLY__
1250 [[__sycl_detail__::add_ir_attributes_function(
1251 ext::oneapi::experimental::detail::PropertyMetaInfo<Props>::name...,
1252 ext::oneapi::experimental::detail::PropertyMetaInfo<Props>::value...)]]
1256 kernel_handler KH) {
1257 #ifdef __SYCL_DEVICE_ONLY__
1258 KernelFunc(detail::Builder::getElement(detail::declptr<ElementType>()), KH);
1265 template <
typename... Props>
struct KernelPropertiesUnpackerImpl {
1272 template <
typename... TypesToForward,
typename... ArgsTy>
1273 static void kernel_single_task_unpack(handler *h, ArgsTy... Args) {
1274 h->kernel_single_task<TypesToForward..., Props...>(Args...);
1277 template <
typename... TypesToForward,
typename... ArgsTy>
1278 static void kernel_parallel_for_unpack(handler *h, ArgsTy... Args) {
1279 h->kernel_parallel_for<TypesToForward..., Props...>(Args...);
1282 template <
typename... TypesToForward,
typename... ArgsTy>
1283 static void kernel_parallel_for_work_group_unpack(handler *h,
1285 h->kernel_parallel_for_work_group<TypesToForward..., Props...>(Args...);
1289 template <
typename PropertiesT>
1290 struct KernelPropertiesUnpacker :
public KernelPropertiesUnpackerImpl<> {
1294 ext::oneapi::experimental::is_property_list<PropertiesT>::value,
1295 "Template type is not a property list.");
1298 template <
typename... Props>
1299 struct KernelPropertiesUnpacker<
1301 :
public KernelPropertiesUnpackerImpl<Props...> {};
1315 template <
typename KernelType,
typename PropertiesT,
bool HasKernelHandlerArg,
1318 #ifdef __SYCL_DEVICE_ONLY__
1319 detail::CheckDeviceCopyable<KernelType>();
1321 using MergedPropertiesT =
1322 typename detail::GetMergedKernelProperties<KernelType,
1324 using Unpacker = KernelPropertiesUnpacker<MergedPropertiesT>;
1325 if constexpr (HasKernelHandlerArg) {
1336 template <
typename KernelName,
typename KernelType,
1337 typename PropertiesT =
1340 unpack<KernelType, PropertiesT,
1341 detail::KernelLambdaHasKernelHandlerArgT<KernelType>::value>(
1342 KernelFunc, [&](
auto Unpacker,
auto... args) {
1343 Unpacker.template kernel_single_task_unpack<KernelName, KernelType>(
1348 template <
typename KernelName,
typename ElementType,
typename KernelType,
1349 typename PropertiesT =
1352 unpack<KernelType, PropertiesT,
1353 detail::KernelLambdaHasKernelHandlerArgT<KernelType,
1354 ElementType>::value>(
1355 KernelFunc, [&](
auto Unpacker,
auto... args) {
1356 Unpacker.template kernel_parallel_for_unpack<KernelName, ElementType,
1357 KernelType>(args...);
1361 template <
typename KernelName,
typename ElementType,
typename KernelType,
1362 typename PropertiesT =
1365 unpack<KernelType, PropertiesT,
1366 detail::KernelLambdaHasKernelHandlerArgT<KernelType,
1367 ElementType>::value>(
1368 KernelFunc, [&](
auto Unpacker,
auto... args) {
1369 Unpacker.template kernel_parallel_for_work_group_unpack<
1370 KernelName, ElementType, KernelType>(args...);
1381 template <
typename KernelName,
typename KernelType,
1382 typename PropertiesT =
1385 throwIfActionIsCreated();
1389 typename detail::get_kernel_name_t<KernelName, KernelType>::name;
1390 verifyUsedKernelBundle(detail::KernelInfo<NameT>::getName());
1391 kernel_single_task_wrapper<NameT, KernelType, PropertiesT>(
KernelFunc);
1392 #ifndef __SYCL_DEVICE_ONLY__
1395 MNDRDesc.set(range<1>{1});
1397 StoreLambda<NameT, KernelType, 1,
void>(
KernelFunc);
1398 setType(detail::CG::Kernel);
1402 void setStateExplicitKernelBundle();
1403 void setStateSpecConstSet();
1404 bool isStateExplicitKernelBundle()
const;
1406 std::shared_ptr<detail::kernel_bundle_impl>
1407 getOrInsertHandlerKernelBundle(
bool Insert)
const;
1409 void setHandlerKernelBundle(kernel Kernel);
1411 void setHandlerKernelBundle(
1412 const std::shared_ptr<detail::kernel_bundle_impl> &NewKernelBundleImpPtr);
1414 template <
typename FuncT>
1416 detail::check_fn_signature<detail::remove_reference_t<FuncT>,
1418 detail::check_fn_signature<detail::remove_reference_t<FuncT>,
1419 void(interop_handle)>::value>
1420 host_task_impl(FuncT &&Func) {
1421 throwIfActionIsCreated();
1423 MNDRDesc.set(range<1>(1));
1424 MArgs = std::move(MAssociatedAccesors);
1426 MHostTask.reset(
new detail::HostTask(std::move(Func)));
1428 setType(detail::CG::CodeplayHostTask);
1437 template <auto &SpecName>
1441 setStateSpecConstSet();
1444 getOrInsertHandlerKernelBundle(
true);
1446 detail::createSyclObjFromImpl<kernel_bundle<bundle_state::input>>(
1448 .set_specialization_constant<SpecName>(Value);
1451 template <auto &SpecName>
1455 if (isStateExplicitKernelBundle())
1457 "Specialization constants cannot be read after "
1458 "explicitly setting the used kernel bundle");
1461 getOrInsertHandlerKernelBundle(
true);
1463 return detail::createSyclObjFromImpl<kernel_bundle<bundle_state::input>>(
1465 .get_specialization_constant<SpecName>();
1479 template <
typename DataT,
int Dims,
access::mode AccMode,
1482 if (Acc.is_placeholder())
1489 void depends_on(
event Event);
1494 void depends_on(
const std::vector<event> &Events);
1496 template <
typename T>
1500 template <
typename U,
typename T>
1504 static constexpr
bool value =
1505 std::is_trivially_copyable<detail::remove_reference_t<T>>::value
1506 #if SYCL_LANGUAGE_VERSION && SYCL_LANGUAGE_VERSION <= 201707
1507 && std::is_standard_layout<detail::remove_reference_t<T>>::value
1511 std::is_pointer<remove_cv_ref_t<T>>::value)
1521 template <
typename T>
1524 setArgHelper(ArgIndex, std::move(Arg));
1532 setArgHelper(ArgIndex, std::move(Arg));
1535 template <
typename DataT,
int Dims>
1537 setArgHelper(ArgIndex, std::move(Arg));
1545 template <
typename... Ts>
void set_args(Ts &&...Args) {
1546 setArgsHelper(0, std::move(Args)...);
1556 template <
typename KernelName = detail::auto_name,
typename KernelType>
1558 single_task_lambda_impl<KernelName>(
KernelFunc);
1561 template <
typename KernelName = detail::auto_name,
typename KernelType>
1563 parallel_for_lambda_impl<KernelName>(NumWorkItems, std::move(
KernelFunc));
1566 template <
typename KernelName = detail::auto_name,
typename KernelType>
1568 parallel_for_lambda_impl<KernelName>(NumWorkItems, std::move(
KernelFunc));
1571 template <
typename KernelName = detail::auto_name,
typename KernelType>
1573 parallel_for_lambda_impl<KernelName>(NumWorkItems, std::move(
KernelFunc));
1580 template <
typename FuncT>
1582 "run_on_host_intel() is deprecated, use host_task() instead")
1583 void run_on_host_intel(FuncT Func) {
1584 throwIfActionIsCreated();
1589 MArgs = std::move(MAssociatedAccesors);
1591 setType(detail::CG::RunOnHostIntel);
1595 template <
typename FuncT>
1602 host_task_impl(Func);
1621 void parallel_for(
range<Dims> NumWorkItems,
id<Dims> WorkItemOffset,
1623 throwIfActionIsCreated();
1627 using LambdaArgType = sycl::detail::lambda_arg_type<KernelType, item<Dims>>;
1629 (void)WorkItemOffset;
1630 kernel_parallel_for_wrapper<NameT, LambdaArgType>(
KernelFunc);
1631 #ifndef __SYCL_DEVICE_ONLY__
1632 detail::checkValueRange<Dims>(NumWorkItems, WorkItemOffset);
1633 MNDRDesc.set(std::move(NumWorkItems), std::move(WorkItemOffset));
1634 StoreLambda<NameT, KernelType, Dims, LambdaArgType>(std::move(
KernelFunc));
1635 setType(detail::CG::Kernel);
1653 parallel_for_work_group_lambda_impl<KernelName>(NumWorkGroups,
KernelFunc);
1673 parallel_for_work_group_lambda_impl<KernelName>(NumWorkGroups,
1684 throwIfActionIsCreated();
1686 setHandlerKernelBundle(Kernel);
1691 setType(detail::CG::Kernel);
1692 extractArgsAndReqs();
1693 MKernelName = getKernelName();
1697 parallel_for_impl(NumWorkItems, Kernel);
1701 parallel_for_impl(NumWorkItems, Kernel);
1705 parallel_for_impl(NumWorkItems, Kernel);
1718 void parallel_for(
range<Dims> NumWorkItems,
id<Dims> WorkItemOffset,
1720 throwIfActionIsCreated();
1722 detail::checkValueRange<Dims>(NumWorkItems, WorkItemOffset);
1723 MNDRDesc.set(std::move(NumWorkItems), std::move(WorkItemOffset));
1724 setType(detail::CG::Kernel);
1725 extractArgsAndReqs();
1726 MKernelName = getKernelName();
1738 throwIfActionIsCreated();
1740 detail::checkValueRange<Dims>(NDRange);
1741 MNDRDesc.set(std::move(NDRange));
1742 setType(detail::CG::Kernel);
1743 extractArgsAndReqs();
1744 MKernelName = getKernelName();
1753 template <
typename KernelName = detail::auto_name,
typename KernelType>
1755 throwIfActionIsCreated();
1757 setHandlerKernelBundle(Kernel);
1763 #ifndef __SYCL_DEVICE_ONLY__
1768 setType(detail::CG::Kernel);
1769 if (!MIsHost && !lambdaAndKernelHaveEqualName<NameT>()) {
1770 extractArgsAndReqs();
1771 MKernelName = getKernelName();
1773 StoreLambda<NameT, KernelType, 1,
void>(std::move(
KernelFunc));
1775 detail::CheckDeviceCopyable<KernelType>();
1782 template <
typename FuncT>
1784 void interop_task(FuncT Func) {
1787 setType(detail::CG::CodeplayInteropTask);
1801 throwIfActionIsCreated();
1803 setHandlerKernelBundle(Kernel);
1807 using LambdaArgType = sycl::detail::lambda_arg_type<KernelType, item<Dims>>;
1810 kernel_parallel_for_wrapper<NameT, LambdaArgType>(
KernelFunc);
1811 #ifndef __SYCL_DEVICE_ONLY__
1812 detail::checkValueRange<Dims>(NumWorkItems);
1813 MNDRDesc.set(std::move(NumWorkItems));
1815 setType(detail::CG::Kernel);
1816 if (!MIsHost && !lambdaAndKernelHaveEqualName<NameT>()) {
1817 extractArgsAndReqs();
1818 MKernelName = getKernelName();
1820 StoreLambda<NameT, KernelType, Dims, LambdaArgType>(
1839 throwIfActionIsCreated();
1841 setHandlerKernelBundle(Kernel);
1845 using LambdaArgType = sycl::detail::lambda_arg_type<KernelType, item<Dims>>;
1848 (void)WorkItemOffset;
1849 kernel_parallel_for_wrapper<NameT, LambdaArgType>(
KernelFunc);
1850 #ifndef __SYCL_DEVICE_ONLY__
1851 detail::checkValueRange<Dims>(NumWorkItems, WorkItemOffset);
1852 MNDRDesc.set(std::move(NumWorkItems), std::move(WorkItemOffset));
1854 setType(detail::CG::Kernel);
1855 if (!MIsHost && !lambdaAndKernelHaveEqualName<NameT>()) {
1856 extractArgsAndReqs();
1857 MKernelName = getKernelName();
1859 StoreLambda<NameT, KernelType, Dims, LambdaArgType>(
1877 throwIfActionIsCreated();
1879 setHandlerKernelBundle(Kernel);
1883 using LambdaArgType =
1884 sycl::detail::lambda_arg_type<KernelType, nd_item<Dims>>;
1887 kernel_parallel_for_wrapper<NameT, LambdaArgType>(
KernelFunc);
1888 #ifndef __SYCL_DEVICE_ONLY__
1889 detail::checkValueRange<Dims>(NDRange);
1890 MNDRDesc.set(std::move(NDRange));
1892 setType(detail::CG::Kernel);
1893 if (!MIsHost && !lambdaAndKernelHaveEqualName<NameT>()) {
1894 extractArgsAndReqs();
1895 MKernelName = getKernelName();
1897 StoreLambda<NameT, KernelType, Dims, LambdaArgType>(
1919 throwIfActionIsCreated();
1921 setHandlerKernelBundle(Kernel);
1925 using LambdaArgType =
1926 sycl::detail::lambda_arg_type<KernelType, group<Dims>>;
1928 (void)NumWorkGroups;
1929 kernel_parallel_for_work_group_wrapper<NameT, LambdaArgType>(
KernelFunc);
1930 #ifndef __SYCL_DEVICE_ONLY__
1931 detail::checkValueRange<Dims>(NumWorkGroups);
1932 MNDRDesc.setNumWorkGroups(NumWorkGroups);
1934 StoreLambda<NameT, KernelType, Dims, LambdaArgType>(std::move(
KernelFunc));
1935 setType(detail::CG::Kernel);
1959 throwIfActionIsCreated();
1961 setHandlerKernelBundle(Kernel);
1965 using LambdaArgType =
1966 sycl::detail::lambda_arg_type<KernelType, group<Dims>>;
1968 (void)NumWorkGroups;
1970 kernel_parallel_for_work_group_wrapper<NameT, LambdaArgType>(
KernelFunc);
1971 #ifndef __SYCL_DEVICE_ONLY__
1974 detail::checkValueRange<Dims>(ExecRange);
1975 MNDRDesc.set(std::move(ExecRange));
1977 StoreLambda<NameT, KernelType, Dims, LambdaArgType>(std::move(
KernelFunc));
1978 setType(detail::CG::Kernel);
1983 typename PropertiesT>
1987 single_task_lambda_impl<KernelName, KernelType, PropertiesT>(
KernelFunc);
1991 typename PropertiesT>
1996 parallel_for_lambda_impl<KernelName, KernelType, 1, PropertiesT>(
2001 typename PropertiesT>
2006 parallel_for_lambda_impl<KernelName, KernelType, 2, PropertiesT>(
2011 typename PropertiesT>
2016 parallel_for_lambda_impl<KernelName, KernelType, 3, PropertiesT>(
2021 typename PropertiesT,
int Dims>
2026 parallel_for_impl<KernelName>(Range, Properties, std::move(
KernelFunc));
2032 typename PropertiesT,
typename... RestT>
2034 (
sizeof...(RestT) > 1) &&
2038 detail::reduction_parallel_for<KernelName>(*
this, Range, Properties,
2039 std::forward<RestT>(Rest)...);
2046 parallel_for<KernelName>(
2048 std::forward<RestT>(Rest)...);
2052 typename PropertiesT,
typename... RestT>
2054 (
sizeof...(RestT) > 1) &&
2058 detail::reduction_parallel_for<KernelName>(*
this, Range, Properties,
2059 std::forward<RestT>(Rest)...);
2066 parallel_for<KernelName>(
2068 std::forward<RestT>(Rest)...);
2074 int Dims,
typename PropertiesT>
2077 parallel_for_work_group_lambda_impl<KernelName, KernelType, Dims,
2082 int Dims,
typename PropertiesT>
2086 parallel_for_work_group_lambda_impl<KernelName, KernelType, Dims,
2087 PropertiesT>(NumWorkGroups,
2092 #undef _KERNELFUNCPARAM
2107 std::shared_ptr<T_Dst> Dst) {
2108 throwIfActionIsCreated();
2109 static_assert(isValidTargetForExplicitOp(AccessTarget),
2110 "Invalid accessor target for the copy method.");
2111 static_assert(isValidModeForSourceAccessor(
AccessMode),
2112 "Invalid accessor mode for the copy method.");
2115 MSharedPtrStorage.push_back(Dst);
2116 typename std::shared_ptr<T_Dst>::element_type *RawDstPtr = Dst.get();
2117 copy(Src, RawDstPtr);
2133 throwIfActionIsCreated();
2134 static_assert(isValidTargetForExplicitOp(AccessTarget),
2135 "Invalid accessor target for the copy method.");
2136 static_assert(isValidModeForDestinationAccessor(
AccessMode),
2137 "Invalid accessor mode for the copy method.");
2140 MSharedPtrStorage.push_back(Src);
2141 typename std::shared_ptr<T_Src>::element_type *RawSrcPtr = Src.get();
2142 copy(RawSrcPtr, Dst);
2157 throwIfActionIsCreated();
2158 static_assert(isValidTargetForExplicitOp(AccessTarget),
2159 "Invalid accessor target for the copy method.");
2160 static_assert(isValidModeForSourceAccessor(
AccessMode),
2161 "Invalid accessor mode for the copy method.");
2162 #ifndef __SYCL_DEVICE_ONLY__
2166 copyAccToPtrHost(Src, Dst);
2170 setType(detail::CG::CopyAccToPtr);
2175 MRequirements.push_back(AccImpl.get());
2176 MSrcPtr =
static_cast<void *
>(AccImpl.get());
2177 MDstPtr =
static_cast<void *
>(Dst);
2180 MAccStorage.push_back(std::move(AccImpl));
2196 throwIfActionIsCreated();
2197 static_assert(isValidTargetForExplicitOp(AccessTarget),
2198 "Invalid accessor target for the copy method.");
2199 static_assert(isValidModeForDestinationAccessor(
AccessMode),
2200 "Invalid accessor mode for the copy method.");
2201 #ifndef __SYCL_DEVICE_ONLY__
2205 copyPtrToAccHost(Src, Dst);
2209 setType(detail::CG::CopyPtrToAcc);
2214 MRequirements.push_back(AccImpl.get());
2215 MSrcPtr =
const_cast<T_Src *
>(Src);
2216 MDstPtr =
static_cast<void *
>(AccImpl.get());
2219 MAccStorage.push_back(std::move(AccImpl));
2230 typename T_Src,
int Dims_Src,
access::mode AccessMode_Src,
2238 accessor<T_Dst, Dims_Dst, AccessMode_Dst, AccessTarget_Dst,
2241 throwIfActionIsCreated();
2242 static_assert(isValidTargetForExplicitOp(AccessTarget_Src),
2243 "Invalid source accessor target for the copy method.");
2244 static_assert(isValidTargetForExplicitOp(AccessTarget_Dst),
2245 "Invalid destination accessor target for the copy method.");
2246 static_assert(isValidModeForSourceAccessor(AccessMode_Src),
2247 "Invalid source accessor mode for the copy method.");
2248 static_assert(isValidModeForDestinationAccessor(AccessMode_Dst),
2249 "Invalid destination accessor mode for the copy method.");
2250 if (Dst.get_size() < Src.get_size())
2251 throw sycl::invalid_object_error(
2252 "The destination accessor size is too small to copy the memory into.",
2253 PI_ERROR_INVALID_OPERATION);
2255 if (copyAccToAccHelper(Src, Dst))
2257 setType(detail::CG::CopyAccToAcc);
2265 MRequirements.push_back(AccImplSrc.get());
2266 MRequirements.push_back(AccImplDst.get());
2267 MSrcPtr = AccImplSrc.get();
2268 MDstPtr = AccImplDst.get();
2271 MAccStorage.push_back(std::move(AccImplSrc));
2272 MAccStorage.push_back(std::move(AccImplDst));
2284 throwIfActionIsCreated();
2285 static_assert(isValidTargetForExplicitOp(AccessTarget),
2286 "Invalid accessor target for the update_host method.");
2287 setType(detail::CG::UpdateHost);
2292 MDstPtr =
static_cast<void *
>(AccImpl.get());
2293 MRequirements.push_back(AccImpl.get());
2294 MAccStorage.push_back(std::move(AccImpl));
2313 throwIfActionIsCreated();
2315 static_assert(isValidTargetForExplicitOp(AccessTarget),
2316 "Invalid accessor target for the fill method.");
2317 if (!MIsHost && (((Dims == 1) && isConstOrGlobal(AccessTarget)) ||
2318 isImageOrImageArray(AccessTarget))) {
2319 setType(detail::CG::Fill);
2324 MDstPtr =
static_cast<void *
>(AccImpl.get());
2325 MRequirements.push_back(AccImpl.get());
2326 MAccStorage.push_back(std::move(AccImpl));
2328 MPattern.resize(
sizeof(T));
2329 auto PatternPtr =
reinterpret_cast<T *
>(MPattern.data());
2330 *PatternPtr = Pattern;
2338 Range, [=](
id<Dims> Index) { Dst[Index] = Pattern; });
2348 template <
typename T>
void fill(
void *Ptr,
const T &Pattern,
size_t Count) {
2349 throwIfActionIsCreated();
2350 static_assert(std::is_trivially_copyable<T>::value,
2351 "Pattern must be trivially copyable");
2352 parallel_for<class __usmfill<T>>(
range<1>(Count), [=](
id<1> Index) {
2353 T *CastedPtr =
static_cast<T *
>(Ptr);
2354 CastedPtr[Index] = Pattern;
2362 throwIfActionIsCreated();
2363 setType(detail::CG::Barrier);
2378 void ext_oneapi_barrier(
const std::vector<event> &WaitList);
2399 void memcpy(
void *Dest, const
void *Src,
size_t Count);
2411 template <typename T>
void copy(const T *Src, T *Dest,
size_t Count) {
2412 this->
memcpy(Dest, Src, Count *
sizeof(T));
2423 void memset(
void *Dest,
int Value,
size_t Count);
2431 void prefetch(
const void *Ptr,
size_t Count);
2439 void mem_advise(
const void *Ptr,
size_t Length,
int Advice);
2458 template <
typename T =
unsigned char,
2459 typename = std::enable_if_t<std::is_same_v<T, unsigned char>>>
2461 size_t SrcPitch,
size_t Width,
size_t Height) {
2462 throwIfActionIsCreated();
2463 if (Width > DestPitch)
2465 "Destination pitch must be greater than or equal "
2466 "to the width specified in 'ext_oneapi_memcpy2d'");
2467 if (Width > SrcPitch)
2469 "Source pitch must be greater than or equal "
2470 "to the width specified in 'ext_oneapi_memcpy2d'");
2473 if (supportsUSMMemcpy2D())
2474 ext_oneapi_memcpy2d_impl(Dest, DestPitch, Src, SrcPitch, Width, Height);
2476 commonUSMCopy2DFallbackKernel<T>(Src, SrcPitch, Dest, DestPitch, Width,
2494 template <
typename T>
2496 size_t DestPitch,
size_t Width,
size_t Height) {
2497 if (Width > DestPitch)
2499 "Destination pitch must be greater than or equal "
2500 "to the width specified in 'ext_oneapi_copy2d'");
2501 if (Width > SrcPitch)
2503 "Source pitch must be greater than or equal "
2504 "to the width specified in 'ext_oneapi_copy2d'");
2507 if (supportsUSMMemcpy2D())
2508 ext_oneapi_memcpy2d_impl(Dest, DestPitch *
sizeof(T), Src,
2509 SrcPitch *
sizeof(T), Width *
sizeof(T), Height);
2511 commonUSMCopy2DFallbackKernel<T>(Src, SrcPitch, Dest, DestPitch, Width,
2530 template <
typename T =
unsigned char,
2531 typename = std::enable_if_t<std::is_same_v<T, unsigned char>>>
2533 size_t Width,
size_t Height) {
2534 throwIfActionIsCreated();
2535 if (Width > DestPitch)
2537 "Destination pitch must be greater than or equal "
2538 "to the width specified in 'ext_oneapi_memset2d'");
2539 T CharVal =
static_cast<T
>(Value);
2542 if (supportsUSMMemset2D())
2543 ext_oneapi_memset2d_impl(Dest, DestPitch, Value, Width, Height);
2545 commonUSMFill2DFallbackKernel(Dest, DestPitch, CharVal, Width, Height);
2560 template <
typename T>
2562 size_t Width,
size_t Height) {
2563 throwIfActionIsCreated();
2564 static_assert(std::is_trivially_copyable<T>::value,
2565 "Pattern must be trivially copyable");
2566 if (Width > DestPitch)
2568 "Destination pitch must be greater than or equal "
2569 "to the width specified in 'ext_oneapi_fill2d'");
2572 if (supportsUSMFill2D())
2573 ext_oneapi_fill2d_impl(Dest, DestPitch, &Pattern,
sizeof(T), Width,
2576 commonUSMFill2DFallbackKernel(Dest, DestPitch, Pattern, Width, Height);
2580 std::shared_ptr<detail::handler_impl> MImpl;
2581 std::shared_ptr<detail::queue_impl> MQueue;
2586 std::vector<std::vector<char>> MArgsStorage;
2587 std::vector<detail::AccessorImplPtr> MAccStorage;
2588 std::vector<detail::LocalAccessorImplPtr> MLocalAccStorage;
2589 std::vector<std::shared_ptr<detail::stream_impl>> MStreamStorage;
2590 mutable std::vector<std::shared_ptr<const void>> MSharedPtrStorage;
2592 std::vector<detail::ArgDesc> MArgs;
2596 std::vector<detail::ArgDesc> MAssociatedAccesors;
2598 std::vector<detail::AccessorImplHost *> MRequirements;
2601 std::string MKernelName;
2603 std::shared_ptr<detail::kernel_impl> MKernel;
2609 void *MSrcPtr =
nullptr;
2611 void *MDstPtr =
nullptr;
2615 std::vector<char> MPattern;
2617 std::unique_ptr<detail::HostKernelBase> MHostKernel;
2619 std::unique_ptr<detail::HostTask> MHostTask;
2622 std::unique_ptr<detail::InteropTask> MInteropTask;
2624 std::vector<detail::EventImplPtr> MEvents;
2627 std::vector<detail::EventImplPtr> MEventsWaitWithBarrier;
2629 bool MIsHost =
false;
2632 bool MIsFinalized =
false;
2638 template <
typename DataT,
int Dims,
access::mode AccMode,
2640 typename PropertyListT>
2648 friend class stream;
2652 template <
typename T,
class BinaryOperation,
int Dims,
size_t Extent,
2657 template <
class FunctorTy>
2661 typename PropertiesT,
typename... RestT>
2663 PropertiesT Properties,
2667 typename PropertiesT,
typename... RestT>
2670 PropertiesT Properties, RestT... Rest);
2672 #ifndef __SYCL_DEVICE_ONLY__
2678 friend class ::MockHandler;
2681 bool DisableRangeRounding();
2683 bool RangeRoundingTrace();
2685 void GetRangeRoundingSettings(
size_t &MinFactor,
size_t &GoodFactor,
2688 template <
typename WrapperT,
typename TransformedArgType,
int Dims,
2689 typename KernelType,
2691 KernelType, TransformedArgType>::value> * =
nullptr>
2692 auto getRangeRoundedKernelLambda(KernelType
KernelFunc,
2695 KernelType>(NumWorkItems,
2699 template <
typename WrapperT,
typename TransformedArgType,
int Dims,
2700 typename KernelType,
2702 KernelType, TransformedArgType>::value> * =
nullptr>
2703 auto getRangeRoundedKernelLambda(KernelType
KernelFunc,
2710 bool supportsUSMMemcpy2D();
2711 bool supportsUSMFill2D();
2712 bool supportsUSMMemset2D();
2715 id<2> computeFallbackKernelBounds(
size_t Width,
size_t Height);
2719 template <
typename T>
2720 void commonUSMCopy2DFallbackKernel(
const void *Src,
size_t SrcPitch,
2721 void *Dest,
size_t DestPitch,
size_t Width,
2724 id<2> Chunk = computeFallbackKernelBounds(Height, Width);
2725 id<2> Iterations = (Chunk + id<2>{Height, Width} - 1) / Chunk;
2726 parallel_for<class __usmmemcpy2d<T>>(
2727 range<2>{Chunk[0], Chunk[1]}, [=](id<2> Index) {
2728 T *CastedDest =
static_cast<T *
>(Dest);
2729 const T *CastedSrc =
static_cast<const T *
>(Src);
2730 for (uint32_t I = 0; I < Iterations[0]; ++I) {
2731 for (uint32_t J = 0; J < Iterations[1]; ++J) {
2732 id<2> adjustedIndex = Index + Chunk * id<2>{I, J};
2733 if (adjustedIndex[0] < Height && adjustedIndex[1] < Width) {
2734 CastedDest[adjustedIndex[0] * DestPitch + adjustedIndex[1]] =
2735 CastedSrc[adjustedIndex[0] * SrcPitch + adjustedIndex[1]];
2744 template <
typename T>
2745 void commonUSMFill2DFallbackKernel(
void *Dest,
size_t DestPitch,
2746 const T &Pattern,
size_t Width,
2749 id<2> Chunk = computeFallbackKernelBounds(Height, Width);
2750 id<2> Iterations = (Chunk + id<2>{Height, Width} - 1) / Chunk;
2751 parallel_for<class __usmfill2d<T>>(
2752 range<2>{Chunk[0], Chunk[1]}, [=](id<2> Index) {
2753 T *CastedDest =
static_cast<T *
>(Dest);
2754 for (uint32_t I = 0; I < Iterations[0]; ++I) {
2755 for (uint32_t J = 0; J < Iterations[1]; ++J) {
2756 id<2> adjustedIndex = Index + Chunk * id<2>{I, J};
2757 if (adjustedIndex[0] < Height && adjustedIndex[1] < Width) {
2758 CastedDest[adjustedIndex[0] * DestPitch + adjustedIndex[1]] =
2767 void ext_oneapi_memcpy2d_impl(
void *Dest,
size_t DestPitch,
const void *Src,
2768 size_t SrcPitch,
size_t Width,
size_t Height);
2771 void ext_oneapi_fill2d_impl(
void *Dest,
size_t DestPitch,
const void *Value,
2772 size_t ValueSize,
size_t Width,
size_t Height);
2775 void ext_oneapi_memset2d_impl(
void *Dest,
size_t DestPitch,
int Value,
2776 size_t Width,
size_t Height);
The file contains implementations of accessor class.
CGTYPE
Type of the command group.
void operator()(TransformedArgType Arg, kernel_handler KH) const
RoundedRangeKernelWithKH(range< Dims > NumWorkItems, KernelType KernelFunc)
RoundedRangeKernel(range< Dims > NumWorkItems, KernelType KernelFunc)
void operator()(TransformedArgType Arg) const
This class is the default KernelName template parameter type for kernel invocation APIs such as singl...
The SYCL device class encapsulates a single SYCL device on which kernels may be executed.
An event object can be used to synchronize memory transfers, enqueues of kernels and signaling barrie...
Command group handler class.
void fill(void *Ptr, const T &Pattern, size_t Count)
Fills the specified memory with the specified pattern.
void parallel_for(range< 2 > NumWorkItems, kernel Kernel)
void parallel_for(kernel Kernel, range< Dims > NumWorkItems, _KERNELFUNCPARAM(KernelFunc))
Defines and invokes a SYCL kernel function for the specified range.
void parallel_for_work_group(range< Dims > NumWorkGroups, PropertiesT, _KERNELFUNCPARAM(KernelFunc))
}@
void single_task(_KERNELFUNCPARAM(KernelFunc))
Defines and invokes a SYCL kernel function as a function object type.
void parallel_for(nd_range< Dims > NDRange, kernel Kernel)
Defines and invokes a SYCL kernel function for the specified range and offsets.
void parallel_for(range< 1 > NumWorkItems, _KERNELFUNCPARAM(KernelFunc))
detail::enable_if_t< ShouldEnableSetArg< T >::value, void > set_arg(int ArgIndex, T &&Arg)
Sets argument for OpenCL interoperability kernels.
void copy(accessor< T_Src, Dims, AccessMode, AccessTarget, IsPlaceholder > Src, std::shared_ptr< T_Dst > Dst)
Copies the content of memory object accessed by Src into the memory pointed by Dst.
std::enable_if_t<(sizeof...(RestT) > 1) &&detail::AreAllButLastReductions< RestT... >::value &&ext::oneapi::experimental::is_property_list< PropertiesT >::value > parallel_for(nd_range< Dims > Range, PropertiesT Properties, RestT &&...Rest)
void ext_oneapi_memset2d(void *Dest, size_t DestPitch, int Value, size_t Width, size_t Height)
Fills the memory pointed by a USM pointer with the value specified.
std::enable_if_t< ext::oneapi::experimental::is_property_list< PropertiesT >::value > parallel_for(range< 3 > NumWorkItems, PropertiesT, _KERNELFUNCPARAM(KernelFunc))
void parallel_for(range< 3 > NumWorkItems, _KERNELFUNCPARAM(KernelFunc))
handler & operator=(handler &&)=delete
handler(handler &&)=delete
void copy(accessor< T_Src, Dims_Src, AccessMode_Src, AccessTarget_Src, IsPlaceholder_Src > Src, accessor< T_Dst, Dims_Dst, AccessMode_Dst, AccessTarget_Dst, IsPlaceholder_Dst > Dst)
Copies the content of memory object accessed by Src to the memory object accessed by Dst.
void parallel_for_work_group(range< Dims > NumWorkGroups, range< Dims > WorkGroupSize, _KERNELFUNCPARAM(KernelFunc))
Hierarchical kernel invocation method of a kernel defined as a lambda encoding the body of each work-...
void parallel_for_work_group(range< Dims > NumWorkGroups, range< Dims > WorkGroupSize, PropertiesT, _KERNELFUNCPARAM(KernelFunc))
detail::enable_if_t< detail::check_fn_signature< detail::remove_reference_t< FuncT >, void()>::value||detail::check_fn_signature< detail::remove_reference_t< FuncT >, void(interop_handle)>::value > host_task(FuncT &&Func)
Enqueues a command to the SYCL runtime to invoke Func once.
std::enable_if_t< detail::AreAllButLastReductions< RestT... >::value > parallel_for(range< Dims > Range, RestT &&...Rest)
void ext_oneapi_fill2d(void *Dest, size_t DestPitch, const T &Pattern, size_t Width, size_t Height)
Fills the memory pointed by a USM pointer with the value specified.
void parallel_for(range< 3 > NumWorkItems, kernel Kernel)
void parallel_for(range< 1 > NumWorkItems, kernel Kernel)
void single_task(kernel Kernel, _KERNELFUNCPARAM(KernelFunc))
Defines and invokes a SYCL kernel function.
handler & operator=(const handler &)=delete
void parallel_for(kernel Kernel, nd_range< Dims > NDRange, _KERNELFUNCPARAM(KernelFunc))
Defines and invokes a SYCL kernel function for the specified range and offsets.
void parallel_for_work_group(kernel Kernel, range< Dims > NumWorkGroups, range< Dims > WorkGroupSize, _KERNELFUNCPARAM(KernelFunc))
Hierarchical kernel invocation method of a kernel.
std::enable_if_t< ext::oneapi::experimental::is_property_list< PropertiesT >::value > parallel_for(range< 2 > NumWorkItems, PropertiesT, _KERNELFUNCPARAM(KernelFunc))
void require(accessor< DataT, Dims, AccMode, AccTarget, isPlaceholder > Acc)
Requires access to the memory object associated with the placeholder accessor.
void fill(accessor< T, Dims, AccessMode, AccessTarget, IsPlaceholder, PropertyListT > Dst, const T &Pattern)
Fills memory pointed by accessor with the pattern given.
void update_host(accessor< T, Dims, AccessMode, AccessTarget, IsPlaceholder > Acc)
Provides guarantees that the memory object accessed via Acc is updated on the host after command grou...
void ext_oneapi_copy2d(const T *Src, size_t SrcPitch, T *Dest, size_t DestPitch, size_t Width, size_t Height)
Copies data from one 2D memory region to another, both pointed by USM pointers.
std::is_same< remove_cv_ref_t< U >, remove_cv_ref_t< T > > is_same_type
void ext_oneapi_memcpy2d(void *Dest, size_t DestPitch, const void *Src, size_t SrcPitch, size_t Width, size_t Height)
Copies data from one 2D memory region to another, both pointed by USM pointers.
void single_task(kernel Kernel)
Invokes a SYCL kernel.
std::enable_if_t< detail::AreAllButLastReductions< RestT... >::value > parallel_for(nd_range< Dims > Range, RestT &&...Rest)
typename detail::remove_cv_t< detail::remove_reference_t< T > > remove_cv_ref_t
void copy(const T_Src *Src, accessor< T_Dst, Dims, AccessMode, AccessTarget, IsPlaceholder > Dst)
Copies the content of memory pointed by Src into the memory object accessed by Dst.
void copy(std::shared_ptr< T_Src > Src, accessor< T_Dst, Dims, AccessMode, AccessTarget, IsPlaceholder > Dst)
Copies the content of memory pointed by Src into the memory object accessed by Dst.
handler(const handler &)=delete
void copy(accessor< T_Src, Dims, AccessMode, AccessTarget, IsPlaceholder > Src, T_Dst *Dst)
Copies the content of memory object accessed by Src into the memory pointed by Dst.
std::enable_if_t< ext::oneapi::experimental::is_property_list< PropertiesT >::value > parallel_for(nd_range< Dims > Range, PropertiesT Properties, _KERNELFUNCPARAM(KernelFunc))
void parallel_for_work_group(kernel Kernel, range< Dims > NumWorkGroups, _KERNELFUNCPARAM(KernelFunc))
Hierarchical kernel invocation method of a kernel.
void ext_oneapi_barrier()
Prevents any commands submitted afterward to this queue from executing until all commands previously ...
std::enable_if_t< ext::oneapi::experimental::is_property_list< PropertiesT >::value > parallel_for(range< 1 > NumWorkItems, PropertiesT, _KERNELFUNCPARAM(KernelFunc))
void parallel_for_work_group(range< Dims > NumWorkGroups, _KERNELFUNCPARAM(KernelFunc))
Hierarchical kernel invocation method of a kernel defined as a lambda encoding the body of each work-...
void set_specialization_constant(typename std::remove_reference_t< decltype(SpecName)>::value_type Value)
void set_args(Ts &&...Args)
Sets arguments for OpenCL interoperability kernels.
__SYCL_DEPRECATED("run_on_host_intel() is deprecated, use host_task() instead") void run_on_host_intel(FuncT Func)
Defines and invokes a SYCL kernel on host device.
std::enable_if_t< ext::oneapi::experimental::is_property_list< PropertiesT >::value > single_task(PropertiesT, _KERNELFUNCPARAM(KernelFunc))
void parallel_for(range< 2 > NumWorkItems, _KERNELFUNCPARAM(KernelFunc))
__SYCL_DEPRECATED("interop_task() is deprecated, use host_task() instead") void interop_task(FuncT Func)
Invokes a lambda on the host.
void set_arg(int ArgIndex, accessor< DataT, Dims, AccessMode, AccessTarget, IsPlaceholder > Arg)
std::remove_reference_t< decltype(SpecName)>::value_type get_specialization_constant() const
std::enable_if_t<(sizeof...(RestT) > 1) &&detail::AreAllButLastReductions< RestT... >::value &&ext::oneapi::experimental::is_property_list< PropertiesT >::value > parallel_for(range< Dims > Range, PropertiesT Properties, RestT &&...Rest)
Reductions.
void set_arg(int ArgIndex, local_accessor< DataT, Dims > Arg)
A unique identifier of an item in an index space.
Identifies an instance of the function object executing at each point in a range.
Provides an abstraction of a SYCL kernel.
Identifies an instance of the function object executing at each point in an nd_range.
id< dimensions > get_global_id() const
range< dimensions > get_global_range() const
id< dimensions > get_offset() const
Defines the iteration domain of both the work-groups and the overall dispatch.
Objects of the property_list class are containers for the SYCL properties.
Defines the iteration domain of either a single work-group in a parallel dispatch,...
#define __SYCL_INLINE_VER_NAMESPACE(X)
#define __SYCL2020_DEPRECATED(message)
__ESIMD_API void barrier()
Generic work-group barrier.
#define __SYCL_KERNEL_ATTR__
#define _KERNELFUNCPARAM(a)
__SYCL_EXTERN_STREAM_ATTRS ostream cout
Linked to standard output.
void withAuxHandler(handler &CGH, FunctorTy Func)
void finalizeHandler(handler &CGH)
void reduction_parallel_for(handler &CGH, nd_range< Dims > NDRange, PropertiesT Properties, RestT... Rest)
detail::enable_if_t< std::is_same< T, nd_range< Dims > >::value > checkValueRange(const T &V)
device getDeviceFromHandler(handler &CommandGroupHandlerRef)
size_t getLinearIndex(const T< Dims > &Index, const U< Dims > &Range)
typename std::remove_cv< T >::type remove_cv_t
id< 1 > getDelinearizedId(const range< 1 > &, size_t Index)
static Arg member_ptr_helper(RetType(Func::*)(Arg) const)
decltype(Obj::impl) getSyclObjImpl(const Obj &SyclObject)
SuggestedArgType argument_helper(...)
std::enable_if_t< KernelLambdaHasKernelHandlerArgT< KernelType, ArgType >::value > runKernelWithArg(KernelType KernelName, ArgType Arg)
std::enable_if_t< KernelLambdaHasKernelHandlerArgT< KernelType >::value > runKernelWithoutArg(KernelType KernelName)
std::shared_ptr< LocalAccessorImplHost > LocalAccessorImplPtr
typename std::remove_const< T >::type remove_const_t
std::shared_ptr< detail::kernel_bundle_impl > KernelBundleImplPtr
void memcpy(void *Dst, const void *Src, std::size_t Size)
void associateWithHandler(handler &, AccessorBaseHost *, access::target)
typename std::remove_reference< T >::type remove_reference_t
typename std::enable_if< B, T >::type enable_if_t
void write(GlobalBufAccessorT &GlobalFlushBuf, size_t FlushBufferSize, unsigned WIOffset, const char *Str, unsigned Len, unsigned Padding=0)
decltype(argument_helper< F, SuggestedArgType >(0)) lambda_arg_type
std::shared_ptr< AccessorImplHost > AccessorImplPtr
intptr_t OSModuleHandle
Uniquely identifies an operating system module (executable or a dynamic library)
F * storePlainArg(std::vector< std::vector< char >> &ArgStorage, T &&Arg)
prefetch_impl< _B > prefetch
typename merged_properties< LHSPropertiesT, RHSPropertiesT >::type merged_properties_t
properties< std::tuple< PropertyValueTs... > > properties_t
properties< std::tuple<> > empty_properties_t
class __SYCL_EBO __SYCL_SPECIAL_CLASS __SYCL_TYPE(local_accessor) local_accessor class __SYCL_EBO __SYCL_SPECIAL_CLASS Dimensions
constexpr mode_tag_t< access_mode::read_write > read_write
class __SYCL_EBO __SYCL_SPECIAL_CLASS __SYCL_TYPE(local_accessor) local_accessor class __SYCL_EBO __SYCL_SPECIAL_CLASS IsPlaceholder
class __SYCL_EBO __SYCL_SPECIAL_CLASS __SYCL_TYPE(local_accessor) local_accessor class __SYCL_EBO __SYCL_SPECIAL_CLASS AccessMode
std::error_code make_error_code(sycl::errc E) noexcept
Constructs an error code using e and sycl_category()
---— Error handling, matching OpenCL plugin semantics.
std::function< void(const sycl::nd_item< NDims > &)> KernelFunc
simd< _Tp, _Abi > max(const simd< _Tp, _Abi > &, const simd< _Tp, _Abi > &) noexcept
Predicate returning true if all template type parameters except the last one are reductions.
ext::oneapi::experimental::detail::merged_properties_t< PropertiesT, get_method_properties > type
typename ext::oneapi::experimental::detail::HasKernelPropertiesGetMethod< KernelType >::properties_t get_method_properties