61 #include <type_traits>
70 #if !SYCL_LANGUAGE_VERSION || SYCL_LANGUAGE_VERSION < 202001
71 #define __SYCL_NONCONST_FUNCTOR__
76 #ifdef __SYCL_NONCONST_FUNCTOR__
77 #define _KERNELFUNCPARAMTYPE KernelType
79 #define _KERNELFUNCPARAMTYPE const KernelType &
81 #define _KERNELFUNCPARAM(a) _KERNELFUNCPARAMTYPE a
83 #if defined(__SYCL_UNNAMED_LAMBDA__)
// Under __SYCL_UNNAMED_LAMBDA__ these well-known built-in kernel name tags
// (used by fill/memcpy/memset USM and 2D-USM command implementations)
// collapse to sycl::detail::auto_name so the compiler generates the name.
94 using __fill = sycl::detail::auto_name;
95 template <
typename T>
using __usmfill = sycl::detail::auto_name;
96 template <
typename T>
using __usmfill2d = sycl::detail::auto_name;
97 template <
typename T>
using __usmmemcpy2d = sycl::detail::auto_name;
99 template <
typename T_Src,
typename T_Dst,
int Dims,
104 template <
typename T_Src,
typename T_Dst,
int Dims,
127 template <
typename T_Src,
typename T_Dst,
int Dims,
132 template <
typename T_Src,
typename T_Dst,
int Dims,
150 inline namespace _V1 {
155 template <
typename T,
int Dimensions,
typename AllocatorT,
typename Enable>
158 namespace ext::intel::experimental {
159 template <
class _name,
class _dataT, int32_t _min_capacity,
class _propertiesT,
164 namespace ext ::oneapi ::experimental {
165 struct image_descriptor;
168 namespace ext::oneapi::experimental::detail {
186 template <
typename RetType,
typename Func,
typename Arg>
191 template <
typename RetType,
typename Func,
typename Arg>
196 template <
typename RetType,
typename Func,
typename Arg1,
typename Arg2>
201 template <
typename RetType,
typename Func,
typename Arg1,
typename Arg2>
204 template <
typename F,
typename SuggestedArgType>
207 template <typename F, typename SuggestedArgType>
210 template <typename F, typename SuggestedArgType>
230 #if __SYCL_ID_QUERIES_FIT_IN_INT__
// Primary template intentionally left undefined: only the specializations
// (for range<Dims> and id<Dims>, below) provide the diagnostic Msg string
// used when __SYCL_ID_QUERIES_FIT_IN_INT__ range checks fail.
231 template <
typename T>
struct NotIntMsg;
233 template <
int Dims>
struct NotIntMsg<
range<Dims>> {
234 constexpr
static const char *Msg =
235 "Provided range is out of integer limits. Pass "
236 "`-fno-sycl-id-queries-fit-in-int' to disable range check.";
239 template <
int Dims>
struct NotIntMsg<
id<Dims>> {
240 constexpr
static const char *Msg =
241 "Provided offset is out of integer limits. Pass "
242 "`-fno-sycl-id-queries-fit-in-int' to disable offset check.";
248 template <
typename KernelType,
typename PropertiesT,
typename Cond =
void>
252 template <
typename KernelType,
typename PropertiesT>
254 KernelType, PropertiesT,
255 std::enable_if_t<ext::oneapi::experimental::detail::
256 HasKernelPropertiesGetMethod<KernelType>::value>> {
262 "get(sycl::ext::oneapi::experimental::properties_tag) member in kernel "
263 "functor class must return a valid property list.");
268 #if __SYCL_ID_QUERIES_FIT_IN_INT__
269 template <
typename T,
typename ValT>
270 typename std::enable_if_t<std::is_same<ValT, size_t>::value ||
271 std::is_same<ValT, unsigned long long>::value>
272 checkValueRangeImpl(ValT V) {
273 static constexpr
size_t Limit =
280 template <
int Dims,
typename T>
281 typename std::enable_if_t<std::is_same_v<T, range<Dims>> ||
282 std::is_same_v<T, id<Dims>>>
284 #if __SYCL_ID_QUERIES_FIT_IN_INT__
285 for (
size_t Dim = 0; Dim < Dims; ++Dim)
286 checkValueRangeImpl<T>(V[Dim]);
289 unsigned long long Product = 1;
290 for (
size_t Dim = 0; Dim < Dims; ++Dim) {
293 checkValueRangeImpl<T>(Product);
303 #if __SYCL_ID_QUERIES_FIT_IN_INT__
304 checkValueRange<Dims>(R);
305 checkValueRange<Dims>(O);
307 for (
size_t Dim = 0; Dim < Dims; ++Dim) {
308 unsigned long long Sum = R[Dim] + O[Dim];
310 checkValueRangeImpl<range<Dims>>(Sum);
318 template <
int Dims,
typename T>
319 typename std::enable_if_t<std::is_same_v<T, nd_range<Dims>>>
321 #if __SYCL_ID_QUERIES_FIT_IN_INT__
322 checkValueRange<Dims>(V.get_global_range());
323 checkValueRange<Dims>(V.get_local_range());
324 checkValueRange<Dims>(V.get_offset());
326 checkValueRange<Dims>(V.get_global_range(), V.get_offset());
342 : Id(Id), InitId(Id), UserRange(UserRange), RoundedRange(RoundedRange) {
343 for (
int i = 0; i < Dims; ++i)
344 if (Id[i] >= UserRange[i])
// Conversion to bool: true while iteration is not finished (i.e. while
// !Done); lets callers use the generator object directly in loop conditions.
348 explicit operator bool() {
return !Done; }
351 for (
int i = 0; i < Dims; ++i) {
352 Id[i] += RoundedRange[i];
353 if (Id[i] < UserRange[i])
362 template <
typename KernelType>
auto getItem() {
363 if constexpr (std::is_invocable_v<KernelType,
item<Dims> &> ||
364 std::is_invocable_v<KernelType,
item<Dims> &, kernel_handler>)
365 return detail::Builder::createItem<Dims, true>(UserRange,
getId(), {});
370 "Kernel must be invocable with an item!");
371 return detail::Builder::createItem<Dims, false>(UserRange,
getId());
382 template <
typename TransformedArgType,
int Dims,
typename KernelType>
391 auto item = Gen.template getItem<KernelType>();
397 template <
typename TransformedArgType,
int Dims,
typename KernelType>
406 auto item = Gen.template getItem<KernelType>();
412 using std::enable_if_t;
413 using sycl::detail::queue_impl;
417 template <
typename T>
418 static std::enable_if_t<std::is_unsigned_v<T>,
bool>
426 for (
int i = 0; i < Dims; ++i) {
// Constructs a handler bound to a single queue implementation.
// CallerNeedsEvent tells the handler whether the caller requires an
// event to be produced when the command group is finalized.
475 handler(std::shared_ptr<detail::queue_impl> Queue,
bool CallerNeedsEvent);
// Constructs a handler with explicit primary/secondary queues in addition
// to the submission queue — presumably the secondary queue acts as a
// fallback target; TODO confirm against the out-of-view definition.
487 handler(std::shared_ptr<detail::queue_impl> Queue,
488 std::shared_ptr<detail::queue_impl> PrimaryQueue,
489 std::shared_ptr<detail::queue_impl> SecondaryQueue,
490 bool CallerNeedsEvent);
// Constructs a handler that records into a SYCL graph rather than
// submitting to a queue.
498 handler(std::shared_ptr<ext::oneapi::experimental::detail::graph_impl> Graph);
// Stores Size bytes of a raw kernel argument starting at Ptr and returns
// a pointer to the handler-owned copy (definition not visible here; the
// copy semantics are implied by the by-value uses in storePlainArg below).
500 void *storeRawArg(
const void *Ptr,
size_t Size);
504 return storeRawArg(RKA.MArgData, RKA.MArgSize);
508 template <
typename T>
void *storePlainArg(T &&Arg) {
509 return storeRawArg(&Arg,
sizeof(T));
516 void throwIfActionIsCreated() {
519 "Attempt to set multiple actions for the "
520 "command group. Command group must consist of "
521 "a single kernel or explicit memory operation.");
524 constexpr
static int AccessTargetMask = 0x7ff;
528 template <
typename KernelName,
typename KernelType>
529 void throwOnLocalAccessorMisuse()
const {
532 using KI = sycl::detail::KernelInfo<NameT>;
534 auto *KernelArgs = &KI::getParamDesc(0);
536 for (
unsigned I = 0; I < KI::getNumParams(); ++I) {
539 static_cast<access::target>(KernelArgs[I].info & AccessTargetMask);
541 (AccTarget == target::local))
544 "A local accessor must not be used in a SYCL kernel function "
545 "that is invoked via single_task or via the simple form of "
546 "parallel_for that takes a range parameter.");
553 extractArgsAndReqsFromLambda(
char *LambdaPtr,
size_t KernelArgsNum,
// Populates the handler's argument/requirement lists when the kernel is a
// pre-built kernel object rather than a lambda (counterpart of
// extractArgsAndReqsFromLambda) — definition not visible here; see the
// uses right before MKernelName = getKernelName().
558 void extractArgsAndReqs();
561 const int Size,
const size_t Index,
size_t &IndexShift,
562 bool IsKernelCreatedFromSource,
bool IsESIMD);
567 template <
typename LambdaNameT>
bool lambdaAndKernelHaveEqualName() {
573 assert(MKernel &&
"MKernel is not initialized");
576 return KernelName == LambdaName;
599 event finalize(
bool CallerNeedsEvent);
606 void addStream(
const std::shared_ptr<detail::stream_impl> &Stream) {
607 MStreamStorage.push_back(Stream);
// Registers an opaque reduction-related object with the handler —
// presumably to keep it alive for the command group's lifetime (see the
// addReduction(std::shared_ptr<const void>(ReduBuf)) use below); TODO
// confirm against the out-of-view definition.
615 void addReduction(
const std::shared_ptr<const void> &ReduObj);
622 template <
typename T,
int Dimensions,
typename AllocatorT,
typename Enable>
627 addReduction(std::shared_ptr<const void>(ReduBuf));
632 #ifdef __SYCL_DEVICE_ONLY__
651 template <
typename T,
typename... Ts>
652 void setArgsHelper(
int ArgIndex, T &&Arg, Ts &&...Args) {
653 set_arg(ArgIndex, std::move(Arg));
654 setArgsHelper(++ArgIndex, std::move(Args)...);
// Recursion terminator for the variadic setArgsHelper above: invoked once
// every kernel argument has been consumed; the final ArgIndex is ignored.
657 void setArgsHelper(
int) {}
659 void setLocalAccessorArgHelper(
int ArgIndex,
664 MLocalAccStorage.push_back(std::move(LocalAccImpl));
666 static_cast<int>(access::target::local), ArgIndex);
672 void setArgHelper(
int ArgIndex,
677 #ifndef __SYCL_DEVICE_ONLY__
678 setLocalAccessorArgHelper(ArgIndex, Arg);
683 template <
typename DataT,
int Dims>
687 #ifndef __SYCL_DEVICE_ONLY__
688 setLocalAccessorArgHelper(ArgIndex, Arg);
695 typename std::enable_if_t<AccessTarget != access::target::local, void>
702 addAccessorReq(std::move(AccImpl));
705 static_cast<int>(AccessTarget), ArgIndex);
708 template <
typename T>
void setArgHelper(
int ArgIndex, T &&Arg) {
709 void *StoredArg = storePlainArg(Arg);
711 if (!std::is_same<cl_mem, T>::value && std::is_pointer<T>::value) {
720 void setArgHelper(
int ArgIndex, sampler &&Arg) {
721 void *StoredArg = storePlainArg(Arg);
723 sizeof(sampler), ArgIndex);
727 template <
typename T>
729 setArgHelper(
int ArgIndex,
735 setArgHelper(ArgIndex, std::move(ArgValue));
738 registerDynamicParameter(DynamicParam, ArgIndex);
742 void setArgHelper(
int ArgIndex,
744 auto StoredArg = storeRawArg(Arg);
746 Arg.MArgSize, ArgIndex);
753 void registerDynamicParameter(
772 template <
class KernelType,
class NormalizedKernelType,
int Dims>
773 KernelType *ResetHostKernelHelper(
const KernelType &KernelFunc) {
774 NormalizedKernelType NormalizedKernel(KernelFunc);
775 auto NormalizedKernelFunc =
779 std::move(NormalizedKernelFunc));
780 MHostKernel.reset(HostKernelPtr);
781 return &HostKernelPtr->MKernel.template target<NormalizedKernelType>()
786 template <
class KernelType,
typename ArgT,
int Dims>
787 std::enable_if_t<std::is_same_v<ArgT, sycl::id<Dims>>, KernelType *>
788 ResetHostKernel(
const KernelType &KernelFunc) {
789 struct NormalizedKernelType {
790 KernelType MKernelFunc;
791 NormalizedKernelType(
const KernelType &KernelFunc)
792 : MKernelFunc(KernelFunc) {}
797 return ResetHostKernelHelper<KernelType, struct NormalizedKernelType, Dims>(
802 template <
class KernelType,
typename ArgT,
int Dims>
803 std::enable_if_t<std::is_same_v<ArgT, sycl::nd_item<Dims>>, KernelType *>
804 ResetHostKernel(
const KernelType &KernelFunc) {
805 struct NormalizedKernelType {
806 KernelType MKernelFunc;
807 NormalizedKernelType(
const KernelType &KernelFunc)
808 : MKernelFunc(KernelFunc) {}
813 return ResetHostKernelHelper<KernelType, struct NormalizedKernelType, Dims>(
818 template <
class KernelType,
typename ArgT,
int Dims>
819 std::enable_if_t<std::is_same_v<ArgT, sycl::item<Dims, false>>, KernelType *>
820 ResetHostKernel(
const KernelType &KernelFunc) {
821 struct NormalizedKernelType {
822 KernelType MKernelFunc;
823 NormalizedKernelType(
const KernelType &KernelFunc)
824 : MKernelFunc(KernelFunc) {}
831 return ResetHostKernelHelper<KernelType, struct NormalizedKernelType, Dims>(
836 template <
class KernelType,
typename ArgT,
int Dims>
837 std::enable_if_t<std::is_same_v<ArgT, sycl::item<Dims, true>>, KernelType *>
838 ResetHostKernel(
const KernelType &KernelFunc) {
839 struct NormalizedKernelType {
840 KernelType MKernelFunc;
841 NormalizedKernelType(
const KernelType &KernelFunc)
842 : MKernelFunc(KernelFunc) {}
849 return ResetHostKernelHelper<KernelType, struct NormalizedKernelType, Dims>(
854 template <
class KernelType,
typename ArgT,
int Dims>
855 typename std::enable_if_t<std::is_same_v<ArgT, void>, KernelType *>
856 ResetHostKernel(
const KernelType &KernelFunc) {
857 struct NormalizedKernelType {
858 KernelType MKernelFunc;
859 NormalizedKernelType(
const KernelType &KernelFunc)
860 : MKernelFunc(KernelFunc) {}
866 return ResetHostKernelHelper<KernelType, struct NormalizedKernelType, Dims>(
874 template <
class KernelType,
typename ArgT,
int Dims>
875 std::enable_if_t<std::is_same_v<ArgT, sycl::group<Dims>>, KernelType *>
876 ResetHostKernel(
const KernelType &KernelFunc) {
879 return (KernelType *)(MHostKernel->getPtr());
889 void verifyUsedKernelBundle(
const std::string &KernelName) {
900 template <
typename KernelName,
typename KernelType,
int Dims,
901 typename LambdaArgType>
902 void StoreLambda(KernelType KernelFunc) {
904 constexpr
bool IsCallableWithKernelHandler =
906 LambdaArgType>::value;
908 KernelType *KernelPtr =
909 ResetHostKernel<KernelType, LambdaArgType, Dims>(KernelFunc);
911 constexpr
bool KernelHasName =
912 KI::getName() !=
nullptr && KI::getName()[0] !=
'\0';
918 !KernelHasName ||
sizeof(KernelFunc) == KI::getKernelSize(),
919 "Unexpected kernel lambda size. This can be caused by an "
920 "external host compiler producing a lambda with an "
921 "unexpected layout. This is a limitation of the compiler."
922 "In many cases the difference is related to capturing constexpr "
923 "variables. In such cases removing constexpr specifier aligns the "
924 "captures between the host compiler and the device compiler."
926 "In case of MSVC, passing "
927 "-fsycl-host-compiler-options='/std:c++latest' "
935 extractArgsAndReqsFromLambda(
reinterpret_cast<char *
>(KernelPtr),
936 KI::getNumParams(), &KI::getParamDesc(0),
938 MKernelName = KI::getName();
944 setArgsToAssociatedAccessors();
949 if (IsCallableWithKernelHandler) {
950 getOrInsertHandlerKernelBundle(
true);
954 void verifyDeviceHasProgressGuarantee(
959 template <
typename Properties>
960 void checkAndSetClusterRange(
const Properties &Props) {
961 namespace syclex = sycl::ext::oneapi::experimental;
962 constexpr std::size_t ClusterDim =
963 syclex::detail::getClusterDim<Properties>();
964 if constexpr (ClusterDim > 0) {
965 auto ClusterSize = Props
967 syclex::cuda::cluster_size_key<ClusterDim>>()
969 setKernelClusterLaunch(padRange(ClusterSize), ClusterDim);
979 void processProperties(PropertiesT Props) {
983 "Template type is not a property list.");
990 "Floating point control property is supported for ESIMD kernels only.");
994 "indirectly_callable property cannot be applied to SYCL kernels");
1000 setKernelCacheConfig(StableKernelCacheConfig::LargeSLM);
1002 setKernelCacheConfig(StableKernelCacheConfig::LargeData);
1005 std::ignore = Props;
1008 constexpr
bool UsesRootSync = PropertiesT::template
has_property<
1010 setKernelIsCooperative(UsesRootSync);
1012 sycl::ext::oneapi::experimental::
1013 work_group_progress_key>()) {
1016 verifyDeviceHasProgressGuarantee(
1018 sycl::ext::oneapi::experimental::execution_scope::work_group,
1019 prop.coordinationScope);
1022 sycl::ext::oneapi::experimental::
1023 sub_group_progress_key>()) {
1026 verifyDeviceHasProgressGuarantee(
1029 prop.coordinationScope);
1032 sycl::ext::oneapi::experimental::
1033 work_item_progress_key>()) {
1036 verifyDeviceHasProgressGuarantee(
1038 sycl::ext::oneapi::experimental::execution_scope::work_item,
1039 prop.coordinationScope);
1042 checkAndSetClusterRange(Props);
1049 template <
int Dims_Src,
int Dims_Dst>
1052 if (Dims_Src > Dims_Dst)
1054 for (
size_t I = 0; I < Dims_Src; ++I)
1055 if (Src[I] > Dst[I])
1065 template <
typename TSrc,
int DimSrc,
access::mode ModeSrc,
1069 std::enable_if_t<(DimSrc > 0) && (DimDst > 0),
bool>
1072 if (IsCopyingRectRegionAvailable(Src.get_range(), Dst.get_range()))
1077 ModeDst, TargetDst, IsPHSrc, IsPHDst>>(
1078 LinearizedRange, [=](
id<1> Id) {
1079 size_t Index = Id[0];
1082 Dst[DstId] = Src[SrcId];
1094 template <
typename TSrc,
int DimSrc,
access::mode ModeSrc,
1098 std::enable_if_t<DimSrc == 0 || DimDst == 0, bool>
1104 constexpr
static bool isConstOrGlobal(
access::target AccessTarget) {
1106 AccessTarget == access::target::constant_buffer;
1109 constexpr
static bool isImageOrImageArray(
access::target AccessTarget) {
1114 constexpr
static bool
1116 return isConstOrGlobal(AccessTarget) || isImageOrImageArray(AccessTarget);
1124 constexpr
static bool
1133 constexpr
static bool isBackendSupportedFillSize(
size_t Size) {
1134 return Size == 1 || Size == 2 || Size == 4 || Size == 8 || Size == 16 ||
1135 Size == 32 || Size == 64 || Size == 128;
// Returns whether an event must be produced for this command group
// (definition not visible in this chunk).
1138 bool eventNeeded()
const;
1140 template <
int Dims,
typename LambdaArgType>
struct TransformUserItemType {
1141 using type = std::conditional_t<
1142 std::is_convertible_v<nd_item<Dims>, LambdaArgType>,
nd_item<Dims>,
1143 std::conditional_t<std::is_convertible_v<item<Dims>, LambdaArgType>,
// Queries the maximum number of work-groups per dimension for the target
// device; nullopt when the information is unavailable.
1147 std::optional<std::array<size_t, 3>> getMaxWorkGroups();
// v2 variant: returns the limits plus a bool flag indicating whether the
// query succeeded (consumed as [MaxWGs, HasMaxWGs] in getRoundedRange).
1150 std::tuple<std::array<size_t, 3>,
bool> getMaxWorkGroups_v2();
1153 std::tuple<range<Dims>,
bool> getRoundedRange(
range<Dims> UserRange) {
1172 if (this->DisableRangeRounding())
1176 size_t MinFactorX = 16;
1178 size_t GoodFactor = 32;
1180 size_t MinRangeX = 1024;
1184 this->GetRangeRoundingSettings(MinFactorX, GoodFactor, MinRangeX);
1196 auto [MaxWGs, HasMaxWGs] = getMaxWorkGroups_v2();
1199 for (
int i = 0; i < Dims; ++i)
1206 for (
int i = 0; i < Dims; ++i)
1207 IdResult[i] = (std::min)(Limit, MaxWGs[Dims - i - 1]);
1212 for (
int i = 0; i < Dims; ++i) {
1213 auto DesiredSize = MaxNWGs[i] * GoodFactor;
1215 DesiredSize <= M ? DesiredSize : (M / GoodFactor) * GoodFactor;
1218 bool DidAdjust =
false;
1219 auto Adjust = [&](
int Dim,
size_t Value) {
1220 if (this->RangeRoundingTrace())
1221 std::cout <<
"parallel_for range adjusted at dim " << Dim <<
" from "
1222 << RoundedRange[Dim] <<
" to " << Value << std::endl;
1223 RoundedRange[Dim] = Value;
1227 #ifdef __SYCL_EXP_PARALLEL_FOR_RANGE_ROUNDING__
1228 size_t GoodExpFactor = 1;
1243 this->GetRangeRoundingSettings(MinFactorX, GoodExpFactor, MinRangeX);
1245 for (
auto i = 0; i < Dims; ++i)
1246 if (UserRange[i] % GoodExpFactor) {
1247 Adjust(i, ((UserRange[i] / GoodExpFactor) + 1) * GoodExpFactor);
1253 if (RoundedRange[0] % MinFactorX != 0 && RoundedRange[0] >= MinRangeX) {
1258 Adjust(0, ((RoundedRange[0] + GoodFactor - 1) / GoodFactor) * GoodFactor);
1261 #ifdef __SYCL_FORCE_PARALLEL_FOR_RANGE_ROUNDING__
1267 for (
int i = 0; i < Dims; ++i)
1268 if (RoundedRange[i] > MaxRange[i])
1269 Adjust(i, MaxRange[i]);
1273 return {RoundedRange,
true};
1288 typename KernelName,
typename KernelType,
int Dims,
1290 void parallel_for_lambda_impl(
range<Dims> UserRange, PropertiesT Props,
1291 KernelType KernelFunc) {
1292 throwIfActionIsCreated();
1293 throwOnLocalAccessorMisuse<KernelName, KernelType>();
1296 "The total number of work-items in "
1297 "a range must fit within size_t");
1299 using LambdaArgType = sycl::detail::lambda_arg_type<KernelType, item<Dims>>;
1304 using TransformedArgType = std::conditional_t<
1305 std::is_integral<LambdaArgType>::value && Dims == 1,
item<Dims>,
1306 typename TransformUserItemType<Dims, LambdaArgType>::type>;
1309 "Kernel argument cannot have a sycl::nd_item type in "
1310 "sycl::parallel_for with sycl::range");
1312 static_assert(std::is_convertible_v<
item<Dims>, LambdaArgType> ||
1314 "sycl::parallel_for(sycl::range) kernel must have the "
1315 "first argument of sycl::item type, or of a type which is "
1316 "implicitly convertible from sycl::item");
1318 using RefLambdaArgType = std::add_lvalue_reference_t<LambdaArgType>;
1320 (std::is_invocable_v<KernelType, RefLambdaArgType> ||
1321 std::is_invocable_v<KernelType, RefLambdaArgType, kernel_handler>),
1322 "SYCL kernel lambda/functor has an unexpected signature, it should be "
1323 "invocable with sycl::item and optionally sycl::kernel_handler");
1335 #if !defined(__SYCL_DISABLE_PARALLEL_FOR_RANGE_ROUNDING__) && \
1336 !defined(DPCPP_HOST_DEVICE_OPENMP) && \
1337 !defined(DPCPP_HOST_DEVICE_PERF_NATIVE) && SYCL_LANGUAGE_VERSION >= 202001
1338 auto [RoundedRange, HasRoundedRange] = getRoundedRange(UserRange);
1339 if (HasRoundedRange) {
1342 getRangeRoundedKernelLambda<NameWT, TransformedArgType, Dims>(
1343 KernelFunc, UserRange);
1345 using KName = std::conditional_t<std::is_same<KernelType, NameT>::value,
1346 decltype(Wrapper), NameWT>;
1348 kernel_parallel_for_wrapper<KName, TransformedArgType, decltype(Wrapper),
1349 PropertiesT>(Wrapper);
1350 #ifndef __SYCL_DEVICE_ONLY__
1356 detail::checkValueRange<Dims>(UserRange);
1357 setNDRangeDescriptor(RoundedRange);
1358 StoreLambda<KName, decltype(Wrapper), Dims, TransformedArgType>(
1359 std::move(Wrapper));
1361 setNDRangeUsed(
false);
1370 #ifndef __SYCL_FORCE_PARALLEL_FOR_RANGE_ROUNDING__
1373 kernel_parallel_for_wrapper<NameT, TransformedArgType, KernelType,
1374 PropertiesT>(KernelFunc);
1375 #ifndef __SYCL_DEVICE_ONLY__
1376 processProperties<NameT, PropertiesT>(Props);
1377 detail::checkValueRange<Dims>(UserRange);
1378 setNDRangeDescriptor(std::move(UserRange));
1379 StoreLambda<NameT, KernelType, Dims, TransformedArgType>(
1380 std::move(KernelFunc));
1382 setNDRangeUsed(
false);
1403 template <
typename KernelName,
typename KernelType,
int Dims,
1404 typename PropertiesT>
1405 void parallel_for_impl(
nd_range<Dims> ExecutionRange, PropertiesT Props,
1407 throwIfActionIsCreated();
1413 using LambdaArgType =
1414 sycl::detail::lambda_arg_type<KernelType, nd_item<Dims>>;
1417 "Kernel argument of a sycl::parallel_for with sycl::nd_range "
1418 "must be either sycl::nd_item or be convertible from sycl::nd_item");
1421 (void)ExecutionRange;
1423 kernel_parallel_for_wrapper<NameT, TransformedArgType, KernelType,
1424 PropertiesT>(KernelFunc);
1425 #ifndef __SYCL_DEVICE_ONLY__
1426 detail::checkValueRange<Dims>(ExecutionRange);
1427 setNDRangeDescriptor(std::move(ExecutionRange));
1428 processProperties<NameT, PropertiesT>(Props);
1429 StoreLambda<NameT, KernelType, Dims, TransformedArgType>(
1430 std::move(KernelFunc));
1432 setNDRangeUsed(
true);
1445 throwIfActionIsCreated();
1447 detail::checkValueRange<Dims>(NumWorkItems);
1448 setNDRangeDescriptor(std::move(NumWorkItems));
1450 setNDRangeUsed(
false);
1451 extractArgsAndReqs();
1452 MKernelName = getKernelName();
1466 typename KernelName,
typename KernelType,
int Dims,
1468 void parallel_for_work_group_lambda_impl(
range<Dims> NumWorkGroups,
1471 throwIfActionIsCreated();
1477 using LambdaArgType =
1478 sycl::detail::lambda_arg_type<KernelType, group<Dims>>;
1479 (void)NumWorkGroups;
1481 kernel_parallel_for_work_group_wrapper<NameT, LambdaArgType, KernelType,
1482 PropertiesT>(KernelFunc);
1483 #ifndef __SYCL_DEVICE_ONLY__
1484 processProperties<NameT, PropertiesT>(Props);
1485 detail::checkValueRange<Dims>(NumWorkGroups);
1486 setNDRangeDescriptor(NumWorkGroups,
true);
1487 StoreLambda<NameT, KernelType, Dims, LambdaArgType>(std::move(KernelFunc));
1489 setNDRangeUsed(
false);
1506 typename KernelName,
typename KernelType,
int Dims,
1508 void parallel_for_work_group_lambda_impl(
range<Dims> NumWorkGroups,
1512 throwIfActionIsCreated();
1518 using LambdaArgType =
1519 sycl::detail::lambda_arg_type<KernelType, group<Dims>>;
1520 (void)NumWorkGroups;
1523 kernel_parallel_for_work_group_wrapper<NameT, LambdaArgType, KernelType,
1524 PropertiesT>(KernelFunc);
1525 #ifndef __SYCL_DEVICE_ONLY__
1526 processProperties<NameT, PropertiesT>(Props);
1529 detail::checkValueRange<Dims>(ExecRange);
1530 setNDRangeDescriptor(std::move(ExecRange));
1531 StoreLambda<NameT, KernelType, Dims, LambdaArgType>(std::move(KernelFunc));
1536 #ifdef SYCL_LANGUAGE_VERSION
1537 #define __SYCL_KERNEL_ATTR__ [[clang::sycl_kernel]]
1539 #define __SYCL_KERNEL_ATTR__
1544 template <
typename KernelName,
typename KernelType,
typename... Props>
1545 #ifdef __SYCL_DEVICE_ONLY__
1546 [[__sycl_detail__::add_ir_attributes_function(
1553 #ifdef __SYCL_DEVICE_ONLY__
1562 template <
typename KernelName,
typename KernelType,
typename... Props>
1563 #ifdef __SYCL_DEVICE_ONLY__
1564 [[__sycl_detail__::add_ir_attributes_function(
1571 kernel_handler KH) {
1572 #ifdef __SYCL_DEVICE_ONLY__
1582 template <
typename KernelName,
typename ElementType,
typename KernelType,
1584 #ifdef __SYCL_DEVICE_ONLY__
1585 [[__sycl_detail__::add_ir_attributes_function(
1590 #ifdef __SYCL_DEVICE_ONLY__
1591 KernelFunc(detail::Builder::getElement(detail::declptr<ElementType>()));
1599 template <
typename KernelName,
typename ElementType,
typename KernelType,
1601 #ifdef __SYCL_DEVICE_ONLY__
1602 [[__sycl_detail__::add_ir_attributes_function(
1607 kernel_handler KH) {
1608 #ifdef __SYCL_DEVICE_ONLY__
1609 KernelFunc(detail::Builder::getElement(detail::declptr<ElementType>()), KH);
1618 template <
typename KernelName,
typename ElementType,
typename KernelType,
1620 #ifdef __SYCL_DEVICE_ONLY__
1621 [[__sycl_detail__::add_ir_attributes_function(
1627 #ifdef __SYCL_DEVICE_ONLY__
1628 KernelFunc(detail::Builder::getElement(detail::declptr<ElementType>()));
1636 template <
typename KernelName,
typename ElementType,
typename KernelType,
1638 #ifdef __SYCL_DEVICE_ONLY__
1639 [[__sycl_detail__::add_ir_attributes_function(
1645 kernel_handler KH) {
1646 #ifdef __SYCL_DEVICE_ONLY__
1647 KernelFunc(detail::Builder::getElement(detail::declptr<ElementType>()), KH);
1654 template <
typename... Props>
struct KernelPropertiesUnpackerImpl {
1661 template <
typename... TypesToForward,
typename... ArgsTy>
1662 static void kernel_single_task_unpack(handler *h, ArgsTy... Args) {
1663 h->kernel_single_task<TypesToForward..., Props...>(Args...);
1666 template <
typename... TypesToForward,
typename... ArgsTy>
1667 static void kernel_parallel_for_unpack(handler *h, ArgsTy... Args) {
1668 h->kernel_parallel_for<TypesToForward..., Props...>(Args...);
1671 template <
typename... TypesToForward,
typename... ArgsTy>
1672 static void kernel_parallel_for_work_group_unpack(handler *h,
1674 h->kernel_parallel_for_work_group<TypesToForward..., Props...>(Args...);
1678 template <
typename PropertiesT>
1679 struct KernelPropertiesUnpacker :
public KernelPropertiesUnpackerImpl<> {
1683 ext::oneapi::experimental::is_property_list<PropertiesT>::value,
1684 "Template type is not a property list.");
1687 template <
typename... Props>
1688 struct KernelPropertiesUnpacker<
1690 :
public KernelPropertiesUnpackerImpl<Props...> {};
1704 template <
typename KernelName,
typename KernelType,
typename PropertiesT,
1705 bool HasKernelHandlerArg,
typename FuncTy>
1707 #ifdef __SYCL_DEVICE_ONLY__
1708 detail::CheckDeviceCopyable<KernelType>();
1710 using MergedPropertiesT =
1711 typename detail::GetMergedKernelProperties<KernelType,
1713 using Unpacker = KernelPropertiesUnpacker<MergedPropertiesT>;
1714 #ifndef __SYCL_DEVICE_ONLY__
1716 if constexpr (ext::oneapi::experimental::detail::
1717 HasKernelPropertiesGetMethod<
1719 processProperties<KernelName>(
1720 KernelFunc.get(ext::oneapi::experimental::properties_tag{}));
1723 if constexpr (HasKernelHandlerArg) {
1725 Lambda(Unpacker{},
this, KernelFunc, KH);
1727 Lambda(Unpacker{},
this, KernelFunc);
1735 typename KernelName,
typename KernelType,
1738 unpack<KernelName, KernelType, PropertiesT,
1740 KernelFunc, [&](
auto Unpacker,
auto... args) {
1741 Unpacker.template kernel_single_task_unpack<KernelName, KernelType>(
1747 typename KernelName,
typename ElementType,
typename KernelType,
1750 unpack<KernelName, KernelType, PropertiesT,
1751 detail::KernelLambdaHasKernelHandlerArgT<KernelType,
1752 ElementType>::value>(
1753 KernelFunc, [&](
auto Unpacker,
auto... args) {
1754 Unpacker.template kernel_parallel_for_unpack<KernelName, ElementType,
1755 KernelType>(args...);
1760 typename KernelName,
typename ElementType,
typename KernelType,
1762 void kernel_parallel_for_work_group_wrapper(
_KERNELFUNCPARAM(KernelFunc)) {
1763 unpack<KernelName, KernelType, PropertiesT,
1764 detail::KernelLambdaHasKernelHandlerArgT<KernelType,
1765 ElementType>::value>(
1766 KernelFunc, [&](
auto Unpacker,
auto... args) {
1767 Unpacker.template kernel_parallel_for_work_group_unpack<
1768 KernelName, ElementType, KernelType>(args...);
1780 typename KernelName,
typename KernelType,
1782 void single_task_lambda_impl(PropertiesT Props,
1785 throwIfActionIsCreated();
1786 throwOnLocalAccessorMisuse<KernelName, KernelType>();
1792 kernel_single_task_wrapper<NameT, KernelType, PropertiesT>(KernelFunc);
1793 #ifndef __SYCL_DEVICE_ONLY__
1796 setNDRangeDescriptor(range<1>{1});
1797 processProperties<NameT, PropertiesT>(Props);
1798 StoreLambda<NameT, KernelType, 1,
void>(KernelFunc);
// Marks this handler as using an explicitly-set kernel bundle.
1803 void setStateExplicitKernelBundle();
// Marks this handler as having had a specialization constant set
// (mutually exclusive with an explicit kernel bundle — see the throw in
// get_specialization_constant below).
1804 void setStateSpecConstSet();
// True when the user explicitly provided the kernel bundle in use.
1805 bool isStateExplicitKernelBundle()
const;
// Returns the kernel bundle associated with this handler; when Insert is
// true, one is created/inserted if not already present (as used by the
// specialization-constant accessors below).
1807 std::shared_ptr<detail::kernel_bundle_impl>
1808 getOrInsertHandlerKernelBundle(
bool Insert)
const;
// Associates the bundle of the given kernel object with this handler.
1810 void setHandlerKernelBundle(kernel Kernel);
// Associates an already-constructed kernel bundle implementation with
// this handler.
1812 void setHandlerKernelBundle(
1813 const std::shared_ptr<detail::kernel_bundle_impl> &NewKernelBundleImpPtr);
// Registers a host task as this command group's action; the
// interop_handle overload receives a handle for native-object interop.
1815 void SetHostTask(std::function<
void()> &&Func);
1816 void SetHostTask(std::function<
void(interop_handle)> &&Func);
1818 template <
typename FuncT>
1819 std::enable_if_t<detail::check_fn_signature<std::remove_reference_t<FuncT>,
1821 detail::check_fn_signature<std::remove_reference_t<FuncT>,
1822 void(interop_handle)>::value>
1823 host_task_impl(FuncT &&Func) {
1824 throwIfActionIsCreated();
1828 setArgsToAssociatedAccessors();
1830 SetHostTask(std::move(Func));
1833 template <
typename FuncT>
1834 std::enable_if_t<detail::check_fn_signature<std::remove_reference_t<FuncT>,
1835 void(interop_handle)>::value>
1836 ext_codeplay_enqueue_native_command_impl(FuncT &&Func) {
1837 throwIfActionIsCreated();
1841 setArgsToAssociatedAccessors();
1843 SetHostTask(std::move(Func));
1850 std::shared_ptr<ext::oneapi::experimental::detail::graph_impl>
1851 getCommandGraph()
const;
1867 template <auto &SpecName>
1869 typename std::remove_reference_t<decltype(SpecName)>::
value_type Value) {
1871 setStateSpecConstSet();
1874 getOrInsertHandlerKernelBundle(
true);
1876 detail::createSyclObjFromImpl<kernel_bundle<bundle_state::input>>(
1878 .set_specialization_constant<SpecName>(Value);
1881 template <auto &SpecName>
1882 typename std::remove_reference_t<decltype(SpecName)>
::value_type
1885 if (isStateExplicitKernelBundle())
1887 "Specialization constants cannot be read after "
1888 "explicitly setting the used kernel bundle");
1891 getOrInsertHandlerKernelBundle(
true);
1893 return detail::createSyclObjFromImpl<kernel_bundle<bundle_state::input>>(
1895 .get_specialization_constant<SpecName>();
1909 template <
typename DataT,
int Dims,
access::mode AccMode,
1912 if (Acc.is_placeholder())
1925 template <
typename DataT,
int Dims,
access::mode AccMode,
1931 AccT Acc = *
static_cast<AccT *
>(
1933 if (Acc.is_placeholder())
// Adds a single event this command group must wait on before executing.
1940 void depends_on(
event Event);
// Adds a set of events this command group must wait on before executing.
1945 void depends_on(
const std::vector<event> &Events);
1947 template <
typename T>
1950 template <
typename U,
typename T>
1954 static constexpr
bool value =
1955 std::is_trivially_copyable_v<std::remove_reference_t<T>>
1956 #if SYCL_LANGUAGE_VERSION && SYCL_LANGUAGE_VERSION <= 201707
1957 && std::is_standard_layout<std::remove_reference_t<T>>::value
1961 std::is_pointer_v<remove_cv_ref_t<T>>)
1971 template <
typename T>
1972 typename std::enable_if_t<ShouldEnableSetArg<T>::value,
void>
1974 setArgHelper(ArgIndex, std::move(Arg));
1982 setArgHelper(ArgIndex, std::move(Arg));
1985 template <
typename DataT,
int Dims>
1987 setArgHelper(ArgIndex, std::move(Arg));
1991 template <
typename T>
1994 setArgHelper(argIndex, dynamicParam);
1999 setArgHelper(argIndex, std::move(Arg));
2007 template <
typename... Ts>
void set_args(Ts &&...Args) {
2008 setArgsHelper(0, std::move(Args)...);
2018 template <
typename KernelName = detail::auto_name,
typename KernelType>
2020 single_task_lambda_impl<KernelName>(
2024 template <
typename KernelName = detail::auto_name,
typename KernelType>
2026 parallel_for_lambda_impl<KernelName>(
2028 std::move(KernelFunc));
2031 template <
typename KernelName = detail::auto_name,
typename KernelType>
2033 parallel_for_lambda_impl<KernelName>(
2035 std::move(KernelFunc));
2038 template <
typename KernelName = detail::auto_name,
typename KernelType>
2040 parallel_for_lambda_impl<KernelName>(
2042 std::move(KernelFunc));
2046 template <
typename FuncT>
2047 std::enable_if_t<detail::check_fn_signature<std::remove_reference_t<FuncT>,
2052 host_task_impl(Func);
2056 template <
typename FuncT>
2057 std::enable_if_t<detail::check_fn_signature<std::remove_reference_t<FuncT>,
2060 throwIfGraphAssociated<
2061 ext::oneapi::experimental::detail::UnsupportedGraphFeatures::
2062 sycl_ext_codeplay_enqueue_native_command>();
2063 ext_codeplay_enqueue_native_command_impl(Func);
2084 throwIfActionIsCreated();
2088 using LambdaArgType = sycl::detail::lambda_arg_type<KernelType, item<Dims>>;
2089 using TransformedArgType = std::conditional_t<
2090 std::is_integral<LambdaArgType>::value && Dims == 1,
item<Dims>,
2091 typename TransformUserItemType<Dims, LambdaArgType>::type>;
2093 (void)WorkItemOffset;
2094 kernel_parallel_for_wrapper<NameT, TransformedArgType>(KernelFunc);
2095 #ifndef __SYCL_DEVICE_ONLY__
2096 detail::checkValueRange<Dims>(NumWorkItems, WorkItemOffset);
2097 setNDRangeDescriptor(std::move(NumWorkItems), std::move(WorkItemOffset));
2098 StoreLambda<NameT, KernelType, Dims, TransformedArgType>(
2099 std::move(KernelFunc));
2101 setNDRangeUsed(
false);
2119 parallel_for_work_group_lambda_impl<KernelName>(
2141 parallel_for_work_group_lambda_impl<KernelName>(
2153 throwIfActionIsCreated();
2155 setHandlerKernelBundle(Kernel);
2161 extractArgsAndReqs();
2162 MKernelName = getKernelName();
2166 parallel_for_impl(NumWorkItems, Kernel);
2170 parallel_for_impl(NumWorkItems, Kernel);
2174 parallel_for_impl(NumWorkItems, Kernel);
2189 throwIfActionIsCreated();
2191 detail::checkValueRange<Dims>(NumWorkItems, WorkItemOffset);
2192 setNDRangeDescriptor(std::move(NumWorkItems), std::move(WorkItemOffset));
2194 setNDRangeUsed(
false);
2195 extractArgsAndReqs();
2196 MKernelName = getKernelName();
2208 throwIfActionIsCreated();
2210 detail::checkValueRange<Dims>(NDRange);
2211 setNDRangeDescriptor(std::move(NDRange));
2213 setNDRangeUsed(
true);
2214 extractArgsAndReqs();
2215 MKernelName = getKernelName();
2224 template <
typename KernelName = detail::auto_name,
typename KernelType>
2226 throwIfActionIsCreated();
2228 setHandlerKernelBundle(Kernel);
2233 kernel_single_task<NameT>(KernelFunc);
2234 #ifndef __SYCL_DEVICE_ONLY__
2240 if (!lambdaAndKernelHaveEqualName<NameT>()) {
2241 extractArgsAndReqs();
2242 MKernelName = getKernelName();
2244 StoreLambda<NameT, KernelType, 1,
void>(std::move(KernelFunc));
2246 detail::CheckDeviceCopyable<KernelType>();
2261 throwIfActionIsCreated();
2263 setHandlerKernelBundle(Kernel);
2267 using LambdaArgType = sycl::detail::lambda_arg_type<KernelType, item<Dims>>;
2270 kernel_parallel_for_wrapper<NameT, LambdaArgType>(KernelFunc);
2271 #ifndef __SYCL_DEVICE_ONLY__
2272 detail::checkValueRange<Dims>(NumWorkItems);
2273 setNDRangeDescriptor(std::move(NumWorkItems));
2276 setNDRangeUsed(
false);
2277 if (!lambdaAndKernelHaveEqualName<NameT>()) {
2278 extractArgsAndReqs();
2279 MKernelName = getKernelName();
2281 StoreLambda<NameT, KernelType, Dims, LambdaArgType>(
2282 std::move(KernelFunc));
2300 throwIfActionIsCreated();
2302 setHandlerKernelBundle(Kernel);
2306 using LambdaArgType = sycl::detail::lambda_arg_type<KernelType, item<Dims>>;
2309 (void)WorkItemOffset;
2310 kernel_parallel_for_wrapper<NameT, LambdaArgType>(KernelFunc);
2311 #ifndef __SYCL_DEVICE_ONLY__
2312 detail::checkValueRange<Dims>(NumWorkItems, WorkItemOffset);
2313 setNDRangeDescriptor(std::move(NumWorkItems), std::move(WorkItemOffset));
2316 setNDRangeUsed(
false);
2317 if (!lambdaAndKernelHaveEqualName<NameT>()) {
2318 extractArgsAndReqs();
2319 MKernelName = getKernelName();
2321 StoreLambda<NameT, KernelType, Dims, LambdaArgType>(
2322 std::move(KernelFunc));
2339 throwIfActionIsCreated();
2341 setHandlerKernelBundle(Kernel);
2345 using LambdaArgType =
2346 sycl::detail::lambda_arg_type<KernelType, nd_item<Dims>>;
2349 kernel_parallel_for_wrapper<NameT, LambdaArgType>(KernelFunc);
2350 #ifndef __SYCL_DEVICE_ONLY__
2351 detail::checkValueRange<Dims>(NDRange);
2352 setNDRangeDescriptor(std::move(NDRange));
2355 setNDRangeUsed(
true);
2356 if (!lambdaAndKernelHaveEqualName<NameT>()) {
2357 extractArgsAndReqs();
2358 MKernelName = getKernelName();
2360 StoreLambda<NameT, KernelType, Dims, LambdaArgType>(
2361 std::move(KernelFunc));
2382 throwIfActionIsCreated();
2384 setHandlerKernelBundle(Kernel);
2388 using LambdaArgType =
2389 sycl::detail::lambda_arg_type<KernelType, group<Dims>>;
2391 (void)NumWorkGroups;
2392 kernel_parallel_for_work_group_wrapper<NameT, LambdaArgType>(KernelFunc);
2393 #ifndef __SYCL_DEVICE_ONLY__
2394 detail::checkValueRange<Dims>(NumWorkGroups);
2395 setNDRangeDescriptor(NumWorkGroups,
true);
2397 StoreLambda<NameT, KernelType, Dims, LambdaArgType>(std::move(KernelFunc));
2422 throwIfActionIsCreated();
2424 setHandlerKernelBundle(Kernel);
2428 using LambdaArgType =
2429 sycl::detail::lambda_arg_type<KernelType, group<Dims>>;
2431 (void)NumWorkGroups;
2433 kernel_parallel_for_work_group_wrapper<NameT, LambdaArgType>(KernelFunc);
2434 #ifndef __SYCL_DEVICE_ONLY__
2437 detail::checkValueRange<Dims>(ExecRange);
2438 setNDRangeDescriptor(std::move(ExecRange));
2440 StoreLambda<NameT, KernelType, Dims, LambdaArgType>(std::move(KernelFunc));
2446 typename PropertiesT>
2450 single_task_lambda_impl<KernelName, KernelType, PropertiesT>(Props,
2455 typename PropertiesT>
2460 parallel_for_lambda_impl<KernelName, KernelType, 1, PropertiesT>(
2461 NumWorkItems, Props, std::move(KernelFunc));
2465 typename PropertiesT>
2470 parallel_for_lambda_impl<KernelName, KernelType, 2, PropertiesT>(
2471 NumWorkItems, Props, std::move(KernelFunc));
2475 typename PropertiesT>
2480 parallel_for_lambda_impl<KernelName, KernelType, 3, PropertiesT>(
2481 NumWorkItems, Props, std::move(KernelFunc));
2485 typename PropertiesT,
int Dims>
2490 parallel_for_impl<KernelName>(Range, Properties, std::move(KernelFunc));
2498 (
sizeof...(RestT) > 1) &&
2502 throwIfGraphAssociated<ext::oneapi::experimental::detail::
2503 UnsupportedGraphFeatures::sycl_reductions>();
2504 detail::reduction_parallel_for<KernelName>(*
this, Range, Properties,
2505 std::forward<RestT>(Rest)...);
2511 (
sizeof...(RestT) > 1) &&
2515 throwIfGraphAssociated<ext::oneapi::experimental::detail::
2516 UnsupportedGraphFeatures::sycl_reductions>();
2517 detail::reduction_parallel_for<KernelName>(*
this, Range, Properties,
2518 std::forward<RestT>(Rest)...);
2524 (
sizeof...(RestT) > 1) &&
2528 throwIfGraphAssociated<ext::oneapi::experimental::detail::
2529 UnsupportedGraphFeatures::sycl_reductions>();
2530 detail::reduction_parallel_for<KernelName>(*
this, Range, Properties,
2531 std::forward<RestT>(Rest)...);
2537 parallel_for<KernelName>(Range,
2539 std::forward<RestT>(Rest)...);
2545 parallel_for<KernelName>(Range,
2547 std::forward<RestT>(Rest)...);
2553 parallel_for<KernelName>(Range,
2555 std::forward<RestT>(Rest)...);
2559 typename PropertiesT,
typename... RestT>
2561 (
sizeof...(RestT) > 1) &&
2565 throwIfGraphAssociated<ext::oneapi::experimental::detail::
2566 UnsupportedGraphFeatures::sycl_reductions>();
2567 detail::reduction_parallel_for<KernelName>(*
this, Range, Properties,
2568 std::forward<RestT>(Rest)...);
2575 parallel_for<KernelName>(Range,
2577 std::forward<RestT>(Rest)...);
2583 int Dims,
typename PropertiesT>
2586 parallel_for_work_group_lambda_impl<KernelName, KernelType, Dims,
2587 PropertiesT>(NumWorkGroups, Props,
2592 int Dims,
typename PropertiesT>
2596 parallel_for_work_group_lambda_impl<KernelName, KernelType, Dims,
2602 #undef _KERNELFUNCPARAM
2617 std::shared_ptr<T_Dst> Dst) {
2618 if (Src.is_placeholder())
2619 checkIfPlaceholderIsBoundToHandler(Src);
2621 throwIfActionIsCreated();
2622 static_assert(isValidTargetForExplicitOp(AccessTarget),
2623 "Invalid accessor target for the copy method.");
2624 static_assert(isValidModeForSourceAccessor(
AccessMode),
2625 "Invalid accessor mode for the copy method.");
2628 addLifetimeSharedPtrStorage(Dst);
2629 typename std::shared_ptr<T_Dst>::element_type *RawDstPtr = Dst.get();
2630 copy(Src, RawDstPtr);
2646 if (Dst.is_placeholder())
2647 checkIfPlaceholderIsBoundToHandler(Dst);
2649 throwIfActionIsCreated();
2650 static_assert(isValidTargetForExplicitOp(AccessTarget),
2651 "Invalid accessor target for the copy method.");
2652 static_assert(isValidModeForDestinationAccessor(
AccessMode),
2653 "Invalid accessor mode for the copy method.");
2658 addLifetimeSharedPtrStorage(Src);
2659 typename std::shared_ptr<T_Src>::element_type *RawSrcPtr = Src.get();
2660 copy(RawSrcPtr, Dst);
2675 if (Src.is_placeholder())
2676 checkIfPlaceholderIsBoundToHandler(Src);
2678 throwIfActionIsCreated();
2679 static_assert(isValidTargetForExplicitOp(AccessTarget),
2680 "Invalid accessor target for the copy method.");
2681 static_assert(isValidModeForSourceAccessor(
AccessMode),
2682 "Invalid accessor mode for the copy method.");
2688 MSrcPtr =
static_cast<void *
>(AccImpl.get());
2689 MDstPtr =
static_cast<void *
>(Dst);
2692 addAccessorReq(std::move(AccImpl));
2708 if (Dst.is_placeholder())
2709 checkIfPlaceholderIsBoundToHandler(Dst);
2711 throwIfActionIsCreated();
2712 static_assert(isValidTargetForExplicitOp(AccessTarget),
2713 "Invalid accessor target for the copy method.");
2714 static_assert(isValidModeForDestinationAccessor(
AccessMode),
2715 "Invalid accessor mode for the copy method.");
2724 MSrcPtr =
const_cast<T_Src *
>(Src);
2725 MDstPtr =
static_cast<void *
>(AccImpl.get());
2728 addAccessorReq(std::move(AccImpl));
2739 typename T_Src,
int Dims_Src,
access::mode AccessMode_Src,
2747 accessor<T_Dst, Dims_Dst, AccessMode_Dst, AccessTarget_Dst,
2750 if (Src.is_placeholder())
2751 checkIfPlaceholderIsBoundToHandler(Src);
2752 if (Dst.is_placeholder())
2753 checkIfPlaceholderIsBoundToHandler(Dst);
2755 throwIfActionIsCreated();
2756 static_assert(isValidTargetForExplicitOp(AccessTarget_Src),
2757 "Invalid source accessor target for the copy method.");
2758 static_assert(isValidTargetForExplicitOp(AccessTarget_Dst),
2759 "Invalid destination accessor target for the copy method.");
2760 static_assert(isValidModeForSourceAccessor(AccessMode_Src),
2761 "Invalid source accessor mode for the copy method.");
2762 static_assert(isValidModeForDestinationAccessor(AccessMode_Dst),
2763 "Invalid destination accessor mode for the copy method.");
2764 if (Dst.get_size() < Src.get_size())
2766 "The destination accessor size is too small to "
2767 "copy the memory into.");
2769 if (copyAccToAccHelper(Src, Dst))
2779 MSrcPtr = AccImplSrc.get();
2780 MDstPtr = AccImplDst.get();
2783 addAccessorReq(std::move(AccImplSrc));
2784 addAccessorReq(std::move(AccImplDst));
2796 if (Acc.is_placeholder())
2797 checkIfPlaceholderIsBoundToHandler(Acc);
2799 throwIfActionIsCreated();
2800 static_assert(isValidTargetForExplicitOp(AccessTarget),
2801 "Invalid accessor target for the update_host method.");
2807 MDstPtr =
static_cast<void *
>(AccImpl.get());
2808 addAccessorReq(std::move(AccImpl));
2828 if (Dst.is_placeholder())
2829 checkIfPlaceholderIsBoundToHandler(Dst);
2831 throwIfActionIsCreated();
2834 static_assert(isValidTargetForExplicitOp(AccessTarget),
2835 "Invalid accessor target for the fill method.");
2839 if constexpr (isBackendSupportedFillSize(
sizeof(T)) &&
2840 ((Dims <= 1) || isImageOrImageArray(AccessTarget))) {
2841 StageFillCG(Dst, Pattern);
2842 }
else if constexpr (Dims == 0) {
2844 parallel_for<__fill<T, Dims, AccessMode, AccessTarget, IsPlaceholder>>(
2852 if (OffsetUsable && RangesUsable &&
2853 isBackendSupportedFillSize(
sizeof(T))) {
2854 StageFillCG(Dst, Pattern);
2857 parallel_for<__fill<T, Dims, AccessMode, AccessTarget, IsPlaceholder>>(
2858 Range, [=](
id<Dims> Index) { Dst[Index] = Pattern; });
2869 template <
typename T>
void fill(
void *Ptr,
const T &Pattern,
size_t Count) {
2870 throwIfActionIsCreated();
2873 "Pattern must be device copyable");
2875 parallel_for<__usmfill<T>>(
range<1>(Count), [=](
id<1> Index) {
2876 T *CastedPtr =
static_cast<T *
>(Ptr);
2877 CastedPtr[Index] = Pattern;
2880 this->fill_impl(Ptr, &Pattern,
sizeof(T), Count);
2888 throwIfActionIsCreated();
2898 void ext_oneapi_barrier(
const std::vector<event> &WaitList);
2910 void memcpy(
void *Dest,
const void *Src,
size_t Count);
2922 template <
typename T>
void copy(
const T *Src, T *Dest,
size_t Count) {
2923 this->memcpy(Dest, Src, Count *
sizeof(T));
2933 void memset(
void *Dest,
int Value,
size_t Count);
2941 void prefetch(
const void *Ptr,
size_t Count);
2949 void mem_advise(
const void *Ptr,
size_t Length,
int Advice);
2967 template <
typename T =
unsigned char,
2968 typename = std::enable_if_t<std::is_same_v<T, unsigned char>>>
2969 void ext_oneapi_memcpy2d(
void *Dest,
size_t DestPitch,
const void *Src,
2970 size_t SrcPitch,
size_t Width,
size_t Height);
2985 template <
typename T>
2986 void ext_oneapi_copy2d(
const T *Src,
size_t SrcPitch, T *Dest,
2987 size_t DestPitch,
size_t Width,
size_t Height);
3004 template <
typename T =
unsigned char,
3005 typename = std::enable_if_t<std::is_same_v<T, unsigned char>>>
3006 void ext_oneapi_memset2d(
void *Dest,
size_t DestPitch,
int Value,
3007 size_t Width,
size_t Height);
3021 template <
typename T>
3022 void ext_oneapi_fill2d(
void *Dest,
size_t DestPitch,
const T &Pattern,
3023 size_t Width,
size_t Height);
3033 template <
typename T,
typename PropertyListT>
3035 const void *Src,
size_t NumBytes =
sizeof(T),
3036 size_t DestOffset = 0) {
3037 throwIfGraphAssociated<
3038 ext::oneapi::experimental::detail::UnsupportedGraphFeatures::
3039 sycl_ext_oneapi_device_global>();
3040 if (
sizeof(T) < DestOffset + NumBytes)
3042 "Copy to device_global is out of bounds.");
3044 constexpr
bool IsDeviceImageScoped = PropertyListT::template
has_property<
3050 memcpyToHostOnlyDeviceGlobal(&Dest, Src,
sizeof(T), IsDeviceImageScoped,
3051 NumBytes, DestOffset);
3055 memcpyToDeviceGlobal(&Dest, Src, IsDeviceImageScoped, NumBytes, DestOffset);
3066 template <
typename T,
typename PropertyListT>
3070 size_t NumBytes =
sizeof(T),
size_t SrcOffset = 0) {
3071 throwIfGraphAssociated<
3072 ext::oneapi::experimental::detail::UnsupportedGraphFeatures::
3073 sycl_ext_oneapi_device_global>();
3074 if (
sizeof(T) < SrcOffset + NumBytes)
3076 "Copy from device_global is out of bounds.");
3078 constexpr
bool IsDeviceImageScoped = PropertyListT::template
has_property<
3084 memcpyFromHostOnlyDeviceGlobal(Dest, &Src, IsDeviceImageScoped, NumBytes,
3089 memcpyFromDeviceGlobal(Dest, &Src, IsDeviceImageScoped, NumBytes,
3102 template <
typename T,
typename PropertyListT>
3103 void copy(
const std::remove_all_extents_t<T> *Src,
3105 size_t Count =
sizeof(T) /
sizeof(std::remove_all_extents_t<T>),
3106 size_t StartIndex = 0) {
3107 this->memcpy(Dest, Src, Count *
sizeof(std::remove_all_extents_t<T>),
3108 StartIndex *
sizeof(std::remove_all_extents_t<T>));
3121 template <
typename T,
typename PropertyListT>
3124 std::remove_all_extents_t<T> *Dest,
3125 size_t Count =
sizeof(T) /
sizeof(std::remove_all_extents_t<T>),
3126 size_t StartIndex = 0) {
3127 this->memcpy(Dest, Src, Count *
sizeof(std::remove_all_extents_t<T>),
3128 StartIndex *
sizeof(std::remove_all_extents_t<T>));
3145 void ext_oneapi_copy(
3169 void ext_oneapi_copy(
3185 void ext_oneapi_copy(
3227 void ext_oneapi_copy(
3228 const void *Src,
void *Dest,
3230 size_t DeviceRowPitch);
3266 void ext_oneapi_copy(
3279 void ext_oneapi_wait_external_semaphore(
3290 void ext_oneapi_wait_external_semaphore(
3292 uint64_t WaitValue);
3300 void ext_oneapi_signal_external_semaphore(
3312 void ext_oneapi_signal_external_semaphore(
3314 uint64_t SignalValue);
3317 std::shared_ptr<detail::handler_impl> impl;
3318 std::shared_ptr<detail::queue_impl> MQueue;
3320 std::vector<detail::LocalAccessorImplPtr> MLocalAccStorage;
3321 std::vector<std::shared_ptr<detail::stream_impl>> MStreamStorage;
3324 std::shared_ptr<detail::kernel_impl> MKernel;
3326 void *MSrcPtr =
nullptr;
3328 void *MDstPtr =
nullptr;
3332 std::vector<unsigned char> MPattern;
3334 std::unique_ptr<detail::HostKernelBase> MHostKernel;
3337 bool MIsFinalized =
false;
3343 template <
typename DataT,
int Dims,
access::mode AccMode,
3345 typename PropertyListT>
3353 friend class stream;
3357 template <
typename T,
class BinaryOperation,
int Dims,
size_t Extent,
3358 bool ExplicitIdentity,
typename RedOutVar>
3362 template <
class FunctorTy>
3366 typename PropertiesT,
typename... RestT>
3368 PropertiesT Properties,
3372 typename PropertiesT,
typename... RestT>
3375 PropertiesT Properties, RestT... Rest);
3377 #ifndef __SYCL_DEVICE_ONLY__
3387 friend class ::MockHandler;
3392 template <
class _name,
class _dataT, int32_t _min_capacity,
3393 class _propertiesT,
class>
3396 template <
class Obj>
3397 friend const decltype(Obj::impl) &
3406 void ext_intel_read_host_pipe(
const std::string &Name,
void *Ptr,
size_t Size,
3407 bool Block =
false) {
3411 size_t Size,
bool Block =
false);
3419 void ext_intel_write_host_pipe(
const std::string &Name,
void *Ptr,
3420 size_t Size,
bool Block =
false) {
3423 void ext_intel_write_host_pipe(detail::string_view Name,
void *Ptr,
3424 size_t Size,
bool Block =
false);
3428 bool DisableRangeRounding();
3430 bool RangeRoundingTrace();
3432 void GetRangeRoundingSettings(
size_t &MinFactor,
size_t &GoodFactor,
3435 template <
typename WrapperT,
typename TransformedArgType,
int Dims,
3436 typename KernelType,
3438 KernelType, TransformedArgType>::value> * =
nullptr>
3439 auto getRangeRoundedKernelLambda(KernelType KernelFunc,
3442 KernelType>{UserRange, KernelFunc};
3445 template <
typename WrapperT,
typename TransformedArgType,
int Dims,
3446 typename KernelType,
3447 std::enable_if_t<!detail::KernelLambdaHasKernelHandlerArgT<
3448 KernelType, TransformedArgType>::value> * =
nullptr>
3449 auto getRangeRoundedKernelLambda(KernelType KernelFunc,
3451 return detail::RoundedRangeKernel<TransformedArgType, Dims, KernelType>{
3452 UserRange, KernelFunc};
3455 const std::shared_ptr<detail::context_impl> &getContextImplPtr()
const;
3458 bool supportsUSMMemcpy2D();
3459 bool supportsUSMFill2D();
3460 bool supportsUSMMemset2D();
3463 id<2> computeFallbackKernelBounds(
size_t Width,
size_t Height);
3467 backend getDeviceBackend()
const;
3471 template <
typename T>
3472 void commonUSMCopy2DFallbackKernel(
const void *Src,
size_t SrcPitch,
3473 void *Dest,
size_t DestPitch,
size_t Width,
3478 id<2> Chunk = computeFallbackKernelBounds(Height, Width);
3479 id<2> Iterations = (Chunk + id<2>{Height, Width} - 1) / Chunk;
3480 parallel_for<__usmmemcpy2d<T>>(
3481 range<2>{Chunk[0], Chunk[1]}, [=](id<2> Index) {
3482 T *CastedDest =
static_cast<T *
>(Dest);
3483 const T *CastedSrc =
static_cast<const T *
>(Src);
3484 for (uint32_t I = 0; I < Iterations[0]; ++I) {
3485 for (uint32_t J = 0; J < Iterations[1]; ++J) {
3486 id<2> adjustedIndex = Index + Chunk * id<2>{I, J};
3487 if (adjustedIndex[0] < Height && adjustedIndex[1] < Width) {
3488 CastedDest[adjustedIndex[0] * DestPitch + adjustedIndex[1]] =
3489 CastedSrc[adjustedIndex[0] * SrcPitch + adjustedIndex[1]];
3498 template <
typename T>
3499 void commonUSMCopy2DFallbackHostTask(
const void *Src,
size_t SrcPitch,
3500 void *Dest,
size_t DestPitch,
3501 size_t Width,
size_t Height) {
3505 const T *CastedSrc =
static_cast<const T *
>(Src);
3506 T *CastedDest =
static_cast<T *
>(Dest);
3507 for (
size_t I = 0; I < Height; ++I) {
3508 const T *SrcItBegin = CastedSrc + SrcPitch * I;
3509 T *DestItBegin = CastedDest + DestPitch * I;
3510 std::copy(SrcItBegin, SrcItBegin + Width, DestItBegin);
3519 typename PropertyListT = property_list>
3521 accessor<T, Dims, AccessMode, AccessTarget, IsPlaceholder, PropertyListT>
3525 detail::AccessorBaseHost *AccBase = (detail::AccessorBaseHost *)&Dst;
3528 MDstPtr =
static_cast<void *
>(AccImpl.get());
3529 addAccessorReq(std::move(AccImpl));
3531 MPattern.resize(
sizeof(T));
3532 auto PatternPtr =
reinterpret_cast<T *
>(MPattern.data());
3533 *PatternPtr = Pattern;
3538 template <
typename T>
3539 void commonUSMFill2DFallbackKernel(
void *Dest,
size_t DestPitch,
3540 const T &Pattern,
size_t Width,
3545 id<2> Chunk = computeFallbackKernelBounds(Height, Width);
3546 id<2> Iterations = (Chunk + id<2>{Height, Width} - 1) / Chunk;
3547 parallel_for<__usmfill2d<T>>(
3548 range<2>{Chunk[0], Chunk[1]}, [=](id<2> Index) {
3549 T *CastedDest =
static_cast<T *
>(Dest);
3550 for (uint32_t I = 0; I < Iterations[0]; ++I) {
3551 for (uint32_t J = 0; J < Iterations[1]; ++J) {
3552 id<2> adjustedIndex = Index + Chunk * id<2>{I, J};
3553 if (adjustedIndex[0] < Height && adjustedIndex[1] < Width) {
3554 CastedDest[adjustedIndex[0] * DestPitch + adjustedIndex[1]] =
3564 template <
typename T>
3565 void commonUSMFill2DFallbackHostTask(
void *Dest,
size_t DestPitch,
3566 const T &Pattern,
size_t Width,
3571 T *CastedDest =
static_cast<T *
>(Dest);
3572 for (
size_t I = 0; I < Height; ++I) {
3573 T *ItBegin = CastedDest + DestPitch * I;
3574 std::fill(ItBegin, ItBegin + Width, Pattern);
3580 void fill_impl(
void *Dest,
const void *Value,
size_t ValueSize,
size_t Count);
3583 void ext_oneapi_memcpy2d_impl(
void *Dest,
size_t DestPitch,
const void *Src,
3584 size_t SrcPitch,
size_t Width,
size_t Height);
3587 void ext_oneapi_fill2d_impl(
void *Dest,
size_t DestPitch,
const void *Value,
3588 size_t ValueSize,
size_t Width,
size_t Height);
3591 void ext_oneapi_memset2d_impl(
void *Dest,
size_t DestPitch,
int Value,
3592 size_t Width,
size_t Height);
3595 void memcpyToDeviceGlobal(
const void *DeviceGlobalPtr,
const void *Src,
3596 bool IsDeviceImageScoped,
size_t NumBytes,
3600 void memcpyFromDeviceGlobal(
void *Dest,
const void *DeviceGlobalPtr,
3601 bool IsDeviceImageScoped,
size_t NumBytes,
3605 void memcpyToHostOnlyDeviceGlobal(
const void *DeviceGlobalPtr,
3606 const void *Src,
size_t DeviceGlobalTSize,
3607 bool IsDeviceImageScoped,
size_t NumBytes,
3611 void memcpyFromHostOnlyDeviceGlobal(
void *Dest,
const void *DeviceGlobalPtr,
3612 bool IsDeviceImageScoped,
size_t NumBytes,
3618 typename PropertyListT = property_list>
3619 void checkIfPlaceholderIsBoundToHandler(
3620 accessor<T, Dims, AccessMode, AccessTarget, IsPlaceholder, PropertyListT>
3622 auto *AccBase =
reinterpret_cast<detail::AccessorBaseHost *
>(&Acc);
3624 if (HasAssociatedAccessor(Req, AccessTarget))
3626 "placeholder accessor must be bound by calling "
3627 "handler::require() before it can be used.");
3631 enum class StableKernelCacheConfig : int32_t {
3638 void setKernelCacheConfig(StableKernelCacheConfig);
3640 void setKernelIsCooperative(
bool);
3643 void setKernelClusterLaunch(
sycl::range<3> ClusterSize,
int Dims);
3647 void throwIfGraphAssociated()
const {
3649 if (getCommandGraph()) {
3650 std::string FeatureString =
3654 "The " + FeatureString +
3655 " feature is not yet available "
3656 "for use with the SYCL Graph extension.");
3661 void setNDRangeUsed(
bool Value);
3663 inline void internalProfilingTagImpl() {
3664 throwIfActionIsCreated();
3670 void addLifetimeSharedPtrStorage(std::shared_ptr<const void> SPtr);
3675 void setArgsToAssociatedAccessors();
3677 bool HasAssociatedAccessor(detail::AccessorImplHost *Req,
3681 if constexpr (Dims == 3) {
3685 for (
int I = 0; I < Dims; ++I)
3692 if constexpr (Dims == 3) {
3696 for (
int I = 0; I < Dims; ++I)
3704 bool SetNumWorkGroups =
false) {
3705 return setNDRangeDescriptorPadded(padRange(N), SetNumWorkGroups, Dims);
3710 return setNDRangeDescriptorPadded(padRange(NumWorkItems), padId(Offset),
3715 return setNDRangeDescriptorPadded(
3721 void setNDRangeDescriptorPadded(
sycl::range<3> N,
bool SetNumWorkGroups,
3735 void depends_on(
const std::vector<detail::EventImplPtr> &Events);
3742 Handler.internalProfilingTagImpl();
The file contains implementations of accessor class.
Defines a shared array that can be used by kernels in queues.
range< 3 > & getMemoryRange()
range< 3 > & getAccessRange()
static void internalProfilingTagImpl(handler &Handler)
RoundedRangeIDGenerator(const id< Dims > &Id, const range< Dims > &UserRange, const range< Dims > &RoundedRange)
void operator()(item< Dims > It, kernel_handler KH) const
void operator()(item< Dims > It) const
This class is the default KernelName template parameter type for kernel invocation APIs such as single_task and parallel_for.
The SYCL device class encapsulates a single SYCL device on which kernels may be executed.
An event object can be used to synchronize memory transfers, enqueues of kernels and signaling barriers.
Graph in the modifiable state.
Implementation details of command_graph<modifiable>.
Command group handler class.
void fill(void *Ptr, const T &Pattern, size_t Count)
Fills the specified memory with the specified pattern.
void parallel_for(range< 2 > NumWorkItems, kernel Kernel)
void parallel_for(kernel Kernel, range< Dims > NumWorkItems, _KERNELFUNCPARAM(KernelFunc))
Defines and invokes a SYCL kernel function for the specified range.
void single_task(_KERNELFUNCPARAM(KernelFunc))
Defines and invokes a SYCL kernel function as a function object type.
void copy(const std::remove_all_extents_t< T > *Src, ext::oneapi::experimental::device_global< T, PropertyListT > &Dest, size_t Count=sizeof(T)/sizeof(std::remove_all_extents_t< T >), size_t StartIndex=0)
Copies elements of type std::remove_all_extents_t<T> from a USM memory region to a device_global.
void parallel_for(nd_range< Dims > NDRange, kernel Kernel)
Defines and invokes a SYCL kernel function for the specified range and offsets.
void parallel_for(range< 1 > NumWorkItems, _KERNELFUNCPARAM(KernelFunc))
void parallel_for_work_group(range< Dims > NumWorkGroups, range< Dims > WorkGroupSize, PropertiesT Props, _KERNELFUNCPARAM(KernelFunc))
std::enable_if_t<(sizeof...(RestT) > 1) &&detail::AreAllButLastReductions< RestT... >::value &&ext::oneapi::experimental::is_property_list< PropertiesT >::value > parallel_for(range< 2 > Range, PropertiesT Properties, RestT &&...Rest)
void copy(accessor< T_Src, Dims, AccessMode, AccessTarget, IsPlaceholder > Src, std::shared_ptr< T_Dst > Dst)
Copies the content of memory object accessed by Src into the memory pointed by Dst.
std::enable_if_t<(sizeof...(RestT) > 1) &&detail::AreAllButLastReductions< RestT... >::value &&ext::oneapi::experimental::is_property_list< PropertiesT >::value > parallel_for(nd_range< Dims > Range, PropertiesT Properties, RestT &&...Rest)
void copy(const ext::oneapi::experimental::device_global< T, PropertyListT > &Src, std::remove_all_extents_t< T > *Dest, size_t Count=sizeof(T)/sizeof(std::remove_all_extents_t< T >), size_t StartIndex=0)
Copies elements of type std::remove_all_extents_t<T> from a device_global to a USM memory region.
void parallel_for(range< 3 > NumWorkItems, _KERNELFUNCPARAM(KernelFunc))
handler & operator=(handler &&)=delete
std::enable_if_t<(sizeof...(RestT) > 1) &&detail::AreAllButLastReductions< RestT... >::value &&ext::oneapi::experimental::is_property_list< PropertiesT >::value > parallel_for(range< 3 > Range, PropertiesT Properties, RestT &&...Rest)
handler(handler &&)=delete
void copy(accessor< T_Src, Dims_Src, AccessMode_Src, AccessTarget_Src, IsPlaceholder_Src > Src, accessor< T_Dst, Dims_Dst, AccessMode_Dst, AccessTarget_Dst, IsPlaceholder_Dst > Dst)
Copies the content of memory object accessed by Src to the memory object accessed by Dst.
void copy(const T *Src, T *Dest, size_t Count)
Copies data from one memory region to another, where each region is either a host pointer or a pointer within USM allocated memory.
void parallel_for_work_group(range< Dims > NumWorkGroups, range< Dims > WorkGroupSize, _KERNELFUNCPARAM(KernelFunc))
Hierarchical kernel invocation method of a kernel defined as a lambda encoding the body of each work-...
void parallel_for(range< 3 > NumWorkItems, kernel Kernel)
void parallel_for(range< 1 > NumWorkItems, kernel Kernel)
void single_task(kernel Kernel, _KERNELFUNCPARAM(KernelFunc))
Defines and invokes a SYCL kernel function.
handler & operator=(const handler &)=delete
void parallel_for(kernel Kernel, nd_range< Dims > NDRange, _KERNELFUNCPARAM(KernelFunc))
Defines and invokes a SYCL kernel function for the specified range and offsets.
void parallel_for_work_group(kernel Kernel, range< Dims > NumWorkGroups, range< Dims > WorkGroupSize, _KERNELFUNCPARAM(KernelFunc))
Hierarchical kernel invocation method of a kernel.
void require(accessor< DataT, Dims, AccMode, AccTarget, isPlaceholder > Acc)
Requires access to the memory object associated with the placeholder accessor.
void fill(accessor< T, Dims, AccessMode, AccessTarget, IsPlaceholder, PropertyListT > Dst, const T &Pattern)
Fills memory pointed by accessor with the pattern given.
void update_host(accessor< T, Dims, AccessMode, AccessTarget, IsPlaceholder > Acc)
Provides guarantees that the memory object accessed via Acc is updated on the host after the command group has completed execution.
void set_arg(int argIndex, ext::oneapi::experimental::dynamic_parameter< T > &dynamicParam)
std::enable_if_t< ext::oneapi::experimental::is_property_list< PropertiesT >::value > parallel_for(range< 2 > NumWorkItems, PropertiesT Props, _KERNELFUNCPARAM(KernelFunc))
std::is_same< remove_cv_ref_t< U >, remove_cv_ref_t< T > > is_same_type
std::enable_if_t< ShouldEnableSetArg< T >::value, void > set_arg(int ArgIndex, T &&Arg)
Sets argument for OpenCL interoperability kernels.
void parallel_for_work_group(range< Dims > NumWorkGroups, PropertiesT Props, _KERNELFUNCPARAM(KernelFunc))
}@
void require(ext::oneapi::experimental::dynamic_parameter< accessor< DataT, Dims, AccMode, AccTarget, isPlaceholder >> dynamicParamAcc)
Requires access to the memory object associated with the placeholder accessor contained in a dynamic_...
std::enable_if_t< ext::oneapi::experimental::is_property_list< PropertiesT >::value > single_task(PropertiesT Props, _KERNELFUNCPARAM(KernelFunc))
void single_task(kernel Kernel)
Invokes a SYCL kernel.
std::enable_if_t< detail::AreAllButLastReductions< RestT... >::value > parallel_for(nd_range< Dims > Range, RestT &&...Rest)
void copy(const T_Src *Src, accessor< T_Dst, Dims, AccessMode, AccessTarget, IsPlaceholder > Dst)
Copies the content of memory pointed by Src into the memory object accessed by Dst.
std::enable_if_t< ext::oneapi::experimental::is_property_list< PropertiesT >::value > parallel_for(range< 3 > NumWorkItems, PropertiesT Props, _KERNELFUNCPARAM(KernelFunc))
std::enable_if_t< detail::check_fn_signature< std::remove_reference_t< FuncT >, void(interop_handle)>::value > ext_codeplay_enqueue_native_command(FuncT &&Func)
Enqueues a command to the SYCL runtime to invoke Func immediately.
void copy(std::shared_ptr< T_Src > Src, accessor< T_Dst, Dims, AccessMode, AccessTarget, IsPlaceholder > Dst)
Copies the content of memory pointed by Src into the memory object accessed by Dst.
handler(const handler &)=delete
void copy(accessor< T_Src, Dims, AccessMode, AccessTarget, IsPlaceholder > Src, T_Dst *Dst)
Copies the content of memory object accessed by Src into the memory pointed by Dst.
typename std::remove_cv_t< std::remove_reference_t< T > > remove_cv_ref_t
std::enable_if_t< ext::oneapi::experimental::is_property_list< PropertiesT >::value > parallel_for(nd_range< Dims > Range, PropertiesT Properties, _KERNELFUNCPARAM(KernelFunc))
std::enable_if_t< detail::AreAllButLastReductions< RestT... >::value > parallel_for(range< 1 > Range, RestT &&...Rest)
std::enable_if_t< detail::AreAllButLastReductions< RestT... >::value > parallel_for(range< 2 > Range, RestT &&...Rest)
std::enable_if_t< detail::AreAllButLastReductions< RestT... >::value > parallel_for(range< 3 > Range, RestT &&...Rest)
void parallel_for_work_group(kernel Kernel, range< Dims > NumWorkGroups, _KERNELFUNCPARAM(KernelFunc))
Hierarchical kernel invocation method of a kernel.
void ext_oneapi_barrier()
Prevents any commands submitted afterward to this queue from executing until all commands previously submitted to this queue have entered the complete state.
std::enable_if_t< ext::oneapi::experimental::is_property_list< PropertiesT >::value > parallel_for(range< 1 > NumWorkItems, PropertiesT Props, _KERNELFUNCPARAM(KernelFunc))
void memcpy(void *Dest, const ext::oneapi::experimental::device_global< T, PropertyListT > &Src, size_t NumBytes=sizeof(T), size_t SrcOffset=0)
Copies data from a device_global to USM memory.
void parallel_for_work_group(range< Dims > NumWorkGroups, _KERNELFUNCPARAM(KernelFunc))
Hierarchical kernel invocation method of a kernel defined as a lambda encoding the body of each work-group to launch.
std::enable_if_t< detail::check_fn_signature< std::remove_reference_t< FuncT >, void()>::value || detail::check_fn_signature< std::remove_reference_t< FuncT >, void(interop_handle)>::value > host_task(FuncT &&Func)
Enqueues a command to the SYCL runtime to invoke Func once.
void set_arg(int argIndex, ext::oneapi::experimental::raw_kernel_arg &&Arg)
void set_specialization_constant(typename std::remove_reference_t< decltype(SpecName)>::value_type Value)
void set_args(Ts &&...Args)
Sets arguments for OpenCL interoperability kernels.
void memcpy(ext::oneapi::experimental::device_global< T, PropertyListT > &Dest, const void *Src, size_t NumBytes=sizeof(T), size_t DestOffset=0)
Copies data from a USM memory region to a device_global.
void parallel_for(range< 2 > NumWorkItems, _KERNELFUNCPARAM(KernelFunc))
void set_arg(int ArgIndex, accessor< DataT, Dims, AccessMode, AccessTarget, IsPlaceholder > Arg)
std::enable_if_t<(sizeof...(RestT) > 1) && detail::AreAllButLastReductions< RestT... >::value && ext::oneapi::experimental::is_property_list< PropertiesT >::value > parallel_for(range< 1 > Range, PropertiesT Properties, RestT &&...Rest)
Reductions.
std::remove_reference_t< decltype(SpecName)>::value_type get_specialization_constant() const
void set_arg(int ArgIndex, local_accessor< DataT, Dims > Arg)
A unique identifier of an item in an index space.
Identifies an instance of the function object executing at each point in a range.
id< Dimensions > get_id() const
range< Dimensions > get_range() const
Provides an abstraction of a SYCL kernel.
Identifies an instance of the function object executing at each point in an nd_range.
id< Dimensions > get_global_id() const
id< Dimensions > get_offset() const
range< Dimensions > get_global_range() const
Defines the iteration domain of both the work-groups and the overall dispatch.
range< Dimensions > get_global_range() const
range< Dimensions > get_local_range() const
id< Dimensions > get_offset() const
Objects of the property_list class are containers for the SYCL properties.
Defines the iteration domain of either a single work-group in a parallel dispatch,...
#define _KERNELFUNCPARAMTYPE
#define __SYCL_KERNEL_ATTR__
#define _KERNELFUNCPARAM(a)
__SYCL_EXTERN_STREAM_ATTRS ostream cout
Linked to standard output.
void withAuxHandler(handler &CGH, FunctorTy Func)
void finalizeHandler(handler &CGH)
decltype(Obj::impl) const & getSyclObjImpl(const Obj &SyclObject)
void * getValueFromDynamicParameter(ext::oneapi::experimental::detail::dynamic_parameter_base &DynamicParamBase)
device getDeviceFromHandler(handler &CommandGroupHandlerRef)
decltype(member_ptr_helper(&F::operator())) argument_helper(int)
id< 1 > getDelinearizedId(const range< 1 > &, size_t Index)
static Arg member_ptr_helper(RetType(Func::*)(Arg) const)
bool isDeviceGlobalUsedInKernel(const void *DeviceGlobalPtr)
static std::enable_if_t< std::is_unsigned_v< T >, bool > multiply_with_overflow_check(T &dst, T x, T y)
std::enable_if_t< std::is_same_v< T, range< Dims > >||std::is_same_v< T, id< Dims > > > checkValueRange(const T &V)
std::enable_if_t< KernelLambdaHasKernelHandlerArgT< KernelType, ArgType >::value > runKernelWithArg(KernelType KernelName, ArgType Arg)
std::shared_ptr< event_impl > EventImplPtr
void markBufferAsInternal(const std::shared_ptr< buffer_impl > &BufImpl)
std::enable_if_t< KernelLambdaHasKernelHandlerArgT< KernelType >::value > runKernelWithoutArg(KernelType KernelName)
std::shared_ptr< LocalAccessorImplHost > LocalAccessorImplPtr
CGType
Type of the command group.
std::shared_ptr< detail::kernel_bundle_impl > KernelBundleImplPtr
bool range_size_fits_in_size_t(const range< Dims > &r)
void associateWithHandler(handler &, AccessorBaseHost *, access::target)
void reduction_parallel_for(handler &CGH, range< Dims > NDRange, PropertiesT Properties, RestT... Rest)
decltype(argument_helper< F, SuggestedArgType >(0)) lambda_arg_type
std::shared_ptr< AccessorImplHost > AccessorImplPtr
constexpr cache_config_enum large_slm
constexpr cache_config_enum large_data
const char * UnsupportedFeatureToString(UnsupportedGraphFeatures Feature)
typename merged_properties< LHSPropertiesT, RHSPropertiesT >::type merged_properties_t
properties< std::tuple< PropertyValueTs... > > properties_t
void mem_advise(handler &CGH, void *Ptr, size_t NumBytes, int Advice)
@ executable
In executable state, the graph is ready to execute.
void copy(handler &CGH, const T *Src, T *Dest, size_t Count)
static constexpr bool has_property()
forward_progress_guarantee
static constexpr auto get_property()
properties< std::tuple<> > empty_properties_t
void fill(sycl::handler &CGH, T *Ptr, const T &Pattern, size_t Count)
void parallel_for(handler &CGH, range< Dimensions > Range, const KernelType &KernelObj, ReductionsT &&...Reductions)
class __SYCL_EBO __SYCL_SPECIAL_CLASS Dimensions
signed char __SYCL2020_DEPRECATED
ext::intel::pipe< name, dataT, min_capacity > pipe
class __SYCL_EBO __SYCL_SPECIAL_CLASS IsPlaceholder
class __SYCL_EBO __SYCL_SPECIAL_CLASS AccessMode
std::error_code make_error_code(sycl::errc E) noexcept
Constructs an error code using e and sycl_category()
C++ wrapper of extern "C" PI interfaces.
Predicate returning true if all template type parameters except the last one are reductions.
ext::oneapi::experimental::detail::merged_properties_t< PropertiesT, get_method_properties > type
typename ext::oneapi::experimental::detail::HasKernelPropertiesGetMethod< KernelType >::properties_t get_method_properties
static constexpr const char * getName()
constexpr static bool value
A struct to describe the properties of an image.
Opaque image memory handle type.
Opaque interop semaphore handle type.
is_device_copyable is a user specializable class template to indicate that a type T is device copyable.