61 #include <type_traits>
70 #if !SYCL_LANGUAGE_VERSION || SYCL_LANGUAGE_VERSION < 202001
71 #define __SYCL_NONCONST_FUNCTOR__
76 #ifdef __SYCL_NONCONST_FUNCTOR__
77 #define _KERNELFUNCPARAMTYPE KernelType
79 #define _KERNELFUNCPARAMTYPE const KernelType &
81 #define _KERNELFUNCPARAM(a) _KERNELFUNCPARAMTYPE a
#if defined(__SYCL_UNNAMED_LAMBDA__)
// Name tags for the library-internal fill/memcpy kernels declared around
// here. With unnamed lambdas enabled they all alias auto_name, so the
// compiler derives the kernel names automatically — presumably named
// class templates are used otherwise; TODO confirm against the #else arm
// (not visible in this chunk).
using __fill = sycl::detail::auto_name;
template <typename T> using __usmfill = sycl::detail::auto_name;
template <typename T> using __usmfill2d = sycl::detail::auto_name;
template <typename T> using __usmmemcpy2d = sycl::detail::auto_name;
99 template <
typename T_Src,
typename T_Dst,
int Dims,
104 template <
typename T_Src,
typename T_Dst,
int Dims,
127 template <
typename T_Src,
typename T_Dst,
int Dims,
132 template <
typename T_Src,
typename T_Dst,
int Dims,
150 inline namespace _V1 {
155 template <
typename T,
int Dimensions,
typename AllocatorT,
typename Enable>
158 namespace ext::intel::experimental {
159 template <
class _name,
class _dataT, int32_t _min_capacity,
class _propertiesT,
164 namespace ext::oneapi::experimental::detail {
175 class image_accessor;
176 template <
typename RetType,
typename Func,
typename Arg>
181 template <
typename RetType,
typename Func,
typename Arg>
186 template <
typename RetType,
typename Func,
typename Arg1,
typename Arg2>
191 template <
typename RetType,
typename Func,
typename Arg1,
typename Arg2>
194 template <
typename F,
typename SuggestedArgType>
197 template <typename F, typename SuggestedArgType>
200 template <typename F, typename SuggestedArgType>
220 #if __SYCL_ID_QUERIES_FIT_IN_INT__
// Primary template, declared but never defined: only the range<Dims> and
// id<Dims> specializations that follow provide the diagnostic Msg string
// emitted when the __SYCL_ID_QUERIES_FIT_IN_INT__ range checks fail.
template <typename T> struct NotIntMsg;
223 template <
int Dims>
struct NotIntMsg<
range<Dims>> {
224 constexpr
static const char *Msg =
225 "Provided range is out of integer limits. Pass "
226 "`-fno-sycl-id-queries-fit-in-int' to disable range check.";
229 template <
int Dims>
struct NotIntMsg<
id<Dims>> {
230 constexpr
static const char *Msg =
231 "Provided offset is out of integer limits. Pass "
232 "`-fno-sycl-id-queries-fit-in-int' to disable offset check.";
238 template <
typename KernelType,
typename PropertiesT,
typename Cond =
void>
242 template <
typename KernelType,
typename PropertiesT>
244 KernelType, PropertiesT,
245 std::enable_if_t<ext::oneapi::experimental::detail::
246 HasKernelPropertiesGetMethod<KernelType>::value>> {
252 "get(sycl::ext::oneapi::experimental::properties_tag) member in kernel "
253 "functor class must return a valid property list.");
258 #if __SYCL_ID_QUERIES_FIT_IN_INT__
259 template <
typename T,
typename ValT>
260 typename std::enable_if_t<std::is_same<ValT, size_t>::value ||
261 std::is_same<ValT, unsigned long long>::value>
262 checkValueRangeImpl(ValT V) {
263 static constexpr
size_t Limit =
270 template <
int Dims,
typename T>
271 typename std::enable_if_t<std::is_same_v<T, range<Dims>> ||
272 std::is_same_v<T, id<Dims>>>
274 #if __SYCL_ID_QUERIES_FIT_IN_INT__
275 for (
size_t Dim = 0; Dim < Dims; ++Dim)
276 checkValueRangeImpl<T>(V[Dim]);
279 unsigned long long Product = 1;
280 for (
size_t Dim = 0; Dim < Dims; ++Dim) {
283 checkValueRangeImpl<T>(Product);
293 #if __SYCL_ID_QUERIES_FIT_IN_INT__
294 checkValueRange<Dims>(R);
295 checkValueRange<Dims>(O);
297 for (
size_t Dim = 0; Dim < Dims; ++Dim) {
298 unsigned long long Sum = R[Dim] + O[Dim];
300 checkValueRangeImpl<range<Dims>>(Sum);
308 template <
int Dims,
typename T>
309 typename std::enable_if_t<std::is_same_v<T, nd_range<Dims>>>
311 #if __SYCL_ID_QUERIES_FIT_IN_INT__
312 checkValueRange<Dims>(V.get_global_range());
313 checkValueRange<Dims>(V.get_local_range());
314 checkValueRange<Dims>(V.get_offset());
316 checkValueRange<Dims>(V.get_global_range(), V.get_offset());
332 : Id(Id), InitId(Id), UserRange(UserRange), RoundedRange(RoundedRange) {
333 for (
int i = 0; i < Dims; ++i)
334 if (Id[i] >= UserRange[i])
338 explicit operator bool() {
return !Done; }
341 for (
int i = 0; i < Dims; ++i) {
342 Id[i] += RoundedRange[i];
343 if (Id[i] < UserRange[i])
352 template <
typename KernelType>
auto getItem() {
353 if constexpr (std::is_invocable_v<KernelType,
item<Dims> &> ||
354 std::is_invocable_v<KernelType,
item<Dims> &, kernel_handler>)
355 return detail::Builder::createItem<Dims, true>(UserRange,
getId(), {});
360 "Kernel must be invocable with an item!");
361 return detail::Builder::createItem<Dims, false>(UserRange,
getId());
372 template <
typename TransformedArgType,
int Dims,
typename KernelType>
381 auto item = Gen.template getItem<KernelType>();
387 template <
typename TransformedArgType,
int Dims,
typename KernelType>
396 auto item = Gen.template getItem<KernelType>();
402 using std::enable_if_t;
403 using sycl::detail::queue_impl;
407 template <
typename T>
408 static std::enable_if_t<std::is_unsigned_v<T>,
bool>
416 for (
int i = 0; i < Dims; ++i) {
// Constructs a handler bound to Queue; IsHost presumably marks execution
// on the host device — TODO confirm against the out-of-line definition.
handler(std::shared_ptr<detail::queue_impl> Queue, bool IsHost);

// Constructor taking the submission queue together with the primary and
// secondary queues of a fallback pair — presumably used for submissions
// that provide a secondary (fallback) queue; verify at the call sites.
handler(std::shared_ptr<detail::queue_impl> Queue,
        std::shared_ptr<detail::queue_impl> PrimaryQueue,
        std::shared_ptr<detail::queue_impl> SecondaryQueue, bool IsHost);

// Constructs a handler that records into the given SYCL graph
// implementation instead of submitting to a queue.
handler(std::shared_ptr<ext::oneapi::experimental::detail::graph_impl> Graph);
488 template <
typename T,
typename F =
typename std::remove_const_t<
489 typename std::remove_reference_t<T>>>
490 F *storePlainArg(T &&Arg) {
491 CGData.MArgsStorage.emplace_back(
sizeof(T));
492 auto Storage =
reinterpret_cast<F *
>(CGData.MArgsStorage.back().data());
501 void throwIfActionIsCreated() {
504 "Attempt to set multiple actions for the "
505 "command group. Command group must consist of "
506 "a single kernel or explicit memory operation.");
509 constexpr
static int AccessTargetMask = 0x7ff;
513 template <
typename KernelName,
typename KernelType>
514 void throwOnLocalAccessorMisuse()
const {
517 using KI = sycl::detail::KernelInfo<NameT>;
519 auto *KernelArgs = &KI::getParamDesc(0);
521 for (
unsigned I = 0; I < KI::getNumParams(); ++I) {
524 static_cast<access::target>(KernelArgs[I].info & AccessTargetMask);
526 (AccTarget == target::local))
529 "A local accessor must not be used in a SYCL kernel function "
530 "that is invoked via single_task or via the simple form of "
531 "parallel_for that takes a range parameter.");
538 extractArgsAndReqsFromLambda(
char *LambdaPtr,
size_t KernelArgsNum,
// Presumably gathers kernel arguments and accessor requirements from a
// pre-built kernel object (it is invoked right before MKernelName is set
// from getKernelName() in the kernel-object paths below) — declaration
// only; defined out of line. TODO confirm against the definition.
void extractArgsAndReqs();
546 const int Size,
const size_t Index,
size_t &IndexShift,
547 bool IsKernelCreatedFromSource,
bool IsESIMD);
552 template <
typename LambdaNameT>
bool lambdaAndKernelHaveEqualName() {
558 assert(MKernel &&
"MKernel is not initialized");
561 return KernelName == LambdaName;
581 void addStream(
const std::shared_ptr<detail::stream_impl> &Stream) {
582 MStreamStorage.push_back(Stream);
// Stores a type-erased shared_ptr (see the buffer keep-alive use further
// down in this class) — presumably so reduction temporaries outlive the
// command-group submission; TODO confirm the intended lifetime.
void addReduction(const std::shared_ptr<const void> &ReduObj);
597 template <
typename T,
int Dimensions,
typename AllocatorT,
typename Enable>
602 addReduction(std::shared_ptr<const void>(ReduBuf));
608 bool is_host() {
return MIsHost; }
610 #ifdef __SYCL_DEVICE_ONLY__
629 template <
typename T,
typename... Ts>
630 void setArgsHelper(
int ArgIndex, T &&Arg, Ts &&...Args) {
631 set_arg(ArgIndex, std::move(Arg));
632 setArgsHelper(++ArgIndex, std::move(Args)...);
// Recursion terminator for the variadic setArgsHelper above: reached once
// every argument has been consumed, so there is nothing left to set. The
// unnamed int mirrors the ArgIndex parameter of the recursive overload.
void setArgsHelper(int) {}
637 void setLocalAccessorArgHelper(
int ArgIndex,
642 MLocalAccStorage.push_back(std::move(LocalAccImpl));
644 static_cast<int>(access::target::local), ArgIndex);
650 void setArgHelper(
int ArgIndex,
655 #ifndef __SYCL_DEVICE_ONLY__
656 setLocalAccessorArgHelper(ArgIndex, Arg);
661 template <
typename DataT,
int Dims>
665 #ifndef __SYCL_DEVICE_ONLY__
666 setLocalAccessorArgHelper(ArgIndex, Arg);
673 typename std::enable_if_t<AccessTarget != access::target::local, void>
681 CGData.MRequirements.push_back(Req);
683 CGData.MAccStorage.push_back(std::move(AccImpl));
686 static_cast<int>(AccessTarget), ArgIndex);
689 template <
typename T>
void setArgHelper(
int ArgIndex, T &&Arg) {
690 auto StoredArg =
static_cast<void *
>(storePlainArg(Arg));
692 if (!std::is_same<cl_mem, T>::value && std::is_pointer<T>::value) {
694 sizeof(T), ArgIndex);
697 StoredArg,
sizeof(T), ArgIndex);
701 void setArgHelper(
int ArgIndex, sampler &&Arg) {
702 auto StoredArg =
static_cast<void *
>(storePlainArg(Arg));
704 sizeof(sampler), ArgIndex);
708 template <
typename T>
710 setArgHelper(
int ArgIndex,
716 setArgHelper(ArgIndex, std::move(ArgValue));
719 registerDynamicParameter(DynamicParam, ArgIndex);
726 void registerDynamicParameter(
732 void verifyKernelInvoc(
const kernel &Kernel) {
733 std::ignore = Kernel;
751 template <
class KernelType,
class NormalizedKernelType,
int Dims>
752 KernelType *ResetHostKernelHelper(
const KernelType &KernelFunc) {
753 NormalizedKernelType NormalizedKernel(KernelFunc);
754 auto NormalizedKernelFunc =
758 std::move(NormalizedKernelFunc));
759 MHostKernel.reset(HostKernelPtr);
760 return &HostKernelPtr->MKernel.template target<NormalizedKernelType>()
765 template <
class KernelType,
typename ArgT,
int Dims>
766 std::enable_if_t<std::is_same_v<ArgT, sycl::id<Dims>>, KernelType *>
767 ResetHostKernel(
const KernelType &KernelFunc) {
768 struct NormalizedKernelType {
769 KernelType MKernelFunc;
770 NormalizedKernelType(
const KernelType &KernelFunc)
771 : MKernelFunc(KernelFunc) {}
776 return ResetHostKernelHelper<KernelType, struct NormalizedKernelType, Dims>(
781 template <
class KernelType,
typename ArgT,
int Dims>
782 std::enable_if_t<std::is_same_v<ArgT, sycl::nd_item<Dims>>, KernelType *>
783 ResetHostKernel(
const KernelType &KernelFunc) {
784 struct NormalizedKernelType {
785 KernelType MKernelFunc;
786 NormalizedKernelType(
const KernelType &KernelFunc)
787 : MKernelFunc(KernelFunc) {}
792 return ResetHostKernelHelper<KernelType, struct NormalizedKernelType, Dims>(
797 template <
class KernelType,
typename ArgT,
int Dims>
798 std::enable_if_t<std::is_same_v<ArgT, sycl::item<Dims, false>>, KernelType *>
799 ResetHostKernel(
const KernelType &KernelFunc) {
800 struct NormalizedKernelType {
801 KernelType MKernelFunc;
802 NormalizedKernelType(
const KernelType &KernelFunc)
803 : MKernelFunc(KernelFunc) {}
810 return ResetHostKernelHelper<KernelType, struct NormalizedKernelType, Dims>(
815 template <
class KernelType,
typename ArgT,
int Dims>
816 std::enable_if_t<std::is_same_v<ArgT, sycl::item<Dims, true>>, KernelType *>
817 ResetHostKernel(
const KernelType &KernelFunc) {
818 struct NormalizedKernelType {
819 KernelType MKernelFunc;
820 NormalizedKernelType(
const KernelType &KernelFunc)
821 : MKernelFunc(KernelFunc) {}
828 return ResetHostKernelHelper<KernelType, struct NormalizedKernelType, Dims>(
833 template <
class KernelType,
typename ArgT,
int Dims>
834 typename std::enable_if_t<std::is_same_v<ArgT, void>, KernelType *>
835 ResetHostKernel(
const KernelType &KernelFunc) {
836 struct NormalizedKernelType {
837 KernelType MKernelFunc;
838 NormalizedKernelType(
const KernelType &KernelFunc)
839 : MKernelFunc(KernelFunc) {}
845 return ResetHostKernelHelper<KernelType, struct NormalizedKernelType, Dims>(
853 template <
class KernelType,
typename ArgT,
int Dims>
854 std::enable_if_t<std::is_same_v<ArgT, sycl::group<Dims>>, KernelType *>
855 ResetHostKernel(
const KernelType &KernelFunc) {
858 return (KernelType *)(MHostKernel->getPtr());
868 void verifyUsedKernelBundle(
const std::string &KernelName) {
879 template <
typename KernelName,
typename KernelType,
int Dims,
880 typename LambdaArgType>
881 void StoreLambda(KernelType KernelFunc) {
883 constexpr
bool IsCallableWithKernelHandler =
885 LambdaArgType>::value;
887 if (IsCallableWithKernelHandler && MIsHost) {
888 throw sycl::feature_not_supported(
889 "kernel_handler is not yet supported by host device.",
890 PI_ERROR_INVALID_OPERATION);
893 KernelType *KernelPtr =
894 ResetHostKernel<KernelType, LambdaArgType, Dims>(KernelFunc);
896 constexpr
bool KernelHasName =
897 KI::getName() !=
nullptr && KI::getName()[0] !=
'\0';
903 !KernelHasName ||
sizeof(KernelFunc) == KI::getKernelSize(),
904 "Unexpected kernel lambda size. This can be caused by an "
905 "external host compiler producing a lambda with an "
906 "unexpected layout. This is a limitation of the compiler."
907 "In many cases the difference is related to capturing constexpr "
908 "variables. In such cases removing constexpr specifier aligns the "
909 "captures between the host compiler and the device compiler."
911 "In case of MSVC, passing "
912 "-fsycl-host-compiler-options='/std:c++latest' "
920 extractArgsAndReqsFromLambda(
reinterpret_cast<char *
>(KernelPtr),
921 KI::getNumParams(), &KI::getParamDesc(0),
923 MKernelName = KI::getName();
929 MArgs = MAssociatedAccesors;
934 if (IsCallableWithKernelHandler) {
935 getOrInsertHandlerKernelBundle(
true);
945 void processProperties(PropertiesT Props) {
949 "Template type is not a property list.");
956 "Floating point control property is supported for ESIMD kernels only.");
970 constexpr
bool UsesRootSync = PropertiesT::template
has_property<
972 setKernelIsCooperative(UsesRootSync);
979 template <
int Dims_Src,
int Dims_Dst>
982 if (Dims_Src > Dims_Dst)
984 for (
size_t I = 0; I < Dims_Src; ++I)
995 template <
typename TSrc,
int DimSrc,
access::mode ModeSrc,
999 std::enable_if_t<(DimSrc > 0) && (DimDst > 0),
bool>
1003 IsCopyingRectRegionAvailable(Src.get_range(), Dst.get_range()))
1007 parallel_for<
__copyAcc2Acc<TSrc, DimSrc, ModeSrc, TargetSrc, TDst, DimDst,
1008 ModeDst, TargetDst, IsPHSrc, IsPHDst>>(
1009 LinearizedRange, [=](
id<1> Id) {
1010 size_t Index = Id[0];
1013 Dst[DstId] = Src[SrcId];
1025 template <
typename TSrc,
int DimSrc,
access::mode ModeSrc,
1029 std::enable_if_t<DimSrc == 0 || DimDst == 0, bool>
1035 single_task<
__copyAcc2Acc<TSrc, DimSrc, ModeSrc, TargetSrc, TDst, DimDst,
1036 ModeDst, TargetDst, IsPHSrc, IsPHDst>>(
1037 [=]() { *(Dst.get_pointer()) = *(Src.get_pointer()); });
1041 #ifndef __SYCL_DEVICE_ONLY__
1047 template <
typename TSrc,
typename TDst,
int Dim,
access::mode AccMode,
1049 std::enable_if_t<(Dim > 0)>
1053 parallel_for<__copyAcc2Ptr<TSrc, TDst, Dim, AccMode, AccTarget, IsPH>>(
1056 using TSrcNonConst =
typename std::remove_const_t<TSrc>;
1057 (
reinterpret_cast<TSrcNonConst *
>(Dst))[LinearIndex] = Src[Index];
1066 template <
typename TSrc,
typename TDst,
int Dim,
access::mode AccMode,
1068 std::enable_if_t<Dim == 0>
1071 single_task<__copyAcc2Ptr<TSrc, TDst, Dim, AccMode, AccTarget, IsPH>>(
1073 using TSrcNonConst =
typename std::remove_const_t<TSrc>;
1074 *(
reinterpret_cast<TSrcNonConst *
>(Dst)) = *(Src.get_pointer());
1082 template <
typename TSrc,
typename TDst,
int Dim,
access::mode AccMode,
1084 std::enable_if_t<(Dim > 0)>
1085 copyPtrToAccHost(TSrc *Src,
1088 parallel_for<__copyPtr2Acc<TSrc, TDst, Dim, AccMode, AccTarget, IsPH>>(
1091 Dst[Index] = (
reinterpret_cast<const TDst *
>(Src))[LinearIndex];
1100 template <
typename TSrc,
typename TDst,
int Dim,
access::mode AccMode,
1102 std::enable_if_t<Dim == 0>
1103 copyPtrToAccHost(TSrc *Src,
1105 single_task<__copyPtr2Acc<TSrc, TDst, Dim, AccMode, AccTarget, IsPH>>(
1107 *(Dst.get_pointer()) = *(
reinterpret_cast<const TDst *
>(Src));
1112 constexpr
static bool isConstOrGlobal(
access::target AccessTarget) {
1114 AccessTarget == access::target::constant_buffer;
1117 constexpr
static bool isImageOrImageArray(
access::target AccessTarget) {
1122 constexpr
static bool
1124 return isConstOrGlobal(AccessTarget) || isImageOrImageArray(AccessTarget);
1132 constexpr
static bool
1141 constexpr
static bool isBackendSupportedFillSize(
size_t Size) {
1142 return Size == 1 || Size == 2 || Size == 4 || Size == 8 || Size == 16 ||
1143 Size == 32 || Size == 64 || Size == 128;
1146 template <
int Dims,
typename LambdaArgType>
struct TransformUserItemType {
1147 using type = std::conditional_t<
1148 std::is_convertible_v<nd_item<Dims>, LambdaArgType>,
nd_item<Dims>,
1149 std::conditional_t<std::is_convertible_v<item<Dims>, LambdaArgType>,
// Presumably the per-dimension limit on the number of work-groups for the
// target device, or nullopt when unavailable — TODO confirm against the
// out-of-line definition.
std::optional<std::array<size_t, 3>> getMaxWorkGroups();
// Variant returning the limits plus a flag saying whether they are valid
// (consumed via structured bindings in getRoundedRange further down).
std::tuple<std::array<size_t, 3>, bool> getMaxWorkGroups_v2();
1159 std::tuple<range<Dims>,
bool> getRoundedRange(
range<Dims> UserRange) {
1178 if (this->DisableRangeRounding())
1182 size_t MinFactorX = 16;
1184 size_t GoodFactor = 32;
1186 size_t MinRangeX = 1024;
1190 this->GetRangeRoundingSettings(MinFactorX, GoodFactor, MinRangeX);
1202 auto [MaxWGs, HasMaxWGs] = getMaxWorkGroups_v2();
1205 for (
int i = 0; i < Dims; ++i)
1212 for (
int i = 0; i < Dims; ++i)
1213 IdResult[i] = (std::min)(Limit, MaxWGs[Dims - i - 1]);
1218 for (
int i = 0; i < Dims; ++i) {
1219 auto DesiredSize = MaxNWGs[i] * GoodFactor;
1221 DesiredSize <= M ? DesiredSize : (M / GoodFactor) * GoodFactor;
1224 bool DidAdjust =
false;
1225 auto Adjust = [&](
int Dim,
size_t Value) {
1226 if (this->RangeRoundingTrace())
1227 std::cout <<
"parallel_for range adjusted at dim " << Dim <<
" from "
1228 << RoundedRange[Dim] <<
" to " << Value << std::endl;
1229 RoundedRange[Dim] = Value;
1233 #ifdef __SYCL_EXP_PARALLEL_FOR_RANGE_ROUNDING__
1234 size_t GoodExpFactor = 1;
1249 this->GetRangeRoundingSettings(MinFactorX, GoodExpFactor, MinRangeX);
1251 for (
auto i = 0; i < Dims; ++i)
1252 if (UserRange[i] % GoodExpFactor) {
1253 Adjust(i, ((UserRange[i] / GoodExpFactor) + 1) * GoodExpFactor);
1259 if (RoundedRange[0] % MinFactorX != 0 && RoundedRange[0] >= MinRangeX) {
1264 Adjust(0, ((RoundedRange[0] + GoodFactor - 1) / GoodFactor) * GoodFactor);
1267 #ifdef __SYCL_FORCE_PARALLEL_FOR_RANGE_ROUNDING__
1273 for (
int i = 0; i < Dims; ++i)
1274 if (RoundedRange[i] > MaxRange[i])
1275 Adjust(i, MaxRange[i]);
1279 return {RoundedRange,
true};
1294 typename KernelName,
typename KernelType,
int Dims,
1296 void parallel_for_lambda_impl(
range<Dims> UserRange, PropertiesT Props,
1297 KernelType KernelFunc) {
1298 throwIfActionIsCreated();
1299 throwOnLocalAccessorMisuse<KernelName, KernelType>();
1302 "The total number of work-items in "
1303 "a range must fit within size_t");
1305 using LambdaArgType = sycl::detail::lambda_arg_type<KernelType, item<Dims>>;
1310 using TransformedArgType = std::conditional_t<
1311 std::is_integral<LambdaArgType>::value && Dims == 1,
item<Dims>,
1312 typename TransformUserItemType<Dims, LambdaArgType>::type>;
1315 "Kernel argument cannot have a sycl::nd_item type in "
1316 "sycl::parallel_for with sycl::range");
1318 static_assert(std::is_convertible_v<
item<Dims>, LambdaArgType> ||
1320 "sycl::parallel_for(sycl::range) kernel must have the "
1321 "first argument of sycl::item type, or of a type which is "
1322 "implicitly convertible from sycl::item");
1324 using RefLambdaArgType = std::add_lvalue_reference_t<LambdaArgType>;
1326 (std::is_invocable_v<KernelType, RefLambdaArgType> ||
1327 std::is_invocable_v<KernelType, RefLambdaArgType, kernel_handler>),
1328 "SYCL kernel lambda/functor has an unexpected signature, it should be "
1329 "invocable with sycl::item and optionally sycl::kernel_handler");
1341 #if !defined(__SYCL_DISABLE_PARALLEL_FOR_RANGE_ROUNDING__) && \
1342 !defined(DPCPP_HOST_DEVICE_OPENMP) && \
1343 !defined(DPCPP_HOST_DEVICE_PERF_NATIVE) && SYCL_LANGUAGE_VERSION >= 202001
1344 auto [RoundedRange, HasRoundedRange] = getRoundedRange(UserRange);
1345 if (HasRoundedRange) {
1348 getRangeRoundedKernelLambda<NameWT, TransformedArgType, Dims>(
1349 KernelFunc, UserRange);
1351 using KName = std::conditional_t<std::is_same<KernelType, NameT>::value,
1352 decltype(Wrapper), NameWT>;
1354 kernel_parallel_for_wrapper<KName, TransformedArgType, decltype(Wrapper),
1355 PropertiesT>(Wrapper);
1356 #ifndef __SYCL_DEVICE_ONLY__
1362 detail::checkValueRange<Dims>(UserRange);
1363 MNDRDesc.set(RoundedRange);
1364 StoreLambda<KName, decltype(Wrapper), Dims, TransformedArgType>(
1365 std::move(Wrapper));
1367 setNDRangeUsed(
false);
1376 #ifndef __SYCL_FORCE_PARALLEL_FOR_RANGE_ROUNDING__
1379 kernel_parallel_for_wrapper<NameT, TransformedArgType, KernelType,
1380 PropertiesT>(KernelFunc);
1381 #ifndef __SYCL_DEVICE_ONLY__
1382 processProperties<NameT, PropertiesT>(Props);
1383 detail::checkValueRange<Dims>(UserRange);
1384 MNDRDesc.set(std::move(UserRange));
1385 StoreLambda<NameT, KernelType, Dims, TransformedArgType>(
1386 std::move(KernelFunc));
1388 setNDRangeUsed(
false);
1409 template <
typename KernelName,
typename KernelType,
int Dims,
1410 typename PropertiesT>
1411 void parallel_for_impl(
nd_range<Dims> ExecutionRange, PropertiesT Props,
1413 throwIfActionIsCreated();
1419 using LambdaArgType =
1420 sycl::detail::lambda_arg_type<KernelType, nd_item<Dims>>;
1423 "Kernel argument of a sycl::parallel_for with sycl::nd_range "
1424 "must be either sycl::nd_item or be convertible from sycl::nd_item");
1427 (void)ExecutionRange;
1429 kernel_parallel_for_wrapper<NameT, TransformedArgType, KernelType,
1430 PropertiesT>(KernelFunc);
1431 #ifndef __SYCL_DEVICE_ONLY__
1432 processProperties<NameT, PropertiesT>(Props);
1433 detail::checkValueRange<Dims>(ExecutionRange);
1434 MNDRDesc.set(std::move(ExecutionRange));
1435 StoreLambda<NameT, KernelType, Dims, TransformedArgType>(
1436 std::move(KernelFunc));
1438 setNDRangeUsed(
true);
1451 throwIfActionIsCreated();
1453 detail::checkValueRange<Dims>(NumWorkItems);
1454 MNDRDesc.set(std::move(NumWorkItems));
1456 setNDRangeUsed(
false);
1457 extractArgsAndReqs();
1458 MKernelName = getKernelName();
1472 typename KernelName,
typename KernelType,
int Dims,
1474 void parallel_for_work_group_lambda_impl(
range<Dims> NumWorkGroups,
1477 throwIfActionIsCreated();
1483 using LambdaArgType =
1484 sycl::detail::lambda_arg_type<KernelType, group<Dims>>;
1485 (void)NumWorkGroups;
1487 kernel_parallel_for_work_group_wrapper<NameT, LambdaArgType, KernelType,
1488 PropertiesT>(KernelFunc);
1489 #ifndef __SYCL_DEVICE_ONLY__
1490 processProperties<NameT, PropertiesT>(Props);
1491 detail::checkValueRange<Dims>(NumWorkGroups);
1492 MNDRDesc.setNumWorkGroups(NumWorkGroups);
1493 StoreLambda<NameT, KernelType, Dims, LambdaArgType>(std::move(KernelFunc));
1495 setNDRangeUsed(
false);
1512 typename KernelName,
typename KernelType,
int Dims,
1514 void parallel_for_work_group_lambda_impl(
range<Dims> NumWorkGroups,
1518 throwIfActionIsCreated();
1524 using LambdaArgType =
1525 sycl::detail::lambda_arg_type<KernelType, group<Dims>>;
1526 (void)NumWorkGroups;
1529 kernel_parallel_for_work_group_wrapper<NameT, LambdaArgType, KernelType,
1530 PropertiesT>(KernelFunc);
1531 #ifndef __SYCL_DEVICE_ONLY__
1532 processProperties<NameT, PropertiesT>(Props);
1535 detail::checkValueRange<Dims>(ExecRange);
1536 MNDRDesc.set(std::move(ExecRange));
1537 StoreLambda<NameT, KernelType, Dims, LambdaArgType>(std::move(KernelFunc));
1542 #ifdef SYCL_LANGUAGE_VERSION
1543 #define __SYCL_KERNEL_ATTR__ [[clang::sycl_kernel]]
1545 #define __SYCL_KERNEL_ATTR__
1550 template <
typename KernelName,
typename KernelType,
typename... Props>
1551 #ifdef __SYCL_DEVICE_ONLY__
1552 [[__sycl_detail__::add_ir_attributes_function(
1560 #ifdef __SYCL_DEVICE_ONLY__
1569 template <
typename KernelName,
typename KernelType,
typename... Props>
1570 #ifdef __SYCL_DEVICE_ONLY__
1571 [[__sycl_detail__::add_ir_attributes_function(
1579 #ifdef __SYCL_DEVICE_ONLY__
1589 template <
typename KernelName,
typename ElementType,
typename KernelType,
1591 #ifdef __SYCL_DEVICE_ONLY__
1592 [[__sycl_detail__::add_ir_attributes_function(
1598 #ifdef __SYCL_DEVICE_ONLY__
1599 KernelFunc(detail::Builder::getElement(detail::declptr<ElementType>()));
1607 template <
typename KernelName,
typename ElementType,
typename KernelType,
1609 #ifdef __SYCL_DEVICE_ONLY__
1610 [[__sycl_detail__::add_ir_attributes_function(
1616 #ifdef __SYCL_DEVICE_ONLY__
1617 KernelFunc(detail::Builder::getElement(detail::declptr<ElementType>()), KH);
1626 template <
typename KernelName,
typename ElementType,
typename KernelType,
1628 #ifdef __SYCL_DEVICE_ONLY__
1629 [[__sycl_detail__::add_ir_attributes_function(
1635 #ifdef __SYCL_DEVICE_ONLY__
1636 KernelFunc(detail::Builder::getElement(detail::declptr<ElementType>()));
1644 template <
typename KernelName,
typename ElementType,
typename KernelType,
1646 #ifdef __SYCL_DEVICE_ONLY__
1647 [[__sycl_detail__::add_ir_attributes_function(
1653 kernel_handler KH) {
1654 #ifdef __SYCL_DEVICE_ONLY__
1655 KernelFunc(detail::Builder::getElement(detail::declptr<ElementType>()), KH);
1662 template <
typename... Props>
struct KernelPropertiesUnpackerImpl {
1669 template <
typename... TypesToForward,
typename... ArgsTy>
1670 static void kernel_single_task_unpack(handler *h, ArgsTy... Args) {
1671 h->kernel_single_task<TypesToForward..., Props...>(Args...);
1674 template <
typename... TypesToForward,
typename... ArgsTy>
1675 static void kernel_parallel_for_unpack(handler *h, ArgsTy... Args) {
1676 h->kernel_parallel_for<TypesToForward..., Props...>(Args...);
1679 template <
typename... TypesToForward,
typename... ArgsTy>
1680 static void kernel_parallel_for_work_group_unpack(handler *h,
1682 h->kernel_parallel_for_work_group<TypesToForward..., Props...>(Args...);
1686 template <
typename PropertiesT>
1687 struct KernelPropertiesUnpacker :
public KernelPropertiesUnpackerImpl<> {
1691 ext::oneapi::experimental::is_property_list<PropertiesT>::value,
1692 "Template type is not a property list.");
1695 template <
typename... Props>
1696 struct KernelPropertiesUnpacker<
1698 :
public KernelPropertiesUnpackerImpl<Props...> {};
1712 template <
typename KernelName,
typename KernelType,
typename PropertiesT,
1713 bool HasKernelHandlerArg,
typename FuncTy>
1715 #ifdef __SYCL_DEVICE_ONLY__
1716 detail::CheckDeviceCopyable<KernelType>();
1718 using MergedPropertiesT =
1719 typename detail::GetMergedKernelProperties<KernelType,
1721 using Unpacker = KernelPropertiesUnpacker<MergedPropertiesT>;
1722 #ifndef __SYCL_DEVICE_ONLY__
1724 if constexpr (ext::oneapi::experimental::detail::
1725 HasKernelPropertiesGetMethod<
1727 processProperties<KernelName>(
1728 KernelFunc.get(ext::oneapi::experimental::properties_tag{}));
1731 if constexpr (HasKernelHandlerArg) {
1733 Lambda(Unpacker{},
this, KernelFunc, KH);
1735 Lambda(Unpacker{},
this, KernelFunc);
1743 typename KernelName,
typename KernelType,
1746 unpack<KernelName, KernelType, PropertiesT,
1748 KernelFunc, [&](
auto Unpacker,
auto... args) {
1749 Unpacker.template kernel_single_task_unpack<KernelName, KernelType>(
1755 typename KernelName,
typename ElementType,
typename KernelType,
1758 unpack<KernelName, KernelType, PropertiesT,
1759 detail::KernelLambdaHasKernelHandlerArgT<KernelType,
1760 ElementType>::value>(
1761 KernelFunc, [&](
auto Unpacker,
auto... args) {
1762 Unpacker.template kernel_parallel_for_unpack<KernelName, ElementType,
1763 KernelType>(args...);
1768 typename KernelName,
typename ElementType,
typename KernelType,
1770 void kernel_parallel_for_work_group_wrapper(
_KERNELFUNCPARAM(KernelFunc)) {
1771 unpack<KernelName, KernelType, PropertiesT,
1772 detail::KernelLambdaHasKernelHandlerArgT<KernelType,
1773 ElementType>::value>(
1774 KernelFunc, [&](
auto Unpacker,
auto... args) {
1775 Unpacker.template kernel_parallel_for_work_group_unpack<
1776 KernelName, ElementType, KernelType>(args...);
1788 typename KernelName,
typename KernelType,
1790 void single_task_lambda_impl(PropertiesT Props,
1793 throwIfActionIsCreated();
1794 throwOnLocalAccessorMisuse<KernelName, KernelType>();
1800 kernel_single_task_wrapper<NameT, KernelType, PropertiesT>(KernelFunc);
1801 #ifndef __SYCL_DEVICE_ONLY__
1804 MNDRDesc.set(range<1>{1});
1805 processProperties<NameT, PropertiesT>(Props);
1806 StoreLambda<NameT, KernelType, 1,
void>(KernelFunc);
// Records that the user explicitly supplied a kernel bundle for this
// command group; after this, reading specialization constants throws
// (see get_specialization_constant below).
void setStateExplicitKernelBundle();
// Records that a specialization constant has been set on this handler
// (called from set_specialization_constant below).
void setStateSpecConstSet();
// True if the user explicitly set the kernel bundle for this handler.
bool isStateExplicitKernelBundle() const;
// Returns the kernel bundle associated with this handler; when Insert is
// true, presumably creates and attaches one if none exists yet (callers
// in this file pass true before touching spec constants) — TODO confirm.
std::shared_ptr<detail::kernel_bundle_impl>
getOrInsertHandlerKernelBundle(bool Insert) const;

// Associates the kernel bundle used by Kernel with this handler.
void setHandlerKernelBundle(kernel Kernel);

// Installs an existing kernel_bundle_impl as this handler's bundle.
void setHandlerKernelBundle(
    const std::shared_ptr<detail::kernel_bundle_impl> &NewKernelBundleImpPtr);
1823 template <
typename FuncT>
1824 std::enable_if_t<detail::check_fn_signature<std::remove_reference_t<FuncT>,
1826 detail::check_fn_signature<std::remove_reference_t<FuncT>,
1827 void(interop_handle)>::value>
1828 host_task_impl(FuncT &&Func) {
1829 throwIfActionIsCreated();
1831 MNDRDesc.set(range<1>(1));
1834 MArgs = MAssociatedAccesors;
1836 MHostTask.reset(
new detail::HostTask(std::move(Func)));
// Accessor for the graph this handler records into — presumably non-null
// only when constructed from a graph_impl (see the graph constructor
// above) or when the queue is in recording mode; TODO confirm.
std::shared_ptr<ext::oneapi::experimental::detail::graph_impl>
getCommandGraph() const;
1861 template <auto &SpecName>
1863 typename std::remove_reference_t<decltype(SpecName)>::
value_type Value) {
1865 setStateSpecConstSet();
1868 getOrInsertHandlerKernelBundle(
true);
1870 detail::createSyclObjFromImpl<kernel_bundle<bundle_state::input>>(
1872 .set_specialization_constant<SpecName>(Value);
1875 template <auto &SpecName>
1876 typename std::remove_reference_t<decltype(SpecName)>
::value_type
1879 if (isStateExplicitKernelBundle())
1881 "Specialization constants cannot be read after "
1882 "explicitly setting the used kernel bundle");
1885 getOrInsertHandlerKernelBundle(
true);
1887 return detail::createSyclObjFromImpl<kernel_bundle<bundle_state::input>>(
1889 .get_specialization_constant<SpecName>();
1903 template <
typename DataT,
int Dims,
access::mode AccMode,
1906 if (Acc.is_placeholder())
1919 template <
typename DataT,
int Dims,
access::mode AccMode,
1925 AccT Acc = *
static_cast<AccT *
>(
1927 if (Acc.is_placeholder())
// Registers Event as a dependency: the command group associated with this
// handler will not begin execution until Event completes.
void depends_on(event Event);
// Overload registering a set of event dependencies at once.
void depends_on(const std::vector<event> &Events);
1941 template <
typename T>
1944 template <
typename U,
typename T>
1948 static constexpr
bool value =
1949 std::is_trivially_copyable_v<std::remove_reference_t<T>>
1950 #if SYCL_LANGUAGE_VERSION && SYCL_LANGUAGE_VERSION <= 201707
1951 && std::is_standard_layout<std::remove_reference_t<T>>::value
1955 std::is_pointer_v<remove_cv_ref_t<T>>)
1965 template <
typename T>
1966 typename std::enable_if_t<ShouldEnableSetArg<T>::value,
void>
1968 setArgHelper(ArgIndex, std::move(Arg));
1976 setArgHelper(ArgIndex, std::move(Arg));
1979 template <
typename DataT,
int Dims>
1981 setArgHelper(ArgIndex, std::move(Arg));
1985 template <
typename T>
1988 setArgHelper(argIndex, dynamicParam);
1996 template <
typename... Ts>
void set_args(Ts &&...Args) {
1997 setArgsHelper(0, std::move(Args)...);
2007 template <
typename KernelName = detail::auto_name,
typename KernelType>
2009 single_task_lambda_impl<KernelName>(
2013 template <
typename KernelName = detail::auto_name,
typename KernelType>
2015 parallel_for_lambda_impl<KernelName>(
2017 std::move(KernelFunc));
2020 template <
typename KernelName = detail::auto_name,
typename KernelType>
2022 parallel_for_lambda_impl<KernelName>(
2024 std::move(KernelFunc));
2027 template <
typename KernelName = detail::auto_name,
typename KernelType>
2029 parallel_for_lambda_impl<KernelName>(
2031 std::move(KernelFunc));
2035 template <
typename FuncT>
2036 std::enable_if_t<detail::check_fn_signature<std::remove_reference_t<FuncT>,
2041 host_task_impl(Func);
2060 void parallel_for(
range<Dims> NumWorkItems,
id<Dims> WorkItemOffset,
2062 throwIfActionIsCreated();
2066 using LambdaArgType = sycl::detail::lambda_arg_type<KernelType, item<Dims>>;
2067 using TransformedArgType = std::conditional_t<
2068 std::is_integral<LambdaArgType>::value && Dims == 1,
item<Dims>,
2069 typename TransformUserItemType<Dims, LambdaArgType>::type>;
2071 (void)WorkItemOffset;
2072 kernel_parallel_for_wrapper<NameT, TransformedArgType>(KernelFunc);
2073 #ifndef __SYCL_DEVICE_ONLY__
2074 detail::checkValueRange<Dims>(NumWorkItems, WorkItemOffset);
2075 MNDRDesc.set(std::move(NumWorkItems), std::move(WorkItemOffset));
2076 StoreLambda<NameT, KernelType, Dims, TransformedArgType>(
2077 std::move(KernelFunc));
2079 setNDRangeUsed(
false);
2097 parallel_for_work_group_lambda_impl<KernelName>(
2119 parallel_for_work_group_lambda_impl<KernelName>(
2131 throwIfActionIsCreated();
2133 setHandlerKernelBundle(Kernel);
2139 extractArgsAndReqs();
2140 MKernelName = getKernelName();
2144 parallel_for_impl(NumWorkItems, Kernel);
2148 parallel_for_impl(NumWorkItems, Kernel);
2152 parallel_for_impl(NumWorkItems, Kernel);
2165 void parallel_for(
range<Dims> NumWorkItems,
id<Dims> WorkItemOffset,
2167 throwIfActionIsCreated();
2169 detail::checkValueRange<Dims>(NumWorkItems, WorkItemOffset);
2170 MNDRDesc.set(std::move(NumWorkItems), std::move(WorkItemOffset));
2172 setNDRangeUsed(
false);
2173 extractArgsAndReqs();
2174 MKernelName = getKernelName();
2186 throwIfActionIsCreated();
2188 detail::checkValueRange<Dims>(NDRange);
2189 MNDRDesc.set(std::move(NDRange));
2191 setNDRangeUsed(
true);
2192 extractArgsAndReqs();
2193 MKernelName = getKernelName();
2202 template <
typename KernelName = detail::auto_name,
typename KernelType>
2204 throwIfActionIsCreated();
2206 setHandlerKernelBundle(Kernel);
2211 kernel_single_task<NameT>(KernelFunc);
2212 #ifndef __SYCL_DEVICE_ONLY__
2218 if (!MIsHost && !lambdaAndKernelHaveEqualName<NameT>()) {
2219 extractArgsAndReqs();
2220 MKernelName = getKernelName();
2222 StoreLambda<NameT, KernelType, 1,
void>(std::move(KernelFunc));
2224 detail::CheckDeviceCopyable<KernelType>();
2239 throwIfActionIsCreated();
2241 setHandlerKernelBundle(Kernel);
2245 using LambdaArgType = sycl::detail::lambda_arg_type<KernelType, item<Dims>>;
2248 kernel_parallel_for_wrapper<NameT, LambdaArgType>(KernelFunc);
2249 #ifndef __SYCL_DEVICE_ONLY__
2250 detail::checkValueRange<Dims>(NumWorkItems);
2251 MNDRDesc.set(std::move(NumWorkItems));
2254 setNDRangeUsed(
false);
2255 if (!MIsHost && !lambdaAndKernelHaveEqualName<NameT>()) {
2256 extractArgsAndReqs();
2257 MKernelName = getKernelName();
2259 StoreLambda<NameT, KernelType, Dims, LambdaArgType>(
2260 std::move(KernelFunc));
2278 throwIfActionIsCreated();
2280 setHandlerKernelBundle(Kernel);
2284 using LambdaArgType = sycl::detail::lambda_arg_type<KernelType, item<Dims>>;
2287 (void)WorkItemOffset;
2288 kernel_parallel_for_wrapper<NameT, LambdaArgType>(KernelFunc);
2289 #ifndef __SYCL_DEVICE_ONLY__
2290 detail::checkValueRange<Dims>(NumWorkItems, WorkItemOffset);
2291 MNDRDesc.set(std::move(NumWorkItems), std::move(WorkItemOffset));
2294 setNDRangeUsed(
false);
2295 if (!MIsHost && !lambdaAndKernelHaveEqualName<NameT>()) {
2296 extractArgsAndReqs();
2297 MKernelName = getKernelName();
2299 StoreLambda<NameT, KernelType, Dims, LambdaArgType>(
2300 std::move(KernelFunc));
2317 throwIfActionIsCreated();
2319 setHandlerKernelBundle(Kernel);
2323 using LambdaArgType =
2324 sycl::detail::lambda_arg_type<KernelType, nd_item<Dims>>;
2327 kernel_parallel_for_wrapper<NameT, LambdaArgType>(KernelFunc);
2328 #ifndef __SYCL_DEVICE_ONLY__
2329 detail::checkValueRange<Dims>(NDRange);
2330 MNDRDesc.set(std::move(NDRange));
2333 setNDRangeUsed(
true);
2334 if (!MIsHost && !lambdaAndKernelHaveEqualName<NameT>()) {
2335 extractArgsAndReqs();
2336 MKernelName = getKernelName();
2338 StoreLambda<NameT, KernelType, Dims, LambdaArgType>(
2339 std::move(KernelFunc));
2360 throwIfActionIsCreated();
2362 setHandlerKernelBundle(Kernel);
2366 using LambdaArgType =
2367 sycl::detail::lambda_arg_type<KernelType, group<Dims>>;
2369 (void)NumWorkGroups;
2370 kernel_parallel_for_work_group_wrapper<NameT, LambdaArgType>(KernelFunc);
2371 #ifndef __SYCL_DEVICE_ONLY__
2372 detail::checkValueRange<Dims>(NumWorkGroups);
2373 MNDRDesc.setNumWorkGroups(NumWorkGroups);
2375 StoreLambda<NameT, KernelType, Dims, LambdaArgType>(std::move(KernelFunc));
2400 throwIfActionIsCreated();
2402 setHandlerKernelBundle(Kernel);
2406 using LambdaArgType =
2407 sycl::detail::lambda_arg_type<KernelType, group<Dims>>;
2409 (void)NumWorkGroups;
2411 kernel_parallel_for_work_group_wrapper<NameT, LambdaArgType>(KernelFunc);
2412 #ifndef __SYCL_DEVICE_ONLY__
2415 detail::checkValueRange<Dims>(ExecRange);
2416 MNDRDesc.set(std::move(ExecRange));
2418 StoreLambda<NameT, KernelType, Dims, LambdaArgType>(std::move(KernelFunc));
2424 typename PropertiesT>
2428 single_task_lambda_impl<KernelName, KernelType, PropertiesT>(Props,
2433 typename PropertiesT>
2438 parallel_for_lambda_impl<KernelName, KernelType, 1, PropertiesT>(
2439 NumWorkItems, Props, std::move(KernelFunc));
2443 typename PropertiesT>
2448 parallel_for_lambda_impl<KernelName, KernelType, 2, PropertiesT>(
2449 NumWorkItems, Props, std::move(KernelFunc));
2453 typename PropertiesT>
2458 parallel_for_lambda_impl<KernelName, KernelType, 3, PropertiesT>(
2459 NumWorkItems, Props, std::move(KernelFunc));
2463 typename PropertiesT,
int Dims>
2468 parallel_for_impl<KernelName>(Range, Properties, std::move(KernelFunc));
2476 (
sizeof...(RestT) > 1) &&
2480 throwIfGraphAssociated<ext::oneapi::experimental::detail::
2481 UnsupportedGraphFeatures::sycl_reductions>();
2482 detail::reduction_parallel_for<KernelName>(*
this, Range, Properties,
2483 std::forward<RestT>(Rest)...);
2489 (
sizeof...(RestT) > 1) &&
2493 throwIfGraphAssociated<ext::oneapi::experimental::detail::
2494 UnsupportedGraphFeatures::sycl_reductions>();
2495 detail::reduction_parallel_for<KernelName>(*
this, Range, Properties,
2496 std::forward<RestT>(Rest)...);
2502 (
sizeof...(RestT) > 1) &&
2506 throwIfGraphAssociated<ext::oneapi::experimental::detail::
2507 UnsupportedGraphFeatures::sycl_reductions>();
2508 detail::reduction_parallel_for<KernelName>(*
this, Range, Properties,
2509 std::forward<RestT>(Rest)...);
2515 parallel_for<KernelName>(Range,
2517 std::forward<RestT>(Rest)...);
2523 parallel_for<KernelName>(Range,
2525 std::forward<RestT>(Rest)...);
2531 parallel_for<KernelName>(Range,
2533 std::forward<RestT>(Rest)...);
2537 typename PropertiesT,
typename... RestT>
2539 (
sizeof...(RestT) > 1) &&
2543 throwIfGraphAssociated<ext::oneapi::experimental::detail::
2544 UnsupportedGraphFeatures::sycl_reductions>();
2545 detail::reduction_parallel_for<KernelName>(*
this, Range, Properties,
2546 std::forward<RestT>(Rest)...);
2553 parallel_for<KernelName>(Range,
2555 std::forward<RestT>(Rest)...);
2561 int Dims,
typename PropertiesT>
2564 parallel_for_work_group_lambda_impl<KernelName, KernelType, Dims,
2565 PropertiesT>(NumWorkGroups, Props,
2570 int Dims,
typename PropertiesT>
2574 parallel_for_work_group_lambda_impl<KernelName, KernelType, Dims,
2580 #undef _KERNELFUNCPARAM
2595 std::shared_ptr<T_Dst> Dst) {
2596 if (Src.is_placeholder())
2597 checkIfPlaceholderIsBoundToHandler(Src);
2599 throwIfActionIsCreated();
2600 static_assert(isValidTargetForExplicitOp(AccessTarget),
2601 "Invalid accessor target for the copy method.");
2602 static_assert(isValidModeForSourceAccessor(
AccessMode),
2603 "Invalid accessor mode for the copy method.");
2606 CGData.MSharedPtrStorage.push_back(Dst);
2607 typename std::shared_ptr<T_Dst>::element_type *RawDstPtr = Dst.get();
2608 copy(Src, RawDstPtr);
2624 if (Dst.is_placeholder())
2625 checkIfPlaceholderIsBoundToHandler(Dst);
2627 throwIfActionIsCreated();
2628 static_assert(isValidTargetForExplicitOp(AccessTarget),
2629 "Invalid accessor target for the copy method.");
2630 static_assert(isValidModeForDestinationAccessor(
AccessMode),
2631 "Invalid accessor mode for the copy method.");
2636 CGData.MSharedPtrStorage.push_back(Src);
2637 typename std::shared_ptr<T_Src>::element_type *RawSrcPtr = Src.get();
2638 copy(RawSrcPtr, Dst);
2653 if (Src.is_placeholder())
2654 checkIfPlaceholderIsBoundToHandler(Src);
2656 throwIfActionIsCreated();
2657 static_assert(isValidTargetForExplicitOp(AccessTarget),
2658 "Invalid accessor target for the copy method.");
2659 static_assert(isValidModeForSourceAccessor(
AccessMode),
2660 "Invalid accessor mode for the copy method.");
2661 #ifndef __SYCL_DEVICE_ONLY__
2665 copyAccToPtrHost(Src, Dst);
2674 CGData.MRequirements.push_back(AccImpl.get());
2675 MSrcPtr =
static_cast<void *
>(AccImpl.get());
2676 MDstPtr =
static_cast<void *
>(Dst);
2679 CGData.MAccStorage.push_back(std::move(AccImpl));
2695 if (Dst.is_placeholder())
2696 checkIfPlaceholderIsBoundToHandler(Dst);
2698 throwIfActionIsCreated();
2699 static_assert(isValidTargetForExplicitOp(AccessTarget),
2700 "Invalid accessor target for the copy method.");
2701 static_assert(isValidModeForDestinationAccessor(
AccessMode),
2702 "Invalid accessor mode for the copy method.");
2705 #ifndef __SYCL_DEVICE_ONLY__
2709 copyPtrToAccHost(Src, Dst);
2718 CGData.MRequirements.push_back(AccImpl.get());
2719 MSrcPtr =
const_cast<T_Src *
>(Src);
2720 MDstPtr =
static_cast<void *
>(AccImpl.get());
2723 CGData.MAccStorage.push_back(std::move(AccImpl));
2734 typename T_Src,
int Dims_Src,
access::mode AccessMode_Src,
2742 accessor<T_Dst, Dims_Dst, AccessMode_Dst, AccessTarget_Dst,
2745 if (Src.is_placeholder())
2746 checkIfPlaceholderIsBoundToHandler(Src);
2747 if (Dst.is_placeholder())
2748 checkIfPlaceholderIsBoundToHandler(Dst);
2750 throwIfActionIsCreated();
2751 static_assert(isValidTargetForExplicitOp(AccessTarget_Src),
2752 "Invalid source accessor target for the copy method.");
2753 static_assert(isValidTargetForExplicitOp(AccessTarget_Dst),
2754 "Invalid destination accessor target for the copy method.");
2755 static_assert(isValidModeForSourceAccessor(AccessMode_Src),
2756 "Invalid source accessor mode for the copy method.");
2757 static_assert(isValidModeForDestinationAccessor(AccessMode_Dst),
2758 "Invalid destination accessor mode for the copy method.");
2759 if (Dst.get_size() < Src.get_size())
2760 throw sycl::invalid_object_error(
2761 "The destination accessor size is too small to copy the memory into.",
2762 PI_ERROR_INVALID_OPERATION);
2764 if (copyAccToAccHelper(Src, Dst))
2774 CGData.MRequirements.push_back(AccImplSrc.get());
2775 CGData.MRequirements.push_back(AccImplDst.get());
2776 MSrcPtr = AccImplSrc.get();
2777 MDstPtr = AccImplDst.get();
2780 CGData.MAccStorage.push_back(std::move(AccImplSrc));
2781 CGData.MAccStorage.push_back(std::move(AccImplDst));
2793 if (Acc.is_placeholder())
2794 checkIfPlaceholderIsBoundToHandler(Acc);
2796 throwIfActionIsCreated();
2797 static_assert(isValidTargetForExplicitOp(AccessTarget),
2798 "Invalid accessor target for the update_host method.");
2804 MDstPtr =
static_cast<void *
>(AccImpl.get());
2805 CGData.MRequirements.push_back(AccImpl.get());
2806 CGData.MAccStorage.push_back(std::move(AccImpl));
2826 assert(!MIsHost &&
"fill() should no longer be callable on a host device.");
2828 if (Dst.is_placeholder())
2829 checkIfPlaceholderIsBoundToHandler(Dst);
2831 throwIfActionIsCreated();
2834 static_assert(isValidTargetForExplicitOp(AccessTarget),
2835 "Invalid accessor target for the fill method.");
2839 if constexpr (isBackendSupportedFillSize(
sizeof(T)) &&
2840 ((Dims <= 1) || isImageOrImageArray(AccessTarget))) {
2841 StageFillCG(Dst, Pattern);
2842 }
else if constexpr (Dims == 0) {
2844 parallel_for<__fill<T, Dims, AccessMode, AccessTarget, IsPlaceholder>>(
2852 if (OffsetUsable && RangesUsable &&
2853 isBackendSupportedFillSize(
sizeof(T))) {
2854 StageFillCG(Dst, Pattern);
2857 parallel_for<__fill<T, Dims, AccessMode, AccessTarget, IsPlaceholder>>(
2858 Range, [=](
id<Dims> Index) { Dst[Index] = Pattern; });
2869 template <
typename T>
void fill(
void *Ptr,
const T &Pattern,
size_t Count) {
2870 throwIfActionIsCreated();
2873 "Pattern must be device copyable");
2874 parallel_for<__usmfill<T>>(
range<1>(Count), [=](
id<1> Index) {
2875 T *CastedPtr =
static_cast<T *
>(Ptr);
2876 CastedPtr[Index] = Pattern;
2884 throwIfActionIsCreated();
2894 void ext_oneapi_barrier(
const std::vector<event> &WaitList);
2906 void memcpy(
void *Dest,
const void *Src,
size_t Count);
2918 template <
typename T>
void copy(
const T *Src, T *Dest,
size_t Count) {
2919 this->memcpy(Dest, Src, Count *
sizeof(T));
2929 void memset(
void *Dest,
int Value,
size_t Count);
2937 void prefetch(
const void *Ptr,
size_t Count);
2945 void mem_advise(
const void *Ptr,
size_t Length,
int Advice);
2963 template <
typename T =
unsigned char,
2964 typename = std::enable_if_t<std::is_same_v<T, unsigned char>>>
2965 void ext_oneapi_memcpy2d(
void *Dest,
size_t DestPitch,
const void *Src,
2966 size_t SrcPitch,
size_t Width,
size_t Height);
2981 template <
typename T>
2982 void ext_oneapi_copy2d(
const T *Src,
size_t SrcPitch, T *Dest,
2983 size_t DestPitch,
size_t Width,
size_t Height);
3000 template <
typename T =
unsigned char,
3001 typename = std::enable_if_t<std::is_same_v<T, unsigned char>>>
3002 void ext_oneapi_memset2d(
void *Dest,
size_t DestPitch,
int Value,
3003 size_t Width,
size_t Height);
3017 template <
typename T>
3018 void ext_oneapi_fill2d(
void *Dest,
size_t DestPitch,
const T &Pattern,
3019 size_t Width,
size_t Height);
3029 template <
typename T,
typename PropertyListT>
3031 const void *Src,
size_t NumBytes =
sizeof(T),
3032 size_t DestOffset = 0) {
3033 throwIfGraphAssociated<
3034 ext::oneapi::experimental::detail::UnsupportedGraphFeatures::
3035 sycl_ext_oneapi_device_global>();
3036 if (
sizeof(T) < DestOffset + NumBytes)
3038 "Copy to device_global is out of bounds.");
3040 constexpr
bool IsDeviceImageScoped = PropertyListT::template
has_property<
3046 memcpyToHostOnlyDeviceGlobal(&Dest, Src,
sizeof(T), IsDeviceImageScoped,
3047 NumBytes, DestOffset);
3051 memcpyToDeviceGlobal(&Dest, Src, IsDeviceImageScoped, NumBytes, DestOffset);
3062 template <
typename T,
typename PropertyListT>
3066 size_t NumBytes =
sizeof(T),
size_t SrcOffset = 0) {
3067 throwIfGraphAssociated<
3068 ext::oneapi::experimental::detail::UnsupportedGraphFeatures::
3069 sycl_ext_oneapi_device_global>();
3070 if (
sizeof(T) < SrcOffset + NumBytes)
3072 "Copy from device_global is out of bounds.");
3074 constexpr
bool IsDeviceImageScoped = PropertyListT::template
has_property<
3080 memcpyFromHostOnlyDeviceGlobal(Dest, &Src, IsDeviceImageScoped, NumBytes,
3085 memcpyFromDeviceGlobal(Dest, &Src, IsDeviceImageScoped, NumBytes,
3098 template <
typename T,
typename PropertyListT>
3099 void copy(
const std::remove_all_extents_t<T> *Src,
3101 size_t Count =
sizeof(T) /
sizeof(std::remove_all_extents_t<T>),
3102 size_t StartIndex = 0) {
3103 this->memcpy(Dest, Src, Count *
sizeof(std::remove_all_extents_t<T>),
3104 StartIndex *
sizeof(std::remove_all_extents_t<T>));
3117 template <
typename T,
typename PropertyListT>
3120 std::remove_all_extents_t<T> *Dest,
3121 size_t Count =
sizeof(T) /
sizeof(std::remove_all_extents_t<T>),
3122 size_t StartIndex = 0) {
3123 this->memcpy(Dest, Src, Count *
sizeof(std::remove_all_extents_t<T>),
3124 StartIndex *
sizeof(std::remove_all_extents_t<T>));
3141 void ext_oneapi_copy(
3165 void ext_oneapi_copy(
3181 void ext_oneapi_copy(
3223 void ext_oneapi_copy(
3224 void *Src,
void *Dest,
3226 size_t DeviceRowPitch);
3250 void ext_oneapi_copy(
3261 void ext_oneapi_wait_external_semaphore(
3270 void ext_oneapi_signal_external_semaphore(
3275 std::shared_ptr<detail::handler_impl> MImpl;
3276 std::shared_ptr<detail::queue_impl> MQueue;
3283 std::vector<detail::LocalAccessorImplPtr> MLocalAccStorage;
3284 std::vector<std::shared_ptr<detail::stream_impl>> MStreamStorage;
3286 std::vector<detail::ArgDesc> MArgs;
3290 std::vector<detail::ArgDesc> MAssociatedAccesors;
3295 std::shared_ptr<detail::kernel_impl> MKernel;
3301 void *MSrcPtr =
nullptr;
3303 void *MDstPtr =
nullptr;
3307 std::vector<char> MPattern;
3309 std::unique_ptr<detail::HostKernelBase> MHostKernel;
3311 std::unique_ptr<detail::HostTask> MHostTask;
3314 std::vector<detail::EventImplPtr> MEventsWaitWithBarrier;
3317 std::shared_ptr<ext::oneapi::experimental::detail::graph_impl> MGraph;
3320 std::shared_ptr<ext::oneapi::experimental::detail::exec_graph_impl>
3323 std::shared_ptr<ext::oneapi::experimental::detail::node_impl> MSubgraphNode;
3325 std::unique_ptr<detail::CG> MGraphNodeCG;
3327 bool MIsHost =
false;
3330 bool MIsFinalized =
false;
3336 template <
typename DataT,
int Dims,
access::mode AccMode,
3338 typename PropertyListT>
3346 friend class stream;
3350 template <
typename T,
class BinaryOperation,
int Dims,
size_t Extent,
3351 bool ExplicitIdentity,
typename RedOutVar>
3355 template <
class FunctorTy>
3359 typename PropertiesT,
typename... RestT>
3361 PropertiesT Properties,
3365 typename PropertiesT,
typename... RestT>
3368 PropertiesT Properties, RestT... Rest);
3370 #ifndef __SYCL_DEVICE_ONLY__
3380 friend class ::MockHandler;
3385 template <
class _name,
class _dataT, int32_t _min_capacity,
3386 class _propertiesT,
class>
3395 void ext_intel_read_host_pipe(
const std::string &Name,
void *Ptr,
size_t Size,
3396 bool Block =
false) {
3400 size_t Size,
bool Block =
false);
3408 void ext_intel_write_host_pipe(
const std::string &Name,
void *Ptr,
3409 size_t Size,
bool Block =
false) {
3412 void ext_intel_write_host_pipe(detail::string_view Name,
void *Ptr,
3413 size_t Size,
bool Block =
false);
3417 bool DisableRangeRounding();
3419 bool RangeRoundingTrace();
3421 void GetRangeRoundingSettings(
size_t &MinFactor,
size_t &GoodFactor,
3424 template <
typename WrapperT,
typename TransformedArgType,
int Dims,
3425 typename KernelType,
3427 KernelType, TransformedArgType>::value> * =
nullptr>
3428 auto getRangeRoundedKernelLambda(KernelType KernelFunc,
3431 KernelType>{UserRange, KernelFunc};
3434 template <
typename WrapperT,
typename TransformedArgType,
int Dims,
3435 typename KernelType,
3436 std::enable_if_t<!detail::KernelLambdaHasKernelHandlerArgT<
3437 KernelType, TransformedArgType>::value> * =
nullptr>
3438 auto getRangeRoundedKernelLambda(KernelType KernelFunc,
3440 return detail::RoundedRangeKernel<TransformedArgType, Dims, KernelType>{
3441 UserRange, KernelFunc};
3444 const std::shared_ptr<detail::context_impl> &getContextImplPtr()
const;
3447 bool supportsUSMMemcpy2D();
3448 bool supportsUSMFill2D();
3449 bool supportsUSMMemset2D();
3452 id<2> computeFallbackKernelBounds(
size_t Width,
size_t Height);
3456 template <
typename T>
3457 void commonUSMCopy2DFallbackKernel(
const void *Src,
size_t SrcPitch,
3458 void *Dest,
size_t DestPitch,
size_t Width,
3463 id<2> Chunk = computeFallbackKernelBounds(Height, Width);
3464 id<2> Iterations = (Chunk + id<2>{Height, Width} - 1) / Chunk;
3465 parallel_for<__usmmemcpy2d<T>>(
3466 range<2>{Chunk[0], Chunk[1]}, [=](id<2> Index) {
3467 T *CastedDest =
static_cast<T *
>(Dest);
3468 const T *CastedSrc =
static_cast<const T *
>(Src);
3469 for (uint32_t I = 0; I < Iterations[0]; ++I) {
3470 for (uint32_t J = 0; J < Iterations[1]; ++J) {
3471 id<2> adjustedIndex = Index + Chunk * id<2>{I, J};
3472 if (adjustedIndex[0] < Height && adjustedIndex[1] < Width) {
3473 CastedDest[adjustedIndex[0] * DestPitch + adjustedIndex[1]] =
3474 CastedSrc[adjustedIndex[0] * SrcPitch + adjustedIndex[1]];
3483 template <
typename T>
3484 void commonUSMCopy2DFallbackHostTask(
const void *Src,
size_t SrcPitch,
3485 void *Dest,
size_t DestPitch,
3486 size_t Width,
size_t Height) {
3490 const T *CastedSrc =
static_cast<const T *
>(Src);
3491 T *CastedDest =
static_cast<T *
>(Dest);
3492 for (
size_t I = 0; I < Height; ++I) {
3493 const T *SrcItBegin = CastedSrc + SrcPitch * I;
3494 T *DestItBegin = CastedDest + DestPitch * I;
3495 std::copy(SrcItBegin, SrcItBegin + Width, DestItBegin);
3504 typename PropertyListT = property_list>
3506 accessor<T, Dims, AccessMode, AccessTarget, IsPlaceholder, PropertyListT>
3510 detail::AccessorBaseHost *AccBase = (detail::AccessorBaseHost *)&Dst;
3513 MDstPtr =
static_cast<void *
>(AccImpl.get());
3517 MPattern.resize(
sizeof(T));
3518 auto PatternPtr =
reinterpret_cast<T *
>(MPattern.data());
3519 *PatternPtr = Pattern;
3524 template <
typename T>
3525 void commonUSMFill2DFallbackKernel(
void *Dest,
size_t DestPitch,
3526 const T &Pattern,
size_t Width,
3531 id<2> Chunk = computeFallbackKernelBounds(Height, Width);
3532 id<2> Iterations = (Chunk + id<2>{Height, Width} - 1) / Chunk;
3533 parallel_for<__usmfill2d<T>>(
3534 range<2>{Chunk[0], Chunk[1]}, [=](id<2> Index) {
3535 T *CastedDest =
static_cast<T *
>(Dest);
3536 for (uint32_t I = 0; I < Iterations[0]; ++I) {
3537 for (uint32_t J = 0; J < Iterations[1]; ++J) {
3538 id<2> adjustedIndex = Index + Chunk * id<2>{I, J};
3539 if (adjustedIndex[0] < Height && adjustedIndex[1] < Width) {
3540 CastedDest[adjustedIndex[0] * DestPitch + adjustedIndex[1]] =
3550 template <
typename T>
3551 void commonUSMFill2DFallbackHostTask(
void *Dest,
size_t DestPitch,
3552 const T &Pattern,
size_t Width,
3557 T *CastedDest =
static_cast<T *
>(Dest);
3558 for (
size_t I = 0; I < Height; ++I) {
3559 T *ItBegin = CastedDest + DestPitch * I;
3560 std::fill(ItBegin, ItBegin + Width, Pattern);
3566 void ext_oneapi_memcpy2d_impl(
void *Dest,
size_t DestPitch,
const void *Src,
3567 size_t SrcPitch,
size_t Width,
size_t Height);
3570 void ext_oneapi_fill2d_impl(
void *Dest,
size_t DestPitch,
const void *Value,
3571 size_t ValueSize,
size_t Width,
size_t Height);
3574 void ext_oneapi_memset2d_impl(
void *Dest,
size_t DestPitch,
int Value,
3575 size_t Width,
size_t Height);
3578 void memcpyToDeviceGlobal(
const void *DeviceGlobalPtr,
const void *Src,
3579 bool IsDeviceImageScoped,
size_t NumBytes,
3583 void memcpyFromDeviceGlobal(
void *Dest,
const void *DeviceGlobalPtr,
3584 bool IsDeviceImageScoped,
size_t NumBytes,
3588 void memcpyToHostOnlyDeviceGlobal(
const void *DeviceGlobalPtr,
3589 const void *Src,
size_t DeviceGlobalTSize,
3590 bool IsDeviceImageScoped,
size_t NumBytes,
3594 void memcpyFromHostOnlyDeviceGlobal(
void *Dest,
const void *DeviceGlobalPtr,
3595 bool IsDeviceImageScoped,
size_t NumBytes,
3601 typename PropertyListT = property_list>
3602 void checkIfPlaceholderIsBoundToHandler(
3603 accessor<T, Dims, AccessMode, AccessTarget, IsPlaceholder, PropertyListT>
3605 auto *AccBase =
reinterpret_cast<detail::AccessorBaseHost *
>(&Acc);
3607 detail::AccessorImplHost *Req = AccImpl.get();
3608 if (std::find_if(MAssociatedAccesors.begin(), MAssociatedAccesors.end(),
3609 [&](
const detail::ArgDesc &AD) {
3611 detail::kernel_param_kind_t::kind_accessor &&
3613 AD.MSize == static_cast<int>(AccessTarget);
3614 }) == MAssociatedAccesors.end())
3616 "placeholder accessor must be bound by calling "
3617 "handler::require() before it can be used.");
3623 void setKernelIsCooperative(
bool);
3627 void throwIfGraphAssociated()
const {
3629 if (getCommandGraph()) {
3630 std::string FeatureString =
3634 "The " + FeatureString +
3635 " feature is not yet available "
3636 "for use with the SYCL Graph extension.");
3641 void setNDRangeUsed(
bool Value);
The file contains implementations of accessor class.
Defines a shared array that can be used by kernels in queues.
range< 3 > & getMemoryRange()
range< 3 > & getAccessRange()
CGTYPE
Type of the command group.
RoundedRangeIDGenerator(const id< Dims > &Id, const range< Dims > &UserRange, const range< Dims > &RoundedRange)
void operator()(item< Dims > It, kernel_handler KH) const
void operator()(item< Dims > It) const
This class is the default KernelName template parameter type for kernel invocation APIs such as singl...
The SYCL device class encapsulates a single SYCL device on which kernels may be executed.
An event object can be used to synchronize memory transfers, enqueues of kernels and signaling barrie...
Graph in the modifiable state.
Implementation details of command_graph<modifiable>.
Command group handler class.
void fill(void *Ptr, const T &Pattern, size_t Count)
Fills the specified memory with the specified pattern.
void parallel_for(range< 2 > NumWorkItems, kernel Kernel)
void parallel_for(kernel Kernel, range< Dims > NumWorkItems, _KERNELFUNCPARAM(KernelFunc))
Defines and invokes a SYCL kernel function for the specified range.
void single_task(_KERNELFUNCPARAM(KernelFunc))
Defines and invokes a SYCL kernel function as a function object type.
void copy(const std::remove_all_extents_t< T > *Src, ext::oneapi::experimental::device_global< T, PropertyListT > &Dest, size_t Count=sizeof(T)/sizeof(std::remove_all_extents_t< T >), size_t StartIndex=0)
Copies elements of type std::remove_all_extents_t<T> from a USM memory region to a device_global.
void parallel_for(nd_range< Dims > NDRange, kernel Kernel)
Defines and invokes a SYCL kernel function for the specified range and offsets.
void parallel_for(range< 1 > NumWorkItems, _KERNELFUNCPARAM(KernelFunc))
void parallel_for_work_group(range< Dims > NumWorkGroups, range< Dims > WorkGroupSize, PropertiesT Props, _KERNELFUNCPARAM(KernelFunc))
std::enable_if_t<(sizeof...(RestT) > 1) &&detail::AreAllButLastReductions< RestT... >::value &&ext::oneapi::experimental::is_property_list< PropertiesT >::value > parallel_for(range< 2 > Range, PropertiesT Properties, RestT &&...Rest)
void copy(accessor< T_Src, Dims, AccessMode, AccessTarget, IsPlaceholder > Src, std::shared_ptr< T_Dst > Dst)
Copies the content of memory object accessed by Src into the memory pointed by Dst.
std::enable_if_t<(sizeof...(RestT) > 1) &&detail::AreAllButLastReductions< RestT... >::value &&ext::oneapi::experimental::is_property_list< PropertiesT >::value > parallel_for(nd_range< Dims > Range, PropertiesT Properties, RestT &&...Rest)
void copy(const ext::oneapi::experimental::device_global< T, PropertyListT > &Src, std::remove_all_extents_t< T > *Dest, size_t Count=sizeof(T)/sizeof(std::remove_all_extents_t< T >), size_t StartIndex=0)
Copies elements of type std::remove_all_extents_t<T> from a device_global to a USM memory region.
void parallel_for(range< 3 > NumWorkItems, _KERNELFUNCPARAM(KernelFunc))
handler & operator=(handler &&)=delete
std::enable_if_t<(sizeof...(RestT) > 1) &&detail::AreAllButLastReductions< RestT... >::value &&ext::oneapi::experimental::is_property_list< PropertiesT >::value > parallel_for(range< 3 > Range, PropertiesT Properties, RestT &&...Rest)
handler(handler &&)=delete
void copy(accessor< T_Src, Dims_Src, AccessMode_Src, AccessTarget_Src, IsPlaceholder_Src > Src, accessor< T_Dst, Dims_Dst, AccessMode_Dst, AccessTarget_Dst, IsPlaceholder_Dst > Dst)
Copies the content of memory object accessed by Src to the memory object accessed by Dst.
void copy(const T *Src, T *Dest, size_t Count)
Copies data from one memory region to another, each is either a host pointer or a pointer within USM ...
void parallel_for_work_group(range< Dims > NumWorkGroups, range< Dims > WorkGroupSize, _KERNELFUNCPARAM(KernelFunc))
Hierarchical kernel invocation method of a kernel defined as a lambda encoding the body of each work-...
void parallel_for(range< 3 > NumWorkItems, kernel Kernel)
void parallel_for(range< 1 > NumWorkItems, kernel Kernel)
void single_task(kernel Kernel, _KERNELFUNCPARAM(KernelFunc))
Defines and invokes a SYCL kernel function.
handler & operator=(const handler &)=delete
void parallel_for(kernel Kernel, nd_range< Dims > NDRange, _KERNELFUNCPARAM(KernelFunc))
Defines and invokes a SYCL kernel function for the specified range and offsets.
void parallel_for_work_group(kernel Kernel, range< Dims > NumWorkGroups, range< Dims > WorkGroupSize, _KERNELFUNCPARAM(KernelFunc))
Hierarchical kernel invocation method of a kernel.
void require(accessor< DataT, Dims, AccMode, AccTarget, isPlaceholder > Acc)
Requires access to the memory object associated with the placeholder accessor.
void fill(accessor< T, Dims, AccessMode, AccessTarget, IsPlaceholder, PropertyListT > Dst, const T &Pattern)
Fills memory pointed by accessor with the pattern given.
void update_host(accessor< T, Dims, AccessMode, AccessTarget, IsPlaceholder > Acc)
Provides guarantees that the memory object accessed via Acc is updated on the host after command grou...
void set_arg(int argIndex, ext::oneapi::experimental::dynamic_parameter< T > &dynamicParam)
std::enable_if_t< ext::oneapi::experimental::is_property_list< PropertiesT >::value > parallel_for(range< 2 > NumWorkItems, PropertiesT Props, _KERNELFUNCPARAM(KernelFunc))
std::is_same< remove_cv_ref_t< U >, remove_cv_ref_t< T > > is_same_type
std::enable_if_t< ShouldEnableSetArg< T >::value, void > set_arg(int ArgIndex, T &&Arg)
Sets argument for OpenCL interoperability kernels.
void parallel_for_work_group(range< Dims > NumWorkGroups, PropertiesT Props, _KERNELFUNCPARAM(KernelFunc))
}@
void require(ext::oneapi::experimental::dynamic_parameter< accessor< DataT, Dims, AccMode, AccTarget, isPlaceholder >> dynamicParamAcc)
Requires access to the memory object associated with the placeholder accessor contained in a dynamic_...
std::enable_if_t< ext::oneapi::experimental::is_property_list< PropertiesT >::value > single_task(PropertiesT Props, _KERNELFUNCPARAM(KernelFunc))
void single_task(kernel Kernel)
Invokes a SYCL kernel.
std::enable_if_t< detail::AreAllButLastReductions< RestT... >::value > parallel_for(nd_range< Dims > Range, RestT &&...Rest)
void copy(const T_Src *Src, accessor< T_Dst, Dims, AccessMode, AccessTarget, IsPlaceholder > Dst)
Copies the content of memory pointed by Src into the memory object accessed by Dst.
std::enable_if_t< ext::oneapi::experimental::is_property_list< PropertiesT >::value > parallel_for(range< 3 > NumWorkItems, PropertiesT Props, _KERNELFUNCPARAM(KernelFunc))
void copy(std::shared_ptr< T_Src > Src, accessor< T_Dst, Dims, AccessMode, AccessTarget, IsPlaceholder > Dst)
Copies the content of memory pointed by Src into the memory object accessed by Dst.
handler(const handler &)=delete
void copy(accessor< T_Src, Dims, AccessMode, AccessTarget, IsPlaceholder > Src, T_Dst *Dst)
Copies the content of memory object accessed by Src into the memory pointed by Dst.
typename std::remove_cv_t< std::remove_reference_t< T > > remove_cv_ref_t
std::enable_if_t< ext::oneapi::experimental::is_property_list< PropertiesT >::value > parallel_for(nd_range< Dims > Range, PropertiesT Properties, _KERNELFUNCPARAM(KernelFunc))
std::enable_if_t< detail::AreAllButLastReductions< RestT... >::value > parallel_for(range< 1 > Range, RestT &&...Rest)
std::enable_if_t< detail::AreAllButLastReductions< RestT... >::value > parallel_for(range< 2 > Range, RestT &&...Rest)
std::enable_if_t< detail::AreAllButLastReductions< RestT... >::value > parallel_for(range< 3 > Range, RestT &&...Rest)
void parallel_for_work_group(kernel Kernel, range< Dims > NumWorkGroups, _KERNELFUNCPARAM(KernelFunc))
Hierarchical kernel invocation method of a kernel.
void ext_oneapi_barrier()
Prevents any commands submitted afterward to this queue from executing until all commands previously ...
std::enable_if_t< ext::oneapi::experimental::is_property_list< PropertiesT >::value > parallel_for(range< 1 > NumWorkItems, PropertiesT Props, _KERNELFUNCPARAM(KernelFunc))
void memcpy(void *Dest, const ext::oneapi::experimental::device_global< T, PropertyListT > &Src, size_t NumBytes=sizeof(T), size_t SrcOffset=0)
Copies data from a device_global to USM memory.
void parallel_for_work_group(range< Dims > NumWorkGroups, _KERNELFUNCPARAM(KernelFunc))
Hierarchical kernel invocation method of a kernel defined as a lambda encoding the body of each work-...
std::enable_if_t< detail::check_fn_signature< std::remove_reference_t< FuncT >, void()>::value||detail::check_fn_signature< std::remove_reference_t< FuncT >, void(interop_handle)>::value > host_task(FuncT &&Func)
Enqueues a command to the SYCL runtime to invoke Func once.
void set_specialization_constant(typename std::remove_reference_t< decltype(SpecName)>::value_type Value)
void set_args(Ts &&...Args)
Sets arguments for OpenCL interoperability kernels.
void memcpy(ext::oneapi::experimental::device_global< T, PropertyListT > &Dest, const void *Src, size_t NumBytes=sizeof(T), size_t DestOffset=0)
Copies data from a USM memory region to a device_global.
void parallel_for(range< 2 > NumWorkItems, _KERNELFUNCPARAM(KernelFunc))
void set_arg(int ArgIndex, accessor< DataT, Dims, AccessMode, AccessTarget, IsPlaceholder > Arg)
std::enable_if_t<(sizeof...(RestT) > 1) &&detail::AreAllButLastReductions< RestT... >::value &&ext::oneapi::experimental::is_property_list< PropertiesT >::value > parallel_for(range< 1 > Range, PropertiesT Properties, RestT &&...Rest)
Reductions.
std::remove_reference_t< decltype(SpecName)>::value_type get_specialization_constant() const
void set_arg(int ArgIndex, local_accessor< DataT, Dims > Arg)
A unique identifier of an item in an index space.
Identifies an instance of the function object executing at each point in a range.
id< Dimensions > get_id() const
range< Dimensions > get_range() const
Provides an abstraction of a SYCL kernel.
Identifies an instance of the function object executing at each point in an nd_range.
id< Dimensions > get_global_id() const
id< Dimensions > get_offset() const
range< Dimensions > get_global_range() const
Defines the iteration domain of both the work-groups and the overall dispatch.
Objects of the property_list class are containers for the SYCL properties.
Defines the iteration domain of either a single work-group in a parallel dispatch,...
#define _KERNELFUNCPARAMTYPE
#define __SYCL_KERNEL_ATTR__
#define _KERNELFUNCPARAM(a)
__SYCL_EXTERN_STREAM_ATTRS ostream cout
Linked to standard output.
void withAuxHandler(handler &CGH, FunctorTy Func)
void finalizeHandler(handler &CGH)
void * getValueFromDynamicParameter(ext::oneapi::experimental::detail::dynamic_parameter_base &DynamicParamBase)
device getDeviceFromHandler(handler &CommandGroupHandlerRef)
size_t getLinearIndex(const T< Dims > &Index, const U< Dims > &Range)
decltype(member_ptr_helper(&F::operator())) argument_helper(int)
id< 1 > getDelinearizedId(const range< 1 > &, size_t Index)
static Arg member_ptr_helper(RetType(Func::*)(Arg) const)
bool isDeviceGlobalUsedInKernel(const void *DeviceGlobalPtr)
static std::enable_if_t< std::is_unsigned_v< T >, bool > multiply_with_overflow_check(T &dst, T x, T y)
decltype(Obj::impl) getSyclObjImpl(const Obj &SyclObject)
std::enable_if_t< std::is_same_v< T, range< Dims > >||std::is_same_v< T, id< Dims > > > checkValueRange(const T &V)
std::enable_if_t< KernelLambdaHasKernelHandlerArgT< KernelType, ArgType >::value > runKernelWithArg(KernelType KernelName, ArgType Arg)
void markBufferAsInternal(const std::shared_ptr< buffer_impl > &BufImpl)
std::enable_if_t< KernelLambdaHasKernelHandlerArgT< KernelType >::value > runKernelWithoutArg(KernelType KernelName)
std::shared_ptr< LocalAccessorImplHost > LocalAccessorImplPtr
std::shared_ptr< detail::kernel_bundle_impl > KernelBundleImplPtr
bool range_size_fits_in_size_t(const range< Dims > &r)
void associateWithHandler(handler &, AccessorBaseHost *, access::target)
void reduction_parallel_for(handler &CGH, range< Dims > NDRange, PropertiesT Properties, RestT... Rest)
decltype(argument_helper< F, SuggestedArgType >(0)) lambda_arg_type
std::shared_ptr< AccessorImplHost > AccessorImplPtr
constexpr cache_config_enum large_slm
constexpr cache_config_enum large_data
const char * UnsupportedFeatureToString(UnsupportedGraphFeatures Feature)
typename merged_properties< LHSPropertiesT, RHSPropertiesT >::type merged_properties_t
properties< std::tuple< PropertyValueTs... > > properties_t
@ executable
In executable state, the graph is ready to execute.
static constexpr bool has_property()
static constexpr auto get_property()
decltype(properties{}) empty_properties_t
class __SYCL_EBO __SYCL_SPECIAL_CLASS __SYCL_TYPE(local_accessor) local_accessor class __SYCL_EBO __SYCL_SPECIAL_CLASS Dimensions
signed char __SYCL2020_DEPRECATED
ext::intel::pipe< name, dataT, min_capacity > pipe
class __SYCL_EBO __SYCL_SPECIAL_CLASS __SYCL_TYPE(local_accessor) local_accessor class __SYCL_EBO __SYCL_SPECIAL_CLASS IsPlaceholder
class __SYCL_EBO __SYCL_SPECIAL_CLASS __SYCL_TYPE(local_accessor) local_accessor class __SYCL_EBO __SYCL_SPECIAL_CLASS AccessMode
std::error_code make_error_code(sycl::errc E) noexcept
Constructs an error code using e and sycl_category()
static sycl::event fill(sycl::queue q, void *dev_ptr, const T &pattern, size_t count)
Set pattern to the first count elements of type T starting from dev_ptr.
@ PI_EXT_KERNEL_EXEC_INFO_CACHE_LARGE_DATA
@ PI_EXT_KERNEL_EXEC_INFO_CACHE_LARGE_SLM
C++ wrapper of extern "C" PI interfaces.
Predicate returning true if all template type parameters except the last one are reductions.
std::vector< detail::AccessorImplPtr > MAccStorage
Storage for accessors.
std::vector< AccessorImplHost * > MRequirements
List of requirements that specify which memory is needed for the command group to be executed.
ext::oneapi::experimental::detail::merged_properties_t< PropertiesT, get_method_properties > type
typename ext::oneapi::experimental::detail::HasKernelPropertiesGetMethod< KernelType >::properties_t get_method_properties
static constexpr const char * getName()
constexpr static bool value
A struct to describe the properties of an image.
Opaque image memory handle type.
Opaque interop semaphore handle type.
is_device_copyable is a user specializable class template to indicate that a type T is device copyabl...