26 inline namespace _V1 {
27 namespace ext::intel::esimd {
52 template <
typename AccessorTy>
54 if constexpr (std::is_same_v<detail::LocalAccessorMarker, AccessorTy> ||
55 sycl::detail::acc_properties::is_local_accessor_v<AccessorTy>) {
58 #ifdef __ESIMD_FORCE_STATELESS_MEM
59 static_assert(sycl::detail::acc_properties::is_image_accessor_v<AccessorTy>,
60 "The function get_surface_index() is available only for "
61 "image- and local-accessors in stateless-only memory mode. "
63 "-fno-sycl-esimd-force-stateless-mem compilation switch.");
65 return __esimd_get_surface_index(
66 detail::AccessorPrivateProxy::getQualifiedPtrOrImageObj(acc));
73 template <
typename RT,
typename T,
int N>
75 if constexpr (
sizeof(T) == 1) {
77 return Vals.template bit_cast_view<uint8_t>();
78 }
else if constexpr (
sizeof(T) == 2) {
80 return Vals.template bit_cast_view<uint16_t>();
82 return Vals.template bit_cast_view<RT>();
87 template <
typename T,
typename T1,
int N>
89 auto Formatted = Vals.template bit_cast_view<T>();
90 if constexpr (
sizeof(T) ==
sizeof(T1)) {
93 constexpr
int Stride = Formatted.length / N;
94 return Formatted.template select<N, Stride>(0);
101 template <
typename PropertyListT, cache_level Level>
104 "ESIMD/GENX intrinsics accept only L1/L2 cache hints");
106 return getPropertyValue<PropertyListT, cache_hint_L1_key>(
cache_hint::none);
108 return getPropertyValue<PropertyListT, cache_hint_L2_key>(
cache_hint::none);
131 template <
typename T,
int NElts,
lsc_data_size DS,
typename PropertyListT,
132 int N,
typename OffsetT>
136 static_assert(std::is_integral_v<OffsetT>,
"Unsupported offset type");
137 check_lsc_vector_size<NElts>();
138 check_lsc_data_size<T, DS>();
139 check_cache_hints<cache_action::load, PropertyListT>();
140 constexpr
auto L1H = getCacheHintForIntrin<PropertyListT, cache_level::L1>();
141 constexpr
auto L2H = getCacheHintForIntrin<PropertyListT, cache_level::L2>();
142 constexpr uint16_t AddressScale = 1;
143 constexpr
int ImmOffset = 0;
149 Addrs += convert<uintptr_t>(offsets);
152 __esimd_lsc_load_merge_stateless<MsgT, L1H, L2H, AddressScale, ImmOffset,
153 EDS, VS, Transposed, N>(
155 return lsc_format_ret<T>(Result);
174 template <
typename T,
int NElts,
lsc_data_size DS,
typename PropertyListT,
175 int N,
typename Toffset>
178 static_assert(std::is_integral_v<Toffset>,
"Unsupported offset type");
179 check_lsc_vector_size<NElts>();
180 check_lsc_data_size<T, DS>();
181 check_cache_hints<cache_action::store, PropertyListT>();
182 constexpr
auto L1H = getCacheHintForIntrin<PropertyListT, cache_level::L1>();
183 constexpr
auto L2H = getCacheHintForIntrin<PropertyListT, cache_level::L2>();
184 constexpr uint16_t AddressScale = 1;
185 constexpr
int ImmOffset = 0;
191 addrs += convert<uintptr_t>(offsets);
193 __esimd_lsc_store_stateless<MsgT, L1H, L2H, AddressScale, ImmOffset, EDS, VS,
194 Transposed, N>(pred.
data(), addrs.
data(),
204 #ifdef __ESIMD_GATHER_SCATTER_LLVM_IR
258 #ifndef __ESIMD_GATHER_SCATTER_LLVM_IR
285 typename T,
int N,
int VS,
typename OffsetT,
287 __ESIMD_API std::enable_if_t<
288 ext::oneapi::experimental::is_property_list_v<PropertyListT>,
simd<T, N>>
290 simd<T, N> pass_thru, PropertyListT props = {}) {
291 static_assert(std::is_integral_v<OffsetT>,
"Unsupported offset type");
292 static_assert(N / VS >= 1 && N % VS == 0,
"N must be divisible by VS");
295 detail::getPropertyValue<PropertyListT, alignment_key>(
sizeof(T));
297 "gather() requires at least element-size alignment");
301 if constexpr (detail::has_cache_hints<PropertyListT>() || VS > 1 ||
303 static_assert(VS == 1 ||
sizeof(T) >= 4,
304 "VS > 1 is supprted only for 4- and 8-byte elements");
306 PropertyListT>(p, byte_offsets, mask, pass_thru);
309 Addrs = Addrs + convert<uint64_t>(byte_offsets);
311 using MsgT = detail::__raw_t<T>;
312 return __esimd_gather_ld<MsgT, N, Alignment>(
313 Addrs.data(), mask.
data(),
344 typename T,
int N,
int VS,
typename OffsetT,
346 __ESIMD_API std::enable_if_t<
347 ext::oneapi::experimental::is_property_list_v<PropertyListT>,
simd<T, N>>
349 PropertyListT props = {}) {
351 detail::getPropertyValue<PropertyListT, alignment_key>(
sizeof(T));
353 "gather() requires at least element-size alignment");
355 if constexpr (detail::has_cache_hints<PropertyListT>() || VS > 1 ||
359 return gather<T, N, VS>(p, byte_offsets, mask, PassThru, props);
362 Addrs += convert<uintptr_t>(byte_offsets);
363 using MsgT = detail::__raw_t<T>;
364 if constexpr (
sizeof(T) == 1) {
365 auto Ret = __esimd_svm_gather<MsgT, N, detail::ElemsPerAddrEncoding<4>(),
366 detail::ElemsPerAddrEncoding<1>()>(
367 Addrs.data(), mask.
data());
368 detail::check_rdregion_params<N * 4, N, 0, N, 4>();
369 return __esimd_rdregion<MsgT, N * 4, N, 0, N, 4>(Ret, 0);
370 }
else if constexpr (
sizeof(T) == 2) {
371 auto Ret = __esimd_svm_gather<MsgT, N, detail::ElemsPerAddrEncoding<2>(),
372 detail::ElemsPerAddrEncoding<2>()>(
373 Addrs.data(), mask.
data());
374 detail::check_rdregion_params<N * 2, N, 0, N, 2>();
375 return __esimd_rdregion<MsgT, N * 2, N, 0, N, 2>(Ret, 0);
377 return __esimd_svm_gather<MsgT, N, detail::ElemsPerAddrEncoding<1>(),
378 detail::ElemsPerAddrEncoding<1>()>(Addrs.data(),
403 typename T,
int N,
int VS,
typename OffsetT,
405 __ESIMD_API std::enable_if_t<
406 ext::oneapi::experimental::is_property_list_v<PropertyListT>,
simd<T, N>>
408 PropertyListT props = {}) {
410 return gather<T, N, VS>(p, byte_offsets, Mask, props);
438 typename T,
int N,
typename OffsetT,
440 __ESIMD_API std::enable_if_t<
441 ext::oneapi::experimental::is_property_list_v<PropertyListT>,
simd<T, N>>
443 simd<T, N> pass_thru, PropertyListT props = {}) {
444 constexpr
int VS = 1;
445 return gather<T, N, VS>(p, byte_offsets, mask, pass_thru, props);
470 typename T,
int N,
typename OffsetT,
472 __ESIMD_API std::enable_if_t<
473 ext::oneapi::experimental::is_property_list_v<PropertyListT>,
simd<T, N>>
475 PropertyListT props = {}) {
476 constexpr
int VS = 1;
477 return gather<T, N, VS>(p, byte_offsets, mask, props);
496 typename T,
int N,
typename OffsetT,
498 __ESIMD_API std::enable_if_t<
499 ext::oneapi::experimental::is_property_list_v<PropertyListT>,
simd<T, N>>
501 constexpr
int VS = 1;
502 return gather<T, N, VS>(p, byte_offsets, props);
534 typename T,
int N,
int VS = 1,
typename OffsetSimdViewT,
536 __ESIMD_API std::enable_if_t<
537 ext::oneapi::experimental::is_property_list_v<PropertyListT> &&
538 detail::is_simd_view_type_v<OffsetSimdViewT>,
541 simd<T, N> pass_thru, PropertyListT props = {}) {
542 return gather<T, N, VS>(p, byte_offsets.read(), mask, pass_thru, props);
574 int VS = 1,
typename OffsetT,
typename T,
typename PassThruSimdViewT,
575 int N = PassThruSimdViewT::getSizeX() * PassThruSimdViewT::getSizeY(),
577 __ESIMD_API std::enable_if_t<
578 ext::oneapi::experimental::is_property_list_v<PropertyListT> &&
579 detail::is_simd_view_type_v<PassThruSimdViewT>,
582 PassThruSimdViewT pass_thru, PropertyListT props = {}) {
583 return gather<T, N, VS>(p, byte_offsets, mask, pass_thru.read(), props);
615 int VS = 1,
typename OffsetSimdViewT,
typename T,
616 typename PassThruSimdViewT,
617 int N = PassThruSimdViewT::getSizeX() * PassThruSimdViewT::getSizeY(),
619 __ESIMD_API std::enable_if_t<
620 ext::oneapi::experimental::is_property_list_v<PropertyListT> &&
621 detail::is_simd_view_type_v<OffsetSimdViewT> &&
622 detail::is_simd_view_type_v<PassThruSimdViewT>,
625 PassThruSimdViewT pass_thru, PropertyListT props = {}) {
626 static_assert(N / VS ==
627 OffsetSimdViewT::getSizeX() * OffsetSimdViewT::getSizeY(),
628 "Size of pass_thru parameter must correspond to the size of "
629 "byte_offsets parameter.");
630 return gather<T, N, VS>(p, byte_offsets.read(), mask, pass_thru.read(),
661 int VS,
typename OffsetSimdViewT,
typename T,
int N,
663 __ESIMD_API std::enable_if_t<
664 ext::oneapi::experimental::is_property_list_v<PropertyListT> &&
665 detail::is_simd_view_type_v<OffsetSimdViewT>,
668 simd<T, N> pass_thru, PropertyListT props = {}) {
669 static_assert(N / VS ==
670 OffsetSimdViewT::getSizeX() * OffsetSimdViewT::getSizeY(),
671 "Size of pass_thru parameter must correspond to the size of "
672 "byte_offsets parameter.");
673 return gather<T, N, VS>(p, byte_offsets.read(), mask, pass_thru, props);
700 typename T,
int N,
int VS = 1,
typename OffsetSimdViewT,
702 __ESIMD_API std::enable_if_t<
703 ext::oneapi::experimental::is_property_list_v<PropertyListT> &&
704 detail::is_simd_view_type_v<OffsetSimdViewT>,
707 PropertyListT props = {}) {
708 return gather<T, N, VS>(p, byte_offsets.read(), mask, props);
733 int VS = 1,
typename OffsetSimdViewT,
typename T,
734 int N = OffsetSimdViewT::getSizeX() * OffsetSimdViewT::getSizeY() * VS,
736 __ESIMD_API std::enable_if_t<
737 ext::oneapi::experimental::is_property_list_v<PropertyListT> &&
738 detail::is_simd_view_type_v<OffsetSimdViewT>,
741 PropertyListT props = {}) {
742 return gather<T, N, VS>(p, byte_offsets.read(), mask, props);
763 typename T,
int N,
int VS = 1,
typename OffsetSimdViewT,
765 __ESIMD_API std::enable_if_t<
766 ext::oneapi::experimental::is_property_list_v<PropertyListT> &&
767 detail::is_simd_view_type_v<OffsetSimdViewT>,
769 gather(
const T *p, OffsetSimdViewT byte_offsets, PropertyListT props = {}) {
770 return gather<T, N, VS>(p, byte_offsets.read(), props);
789 int VS = 1,
typename OffsetSimdViewT,
typename T,
790 int N = OffsetSimdViewT::getSizeX() * OffsetSimdViewT::getSizeY() * VS,
792 __ESIMD_API std::enable_if_t<
793 ext::oneapi::experimental::is_property_list_v<PropertyListT> &&
794 detail::is_simd_view_type_v<OffsetSimdViewT>,
796 gather(
const T *p, OffsetSimdViewT byte_offsets, PropertyListT props = {}) {
797 return gather<T, N, VS>(p, byte_offsets.read(), props);
812 template <
typename Tx,
int N,
typename Toffset>
813 __ESIMD_API std::enable_if_t<std::is_integral_v<Toffset>,
simd<Tx, N>>
865 typename T,
int N,
int VS = 1,
typename OffsetT,
867 __ESIMD_API std::enable_if_t<
868 ext::oneapi::experimental::is_property_list_v<PropertyListT>>
871 static_assert(std::is_integral_v<OffsetT>,
"Unsupported offset type");
872 static_assert(N / VS >= 1 && N % VS == 0,
"N must be divisible by VS");
875 detail::getPropertyValue<PropertyListT, alignment_key>(
sizeof(T));
877 "scatter() requires at least element-size alignment");
880 if constexpr (detail::has_cache_hints<PropertyListT>() || VS > 1 ||
883 static_assert(VS == 1 ||
sizeof(T) >= 4,
884 "VS > 1 is supprted only for 4- and 8-byte elements");
886 PropertyListT>(p, byte_offsets, vals, mask);
889 Addrs = Addrs + convert<uint64_t>(byte_offsets);
890 using MsgT = detail::__raw_t<T>;
891 __esimd_scatter_st<MsgT, N, Alignment>(
893 Addrs.data(), mask.
data());
895 using Tx = detail::__raw_t<T>;
898 addrs = addrs + byte_offsets_i;
899 if constexpr (
sizeof(T) == 1) {
900 detail::check_wrregion_params<N * 4, N, 0, N, 4>();
902 D = __esimd_wrregion<Tx, N * 4, N, 0, N, 4>(D.data(), vals.
data(),
904 __esimd_svm_scatter<Tx, N, detail::ElemsPerAddrEncoding<4>(),
905 detail::ElemsPerAddrEncoding<1>()>(
906 addrs.data(), D.data(), mask.
data());
907 }
else if constexpr (
sizeof(T) == 2) {
908 detail::check_wrregion_params<N * 2, N, 0, N, 2>();
910 D = __esimd_wrregion<Tx, N * 2, N, 0, N, 2>(D.data(), vals.
data(),
912 __esimd_svm_scatter<Tx, N, detail::ElemsPerAddrEncoding<2>(),
913 detail::ElemsPerAddrEncoding<2>()>(
914 addrs.data(), D.data(), mask.
data());
916 __esimd_svm_scatter<Tx, N, detail::ElemsPerAddrEncoding<1>(),
917 detail::ElemsPerAddrEncoding<1>()>(
918 addrs.data(), vals.
data(), mask.
data());
947 int VS = 1,
typename OffsetT,
typename ValuesSimdViewT,
typename T,
948 int N = ValuesSimdViewT::getSizeX() * ValuesSimdViewT::getSizeY(),
950 __ESIMD_API std::enable_if_t<
951 detail::is_simd_view_type_v<ValuesSimdViewT> &&
952 ext::oneapi::experimental::is_property_list_v<PropertyListT>>
955 scatter<T, N, VS>(p, byte_offsets, vals.read(), mask, props);
980 typename T,
int N,
int VS = 1,
typename OffsetT,
982 __ESIMD_API std::enable_if_t<
983 ext::oneapi::experimental::is_property_list_v<PropertyListT>>
985 PropertyListT props = {}) {
987 scatter<T, N, VS>(p, byte_offsets, vals, Mask, props);
1014 int VS = 1,
typename OffsetSimdViewT,
typename ValuesSimdViewT,
typename T,
1015 int N = ValuesSimdViewT::getSizeX() * ValuesSimdViewT::getSizeY(),
1017 __ESIMD_API std::enable_if_t<
1018 detail::is_simd_view_type_v<ValuesSimdViewT> &&
1019 detail::is_simd_view_type_v<OffsetSimdViewT> &&
1020 ext::oneapi::experimental::is_property_list_v<PropertyListT>>
1021 scatter(T *p, OffsetSimdViewT byte_offsets, ValuesSimdViewT vals,
1023 static_assert(N / VS ==
1024 OffsetSimdViewT::getSizeX() * OffsetSimdViewT::getSizeY(),
1025 "Size of vals parameter must correspond to the size of "
1026 "byte_offsets parameter.");
1027 scatter<T, N, VS>(p, byte_offsets.read(), vals.read(), mask, props);
1053 int VS = 1,
typename OffsetT,
typename ValuesSimdViewT,
typename T,
1054 int N = ValuesSimdViewT::getSizeX() * ValuesSimdViewT::getSizeY(),
1056 __ESIMD_API std::enable_if_t<
1057 detail::is_simd_view_type_v<ValuesSimdViewT> &&
1058 ext::oneapi::experimental::is_property_list_v<PropertyListT>>
1060 PropertyListT props = {}) {
1061 scatter<T, N, VS>(p, byte_offsets, vals.read(), props);
1089 typename T,
int N,
int VS = 1,
typename OffsetSimdViewT,
1091 __ESIMD_API std::enable_if_t<
1092 detail::is_simd_view_type_v<OffsetSimdViewT> &&
1093 ext::oneapi::experimental::is_property_list_v<PropertyListT>>
1096 scatter<T, N, VS>(p, byte_offsets.read(), vals, mask, props);
1124 int VS,
typename OffsetSimdViewT,
typename T,
int N,
1126 __ESIMD_API std::enable_if_t<
1127 detail::is_simd_view_type_v<OffsetSimdViewT> &&
1128 ext::oneapi::experimental::is_property_list_v<PropertyListT>>
1131 static_assert(N / VS ==
1132 OffsetSimdViewT::getSizeX() * OffsetSimdViewT::getSizeY(),
1133 "Size of vals parameter must correspond to the size of "
1134 "byte_offsets parameter.");
1135 scatter<T, N, VS>(p, byte_offsets.read(), vals, mask, props);
1162 int VS,
typename OffsetSimdViewT,
typename T,
int N,
1164 __ESIMD_API std::enable_if_t<
1165 detail::is_simd_view_type_v<OffsetSimdViewT> &&
1166 ext::oneapi::experimental::is_property_list_v<PropertyListT>>
1168 PropertyListT props = {}) {
1169 static_assert(N / VS ==
1170 OffsetSimdViewT::getSizeX() * OffsetSimdViewT::getSizeY(),
1171 "Size of vals parameter must correspond to the size of "
1172 "byte_offsets parameter.");
1173 scatter<T, N, VS>(p, byte_offsets.read(), vals, props);
1199 typename T,
int N,
int VS = 1,
typename OffsetSimdViewT,
1201 __ESIMD_API std::enable_if_t<
1202 detail::is_simd_view_type_v<OffsetSimdViewT> &&
1203 ext::oneapi::experimental::is_property_list_v<PropertyListT>>
1205 PropertyListT props = {}) {
1207 scatter<T, N, VS>(p, byte_offsets.read(), vals, Mask, props);
1235 int VS = 1,
typename OffsetSimdViewT,
typename ValuesSimdViewT,
typename T,
1236 int N = ValuesSimdViewT::getSizeX() * ValuesSimdViewT::getSizeY(),
1238 __ESIMD_API std::enable_if_t<
1239 detail::is_simd_view_type_v<OffsetSimdViewT> &&
1240 detail::is_simd_view_type_v<ValuesSimdViewT> &&
1241 ext::oneapi::experimental::is_property_list_v<PropertyListT>>
1242 scatter(T *p, OffsetSimdViewT byte_offsets, ValuesSimdViewT vals,
1243 PropertyListT props = {}) {
1244 static_assert(N / VS ==
1245 OffsetSimdViewT::getSizeX() * OffsetSimdViewT::getSizeY(),
1246 "Size of vals parameter must correspond to the size of "
1247 "byte_offsets parameter.");
1248 scatter<T, N, VS>(p, byte_offsets.read(), vals.read(), props);
1262 template <
typename Tx,
int N,
typename Toffset>
1263 __ESIMD_API std::enable_if_t<std::is_integral_v<Toffset> && N == 1>
1271 #ifdef __ESIMD_FORCE_STATELESS_MEM
1308 template <
typename T,
int NElts,
typename PropertyListT>
1309 __ESIMD_API std::enable_if_t<is_property_list_v<PropertyListT>,
simd<T, NElts>>
1312 check_cache_hints<cache_action::load, PropertyListT>();
1314 PropertyListT::template get_property<alignment_key>().value;
1316 (
Alignment >= __ESIMD_DNS::OperandSize::DWORD &&
sizeof(T) <= 4) ||
1317 (
Alignment >= __ESIMD_DNS::OperandSize::QWORD &&
sizeof(T) > 4),
1318 "Incorrect alignment for the data type");
1320 constexpr
int SmallIntFactor64Bit =
sizeof(uint64_t) /
sizeof(T);
1321 constexpr
int SmallIntFactor32Bit =
1322 sizeof(uint32_t) /
sizeof(T) > 1 ?
sizeof(uint32_t) /
sizeof(T) : 1;
1323 static_assert(NElts > 0 && NElts % SmallIntFactor32Bit == 0,
1324 "Number of elements is not supported by Transposed load");
1330 constexpr
bool Use64BitData =
1331 Alignment >= __ESIMD_DNS::OperandSize::QWORD &&
1332 (NElts *
sizeof(T)) %
sizeof(uint64_t) == 0 &&
1333 (
sizeof(T) !=
sizeof(uint32_t) || NElts *
sizeof(T) > 256);
1334 constexpr
int SmallIntFactor =
1335 Use64BitData ? SmallIntFactor64Bit : SmallIntFactor32Bit;
1336 constexpr
int FactoredNElts = NElts / SmallIntFactor;
1337 check_lsc_vector_size<FactoredNElts>();
1340 using LoadElemT = __ESIMD_DNS::__raw_t<
1341 std::conditional_t<SmallIntFactor == 1, T,
1342 std::conditional_t<Use64BitData, uint64_t, uint32_t>>>;
1343 constexpr
auto L1H = getCacheHintForIntrin<PropertyListT, cache_level::L1>();
1344 constexpr
auto L2H = getCacheHintForIntrin<PropertyListT, cache_level::L2>();
1346 constexpr uint16_t AddressScale = 1;
1347 constexpr
int ImmOffset = 0;
1352 constexpr
int N = 1;
1357 pass_thru.template bit_cast_view<LoadElemT>();
1359 __esimd_lsc_load_merge_stateless<LoadElemT, L1H, L2H, AddressScale,
1360 ImmOffset, ActualDS, VS, Transposed, N>(
1362 return Result.template bit_cast_view<T>();
1397 template <
typename T,
int NElts,
typename PropertyListT,
typename AccessorT>
1399 std::enable_if_t<detail::is_device_accessor_with_v<
1400 AccessorT, detail::accessor_mode_cap::can_read> &&
1401 is_property_list_v<PropertyListT>,
1405 #ifdef __ESIMD_FORCE_STATELESS_MEM
1407 return block_load_impl<T, NElts, PropertyListT>(
1408 accessorToPointer<T>(acc, offset), pred, PassThru);
1411 check_cache_hints<cache_action::load, PropertyListT>();
1413 detail::getPropertyValue<PropertyListT, alignment_key>(
sizeof(T));
1415 (
Alignment >= __ESIMD_DNS::OperandSize::DWORD &&
sizeof(T) <= 4) ||
1416 (
Alignment >= __ESIMD_DNS::OperandSize::QWORD &&
sizeof(T) > 4),
1417 "Incorrect alignment for the data type");
1419 constexpr
int SmallIntFactor64Bit =
sizeof(uint64_t) /
sizeof(T);
1420 constexpr
int SmallIntFactor32Bit =
1421 sizeof(uint32_t) /
sizeof(T) > 1 ?
sizeof(uint32_t) /
sizeof(T) : 1;
1422 static_assert(NElts > 0 && NElts % SmallIntFactor32Bit == 0,
1423 "Number of elements is not supported by Transposed load");
1429 constexpr
bool Use64BitData =
1430 Alignment >= __ESIMD_DNS::OperandSize::QWORD &&
1431 (NElts *
sizeof(T)) %
sizeof(uint64_t) == 0 &&
1432 (
sizeof(T) !=
sizeof(uint32_t) || NElts *
sizeof(T) > 256);
1433 constexpr
int SmallIntFactor =
1434 Use64BitData ? SmallIntFactor64Bit : SmallIntFactor32Bit;
1435 constexpr
int FactoredNElts = NElts / SmallIntFactor;
1436 check_lsc_vector_size<FactoredNElts>();
1439 using LoadElemT = __ESIMD_DNS::__raw_t<
1440 std::conditional_t<SmallIntFactor == 1, T,
1441 std::conditional_t<Use64BitData, uint64_t, uint32_t>>>;
1442 constexpr
auto L1H = getCacheHintForIntrin<PropertyListT, cache_level::L1>();
1443 constexpr
auto L2H = getCacheHintForIntrin<PropertyListT, cache_level::L2>();
1444 constexpr uint16_t AddressScale = 1;
1445 constexpr
int ImmOffset = 0;
1448 constexpr
auto VS = to_lsc_vector_size<FactoredNElts>();
1450 constexpr
int N = 1;
1456 __esimd_lsc_load_bti<LoadElemT, L1H, L2H, AddressScale, ImmOffset,
1457 ActualDS, VS, Transposed, N>(pred.
data(),
1458 Offsets.
data(), SI);
1459 return Result.template bit_cast_view<T>();
1496 template <
typename T,
int NElts,
typename PropertyListT,
typename AccessorT>
1498 std::enable_if_t<detail::is_device_accessor_with_v<
1499 AccessorT, detail::accessor_mode_cap::can_read> &&
1500 is_property_list_v<PropertyListT>,
1504 #ifdef __ESIMD_FORCE_STATELESS_MEM
1505 return block_load_impl<T, NElts, PropertyListT>(
1506 accessorToPointer<T>(acc, offset), pred, pass_thru);
1509 check_cache_hints<cache_action::load, PropertyListT>();
1511 PropertyListT::template get_property<alignment_key>().value;
1513 (
Alignment >= __ESIMD_DNS::OperandSize::DWORD &&
sizeof(T) <= 4) ||
1514 (
Alignment >= __ESIMD_DNS::OperandSize::QWORD &&
sizeof(T) > 4),
1515 "Incorrect alignment for the data type");
1517 constexpr
int SmallIntFactor64Bit =
sizeof(uint64_t) /
sizeof(T);
1518 constexpr
int SmallIntFactor32Bit =
1519 sizeof(uint32_t) /
sizeof(T) > 1 ?
sizeof(uint32_t) /
sizeof(T) : 1;
1520 static_assert(NElts > 0 && NElts % SmallIntFactor32Bit == 0,
1521 "Number of elements is not supported by Transposed load");
1527 constexpr
bool Use64BitData =
1528 Alignment >= __ESIMD_DNS::OperandSize::QWORD &&
1529 (NElts *
sizeof(T)) %
sizeof(uint64_t) == 0 &&
1530 (
sizeof(T) !=
sizeof(uint32_t) || NElts *
sizeof(T) > 256);
1531 constexpr
int SmallIntFactor =
1532 Use64BitData ? SmallIntFactor64Bit : SmallIntFactor32Bit;
1533 constexpr
int FactoredNElts = NElts / SmallIntFactor;
1534 check_lsc_vector_size<FactoredNElts>();
1537 using LoadElemT = __ESIMD_DNS::__raw_t<
1538 std::conditional_t<SmallIntFactor == 1, T,
1539 std::conditional_t<Use64BitData, uint64_t, uint32_t>>>;
1540 constexpr
auto L1H = getCacheHintForIntrin<PropertyListT, cache_level::L1>();
1541 constexpr
auto L2H = getCacheHintForIntrin<PropertyListT, cache_level::L2>();
1542 constexpr uint16_t AddressScale = 1;
1543 constexpr
int ImmOffset = 0;
1546 constexpr
auto VS = to_lsc_vector_size<FactoredNElts>();
1548 constexpr
int N = 1;
1554 pass_thru.template bit_cast_view<LoadElemT>();
1556 __esimd_lsc_load_merge_bti<LoadElemT, L1H, L2H, AddressScale, ImmOffset,
1557 ActualDS, VS, Transposed, N>(
1559 return Result.template bit_cast_view<T>();
1563 template <
typename T,
int NElts,
typename PropertyListT>
1564 __ESIMD_API std::enable_if_t<detail::is_property_list_v<PropertyListT>>
1566 detail::check_cache_hints<cache_action::store, PropertyListT>();
1568 detail::getPropertyValue<PropertyListT, alignment_key>(
sizeof(T));
1570 (
Alignment >= __ESIMD_DNS::OperandSize::DWORD &&
sizeof(T) <= 4) ||
1571 (
Alignment >= __ESIMD_DNS::OperandSize::QWORD &&
sizeof(T) > 4),
1572 "Incorrect alignment for the data type");
1574 constexpr
int SmallIntFactor64Bit =
sizeof(uint64_t) /
sizeof(T);
1575 constexpr
int SmallIntFactor32Bit =
1576 sizeof(uint32_t) /
sizeof(T) > 1 ?
sizeof(uint32_t) /
sizeof(T) : 1;
1577 static_assert(NElts > 0 && NElts % SmallIntFactor32Bit == 0,
1578 "Number of elements is not supported by Transposed store");
1584 constexpr
bool Use64BitData =
1585 Alignment >= __ESIMD_DNS::OperandSize::QWORD &&
1586 (NElts *
sizeof(T)) %
sizeof(uint64_t) == 0 &&
1587 (
sizeof(T) !=
sizeof(uint32_t) || NElts *
sizeof(T) > 256);
1589 constexpr
int SmallIntFactor =
1590 Use64BitData ? SmallIntFactor64Bit : SmallIntFactor32Bit;
1591 constexpr
int FactoredNElts = NElts / SmallIntFactor;
1593 check_lsc_vector_size<FactoredNElts>();
1595 using StoreType = __ESIMD_DNS::__raw_t<
1596 std::conditional_t<SmallIntFactor == 1, T,
1597 std::conditional_t<Use64BitData, uint64_t, uint32_t>>>;
1598 constexpr
auto L1H = getCacheHintForIntrin<PropertyListT, cache_level::L1>();
1599 constexpr
auto L2H = getCacheHintForIntrin<PropertyListT, cache_level::L2>();
1600 constexpr uint16_t AddressScale = 1;
1601 constexpr
int ImmOffset = 0;
1606 constexpr
int N = 1;
1609 __esimd_lsc_store_stateless<StoreType, L1H, L2H, AddressScale, ImmOffset,
1610 ActualDS, VS, Transposed, N>(
1612 sycl::bit_cast<__ESIMD_DNS::vector_type_t<StoreType, FactoredNElts>>(
1616 template <
typename T,
int NElts,
typename PropertyListT,
typename AccessorT>
1618 std::enable_if_t<detail::is_device_accessor_with_v<
1619 AccessorT, detail::accessor_mode_cap::can_write> &&
1620 detail::is_property_list_v<PropertyListT>>
1623 #ifdef __ESIMD_FORCE_STATELESS_MEM
1624 block_store_impl<T, NElts, PropertyListT>(accessorToPointer<T>(acc, offset),
1628 check_cache_hints<cache_action::store, PropertyListT>();
1630 detail::getPropertyValue<PropertyListT, alignment_key>(
sizeof(T));
1632 (
Alignment >= __ESIMD_DNS::OperandSize::DWORD &&
sizeof(T) <= 4) ||
1633 (
Alignment >= __ESIMD_DNS::OperandSize::QWORD &&
sizeof(T) > 4),
1634 "Incorrect alignment for the data type");
1636 constexpr
int SmallIntFactor64Bit =
sizeof(uint64_t) /
sizeof(T);
1637 constexpr
int SmallIntFactor32Bit =
1638 sizeof(uint32_t) /
sizeof(T) >
static_cast<size_t>(1)
1639 ?
sizeof(uint32_t) /
sizeof(T)
1640 :
static_cast<size_t>(1);
1641 static_assert(NElts > 0 && NElts % SmallIntFactor32Bit == 0,
1642 "Number of elements is not supported by Transposed store");
1648 constexpr
bool Use64BitData =
1649 Alignment >= __ESIMD_DNS::OperandSize::QWORD &&
1650 (NElts *
sizeof(T)) %
sizeof(uint64_t) == 0 &&
1651 (
sizeof(T) !=
sizeof(uint32_t) || NElts *
sizeof(T) > 256);
1652 constexpr
int SmallIntFactor =
1653 Use64BitData ? SmallIntFactor64Bit : SmallIntFactor32Bit;
1654 constexpr
int FactoredNElts = NElts / SmallIntFactor;
1655 check_lsc_vector_size<FactoredNElts>();
1658 using StoreElemT = __ESIMD_DNS::__raw_t<
1659 std::conditional_t<SmallIntFactor == 1, T,
1660 std::conditional_t<Use64BitData, uint64_t, uint32_t>>>;
1661 constexpr
auto L1H = getCacheHintForIntrin<PropertyListT, cache_level::L1>();
1662 constexpr
auto L2H = getCacheHintForIntrin<PropertyListT, cache_level::L2>();
1663 constexpr uint16_t AddressScale = 1;
1664 constexpr
int ImmOffset = 0;
1667 constexpr
auto VS = to_lsc_vector_size<FactoredNElts>();
1669 constexpr
int N = 1;
1675 __esimd_lsc_store_bti<StoreElemT, L1H, L2H, AddressScale, ImmOffset, ActualDS,
1678 sycl::bit_cast<__ESIMD_DNS::vector_type_t<StoreElemT, FactoredNElts>>(
1700 template <
typename Tx,
int N,
1702 __ESIMD_API std::enable_if_t<is_simd_flag_type_v<Flags>>
1704 using T =
typename detail::__raw_t<Tx>;
1706 constexpr
size_t Align = Flags::template alignment<simd<T, N>>;
1707 __esimd_svm_block_st<T, N, Align>(
reinterpret_cast<VecT *
>(addr),
1776 __ESIMD_API std::enable_if_t<
1777 ext::oneapi::experimental::is_property_list_v<PropertyListT>,
simd<T, N>>
1779 constexpr
size_t DefaultAlignment = (
sizeof(T) <= 4) ? 4 :
sizeof(T);
1780 using NewPropertyListT =
1781 detail::add_alignment_property_t<PropertyListT, DefaultAlignment>;
1782 if constexpr (detail::has_cache_hints<PropertyListT>()) {
1785 return detail::block_load_impl<T, N, NewPropertyListT>(ptr, Mask, PassThru);
1788 NewPropertyListT::template get_property<alignment_key>().value;
1831 __ESIMD_API std::enable_if_t<
1832 ext::oneapi::experimental::is_property_list_v<PropertyListT>,
simd<T, N>>
1833 block_load(
const T *ptr,
size_t byte_offset, PropertyListT props = {}) {
1834 const T *AdjustedPtr =
reinterpret_cast<const T *
>(
1835 reinterpret_cast<const int8_t *
>(ptr) + byte_offset);
1836 return block_load<T, N>(AdjustedPtr, props);
1875 std::enable_if_t<detail::is_property_list_v<PropertyListT>,
simd<T, N>>
1877 constexpr
size_t DefaultAlignment = (
sizeof(T) <= 4) ? 4 :
sizeof(T);
1878 using NewPropertyListT =
1879 detail::add_alignment_property_t<PropertyListT, DefaultAlignment>;
1881 return detail::block_load_impl<T, N, NewPropertyListT>(ptr, pred, PassThru);
1920 __ESIMD_API std::enable_if_t<
1921 ext::oneapi::experimental::is_property_list_v<PropertyListT>,
simd<T, N>>
1923 PropertyListT props = {}) {
1924 const T *AdjustedPtr =
reinterpret_cast<const T *
>(
1925 reinterpret_cast<const int8_t *
>(ptr) + byte_offset);
1926 return block_load<T, N>(AdjustedPtr, pred, props);
1964 __ESIMD_API std::enable_if_t<
1965 ext::oneapi::experimental::is_property_list_v<PropertyListT>,
simd<T, N>>
1967 PropertyListT props = {}) {
1968 constexpr
size_t DefaultAlignment = (
sizeof(T) <= 4) ? 4 :
sizeof(T);
1969 using NewPropertyListT =
1970 detail::add_alignment_property_t<PropertyListT, DefaultAlignment>;
1971 return detail::block_load_impl<T, N, NewPropertyListT>(ptr, pred, pass_thru);
2009 typename PassThruSimdViewT,
typename T,
2010 int N = PassThruSimdViewT::getSizeX() * PassThruSimdViewT::getSizeY(),
2012 __ESIMD_API std::enable_if_t<
2013 detail::is_simd_view_type_v<PassThruSimdViewT> &&
2014 ext::oneapi::experimental::is_property_list_v<PropertyListT>,
2017 PropertyListT props = {}) {
2018 return block_load<T, N>(ptr, pred, pass_thru.read(), props);
2058 __ESIMD_API std::enable_if_t<
2059 ext::oneapi::experimental::is_property_list_v<PropertyListT>,
simd<T, N>>
2061 simd<T, N> pass_thru, PropertyListT props = {}) {
2062 const T *AdjustedPtr =
reinterpret_cast<const T *
>(
2063 reinterpret_cast<const int8_t *
>(ptr) + byte_offset);
2064 return block_load<T, N>(AdjustedPtr, pred, pass_thru, props);
2104 typename PassThruSimdViewT,
typename T,
2105 int N = PassThruSimdViewT::getSizeX() * PassThruSimdViewT::getSizeY(),
2107 __ESIMD_API std::enable_if_t<
2108 detail::is_simd_view_type_v<PassThruSimdViewT> &&
2109 ext::oneapi::experimental::is_property_list_v<PropertyListT>,
2112 PassThruSimdViewT pass_thru, PropertyListT props = {}) {
2113 return block_load<T, N>(ptr, byte_offset, pred, pass_thru.read(), props);
2131 template <
typename Tx,
int N,
2133 __ESIMD_API std::enable_if_t<is_simd_flag_type_v<Flags>,
simd<Tx, N>>
2135 using T =
typename detail::__raw_t<Tx>;
2137 constexpr
size_t Align = Flags::template alignment<simd<T, N>>;
2138 return __esimd_svm_block_ld<T, N, Align>(
2139 reinterpret_cast<const VecT *
>(addr));
2157 template <
typename Tx,
int N,
typename AccessorTy,
2159 typename = std::enable_if_t<
2160 is_simd_flag_type_v<Flags> &&
2161 detail::is_device_accessor_with_v<
2162 AccessorTy, detail::accessor_mode_cap::can_read>>,
2163 class T = detail::__raw_t<Tx>>
2167 #ifdef __ESIMD_FORCE_STATELESS_MEM
2168 return block_load<Tx, N>(__ESIMD_DNS::accessorToPointer<Tx>(acc, byte_offset),
2171 std::ignore = flags;
2172 constexpr
unsigned Sz =
sizeof(T) * N;
2173 static_assert(Sz >= detail::OperandSize::OWORD,
2174 "block size must be at least 1 oword");
2175 static_assert(Sz % detail::OperandSize::OWORD == 0,
2176 "block size must be whole number of owords");
2178 "block must be 1, 2, 4 or 8 owords long");
2179 static_assert(Sz <= 8 * detail::OperandSize::OWORD,
2180 "block size must be at most 8 owords");
2182 auto surf_ind = __esimd_get_surface_index(
2183 detail::AccessorPrivateProxy::getQualifiedPtrOrImageObj(acc));
2186 detail::OperandSize::OWORD) {
2187 return __esimd_oword_ld<T, N>(surf_ind, byte_offset >> 4);
2189 return __esimd_oword_ld_unaligned<T, N>(surf_ind, byte_offset);
2259 typename T,
int N,
typename AccessorT,
2261 __ESIMD_API std::enable_if_t<
2262 ext::oneapi::experimental::is_property_list_v<PropertyListT> &&
2263 detail::is_device_accessor_with_v<AccessorT,
2264 detail::accessor_mode_cap::can_read>,
2267 PropertyListT props = {}) {
2268 #ifdef __ESIMD_FORCE_STATELESS_MEM
2269 return block_load<T, N>(detail::accessorToPointer<T>(acc, byte_offset),
2274 constexpr
size_t DefaultAlignment = (
sizeof(T) <= 4) ? 4 :
sizeof(T);
2276 detail::getPropertyValue<PropertyListT, alignment_key>(DefaultAlignment);
2279 constexpr
size_t Size =
sizeof(T) * N;
2280 constexpr
size_t OWord = detail::OperandSize::OWORD;
2281 constexpr
bool IsLegacySize = Size == OWord || Size == 2 * OWord ||
2282 Size == 4 * OWord || Size == 8 * OWord;
2284 using NewPropertyListT =
2285 detail::add_alignment_property_t<PropertyListT, DefaultAlignment>;
2286 if constexpr (detail::has_cache_hints<PropertyListT>() || !IsLegacySize) {
2287 return detail::block_load_impl<T, N, NewPropertyListT>(acc, byte_offset,
2291 NewPropertyListT::template get_property<alignment_key>().value;
2327 typename T,
int N,
typename AccessorT,
2329 __ESIMD_API std::enable_if_t<
2330 ext::oneapi::experimental::is_property_list_v<PropertyListT> &&
2331 detail::is_device_accessor_with_v<AccessorT,
2332 detail::accessor_mode_cap::can_read>,
2337 using NewPropertyListT =
2338 detail::add_or_replace_alignment_property_t<PropertyListT, 16>;
2339 return block_load<T, N>(acc, 0, NewPropertyListT{});
2374 typename T,
int N,
typename AccessorT,
2376 __ESIMD_API std::enable_if_t<
2377 ext::oneapi::experimental::is_property_list_v<PropertyListT> &&
2378 detail::is_device_accessor_with_v<AccessorT,
2379 detail::accessor_mode_cap::can_read>,
2383 PropertyListT = {}) {
2386 constexpr
size_t DefaultAlignment = (
sizeof(T) <= 4) ? 4 :
sizeof(T);
2387 using NewPropertyListT =
2388 detail::add_alignment_property_t<PropertyListT, DefaultAlignment>;
2389 return detail::block_load_impl<T, N, NewPropertyListT>(acc, byte_offset, pred,
2427 typename PassThruSimdViewT,
2428 typename T = PassThruSimdViewT::value_type::element_type,
2429 int N = PassThruSimdViewT::getSizeX() * PassThruSimdViewT::getSizeY(),
2432 __ESIMD_API std::enable_if_t<
2433 detail::is_simd_view_type_v<PassThruSimdViewT> &&
2434 ext::oneapi::experimental::is_property_list_v<PropertyListT> &&
2435 detail::is_device_accessor_with_v<AccessorT,
2436 detail::accessor_mode_cap::can_read>,
2440 PropertyListT props = {}) {
2441 return block_load<T, N>(acc, byte_offset, pred, pass_thru.read(), props);
2476 typename T,
int N,
typename AccessorT,
2478 __ESIMD_API std::enable_if_t<
2479 ext::oneapi::experimental::is_property_list_v<PropertyListT> &&
2480 detail::is_device_accessor_with_v<AccessorT,
2481 detail::accessor_mode_cap::can_read>,
2486 return block_load<T, N>(acc, byte_offset, pred, PassThru, props);
2517 typename T,
int N,
typename AccessorT,
2519 __ESIMD_API std::enable_if_t<
2520 ext::oneapi::experimental::is_property_list_v<PropertyListT> &&
2521 detail::is_device_accessor_with_v<AccessorT,
2522 detail::accessor_mode_cap::can_read>,
2525 PropertyListT = {}) {
2528 using NewPropertyListT =
2529 detail::add_or_replace_alignment_property_t<PropertyListT, 16>;
2530 return block_load<T, N>(acc, 0, pred, pass_thru, NewPropertyListT{});
2564 typename PassThruSimdViewT,
2565 typename T = PassThruSimdViewT::value_type::element_type,
2566 int N = PassThruSimdViewT::getSizeX() * PassThruSimdViewT::getSizeY(),
2569 __ESIMD_API std::enable_if_t<
2570 detail::is_simd_view_type_v<PassThruSimdViewT> &&
2571 ext::oneapi::experimental::is_property_list_v<PropertyListT> &&
2572 detail::is_device_accessor_with_v<AccessorT,
2573 detail::accessor_mode_cap::can_read>,
2576 PropertyListT props = {}) {
2577 return block_load<T, N>(acc, pred, pass_thru.read(), props);
2607 typename T,
int N,
typename AccessorT,
2609 __ESIMD_API std::enable_if_t<
2610 ext::oneapi::experimental::is_property_list_v<PropertyListT> &&
2611 detail::is_device_accessor_with_v<AccessorT,
2612 detail::accessor_mode_cap::can_read>,
2617 using NewPropertyListT =
2618 detail::add_or_replace_alignment_property_t<PropertyListT, 16>;
2620 return block_load<T, N>(acc, 0, pred, PassThru, NewPropertyListT{});
2679 __ESIMD_API std::enable_if_t<detail::is_property_list_v<PropertyListT>>
2681 if constexpr (detail::has_cache_hints<PropertyListT>()) {
2682 constexpr
size_t DefaultAlignment = (
sizeof(T) <= 4) ? 4 :
sizeof(T);
2683 using NewPropertyListT =
2684 detail::add_alignment_property_t<PropertyListT, DefaultAlignment>;
2686 detail::block_store_impl<T, N, NewPropertyListT>(ptr, vals, Mask);
2691 detail::getPropertyValue<PropertyListT, alignment_key>(
2692 detail::OperandSize::OWORD);
2734 __ESIMD_API std::enable_if_t<
2735 ext::oneapi::experimental::is_property_list_v<PropertyListT>>
2737 PropertyListT props = {}) {
2739 reinterpret_cast<T *
>(
reinterpret_cast<int8_t *
>(ptr) + byte_offset);
2740 block_store<T, N>(AdjustedPtr, vals, props);
2778 __ESIMD_API std::enable_if_t<detail::is_property_list_v<PropertyListT>>
2780 PropertyListT = {}) {
2781 constexpr
size_t DefaultAlignment = (
sizeof(T) <= 4) ? 4 :
sizeof(T);
2782 using NewPropertyListT =
2783 detail::add_alignment_property_t<PropertyListT, DefaultAlignment>;
2784 detail::block_store_impl<T, N, NewPropertyListT>(ptr, vals, pred);
2826 __ESIMD_API std::enable_if_t<
2827 ext::oneapi::experimental::is_property_list_v<PropertyListT>>
2829 PropertyListT props = {}) {
2831 reinterpret_cast<T *
>(
reinterpret_cast<int8_t *
>(ptr) + byte_offset);
2832 block_store<T, N>(AdjustedPtr, vals, pred, props);
2871 typename ValuesSimdViewT,
typename T,
2872 int N = ValuesSimdViewT::getSizeX() * ValuesSimdViewT::getSizeY(),
2874 __ESIMD_API std::enable_if_t<detail::is_simd_view_type_v<ValuesSimdViewT> &&
2875 detail::is_property_list_v<PropertyListT>>
2877 block_store<T, N>(ptr, vals.read(), props);
2917 typename ValuesSimdViewT,
typename T,
2918 int N = ValuesSimdViewT::getSizeX() * ValuesSimdViewT::getSizeY(),
2920 __ESIMD_API std::enable_if_t<
2921 detail::is_simd_view_type_v<ValuesSimdViewT> &&
2922 ext::oneapi::experimental::is_property_list_v<PropertyListT>>
2924 PropertyListT props = {}) {
2925 block_store<T, N>(ptr, byte_offset, vals.read(), props);
2963 typename ValuesSimdViewT,
typename T,
2964 int N = ValuesSimdViewT::getSizeX() * ValuesSimdViewT::getSizeY(),
2966 __ESIMD_API std::enable_if_t<detail::is_simd_view_type_v<ValuesSimdViewT> &&
2967 detail::is_property_list_v<PropertyListT>>
2969 PropertyListT props = {}) {
2970 block_store<T, N>(ptr, vals.read(), pred, props);
3012 typename ValuesSimdViewT,
typename T,
3013 int N = ValuesSimdViewT::getSizeX() * ValuesSimdViewT::getSizeY(),
3015 __ESIMD_API std::enable_if_t<
3016 detail::is_simd_view_type_v<ValuesSimdViewT> &&
3017 ext::oneapi::experimental::is_property_list_v<PropertyListT>>
3019 PropertyListT props = {}) {
3020 block_store<T, N>(ptr, byte_offset, vals.read(), pred, props);
3086 typename T,
int N,
typename AccessorT,
3088 __ESIMD_API std::enable_if_t<
3089 ext::oneapi::experimental::is_property_list_v<PropertyListT> &&
3090 detail::is_device_accessor_with_v<AccessorT,
3091 detail::accessor_mode_cap::can_write>>
3094 #ifdef __ESIMD_FORCE_STATELESS_MEM
3095 block_store<T, N>(detail::accessorToPointer<T>(acc, byte_offset), vals,
3098 constexpr
int DefaultLSCAlignment = (
sizeof(T) <= 4) ? 4 :
sizeof(T);
3100 detail::getPropertyValue<PropertyListT, alignment_key>(
3101 DefaultLSCAlignment);
3102 constexpr
bool AlignmentRequiresLSC =
3103 PropertyListT::template has_property<alignment_key>() &&
Alignment < 16;
3104 using Tx = detail::__raw_t<T>;
3105 constexpr
unsigned Sz =
sizeof(Tx) * N;
3106 constexpr
bool SzRequiresLSC =
3107 Sz < detail::OperandSize::OWORD || Sz % detail::OperandSize::OWORD != 0 ||
3109 Sz > 8 * detail::OperandSize::OWORD;
3110 if constexpr (detail::has_cache_hints<PropertyListT>() ||
3111 AlignmentRequiresLSC || SzRequiresLSC) {
3112 using NewPropertyListT =
3113 detail::add_alignment_property_t<PropertyListT, DefaultLSCAlignment>;
3115 detail::block_store_impl<T, N, NewPropertyListT>(acc, byte_offset, vals,
3118 auto surf_ind = __esimd_get_surface_index(
3119 detail::AccessorPrivateProxy::getQualifiedPtrOrImageObj(acc));
3120 __esimd_oword_st<Tx, N>(surf_ind, byte_offset >> 4, vals.
data());
3155 typename T,
int N,
typename AccessorT,
3157 __ESIMD_API std::enable_if_t<
3158 ext::oneapi::experimental::is_property_list_v<PropertyListT> &&
3159 detail::is_device_accessor_with_v<AccessorT,
3160 detail::accessor_mode_cap::can_write>>
3164 using NewPropertyListT =
3165 detail::add_or_replace_alignment_property_t<PropertyListT, 16>;
3166 block_store<T, N>(acc, 0, vals, NewPropertyListT{});
3201 typename T,
int N,
typename AccessorT,
3203 __ESIMD_API std::enable_if_t<
3204 ext::oneapi::experimental::is_property_list_v<PropertyListT> &&
3205 detail::is_device_accessor_with_v<AccessorT,
3206 detail::accessor_mode_cap::can_write>>
3209 constexpr
size_t DefaultAlignment = (
sizeof(T) <= 4) ? 4 :
sizeof(T);
3210 using NewPropertyListT =
3211 detail::add_alignment_property_t<PropertyListT, DefaultAlignment>;
3212 detail::block_store_impl<T, N, NewPropertyListT>(acc, byte_offset, vals,
3241 typename T,
int N,
typename AccessorT,
3243 __ESIMD_API std::enable_if_t<
3244 ext::oneapi::experimental::is_property_list_v<PropertyListT> &&
3245 detail::is_device_accessor_with_v<AccessorT,
3246 detail::accessor_mode_cap::can_write>>
3248 PropertyListT props = {}) {
3251 using NewPropertyListT =
3252 detail::add_or_replace_alignment_property_t<PropertyListT, 16>;
3253 block_store<T, N>(acc, 0, vals, pred, NewPropertyListT{});
3302 typename ValuesSimdViewT,
3303 typename T = ValuesSimdViewT::value_type::element_type,
3304 int N = ValuesSimdViewT::getSizeX() * ValuesSimdViewT::getSizeY(),
3307 __ESIMD_API std::enable_if_t<
3308 detail::is_simd_view_type_v<ValuesSimdViewT> &&
3309 ext::oneapi::experimental::is_property_list_v<PropertyListT> &&
3310 detail::is_device_accessor_with_v<AccessorT,
3311 detail::accessor_mode_cap::can_write>>
3313 ValuesSimdViewT vals, PropertyListT props = {}) {
3314 block_store<T, N>(acc, byte_offset, vals.read(), props);
3349 typename ValuesSimdViewT,
3350 typename T = ValuesSimdViewT::value_type::element_type,
3351 int N = ValuesSimdViewT::getSizeX() * ValuesSimdViewT::getSizeY(),
3354 __ESIMD_API std::enable_if_t<
3355 detail::is_simd_view_type_v<ValuesSimdViewT> &&
3356 ext::oneapi::experimental::is_property_list_v<PropertyListT> &&
3357 detail::is_device_accessor_with_v<AccessorT,
3358 detail::accessor_mode_cap::can_write>>
3359 block_store(AccessorT acc, ValuesSimdViewT vals, PropertyListT props = {}) {
3360 block_store<T, N>(acc, vals.read(), props);
3397 typename ValuesSimdViewT,
3398 typename T = ValuesSimdViewT::value_type::element_type,
3399 int N = ValuesSimdViewT::getSizeX() * ValuesSimdViewT::getSizeY(),
3402 __ESIMD_API std::enable_if_t<
3403 detail::is_simd_view_type_v<ValuesSimdViewT> &&
3404 ext::oneapi::experimental::is_property_list_v<PropertyListT> &&
3405 detail::is_device_accessor_with_v<AccessorT,
3406 detail::accessor_mode_cap::can_write>>
3408 ValuesSimdViewT vals,
simd_mask<1> pred, PropertyListT props = {}) {
3409 block_store<T, N>(acc, byte_offset, vals.read(), pred, props);
3439 typename ValuesSimdViewT,
3440 typename T = ValuesSimdViewT::value_type::element_type,
3441 int N = ValuesSimdViewT::getSizeX() * ValuesSimdViewT::getSizeY(),
3444 __ESIMD_API std::enable_if_t<
3445 detail::is_simd_view_type_v<ValuesSimdViewT> &&
3446 ext::oneapi::experimental::is_property_list_v<PropertyListT> &&
3447 detail::is_device_accessor_with_v<AccessorT,
3448 detail::accessor_mode_cap::can_write>>
3450 PropertyListT props = {}) {
3451 block_store<T, N>(acc, vals.read(), pred, props);
3462 template <
typename T,
int N,
typename AccessorTy>
3463 ESIMD_INLINE ESIMD_NODEBUG std::enable_if_t<
3464 std::is_same_v<detail::LocalAccessorMarker, AccessorTy> ||
3465 is_accessor_with_v<AccessorTy, detail::accessor_mode_cap::can_write>>
3470 if constexpr (
sizeof(T) == 8) {
3471 scatter_impl<uint32_t, N>(
3472 acc, vals.template bit_cast_view<uint32_t>().template select<N, 2>(0),
3473 offsets, glob_offset, mask);
3474 scatter_impl<uint32_t, N>(
3475 acc, vals.template bit_cast_view<uint32_t>().template select<N, 2>(1),
3476 offsets, glob_offset +
sizeof(uint32_t), mask);
3478 constexpr
int TypeSizeLog2 = detail::ElemsPerAddrEncoding<sizeof(T)>();
3480 constexpr int16_t scale = 0;
3483 if constexpr (
sizeof(T) < 4) {
3484 using Tint = std::conditional_t<std::is_integral_v<T>, T,
3485 detail::uint_type_t<
sizeof(T)>>;
3486 using Treal = __raw_t<T>;
3487 simd<Tint, N> vals_int = bitcast<Tint, Treal, N>(std::move(vals).data());
3488 using PromoT =
typename std::conditional_t<std::is_signed<Tint>::value,
3490 const simd<PromoT, N> promo_vals = convert<PromoT>(std::move(vals_int));
3491 __esimd_scatter_scaled<PromoT, N, decltype(si), TypeSizeLog2, scale>(
3492 mask.
data(), si, glob_offset, offsets.data(), promo_vals.data());
3494 using Treal = __raw_t<T>;
3495 if constexpr (!std::is_same_v<Treal, T>) {
3497 __esimd_scatter_scaled<Treal, N, decltype(si), TypeSizeLog2, scale>(
3498 mask.
data(), si, glob_offset, offsets.data(), Values.data());
3500 __esimd_scatter_scaled<T, N, decltype(si), TypeSizeLog2, scale>(
3501 mask.
data(), si, glob_offset, offsets.data(), vals.data());
3507 #ifndef __ESIMD_FORCE_STATELESS_MEM
3525 template <
typename T,
int NElts,
lsc_data_size DS,
typename PropertyListT,
3526 int N,
typename AccessorTy,
typename OffsetT>
3527 __ESIMD_API std::enable_if_t<
3528 is_device_accessor_with_v<AccessorTy, accessor_mode_cap::can_write>>
3531 static_assert(std::is_integral_v<OffsetT>,
3532 "Scatter must have integral byte_offset type");
3533 static_assert(
sizeof(OffsetT) <= 4,
3534 "Implicit truncation of 64-bit byte_offset to 32-bit is "
3535 "disabled. Use -fsycl-esimd-force-stateless-mem or explicitly "
3536 "convert offsets to a 32-bit vector");
3537 check_lsc_vector_size<NElts>();
3538 check_lsc_data_size<T, DS>();
3539 check_cache_hints<cache_action::store, PropertyListT>();
3540 constexpr
auto L1H = getCacheHintForIntrin<PropertyListT, cache_level::L1>();
3541 constexpr
auto L2H = getCacheHintForIntrin<PropertyListT, cache_level::L2>();
3542 constexpr uint16_t AddressScale = 1;
3543 constexpr
int ImmOffset = 0;
3551 __esimd_lsc_store_bti<MsgT, L1H, L2H, AddressScale, ImmOffset, EDS, LSCNElts,
3552 Transposed, N>(pred.
data(), ByteOffsets32.data(),
3557 template <
typename T,
int N,
typename AccessorTy>
3558 __ESIMD_API std::enable_if_t<
3559 (std::is_same_v<detail::LocalAccessorMarker, AccessorTy> ||
3560 is_accessor_with_v<AccessorTy, detail::accessor_mode_cap::can_read>),
3566 if constexpr (
sizeof(T) == 8) {
3568 Res.template bit_cast_view<uint32_t>().template select<N, 2>(0) =
3569 gather_impl<uint32_t, N>(acc, offsets, glob_offset, mask);
3570 Res.template bit_cast_view<uint32_t>().template select<N, 2>(1) =
3571 gather_impl<uint32_t, N>(acc, offsets, glob_offset +
sizeof(uint32_t),
3575 using Treal = __raw_t<T>;
3576 constexpr
int TypeSizeLog2 = detail::ElemsPerAddrEncoding<sizeof(T)>();
3578 constexpr uint32_t scale = 0;
3580 if constexpr (
sizeof(T) < 4) {
3581 using Tint = std::conditional_t<std::is_integral_v<T>, T,
3582 detail::uint_type_t<
sizeof(T)>>;
3584 static_assert(std::is_integral<Tint>::value,
3585 "only integral 1- & 2-byte types are supported");
3586 using PromoT =
typename std::conditional_t<std::is_signed<Tint>::value,
3589 __esimd_gather_masked_scaled2<PromoT, N, decltype(si), TypeSizeLog2,
3590 scale>(si, glob_offset, offsets.data(),
3592 auto Res = convert<Tint>(promo_vals);
3594 if constexpr (!std::is_same_v<Tint, T>) {
3595 return detail::bitcast<Treal, Tint, N>(Res.data());
3600 simd<Treal, N> Res = __esimd_gather_masked_scaled2<Treal, N, decltype(si),
3601 TypeSizeLog2, scale>(
3602 si, glob_offset, offsets.data(), mask.
data());
3603 if constexpr (!std::is_same_v<Treal, T>) {
3604 return Res.template bit_cast_view<T>();
3612 #ifndef __ESIMD_FORCE_STATELESS_MEM
3613 template <
typename T,
int N,
int VS,
typename PropertyListT,
lsc_data_size DS,
3614 typename OffsetT,
typename AccessorT>
3615 __ESIMD_API std::enable_if_t<
3616 is_device_accessor_with_v<AccessorT, accessor_mode_cap::can_read>,
3620 static_assert(N / VS >= 1 && N % VS == 0,
"N must be divisible by VS");
3621 static_assert(std::is_integral_v<OffsetT>,
3622 "Gather must have integral byte_offset type");
3623 static_assert(
sizeof(OffsetT) <= 4,
3624 "Implicit truncation of 64-bit byte_offset to 32-bit is "
3625 "disabled. Use -fsycl-esimd-force-stateless-mem or explicitly "
3626 "convert offsets to a 32-bit vector");
3627 static_assert(VS == 1 ||
sizeof(T) >= 4,
3628 "VS > 1 is supprted only for 4- and 8-byte elements");
3629 check_lsc_vector_size<VS>();
3630 check_lsc_data_size<T, DS>();
3631 check_cache_hints<cache_action::load, PropertyListT>();
3632 constexpr uint16_t AddressScale = 1;
3633 constexpr
int ImmOffset = 0;
3638 constexpr
auto L1H = getCacheHintForIntrin<PropertyListT, cache_level::L1>();
3639 constexpr
auto L2H = getCacheHintForIntrin<PropertyListT, cache_level::L2>();
3641 simd<uint32_t, N / VS> ByteOffsets32 = convert<uint32_t>(byte_offsets);
3642 simd<MsgT, N> PassThruExpanded = lsc_format_input<MsgT>(pass_thru);
3644 __esimd_lsc_load_merge_bti<MsgT, L1H, L2H, AddressScale, ImmOffset, EDS,
3645 LSCVS, Transposed, N / VS>(
3646 pred.
data(), ByteOffsets32.data(), SI, PassThruExpanded.data());
3647 return lsc_format_ret<T>(Result);
3668 template <
typename T,
int NElts, lsc_data_size DS,
int N>
3672 check_lsc_vector_size<NElts>();
3673 check_lsc_data_size<T, DS>();
3674 constexpr uint16_t AddressScale = 1;
3675 constexpr
int ImmOffset = 0;
3683 AddressScale, ImmOffset, EDS, LSCVS,
3684 Transposed, N>(pred.
data(), offsets.data(),
3685 PassThruExpanded.data());
3686 return lsc_format_ret<T>(Result);
3703 template <
typename T,
int NElts, lsc_data_size DS,
int N>
3706 check_lsc_vector_size<NElts>();
3707 check_lsc_data_size<T, DS>();
3708 constexpr uint16_t AddressScale = 1;
3709 constexpr
int ImmOffset = 0;
3716 ImmOffset, EDS, LSCVS, Transposed, N>(
3717 pred.
data(), offsets.data(), Tmp.data());
3735 template <
typename T,
int NElts,
lsc_data_size DS,
typename PropertyListT,
3736 int N,
typename Toffset>
3739 static_assert(std::is_integral_v<Toffset>,
"Unsupported offset type");
3740 check_lsc_vector_size<NElts>();
3741 check_lsc_data_size<T, DS>();
3742 check_cache_hints<cache_action::prefetch, PropertyListT>();
3743 constexpr
auto L1H = getCacheHintForIntrin<PropertyListT, cache_level::L1>();
3744 constexpr
auto L2H = getCacheHintForIntrin<PropertyListT, cache_level::L2>();
3745 constexpr uint16_t AddressScale = 1;
3746 constexpr
int ImmOffset = 0;
3752 addrs += convert<uintptr_t>(byte_offsets);
3753 __esimd_lsc_prefetch_stateless<MsgT, L1H, L2H, AddressScale, ImmOffset, EDS,
3754 LSCVS, Transposed, N>(pred.
data(),
3758 template <
typename T,
int NElts,
lsc_data_size DS,
typename PropertyListT,
3760 __ESIMD_API std::enable_if_t<std::is_integral_v<Toffset>>
3762 check_lsc_data_size<T, DS>();
3763 check_cache_hints<cache_action::prefetch, PropertyListT>();
3766 detail::getPropertyValue<PropertyListT, alignment_key>(
sizeof(T));
3768 (
Alignment >= __ESIMD_DNS::OperandSize::DWORD &&
sizeof(T) <= 4) ||
3769 (
Alignment >= __ESIMD_DNS::OperandSize::QWORD &&
sizeof(T) > 4),
3770 "Incorrect alignment for the data type");
3772 constexpr
int SmallIntFactor64Bit =
sizeof(uint64_t) /
sizeof(T);
3773 constexpr
int SmallIntFactor32Bit =
3774 sizeof(uint32_t) /
sizeof(T) > 1 ?
sizeof(uint32_t) /
sizeof(T) : 1;
3775 static_assert(NElts > 0 && NElts % SmallIntFactor32Bit == 0,
3776 "Number of elements is not supported by Transposed load");
3782 constexpr
bool Use64BitData =
3783 Alignment >= __ESIMD_DNS::OperandSize::QWORD &&
3784 (NElts *
sizeof(T)) %
sizeof(uint64_t) == 0 &&
3785 (
sizeof(T) !=
sizeof(uint32_t) || NElts *
sizeof(T) > 256);
3786 constexpr
int SmallIntFactor =
3787 Use64BitData ? SmallIntFactor64Bit : SmallIntFactor32Bit;
3788 constexpr
int FactoredNElts = NElts / SmallIntFactor;
3789 check_lsc_vector_size<FactoredNElts>();
3792 using LoadElemT = __ESIMD_DNS::__raw_t<
3793 std::conditional_t<SmallIntFactor == 1, T,
3794 std::conditional_t<Use64BitData, uint64_t, uint32_t>>>;
3796 constexpr
auto L1H = getCacheHintForIntrin<PropertyListT, cache_level::L1>();
3797 constexpr
auto L2H = getCacheHintForIntrin<PropertyListT, cache_level::L2>();
3798 constexpr uint16_t AddressScale = 1;
3799 constexpr
int ImmOffset = 0;
3800 constexpr
lsc_data_size EDS = finalize_data_size<LoadElemT, DS>();
3804 "Transposed prefetch is supported only for data size u32 or u64");
3805 constexpr
lsc_vector_size LSCVS = to_lsc_vector_size<FactoredNElts>();
3807 constexpr
int N = 1;
3810 __esimd_lsc_prefetch_stateless<LoadElemT, L1H, L2H, AddressScale, ImmOffset,
3811 EDS, LSCVS, Transposed, N>(pred.
data(),
3815 #ifndef __ESIMD_FORCE_STATELESS_MEM
3834 template <
typename T,
int NElts,
lsc_data_size DS,
typename PropertyListT,
3835 int N,
typename AccessorTy,
typename OffsetT>
3836 __ESIMD_API std::enable_if_t<
3837 is_device_accessor_with_v<AccessorTy, accessor_mode_cap::can_read>>
3840 static_assert(std::is_integral_v<OffsetT>,
3841 "Prefetch must have integral byte_offset type");
3842 static_assert(
sizeof(OffsetT) <= 4,
3843 "Implicit truncation of 64-bit byte_offset to 32-bit is "
3844 "disabled. Use -fsycl-esimd-force-stateless-mem or explicitly "
3845 "convert offsets to a 32-bit vector");
3846 check_lsc_vector_size<NElts>();
3847 check_lsc_data_size<T, DS>();
3848 check_cache_hints<cache_action::prefetch, PropertyListT>();
3849 constexpr
auto L1H = getCacheHintForIntrin<PropertyListT, cache_level::L1>();
3850 constexpr
auto L2H = getCacheHintForIntrin<PropertyListT, cache_level::L2>();
3851 constexpr uint16_t AddressScale = 1;
3852 constexpr
int ImmOffset = 0;
3859 __esimd_lsc_prefetch_bti<MsgT, L1H, L2H, AddressScale, ImmOffset, EDS, LSCVS,
3860 Transposed, N>(pred.
data(), ByteOffsets32.data(),
3881 template <
typename T,
int NElts,
lsc_data_size DS,
typename PropertyListT,
3882 typename AccessorTy,
typename OffsetT>
3883 __ESIMD_API std::enable_if_t<
3884 std::is_integral_v<OffsetT> &&
3885 is_device_accessor_with_v<AccessorTy, accessor_mode_cap::can_read>>
3887 static_assert(
sizeof(OffsetT) <= 4,
3888 "Implicit truncation of 64-bit byte_offset to 32-bit is "
3889 "disabled. Use -fsycl-esimd-force-stateless-mem or explicitly "
3890 "convert offsets to a 32-bit vector");
3891 check_lsc_data_size<T, DS>();
3892 check_cache_hints<cache_action::prefetch, PropertyListT>();
3895 detail::getPropertyValue<PropertyListT, alignment_key>(
sizeof(T));
3897 constexpr
int SmallIntFactor64Bit =
sizeof(uint64_t) /
sizeof(T);
3898 constexpr
int SmallIntFactor32Bit =
3899 sizeof(uint32_t) /
sizeof(T) > 1 ?
sizeof(uint32_t) /
sizeof(T) : 1;
3900 static_assert(NElts > 0 && NElts % SmallIntFactor32Bit == 0,
3901 "Number of elements is not supported by Transposed load");
3907 constexpr
bool Use64BitData =
3908 Alignment >= __ESIMD_DNS::OperandSize::QWORD &&
3909 (NElts *
sizeof(T)) %
sizeof(uint64_t) == 0 &&
3910 (
sizeof(T) !=
sizeof(uint32_t) || NElts *
sizeof(T) > 256);
3911 constexpr
int SmallIntFactor =
3912 Use64BitData ? SmallIntFactor64Bit : SmallIntFactor32Bit;
3913 constexpr
int FactoredNElts = NElts / SmallIntFactor;
3914 check_lsc_vector_size<FactoredNElts>();
3917 using LoadElemT = __ESIMD_DNS::__raw_t<
3918 std::conditional_t<SmallIntFactor == 1, T,
3919 std::conditional_t<Use64BitData, uint64_t, uint32_t>>>;
3921 constexpr
auto L1H = getCacheHintForIntrin<PropertyListT, cache_level::L1>();
3922 constexpr
auto L2H = getCacheHintForIntrin<PropertyListT, cache_level::L2>();
3923 constexpr uint16_t AddressScale = 1;
3924 constexpr
int ImmOffset = 0;
3925 constexpr
lsc_data_size EDS = finalize_data_size<LoadElemT, DS>();
3929 "Transposed prefetch is supported only for data size u32 or u64");
3930 constexpr
lsc_vector_size LSCVS = to_lsc_vector_size<FactoredNElts>();
3932 constexpr
int N = 1;
3936 __esimd_lsc_prefetch_bti<LoadElemT, L1H, L2H, AddressScale, ImmOffset, EDS,
3937 LSCVS, Transposed, N>(pred.
data(), offsets.data(),
3943 template <
typename T,
int NBlocks,
int Height,
int Width,
bool Transposed,
3946 if constexpr (Transformed)
3947 return roundUpNextMultiple<Height, 4 /
sizeof(T)>() *
3948 getNextPowerOf2<Width>() * NBlocks;
3949 return Width * Height * NBlocks;
3952 #ifndef __ESIMD_DWORD_BLOCK_2D_WIDTH_SCALE
3953 #define __ESIMD_DWORD_BLOCK_2D_WIDTH_SCALE (1)
3956 #ifndef __ESIMD_BLOCK_2D_WIDTH_CHECK
3957 #define __ESIMD_BLOCK_2D_WIDTH_CHECK(OP, BLOCK_WIDTH, NBLOCKS, SIZE) \
3958 static_assert((BLOCK_WIDTH) * (NBLOCKS) * (SIZE) <= 64, \
3959 "Unsupported block width");
3965 template <
typename T,
int BlockWidth,
int BlockHeight,
int NBlocks,
3966 bool Transposed,
bool Transformed, block_2d_op Op>
3967 constexpr
void check_lsc_block_2d_restrictions() {
3968 constexpr
int GRFByteSize = BlockWidth * BlockHeight * NBlocks *
sizeof(T);
3969 static_assert(BlockWidth > 0,
"Block width must be positive");
3970 static_assert(BlockHeight > 0,
"Block height must be positive");
3972 if constexpr (Op == block_2d_op::store)
3973 static_assert(GRFByteSize <= 512,
"2D store supports 512 bytes max");
3975 static_assert(GRFByteSize <= 2048,
3976 "2D load/prefetch supports 2048 bytes max");
3977 static_assert(!Transposed || !Transformed,
3978 "Transposed and transformed is not supported");
3979 static_assert((
sizeof(T) * BlockWidth) % 4 == 0,
3980 "Block width must be aligned by DW");
3981 if constexpr (Transposed) {
3982 static_assert(NBlocks == 1,
"Transposed expected to be 1 block only");
3983 static_assert(
sizeof(T) == 4 ||
sizeof(T) == 8,
3984 "Transposed load is supported only for data size u32 or u64");
3985 static_assert(
sizeof(T) == 8 ? BlockHeight == 8
3986 : BlockHeight >= 1 && BlockHeight <= 32,
3987 "Unsupported block height");
3988 static_assert(
sizeof(T) == 8
3990 : BlockWidth >= 1 &&
3992 8 * __ESIMD_DWORD_BLOCK_2D_WIDTH_SCALE,
3993 "Unsupported block width");
3994 }
else if constexpr (Transformed) {
3995 static_assert(
sizeof(T) == 1 ||
sizeof(T) == 2,
3996 "VNNI transform is supported only for data size u8 or u16");
3998 "Unsupported number of blocks");
3999 static_assert(BlockHeight *
sizeof(T) >= 4 && BlockHeight <= 32,
4000 "Unsupported block height");
4001 static_assert(BlockWidth *
sizeof(T) >= 4 && BlockWidth <= 16 &&
4002 BlockWidth * NBlocks *
sizeof(T) <= 64,
4003 "Unsupported block width");
4005 if constexpr (Op == block_2d_op::store) {
4006 static_assert(NBlocks == 1,
"Unsupported number of blocks for 2D store");
4007 static_assert(BlockHeight <= 8,
"Unsupported block height for store");
4011 "Unsupported number of blocks for 2D load/prefetch");
4012 static_assert(BlockHeight <= 32,
"Unsupported block height for load");
4014 static_assert(BlockWidth *
sizeof(T) >= 4,
"Unsupported block width");
4015 __ESIMD_BLOCK_2D_WIDTH_CHECK(Op, BlockWidth, NBlocks,
sizeof(T));
4018 #undef __ESIMD_DWORD_BLOCK_2D_WIDTH_SCALE
4019 #undef __ESIMD_BLOCK_2D_WIDTH_CHECK
4052 typename T,
int BlockWidth,
int BlockHeight,
int NBlocks,
bool Transposed,
4053 bool Transformed,
typename PropertyListT,
4054 int N = get_lsc_block_2d_data_size<__raw_t<T>, NBlocks, BlockHeight,
4055 BlockWidth, Transposed, Transformed>()>
4056 __ESIMD_API
simd<T, N> load_2d_impl(
const T *Ptr,
unsigned SurfaceWidth,
4057 unsigned SurfaceHeight,
4058 unsigned SurfacePitch,
int X,
int Y) {
4060 check_cache_hints<cache_action::load, PropertyListT>();
4061 constexpr
auto L1H = getCacheHintForIntrin<PropertyListT, cache_level::L1>();
4062 constexpr
auto L2H = getCacheHintForIntrin<PropertyListT, cache_level::L2>();
4063 using RawT = __raw_t<T>;
4064 check_lsc_block_2d_restrictions<RawT, BlockWidth, BlockHeight, NBlocks,
4065 Transposed, Transformed, block_2d_op::load>();
4071 constexpr
int ElemsPerDword = 4 /
sizeof(RawT);
4072 constexpr
int GRFRowSize = Transposed ? BlockHeight
4073 : Transformed ? BlockWidth * ElemsPerDword
4075 constexpr
int GRFRowPitch = getNextPowerOf2<GRFRowSize>();
4076 constexpr
int GRFColSize =
4079 : (Transformed ? (BlockHeight + ElemsPerDword - 1) / ElemsPerDword
4081 constexpr
int GRFBlockSize = GRFRowPitch * GRFColSize;
4082 constexpr
int GRFBlockPitch =
4083 roundUpNextMultiple<64 /
sizeof(RawT), GRFBlockSize>();
4084 constexpr
int ActualN = NBlocks * GRFBlockPitch;
4086 constexpr
int DstBlockElements = GRFColSize * GRFRowSize;
4087 constexpr
int DstElements = DstBlockElements * NBlocks;
4089 static_assert(N == ActualN || N == DstElements,
"Incorrect element count");
4092 finalize_data_size<RawT, lsc_data_size::default_size>();
4093 uintptr_t Addr =
reinterpret_cast<uintptr_t
>(Ptr);
4097 __esimd_lsc_load2d_stateless<RawT, L1H, L2H, DS, Transpose, NBlocks,
4098 BlockWidth, BlockHeight, Transformed,
4099 ActualN>(Mask.
data(), Addr, SurfaceWidth,
4100 SurfaceHeight, SurfacePitch, X, Y);
4102 if constexpr (ActualN == N) {
4124 for (
auto i = 0; i < NBlocks; i++) {
4126 Dst.template select<DstBlockElements, 1>(i * DstBlockElements);
4128 auto RawBlock = Raw.template select<GRFBlockSize, 1>(i * GRFBlockPitch);
4130 RawBlock.template bit_cast_view<RawT, GRFColSize, GRFRowPitch>()
4131 .template select<GRFColSize, 1, GRFRowSize, 1>(0, 0)
4132 .template bit_cast_view<RawT>();
4161 template <
typename T,
int BlockWidth,
int BlockHeight,
int NBlocks,
4162 typename PropertyListT,
4163 int N = get_lsc_block_2d_data_size<__raw_t<T>, NBlocks, BlockHeight,
4166 __ESIMD_API
void prefetch_2d_impl(
const T *Ptr,
unsigned SurfaceWidth,
4167 unsigned SurfaceHeight,
unsigned SurfacePitch,
4169 using RawT = __raw_t<T>;
4170 check_cache_hints<cache_action::prefetch, PropertyListT>();
4171 check_lsc_block_2d_restrictions<RawT, BlockWidth, BlockHeight, NBlocks,
false,
4173 constexpr
auto L1H = getCacheHintForIntrin<PropertyListT, cache_level::L1>();
4174 constexpr
auto L2H = getCacheHintForIntrin<PropertyListT, cache_level::L2>();
4176 finalize_data_size<RawT, lsc_data_size::default_size>();
4177 uintptr_t Addr =
reinterpret_cast<uintptr_t
>(Ptr);
4180 __esimd_lsc_prefetch2d_stateless<RawT, L1H, L2H, DS, Transpose, NBlocks,
4181 BlockWidth, BlockHeight,
false, N>(
4182 Mask.
data(), Addr, SurfaceWidth, SurfaceHeight, SurfacePitch, X, Y);
4209 template <
typename T,
int BlockWidth,
int BlockHeight,
typename PropertyListT,
4211 __raw_t<T>, 1u, BlockHeight, BlockWidth,
false ,
4213 __ESIMD_API
void store_2d_impl(T *Ptr,
unsigned SurfaceWidth,
4214 unsigned SurfaceHeight,
unsigned SurfacePitch,
4216 using RawT = __raw_t<T>;
4219 constexpr
auto L1H = getCacheHintForIntrin<PropertyListT, cache_level::L1>();
4220 constexpr
auto L2H = getCacheHintForIntrin<PropertyListT, cache_level::L2>();
4221 check_lsc_block_2d_restrictions<RawT, BlockWidth, BlockHeight, 1,
false,
4222 false, block_2d_op::store>();
4224 finalize_data_size<RawT, lsc_data_size::default_size>();
4225 uintptr_t Addr =
reinterpret_cast<uintptr_t
>(Ptr);
4228 constexpr
int Pitch = getNextPowerOf2<BlockWidth>();
4229 constexpr
int NElts = BlockHeight * Pitch;
4233 if constexpr (NElts == N) {
4238 auto Data2D = Vals.template bit_cast_view<RawT, BlockHeight, BlockWidth>();
4239 auto Raw2D = Raw.template bit_cast_view<RawT, BlockHeight, Pitch>();
4240 Raw2D.template select<BlockHeight, 1, BlockWidth, 1>(0, 0) = Data2D;
4243 __esimd_lsc_store2d_stateless<RawT, L1H, L2H, DS, Transpose, 1u, BlockWidth,
4244 BlockHeight,
false, NElts>(
4245 Mask.
data(), Addr, SurfaceWidth, SurfaceHeight, SurfacePitch, X, Y,
4283 template <
typename T,
int N,
typename AccessorT>
4285 std::enable_if_t<detail::is_device_accessor_with_v<
4286 AccessorT, detail::accessor_mode_cap::can_read>,
4290 #ifdef __ESIMD_FORCE_STATELESS_MEM
4291 return gather<T, N>(__ESIMD_DNS::accessorToPointer<T>(acc, glob_offset),
4292 byte_offsets, mask);
4297 byte_offsets += glob_offset;
4301 acc, byte_offsets, mask, PassThru);
4303 return detail::gather_impl<T, N>(acc, byte_offsets, glob_offset, mask);
4321 template <
typename T,
int N,
typename AccessorT>
4323 std::enable_if_t<detail::is_device_accessor_with_v<
4324 AccessorT, detail::accessor_mode_cap::can_read>,
4328 return gather<T, N>(acc, ByteOffsets, glob_offset);
4331 #ifdef __ESIMD_FORCE_STATELESS_MEM
4332 template <
typename T,
int N,
typename AccessorTy,
typename Toffset>
4333 __ESIMD_API std::enable_if_t<
4334 detail::is_device_accessor_with_v<AccessorTy,
4335 detail::accessor_mode_cap::can_read> &&
4336 std::is_integral_v<Toffset> && !std::is_same_v<Toffset, uint64_t>,
4340 return gather<T, N>(acc, convert<uint64_t>(offsets), glob_offset, mask);
4417 typename T,
int N,
int VS,
typename AccessorT,
typename OffsetT,
4419 __ESIMD_API std::enable_if_t<
4420 (detail::is_device_accessor_with_v<AccessorT,
4421 detail::accessor_mode_cap::can_read> &&
4422 ext::oneapi::experimental::is_property_list_v<PropertyListT>),
4426 #ifdef __ESIMD_FORCE_STATELESS_MEM
4427 return gather<T, N, VS>(detail::accessorToPointer<T>(acc), byte_offsets, mask,
4432 acc, byte_offsets, mask, pass_thru);
4466 typename T,
int N,
int VS,
typename AccessorT,
typename OffsetT,
4468 __ESIMD_API std::enable_if_t<
4469 (detail::is_device_accessor_with_v<AccessorT,
4470 detail::accessor_mode_cap::can_read> &&
4471 ext::oneapi::experimental::is_property_list_v<PropertyListT>),
4475 #ifdef __ESIMD_FORCE_STATELESS_MEM
4476 return gather<T, N, VS>(detail::accessorToPointer<T>(acc), byte_offsets, mask,
4480 detail::getPropertyValue<PropertyListT, alignment_key>(
sizeof(T));
4482 "gather() requires at least element-size alignment");
4484 if constexpr (detail::has_cache_hints<PropertyListT>() || VS > 1 ||
4489 acc, byte_offsets, mask, PassThru);
4491 return detail::gather_impl<T, N>(acc, byte_offsets, 0, mask);
4520 typename T,
int N,
int VS,
typename AccessorT,
typename OffsetT,
4522 __ESIMD_API std::enable_if_t<
4523 (detail::is_device_accessor_with_v<AccessorT,
4524 detail::accessor_mode_cap::can_read> &&
4525 ext::oneapi::experimental::is_property_list_v<PropertyListT>),
4528 PropertyListT props = {}) {
4530 return gather<T, N, VS>(acc, byte_offsets, Mask, props);
4547 typename T,
int N,
typename AccessorT,
typename OffsetT,
typename MaskT,
4549 __ESIMD_API std::enable_if_t<
4550 (detail::is_device_accessor_with_v<AccessorT,
4551 detail::accessor_mode_cap::can_read> &&
4552 std::is_same_v<MaskT, simd_mask<N>> &&
4553 ext::oneapi::experimental::is_property_list_v<PropertyListT>),
4556 simd<T, N> pass_thru, PropertyListT props = {}) {
4557 return gather<T, N, 1>(acc, byte_offsets, mask, pass_thru, props);
4572 typename T,
int N,
typename AccessorT,
typename OffsetT,
typename MaskT,
4574 __ESIMD_API std::enable_if_t<
4575 (detail::is_device_accessor_with_v<AccessorT,
4576 detail::accessor_mode_cap::can_read> &&
4577 std::is_same_v<MaskT, simd_mask<N>> &&
4578 ext::oneapi::experimental::is_property_list_v<PropertyListT>),
4581 PropertyListT props = {}) {
4582 return gather<T, N, 1>(acc, byte_offsets, mask, props);
4593 typename T,
int N,
typename AccessorT,
typename OffsetT,
4595 __ESIMD_API std::enable_if_t<
4596 (detail::is_device_accessor_with_v<AccessorT,
4597 detail::accessor_mode_cap::can_read> &&
4598 ext::oneapi::experimental::is_property_list_v<PropertyListT>),
4601 return gather<T, N, 1>(acc, byte_offsets, props);
4613 typename T,
int N,
int VS = 1,
typename AccessorT,
typename OffsetSimdViewT,
4615 __ESIMD_API std::enable_if_t<
4616 (detail::is_device_accessor_with_v<AccessorT,
4617 detail::accessor_mode_cap::can_read> &&
4618 detail::is_simd_view_type_v<OffsetSimdViewT> &&
4619 ext::oneapi::experimental::is_property_list_v<PropertyListT>),
4622 simd<T, N> pass_thru, PropertyListT props = {}) {
4623 return gather<T, N, VS>(acc, byte_offsets.read(), mask, pass_thru, props);
4634 int VS,
typename T,
int N,
typename AccessorT,
typename OffsetSimdViewT,
4636 __ESIMD_API std::enable_if_t<
4637 (detail::is_device_accessor_with_v<AccessorT,
4638 detail::accessor_mode_cap::can_read> &&
4639 detail::is_simd_view_type_v<OffsetSimdViewT> &&
4640 ext::oneapi::experimental::is_property_list_v<PropertyListT>),
4643 simd<T, N> pass_thru, PropertyListT props = {}) {
4644 static_assert(N / VS ==
4645 OffsetSimdViewT::getSizeX() * OffsetSimdViewT::getSizeY(),
4646 "Size of pass_thru parameter must correspond to the size of "
4647 "byte_offsets parameter.");
4648 return gather<T, N, VS>(acc, byte_offsets.read(), mask, pass_thru, props);
4662 int VS = 1,
typename AccessorT,
typename OffsetSimdViewT,
4663 typename PassThruSimdViewT,
4664 int N = PassThruSimdViewT::getSizeX() * PassThruSimdViewT::getSizeY(),
4665 typename T = PassThruSimdViewT::value_type::element_type,
4667 __ESIMD_API std::enable_if_t<
4668 (detail::is_device_accessor_with_v<AccessorT,
4669 detail::accessor_mode_cap::can_read> &&
4670 detail::is_simd_view_type_v<OffsetSimdViewT> &&
4671 detail::is_simd_view_type_v<PassThruSimdViewT> &&
4672 ext::oneapi::experimental::is_property_list_v<PropertyListT>),
4675 PassThruSimdViewT pass_thru, PropertyListT props = {}) {
4676 static_assert(N / VS ==
4677 OffsetSimdViewT::getSizeX() * OffsetSimdViewT::getSizeY(),
4678 "Size of pass_thru parameter must correspond to the size of "
4679 "byte_offsets parameter.");
4680 return gather<T, N, VS>(acc, byte_offsets.read(), mask, pass_thru.read(),
4695 int VS = 1,
typename AccessorT,
typename OffsetT,
4696 typename PassThruSimdViewT,
4697 int N = PassThruSimdViewT::getSizeX() * PassThruSimdViewT::getSizeY(),
4698 typename T = PassThruSimdViewT::value_type::element_type,
4700 __ESIMD_API std::enable_if_t<
4701 (detail::is_device_accessor_with_v<AccessorT,
4702 detail::accessor_mode_cap::can_read> &&
4703 detail::is_simd_view_type_v<PassThruSimdViewT> &&
4704 ext::oneapi::experimental::is_property_list_v<PropertyListT>),
4708 PropertyListT props = {}) {
4709 return gather<T, N, VS>(acc, byte_offsets, mask, pass_thru.read(), props);
4721 typename T,
int N,
int VS = 1,
typename AccessorT,
typename OffsetSimdViewT,
4723 __ESIMD_API std::enable_if_t<
4724 (detail::is_device_accessor_with_v<AccessorT,
4725 detail::accessor_mode_cap::can_read> &&
4726 detail::is_simd_view_type_v<OffsetSimdViewT> &&
4727 ext::oneapi::experimental::is_property_list_v<PropertyListT>),
4730 PropertyListT props = {}) {
4731 return gather<T, N, VS>(acc, byte_offsets.read(), mask, props);
4742 typename T,
int N,
int VS = 1,
typename AccessorT,
typename OffsetSimdViewT,
4744 __ESIMD_API std::enable_if_t<
4745 (detail::is_device_accessor_with_v<AccessorT,
4746 detail::accessor_mode_cap::can_read> &&
4747 detail::is_simd_view_type_v<OffsetSimdViewT> &&
4748 ext::oneapi::experimental::is_property_list_v<PropertyListT>),
4750 gather(AccessorT acc, OffsetSimdViewT byte_offsets, PropertyListT props = {}) {
4751 return gather<T, N, VS>(acc, byte_offsets.read(), props);
4806 typename T,
int N,
int VS = 1,
typename AccessorTy,
typename OffsetT,
4808 __ESIMD_API std::enable_if_t<
4809 detail::is_device_accessor_with_v<AccessorTy,
4810 detail::accessor_mode_cap::can_write> &&
4811 ext::oneapi::experimental::is_property_list_v<PropertyListT>>
4814 #ifdef __ESIMD_FORCE_STATELESS_MEM
4815 scatter<T, N, VS>(__ESIMD_DNS::accessorToPointer<T>(acc), byte_offsets, vals,
4819 detail::getPropertyValue<PropertyListT, alignment_key>(
sizeof(T));
4821 "gather() requires at least element-size alignment");
4823 if constexpr (detail::has_cache_hints<PropertyListT>() || VS > 1 ||
4826 PropertyListT>(acc, byte_offsets, vals, mask);
4828 detail::scatter_impl<T, N, AccessorTy>(acc, vals, byte_offsets, 0, mask);
4853 typename T,
int N,
int VS = 1,
typename AccessorTy,
typename OffsetT,
4855 __ESIMD_API std::enable_if_t<
4856 detail::is_device_accessor_with_v<AccessorTy,
4857 detail::accessor_mode_cap::can_write> &&
4858 ext::oneapi::experimental::is_property_list_v<PropertyListT>>
4860 PropertyListT props = {}) {
4862 scatter<T, N, VS>(acc, byte_offsets, vals, Mask, props);
4891 typename T,
int N,
int VS = 1,
typename AccessorTy,
4892 typename OffsetSimdViewT,
4894 __ESIMD_API std::enable_if_t<
4895 detail::is_device_accessor_with_v<AccessorTy,
4896 detail::accessor_mode_cap::can_write> &&
4897 detail::is_simd_view_type_v<OffsetSimdViewT> &&
4898 ext::oneapi::experimental::is_property_list_v<PropertyListT>>
4901 scatter<T, N, VS>(acc, byte_offsets.read(), vals, mask, props);
4930 int VS,
typename AccessorTy,
typename T,
int N,
typename OffsetSimdViewT,
4932 __ESIMD_API std::enable_if_t<
4933 detail::is_device_accessor_with_v<AccessorTy,
4934 detail::accessor_mode_cap::can_write> &&
4935 detail::is_simd_view_type_v<OffsetSimdViewT> &&
4936 ext::oneapi::experimental::is_property_list_v<PropertyListT>>
4939 static_assert(N / VS ==
4940 OffsetSimdViewT::getSizeX() * OffsetSimdViewT::getSizeY(),
4941 "Size of vals parameter must correspond to the size of "
4942 "byte_offsets parameter.");
4943 scatter<T, N, VS>(acc, byte_offsets.read(), vals, mask, props);
4967 int VS,
typename AccessorTy,
typename T,
int N,
typename OffsetSimdViewT,
4969 __ESIMD_API std::enable_if_t<
4970 detail::is_device_accessor_with_v<AccessorTy,
4971 detail::accessor_mode_cap::can_write> &&
4972 detail::is_simd_view_type_v<OffsetSimdViewT> &&
4973 ext::oneapi::experimental::is_property_list_v<PropertyListT>>
4975 PropertyListT props = {}) {
4976 static_assert(N / VS ==
4977 OffsetSimdViewT::getSizeX() * OffsetSimdViewT::getSizeY(),
4978 "Size of vals parameter must correspond to the size of "
4979 "byte_offsets parameter.");
4980 scatter<T, N, VS>(acc, byte_offsets.read(), vals, props);
5012 int VS = 1,
typename AccessorTy,
typename ValuesSimdViewT,
5013 typename OffsetSimdViewT,
5014 int N = ValuesSimdViewT::getSizeX() * ValuesSimdViewT::getSizeY(),
5015 typename T = ValuesSimdViewT::value_type::element_type,
5017 __ESIMD_API std::enable_if_t<
5018 detail::is_device_accessor_with_v<AccessorTy,
5019 detail::accessor_mode_cap::can_write> &&
5020 detail::is_simd_view_type_v<OffsetSimdViewT> &&
5021 detail::is_simd_view_type_v<ValuesSimdViewT> &&
5022 ext::oneapi::experimental::is_property_list_v<PropertyListT>>
5023 scatter(AccessorTy acc, OffsetSimdViewT byte_offsets, ValuesSimdViewT vals,
5025 static_assert(N / VS ==
5026 OffsetSimdViewT::getSizeX() * OffsetSimdViewT::getSizeY(),
5027 "Size of vals parameter must correspond to the size of "
5028 "byte_offsets parameter.");
5029 scatter<T, N, VS>(acc, byte_offsets.read(), vals.read(), mask, props);
5056 int VS = 1,
typename AccessorTy,
typename ValuesSimdViewT,
5057 typename OffsetSimdViewT,
5058 int N = ValuesSimdViewT::getSizeX() * ValuesSimdViewT::getSizeY(),
5059 typename T = ValuesSimdViewT::value_type::element_type,
5061 __ESIMD_API std::enable_if_t<
5062 detail::is_device_accessor_with_v<AccessorTy,
5063 detail::accessor_mode_cap::can_write> &&
5064 detail::is_simd_view_type_v<OffsetSimdViewT> &&
5065 detail::is_simd_view_type_v<ValuesSimdViewT> &&
5066 ext::oneapi::experimental::is_property_list_v<PropertyListT>>
5067 scatter(AccessorTy acc, OffsetSimdViewT byte_offsets, ValuesSimdViewT vals,
5068 PropertyListT props = {}) {
5069 static_assert(N / VS ==
5070 OffsetSimdViewT::getSizeX() * OffsetSimdViewT::getSizeY(),
5071 "Size of vals parameter must correspond to the size of "
5072 "byte_offsets parameter.");
5073 scatter<T, N, VS>(acc, byte_offsets.read(), vals.read(), props);
5105 int VS = 1,
typename AccessorTy,
typename ValuesSimdViewT,
typename OffsetT,
5106 int N = ValuesSimdViewT::getSizeX() * ValuesSimdViewT::getSizeY(),
5107 typename T = ValuesSimdViewT::value_type::element_type,
5109 __ESIMD_API std::enable_if_t<
5110 detail::is_device_accessor_with_v<AccessorTy,
5111 detail::accessor_mode_cap::can_write> &&
5112 detail::is_simd_view_type_v<ValuesSimdViewT> &&
5113 ext::oneapi::experimental::is_property_list_v<PropertyListT>>
5116 PropertyListT props = {}) {
5117 scatter<T, N, VS>(acc, byte_offsets, vals.read(), mask, props);
5144 int VS = 1,
typename AccessorTy,
typename ValuesSimdViewT,
typename OffsetT,
5145 int N = ValuesSimdViewT::getSizeX() * ValuesSimdViewT::getSizeY(),
5146 typename T = ValuesSimdViewT::value_type::element_type,
5148 __ESIMD_API std::enable_if_t<
5149 detail::is_device_accessor_with_v<AccessorTy,
5150 detail::accessor_mode_cap::can_write> &&
5151 detail::is_simd_view_type_v<ValuesSimdViewT> &&
5152 ext::oneapi::experimental::is_property_list_v<PropertyListT>>
5154 ValuesSimdViewT vals, PropertyListT props = {}) {
5155 scatter<T, N, VS>(acc, byte_offsets, vals.read(), props);
5179 typename T,
int N,
int VS = 1,
typename AccessorTy,
5180 typename OffsetSimdViewT,
5182 __ESIMD_API std::enable_if_t<
5183 detail::is_device_accessor_with_v<AccessorTy,
5184 detail::accessor_mode_cap::can_write> &&
5185 detail::is_simd_view_type_v<OffsetSimdViewT> &&
5186 ext::oneapi::experimental::is_property_list_v<PropertyListT>>
5188 PropertyListT props = {}) {
5190 scatter<T, N, VS>(acc, byte_offsets.read(), vals, Mask, props);
5209 template <
typename T,
int N,
typename AccessorTy>
5212 detail::is_device_accessor_with_v<
5213 AccessorTy, detail::accessor_mode_cap::can_write>>
5217 offsets += glob_offset;
5218 scatter<T, N>(acc, offsets, vals, mask);
5221 template <
typename T,
int N,
typename AccessorTy>
5224 detail::is_device_accessor_with_v<
5225 AccessorTy, detail::accessor_mode_cap::can_write>>
5229 scatter<T, N>(acc, ByteOffsets, vals, glob_offset, mask);
5232 #ifdef __ESIMD_FORCE_STATELESS_MEM
5233 template <
typename T,
int N,
typename AccessorTy,
typename Toffset>
5234 __ESIMD_API std::enable_if_t<
5235 detail::is_device_accessor_with_v<AccessorTy,
5236 detail::accessor_mode_cap::can_write> &&
5237 std::is_integral_v<Toffset> && !std::is_same_v<Toffset, uint64_t>>
5240 scatter<T, N, AccessorTy>(acc, convert<uint64_t>(offsets), vals, glob_offset,
5252 template <
typename T,
typename AccessorTy>
5256 gather<T, 1, AccessorTy>(acc,
simd<decltype(offset), 1>(offset));
5267 template <
typename T,
typename AccessorTy>
5270 scatter<T, 1, AccessorTy>(acc,
simd<decltype(offset), 1>(offset),
5308 int N,
typename Toffset>
5311 static_assert(std::is_integral_v<Toffset>,
"Unsupported offset type");
5312 static_assert((N == 8 || N == 16 || N == 32),
"Unsupported value of N");
5313 static_assert(
sizeof(T) == 4,
"Unsupported size of type T");
5316 addrs = addrs + offsets_i;
5317 return __esimd_svm_gather4_scaled<detail::__raw_t<T>, N, RGBAMask>(
5337 int N,
typename OffsetSimdViewT,
typename RegionTy>
5338 __ESIMD_API std::enable_if_t<detail::is_simd_view_type_v<OffsetSimdViewT>,
5341 return gather_rgba<RGBAMask, T, N>(p, offsets.read(), mask);
5360 int N,
typename Toffset>
5361 __ESIMD_API std::enable_if_t<std::is_integral_v<Toffset>,
5371 (M == CM::ABGR || M == CM::BGR || M == CM::GR || M == CM::R) &&
5372 "Only ABGR, BGR, GR, R channel masks are valid in write operations");
5398 int N,
typename Toffset>
5403 static_assert(std::is_integral_v<Toffset>,
"Unsupported offset type");
5404 static_assert((N == 8 || N == 16 || N == 32),
"Unsupported value of N");
5405 static_assert(
sizeof(T) == 4,
"Unsupported size of type T");
5406 detail::validate_rgba_write_channel_mask<RGBAMask>();
5409 addrs = addrs + offsets_i;
5410 __esimd_svm_scatter4_scaled<detail::__raw_t<T>, N, RGBAMask>(
5411 addrs.
data(), vals.data(), mask.
data());
5430 int N,
typename OffsetSimdViewT,
typename RegionTy>
5431 __ESIMD_API std::enable_if_t<detail::is_simd_view_type_v<OffsetSimdViewT>>
5435 scatter_rgba<RGBAMask, T, N>(p, offsets.read(), vals, mask);
5454 int N,
typename Toffset>
5455 __ESIMD_API std::enable_if_t<std::is_integral_v<Toffset> && N == 1>
5485 typename AccessorT,
int N,
5488 std::enable_if_t<((N == 8 || N == 16 || N == 32) &&
sizeof(T) == 4 &&
5489 detail::is_device_accessor_with_v<
5490 AccessorT, detail::accessor_mode_cap::can_read>),
5495 #ifdef __ESIMD_FORCE_STATELESS_MEM
5496 return gather_rgba<RGBAMask>(
5497 __ESIMD_DNS::accessorToPointer<T>(acc, global_offset), offsets, mask);
5500 constexpr uint32_t Scale = 0;
5502 return __esimd_gather4_masked_scaled2<detail::__raw_t<T>, N, RGBAMask,
5503 decltype(SI), Scale>(
5504 SI, global_offset, offsets.
data(), mask.
data());
5508 #ifdef __ESIMD_FORCE_STATELESS_MEM
5510 typename AccessorT,
int N,
5512 __ESIMD_API std::enable_if_t<
5513 ((N == 8 || N == 16 || N == 32) &&
sizeof(T) == 4 &&
5514 detail::is_device_accessor_with_v<AccessorT,
5515 detail::accessor_mode_cap::can_read> &&
5516 std::is_integral_v<Toffset> && !std::is_same_v<Toffset, uint64_t>),
5520 return gather_rgba<RGBAMask, AccessorT, N, T>(acc, convert<uint64_t>(offsets),
5521 global_offset, mask);
5540 typename AccessorT,
int N,
5543 std::enable_if_t<(N == 8 || N == 16 || N == 32) &&
sizeof(T) == 4 &&
5544 detail::is_device_accessor_with_v<
5545 AccessorT, detail::accessor_mode_cap::can_write>>
5550 detail::validate_rgba_write_channel_mask<RGBAMask>();
5551 #ifdef __ESIMD_FORCE_STATELESS_MEM
5552 scatter_rgba<RGBAMask>(__ESIMD_DNS::accessorToPointer<T>(acc, global_offset),
5553 offsets, vals, mask);
5556 constexpr uint32_t Scale = 0;
5558 __esimd_scatter4_scaled<T, N, decltype(SI), RGBAMask, Scale>(
5559 mask.
data(), SI, global_offset, offsets.
data(), vals.data());
5563 #ifdef __ESIMD_FORCE_STATELESS_MEM
5565 typename AccessorT,
int N,
5567 __ESIMD_API std::enable_if_t<
5568 (N == 8 || N == 16 || N == 32) &&
sizeof(T) == 4 &&
5569 detail::is_device_accessor_with_v<AccessorT,
5570 detail::accessor_mode_cap::can_write> &&
5571 std::is_integral_v<Toffset> && !std::is_same_v<Toffset, uint64_t>>
5575 scatter_rgba<RGBAMask, AccessorT, N, T>(acc, convert<uint64_t>(offsets), vals,
5576 global_offset, mask);
5583 #ifndef __ESIMD_FP_ATOMIC_OP_TYPE_CHECK
5584 #define __ESIMD_FP_ATOMIC_OP_TYPE_CHECK(T) \
5585 static_assert(is_type<T, float, sycl::half, double>(), \
5586 "float, double or sycl::half type is expected");
5595 static_assert(
sizeof(T) > 1,
"Unsupported data type");
5598 if constexpr (!IsLSC)
5600 "Execution size 1, 2, 4, 8, 16, 32 are supported");
5602 static_assert(NumSrc == __ESIMD_DNS::get_num_args<Op>(),
5603 "Wrong number of operands");
5604 constexpr
bool IsInt2BytePlus =
5605 std::is_integral_v<T> && (
sizeof(T) >=
sizeof(uint16_t));
5607 if constexpr (Op == __ESIMD_NS::atomic_op::xchg ||
5608 Op == __ESIMD_NS::atomic_op::cmpxchg ||
5609 Op == __ESIMD_NS::atomic_op::inc ||
5612 static_assert(IsInt2BytePlus,
"Integral 16-bit or wider type is expected");
5617 Op == __ESIMD_NS::atomic_op::fadd ||
5618 Op == __ESIMD_NS::atomic_op::fsub ||
5619 Op == __ESIMD_NS::atomic_op::fcmpxchg) {
5623 Op == __ESIMD_NS::atomic_op::sub ||
5629 Op == __ESIMD_NS::atomic_op::smin ||
5630 Op == __ESIMD_NS::atomic_op::smax) {
5631 static_assert(IsInt2BytePlus,
"Integral 16-bit or wider type is expected");
5632 constexpr
bool IsSignedMinmax = (Op == __ESIMD_NS::atomic_op::smin) ||
5633 (Op == __ESIMD_NS::atomic_op::smax);
5637 if constexpr (IsSignedMinmax || IsUnsignedMinmax) {
5638 constexpr
bool SignOK = std::is_signed_v<T> == IsSignedMinmax;
5639 static_assert(SignOK,
"Signed/unsigned integer type expected for "
5640 "signed/unsigned min/max operation");
5644 #undef __ESIMD_FP_ATOMIC_OP_TYPE_CHECK
5693 template <u
int32_t SLMSize> __ESIMD_API
void slm_init() {
5694 __esimd_slm_init(SLMSize);
5705 __ESIMD_API
void slm_init(uint32_t size) { __esimd_slm_init(size); }
5749 #ifndef __ESIMD_GATHER_SCATTER_LLVM_IR
5774 typename T,
int N,
int VS,
5776 __ESIMD_API std::enable_if_t<
5777 ext::oneapi::experimental::is_property_list_v<PropertyListT>,
simd<T, N>>
5779 simd<T, N> pass_thru, PropertyListT props = {}) {
5780 static_assert(N / VS >= 1 && N % VS == 0,
"N must be divisible by VS");
5783 detail::getPropertyValue<PropertyListT, alignment_key>(
sizeof(T));
5785 "slm_gather() requires at least element-size alignment");
5790 return __ESIMD_DNS::slm_gather_impl<T, VS,
5792 byte_offsets, mask, pass_thru);
5794 if constexpr (
sizeof(T) == 8) {
5796 Res.template bit_cast_view<uint32_t>().template select<N, 2>(0) =
5797 __esimd_slm_gather_ld<uint32_t, N, Alignment>(
5799 (pass_thru.template bit_cast_view<uint32_t>()
5800 .template select<N, 2>(0))
5802 simd<uint32_t, N / VS> Offset = byte_offsets +
sizeof(uint32_t);
5803 Res.template bit_cast_view<uint32_t>().template select<N, 2>(1) =
5804 __esimd_slm_gather_ld<uint32_t, N, sizeof(uint32_t)>(
5805 Offset.data(), mask.
data(),
5806 (pass_thru.template bit_cast_view<uint32_t>()
5807 .template select<N, 2>(1))
5811 using MsgT = detail::__raw_t<T>;
5812 return __esimd_slm_gather_ld<MsgT, N, Alignment>(
5842 typename T,
int N,
int VS,
5844 __ESIMD_API std::enable_if_t<
5845 ext::oneapi::experimental::is_property_list_v<PropertyListT>,
simd<T, N>>
5847 PropertyListT props = {}) {
5849 detail::getPropertyValue<PropertyListT, alignment_key>(
sizeof(T));
5851 "slm_gather() requires at least element-size alignment");
5856 return detail::slm_gather_impl<T, VS, detail::lsc_data_size::default_size>(
5857 byte_offsets, mask, PassThru);
5859 if constexpr (
sizeof(T) == 8) {
5863 Res.template bit_cast_view<uint32_t>().template select<N, 2>(0) =
5864 __esimd_slm_gather_ld<uint32_t, N, Alignment>(
5865 byte_offsets.
data(), mask.
data(), PassThru.data());
5866 simd<uint32_t, N / VS> Offset = byte_offsets +
sizeof(uint32_t);
5867 Res.template bit_cast_view<uint32_t>().template select<N, 2>(1) =
5868 __esimd_slm_gather_ld<uint32_t, N, sizeof(uint32_t)>(
5869 Offset.data(), mask.
data(), PassThru.data());
5872 using MsgT = detail::__raw_t<T>;
5874 return __esimd_slm_gather_ld<MsgT, N, Alignment>(
5875 byte_offsets.
data(), mask.
data(), PassThru.data());
5878 detail::LocalAccessorMarker acc;
5879 return detail::gather_impl<T, N>(acc, byte_offsets, 0, mask);
5901 typename T,
int N,
int VS,
5903 __ESIMD_API std::enable_if_t<
5904 ext::oneapi::experimental::is_property_list_v<PropertyListT>,
simd<T, N>>
5907 return slm_gather<T, N, VS>(byte_offsets, Mask, props);
5935 __ESIMD_API std::enable_if_t<
5936 ext::oneapi::experimental::is_property_list_v<PropertyListT>,
simd<T, N>>
5938 simd<T, N> pass_thru, PropertyListT props = {}) {
5939 constexpr
int VS = 1;
5940 return slm_gather<T, N, VS>(byte_offsets, mask, pass_thru, props);
5965 __ESIMD_API std::enable_if_t<
5966 ext::oneapi::experimental::is_property_list_v<PropertyListT>,
simd<T, N>>
5968 PropertyListT props = {}) {
5969 constexpr
int VS = 1;
5970 return slm_gather<T, N, VS>(byte_offsets, mask, props);
5990 __ESIMD_API std::enable_if_t<
5991 ext::oneapi::experimental::is_property_list_v<PropertyListT>,
simd<T, N>>
5993 constexpr
int VS = 1;
5994 return slm_gather<T, N, VS>(byte_offsets, props);
6024 typename T,
int N,
int VS = 1,
typename OffsetSimdViewT,
6026 __ESIMD_API std::enable_if_t<
6027 detail::is_simd_view_type_v<OffsetSimdViewT> &&
6028 ext::oneapi::experimental::is_property_list_v<PropertyListT>,
6031 simd<T, N> pass_thru, PropertyListT props = {}) {
6032 return slm_gather<T, N, VS>(byte_offsets.read(), mask, pass_thru, props);
6062 int VS,
typename T,
int N,
typename OffsetSimdViewT,
6064 __ESIMD_API std::enable_if_t<
6065 (detail::is_simd_view_type_v<OffsetSimdViewT> &&
6066 ext::oneapi::experimental::is_property_list_v<PropertyListT>),
6069 simd<T, N> pass_thru, PropertyListT props = {}) {
6070 static_assert(N / VS ==
6071 OffsetSimdViewT::getSizeX() * OffsetSimdViewT::getSizeY(),
6072 "Size of pass_thru parameter must correspond to the size of "
6073 "byte_offsets parameter.");
6074 return slm_gather<T, N, VS>(byte_offsets.read(), mask, pass_thru, props);
6107 int VS = 1,
typename OffsetSimdViewT,
typename PassThruSimdViewT,
6108 int N = PassThruSimdViewT::getSizeX() * PassThruSimdViewT::getSizeY(),
6109 typename T = PassThruSimdViewT::value_type::element_type,
6111 __ESIMD_API std::enable_if_t<
6112 (detail::is_simd_view_type_v<OffsetSimdViewT> &&
6113 detail::is_simd_view_type_v<PassThruSimdViewT> &&
6114 ext::oneapi::experimental::is_property_list_v<PropertyListT>),
6117 PassThruSimdViewT pass_thru, PropertyListT props = {}) {
6118 static_assert(N / VS ==
6119 OffsetSimdViewT::getSizeX() * OffsetSimdViewT::getSizeY(),
6120 "Size of pass_thru parameter must correspond to the size of "
6121 "byte_offsets parameter.");
6122 return slm_gather<T, N, VS>(byte_offsets.read(), mask, pass_thru.read(),
6156 int VS = 1,
typename PassThruSimdViewT,
6157 int N = PassThruSimdViewT::getSizeX() * PassThruSimdViewT::getSizeY(),
6158 typename T = PassThruSimdViewT::value_type::element_type,
6160 __ESIMD_API std::enable_if_t<
6161 (detail::is_simd_view_type_v<PassThruSimdViewT> &&
6162 ext::oneapi::experimental::is_property_list_v<PropertyListT>),
6165 PassThruSimdViewT pass_thru, PropertyListT props = {}) {
6166 return slm_gather<T, N, VS>(byte_offsets, mask, pass_thru.read(), props);
6191 typename T,
int N,
int VS = 1,
typename OffsetSimdViewT,
6193 __ESIMD_API std::enable_if_t<
6194 detail::is_simd_view_type_v<OffsetSimdViewT> &&
6195 ext::oneapi::experimental::is_property_list_v<PropertyListT>,
6198 PropertyListT props = {}) {
6199 return slm_gather<T, N, VS>(byte_offsets.read(), mask, props);
6219 typename T,
int N,
int VS = 1,
typename OffsetSimdViewT,
6221 __ESIMD_API std::enable_if_t<
6222 detail::is_simd_view_type_v<OffsetSimdViewT> &&
6223 ext::oneapi::experimental::is_property_list_v<PropertyListT>,
6225 slm_gather(OffsetSimdViewT byte_offsets, PropertyListT props = {}) {
6226 return slm_gather<T, N, VS>(byte_offsets.read(), props);
6280 typename T,
int N,
int VS = 1,
6282 __ESIMD_API std::enable_if_t<
6283 ext::oneapi::experimental::is_property_list_v<PropertyListT>>
6286 static_assert(N / VS >= 1 && N % VS == 0,
"N must be divisible by VS");
6289 detail::getPropertyValue<PropertyListT, alignment_key>(
sizeof(T));
6291 "slm_scatter() requires at least element-size alignment");
6296 __ESIMD_DNS::slm_scatter_impl<T, VS, detail::lsc_data_size::default_size>(
6297 byte_offsets, vals, mask);
6299 if constexpr (
sizeof(T) == 8) {
6300 __esimd_slm_scatter_st<uint32_t, N, Alignment>(
6301 vals.template bit_cast_view<uint32_t>()
6302 .template select<N, 2>(0)
6305 simd<uint32_t, N / VS> Offset = byte_offsets +
sizeof(uint32_t);
6306 __esimd_slm_scatter_st<uint32_t, N, sizeof(uint32_t)>(
6307 vals.template bit_cast_view<uint32_t>()
6308 .template select<N, 2>(1)
6310 Offset.data(), mask.
data());
6313 using MsgT = detail::__raw_t<T>;
6314 __esimd_slm_scatter_st<MsgT, N, Alignment>(
6319 detail::LocalAccessorMarker acc;
6320 detail::scatter_impl<T, N>(acc, vals, byte_offsets, 0, mask);
6341 typename T,
int N,
int VS = 1,
6343 __ESIMD_API std::enable_if_t<
6344 ext::oneapi::experimental::is_property_list_v<PropertyListT>>
6346 PropertyListT props = {}) {
6348 slm_scatter<T, N, VS>(byte_offsets, vals, Mask, props);
6375 typename T,
int N,
int VS = 1,
typename OffsetSimdViewT,
6377 __ESIMD_API std::enable_if_t<
6378 detail::is_simd_view_type_v<OffsetSimdViewT> &&
6379 ext::oneapi::experimental::is_property_list_v<PropertyListT>>
6382 slm_scatter<T, N, VS>(byte_offsets.read(), vals, mask, props);
6401 typename T,
int N,
int VS = 1,
typename OffsetSimdViewT,
6403 __ESIMD_API std::enable_if_t<
6404 detail::is_simd_view_type_v<OffsetSimdViewT> &&
6405 ext::oneapi::experimental::is_property_list_v<PropertyListT>>
6407 PropertyListT props = {}) {
6408 return slm_scatter<T, N, VS>(byte_offsets.read(), vals, props);
6429 int VS,
typename T,
int N,
typename OffsetSimdViewT,
6431 __ESIMD_API std::enable_if_t<
6432 detail::is_simd_view_type_v<OffsetSimdViewT> &&
6433 ext::oneapi::experimental::is_property_list_v<PropertyListT>>
6436 static_assert(N / VS ==
6437 OffsetSimdViewT::getSizeX() * OffsetSimdViewT::getSizeY(),
6438 "Size of vals parameter must correspond to the size of "
6439 "byte_offsets parameter.");
6440 slm_scatter<T, N, VS>(byte_offsets.read(), vals, mask, props);
6461 int VS,
typename T,
int N,
typename OffsetSimdViewT,
6463 __ESIMD_API std::enable_if_t<
6464 detail::is_simd_view_type_v<OffsetSimdViewT> &&
6465 ext::oneapi::experimental::is_property_list_v<PropertyListT>>
6467 PropertyListT props = {}) {
6468 static_assert(N / VS ==
6469 OffsetSimdViewT::getSizeX() * OffsetSimdViewT::getSizeY(),
6470 "Size of vals parameter must correspond to the size of "
6471 "byte_offsets parameter.");
6472 slm_scatter<T, N, VS>(byte_offsets.read(), vals, props);
6496 int VS = 1,
typename ValuesSimdViewT,
typename OffsetSimdViewT,
6497 int N = ValuesSimdViewT::getSizeX() * ValuesSimdViewT::getSizeY(),
6498 typename T = ValuesSimdViewT::value_type::element_type,
6500 __ESIMD_API std::enable_if_t<
6501 detail::is_simd_view_type_v<OffsetSimdViewT> &&
6502 detail::is_simd_view_type_v<ValuesSimdViewT> &&
6503 ext::oneapi::experimental::is_property_list_v<PropertyListT>>
6506 static_assert(N / VS ==
6507 OffsetSimdViewT::getSizeX() * OffsetSimdViewT::getSizeY(),
6508 "Size of vals parameter must correspond to the size of "
6509 "byte_offsets parameter.");
6510 slm_scatter<T, N, VS>(byte_offsets.read(), vals.read(), mask, props);
6533 int VS = 1,
typename ValuesSimdViewT,
typename OffsetSimdViewT,
6534 int N = ValuesSimdViewT::getSizeX() * ValuesSimdViewT::getSizeY(),
6535 typename T = ValuesSimdViewT::value_type::element_type,
6537 __ESIMD_API std::enable_if_t<
6538 detail::is_simd_view_type_v<OffsetSimdViewT> &&
6539 detail::is_simd_view_type_v<ValuesSimdViewT> &&
6540 ext::oneapi::experimental::is_property_list_v<PropertyListT>>
6542 PropertyListT props = {}) {
6543 static_assert(N / VS ==
6544 OffsetSimdViewT::getSizeX() * OffsetSimdViewT::getSizeY(),
6545 "Size of vals parameter must correspond to the size of "
6546 "byte_offsets parameter.");
6547 slm_scatter<T, N, VS>(byte_offsets.read(), vals.read(), props);
6571 int VS = 1,
typename ValuesSimdViewT,
typename OffsetT,
6572 int N = ValuesSimdViewT::getSizeX() * ValuesSimdViewT::getSizeY(),
6573 typename T = ValuesSimdViewT::value_type::element_type,
6575 __ESIMD_API std::enable_if_t<
6576 detail::is_simd_view_type_v<ValuesSimdViewT> &&
6577 ext::oneapi::experimental::is_property_list_v<PropertyListT>>
6580 slm_scatter<T, N, VS>(byte_offsets, vals.read(), mask, props);
6603 int VS = 1,
typename ValuesSimdViewT,
typename OffsetT,
6604 int N = ValuesSimdViewT::getSizeX() * ValuesSimdViewT::getSizeY(),
6605 typename T = ValuesSimdViewT::value_type::element_type,
6607 __ESIMD_API std::enable_if_t<
6608 detail::is_simd_view_type_v<ValuesSimdViewT> &&
6609 ext::oneapi::experimental::is_property_list_v<PropertyListT>>
6611 PropertyListT props = {}) {
6612 slm_scatter<T, N, VS>(byte_offsets, vals.read(), props);
6620 template <
typename T>
6635 template <
typename T,
int N, rgba_channel_mask RGBAMask>
6636 __ESIMD_API std::enable_if_t<(N == 8 || N == 16 || N == 32) && (
sizeof(T) == 4),
6640 return __esimd_gather4_masked_scaled2<T, N, RGBAMask>(
6641 SI, 0 , offsets.
data(), mask.
data());
6654 template <
typename T,
int N, rgba_channel_mask Mask>
6655 __ESIMD_API std::enable_if_t<(N == 8 || N == 16 || N == 32) && (
sizeof(T) == 4)>
6659 detail::validate_rgba_write_channel_mask<Mask>();
6661 constexpr int16_t Scale = 0;
6662 constexpr
int global_offset = 0;
6663 __esimd_scatter4_scaled<T, N, decltype(si), Mask, Scale>(
6664 mask.data(), si, global_offset, offsets.
data(), vals.data());
6682 template <
typename T,
int N,
6684 __ESIMD_API std::enable_if_t<is_simd_flag_type_v<Flags>,
simd<T, N>>
6686 constexpr
size_t Align = Flags::template alignment<simd<T, N>>;
6687 return __esimd_slm_block_ld<detail::__raw_t<T>, N, Align>(byte_offset);
6741 __ESIMD_API std::enable_if_t<
6742 ext::oneapi::experimental::is_property_list_v<PropertyListT>,
simd<T, N>>
6744 constexpr
size_t DefaultAlignment = detail::OperandSize::OWORD;
6746 detail::getPropertyValue<PropertyListT, alignment_key>(DefaultAlignment);
6747 return __esimd_slm_block_ld<detail::__raw_t<T>, N,
Alignment>(byte_offset);
6779 __ESIMD_API std::enable_if_t<
6780 ext::oneapi::experimental::is_property_list_v<PropertyListT>,
simd<T, N>>
6782 PropertyListT props = {}) {
6784 constexpr
size_t DefaultAlignment =
sizeof(T) <= 4 ? 4 :
sizeof(T);
6786 detail::getPropertyValue<PropertyListT, alignment_key>(DefaultAlignment);
6788 (
Alignment >= __ESIMD_DNS::OperandSize::DWORD &&
sizeof(T) <= 4) ||
6789 (
Alignment >= __ESIMD_DNS::OperandSize::QWORD &&
sizeof(T) > 4),
6790 "Incorrect alignment for the data type");
6792 constexpr
int SmallIntFactor64Bit =
sizeof(uint64_t) /
sizeof(T);
6793 constexpr
int SmallIntFactor32Bit =
6794 sizeof(uint32_t) /
sizeof(T) > 1 ?
sizeof(uint32_t) /
sizeof(T) : 1;
6795 static_assert(N > 0 && N % SmallIntFactor32Bit == 0,
6796 "Number of elements is not supported by Transposed load");
6802 constexpr
bool Use64BitData =
6803 Alignment >= __ESIMD_DNS::OperandSize::QWORD &&
6804 (N *
sizeof(T)) %
sizeof(uint64_t) == 0 &&
6805 (
sizeof(T) !=
sizeof(uint32_t) || N *
sizeof(T) > 256);
6806 constexpr
int SmallIntFactor =
6807 Use64BitData ? SmallIntFactor64Bit : SmallIntFactor32Bit;
6808 constexpr
int FactoredN = N / SmallIntFactor;
6809 detail::check_lsc_vector_size<FactoredN>();
6812 using LoadElemT = __ESIMD_DNS::__raw_t<
6813 std::conditional_t<SmallIntFactor == 1, T,
6814 std::conditional_t<Use64BitData, uint64_t, uint32_t>>>;
6816 constexpr uint16_t AddressScale = 1;
6817 constexpr
int ImmOffset = 0;
6820 constexpr
auto VS = detail::to_lsc_vector_size<FactoredN>();
6822 constexpr
int NLanes = 1;
6828 AddressScale, ImmOffset, DS, VS, Transposed, NLanes>(
6829 pred.
data(), Offsets.data());
6830 return Result.template bit_cast_view<T>();
6865 __ESIMD_API std::enable_if_t<
6866 ext::oneapi::experimental::is_property_list_v<PropertyListT>,
simd<T, N>>
6868 PropertyListT props = {}) {
6870 constexpr
size_t DefaultAlignment =
sizeof(T) <= 4 ? 4 :
sizeof(T);
6872 detail::getPropertyValue<PropertyListT, alignment_key>(DefaultAlignment);
6874 (
Alignment >= __ESIMD_DNS::OperandSize::DWORD &&
sizeof(T) <= 4) ||
6875 (
Alignment >= __ESIMD_DNS::OperandSize::QWORD &&
sizeof(T) > 4),
6876 "Incorrect alignment for the data type");
6878 constexpr
int SmallIntFactor64Bit =
sizeof(uint64_t) /
sizeof(T);
6879 constexpr
int SmallIntFactor32Bit =
6880 sizeof(uint32_t) /
sizeof(T) > 1 ?
sizeof(uint32_t) /
sizeof(T) : 1;
6881 static_assert(N > 0 && N % SmallIntFactor32Bit == 0,
6882 "Number of elements is not supported by Transposed load");
6888 constexpr
bool Use64BitData =
6889 Alignment >= __ESIMD_DNS::OperandSize::QWORD &&
6890 (N *
sizeof(T)) %
sizeof(uint64_t) == 0 &&
6891 (
sizeof(T) !=
sizeof(uint32_t) || N *
sizeof(T) > 256);
6892 constexpr
int SmallIntFactor =
6893 Use64BitData ? SmallIntFactor64Bit : SmallIntFactor32Bit;
6894 constexpr
int FactoredN = N / SmallIntFactor;
6895 detail::check_lsc_vector_size<FactoredN>();
6898 using LoadElemT = __ESIMD_DNS::__raw_t<
6899 std::conditional_t<SmallIntFactor == 1, T,
6900 std::conditional_t<Use64BitData, uint64_t, uint32_t>>>;
6902 constexpr uint16_t AddressScale = 1;
6903 constexpr
int ImmOffset = 0;
6906 constexpr
auto VS = detail::to_lsc_vector_size<FactoredN>();
6908 constexpr
int NLanes = 1;
6913 pass_thru.template bit_cast_view<LoadElemT>();
6916 AddressScale, ImmOffset, DS, VS, Transposed,
6917 NLanes>(pred.
data(), Offsets.data(),
6919 return Result.template bit_cast_view<T>();
6954 typename PassThruSimdViewT,
6955 typename T = PassThruSimdViewT::value_type::element_type,
6956 int N = PassThruSimdViewT::getSizeX() * PassThruSimdViewT::getSizeY(),
6958 __ESIMD_API std::enable_if_t<
6959 detail::is_simd_view_type_v<PassThruSimdViewT> &&
6960 ext::oneapi::experimental::is_property_list_v<PropertyListT>,
6963 PropertyListT props = {}) {
6964 return slm_block_load<T, N>(offset, pred, pass_thru.read(), props);
6991 typename T,
int N,
typename AccessorT,
6993 __ESIMD_API std::enable_if_t<
6994 detail::is_local_accessor_with_v<AccessorT,
6995 detail::accessor_mode_cap::can_read> &&
6996 ext::oneapi::experimental::is_property_list_v<PropertyListT>,
6998 block_load(AccessorT lacc, uint32_t byte_offset, PropertyListT props = {}) {
6999 byte_offset += detail::localAccessorToOffset(lacc);
7000 return slm_block_load<T, N>(byte_offset, props);
7026 typename T,
int N,
typename AccessorT,
7028 __ESIMD_API std::enable_if_t<
7029 detail::is_local_accessor_with_v<AccessorT,
7030 detail::accessor_mode_cap::can_read> &&
7031 ext::oneapi::experimental::is_property_list_v<PropertyListT>,
7034 return slm_block_load<T, N>(detail::localAccessorToOffset(lacc), props);
7065 typename T,
int N,
typename AccessorT,
7067 __ESIMD_API std::enable_if_t<
7068 detail::is_local_accessor_with_v<AccessorT,
7069 detail::accessor_mode_cap::can_read> &&
7070 ext::oneapi::experimental::is_property_list_v<PropertyListT>,
7073 PropertyListT props = {}) {
7074 byte_offset += detail::localAccessorToOffset(lacc);
7075 return slm_block_load<T, N>(byte_offset, pred, props);
7104 typename T,
int N,
typename AccessorT,
7106 __ESIMD_API std::enable_if_t<
7107 detail::is_local_accessor_with_v<AccessorT,
7108 detail::accessor_mode_cap::can_read> &&
7109 ext::oneapi::experimental::is_property_list_v<PropertyListT>,
7112 return slm_block_load<T, N>(detail::localAccessorToOffset(lacc), pred, props);
7143 typename T,
int N,
typename AccessorT,
7145 __ESIMD_API std::enable_if_t<
7146 detail::is_local_accessor_with_v<AccessorT,
7147 detail::accessor_mode_cap::can_read> &&
7148 ext::oneapi::experimental::is_property_list_v<PropertyListT>,
7151 simd<T, N> pass_thru, PropertyListT props = {}) {
7152 byte_offset += __ESIMD_DNS::localAccessorToOffset(lacc);
7153 return slm_block_load<T, N>(byte_offset, pred, pass_thru, props);
7186 typename PassThruSimdViewT,
7187 typename T = PassThruSimdViewT::value_type::element_type,
7188 int N = PassThruSimdViewT::getSizeX() * PassThruSimdViewT::getSizeY(),
7191 __ESIMD_API std::enable_if_t<
7192 detail::is_simd_view_type_v<PassThruSimdViewT> &&
7193 detail::is_local_accessor_with_v<AccessorT,
7194 detail::accessor_mode_cap::can_read> &&
7195 ext::oneapi::experimental::is_property_list_v<PropertyListT>,
7198 PassThruSimdViewT pass_thru, PropertyListT props = {}) {
7199 return block_load<T, N>(lacc, byte_offset, pred, pass_thru.read(), props);
7229 typename T,
int N,
typename AccessorT,
7231 __ESIMD_API std::enable_if_t<
7232 detail::is_local_accessor_with_v<AccessorT,
7233 detail::accessor_mode_cap::can_read> &&
7234 ext::oneapi::experimental::is_property_list_v<PropertyListT>,
7237 PropertyListT props = {}) {
7238 return slm_block_load<T, N>(__ESIMD_DNS::localAccessorToOffset(lacc), pred,
7271 typename PassThruSimdViewT,
7272 typename T = PassThruSimdViewT::value_type::element_type,
7273 int N = PassThruSimdViewT::getSizeX() * PassThruSimdViewT::getSizeY(),
7276 __ESIMD_API std::enable_if_t<
7277 detail::is_simd_view_type_v<PassThruSimdViewT> &&
7278 detail::is_local_accessor_with_v<AccessorT,
7279 detail::accessor_mode_cap::can_read> &&
7280 ext::oneapi::experimental::is_property_list_v<PropertyListT>,
7283 PropertyListT props = {}) {
7284 return block_load<T, N>(lacc, pred, pass_thru.read(), props);
7302 template <
typename T,
int N,
typename Flags>
7303 __ESIMD_API std::enable_if_t<is_simd_flag_type_v<Flags>>
7305 constexpr
size_t Align = Flags::template alignment<simd<T, N>>;
7306 __esimd_slm_block_st<detail::__raw_t<T>, N, Align>(offset, vals.
data());
7367 __ESIMD_API std::enable_if_t<
7368 ext::oneapi::experimental::is_property_list_v<PropertyListT>>
7370 PropertyListT props = {}) {
7372 constexpr
size_t DefaultAlignment =
sizeof(T) <= 4 ? 4 :
sizeof(T);
7374 detail::getPropertyValue<PropertyListT, alignment_key>(DefaultAlignment);
7376 (
Alignment >= __ESIMD_DNS::OperandSize::DWORD &&
sizeof(T) <= 4) ||
7377 (
Alignment >= __ESIMD_DNS::OperandSize::QWORD &&
sizeof(T) > 4),
7378 "Incorrect alignment for the data type");
7380 constexpr
int SmallIntFactor64Bit =
sizeof(uint64_t) /
sizeof(T);
7381 constexpr
int SmallIntFactor32Bit =
7382 sizeof(uint32_t) /
sizeof(T) > 1 ?
sizeof(uint32_t) /
sizeof(T) : 1;
7384 static_assert(N > 0 && N % SmallIntFactor32Bit == 0,
7385 "Number of elements is not supported by Transposed store");
7391 constexpr
bool Use64BitData =
7392 Alignment >= __ESIMD_DNS::OperandSize::QWORD &&
7393 (N *
sizeof(T)) %
sizeof(uint64_t) == 0 &&
7394 (
sizeof(T) !=
sizeof(uint32_t) || N *
sizeof(T) > 256);
7395 constexpr
int SmallIntFactor =
7396 Use64BitData ? SmallIntFactor64Bit : SmallIntFactor32Bit;
7397 constexpr
int FactoredN = N / SmallIntFactor;
7398 detail::check_lsc_vector_size<FactoredN>();
7401 using StoreElemT = __ESIMD_DNS::__raw_t<
7402 std::conditional_t<SmallIntFactor == 1, T,
7403 std::conditional_t<Use64BitData, uint64_t, uint32_t>>>;
7405 constexpr uint16_t AddressScale = 1;
7406 constexpr
int ImmOffset = 0;
7409 constexpr
auto VS = detail::to_lsc_vector_size<FactoredN>();
7411 constexpr
int NLanes = 1;
7416 AddressScale, ImmOffset, DS, VS, Transposed, NLanes>(
7417 pred.
data(), Offsets.data(),
7418 sycl::bit_cast<__ESIMD_DNS::vector_type_t<StoreElemT, FactoredN>>(
7440 __ESIMD_API std::enable_if_t<
7441 ext::oneapi::experimental::is_property_list_v<PropertyListT>>
7443 PropertyListT props = {}) {
7444 constexpr
size_t DefaultAlignment = detail::OperandSize::OWORD;
7446 detail::getPropertyValue<PropertyListT, alignment_key>(DefaultAlignment);
7447 using StoreElemT = detail::__raw_t<T>;
7448 __esimd_slm_block_st<StoreElemT, N, Alignment>(
7481 typename ValuesSimdViewT,
7482 typename T = ValuesSimdViewT::value_type::element_type,
7483 int N = ValuesSimdViewT::getSizeX() * ValuesSimdViewT::getSizeY(),
7485 __ESIMD_API std::enable_if_t<
7486 detail::is_simd_view_type_v<ValuesSimdViewT> &&
7487 ext::oneapi::experimental::is_property_list_v<PropertyListT>>
7489 PropertyListT props = {}) {
7490 slm_block_store<T, N>(byte_offset, vals.read(), pred, props);
7511 typename ValuesSimdViewT,
7512 typename T = ValuesSimdViewT::value_type::element_type,
7513 int N = ValuesSimdViewT::getSizeX() * ValuesSimdViewT::getSizeY(),
7515 __ESIMD_API std::enable_if_t<
7516 detail::is_simd_view_type_v<ValuesSimdViewT> &&
7517 ext::oneapi::experimental::is_property_list_v<PropertyListT>>
7519 PropertyListT props = {}) {
7520 slm_block_store<T, N>(byte_offset, vals.read(), props);
7540 typename T,
int N,
typename AccessorT,
7542 __ESIMD_API std::enable_if_t<
7543 detail::is_local_accessor_with_v<AccessorT,
7544 detail::accessor_mode_cap::can_write> &&
7545 ext::oneapi::experimental::is_property_list_v<PropertyListT>>
7547 PropertyListT props = {}) {
7548 byte_offset += detail::localAccessorToOffset(lacc);
7549 slm_block_store<T, N>(byte_offset, vals, props);
7568 typename T,
int N,
typename AccessorT,
7570 __ESIMD_API std::enable_if_t<
7571 detail::is_local_accessor_with_v<AccessorT,
7572 detail::accessor_mode_cap::can_write> &&
7573 ext::oneapi::experimental::is_property_list_v<PropertyListT>>
7575 slm_block_store<T, N>(detail::localAccessorToOffset(lacc), vals, props);
7606 typename T,
int N,
typename AccessorT,
7608 __ESIMD_API std::enable_if_t<
7609 detail::is_local_accessor_with_v<AccessorT,
7610 detail::accessor_mode_cap::can_write> &&
7611 ext::oneapi::experimental::is_property_list_v<PropertyListT>>
7614 byte_offset += detail::localAccessorToOffset(lacc);
7615 slm_block_store<T, N>(byte_offset, vals, pred, props);
7644 typename T,
int N,
typename AccessorT,
7646 __ESIMD_API std::enable_if_t<
7647 detail::is_local_accessor_with_v<AccessorT,
7648 detail::accessor_mode_cap::can_write> &&
7649 ext::oneapi::experimental::is_property_list_v<PropertyListT>>
7651 PropertyListT props = {}) {
7652 slm_block_store<T, N>(detail::localAccessorToOffset(lacc), vals, pred, props);
7674 typename ValuesSimdViewT,
7675 typename T = ValuesSimdViewT::value_type::element_type,
7676 int N = ValuesSimdViewT::getSizeX() * ValuesSimdViewT::getSizeY(),
7679 __ESIMD_API std::enable_if_t<
7680 detail::is_simd_view_type_v<ValuesSimdViewT> &&
7681 detail::is_local_accessor_with_v<AccessorT,
7682 detail::accessor_mode_cap::can_write> &&
7683 ext::oneapi::experimental::is_property_list_v<PropertyListT>>
7684 block_store(AccessorT lacc, uint32_t byte_offset, ValuesSimdViewT vals,
7685 PropertyListT props = {}) {
7686 block_store<T, N>(lacc, byte_offset, vals.read(), props);
7707 typename ValuesSimdViewT,
7708 typename T = ValuesSimdViewT::value_type::element_type,
7709 int N = ValuesSimdViewT::getSizeX() * ValuesSimdViewT::getSizeY(),
7712 __ESIMD_API std::enable_if_t<
7713 detail::is_simd_view_type_v<ValuesSimdViewT> &&
7714 detail::is_local_accessor_with_v<AccessorT,
7715 detail::accessor_mode_cap::can_write> &&
7716 ext::oneapi::experimental::is_property_list_v<PropertyListT>>
7717 block_store(AccessorT lacc, ValuesSimdViewT vals, PropertyListT props = {}) {
7718 block_store<T, N>(lacc, vals.read(), props);
7750 typename ValuesSimdViewT,
7751 typename T = ValuesSimdViewT::value_type::element_type,
7752 int N = ValuesSimdViewT::getSizeX() * ValuesSimdViewT::getSizeY(),
7755 __ESIMD_API std::enable_if_t<
7756 detail::is_simd_view_type_v<ValuesSimdViewT> &&
7757 detail::is_local_accessor_with_v<AccessorT,
7758 detail::accessor_mode_cap::can_write> &&
7759 ext::oneapi::experimental::is_property_list_v<PropertyListT>>
7760 block_store(AccessorT lacc, uint32_t byte_offset, ValuesSimdViewT vals,
7762 block_store<T, N>(lacc, byte_offset, vals.read(), pred, props);
7793 typename ValuesSimdViewT,
7794 typename T = ValuesSimdViewT::value_type::element_type,
7795 int N = ValuesSimdViewT::getSizeX() * ValuesSimdViewT::getSizeY(),
7798 __ESIMD_API std::enable_if_t<
7799 detail::is_simd_view_type_v<ValuesSimdViewT> &&
7800 detail::is_local_accessor_with_v<AccessorT,
7801 detail::accessor_mode_cap::can_write> &&
7802 ext::oneapi::experimental::is_property_list_v<PropertyListT>>
7804 PropertyListT props = {}) {
7805 block_store<T, N>(lacc, vals.read(), pred, props);
7814 template <
typename T, __ESIMD_NS::atomic_op Op>
7817 __ESIMD_DNS::to_lsc_atomic_op<Op>();
7818 return static_cast<int>(LSCOp);
7835 template <atomic_op Op,
typename T,
int N, lsc_data_size DS>
7836 __ESIMD_API std::enable_if_t<get_num_args<Op>() == 0,
simd<T, N>>
7838 check_lsc_data_size<T, DS>();
7840 constexpr uint16_t AddressScale = 1;
7841 constexpr
int ImmOffset = 0;
7846 constexpr
int IOp = lsc_to_internal_atomic_op<T, Op>();
7849 AddressScale, ImmOffset, EDS, VS, Transposed,
7851 return lsc_format_ret<T>(Tmp);
7868 template <atomic_op Op,
typename T,
int N, lsc_data_size DS>
7869 __ESIMD_API std::enable_if_t<get_num_args<Op>() == 1,
simd<T, N>>
7872 check_lsc_data_size<T, DS>();
7874 constexpr uint16_t AddressScale = 1;
7875 constexpr
int ImmOffset = 0;
7879 constexpr
int IOp = lsc_to_internal_atomic_op<T, Op>();
7880 if constexpr (std::is_same_v<T, double> || std::is_same_v<T, float>) {
7882 AddressScale, ImmOffset, EDS, VS,
7883 Transposed, N>(pred.
data(), offsets.
data(),
7890 AddressScale, ImmOffset, EDS, VS, Transposed,
7893 return lsc_format_ret<T>(Tmp);
7912 template <atomic_op Op,
typename T,
int N, lsc_data_size DS>
7916 check_lsc_data_size<T, DS>();
7918 constexpr uint16_t AddressScale = 1;
7919 constexpr
int ImmOffset = 0;
7923 constexpr
int IOp = lsc_to_internal_atomic_op<T, Op>();
7924 if constexpr (std::is_same_v<T, double> || std::is_same_v<T, float>) {
7926 AddressScale, ImmOffset, EDS, VS,
7927 Transposed, N>(pred.
data(), offsets.
data(),
7935 AddressScale, ImmOffset, EDS, VS, Transposed,
7937 Msg_data0.
data(), Msg_data1.
data());
7938 return lsc_format_ret<T>(Tmp);
7981 template <atomic_op Op,
typename T,
int N>
7982 __ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args<Op>() == 0,
simd<T, N>>
7986 if constexpr (
sizeof(T) == 2 ||
sizeof(T) == 8 ||
7992 if constexpr (std::is_integral_v<T>) {
7993 return slm_atomic_update<atomic_op::bit_or, T, N>(byte_offset,
7996 using Tint = detail::uint_type_t<
sizeof(T)>;
7997 simd<Tint, N> Res = slm_atomic_update<atomic_op::bit_or, Tint, N>(
7999 return Res.template bit_cast_view<T>();
8002 detail::check_atomic<Op, T, N, 0>();
8004 return __esimd_dword_atomic0<Op, T, N>(mask.data(), si, byte_offset.
data());
8016 template <atomic_op Op,
typename T,
int N,
typename AccessorT>
8017 __ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args<Op>() == 0 &&
8018 __ESIMD_DNS::is_rw_local_accessor_v<AccessorT>,
8022 byte_offset += detail::localAccessorToOffset(lacc);
8023 return slm_atomic_update<Op, T, N>(byte_offset, mask);
8060 template <atomic_op Op,
typename T,
int N>
8061 __ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args<Op>() == 1,
simd<T, N>>
8071 byte_offset,
src0, mask);
8073 if constexpr (std::is_integral_v<T>) {
8074 return slm_atomic_update<atomic_op::xchg, T, N>(byte_offset,
src0, mask);
8076 using Tint = detail::uint_type_t<
sizeof(T)>;
8077 simd<Tint, N> Res = slm_atomic_update<atomic_op::xchg, Tint, N>(
8078 byte_offset,
src0.template bit_cast_view<Tint>(), mask);
8079 return Res.template bit_cast_view<T>();
8082 detail::check_atomic<Op, T, N, 1>();
8084 return __esimd_dword_atomic1<Op, T, N>(mask.data(), si, byte_offset.
data(),
8106 template <
atomic_op Op,
typename SrcSimdViewT,
8107 typename T = SrcSimdViewT::value_type::element_type,
int N>
8108 __ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args<Op>() == 1 &&
8109 detail::is_simd_view_type_v<SrcSimdViewT>,
8113 static_assert(N == SrcSimdViewT::getSizeX() * SrcSimdViewT::getSizeY(),
8114 "Size of src0 parameter must correspond to the size of "
8115 "byte_offset parameter.");
8116 return slm_atomic_update<Op, T, N>(byte_offset,
src0.read(), mask);
8136 template <atomic_op Op,
typename OffsetSimdViewT,
typename T,
int N>
8137 __ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args<Op>() == 1 &&
8138 detail::is_simd_view_type_v<OffsetSimdViewT>,
8142 static_assert(N == OffsetSimdViewT::getSizeX() * OffsetSimdViewT::getSizeY(),
8143 "Size of src0 parameter must correspond to the size of "
8144 "byte_offset parameter.");
8145 return slm_atomic_update<Op, T, N>(byte_offset.read(),
src0, mask);
8165 template <
atomic_op Op,
typename OffsetSimdViewT,
typename SrcSimdViewT,
8166 typename T = SrcSimdViewT::value_type::element_type,
8167 int N = SrcSimdViewT::getSizeX() * SrcSimdViewT::getSizeY()>
8168 __ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args<Op>() == 1 &&
8169 detail::is_simd_view_type_v<OffsetSimdViewT> &&
8170 detail::is_simd_view_type_v<SrcSimdViewT>,
8174 static_assert(N == OffsetSimdViewT::getSizeX() * OffsetSimdViewT::getSizeY(),
8175 "Size of src0 parameter must correspond to the size of "
8176 "byte_offset parameter.");
8177 return slm_atomic_update<Op, T, N>(byte_offset.read(),
src0.read(), mask);
8198 template <atomic_op Op,
typename T,
int N,
typename AccessorT>
8199 __ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args<Op>() == 1 &&
8200 __ESIMD_DNS::is_rw_local_accessor_v<AccessorT>,
8204 byte_offset += detail::localAccessorToOffset(lacc);
8205 return slm_atomic_update<Op, T, N>(byte_offset,
src0, mask);
8226 template <
atomic_op Op,
typename OffsetSimdViewT,
typename T,
int N,
8228 __ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args<Op>() == 1 &&
8229 detail::is_simd_view_type_v<OffsetSimdViewT> &&
8230 __ESIMD_DNS::is_rw_local_accessor_v<AccessorT>,
8234 static_assert(N == OffsetSimdViewT::getSizeX() * OffsetSimdViewT::getSizeY(),
8235 "Size of src0 parameter must correspond to the size of "
8236 "byte_offset parameter.");
8237 return atomic_update<Op, T, N>(lacc, byte_offset.read(),
src0, mask);
8258 template <
atomic_op Op,
typename SrcSimdViewT,
8259 typename T = SrcSimdViewT::value_type::element_type,
int N,
8261 __ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args<Op>() == 1 &&
8262 detail::is_simd_view_type_v<SrcSimdViewT> &&
8263 __ESIMD_DNS::is_rw_local_accessor_v<AccessorT>,
8267 static_assert(N == SrcSimdViewT::getSizeX() * SrcSimdViewT::getSizeY(),
8268 "Size of src0 parameter must correspond to the size of "
8269 "byte_offset parameter.");
8270 return atomic_update<Op, T, N>(lacc, byte_offset,
src0.read(), mask);
8291 template <
atomic_op Op,
typename SrcSimdViewT,
typename OffsetSimdViewT,
8292 typename T = SrcSimdViewT::value_type::element_type,
8293 int N = SrcSimdViewT::getSizeX() * SrcSimdViewT::getSizeY(),
8295 __ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args<Op>() == 1 &&
8296 detail::is_simd_view_type_v<SrcSimdViewT> &&
8297 detail::is_simd_view_type_v<OffsetSimdViewT> &&
8298 __ESIMD_DNS::is_rw_local_accessor_v<AccessorT>,
8302 static_assert(N == OffsetSimdViewT::getSizeX() * OffsetSimdViewT::getSizeY(),
8303 "Size of src0 parameter must correspond to the size of "
8304 "byte_offset parameter.");
8305 return atomic_update<Op, T, N>(lacc, byte_offset.read(),
src0.read(), mask);
8339 template <atomic_op Op,
typename T,
int N>
8340 __ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args<Op>() == 2,
simd<T, N>>
8354 detail::check_atomic<Op, T, N, 2>();
8356 return __esimd_dword_atomic2<Op, T, N>(mask.data(), si, byte_offset.
data(),
8378 template <atomic_op Op,
typename SrcSimdViewT,
typename T,
int N>
8379 __ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args<Op>() == 2 &&
8380 detail::is_simd_view_type_v<SrcSimdViewT>,
8384 static_assert(N == SrcSimdViewT::getSizeX() * SrcSimdViewT::getSizeY(),
8385 "Size of src0 parameter must correspond to the size of "
8386 "byte_offset and src1 parameters.");
8387 return slm_atomic_update<Op, T, N>(byte_offset,
src0.read(),
src1, mask);
8407 template <atomic_op Op,
typename SrcSimdViewT,
typename T,
int N>
8408 __ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args<Op>() == 2 &&
8409 detail::is_simd_view_type_v<SrcSimdViewT>,
8413 static_assert(N == SrcSimdViewT::getSizeX() * SrcSimdViewT::getSizeY(),
8414 "Size of src1 parameter must correspond to the size of "
8415 "byte_offset and src0 parameters.");
8416 return slm_atomic_update<Op, T, N>(byte_offset,
src0,
src1.read(), mask);
8436 template <
atomic_op Op,
typename SrcSimdViewT,
8437 typename T = SrcSimdViewT::value_type::element_type,
int N>
8438 __ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args<Op>() == 2 &&
8439 detail::is_simd_view_type_v<SrcSimdViewT>,
8444 N == SrcSimdViewT::getSizeX() * SrcSimdViewT::getSizeY(),
8445 "Size of src1 and src0 parameters must correspond to the size of "
8446 "byte_offset parameter.");
8447 return slm_atomic_update<Op, T, N>(byte_offset,
src0.read(),
src1.read(),
8468 template <atomic_op Op,
typename OffsetSimdViewT,
typename T,
int N>
8469 __ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args<Op>() == 2 &&
8470 detail::is_simd_view_type_v<OffsetSimdViewT>,
8475 N == OffsetSimdViewT::getSizeX() * OffsetSimdViewT::getSizeY(),
8476 "Size of src1 and src0 parameters must correspond to the size of "
8477 "byte_offset parameter.");
8478 return slm_atomic_update<Op, T, N>(byte_offset.read(),
src0,
src1, mask);
8498 template <
atomic_op Op,
typename OffsetSimdViewT,
typename SrcSimdViewT,
8500 __ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args<Op>() == 2 &&
8501 detail::is_simd_view_type_v<SrcSimdViewT> &&
8502 detail::is_simd_view_type_v<OffsetSimdViewT>,
8506 static_assert(N == SrcSimdViewT::getSizeX() * SrcSimdViewT::getSizeY() &&
8507 N == OffsetSimdViewT::getSizeX() *
8508 OffsetSimdViewT::getSizeY(),
8509 "Size of src0 parameter must correspond to the size of "
8510 "byte_offset and src1 parameters.");
8511 return slm_atomic_update<Op, T, N>(byte_offset.read(),
src0.read(),
src1,
8532 template <
atomic_op Op,
typename OffsetSimdViewT,
typename SrcSimdViewT,
8534 __ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args<Op>() == 2 &&
8535 detail::is_simd_view_type_v<SrcSimdViewT> &&
8536 detail::is_simd_view_type_v<OffsetSimdViewT>,
8540 static_assert(N == SrcSimdViewT::getSizeX() * SrcSimdViewT::getSizeY() &&
8541 N == OffsetSimdViewT::getSizeX() *
8542 OffsetSimdViewT::getSizeY(),
8543 "Size of src1 parameter must correspond to the size of "
8544 "byte_offset and src0 parameters.");
8545 return slm_atomic_update<Op, T, N>(byte_offset.read(),
src0,
src1.read(),
8566 template <
atomic_op Op,
typename OffsetSimdViewT,
typename SrcSimdViewT,
8567 typename T = SrcSimdViewT::value_type::element_type,
8568 int N = SrcSimdViewT::getSizeX() * SrcSimdViewT::getSizeY()>
8569 __ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args<Op>() == 2 &&
8570 detail::is_simd_view_type_v<SrcSimdViewT> &&
8571 detail::is_simd_view_type_v<OffsetSimdViewT>,
8576 N == OffsetSimdViewT::getSizeX() * OffsetSimdViewT::getSizeY(),
8577 "Size of src1 and src0 parameters must correspond to the size of "
8578 "byte_offset parameter.");
8579 return slm_atomic_update<Op, T, N>(byte_offset.read(),
src0,
src1, mask);
8588 template <atomic_op Op,
typename T,
int N,
typename AccessorT>
8589 __ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args<Op>() == 2 &&
8590 __ESIMD_DNS::is_rw_local_accessor_v<AccessorT>,
8594 byte_offset += detail::localAccessorToOffset(lacc);
8595 return slm_atomic_update<Op, T, N>(byte_offset,
src0,
src1, mask);
8606 template <
atomic_op Op,
typename SrcSimdViewT,
typename T,
int N,
8608 __ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args<Op>() == 2 &&
8609 detail::is_simd_view_type_v<SrcSimdViewT> &&
8610 __ESIMD_DNS::is_rw_local_accessor_v<AccessorT>,
8614 static_assert(N == SrcSimdViewT::getSizeX() * SrcSimdViewT::getSizeY(),
8615 "Size of src0 parameter must correspond to the size of "
8616 "byte_offset and src1 parameters.");
8617 return atomic_update<Op, T, N>(lacc, byte_offset,
src0.read(),
src1, mask);
8628 template <
atomic_op Op,
typename SrcSimdViewT,
typename T,
int N,
8630 __ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args<Op>() == 2 &&
8631 detail::is_simd_view_type_v<SrcSimdViewT> &&
8632 __ESIMD_DNS::is_rw_local_accessor_v<AccessorT>,
8636 static_assert(N == SrcSimdViewT::getSizeX() * SrcSimdViewT::getSizeY(),
8637 "Size of src1 parameter must correspond to the size of "
8638 "byte_offset and src0 parameters.");
8639 return atomic_update<Op, T, N>(lacc, byte_offset,
src0,
src1.read(), mask);
8650 template <
atomic_op Op,
typename SrcSimdViewT,
8651 typename T = SrcSimdViewT::value_type::element_type,
int N,
8653 __ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args<Op>() == 2 &&
8654 detail::is_simd_view_type_v<SrcSimdViewT> &&
8655 __ESIMD_DNS::is_rw_local_accessor_v<AccessorT>,
8660 N == SrcSimdViewT::getSizeX() * SrcSimdViewT::getSizeY(),
8661 "Size of src1 and src0 parameters must correspond to the size of "
8662 "byte_offset parameter.");
8663 return atomic_update<Op, T, N>(lacc, byte_offset,
src0.read(),
src1.read(),
8675 template <
atomic_op Op,
typename OffsetSimdViewT,
typename T,
int N,
8677 __ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args<Op>() == 2 &&
8678 detail::is_simd_view_type_v<OffsetSimdViewT> &&
8679 __ESIMD_DNS::is_rw_local_accessor_v<AccessorT>,
8684 N == OffsetSimdViewT::getSizeX() * OffsetSimdViewT::getSizeY(),
8685 "Size of src1 and src0 parameters must correspond to the size of "
8686 "byte_offset parameter.");
8687 return atomic_update<Op, T, N>(lacc, byte_offset.read(),
src0,
src1, mask);
8698 template <
atomic_op Op,
typename OffsetSimdViewT,
typename SrcSimdViewT,
8699 typename T,
int N,
typename AccessorT>
8700 __ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args<Op>() == 2 &&
8701 detail::is_simd_view_type_v<SrcSimdViewT> &&
8702 detail::is_simd_view_type_v<OffsetSimdViewT> &&
8703 __ESIMD_DNS::is_rw_local_accessor_v<AccessorT>,
8707 static_assert(N == SrcSimdViewT::getSizeX() * SrcSimdViewT::getSizeY(),
8708 "Size of src0 parameter must correspond to the size of "
8709 "byte_offset and src1 parameters.");
8710 return atomic_update<Op, T, N>(lacc, byte_offset.read(),
src0.read(),
src1,
8722 template <
atomic_op Op,
typename OffsetSimdViewT,
typename SrcSimdViewT,
8723 typename T,
int N,
typename AccessorT>
8724 __ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args<Op>() == 2 &&
8725 detail::is_simd_view_type_v<SrcSimdViewT> &&
8726 detail::is_simd_view_type_v<OffsetSimdViewT> &&
8727 __ESIMD_DNS::is_rw_local_accessor_v<AccessorT>,
8731 static_assert(N == SrcSimdViewT::getSizeX() * SrcSimdViewT::getSizeY() &&
8732 N == OffsetSimdViewT::getSizeX() *
8733 OffsetSimdViewT::getSizeY(),
8734 "Size of src1 parameter must correspond to the size of "
8735 "byte_offset and src0 parameters.");
8736 return atomic_update<Op, T, N>(lacc, byte_offset.read(),
src0,
src1.read(),
8748 template <
atomic_op Op,
typename OffsetSimdViewT,
typename SrcSimdViewT,
8749 typename T = SrcSimdViewT::value_type::element_type,
8750 int N = SrcSimdViewT::getSizeX() * SrcSimdViewT::getSizeY(),
8752 __ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args<Op>() == 2 &&
8753 detail::is_simd_view_type_v<SrcSimdViewT> &&
8754 detail::is_simd_view_type_v<OffsetSimdViewT> &&
8755 __ESIMD_DNS::is_rw_local_accessor_v<AccessorT>,
8760 N == OffsetSimdViewT::getSizeX() * OffsetSimdViewT::getSizeY(),
8761 "Size of src1 and src0 parameters must correspond to the size of "
8762 "byte_offset parameter.");
8763 return atomic_update<Op, T, N>(lacc, byte_offset.read(),
src0.read(),
8785 typename PropertyListT,
typename Toffset>
8786 __ESIMD_API std::enable_if_t<get_num_args<Op>() == 0,
simd<T, N>>
8788 static_assert(
sizeof(T) > 1,
"Unsupported data type");
8789 static_assert(std::is_integral_v<Toffset>,
"Unsupported offset type");
8791 check_lsc_data_size<T, DS>();
8792 check_cache_hints<cache_action::atomic, PropertyListT>();
8793 constexpr
auto L1H = getCacheHintForIntrin<PropertyListT, cache_level::L1>();
8794 constexpr
auto L2H = getCacheHintForIntrin<PropertyListT, cache_level::L2>();
8795 constexpr uint16_t AddressScale = 1;
8796 constexpr
int ImmOffset = 0;
8801 constexpr
int IOp = lsc_to_internal_atomic_op<T, Op>();
8803 addrs += convert<uintptr_t>(offsets);
8805 __esimd_lsc_xatomic_stateless_0<MsgT, IOp, L1H, L2H, AddressScale,
8806 ImmOffset, EDS, VS, Transposed, N>(
8808 return lsc_format_ret<T>(Tmp);
8826 typename PropertyListT,
typename Toffset>
8827 __ESIMD_API std::enable_if_t<get_num_args<Op>() == 1,
simd<T, N>>
8830 static_assert(
sizeof(T) > 1,
"Unsupported data type");
8831 static_assert(std::is_integral_v<Toffset>,
"Unsupported offset type");
8832 check_lsc_data_size<T, DS>();
8834 check_cache_hints<cache_action::atomic, PropertyListT>();
8835 constexpr
auto L1H = getCacheHintForIntrin<PropertyListT, cache_level::L1>();
8836 constexpr
auto L2H = getCacheHintForIntrin<PropertyListT, cache_level::L2>();
8837 constexpr uint16_t AddressScale = 1;
8838 constexpr
int ImmOffset = 0;
8843 constexpr
int IOp = lsc_to_internal_atomic_op<T, Op>();
8845 addrs += convert<uintptr_t>(offsets);
8846 if constexpr (std::is_same_v<T, double> || std::is_same_v<T, float>) {
8847 return __esimd_lsc_xatomic_stateless_1<T, IOp, L1H, L2H, AddressScale,
8848 ImmOffset, EDS, VS, Transposed, N>(
8853 __esimd_lsc_xatomic_stateless_1<MsgT, IOp, L1H, L2H, AddressScale,
8854 ImmOffset, EDS, VS, Transposed, N>(
8856 return lsc_format_ret<T>(Tmp);
8876 typename PropertyListT,
typename Toffset>
8877 __ESIMD_API std::enable_if_t<get_num_args<Op>() == 2,
simd<T, N>>
8880 static_assert(
sizeof(T) > 1,
"Unsupported data type");
8881 static_assert(std::is_integral_v<Toffset>,
"Unsupported offset type");
8882 check_lsc_data_size<T, DS>();
8884 check_cache_hints<cache_action::atomic, PropertyListT>();
8885 constexpr
auto L1H = getCacheHintForIntrin<PropertyListT, cache_level::L1>();
8886 constexpr
auto L2H = getCacheHintForIntrin<PropertyListT, cache_level::L2>();
8887 constexpr uint16_t AddressScale = 1;
8888 constexpr
int ImmOffset = 0;
8893 constexpr
int IOp = lsc_to_internal_atomic_op<T, Op>();
8895 addrs += convert<uintptr_t>(offsets);
8896 if constexpr (std::is_same_v<T, double> || std::is_same_v<T, float>) {
8897 return __esimd_lsc_xatomic_stateless_2<T, IOp, L1H, L2H, AddressScale,
8898 ImmOffset, EDS, VS, Transposed, N>(
8905 __esimd_lsc_xatomic_stateless_2<MsgT, IOp, L1H, L2H, AddressScale,
8906 ImmOffset, EDS, VS, Transposed, N>(
8908 return lsc_format_ret<T>(Tmp);
8927 template <
atomic_op Op,
typename T,
int N,
8929 typename PropertyListT,
typename AccessorTy,
typename Toffset>
8931 std::enable_if_t<get_num_args<Op>() == 0 &&
8932 __ESIMD_DNS::is_rw_device_accessor_v<AccessorTy>,
8936 #ifdef __ESIMD_FORCE_STATELESS_MEM
8937 return atomic_update_impl<Op, T, N, DS, PropertyListT>(
8938 accessorToPointer<T>(acc), byte_offsets, pred);
8940 static_assert(
sizeof(T) > 1,
"Unsupported data type");
8941 static_assert(std::is_integral_v<Toffset> &&
sizeof(Toffset) == 4,
8942 "Unsupported offset type");
8943 check_lsc_data_size<T, DS>();
8945 check_cache_hints<cache_action::atomic, PropertyListT>();
8946 constexpr
auto L1H = getCacheHintForIntrin<PropertyListT, cache_level::L1>();
8947 constexpr
auto L2H = getCacheHintForIntrin<PropertyListT, cache_level::L2>();
8948 constexpr uint16_t AddressScale = 1;
8949 constexpr
int ImmOffset = 0;
8954 constexpr
int IOp = lsc_to_internal_atomic_op<T, Op>();
8957 __esimd_lsc_xatomic_bti_0<MsgT, IOp, L1H, L2H, AddressScale, ImmOffset,
8958 EDS, VS, Transposed, N>(
8959 pred.
data(), byte_offsets.
data(), si);
8960 return lsc_format_ret<T>(Tmp);
8982 typename PropertyListT,
typename AccessorTy,
typename Toffset>
8984 std::enable_if_t<get_num_args<Op>() == 1 &&
8985 __ESIMD_DNS::is_rw_device_accessor_v<AccessorTy>,
8989 #ifdef __ESIMD_FORCE_STATELESS_MEM
8990 return atomic_update_impl<Op, T, N, DS, PropertyListT>(
8991 accessorToPointer<T>(acc), byte_offset,
src0, pred);
8993 static_assert(
sizeof(T) > 1,
"Unsupported data type");
8994 static_assert(std::is_integral_v<Toffset> &&
sizeof(Toffset) == 4,
8995 "Unsupported offset type");
8996 check_lsc_data_size<T, DS>();
8998 check_cache_hints<cache_action::atomic, PropertyListT>();
8999 constexpr
auto L1H = getCacheHintForIntrin<PropertyListT, cache_level::L1>();
9000 constexpr
auto L2H = getCacheHintForIntrin<PropertyListT, cache_level::L2>();
9001 constexpr uint16_t AddressScale = 1;
9002 constexpr
int ImmOffset = 0;
9007 constexpr
int IOp = lsc_to_internal_atomic_op<T, Op>();
9009 if constexpr (std::is_same_v<T, double> || std::is_same_v<T, float>) {
9010 return __esimd_lsc_xatomic_bti_1<T, IOp, L1H, L2H, AddressScale, ImmOffset,
9011 EDS, VS, Transposed, N>(
9016 __esimd_lsc_xatomic_bti_1<MsgT, IOp, L1H, L2H, AddressScale, ImmOffset,
9017 EDS, VS, Transposed, N>(
9019 return lsc_format_ret<T>(Tmp);
9043 typename PropertyListT,
typename AccessorTy,
typename Toffset>
9045 std::enable_if_t<get_num_args<Op>() == 2 &&
9046 __ESIMD_DNS::is_rw_device_accessor_v<AccessorTy>,
9050 #ifdef __ESIMD_FORCE_STATELESS_MEM
9051 return atomic_update_impl<Op, T, N, DS, PropertyListT>(
9052 __ESIMD_DNS::accessorToPointer<T>(acc), byte_offset,
src0,
src1, pred);
9054 static_assert(std::is_integral_v<Toffset> &&
sizeof(Toffset) == 4,
9055 "Unsupported offset type");
9056 check_lsc_vector_size<1>();
9057 check_lsc_data_size<T, DS>();
9059 check_cache_hints<cache_action::atomic, PropertyListT>();
9060 constexpr
auto L1H = getCacheHintForIntrin<PropertyListT, cache_level::L1>();
9061 constexpr
auto L2H = getCacheHintForIntrin<PropertyListT, cache_level::L2>();
9062 constexpr uint16_t AddressScale = 1;
9063 constexpr
int ImmOffset = 0;
9068 constexpr
int IOp = lsc_to_internal_atomic_op<T, Op>();
9070 if constexpr (std::is_same_v<T, double> || std::is_same_v<T, float>) {
9071 return __esimd_lsc_xatomic_bti_2<T, IOp, L1H, L2H, AddressScale, ImmOffset,
9072 EDS, VS, Transposed, N>(
9078 __esimd_lsc_xatomic_bti_2<MsgT, IOp, L1H, L2H, AddressScale, ImmOffset,
9079 EDS, VS, Transposed, N>(
9082 return lsc_format_ret<T>(Tmp);
9133 atomic_op Op,
typename T,
int N,
typename Toffset,
9135 __ESIMD_API std::enable_if_t<
9136 __ESIMD_DNS::get_num_args<Op>() == 0 &&
9137 ext::oneapi::experimental::is_property_list_v<PropertyListT>,
9140 PropertyListT props = {}) {
9141 static_assert(std::is_integral_v<Toffset>,
"Unsupported offset type");
9143 if constexpr (detail::has_cache_hints<PropertyListT>() ||
9147 p, byte_offset, mask);
9148 }
else if constexpr (N == 16 || N == 32) {
9162 for (
int I = 0; I < N; I += 8) {
9165 Res.template select<8, 1>(I) =
9166 atomic_update<Op, T, 8>(p, ByteOffset8, Mask8, props);
9170 if constexpr (std::is_integral_v<T>) {
9171 return atomic_update<atomic_op::bit_or, T, N>(p, byte_offset,
9174 using Tint = detail::uint_type_t<
sizeof(T)>;
9175 simd<Tint, N> Res = atomic_update<atomic_op::bit_or, Tint, N>(
9176 reinterpret_cast<Tint *
>(p), byte_offset,
simd<Tint, N>(0), mask,
9178 return Res.template bit_cast_view<T>();
9181 detail::check_atomic<Op, T, N, 0>();
9185 using Tx =
typename detail::__raw_t<T>;
9186 return __esimd_svm_atomic0<Op, Tx, N>(vAddr.data(), mask.
data());
9209 atomic_op Op,
typename T,
int N,
typename Toffset,
9211 __ESIMD_API std::enable_if_t<
9212 __ESIMD_DNS::get_num_args<Op>() == 0 &&
9213 ext::oneapi::experimental::is_property_list_v<PropertyListT>,
9217 return atomic_update<Op, T, N>(p, byte_offset, mask, props);
9241 atomic_op Op,
typename T,
int N,
typename OffsetSimdViewT,
9243 __ESIMD_API std::enable_if_t<
9244 __ESIMD_DNS::get_num_args<Op>() == 0 &&
9245 ext::oneapi::experimental::is_property_list_v<PropertyListT> &&
9246 detail::is_simd_view_type_v<OffsetSimdViewT>,
9249 PropertyListT props = {}) {
9250 return atomic_update<Op, T, N>(p, offsets.read(), mask, props);
9272 atomic_op Op,
typename T,
int N,
typename OffsetSimdViewT,
9274 __ESIMD_API std::enable_if_t<
9275 __ESIMD_DNS::get_num_args<Op>() == 0 &&
9276 ext::oneapi::experimental::is_property_list_v<PropertyListT> &&
9277 detail::is_simd_view_type_v<OffsetSimdViewT>,
9280 return atomic_update<Op, T, N>(p, byte_offset.read(), props);
9301 atomic_op Op,
typename OffsetSimdViewT,
typename T,
9302 int N = OffsetSimdViewT::getSizeX() * OffsetSimdViewT::getSizeY(),
9304 __ESIMD_API std::enable_if_t<
9305 __ESIMD_DNS::get_num_args<Op>() == 0 &&
9306 ext::oneapi::experimental::is_property_list_v<PropertyListT> &&
9307 detail::is_simd_view_type_v<OffsetSimdViewT>,
9309 atomic_update(T *p, OffsetSimdViewT byte_offset, PropertyListT props = {}) {
9310 return atomic_update<Op, T, N>(p, byte_offset.read(), props);
9327 template <atomic_op Op,
typename T,
int N,
typename Toffset>
9328 __ESIMD_API std::enable_if_t<std::is_integral_v<Toffset>,
simd<T, N>>
9382 atomic_op Op,
typename T,
int N,
typename Toffset,
9384 __ESIMD_API std::enable_if_t<
9385 __ESIMD_DNS::get_num_args<Op>() == 1 &&
9386 ext::oneapi::experimental::is_property_list_v<PropertyListT>,
9390 static_assert(std::is_integral_v<Toffset>,
"Unsupported offset type");
9393 if constexpr (detail::has_cache_hints<PropertyListT>() ||
9399 p, byte_offset,
src0, mask);
9400 }
else if constexpr (N == 16 || N == 32) {
9413 for (
int I = 0; I < N; I += 8) {
9417 Res.template select<8, 1>(I) =
9418 atomic_update<Op, T, 8>(p, ByteOffset8, Src08, Mask8, props);
9422 if constexpr (std::is_integral_v<T>) {
9423 return atomic_update<atomic_op::xchg, T, N>(p, byte_offset,
src0, mask,
9426 using Tint = detail::uint_type_t<
sizeof(T)>;
9427 simd<Tint, N> Res = atomic_update<atomic_op::xchg, Tint, N>(
9428 reinterpret_cast<Tint *
>(p), byte_offset,
9429 src0.template bit_cast_view<Tint>(), mask, props);
9430 return Res.template bit_cast_view<T>();
9433 detail::check_atomic<Op, T, N, 1>();
9438 using Tx =
typename detail::__raw_t<T>;
9439 return __esimd_svm_atomic1<Op, Tx, N>(vAddr.data(),
src0.data(),
9474 atomic_op Op,
typename SrcSimdViewT,
typename T,
int N,
typename Toffset,
9476 __ESIMD_API std::enable_if_t<
9477 __ESIMD_DNS::get_num_args<Op>() == 1 &&
9478 detail::is_simd_view_type_v<SrcSimdViewT> &&
9479 ext::oneapi::experimental::is_property_list_v<PropertyListT>,
9483 static_assert(N == SrcSimdViewT::getSizeX() * SrcSimdViewT::getSizeY(),
9484 "Size of src0 parameter must correspond to the size of "
9485 "byte_offset parameter.");
9486 return atomic_update<Op, T, N>(p, byte_offset,
src0.read(), mask, props);
9513 atomic_op Op,
typename T,
int N,
typename Toffset,
9515 __ESIMD_API std::enable_if_t<
9516 __ESIMD_DNS::get_num_args<Op>() == 1 &&
9517 ext::oneapi::experimental::is_property_list_v<PropertyListT>,
9520 PropertyListT props = {}) {
9522 return atomic_update<Op, T, N>(p, byte_offset,
src0, mask, props);
9551 atomic_op Op,
typename SrcSimdViewT,
typename T,
int N,
typename Toffset,
9553 __ESIMD_API std::enable_if_t<
9554 __ESIMD_DNS::get_num_args<Op>() == 1 &&
9555 detail::is_simd_view_type_v<SrcSimdViewT> &&
9556 ext::oneapi::experimental::is_property_list_v<PropertyListT>,
9559 PropertyListT props = {}) {
9560 static_assert(N == SrcSimdViewT::getSizeX() * SrcSimdViewT::getSizeY(),
9561 "Size of src0 parameter must correspond to the size of "
9562 "byte_offset parameter.");
9563 return atomic_update<Op, T, N>(p, byte_offset,
src0.read(), props);
9594 atomic_op Op,
typename T,
int N,
typename OffsetSimdViewT,
9596 __ESIMD_API std::enable_if_t<
9597 __ESIMD_DNS::get_num_args<Op>() == 1 &&
9598 ext::oneapi::experimental::is_property_list_v<PropertyListT> &&
9599 detail::is_simd_view_type_v<OffsetSimdViewT>,
9602 PropertyListT props = {}) {
9603 return atomic_update<Op, T, N>(p, offsets.read(),
src0, mask, props);
9635 atomic_op Op,
typename OffsetSimdViewT,
typename SrcSimdViewT,
typename T,
9638 __ESIMD_API std::enable_if_t<
9639 __ESIMD_DNS::get_num_args<Op>() == 1 &&
9640 ext::oneapi::experimental::is_property_list_v<PropertyListT> &&
9641 detail::is_simd_view_type_v<OffsetSimdViewT> &&
9642 detail::is_simd_view_type_v<SrcSimdViewT>,
9647 N == OffsetSimdViewT::getSizeX() * OffsetSimdViewT::getSizeY() &&
9648 N == SrcSimdViewT::getSizeX() * SrcSimdViewT::getSizeY(),
9649 "Size of src0 and offsets parameters must correspond to the size of "
9651 return atomic_update<Op, T, N>(p, offsets.read(),
src0.read(), mask, props);
9680 atomic_op Op,
typename T,
int N,
typename OffsetSimdViewT,
9682 __ESIMD_API std::enable_if_t<
9683 __ESIMD_DNS::get_num_args<Op>() == 1 &&
9684 ext::oneapi::experimental::is_property_list_v<PropertyListT> &&
9685 detail::is_simd_view_type_v<OffsetSimdViewT>,
9688 PropertyListT props = {}) {
9690 return atomic_update<Op, T, N>(p, offsets.read(),
src0, mask, props);
9718 atomic_op Op,
typename OffsetSimdViewT,
typename SrcSimdViewT,
typename T,
9719 int N = SrcSimdViewT::getSizeX() * SrcSimdViewT::getSizeY(),
9721 __ESIMD_API std::enable_if_t<
9722 __ESIMD_DNS::get_num_args<Op>() == 1 &&
9723 ext::oneapi::experimental::is_property_list_v<PropertyListT> &&
9724 detail::is_simd_view_type_v<OffsetSimdViewT> &&
9725 detail::is_simd_view_type_v<SrcSimdViewT>,
9728 PropertyListT props = {}) {
9729 static_assert(N == OffsetSimdViewT::getSizeX() * OffsetSimdViewT::getSizeY(),
9730 "Size of src0 parameter must correspond to the size of "
9731 "offsets parameter.");
9732 return atomic_update<Op, T, N>(p, offsets.read(),
src0.read(), props);
9753 template <atomic_op Op,
typename Tx,
int N,
typename Toffset>
9754 __ESIMD_API std::enable_if_t<
9755 std::is_integral_v<Toffset> &&
9809 atomic_op Op,
typename T,
int N,
typename Toffset,
9811 __ESIMD_API std::enable_if_t<
9812 __ESIMD_DNS::get_num_args<Op>() == 2 &&
9813 ext::oneapi::experimental::is_property_list_v<PropertyListT>,
9817 static_assert(std::is_integral_v<Toffset>,
"Unsupported offset type");
9822 if constexpr (detail::has_cache_hints<PropertyListT>() ||
9831 }
else if constexpr (N == 16 || N == 32) {
9844 for (
int I = 0; I < N; I += 8) {
9849 Res.template select<8, 1>(I) =
9850 atomic_update<Op, T, 8>(p, ByteOffset8, Src08, Src18, Mask8, props);
9854 detail::check_atomic<Op, T, N, 2>();
9858 using Tx =
typename detail::__raw_t<T>;
9859 return __esimd_svm_atomic2<Op, Tx, N>(vAddr.data(),
src0.data(),
9888 atomic_op Op,
typename SrcSimdViewT,
typename T,
int N,
typename Toffset,
9890 __ESIMD_API std::enable_if_t<
9891 __ESIMD_DNS::get_num_args<Op>() == 2 &&
9892 detail::is_simd_view_type_v<SrcSimdViewT> &&
9893 ext::oneapi::experimental::is_property_list_v<PropertyListT>,
9897 static_assert(N == SrcSimdViewT::getSizeX() * SrcSimdViewT::getSizeY(),
9898 "Size of src0 parameter must correspond to the size of "
9899 "byte_offset parameter.");
9900 return atomic_update<Op, T, N>(p, byte_offset,
src0.read(),
src1, mask,
9928 atomic_op Op,
typename SrcSimdViewT,
typename T,
int N,
typename Toffset,
9930 __ESIMD_API std::enable_if_t<
9931 __ESIMD_DNS::get_num_args<Op>() == 2 &&
9932 detail::is_simd_view_type_v<SrcSimdViewT> &&
9933 ext::oneapi::experimental::is_property_list_v<PropertyListT>,
9937 static_assert(N == SrcSimdViewT::getSizeX() * SrcSimdViewT::getSizeY(),
9938 "Size of src1 parameter must correspond to the size of "
9939 "byte_offset parameter.");
9940 return atomic_update<Op, T, N>(p, byte_offset,
src0,
src1.read(), mask,
9968 atomic_op Op,
typename SrcSimdViewT,
typename T,
int N,
typename Toffset,
9970 __ESIMD_API std::enable_if_t<
9971 __ESIMD_DNS::get_num_args<Op>() == 2 &&
9972 detail::is_simd_view_type_v<SrcSimdViewT> &&
9973 ext::oneapi::experimental::is_property_list_v<PropertyListT>,
9978 N == SrcSimdViewT::getSizeX() * SrcSimdViewT::getSizeY(),
9979 "Size of src1 and src0 parameters must correspond to the size of "
9980 "byte_offset parameter.");
9981 return atomic_update<Op, T, N>(p, byte_offset,
src0.read(),
src1.read(), mask,
10005 atomic_op Op,
typename T,
int N,
typename Toffset,
10007 __ESIMD_API std::enable_if_t<
10008 __ESIMD_DNS::get_num_args<Op>() == 2 &&
10009 ext::oneapi::experimental::is_property_list_v<PropertyListT>,
10014 return atomic_update<Op, T, N>(p, byte_offset,
src0,
src1, mask, props);
10039 atomic_op Op,
typename SrcSimdViewT,
typename T,
int N,
typename Toffset,
10041 __ESIMD_API std::enable_if_t<
10042 __ESIMD_DNS::get_num_args<Op>() == 2 &&
10043 detail::is_simd_view_type_v<SrcSimdViewT> &&
10044 ext::oneapi::experimental::is_property_list_v<PropertyListT>,
10048 static_assert(N == SrcSimdViewT::getSizeX() * SrcSimdViewT::getSizeY(),
10049 "Size of src0 parameter must correspond to the size of "
10050 "byte_offset parameter.");
10051 return atomic_update<Op, T, N>(p, byte_offset,
src0.read(),
src1, props);
10076 atomic_op Op,
typename SrcSimdViewT,
typename T,
int N,
typename Toffset,
10078 __ESIMD_API std::enable_if_t<
10079 __ESIMD_DNS::get_num_args<Op>() == 2 &&
10080 detail::is_simd_view_type_v<SrcSimdViewT> &&
10081 ext::oneapi::experimental::is_property_list_v<PropertyListT>,
10084 SrcSimdViewT
src1, PropertyListT props = {}) {
10085 static_assert(N == SrcSimdViewT::getSizeX() * SrcSimdViewT::getSizeY(),
10086 "Size of src1 parameter must correspond to the size of "
10087 "byte_offset parameter.");
10088 return atomic_update<Op, T, N>(p, byte_offset,
src0,
src1.read(), props);
10113 atomic_op Op,
typename SrcSimdViewT,
typename T,
int N,
typename Toffset,
10115 __ESIMD_API std::enable_if_t<
10116 __ESIMD_DNS::get_num_args<Op>() == 2 &&
10117 detail::is_simd_view_type_v<SrcSimdViewT> &&
10118 ext::oneapi::experimental::is_property_list_v<PropertyListT>,
10121 SrcSimdViewT
src1, PropertyListT props = {}) {
10123 N == SrcSimdViewT::getSizeX() * SrcSimdViewT::getSizeY(),
10124 "Size of src1 and src0 parameters must correspond to the size of "
10125 "byte_offset parameter.");
10126 return atomic_update<Op, T, N>(p, byte_offset,
src0.read(),
src1.read(),
10151 atomic_op Op,
typename T,
int N,
typename OffsetSimdViewT,
10153 __ESIMD_API std::enable_if_t<
10154 __ESIMD_DNS::get_num_args<Op>() == 2 &&
10155 ext::oneapi::experimental::is_property_list_v<PropertyListT> &&
10156 detail::is_simd_view_type_v<OffsetSimdViewT>,
10160 return atomic_update<Op, T, N>(p, byte_offset.read(),
src0,
src1, mask,
10183 atomic_op Op,
typename SrcSimdViewT,
typename OffsetSimdViewT,
typename T,
10186 __ESIMD_API std::enable_if_t<
10187 __ESIMD_DNS::get_num_args<Op>() == 2 &&
10188 ext::oneapi::experimental::is_property_list_v<PropertyListT> &&
10189 detail::is_simd_view_type_v<OffsetSimdViewT> &&
10190 detail::is_simd_view_type_v<SrcSimdViewT>,
10195 N == SrcSimdViewT::getSizeX() * SrcSimdViewT::getSizeY() &&
10196 N == OffsetSimdViewT::getSizeX() * OffsetSimdViewT::getSizeY(),
10197 "Size of src0 and byte_offset parameters must correspond to the size of "
10198 "mask parameter.");
10199 return atomic_update<Op, T, N>(p, byte_offset.read(),
src0.read(),
src1, mask,
10222 atomic_op Op,
typename SrcSimdViewT,
typename OffsetSimdViewT,
typename T,
10225 __ESIMD_API std::enable_if_t<
10226 __ESIMD_DNS::get_num_args<Op>() == 2 &&
10227 ext::oneapi::experimental::is_property_list_v<PropertyListT> &&
10228 detail::is_simd_view_type_v<OffsetSimdViewT> &&
10229 detail::is_simd_view_type_v<SrcSimdViewT>,
10234 N == SrcSimdViewT::getSizeX() * SrcSimdViewT::getSizeY() &&
10235 N == OffsetSimdViewT::getSizeX() * OffsetSimdViewT::getSizeY(),
10236 "Size of src1 and byte_offset parameters must correspond to the size of "
10237 "mask parameter.");
10238 return atomic_update<Op, T, N>(p, byte_offset.read(),
src0,
src1.read(), mask,
10261 atomic_op Op,
typename SrcSimdViewT,
typename OffsetSimdViewT,
typename T,
10264 __ESIMD_API std::enable_if_t<
10265 __ESIMD_DNS::get_num_args<Op>() == 2 &&
10266 ext::oneapi::experimental::is_property_list_v<PropertyListT> &&
10267 detail::is_simd_view_type_v<OffsetSimdViewT> &&
10268 detail::is_simd_view_type_v<SrcSimdViewT>,
10272 static_assert(N == SrcSimdViewT::getSizeX() * SrcSimdViewT::getSizeY() &&
10273 N == OffsetSimdViewT::getSizeX() *
10274 OffsetSimdViewT::getSizeY(),
10275 "Size of src0, src1 and byte_offset parameters must correspond "
10277 "mask parameter.");
10278 return atomic_update<Op, T, N>(p, byte_offset.read(),
src0.read(),
10279 src1.read(), mask, props);
10301 atomic_op Op,
typename T,
int N,
typename OffsetSimdViewT,
10303 __ESIMD_API std::enable_if_t<
10304 __ESIMD_DNS::get_num_args<Op>() == 2 &&
10305 ext::oneapi::experimental::is_property_list_v<PropertyListT> &&
10306 detail::is_simd_view_type_v<OffsetSimdViewT>,
10311 return atomic_update<Op, T, N>(p, byte_offset.read(),
src0,
src1, mask,
10332 atomic_op Op,
typename SrcSimdViewT,
typename OffsetSimdViewT,
typename T,
10335 __ESIMD_API std::enable_if_t<
10336 __ESIMD_DNS::get_num_args<Op>() == 2 &&
10337 ext::oneapi::experimental::is_property_list_v<PropertyListT> &&
10338 detail::is_simd_view_type_v<OffsetSimdViewT> &&
10339 detail::is_simd_view_type_v<SrcSimdViewT>,
10344 N == SrcSimdViewT::getSizeX() * SrcSimdViewT::getSizeY() &&
10345 N == OffsetSimdViewT::getSizeX() * OffsetSimdViewT::getSizeY(),
10346 "Size of src0 and byte_offset parameters must correspond to the size of "
10347 "src1 parameter.");
10348 return atomic_update<Op, T, N>(p, byte_offset.read(),
src0.read(),
src1,
10369 atomic_op Op,
typename SrcSimdViewT,
typename OffsetSimdViewT,
typename T,
10372 __ESIMD_API std::enable_if_t<
10373 __ESIMD_DNS::get_num_args<Op>() == 2 &&
10374 ext::oneapi::experimental::is_property_list_v<PropertyListT> &&
10375 detail::is_simd_view_type_v<OffsetSimdViewT> &&
10376 detail::is_simd_view_type_v<SrcSimdViewT>,
10379 SrcSimdViewT
src1, PropertyListT props = {}) {
10381 N == SrcSimdViewT::getSizeX() * SrcSimdViewT::getSizeY() &&
10382 N == OffsetSimdViewT::getSizeX() * OffsetSimdViewT::getSizeY(),
10383 "Size of src1 and byte_offset parameters must correspond to the size of "
10384 "src0 parameter.");
10385 return atomic_update<Op, T, N>(p, byte_offset.read(),
src0,
src1.read(),
10406 atomic_op Op,
typename SrcSimdViewT,
typename OffsetSimdViewT,
typename T,
10407 int N = SrcSimdViewT::getSizeX() * SrcSimdViewT::getSizeY(),
10409 __ESIMD_API std::enable_if_t<
10410 __ESIMD_DNS::get_num_args<Op>() == 2 &&
10411 ext::oneapi::experimental::is_property_list_v<PropertyListT> &&
10412 detail::is_simd_view_type_v<OffsetSimdViewT> &&
10413 detail::is_simd_view_type_v<SrcSimdViewT>,
10416 SrcSimdViewT
src1, PropertyListT props = {}) {
10417 static_assert(N == OffsetSimdViewT::getSizeX() * OffsetSimdViewT::getSizeY(),
10418 "Size of src0, src1 and byte_offset parameters must be equal.");
10419 return atomic_update<Op, T, N>(p, byte_offset.read(),
src0.read(),
10420 src1.read(), props);
10439 template <atomic_op Op,
typename Tx,
int N,
typename Toffset>
10440 __ESIMD_API std::enable_if_t<std::is_integral_v<Toffset>,
simd<Tx, N>>
10493 atomic_op Op,
typename T,
int N,
typename Toffset,
typename AccessorTy,
10495 __ESIMD_API std::enable_if_t<
10496 __ESIMD_DNS::get_num_args<Op>() == 0 &&
10497 __ESIMD_DNS::is_rw_device_accessor_v<AccessorTy> &&
10498 ext::oneapi::experimental::is_property_list_v<PropertyListT>,
10501 PropertyListT props = {}) {
10502 #ifdef __ESIMD_FORCE_STATELESS_MEM
10503 return atomic_update<Op, T, N>(__ESIMD_DNS::accessorToPointer<T>(acc),
10504 byte_offset, mask, props);
10506 static_assert(std::is_integral_v<Toffset>,
"Unsupported offset type");
10508 if constexpr (detail::has_cache_hints<PropertyListT>() ||
10512 acc, byte_offset, mask);
10515 if constexpr (std::is_integral_v<T>) {
10516 return atomic_update<atomic_op::bit_or, T, N>(
10517 acc, byte_offset,
simd<T, N>(0), mask, props);
10519 using Tint = detail::uint_type_t<
sizeof(T)>;
10520 simd<Tint, N> Res = atomic_update<atomic_op::bit_or, Tint, N>(
10522 return Res.template bit_cast_view<T>();
10525 detail::check_atomic<Op, T, N, 0>();
10526 static_assert(
sizeof(Toffset) == 4,
"Only 32 bit offset is supported");
10528 static_assert(
sizeof(T) == 4,
"Only 32 bit data is supported");
10530 using Tx =
typename detail::__raw_t<T>;
10531 return __esimd_dword_atomic0<Op, Tx, N>(mask.
data(), si,
10532 byte_offset.
data());
10559 atomic_op Op,
typename T,
int N,
typename Toffset,
typename AccessorTy,
10561 __ESIMD_API std::enable_if_t<
10562 __ESIMD_DNS::get_num_args<Op>() == 0 &&
10563 __ESIMD_DNS::is_rw_device_accessor_v<AccessorTy> &&
10564 ext::oneapi::experimental::is_property_list_v<PropertyListT>,
10567 PropertyListT props = {}) {
10569 return atomic_update<Op, T, N>(acc, byte_offset, mask, props);
10597 atomic_op Op,
typename T,
int N,
typename OffsetSimdViewT,
10598 typename AccessorTy,
10600 __ESIMD_API std::enable_if_t<
10601 __ESIMD_DNS::get_num_args<Op>() == 0 &&
10602 __ESIMD_DNS::is_rw_device_accessor_v<AccessorTy> &&
10603 ext::oneapi::experimental::is_property_list_v<PropertyListT> &&
10604 detail::is_simd_view_type_v<OffsetSimdViewT>,
10607 PropertyListT props = {}) {
10608 return atomic_update<Op, T, N>(acc, byte_offset.read(), mask, props);
10631 atomic_op Op,
typename T,
int N,
typename OffsetSimdViewT,
10632 typename AccessorTy,
10634 __ESIMD_API std::enable_if_t<
10635 __ESIMD_DNS::get_num_args<Op>() == 0 &&
10636 __ESIMD_DNS::is_rw_device_accessor_v<AccessorTy> &&
10637 ext::oneapi::experimental::is_property_list_v<PropertyListT> &&
10638 detail::is_simd_view_type_v<OffsetSimdViewT>,
10641 PropertyListT props = {}) {
10643 return atomic_update<Op, T, N>(acc, byte_offset.read(), mask, props);
10664 template <
atomic_op Op,
typename T,
int N,
typename Toffset,
10665 typename AccessorTy>
10667 std::enable_if_t<__ESIMD_DNS::get_num_args<Op>() == 0 &&
10668 __ESIMD_DNS::is_rw_device_accessor_v<AccessorTy>,
10692 template <atomic_op Op,
typename T,
int N,
typename AccessorTy>
10694 std::enable_if_t<__ESIMD_DNS::get_num_args<Op>() == 0 &&
10695 __ESIMD_DNS::is_rw_local_accessor_v<AccessorTy>,
10756 atomic_op Op,
typename T,
int N,
typename Toffset,
typename AccessorTy,
10758 __ESIMD_API std::enable_if_t<
10759 __ESIMD_DNS::get_num_args<Op>() == 1 &&
10760 __ESIMD_DNS::is_rw_device_accessor_v<AccessorTy> &&
10761 ext::oneapi::experimental::is_property_list_v<PropertyListT>,
10765 #ifdef __ESIMD_FORCE_STATELESS_MEM
10766 return atomic_update<Op, T, N>(__ESIMD_DNS::accessorToPointer<T>(acc),
10767 byte_offset,
src0, mask, props);
10769 static_assert(std::is_integral_v<Toffset>,
"Unsupported offset type");
10770 static_assert(
sizeof(Toffset) == 4,
"Only 32 bit offset is supported");
10772 if constexpr (detail::has_cache_hints<PropertyListT>() ||
10778 acc, byte_offset,
src0, mask);
10780 if constexpr (std::is_integral_v<T>) {
10781 return atomic_update<atomic_op::xchg, T, N>(acc, byte_offset,
src0, mask,
10784 using Tint = detail::uint_type_t<
sizeof(T)>;
10785 simd<Tint, N> Res = atomic_update<atomic_op::xchg, Tint, N>(
10786 acc, byte_offset,
src0.template bit_cast_view<Tint>(), mask, props);
10787 return Res.template bit_cast_view<T>();
10790 detail::check_atomic<Op, T, N, 1>();
10791 static_assert(
sizeof(T) == 4,
"Only 32 bit data is supported");
10793 using Tx =
typename detail::__raw_t<T>;
10794 return __esimd_dword_atomic1<Op, Tx, N>(
10795 mask.
data(), si, byte_offset.
data(),
10835 atomic_op Op,
typename SrcSimdViewT,
typename Toffset,
10836 typename T = SrcSimdViewT::value_type::element_type,
int N,
10837 typename AccessorTy,
10839 __ESIMD_API std::enable_if_t<
10840 __ESIMD_DNS::get_num_args<Op>() == 1 &&
10841 detail::is_simd_view_type_v<SrcSimdViewT> &&
10842 __ESIMD_DNS::is_rw_device_accessor_v<AccessorTy> &&
10843 ext::oneapi::experimental::is_property_list_v<PropertyListT>,
10847 static_assert(N == SrcSimdViewT::getSizeX() * SrcSimdViewT::getSizeY(),
10848 "Size of src0 parameter must correspond to the size of "
10849 "byte_offset parameter.");
10850 return atomic_update<Op, T, N>(acc, byte_offset,
src0.read(), mask, props);
10886 atomic_op Op,
typename T,
int N,
typename Toffset,
typename AccessorTy,
10888 __ESIMD_API std::enable_if_t<
10889 __ESIMD_DNS::get_num_args<Op>() == 1 &&
10890 __ESIMD_DNS::is_rw_device_accessor_v<AccessorTy> &&
10891 ext::oneapi::experimental::is_property_list_v<PropertyListT>,
10894 PropertyListT props = {}) {
10896 return atomic_update<Op, T, N>(acc, byte_offset,
src0, mask, props);
10932 atomic_op Op,
typename SrcSimdViewT,
typename Toffset,
10933 typename T = SrcSimdViewT::value_type::element_type,
int N,
10934 typename AccessorTy,
10936 __ESIMD_API std::enable_if_t<
10937 __ESIMD_DNS::get_num_args<Op>() == 1 &&
10938 detail::is_simd_view_type_v<SrcSimdViewT> &&
10939 __ESIMD_DNS::is_rw_device_accessor_v<AccessorTy> &&
10940 ext::oneapi::experimental::is_property_list_v<PropertyListT>,
10943 PropertyListT props = {}) {
10944 static_assert(N == SrcSimdViewT::getSizeX() * SrcSimdViewT::getSizeY(),
10945 "Size of src0 parameter must correspond to the size of "
10946 "byte_offset parameter.");
10947 return atomic_update<Op, T, N>(acc, byte_offset,
src0.read(), props);
10983 atomic_op Op,
typename T,
int N,
typename OffsetSimdViewT,
10984 typename AccessorTy,
10986 __ESIMD_API std::enable_if_t<
10987 __ESIMD_DNS::get_num_args<Op>() == 1 &&
10988 __ESIMD_DNS::is_rw_device_accessor_v<AccessorTy> &&
10989 ext::oneapi::experimental::is_property_list_v<PropertyListT> &&
10990 detail::is_simd_view_type_v<OffsetSimdViewT>,
10994 return atomic_update<Op, T, N>(acc, byte_offset.read(),
src0, mask, props);
11029 atomic_op Op,
typename SrcSimdViewT,
typename OffsetSimdViewT,
11030 typename T = SrcSimdViewT::value_type::element_type,
11031 int N = SrcSimdViewT::getSizeX() * SrcSimdViewT::getSizeY(),
11032 typename AccessorTy,
11034 __ESIMD_API std::enable_if_t<
11035 __ESIMD_DNS::get_num_args<Op>() == 1 &&
11036 __ESIMD_DNS::is_rw_device_accessor_v<AccessorTy> &&
11037 ext::oneapi::experimental::is_property_list_v<PropertyListT> &&
11038 detail::is_simd_view_type_v<OffsetSimdViewT> &&
11039 detail::is_simd_view_type_v<SrcSimdViewT>,
11043 static_assert(N == OffsetSimdViewT::getSizeX() * OffsetSimdViewT::getSizeY(),
11044 "Size of src0 parameter must correspond to the size of "
11045 "byte_offset parameter.");
11046 return atomic_update<Op, T, N>(acc, byte_offset.read(),
src0.read(), mask,
11081 atomic_op Op,
typename T,
int N,
typename OffsetSimdViewT,
11082 typename AccessorTy,
11084 __ESIMD_API std::enable_if_t<
11085 __ESIMD_DNS::get_num_args<Op>() == 1 &&
11086 __ESIMD_DNS::is_rw_device_accessor_v<AccessorTy> &&
11087 ext::oneapi::experimental::is_property_list_v<PropertyListT> &&
11088 detail::is_simd_view_type_v<OffsetSimdViewT>,
11091 PropertyListT props = {}) {
11093 return atomic_update<Op, T, N>(acc, byte_offset.read(),
src0, mask, props);
11126 atomic_op Op,
typename SrcSimdViewT,
typename OffsetSimdViewT,
11127 typename T = SrcSimdViewT::value_type::element_type,
11128 int N = SrcSimdViewT::getSizeX() * SrcSimdViewT::getSizeY(),
11129 typename AccessorTy,
11131 __ESIMD_API std::enable_if_t<
11132 __ESIMD_DNS::get_num_args<Op>() == 1 &&
11133 __ESIMD_DNS::is_rw_device_accessor_v<AccessorTy> &&
11134 ext::oneapi::experimental::is_property_list_v<PropertyListT> &&
11135 detail::is_simd_view_type_v<OffsetSimdViewT> &&
11136 detail::is_simd_view_type_v<SrcSimdViewT>,
11139 PropertyListT props = {}) {
11140 static_assert(N == OffsetSimdViewT::getSizeX() * OffsetSimdViewT::getSizeY(),
11141 "Size of src0 parameter must correspond to the size of "
11142 "byte_offset parameter.");
11143 return atomic_update<Op, T, N>(acc, byte_offset.read(),
src0.read(), props);
11167 template <
atomic_op Op,
typename T,
int N,
typename Toffset,
11168 typename AccessorTy>
11169 __ESIMD_API std::enable_if_t<
11170 __ESIMD_DNS::is_rw_device_accessor_v<AccessorTy> &&
11197 template <atomic_op Op,
typename Tx,
int N,
typename AccessorTy>
11198 __ESIMD_API std::enable_if_t<
11199 __ESIMD_DNS::is_rw_local_accessor_v<AccessorTy> &&
11261 atomic_op Op,
typename T,
int N,
typename Toffset,
typename AccessorTy,
11263 __ESIMD_API std::enable_if_t<
11264 __ESIMD_DNS::get_num_args<Op>() == 2 && std::is_integral_v<Toffset> &&
11265 __ESIMD_DNS::is_rw_device_accessor_v<AccessorTy> &&
11266 ext::oneapi::experimental::is_property_list_v<PropertyListT>,
11270 #ifdef __ESIMD_FORCE_STATELESS_MEM
11271 return atomic_update<Op, T, N>(__ESIMD_DNS::accessorToPointer<T>(acc),
11272 byte_offset,
src0,
src1, mask, props);
11274 static_assert(std::is_integral_v<Toffset>,
"Unsupported offset type");
11275 static_assert(
sizeof(Toffset) == 4,
"Only 32 bit offset is supported");
11279 if constexpr (detail::has_cache_hints<PropertyListT>() ||
11287 acc, byte_offset,
src1,
src0, mask);
11289 detail::check_atomic<Op, T, N, 2>();
11290 static_assert(
sizeof(T) == 4,
"Only 32 bit data is supported");
11292 using Tx =
typename detail::__raw_t<T>;
11293 return __esimd_dword_atomic2<Op, Tx, N>(
11294 mask.
data(), si, byte_offset.
data(),
11334 atomic_op Op,
typename SrcSimdViewT,
typename T,
int N,
typename Toffset,
11335 typename AccessorTy,
11337 __ESIMD_API std::enable_if_t<
11338 __ESIMD_DNS::get_num_args<Op>() == 2 && std::is_integral_v<Toffset> &&
11339 detail::is_simd_view_type_v<SrcSimdViewT> &&
11340 __ESIMD_DNS::is_rw_device_accessor_v<AccessorTy> &&
11341 ext::oneapi::experimental::is_property_list_v<PropertyListT>,
11345 static_assert(N == SrcSimdViewT::getSizeX() * SrcSimdViewT::getSizeY(),
11346 "Size of src0 parameter must correspond to the size of "
11347 "byte_offset parameter.");
11348 return atomic_update<Op, T, N>(acc, byte_offset,
src0.read(),
src1, mask,
11385 atomic_op Op,
typename SrcSimdViewT,
typename T,
int N,
typename Toffset,
11386 typename AccessorTy,
11388 __ESIMD_API std::enable_if_t<
11389 __ESIMD_DNS::get_num_args<Op>() == 2 && std::is_integral_v<Toffset> &&
11390 detail::is_simd_view_type_v<SrcSimdViewT> &&
11391 __ESIMD_DNS::is_rw_device_accessor_v<AccessorTy> &&
11392 ext::oneapi::experimental::is_property_list_v<PropertyListT>,
11396 static_assert(N == SrcSimdViewT::getSizeX() * SrcSimdViewT::getSizeY(),
11397 "Size of src1 parameter must correspond to the size of "
11398 "byte_offset parameter.");
11399 return atomic_update<Op, T, N>(acc, byte_offset,
src0,
src1.read(), mask,
11437 typename T = SrcSimdViewT::value_type::element_type,
int N,
11438 typename Toffset,
typename AccessorTy,
11440 __ESIMD_API std::enable_if_t<
11441 __ESIMD_DNS::get_num_args<Op>() == 2 && std::is_integral_v<Toffset> &&
11442 detail::is_simd_view_type_v<SrcSimdViewT> &&
11443 __ESIMD_DNS::is_rw_device_accessor_v<AccessorTy> &&
11444 ext::oneapi::experimental::is_property_list_v<PropertyListT>,
11449 N == SrcSimdViewT::getSizeX() * SrcSimdViewT::getSizeY(),
11450 "Size of src0 and src1 parameters must correspond to the size of "
11451 "byte_offset parameter.");
11452 return atomic_update<Op, T, N>(acc, byte_offset,
src0.read(),
src1.read(),
11478 atomic_op Op,
typename T,
int N,
typename Toffset,
typename AccessorTy,
11480 __ESIMD_API std::enable_if_t<
11481 __ESIMD_DNS::get_num_args<Op>() == 2 &&
11482 __ESIMD_DNS::is_rw_device_accessor_v<AccessorTy> &&
11483 ext::oneapi::experimental::is_property_list_v<PropertyListT>,
11488 return atomic_update<Op, T, N>(acc, byte_offset,
src0,
src1, mask, props);
11524 atomic_op Op,
typename SrcSimdViewT,
typename T,
int N,
typename Toffset,
11525 typename AccessorTy,
11527 __ESIMD_API std::enable_if_t<
11528 __ESIMD_DNS::get_num_args<Op>() == 2 && std::is_integral_v<Toffset> &&
11529 detail::is_simd_view_type_v<SrcSimdViewT> &&
11530 __ESIMD_DNS::is_rw_device_accessor_v<AccessorTy> &&
11531 ext::oneapi::experimental::is_property_list_v<PropertyListT>,
11535 static_assert(N == SrcSimdViewT::getSizeX() * SrcSimdViewT::getSizeY(),
11536 "Size of src0 parameter must correspond to the size of "
11537 "byte_offset parameter.");
11538 return atomic_update<Op, T, N>(acc, byte_offset,
src0.read(),
src1, props);
11574 atomic_op Op,
typename SrcSimdViewT,
typename T,
int N,
typename Toffset,
11575 typename AccessorTy,
11577 __ESIMD_API std::enable_if_t<
11578 __ESIMD_DNS::get_num_args<Op>() == 2 && std::is_integral_v<Toffset> &&
11579 detail::is_simd_view_type_v<SrcSimdViewT> &&
11580 __ESIMD_DNS::is_rw_device_accessor_v<AccessorTy> &&
11581 ext::oneapi::experimental::is_property_list_v<PropertyListT>,
11584 SrcSimdViewT
src1, PropertyListT props = {}) {
11585 static_assert(N == SrcSimdViewT::getSizeX() * SrcSimdViewT::getSizeY(),
11586 "Size of src1 parameter must correspond to the size of "
11587 "byte_offset parameter.");
11588 return atomic_update<Op, T, N>(acc, byte_offset,
src0,
src1.read(), props);
11625 typename T = SrcSimdViewT::value_type::element_type,
int N,
11626 typename Toffset,
typename AccessorTy,
11628 __ESIMD_API std::enable_if_t<
11629 __ESIMD_DNS::get_num_args<Op>() == 2 && std::is_integral_v<Toffset> &&
11630 detail::is_simd_view_type_v<SrcSimdViewT> &&
11631 __ESIMD_DNS::is_rw_device_accessor_v<AccessorTy> &&
11632 ext::oneapi::experimental::is_property_list_v<PropertyListT>,
11635 SrcSimdViewT
src1, PropertyListT props = {}) {
11637 N == SrcSimdViewT::getSizeX() * SrcSimdViewT::getSizeY(),
11638 "Size of src0 and src1 parameters must correspond to the size of "
11639 "byte_offset parameter.");
11640 return atomic_update<Op, T, N>(acc, byte_offset,
src0.read(),
src1.read(),
11668 atomic_op Op,
typename T,
int N,
typename OffsetSimdViewT,
11669 typename AccessorTy,
11671 __ESIMD_API std::enable_if_t<
11672 __ESIMD_DNS::get_num_args<Op>() == 2 &&
11673 __ESIMD_DNS::is_rw_device_accessor_v<AccessorTy> &&
11674 ext::oneapi::experimental::is_property_list_v<PropertyListT> &&
11675 detail::is_simd_view_type_v<OffsetSimdViewT>,
11679 return atomic_update<Op, T, N>(acc, byte_offset.read(),
src0,
src1, mask,
11708 atomic_op Op,
typename SrcSimdViewT,
typename OffsetSimdViewT,
typename T,
11709 int N,
typename AccessorTy,
11711 __ESIMD_API std::enable_if_t<
11712 __ESIMD_DNS::get_num_args<Op>() == 2 &&
11713 __ESIMD_DNS::is_rw_device_accessor_v<AccessorTy> &&
11714 ext::oneapi::experimental::is_property_list_v<PropertyListT> &&
11715 detail::is_simd_view_type_v<OffsetSimdViewT> &&
11716 detail::is_simd_view_type_v<SrcSimdViewT>,
11721 N == SrcSimdViewT::getSizeX() * SrcSimdViewT::getSizeY() &&
11722 N == OffsetSimdViewT::getSizeX() * OffsetSimdViewT::getSizeY(),
11723 "Size of src0 and byte_offset parameters must correspond to the size of "
11724 "src1 parameter.");
11725 return atomic_update<Op, T, N>(acc, byte_offset.read(),
src0.read(),
src1,
11754 atomic_op Op,
typename SrcSimdViewT,
typename OffsetSimdViewT,
typename T,
11755 int N,
typename AccessorTy,
11757 __ESIMD_API std::enable_if_t<
11758 __ESIMD_DNS::get_num_args<Op>() == 2 &&
11759 __ESIMD_DNS::is_rw_device_accessor_v<AccessorTy> &&
11760 ext::oneapi::experimental::is_property_list_v<PropertyListT> &&
11761 detail::is_simd_view_type_v<OffsetSimdViewT> &&
11762 detail::is_simd_view_type_v<SrcSimdViewT>,
11767 N == SrcSimdViewT::getSizeX() * SrcSimdViewT::getSizeY() &&
11768 N == OffsetSimdViewT::getSizeX() * OffsetSimdViewT::getSizeY(),
11769 "Size of src1 and byte_offset parameters must correspond to the size of "
11770 "src0 parameter.");
11771 return atomic_update<Op, T, N>(acc, byte_offset.read(),
src0,
src1.read(),
11800 atomic_op Op,
typename SrcSimdViewT,
typename OffsetSimdViewT,
11801 typename T = SrcSimdViewT::value_type::element_type,
int N,
11802 typename AccessorTy,
11804 __ESIMD_API std::enable_if_t<
11805 __ESIMD_DNS::get_num_args<Op>() == 2 &&
11806 __ESIMD_DNS::is_rw_device_accessor_v<AccessorTy> &&
11807 ext::oneapi::experimental::is_property_list_v<PropertyListT> &&
11808 detail::is_simd_view_type_v<OffsetSimdViewT> &&
11809 detail::is_simd_view_type_v<SrcSimdViewT>,
11813 static_assert(N == SrcSimdViewT::getSizeX() * SrcSimdViewT::getSizeY() &&
11814 N == OffsetSimdViewT::getSizeX() *
11815 OffsetSimdViewT::getSizeY(),
11816 "Size of src0, src1 and byte_offset parameters must correspond "
11818 "mask parameter.");
11819 return atomic_update<Op, T, N>(acc, byte_offset.read(),
src0.read(),
11820 src1.read(), mask, props);
11845 atomic_op Op,
typename T,
int N,
typename OffsetSimdViewT,
11846 typename AccessorTy,
11848 __ESIMD_API std::enable_if_t<
11849 __ESIMD_DNS::get_num_args<Op>() == 2 &&
11850 __ESIMD_DNS::is_rw_device_accessor_v<AccessorTy> &&
11851 ext::oneapi::experimental::is_property_list_v<PropertyListT> &&
11852 detail::is_simd_view_type_v<OffsetSimdViewT>,
11857 return atomic_update<Op, T, N>(acc, byte_offset.read(),
src0,
src1, mask,
11884 atomic_op Op,
typename SrcSimdViewT,
typename OffsetSimdViewT,
typename T,
11885 int N,
typename AccessorTy,
11887 __ESIMD_API std::enable_if_t<
11888 __ESIMD_DNS::get_num_args<Op>() == 2 &&
11889 __ESIMD_DNS::is_rw_device_accessor_v<AccessorTy> &&
11890 ext::oneapi::experimental::is_property_list_v<PropertyListT> &&
11891 detail::is_simd_view_type_v<OffsetSimdViewT> &&
11892 detail::is_simd_view_type_v<SrcSimdViewT>,
11897 N == SrcSimdViewT::getSizeX() * SrcSimdViewT::getSizeY() &&
11898 N == OffsetSimdViewT::getSizeX() * OffsetSimdViewT::getSizeY(),
11899 "Size of src0 and byte_offset parameters must correspond to the size of "
11900 "src1 parameter.");
11901 return atomic_update<Op, T, N>(acc, byte_offset.read(),
src0.read(),
src1,
11928 atomic_op Op,
typename SrcSimdViewT,
typename OffsetSimdViewT,
typename T,
11929 int N,
typename AccessorTy,
11931 __ESIMD_API std::enable_if_t<
11932 __ESIMD_DNS::get_num_args<Op>() == 2 &&
11933 __ESIMD_DNS::is_rw_device_accessor_v<AccessorTy> &&
11934 ext::oneapi::experimental::is_property_list_v<PropertyListT> &&
11935 detail::is_simd_view_type_v<OffsetSimdViewT> &&
11936 detail::is_simd_view_type_v<SrcSimdViewT>,
11939 SrcSimdViewT
src1, PropertyListT props = {}) {
11941 N == SrcSimdViewT::getSizeX() * SrcSimdViewT::getSizeY() &&
11942 N == OffsetSimdViewT::getSizeX() * OffsetSimdViewT::getSizeY(),
11943 "Size of src1 and byte_offset parameters must correspond to the size of "
11944 "src0 parameter.");
11945 return atomic_update<Op, T, N>(acc, byte_offset.read(),
src0,
src1.read(),
11972 atomic_op Op,
typename SrcSimdViewT,
typename OffsetSimdViewT,
11973 typename T = SrcSimdViewT::value_type::element_type,
11974 int N = SrcSimdViewT::getSizeX() * SrcSimdViewT::getSizeY(),
11975 typename AccessorTy,
11977 __ESIMD_API std::enable_if_t<
11978 __ESIMD_DNS::get_num_args<Op>() == 2 &&
11979 __ESIMD_DNS::is_rw_device_accessor_v<AccessorTy> &&
11980 ext::oneapi::experimental::is_property_list_v<PropertyListT> &&
11981 detail::is_simd_view_type_v<OffsetSimdViewT> &&
11982 detail::is_simd_view_type_v<SrcSimdViewT>,
11985 SrcSimdViewT
src1, PropertyListT props = {}) {
11987 N == OffsetSimdViewT::getSizeX() * OffsetSimdViewT::getSizeY(),
11988 "Size of src0, src1 and byte_offset parameters must correspond.");
11989 return atomic_update<Op, T, N>(acc, byte_offset.read(),
src0.read(),
11990 src1.read(), props);
12013 template <
atomic_op Op,
typename Tx,
int N,
typename Toffset,
12014 typename AccessorTy>
12015 __ESIMD_API std::enable_if_t<__ESIMD_DNS::is_rw_device_accessor_v<AccessorTy>,
12040 template <atomic_op Op,
typename Tx,
int N,
typename AccessorTy>
12041 __ESIMD_API std::enable_if_t<__ESIMD_DNS::is_rw_local_accessor_v<AccessorTy>,
12076 template <u
int8_t cntl> __ESIMD_API
void fence() { __esimd_fence(cntl); }
12091 "SLM fence must have 'none' lsc_fence_op and 'group' scope");
12092 constexpr
int N = 16;
12094 __esimd_lsc_fence<static_cast<uint8_t>(Kind),
static_cast<uint8_t
>(FenceOp),
12095 static_cast<uint8_t
>(Scope), N>(Mask.
data());
12127 template <
typename T,
int m,
int N,
typename AccessorTy,
unsigned plane = 0>
12130 constexpr
unsigned Width = N *
sizeof(T);
12131 static_assert(Width * m <= 256u,
12132 "data does not fit into a single dataport transaction");
12133 static_assert(Width <= 64u,
"valid block width is in range [1, 64]");
12134 static_assert(m <= 64u,
"valid block height is in range [1, 64]");
12135 static_assert(plane <= 3u,
"valid plane index is in range [0, 3]");
12139 using SurfIndTy = decltype(si);
12140 constexpr
unsigned int RoundedWidth =
12141 Width < 4 ? 4 : detail::getNextPowerOf2<Width>();
12142 constexpr
int BlockWidth =
sizeof(T) * N;
12143 constexpr
int Mod = 0;
12145 if constexpr (Width < RoundedWidth) {
12146 constexpr
unsigned int n1 = RoundedWidth /
sizeof(T);
12148 __esimd_media_ld<T, m, n1, Mod, SurfIndTy, (int)plane, BlockWidth>(
12150 return temp.template select<m, 1, N, 1>(0, 0);
12152 return __esimd_media_ld<T, m, N, Mod, SurfIndTy, (int)plane, BlockWidth>(
12169 template <
typename T,
int m,
int N,
typename AccessorTy,
unsigned plane = 0>
12172 constexpr
unsigned Width = N *
sizeof(T);
12173 static_assert(Width * m <= 256u,
12174 "data does not fit into a single dataport transaction");
12175 static_assert(Width <= 64u,
"valid block width is in range [1, 64]");
12176 static_assert(m <= 64u,
"valid block height is in range [1, 64]");
12177 static_assert(plane <= 3u,
"valid plane index is in range [0, 3]");
12179 using SurfIndTy = decltype(si);
12180 constexpr
unsigned int RoundedWidth =
12181 Width < 4 ? 4 : detail::getNextPowerOf2<Width>();
12182 constexpr
unsigned int n1 = RoundedWidth /
sizeof(T);
12183 constexpr
int BlockWidth =
sizeof(T) * N;
12184 constexpr
int Mod = 0;
12186 if constexpr (Width < RoundedWidth) {
12188 auto temp_ref = temp.template bit_cast_view<T, m, n1>();
12189 auto vals_ref = vals.template bit_cast_view<T, m, N>();
12190 temp_ref.template select<m, 1, N, 1>() = vals_ref;
12191 __esimd_media_st<T, m, n1, Mod, SurfIndTy, plane, BlockWidth>(si,
x,
y,
12194 __esimd_media_st<T, m, N, Mod, SurfIndTy, plane, BlockWidth>(si,
x,
y,
12217 template <
typename T,
int N,
typename AccessorTy,
12220 std::enable_if_t<detail::is_local_accessor_with_v<
12221 AccessorTy, detail::accessor_mode_cap::can_read> &&
12222 is_simd_flag_type_v<Flags>,
12225 return slm_block_load<T, N>(byte_offset + detail::localAccessorToOffset(acc),
12246 template <
typename T,
int N,
typename AccessorT,
typename Flags>
12248 std::enable_if_t<detail::is_local_accessor_with_v<
12249 AccessorT, detail::accessor_mode_cap::can_write> &&
12250 is_simd_flag_type_v<Flags>>
12252 slm_block_store<T, N>(offset + __ESIMD_DNS::localAccessorToOffset(acc), vals,
12328 typename T,
int N,
int VS,
typename AccessorT,
12330 __ESIMD_API std::enable_if_t<
12331 (detail::is_local_accessor_with_v<AccessorT,
12332 detail::accessor_mode_cap::can_read> &&
12333 ext::oneapi::experimental::is_property_list_v<PropertyListT>),
12337 return slm_gather<T, N, VS>(byte_offsets +
12338 __ESIMD_DNS::localAccessorToOffset(acc),
12339 mask, pass_thru, props);
12373 typename T,
int N,
int VS,
typename AccessorT,
12375 __ESIMD_API std::enable_if_t<
12376 (detail::is_local_accessor_with_v<AccessorT,
12377 detail::accessor_mode_cap::can_read> &&
12378 ext::oneapi::experimental::is_property_list_v<PropertyListT>),
12382 return slm_gather<T, N, VS>(
12383 byte_offsets + __ESIMD_DNS::localAccessorToOffset(acc), mask, props);
12411 typename T,
int N,
int VS,
typename AccessorT,
12413 __ESIMD_API std::enable_if_t<
12414 (detail::is_local_accessor_with_v<AccessorT,
12415 detail::accessor_mode_cap::can_read> &&
12416 ext::oneapi::experimental::is_property_list_v<PropertyListT>),
12419 PropertyListT props = {}) {
12420 return slm_gather<T, N, VS>(
12421 byte_offsets + __ESIMD_DNS::localAccessorToOffset(acc), props);
12438 typename T,
int N,
typename AccessorT,
typename MaskT,
12440 __ESIMD_API std::enable_if_t<
12441 (detail::is_local_accessor_with_v<AccessorT,
12442 detail::accessor_mode_cap::can_read> &&
12443 std::is_same_v<MaskT, simd_mask<N>> &&
12444 ext::oneapi::experimental::is_property_list_v<PropertyListT>),
12447 simd<T, N> pass_thru, PropertyListT props = {}) {
12448 return slm_gather<T, N>(byte_offsets +
12449 __ESIMD_DNS::localAccessorToOffset(acc),
12450 mask, pass_thru, props);
12465 typename T,
int N,
typename AccessorT,
typename MaskT,
12467 __ESIMD_API std::enable_if_t<
12468 (detail::is_local_accessor_with_v<AccessorT,
12469 detail::accessor_mode_cap::can_read> &&
12470 std::is_same_v<MaskT, simd_mask<N>> &&
12471 ext::oneapi::experimental::is_property_list_v<PropertyListT>),
12474 PropertyListT props = {}) {
12475 return slm_gather<T, N>(
12476 byte_offsets + __ESIMD_DNS::localAccessorToOffset(acc), mask, props);
12487 typename T,
int N,
typename AccessorT,
12489 __ESIMD_API std::enable_if_t<
12490 (detail::is_local_accessor_with_v<AccessorT,
12491 detail::accessor_mode_cap::can_read> &&
12492 ext::oneapi::experimental::is_property_list_v<PropertyListT>),
12495 PropertyListT props = {}) {
12496 return slm_gather<T, N>(
12497 byte_offsets + __ESIMD_DNS::localAccessorToOffset(acc), props);
12509 typename T,
int N,
int VS = 1,
typename AccessorT,
typename OffsetSimdViewT,
12511 __ESIMD_API std::enable_if_t<
12512 (detail::is_local_accessor_with_v<AccessorT,
12513 detail::accessor_mode_cap::can_read> &&
12514 detail::is_simd_view_type_v<OffsetSimdViewT> &&
12515 ext::oneapi::experimental::is_property_list_v<PropertyListT>),
12518 simd<T, N> pass_thru, PropertyListT props = {}) {
12519 return gather<T, N, VS>(acc, byte_offsets.read(), mask, pass_thru, props);
12530 int VS,
typename T,
int N,
typename AccessorT,
typename OffsetSimdViewT,
12532 __ESIMD_API std::enable_if_t<
12533 (detail::is_local_accessor_with_v<AccessorT,
12534 detail::accessor_mode_cap::can_read> &&
12535 detail::is_simd_view_type_v<OffsetSimdViewT> &&
12536 ext::oneapi::experimental::is_property_list_v<PropertyListT>),
12539 simd<T, N> pass_thru, PropertyListT props = {}) {
12540 static_assert(N / VS ==
12541 OffsetSimdViewT::getSizeX() * OffsetSimdViewT::getSizeY(),
12542 "Size of pass_thru parameter must correspond to the size of "
12543 "byte_offsets parameter.");
12544 return gather<T, N, VS>(acc, byte_offsets.read(), mask, pass_thru, props);
12558 int VS = 1,
typename AccessorT,
typename OffsetSimdViewT,
12559 typename PassThruSimdViewT,
12560 int N = PassThruSimdViewT::getSizeX() * PassThruSimdViewT::getSizeY(),
12561 typename T = PassThruSimdViewT::value_type::element_type,
12563 __ESIMD_API std::enable_if_t<
12564 (detail::is_local_accessor_with_v<AccessorT,
12565 detail::accessor_mode_cap::can_read> &&
12566 detail::is_simd_view_type_v<OffsetSimdViewT> &&
12567 detail::is_simd_view_type_v<PassThruSimdViewT> &&
12568 ext::oneapi::experimental::is_property_list_v<PropertyListT>),
12571 PassThruSimdViewT pass_thru, PropertyListT props = {}) {
12572 static_assert(N / VS ==
12573 OffsetSimdViewT::getSizeX() * OffsetSimdViewT::getSizeY(),
12574 "Size of pass_thru parameter must correspond to the size of "
12575 "byte_offsets parameter.");
12576 return gather<T, N, VS>(acc, byte_offsets.read(), mask, pass_thru.read(),
12591 int VS = 1,
typename AccessorT,
typename PassThruSimdViewT,
12592 int N = PassThruSimdViewT::getSizeX() * PassThruSimdViewT::getSizeY(),
12593 typename T = PassThruSimdViewT::value_type::element_type,
12595 __ESIMD_API std::enable_if_t<
12596 (detail::is_local_accessor_with_v<AccessorT,
12597 detail::accessor_mode_cap::can_read> &&
12598 detail::is_simd_view_type_v<PassThruSimdViewT> &&
12599 ext::oneapi::experimental::is_property_list_v<PropertyListT>),
12603 PropertyListT props = {}) {
12604 return gather<T, N, VS>(acc, byte_offsets, mask, pass_thru.read(), props);
12616 typename T,
int N,
int VS = 1,
typename AccessorT,
typename OffsetSimdViewT,
12618 __ESIMD_API std::enable_if_t<
12619 (detail::is_local_accessor_with_v<AccessorT,
12620 detail::accessor_mode_cap::can_read> &&
12621 detail::is_simd_view_type_v<OffsetSimdViewT> &&
12622 ext::oneapi::experimental::is_property_list_v<PropertyListT>),
12625 PropertyListT props = {}) {
12626 return gather<T, N, VS>(acc, byte_offsets.read(), mask, props);
12637 typename T,
int N,
int VS = 1,
typename AccessorT,
typename OffsetSimdViewT,
12639 __ESIMD_API std::enable_if_t<
12640 (detail::is_local_accessor_with_v<AccessorT,
12641 detail::accessor_mode_cap::can_read> &&
12642 detail::is_simd_view_type_v<OffsetSimdViewT> &&
12643 ext::oneapi::experimental::is_property_list_v<PropertyListT>),
12645 gather(AccessorT acc, OffsetSimdViewT byte_offsets, PropertyListT props = {}) {
12646 return gather<T, N, VS>(acc, byte_offsets.read(), props);
12666 template <
typename T,
int N,
typename AccessorTy>
12668 std::enable_if_t<detail::is_local_accessor_with_v<
12669 AccessorTy, detail::accessor_mode_cap::can_read>,
12673 return slm_gather<T, N>(
12674 offsets + glob_offset + __ESIMD_DNS::localAccessorToOffset(acc), mask);
12740 typename T,
int N,
int VS = 1,
typename AccessorT,
12742 __ESIMD_API std::enable_if_t<
12743 detail::is_local_accessor_with_v<AccessorT,
12744 detail::accessor_mode_cap::can_write> &&
12745 ext::oneapi::experimental::is_property_list_v<PropertyListT>>
12748 slm_scatter<T, N, VS>(byte_offsets + __ESIMD_DNS::localAccessorToOffset(acc),
12749 vals, mask, props);
12775 typename T,
int N,
int VS = 1,
typename AccessorT,
12777 __ESIMD_API std::enable_if_t<
12778 detail::is_local_accessor_with_v<AccessorT,
12779 detail::accessor_mode_cap::can_write> &&
12780 ext::oneapi::experimental::is_property_list_v<PropertyListT>>
12782 PropertyListT props = {}) {
12784 scatter<T, N, VS>(acc, byte_offsets, vals, Mask, props);
12815 typename T,
int N,
int VS = 1,
typename OffsetSimdViewT,
typename AccessorT,
12817 __ESIMD_API std::enable_if_t<
12818 detail::is_local_accessor_with_v<AccessorT,
12819 detail::accessor_mode_cap::can_write> &&
12820 detail::is_simd_view_type_v<OffsetSimdViewT> &&
12821 ext::oneapi::experimental::is_property_list_v<PropertyListT>>
12824 scatter<T, N, VS>(acc, byte_offsets.read(), vals, mask, props);
12853 int VS,
typename AccessorTy,
typename T,
int N,
typename OffsetSimdViewT,
12855 __ESIMD_API std::enable_if_t<
12856 detail::is_local_accessor_with_v<AccessorTy,
12857 detail::accessor_mode_cap::can_write> &&
12858 detail::is_simd_view_type_v<OffsetSimdViewT> &&
12859 ext::oneapi::experimental::is_property_list_v<PropertyListT>>
12862 static_assert(N / VS ==
12863 OffsetSimdViewT::getSizeX() * OffsetSimdViewT::getSizeY(),
12864 "Size of vals parameter must correspond to the size of "
12865 "byte_offsets parameter.");
12866 scatter<T, N, VS>(acc, byte_offsets.read(), vals, mask, props);
12890 int VS,
typename AccessorTy,
typename T,
int N,
typename OffsetSimdViewT,
12892 __ESIMD_API std::enable_if_t<
12893 detail::is_local_accessor_with_v<AccessorTy,
12894 detail::accessor_mode_cap::can_write> &&
12895 detail::is_simd_view_type_v<OffsetSimdViewT> &&
12896 ext::oneapi::experimental::is_property_list_v<PropertyListT>>
12898 PropertyListT props = {}) {
12899 static_assert(N / VS ==
12900 OffsetSimdViewT::getSizeX() * OffsetSimdViewT::getSizeY(),
12901 "Size of vals parameter must correspond to the size of "
12902 "byte_offsets parameter.");
12903 scatter<T, N, VS>(acc, byte_offsets.read(), vals, props);
12935 int VS = 1,
typename AccessorTy,
typename ValuesSimdViewT,
12936 typename OffsetSimdViewT,
12937 int N = ValuesSimdViewT::getSizeX() * ValuesSimdViewT::getSizeY(),
12938 typename T = ValuesSimdViewT::value_type::element_type,
12940 __ESIMD_API std::enable_if_t<
12941 detail::is_local_accessor_with_v<AccessorTy,
12942 detail::accessor_mode_cap::can_write> &&
12943 detail::is_simd_view_type_v<OffsetSimdViewT> &&
12944 detail::is_simd_view_type_v<ValuesSimdViewT> &&
12945 ext::oneapi::experimental::is_property_list_v<PropertyListT>>
12946 scatter(AccessorTy acc, OffsetSimdViewT byte_offsets, ValuesSimdViewT vals,
12948 static_assert(N / VS ==
12949 OffsetSimdViewT::getSizeX() * OffsetSimdViewT::getSizeY(),
12950 "Size of vals parameter must correspond to the size of "
12951 "byte_offsets parameter.");
12952 scatter<T, N, VS>(acc, byte_offsets.read(), vals.read(), mask, props);
12979 int VS = 1,
typename AccessorTy,
typename ValuesSimdViewT,
12980 typename OffsetSimdViewT,
12981 int N = ValuesSimdViewT::getSizeX() * ValuesSimdViewT::getSizeY(),
12982 typename T = ValuesSimdViewT::value_type::element_type,
12984 __ESIMD_API std::enable_if_t<
12985 detail::is_local_accessor_with_v<AccessorTy,
12986 detail::accessor_mode_cap::can_write> &&
12987 detail::is_simd_view_type_v<OffsetSimdViewT> &&
12988 detail::is_simd_view_type_v<ValuesSimdViewT> &&
12989 ext::oneapi::experimental::is_property_list_v<PropertyListT>>
12990 scatter(AccessorTy acc, OffsetSimdViewT byte_offsets, ValuesSimdViewT vals,
12991 PropertyListT props = {}) {
12992 static_assert(N / VS ==
12993 OffsetSimdViewT::getSizeX() * OffsetSimdViewT::getSizeY(),
12994 "Size of vals parameter must correspond to the size of "
12995 "byte_offsets parameter.");
12996 scatter<T, N, VS>(acc, byte_offsets.read(), vals.read(), props);
13028 int VS = 1,
typename AccessorTy,
typename ValuesSimdViewT,
typename OffsetT,
13029 int N = ValuesSimdViewT::getSizeX() * ValuesSimdViewT::getSizeY(),
13030 typename T = ValuesSimdViewT::value_type::element_type,
13032 __ESIMD_API std::enable_if_t<
13033 detail::is_local_accessor_with_v<AccessorTy,
13034 detail::accessor_mode_cap::can_write> &&
13035 detail::is_simd_view_type_v<ValuesSimdViewT> &&
13036 ext::oneapi::experimental::is_property_list_v<PropertyListT>>
13039 PropertyListT props = {}) {
13040 scatter<T, N, VS>(acc, byte_offsets, vals.read(), mask, props);
13067 int VS = 1,
typename AccessorTy,
typename ValuesSimdViewT,
typename OffsetT,
13068 int N = ValuesSimdViewT::getSizeX() * ValuesSimdViewT::getSizeY(),
13069 typename T = ValuesSimdViewT::value_type::element_type,
13071 __ESIMD_API std::enable_if_t<
13072 detail::is_local_accessor_with_v<AccessorTy,
13073 detail::accessor_mode_cap::can_write> &&
13074 detail::is_simd_view_type_v<ValuesSimdViewT> &&
13075 ext::oneapi::experimental::is_property_list_v<PropertyListT>>
13077 ValuesSimdViewT vals, PropertyListT props = {}) {
13078 scatter<T, N, VS>(acc, byte_offsets, vals.read(), props);
13106 typename T,
int N,
int VS = 1,
typename OffsetSimdViewT,
typename AccessorT,
13108 __ESIMD_API std::enable_if_t<
13109 detail::is_local_accessor_with_v<AccessorT,
13110 detail::accessor_mode_cap::can_write> &&
13111 detail::is_simd_view_type_v<OffsetSimdViewT> &&
13112 ext::oneapi::experimental::is_property_list_v<PropertyListT>>
13114 PropertyListT props = {}) {
13116 scatter<T, N, VS>(acc, byte_offsets.read(), vals, Mask, props);
13137 template <
typename T,
int N,
typename AccessorTy>
13138 __ESIMD_API std::enable_if_t<detail::is_local_accessor_with_v<
13139 AccessorTy, detail::accessor_mode_cap::can_write>>
13142 slm_scatter<T, N>(offsets + glob_offset +
13143 __ESIMD_DNS::localAccessorToOffset(acc),
13211 typename T,
int N,
int VS,
typename OffsetT,
13213 __ESIMD_API std::enable_if_t<
13214 ext::oneapi::experimental::is_property_list_v<PropertyListT>>
13216 PropertyListT props = {}) {
13217 static_assert(N / VS >= 1 && N % VS == 0,
"N must be divisible by VS");
13219 PropertyListT>(p, byte_offsets, mask);
13239 typename T,
int N,
int VS,
typename OffsetT,
13241 __ESIMD_API std::enable_if_t<
13242 ext::oneapi::experimental::is_property_list_v<PropertyListT>>
13244 PropertyListT props = {}) {
13269 typename T,
int N,
typename OffsetT,
13271 __ESIMD_API std::enable_if_t<
13272 ext::oneapi::experimental::is_property_list_v<PropertyListT>>
13274 PropertyListT props = {}) {
13275 constexpr
int VS = 1;
13294 typename T,
int N,
typename OffsetT,
13296 __ESIMD_API std::enable_if_t<
13297 ext::oneapi::experimental::is_property_list_v<PropertyListT>>
13299 constexpr
int VS = 1;
13324 typename T,
int N,
int VS = 1,
typename OffsetSimdViewT,
13326 __ESIMD_API std::enable_if_t<
13327 detail::is_simd_view_type_v<OffsetSimdViewT> &&
13328 ext::oneapi::experimental::is_property_list_v<PropertyListT>>
13330 PropertyListT props = {}) {
13352 typename T,
int N,
int VS = 1,
typename OffsetSimdViewT,
13354 __ESIMD_API std::enable_if_t<
13355 detail::is_simd_view_type_v<OffsetSimdViewT> &&
13356 ext::oneapi::experimental::is_property_list_v<PropertyListT>>
13357 prefetch(
const T *p, OffsetSimdViewT byte_offsets, PropertyListT props = {}) {
13382 int VS = 1,
typename OffsetSimdViewT,
typename T,
13383 int N = OffsetSimdViewT::getSizeX() * OffsetSimdViewT::getSizeY() * VS,
13385 __ESIMD_API std::enable_if_t<
13386 detail::is_simd_view_type_v<OffsetSimdViewT> &&
13387 ext::oneapi::experimental::is_property_list_v<PropertyListT>>
13389 PropertyListT props = {}) {
13390 prefetch<T, N, VS>(p, byte_offsets.read(), mask, props);
13411 int VS = 1,
typename OffsetSimdViewT,
typename T,
13412 int N = OffsetSimdViewT::getSizeX() * OffsetSimdViewT::getSizeY() * VS,
13414 __ESIMD_API std::enable_if_t<
13415 detail::is_simd_view_type_v<OffsetSimdViewT> &&
13416 ext::oneapi::experimental::is_property_list_v<PropertyListT>>
13417 prefetch(
const T *p, OffsetSimdViewT byte_offsets, PropertyListT props = {}) {
13418 prefetch<T, N, VS>(p, byte_offsets.read(), props);
13448 typename T,
int VS = 1,
typename OffsetT,
13450 __ESIMD_API std::enable_if_t<
13451 std::is_integral_v<OffsetT> &&
13452 ext::oneapi::experimental::is_property_list_v<PropertyListT>>
13454 PropertyListT props = {}) {
13456 PropertyListT>(p, byte_offset, mask);
13475 typename T,
int VS = 1,
typename OffsetT,
13477 __ESIMD_API std::enable_if_t<
13478 std::is_integral_v<OffsetT> &&
13479 ext::oneapi::experimental::is_property_list_v<PropertyListT>>
13480 prefetch(
const T *p, OffsetT byte_offset, PropertyListT props = {}) {
13502 typename T,
int VS = 1,
13504 __ESIMD_API std::enable_if_t<
13505 ext::oneapi::experimental::is_property_list_v<PropertyListT>>
13523 typename T,
int VS = 1,
13525 __ESIMD_API std::enable_if_t<
13526 ext::oneapi::experimental::is_property_list_v<PropertyListT>>
13597 typename T,
int N,
int VS,
typename AccessorT,
typename OffsetT,
13599 __ESIMD_API std::enable_if_t<
13600 detail::is_device_accessor_with_v<AccessorT,
13601 detail::accessor_mode_cap::can_read> &&
13602 ext::oneapi::experimental::is_property_list_v<PropertyListT>>
13605 #ifdef __ESIMD_FORCE_STATELESS_MEM
13609 static_assert(N / VS >= 1 && N % VS == 0,
"N must be divisible by VS");
13611 PropertyListT>(acc, byte_offsets, mask);
13633 typename T,
int N,
int VS,
typename AccessorT,
typename OffsetT,
13635 __ESIMD_API std::enable_if_t<
13636 detail::is_device_accessor_with_v<AccessorT,
13637 detail::accessor_mode_cap::can_read> &&
13638 ext::oneapi::experimental::is_property_list_v<PropertyListT>>
13640 PropertyListT props = {}) {
13666 typename T,
int N,
typename AccessorT,
typename OffsetT,
13668 __ESIMD_API std::enable_if_t<
13669 detail::is_device_accessor_with_v<AccessorT,
13670 detail::accessor_mode_cap::can_read> &&
13671 ext::oneapi::experimental::is_property_list_v<PropertyListT>>
13673 PropertyListT props = {}) {
13674 constexpr
int VS = 1;
13694 typename T,
int N,
typename AccessorT,
typename OffsetT,
13696 __ESIMD_API std::enable_if_t<
13697 detail::is_device_accessor_with_v<AccessorT,
13698 detail::accessor_mode_cap::can_read> &&
13699 ext::oneapi::experimental::is_property_list_v<PropertyListT>>
13701 PropertyListT props = {}) {
13702 constexpr
int VS = 1;
13728 typename T,
int N,
int VS = 1,
typename OffsetSimdViewT,
typename AccessorT,
13730 __ESIMD_API std::enable_if_t<
13731 detail::is_device_accessor_with_v<AccessorT,
13732 detail::accessor_mode_cap::can_read> &&
13733 detail::is_simd_view_type_v<OffsetSimdViewT> &&
13734 ext::oneapi::experimental::is_property_list_v<PropertyListT>>
13736 PropertyListT props = {}) {
13759 typename T,
int N,
int VS = 1,
typename OffsetSimdViewT,
typename AccessorT,
13761 __ESIMD_API std::enable_if_t<
13762 detail::is_device_accessor_with_v<AccessorT,
13763 detail::accessor_mode_cap::can_read> &&
13764 detail::is_simd_view_type_v<OffsetSimdViewT> &&
13765 ext::oneapi::experimental::is_property_list_v<PropertyListT>>
13767 PropertyListT props = {}) {
13797 typename T,
int VS = 1,
typename AccessorT,
typename OffsetT,
13799 __ESIMD_API std::enable_if_t<
13800 std::is_integral_v<OffsetT> &&
13801 detail::is_device_accessor_with_v<AccessorT,
13802 detail::accessor_mode_cap::can_read> &&
13803 ext::oneapi::experimental::is_property_list_v<PropertyListT>>
13805 PropertyListT props = {}) {
13806 #ifdef __ESIMD_FORCE_STATELESS_MEM
13807 prefetch<T, VS>(detail::accessorToPointer<T>(acc), byte_offset, mask, props);
13810 PropertyListT>(acc, byte_offset, mask);
13830 typename T,
int VS = 1,
typename AccessorT,
typename OffsetT,
13832 __ESIMD_API std::enable_if_t<
13833 std::is_integral_v<OffsetT> &&
13834 detail::is_device_accessor_with_v<AccessorT,
13835 detail::accessor_mode_cap::can_read> &&
13836 ext::oneapi::experimental::is_property_list_v<PropertyListT>>
13837 prefetch(AccessorT acc, OffsetT byte_offset, PropertyListT props = {}) {
13859 typename T,
int VS = 1,
typename AccessorT,
13861 __ESIMD_API std::enable_if_t<
13862 detail::is_device_accessor_with_v<AccessorT,
13863 detail::accessor_mode_cap::can_read> &&
13864 ext::oneapi::experimental::is_property_list_v<PropertyListT>>
13882 typename T,
int VS = 1,
typename AccessorT,
13884 __ESIMD_API std::enable_if_t<
13885 detail::is_device_accessor_with_v<AccessorT,
13886 detail::accessor_mode_cap::can_read> &&
13887 ext::oneapi::experimental::is_property_list_v<PropertyListT>>
13933 template <
typename T,
int BlockWidth,
int BlockHeight = 1,
int NBlocks = 1,
13934 bool Transposed =
false,
bool Transformed =
false,
13936 T, NBlocks, BlockHeight, BlockWidth, Transposed, Transformed>(),
13938 __ESIMD_API std::enable_if_t<
13939 ext::oneapi::experimental::is_property_list_v<PropertyListT>,
simd<T, N>>
13940 load_2d(
const T *Ptr,
unsigned SurfaceWidth,
unsigned SurfaceHeight,
13941 unsigned SurfacePitch,
int X,
int Y, PropertyListT props = {}) {
13942 return detail::load_2d_impl<T, BlockWidth, BlockHeight, NBlocks, Transposed,
13943 Transformed, PropertyListT>(
13944 Ptr, SurfaceWidth, SurfaceHeight, SurfacePitch, X, Y);
13976 template <
typename T,
int BlockWidth,
int BlockHeight = 1,
int NBlocks = 1,
13978 T, NBlocks, BlockHeight, BlockWidth,
false ,
13981 __ESIMD_API std::enable_if_t<
13982 ext::oneapi::experimental::is_property_list_v<PropertyListT>>
13984 unsigned SurfacePitch,
int X,
int Y, PropertyListT props = {}) {
13985 detail::prefetch_2d_impl<T, BlockWidth, BlockHeight, NBlocks, PropertyListT>(
13986 Ptr, SurfaceWidth, SurfaceHeight, SurfacePitch, X, Y);
14012 template <
typename T,
int BlockWidth,
int BlockHeight = 1,
14014 T, 1u, BlockHeight, BlockWidth,
false ,
14017 __ESIMD_API std::enable_if_t<
14018 ext::oneapi::experimental::is_property_list_v<PropertyListT>>
14019 store_2d(T *Ptr,
unsigned SurfaceWidth,
unsigned SurfaceHeight,
14020 unsigned SurfacePitch,
int X,
int Y,
simd<T, N> Vals,
14021 PropertyListT props = {}) {
14022 detail::store_2d_impl<T, BlockWidth, BlockHeight, PropertyListT>(
14023 Ptr, SurfaceWidth, SurfaceHeight, SurfacePitch, X, Y, Vals);
14051 typename AccessorT,
int N,
14054 std::enable_if_t<detail::is_local_accessor_with_v<
14055 AccessorT, detail::accessor_mode_cap::can_read>,
14059 return slm_gather_rgba<T, N, RGBAMask>(
14060 offsets + global_offset + __ESIMD_DNS::localAccessorToOffset(acc), mask);
14080 typename AccessorT,
int N,
14082 __ESIMD_API std::enable_if_t<detail::is_local_accessor_with_v<
14083 AccessorT, detail::accessor_mode_cap::can_write>>
14087 detail::validate_rgba_write_channel_mask<RGBAMask>();
14088 slm_scatter_rgba<T, N, RGBAMask>(offsets + global_offset +
14089 __ESIMD_DNS::localAccessorToOffset(acc),
14116 template <uint8_t exec_size, uint8_t sfid, uint8_t num_src0, uint8_t num_src1,
14119 typename T2,
int n2,
typename T3,
int n3>
14120 __ESIMD_API __ESIMD_NS::simd<T1, n1>
14121 raw_sends(__ESIMD_NS::simd<T1, n1> msg_dst, __ESIMD_NS::simd<T2, n2> msg_src0,
14122 __ESIMD_NS::simd<T3, n3> msg_src1, uint32_t ex_desc,
14123 uint32_t msg_desc, __ESIMD_NS::simd_mask<exec_size> mask = 1) {
14124 constexpr
unsigned _Width1 = n1 *
sizeof(T1);
14125 static_assert(_Width1 % 32 == 0,
"Invalid size for raw send rspVar");
14126 constexpr
unsigned _Width2 = n2 *
sizeof(T2);
14127 static_assert(_Width2 % 32 == 0,
"Invalid size for raw send msg_src0");
14128 constexpr
unsigned _Width3 = n3 *
sizeof(T3);
14129 static_assert(_Width3 % 32 == 0,
"Invalid size for raw send msg_src1");
14131 using ElemT1 = __ESIMD_DNS::__raw_t<T1>;
14132 using ElemT2 = __ESIMD_DNS::__raw_t<T2>;
14133 using ElemT3 = __ESIMD_DNS::__raw_t<T3>;
14135 constexpr uint8_t modifier =
14138 return __esimd_raw_sends2<ElemT1, n1, ElemT2, n2, ElemT3, n3, exec_size>(
14139 modifier, exec_size, mask.
data(), num_src0, num_src1, num_dst, sfid,
14140 ex_desc, msg_desc, msg_src0.data(), msg_src1.data(), msg_dst.data());
14160 template <uint8_t exec_size, uint8_t sfid, uint8_t num_src0, uint8_t num_dst,
14163 typename T2,
int n2>
14164 __ESIMD_API __ESIMD_NS::simd<T1, n1>
14165 raw_send(__ESIMD_NS::simd<T1, n1> msg_dst, __ESIMD_NS::simd<T2, n2> msg_src0,
14166 uint32_t ex_desc, uint32_t msg_desc,
14167 __ESIMD_NS::simd_mask<exec_size> mask = 1) {
14168 constexpr
unsigned _Width1 = n1 *
sizeof(T1);
14169 static_assert(_Width1 % 32 == 0,
"Invalid size for raw send rspVar");
14170 constexpr
unsigned _Width2 = n2 *
sizeof(T2);
14171 static_assert(_Width2 % 32 == 0,
"Invalid size for raw send msg_src0");
14173 using ElemT1 = __ESIMD_DNS::__raw_t<T1>;
14174 using ElemT2 = __ESIMD_DNS::__raw_t<T2>;
14176 constexpr uint8_t modifier =
14178 return __esimd_raw_send2<ElemT1, n1, ElemT2, n2, exec_size>(
14179 modifier, exec_size, mask.
data(), num_src0, num_dst, sfid, ex_desc,
14180 msg_desc, msg_src0.data(), msg_dst.data());
14200 template <uint8_t exec_size, uint8_t sfid, uint8_t num_src0, uint8_t num_src1,
14203 typename T2,
int n2>
14205 __ESIMD_NS::simd<T2, n2> msg_src1, uint32_t ex_desc,
14207 __ESIMD_NS::simd_mask<exec_size> mask = 1) {
14208 constexpr
unsigned _Width1 = n1 *
sizeof(T1);
14209 static_assert(_Width1 % 32 == 0,
"Invalid size for raw send msg_src0");
14210 constexpr
unsigned _Width2 = n2 *
sizeof(T2);
14211 static_assert(_Width2 % 32 == 0,
"Invalid size for raw send msg_src1");
14213 using ElemT1 = __ESIMD_DNS::__raw_t<T1>;
14214 using ElemT2 = __ESIMD_DNS::__raw_t<T2>;
14216 constexpr uint8_t modifier =
14218 __esimd_raw_sends2_noresult<ElemT1, n1, ElemT2, n2, exec_size>(
14219 modifier, exec_size, mask.
data(), num_src0, num_src1, sfid, ex_desc,
14220 msg_desc, msg_src0.data(), msg_src1.data());
14238 template <uint8_t exec_size, uint8_t sfid, uint8_t num_src0,
14241 __ESIMD_API
void raw_send(__ESIMD_NS::simd<T1, n1> msg_src0, uint32_t ex_desc,
14243 __ESIMD_NS::simd_mask<exec_size> mask = 1) {
14244 constexpr
unsigned _Width1 = n1 *
sizeof(T1);
14245 static_assert(_Width1 % 32 == 0,
"Invalid size for raw send msg_src0");
14246 using ElemT1 = __ESIMD_DNS::__raw_t<T1>;
14247 constexpr uint8_t modifier =
14249 __esimd_raw_send2_noresult<ElemT1, n1, exec_size>(
14250 modifier, exec_size, mask.
data(), num_src0, sfid, ex_desc, msg_desc,
14267 __esimd_nbarrier(0 ,
id, 0 );
14275 __esimd_nbarrier_init(NbarCount);
14292 template <
bool Fence = true>
14295 uint32_t num_producers, uint32_t num_consumers) {
14296 if constexpr (Fence)
14299 __esimd_nbarrier_arrive(barrier_id, producer_consumer_mode, num_producers,
14312 template <
typename T,
int N,
class T1,
class SFINAE>
14313 template <
int ChunkSize,
typename PropertyListT>
14314 std::enable_if_t<ext::oneapi::experimental::is_property_list_v<PropertyListT>>
14317 PropertyListT) SYCL_ESIMD_FUNCTION {
14319 constexpr
unsigned Size =
sizeof(T) * N;
14320 constexpr
size_t Align =
14321 detail::getPropertyValue<PropertyListT, alignment_key>(
sizeof(UT));
14323 constexpr
unsigned BlockSize = OperandSize::OWORD * 8;
14324 constexpr
unsigned NumBlocks = Size / BlockSize;
14325 constexpr
unsigned RemSize = Size % BlockSize;
14327 if constexpr (Align >= OperandSize::DWORD && Size % OperandSize::OWORD == 0 &&
14329 if constexpr (NumBlocks > 0) {
14330 constexpr
unsigned BlockN = BlockSize /
sizeof(T);
14331 ForHelper<NumBlocks>::unroll([BlockN, Addr,
this](
unsigned Block) {
14332 select<BlockN, 1>(Block * BlockN) =
14333 block_load<UT, BlockN>(Addr + (Block * BlockN), PropertyListT{});
14336 if constexpr (RemSize > 0) {
14337 constexpr
unsigned RemN = RemSize /
sizeof(T);
14338 constexpr
unsigned BlockN = BlockSize /
sizeof(T);
14339 select<RemN, 1>(NumBlocks * BlockN) =
14340 block_load<UT, RemN>(Addr + (NumBlocks * BlockN), PropertyListT{});
14342 }
else if constexpr (
sizeof(T) == 8) {
14345 bit_cast_view<int32_t>() = BC;
14347 constexpr
unsigned NumChunks = N / ChunkSize;
14348 if constexpr (NumChunks > 0) {
14350 ForHelper<NumChunks>::unroll([Addr, &Offsets,
this](
unsigned Block) {
14351 select<ChunkSize, 1>(Block * ChunkSize) = gather<UT, ChunkSize>(
14352 Addr + (Block * ChunkSize), Offsets, PropertyListT{});
14355 constexpr
unsigned RemN = N % ChunkSize;
14356 if constexpr (RemN > 0) {
14357 if constexpr (RemN == 1) {
14358 select<1, 1>(NumChunks * ChunkSize) = Addr[NumChunks * ChunkSize];
14359 }
else if constexpr (RemN == 8 || RemN == 16) {
14361 select<RemN, 1>(NumChunks * ChunkSize) = gather<UT, RemN>(
14362 Addr + (NumChunks * ChunkSize), Offsets, PropertyListT{});
14364 constexpr
int N1 = RemN < 8 ? 8 : RemN < 16 ? 16 : 32;
14365 simd_mask_type<N1> Pred(0);
14366 Pred.template select<RemN, 1>() = 1;
14368 simd<UT, N1> Vals = gather<UT, N1>(Addr + (NumChunks * ChunkSize),
14369 Offsets, Pred, PropertyListT{});
14370 select<RemN, 1>(NumChunks * ChunkSize) =
14371 Vals.template select<RemN, 1>();
14377 template <
typename T,
int N,
class T1,
class SFINAE>
14378 template <
typename Flags,
int ChunkSize>
14379 std::enable_if_t<is_simd_flag_type_v<Flags>>
14382 Flags) SYCL_ESIMD_FUNCTION {
14383 constexpr
unsigned Align = Flags::template alignment<T1>;
14384 copy_from<ChunkSize>(Addr, properties{alignment<Align>});
14387 template <
typename T,
int N,
class T1,
class SFINAE>
14388 template <
int ChunkSize,
typename PropertyListT,
typename AccessorT,
14390 ESIMD_INLINE std::enable_if_t<
14391 ext::oneapi::experimental::is_property_list_v<PropertyListT>>
14392 simd_obj_impl<T, N, T1, SFINAE>::copy_to_impl(
14393 AccessorT acc, TOffset offset, PropertyListT)
const SYCL_ESIMD_FUNCTION {
14395 constexpr
unsigned Size =
sizeof(T) * N;
14396 constexpr
size_t Align =
14397 detail::getPropertyValue<PropertyListT, alignment_key>(
sizeof(UT));
14399 constexpr
unsigned BlockSize = OperandSize::OWORD * 8;
14400 constexpr
unsigned NumBlocks = Size / BlockSize;
14401 constexpr
unsigned RemSize = Size % BlockSize;
14404 if constexpr (Align >= OperandSize::OWORD && Size % OperandSize::OWORD == 0 &&
14406 if constexpr (NumBlocks > 0) {
14407 constexpr
unsigned BlockN = BlockSize /
sizeof(T);
14408 ForHelper<NumBlocks>::unroll([BlockN, acc, offset, &Tmp](
unsigned Block) {
14409 block_store<UT, BlockN, AccessorT>(
14410 acc, offset + (Block * BlockSize),
14411 Tmp.template select<BlockN, 1>(Block * BlockN), PropertyListT{});
14414 if constexpr (RemSize > 0) {
14415 constexpr
unsigned RemN = RemSize /
sizeof(T);
14416 constexpr
unsigned BlockN = BlockSize /
sizeof(T);
14417 block_store<UT, RemN, AccessorT>(
14418 acc, offset + (NumBlocks * BlockSize),
14419 Tmp.template select<RemN, 1>(NumBlocks * BlockN), PropertyListT{});
14421 }
else if constexpr (
sizeof(T) == 8) {
14423 BC.
copy_to(acc, offset, PropertyListT{});
14425 constexpr
unsigned NumChunks = N / ChunkSize;
14426 if constexpr (NumChunks > 0) {
14428 ForHelper<NumChunks>::unroll(
14429 [acc, offset, &Offsets, &Tmp](
unsigned Block) {
14430 scatter<UT, ChunkSize>(
14431 acc, Offsets + (offset + (Block * ChunkSize *
sizeof(T))),
14432 Tmp.template select<ChunkSize, 1>(Block * ChunkSize),
14436 constexpr
unsigned RemN = N % ChunkSize;
14437 if constexpr (RemN > 0) {
14438 if constexpr (RemN == 1 || RemN == 8 || RemN == 16) {
14441 acc, Offsets + (offset + (NumChunks * ChunkSize *
sizeof(T))),
14442 Tmp.template select<RemN, 1>(NumChunks * ChunkSize),
14445 constexpr
int N1 = RemN < 8 ? 8 : RemN < 16 ? 16 : 32;
14446 simd_mask_type<N1> Pred(0);
14447 Pred.template select<RemN, 1>() = 1;
14449 Vals.template select<RemN, 1>() =
14450 Tmp.template select<RemN, 1>(NumChunks * ChunkSize);
14453 acc, Offsets + (offset + (NumChunks * ChunkSize *
sizeof(T))), Vals,
14454 Pred, PropertyListT{});
14460 template <
typename T,
int N,
class T1,
class SFINAE>
14461 template <
int ChunkSize,
typename Flags,
typename AccessorT,
typename TOffset>
14462 ESIMD_INLINE std::enable_if_t<is_simd_flag_type_v<Flags>>
14463 simd_obj_impl<T, N, T1, SFINAE>::copy_to_impl(
14464 AccessorT acc, TOffset offset)
const SYCL_ESIMD_FUNCTION {
14465 constexpr
unsigned Align = Flags::template alignment<T1>;
14466 copy_to_impl<ChunkSize>(acc, offset, properties{alignment<Align>});
14469 template <
typename T,
int N,
class T1,
class SFINAE>
14470 template <
int ChunkSize,
typename PropertyListT,
typename AccessorT,
14472 ESIMD_INLINE std::enable_if_t<
14473 ext::oneapi::experimental::is_property_list_v<PropertyListT>>
14474 simd_obj_impl<T, N, T1, SFINAE>::copy_from_impl(
14475 AccessorT acc, TOffset offset, PropertyListT) SYCL_ESIMD_FUNCTION {
14477 static_assert(
sizeof(UT) ==
sizeof(T));
14478 constexpr
unsigned Size =
sizeof(T) * N;
14479 constexpr
size_t Align =
14480 detail::getPropertyValue<PropertyListT, alignment_key>(
sizeof(UT));
14482 constexpr
unsigned BlockSize = OperandSize::OWORD * 8;
14483 constexpr
unsigned NumBlocks = Size / BlockSize;
14484 constexpr
unsigned RemSize = Size % BlockSize;
14486 if constexpr (Align >= OperandSize::DWORD && Size % OperandSize::OWORD == 0 &&
14488 if constexpr (NumBlocks > 0) {
14489 constexpr
unsigned BlockN = BlockSize /
sizeof(T);
14490 ForHelper<NumBlocks>::unroll([BlockN, acc, offset,
this](
unsigned Block) {
14491 select<BlockN, 1>(Block * BlockN) = block_load<UT, BlockN, AccessorT>(
14492 acc, offset + (Block * BlockSize), PropertyListT{});
14495 if constexpr (RemSize > 0) {
14496 constexpr
unsigned RemN = RemSize /
sizeof(T);
14497 constexpr
unsigned BlockN = BlockSize /
sizeof(T);
14498 select<RemN, 1>(NumBlocks * BlockN) = block_load<UT, RemN, AccessorT>(
14499 acc, offset + (NumBlocks * BlockSize), PropertyListT{});
14501 }
else if constexpr (
sizeof(T) == 8) {
14503 bit_cast_view<int32_t>() = BC;
14505 constexpr
unsigned NumChunks = N / ChunkSize;
14506 if constexpr (NumChunks > 0) {
14508 ForHelper<NumChunks>::unroll(
14509 [acc, offset, &Offsets,
this](
unsigned Block) {
14510 select<ChunkSize, 1>(Block * ChunkSize) =
14511 gather<UT, ChunkSize, AccessorT>(
14512 acc, Offsets + (offset + (Block * ChunkSize *
sizeof(T))),
14516 constexpr
unsigned RemN = N % ChunkSize;
14517 if constexpr (RemN > 0) {
14518 if constexpr (RemN == 1 || RemN == 8 || RemN == 16) {
14520 select<RemN, 1>(NumChunks * ChunkSize) = gather<UT, RemN, AccessorT>(
14521 acc, Offsets, offset + (NumChunks * ChunkSize *
sizeof(T)));
14523 constexpr
int N1 = RemN < 8 ? 8 : RemN < 16 ? 16 : 32;
14524 simd_mask_type<N1> Pred(0);
14525 Pred.template select<RemN, 1>() = 1;
14528 acc, Offsets + (offset + (NumChunks * ChunkSize *
sizeof(T))), Pred,
14530 select<RemN, 1>(NumChunks * ChunkSize) =
14531 Vals.template select<RemN, 1>();
14537 template <
typename T,
int N,
class T1,
class SFINAE>
14538 template <
int ChunkSize,
typename Flags,
typename AccessorT,
typename TOffset>
14539 ESIMD_INLINE std::enable_if_t<is_simd_flag_type_v<Flags>>
14540 simd_obj_impl<T, N, T1, SFINAE>::copy_from_impl(AccessorT acc, TOffset offset)
14541 SYCL_ESIMD_FUNCTION {
14542 constexpr
unsigned Align = Flags::template alignment<T1>;
14543 copy_from_impl<ChunkSize>(acc, offset, properties{alignment<Align>});
14546 template <
typename T,
int N,
class T1,
class SFINAE>
14547 template <
typename AccessorT,
typename Flags,
int ChunkSize>
14548 ESIMD_INLINE std::enable_if_t<
14549 detail::is_device_accessor_with_v<AccessorT, accessor_mode_cap::can_read> &&
14550 is_simd_flag_type_v<Flags>>
14553 Flags) SYCL_ESIMD_FUNCTION {
14555 copy_from_impl<ChunkSize, Flags>(acc, offset);
14558 template <
typename T,
int N,
class T1,
class SFINAE>
14559 template <
typename AccessorT,
int ChunkSize,
typename PropertyListT>
14560 ESIMD_INLINE std::enable_if_t<
14561 detail::is_device_accessor_with_v<AccessorT, accessor_mode_cap::can_read> &&
14562 ext::oneapi::experimental::is_property_list_v<PropertyListT>>
14565 PropertyListT) SYCL_ESIMD_FUNCTION {
14567 copy_from_impl<ChunkSize, PropertyListT>(acc, offset);
14570 template <
typename T,
int N,
class T1,
class SFINAE>
14571 template <
typename AccessorT,
typename Flags,
int ChunkSize>
14572 ESIMD_INLINE std::enable_if_t<
14573 detail::is_local_accessor_with_v<AccessorT, accessor_mode_cap::can_read> &&
14574 is_simd_flag_type_v<Flags>,
14577 Flags) SYCL_ESIMD_FUNCTION {
14579 copy_from_impl<ChunkSize, Flags>(acc, offset);
14582 template <
typename T,
int N,
class T1,
class SFINAE>
14583 template <
typename AccessorT,
int ChunkSize,
typename PropertyListT>
14584 ESIMD_INLINE std::enable_if_t<
14585 detail::is_local_accessor_with_v<AccessorT, accessor_mode_cap::can_read> &&
14586 ext::oneapi::experimental::is_property_list_v<PropertyListT>,
14589 PropertyListT) SYCL_ESIMD_FUNCTION {
14591 copy_from_impl<ChunkSize, PropertyListT>(acc, offset);
14594 template <
typename T,
int N,
class T1,
class SFINAE>
14595 template <
int ChunkSize,
typename PropertyListT>
14596 std::enable_if_t<ext::oneapi::experimental::is_property_list_v<PropertyListT>>
14599 PropertyListT)
const SYCL_ESIMD_FUNCTION {
14601 constexpr
unsigned Size =
sizeof(T) * N;
14602 constexpr
size_t Align =
14603 detail::getPropertyValue<PropertyListT, alignment_key>(
sizeof(UT));
14605 constexpr
unsigned BlockSize = OperandSize::OWORD * 8;
14606 constexpr
unsigned NumBlocks = Size / BlockSize;
14607 constexpr
unsigned RemSize = Size % BlockSize;
14610 if constexpr (Align >= OperandSize::OWORD && Size % OperandSize::OWORD == 0 &&
14612 if constexpr (NumBlocks > 0) {
14613 constexpr
unsigned BlockN = BlockSize /
sizeof(T);
14614 ForHelper<NumBlocks>::unroll([BlockN, Addr, &Tmp](
unsigned Block) {
14615 block_store<UT, BlockN>(Addr + (Block * BlockN),
14616 Tmp.template select<BlockN, 1>(Block * BlockN),
14620 if constexpr (RemSize > 0) {
14621 constexpr
unsigned RemN = RemSize /
sizeof(T);
14622 constexpr
unsigned BlockN = BlockSize /
sizeof(T);
14623 block_store<UT, RemN>(Addr + (NumBlocks * BlockN),
14624 Tmp.template select<RemN, 1>(NumBlocks * BlockN),
14627 }
else if constexpr (
sizeof(T) == 8) {
14629 BC.
copy_to(
reinterpret_cast<int32_t *
>(Addr), PropertyListT{});
14631 constexpr
unsigned NumChunks = N / ChunkSize;
14632 if constexpr (NumChunks > 0) {
14634 ForHelper<NumChunks>::unroll([Addr, &Offsets, &Tmp](
unsigned Block) {
14635 scatter<UT, ChunkSize>(
14636 Addr + (Block * ChunkSize), Offsets,
14637 Tmp.template select<ChunkSize, 1>(Block * ChunkSize),
14641 constexpr
unsigned RemN = N % ChunkSize;
14642 if constexpr (RemN > 0) {
14643 if constexpr (RemN == 1) {
14644 Addr[NumChunks * ChunkSize] = Tmp[NumChunks * ChunkSize];
14645 }
else if constexpr (RemN == 8 || RemN == 16) {
14649 if constexpr (
sizeof(T) == 1 && RemN == 16) {
14650 if constexpr (Align % OperandSize::DWORD > 0) {
14651 ForHelper<RemN>::unroll([Addr, &Tmp](
unsigned Index) {
14652 Addr[Index + NumChunks * ChunkSize] =
14653 Tmp[Index + NumChunks * ChunkSize];
14656 simd_mask_type<8> Pred(0);
14658 Pred.template select<4, 1>() = 1;
14659 Vals.template select<4, 1>() =
14660 Tmp.template bit_cast_view<int32_t>().template select<4, 1>(
14661 NumChunks * ChunkSize);
14664 scatter<int32_t, 8>(
14665 reinterpret_cast<int32_t *
>(Addr + (NumChunks * ChunkSize)),
14666 Offsets, Vals, Pred, PropertyListT{});
14670 scatter<UT, RemN>(Addr + (NumChunks * ChunkSize), Offsets,
14671 Tmp.template select<RemN, 1>(NumChunks * ChunkSize),
14675 constexpr
int N1 = RemN < 8 ? 8 : RemN < 16 ? 16 : 32;
14676 simd_mask_type<N1> Pred(0);
14677 Pred.template select<RemN, 1>() = 1;
14679 Vals.template select<RemN, 1>() =
14680 Tmp.template select<RemN, 1>(NumChunks * ChunkSize);
14682 scatter<UT, N1>(Addr + (NumChunks * ChunkSize), Offsets, Vals, Pred,
14689 template <
typename T,
int N,
class T1,
class SFINAE>
14690 template <
typename Flags,
int ChunkSize>
14691 std::enable_if_t<is_simd_flag_type_v<Flags>>
14694 Flags)
const SYCL_ESIMD_FUNCTION {
14695 constexpr
unsigned Align = Flags::template alignment<T1>;
14696 copy_to<ChunkSize>(Addr, properties{alignment<Align>});
14699 template <
typename T,
int N,
class T1,
class SFINAE>
14700 template <
typename AccessorT,
typename Flags,
int ChunkSize>
14701 ESIMD_INLINE std::enable_if_t<detail::is_device_accessor_with_v<
14702 AccessorT, accessor_mode_cap::can_write> &&
14703 is_simd_flag_type_v<Flags>>
14706 Flags)
const SYCL_ESIMD_FUNCTION {
14707 copy_to_impl<ChunkSize, Flags>(acc, offset);
14710 template <
typename T,
int N,
class T1,
class SFINAE>
14711 template <
typename AccessorT,
int ChunkSize,
typename PropertyListT>
14712 ESIMD_INLINE std::enable_if_t<
14713 detail::is_device_accessor_with_v<AccessorT,
14714 accessor_mode_cap::can_write> &&
14715 ext::oneapi::experimental::is_property_list_v<PropertyListT>>
14718 PropertyListT)
const SYCL_ESIMD_FUNCTION {
14719 copy_to_impl<ChunkSize, PropertyListT>(acc, offset);
14722 template <
typename T,
int N,
class T1,
class SFINAE>
14723 template <
typename AccessorT,
typename Flags,
int ChunkSize>
14724 ESIMD_INLINE std::enable_if_t<
14725 detail::is_local_accessor_with_v<AccessorT, accessor_mode_cap::can_write> &&
14726 is_simd_flag_type_v<Flags>,
14729 Flags)
const SYCL_ESIMD_FUNCTION {
14730 copy_to_impl<ChunkSize, Flags>(acc, offset);
14733 template <
typename T,
int N,
class T1,
class SFINAE>
14734 template <
typename AccessorT,
int ChunkSize,
typename PropertyListT>
14735 ESIMD_INLINE std::enable_if_t<
14736 detail::is_local_accessor_with_v<AccessorT, accessor_mode_cap::can_write> &&
14737 ext::oneapi::experimental::is_property_list_v<PropertyListT>,
14740 AccessorT acc, uint32_t offset, PropertyListT)
const SYCL_ESIMD_FUNCTION {
14741 copy_to_impl<ChunkSize, PropertyListT>(acc, offset);
const auto & data() const noexcept
std::enable_if_t< __vectorizable< _Up >) &&is_simd_flag_type< _Flags >::value > copy_to(_Up *__buffer, _Flags) const
get_vector_element_type< Derived > element_type
Element type of the derived (user) class.
raw_vector_type data() const
ESIMD_INLINE std::enable_if_t< is_simd_flag_type_v< Flags > > copy_to(Ty *addr, Flags) const SYCL_ESIMD_FUNCTION
Copy all vector elements of this object into a contiguous block in memory.
ESIMD_INLINE std::enable_if_t< is_simd_flag_type_v< Flags > > copy_from(const Ty *addr, Flags) SYCL_ESIMD_FUNCTION
Copy a contiguous block of data from memory into this simd_obj_impl object.
The main simd vector class.
typename base_type::raw_vector_type raw_vector_type
RAII-style class used to implement "semi-dynamic" SLM allocation.
~slm_allocator()
Releases the SLM chunk allocated in the constructor.
slm_allocator()
Allocates the amount of SLM which is class' template parameter.
ESIMD_INLINE int get_offset() const
#define __ESIMD_FP_ATOMIC_OP_TYPE_CHECK(T)
raw_send_eot
Specify if end of thread should be set.
rgba_channel_mask
Represents a pixel's channel mask - all possible combinations of enabled channels.
raw_send_sendc
Specify if sendc should be used.
unsigned int SurfaceIndex
Surface index type.
constexpr int get_num_channels_enabled(rgba_channel_mask M)
atomic_op
Represents an atomic operation.
@ fsub
ACM/PVC: Subtraction (floating point): *addr = *addr - src0.
@ fmax
ACM/PVC: Minimum (floating point): *addr = min(*addr, src0).
@ fadd
ACM/PVC: Addition (floating point): *addr = *addr + src0.
@ xchg
Exchange. *addr == src0;
@ fmin
ACM/PVC: Maximum (floating point): *addr = max(*addr, src0).
@ fcmpxchg
ACM/PVC: Compare and exchange (floating point).
__ESIMD_API SZ simd< T, SZ > src1
__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v< PropertyListT >, simd< T, N > > block_load(const T *ptr, PropertyListT props={})
Each of the following block load functions loads a contiguous memory block from the address reference...
__ESIMD_API void named_barrier_wait(uint8_t id)
Wait on a named barrier Available only on PVC.
__ESIMD_API void named_barrier_init()
Initialize number of named barriers for a kernel Available only on PVC.
__ESIMD_API void named_barrier_signal(uint8_t barrier_id, uint8_t producer_consumer_mode, uint32_t num_producers, uint32_t num_consumers)
Perform signal operation for the given named barrier Available only on PVC.
__ESIMD_API std::enable_if_t< __ESIMD_DNS::get_num_args< Op >)==0 &&__ESIMD_DNS::is_rw_local_accessor_v< AccessorT >, simd< T, N > > atomic_update(AccessorT lacc, simd< uint32_t, N > byte_offset, simd_mask< N > mask=1)
simd<T, N> atomic_update(local_accessor lacc, simd<uint32_t, N> byte_offset, simd_mask<N> pred = 1); ...
__ESIMD_API std::enable_if_t<(N==8||N==16||N==32) &&(sizeof(T)==4)> slm_scatter_rgba(simd< uint32_t, N > offsets, simd< T, N *get_num_channels_enabled(Mask)> vals, simd_mask< N > mask=1)
Gather data from the Shared Local Memory at specified offsets and return it as simd vector.
__ESIMD_API T slm_scalar_load(uint32_t offset)
Load a scalar value from the Shared Local Memory.
__ESIMD_API std::enable_if_t< is_simd_flag_type_v< Flags >, simd< T, N > > slm_block_load(uint32_t byte_offset, Flags)
Loads a contiguous block of SLM memory referenced by the given byte-offset offset,...
__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v< PropertyListT > > slm_scatter(simd< uint32_t, N/VS > byte_offsets, simd< T, N > vals, simd_mask< N/VS > mask, PropertyListT props={})
template <typename T, int N, int VS = 1, typename PropertyListT = empty_properties_t> void slm_scatte...
__ESIMD_API std::enable_if_t< is_simd_flag_type_v< Flags > > slm_block_store(uint32_t offset, simd< T, N > vals, Flags)
Stores elements of the vector vals to a contiguous block of SLM memory at the given byte-offset offse...
__ESIMD_API std::enable_if_t< __ESIMD_DNS::get_num_args< Op >)==0, simd< T, N > > slm_atomic_update(simd< uint32_t, N > byte_offset, simd_mask< N > mask=1)
__ESIMD_API void slm_init()
Declare per-work-group slm size.
__ESIMD_API std::enable_if_t<(N==8||N==16||N==32) &&(sizeof(T)==4), simd< T, N *get_num_channels_enabled(RGBAMask)> > slm_gather_rgba(simd< uint32_t, N > offsets, simd_mask< N > mask=1)
Gather data from the Shared Local Memory at specified offsets and return it as simd vector.
__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v< PropertyListT >, simd< T, N > > slm_gather(simd< uint32_t, N/VS > byte_offsets, simd_mask< N/VS > mask, simd< T, N > pass_thru, PropertyListT props={})
template <typename T, int N, int VS, typename PropertyListT = empty_properties_t> simd<T,...
__ESIMD_API void slm_scalar_store(uint32_t offset, T val)
Store a scalar value into the Shared Local Memory.
__ESIMD_API simd< T, N *get_num_channels_enabled(RGBAMask)> gather_rgba(const T *p, simd< Toffset, N > offsets, simd_mask< N > mask=1)
Gather and transpose pixels from given memory locations defined by the base pointer p and offsets.
__ESIMD_API T scalar_load(AccessorTy acc, detail::DeviceAccessorOffsetT offset)
Load a scalar value from an accessor.
__ESIMD_API std::enable_if_t< is_simd_flag_type_v< Flags > > block_store(Tx *addr, simd< Tx, N > vals, Flags)
Stores elements of the vector vals to a contiguous block of memory at the given address addr.
__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v< PropertyListT > > store_2d(T *Ptr, unsigned SurfaceWidth, unsigned SurfaceHeight, unsigned SurfacePitch, int X, int Y, simd< T, N > Vals, PropertyListT props={})
2D USM pointer block store.
__ESIMD_API void scatter_rgba(T *p, simd< Toffset, N > offsets, simd< T, N *get_num_channels_enabled(RGBAMask)> vals, simd_mask< N > mask=1)
Transpose and scatter pixels to given memory locations defined by the base pointer p and offsets.
__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v< PropertyListT > > prefetch_2d(const T *Ptr, unsigned SurfaceWidth, unsigned SurfaceHeight, unsigned SurfacePitch, int X, int Y, PropertyListT props={})
template <typename T, int BlockWidth, int BlockHeight = 1, int NBlocks = 1, int N = detail::get_lsc_b...
__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v< PropertyListT > > scatter(T *p, simd< OffsetT, N/VS > byte_offsets, simd< T, N > vals, simd_mask< N/VS > mask, PropertyListT props={})
template <typename T, int N, int VS = 1, typename OffsetT, typename PropertyListT = empty_properties_...
__ESIMD_API void fence()
esimd::fence sets the memory read/write order.
__ESIMD_API void scalar_store(AccessorTy acc, detail::DeviceAccessorOffsetT offset, T val)
Store a scalar value into an accessor.
__ESIMD_API void media_block_store(AccessorTy acc, unsigned x, unsigned y, simd< T, m *N > vals)
Media block store.
__ESIMD_API std::enable_if_t< detail::is_device_accessor_with_v< AccessorT, detail::accessor_mode_cap::can_read > &&ext::oneapi::experimental::is_property_list_v< PropertyListT > > prefetch(AccessorT acc, PropertyListT props={})
template <typename T, int VS = 1, typename AccessorT, typename PropertyListT = empty_properties_t> vo...
fence_mask
Represetns a bit mask to control behavior of esimd::fence.
__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v< PropertyListT >, simd< T, N > > load_2d(const T *Ptr, unsigned SurfaceWidth, unsigned SurfaceHeight, unsigned SurfacePitch, int X, int Y, PropertyListT props={})
template <typename T, int BlockWidth, int BlockHeight = 1, int NBlocks = 1, bool Transposed = false,...
__ESIMD_API SurfaceIndex get_surface_index(AccessorTy acc)
Get surface index corresponding to a SYCL accessor.
__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v< PropertyListT > > prefetch(const T *p, simd< OffsetT, N/VS > byte_offsets, simd_mask< N/VS > mask, PropertyListT props={})
template <typename T, int N, int VS, typename OffsetT, typename PropertyListT = empty_properties_t> v...
__ESIMD_API simd< T, m *N > media_block_load(AccessorTy acc, unsigned x, unsigned y)
Media block load.
__ESIMD_API std::enable_if_t< ext::oneapi::experimental::is_property_list_v< PropertyListT >, simd< T, N > > gather(const T *p, simd< OffsetT, N/VS > byte_offsets, simd_mask< N/VS > mask, simd< T, N > pass_thru, PropertyListT props={})
template <typename T, int N, int VS, typename OffsetT, typename PropertyListT = empty_properties_t> s...
__ESIMD_API void barrier()
Generic work-group barrier.
@ global_coherent_fence
“Commit enable” - wait for fence to complete before continuing.
@ l2_flush_constant_data
Flush constant cache.
@ local_barrier
Issue SLM memory barrier only. If not set, the memory barrier is global.
@ l1_flush_ro_data
Flush L1 read - only data cache.
@ l2_flush_rw_data
Flush constant cache.
@ l2_flush_texture_data
Flush sampler (texture) cache.
@ l2_flush_instructions
Flush the instruction cache.
__ESIMD_API sycl::ext::intel::esimd::simd< T1, n1 > raw_send(sycl::ext::intel::esimd::simd< T1, n1 > msg_dst, sycl::ext::intel::esimd::simd< T2, n2 > msg_src0, uint32_t ex_desc, uint32_t msg_desc, sycl::ext::intel::esimd::simd_mask< exec_size > mask=1)
Raw send.
__ESIMD_API sycl::ext::intel::esimd::simd< T1, n1 > raw_sends(sycl::ext::intel::esimd::simd< T1, n1 > msg_dst, sycl::ext::intel::esimd::simd< T2, n2 > msg_src0, sycl::ext::intel::esimd::simd< T3, n3 > msg_src1, uint32_t ex_desc, uint32_t msg_desc, sycl::ext::intel::esimd::simd_mask< exec_size > mask=1)
Raw sends.
void add(const void *DeviceGlobalPtr, const char *UniqueId)
ESIMD_INLINE simd< T, N > lsc_format_ret(simd< T1, N > Vals)
__ESIMD_API std::enable_if_t< get_num_args< Op >)==0, simd< T, N > > slm_atomic_update_impl(simd< uint32_t, N > offsets, simd_mask< N > pred)
SLM atomic.
constexpr bool isMaskedGatherScatterLLVMAvailable()
static constexpr SurfaceIndex SLM_BTI
constexpr cache_hint getCacheHintForIntrin()
Extracts a cache hint with the given 'Level' to pass it to ESIMD/GENX intrinsics.
constexpr void check_atomic()
Check the legality of an atomic call in terms of size and type.
static void validate_rgba_write_channel_mask()
__ESIMD_API simd< T, N > slm_atomic_update_impl(simd< uint32_t, N > offsets, simd< T, N > src0, simd< T, N > src1, simd_mask< N > pred)
SLM atomic.
__ESIMD_API std::enable_if_t< detail::is_property_list_v< PropertyListT > > block_store_impl(T *p, simd< T, NElts > vals, simd_mask< 1 > pred)
__ESIMD_API simd< T, N *NElts > gather_impl(const T *p, simd< OffsetT, N > offsets, simd_mask< N > pred, simd< T, N *NElts > pass_thru)
USM pointer gather.
lsc_data_size
Data size or format to read or store.
__ESIMD_API void scatter_impl(T *p, simd< Toffset, N > offsets, simd< T, N *NElts > vals, simd_mask< N > pred)
USM pointer scatter.
ESIMD_INLINE simd< RT, N > lsc_format_input(simd< T, N > Vals)
constexpr int lsc_to_internal_atomic_op()
__ESIMD_API std::enable_if_t< get_num_args< Op >)==0, simd< T, N > > atomic_update_impl(T *p, simd< Toffset, N > offsets, simd_mask< N > pred)
USM pointer atomic.
constexpr ESIMD_INLINE bool isPowerOf2(unsigned int n)
Check if a given 32 bit positive integer is a power of 2 at compile time.
__ESIMD_API std::enable_if_t< is_property_list_v< PropertyListT >, simd< T, NElts > > block_load_impl(const T *p, simd_mask< 1 > pred, simd< T, NElts > pass_thru)
USM pointer transposed gather with 1 channel.
constexpr lsc_data_size expand_data_size(lsc_data_size DS)
uint32_t DeviceAccessorOffsetT
constexpr alignment_key::value_t< K > alignment
cache_hint
L1, L2 or L3 cache hints.
fence_scope
The scope that fence() operation should apply to.
@ group
Wait until all previous memory transactions from this thread are observed within the local thread-gro...
fence_flush_op
The cache flush operation to apply to caches after fence() is complete.
memory_kind
The target memory kind for fence() operation.
@ local
image (also known as typed global memory)
constexpr int get_lsc_block_2d_data_size()
prefetch_impl< _B > prefetch
unsigned umin(Tp x, Tp y)
unsigned umax(Tp x, Tp y)
void prefetch_impl(T *ptr, size_t bytes, Properties properties)
properties< std::tuple<> > empty_properties_t
std::enable_if_t< detail::is_bf16_storage_type< T >::value, T > fmax(T x, T y)
std::enable_if_t< detail::is_bf16_storage_type< T >::value, T > fmin(T x, T y)
std::bit_xor< T > bit_xor
std::bit_and< T > bit_and
std::enable_if_t< sizeof(To)==sizeof(From) &&std::is_trivially_copyable< From >::value &&std::is_trivially_copyable< To >::value, To > bit_cast(const From &from) noexcept
constexpr stream_manipulator dec
std::conditional_t< sizeof(T)<=4, std::conditional_t< std::is_signed_v< T >, int32_t, uint32_t >, std::conditional_t< std::is_signed_v< T >, int64_t, uint64_t > > type