24 #ifndef __SYCL_DEVICE_ONLY__
33 const std::array<__ESIMD_NS::rgba_channel, 4> ChannelMaskArray{
34 __ESIMD_NS::rgba_channel::R, __ESIMD_NS::rgba_channel::G,
35 __ESIMD_NS::rgba_channel::B, __ESIMD_NS::rgba_channel::A};
37 #endif // ifndef __SYCL_DEVICE_ONLY__
41 namespace ext::intel::esimd::detail {
// Proxy class giving the ESIMD intrinsics access to accessor internals
// (native image object / qualified pointer) without widening the public
// accessor API.
// NOTE(review): several original lines of this class are missing from this
// chunk (e.g. the host-side branch of getQualifiedPtrOrImageObj and the
// closing braces) — comments below describe only what is visible.
44 class AccessorPrivateProxy {
// Device-side: for an image accessor returns its native image object,
// otherwise the accessor's qualified raw pointer.
46 template <
typename AccessorTy>
47 static auto getQualifiedPtrOrImageObj(
const AccessorTy &Acc) {
48 #ifdef __SYCL_DEVICE_ONLY__
49 if constexpr (sycl::detail::acc_properties::is_image_accessor_v<AccessorTy>)
50 return Acc.getNativeImageObj();
52 return Acc.getQualifiedPtr();
53 #else // __SYCL_DEVICE_ONLY__
55 #endif // __SYCL_DEVICE_ONLY__
// Host-side: raw pointer extraction from the host accessor base class
// (body not visible in this chunk).
58 #ifndef __SYCL_DEVICE_ONLY__
59 static void *getPtr(
const sycl::detail::AccessorBaseHost &Acc) {
62 #endif // __SYCL_DEVICE_ONLY__
/// Computes the wire encoding for the number of elements accessed per address
/// in SVM gather/scatter: returns E such that ElemsPerAddrDecoding(E) ==
/// ElemsPerAddr, i.e. E = log2(ElemsPerAddr).
/// Only 1, 2 or 4 elements per address are representable; any other value is
/// rejected at compile time by the enable_if constraint, so the if-constexpr
/// chain below is exhaustive.
template <int ElemsPerAddr,
          typename = std::enable_if_t<(ElemsPerAddr == 1 || ElemsPerAddr == 2 ||
                                       ElemsPerAddr == 4)>>
constexpr unsigned int ElemsPerAddrEncoding() {
  if constexpr (ElemsPerAddr == 1)
    return 0;
  else if constexpr (ElemsPerAddr == 2)
    return 1;
  else if constexpr (ElemsPerAddr == 4)
    return 2;
}
/// Inverse of ElemsPerAddrEncoding: decodes the wire encoding back into the
/// number of elements accessed per address (1 << encoding).
/// Must stay constexpr — it is used in template arguments of the intrinsics
/// below (e.g. N * ElemsPerAddrDecoding(NumBlk)).
constexpr unsigned int ElemsPerAddrDecoding(unsigned int ElemsPerAddrEncoded) {
  return (1 << ElemsPerAddrEncoded);
}
// Flat-address (USM/SVM) gather: for each of the N 64-bit addresses, loads up
// to min(NumBlkDecoded, ElemsPerAddrDecoded) elements of Ty into one result
// vector, under per-lane mask 'pred' (default all-on).  Result layout depends
// on element size: elements of <= 2 bytes are stored per-address
// (V[I * NumBlkDecoded + J]); wider elements are interleaved across lanes
// (V[J * N + I]).
// NOTE(review): this chunk is missing several original lines (the result
// vector's initializer, the predicate check and the return statement).
90 template <
typename Ty,
int N,
int NumBlk = 0,
int ElemsPerAddr = 0>
92 __ESIMD_DNS::vector_type_t<Ty,
93 N * __ESIMD_DNS::ElemsPerAddrDecoding(NumBlk)>
94 __esimd_svm_gather(__ESIMD_DNS::vector_type_t<uint64_t, N> addrs,
95 __ESIMD_DNS::simd_mask_storage_t<N> pred = 1)
96 #ifdef __SYCL_DEVICE_ONLY__
100 auto NumBlkDecoded = __ESIMD_DNS::ElemsPerAddrDecoding(NumBlk);
101 __ESIMD_DNS::vector_type_t<Ty, N * __ESIMD_DNS::ElemsPerAddrDecoding(NumBlk)>
103 auto ElemsPerAddrDecoded = __ESIMD_DNS::ElemsPerAddrDecoding(ElemsPerAddr);
// Halved here — presumably guarded by an element-size condition on the
// missing preceding line; TODO confirm against the original file.
105 ElemsPerAddrDecoded = ElemsPerAddrDecoded / 2;
107 for (
int I = 0; I < N; I++) {
109 Ty *Addr =
reinterpret_cast<Ty *
>(addrs[I]);
110 if (
sizeof(Ty) <= 2) {
// Small elements: contiguous per-address layout.
111 for (
int J = 0; J < NumBlkDecoded && J < ElemsPerAddrDecoded; J++)
112 V[I * NumBlkDecoded + J] = *(Addr + J);
// Wider elements: block-interleaved layout across lanes.
114 for (
int J = 0; J < NumBlkDecoded && J < ElemsPerAddrDecoded; J++)
115 V[J * N + I] = *(Addr + J);
121 #endif // __SYCL_DEVICE_ONLY__
// Flat-address (USM/SVM) scatter: mirror of __esimd_svm_gather above — for
// each of the N addresses stores up to min(NumBlkDecoded, ElemsPerAddrDecoded)
// elements of Ty from 'vals', under per-lane mask 'pred'.  The source layout
// matches the gather's result layout (per-address for <= 2-byte elements,
// lane-interleaved otherwise).
// NOTE(review): some original lines are missing from this chunk (predicate
// check, closing braces).
124 template <
typename Ty,
int N,
int NumBlk = 0,
int ElemsPerAddr = 0>
125 __ESIMD_INTRIN
void __esimd_svm_scatter(
126 __ESIMD_DNS::vector_type_t<uint64_t, N> addrs,
127 __ESIMD_DNS::vector_type_t<Ty,
128 N * __ESIMD_DNS::ElemsPerAddrDecoding(NumBlk)>
130 __ESIMD_DNS::simd_mask_storage_t<N> pred = 1)
131 #ifdef __SYCL_DEVICE_ONLY__
135 auto NumBlkDecoded = __ESIMD_DNS::ElemsPerAddrDecoding(NumBlk);
136 auto ElemsPerAddrDecoded = __ESIMD_DNS::ElemsPerAddrDecoding(ElemsPerAddr);
// Halved — presumably under an element-size condition on a missing line.
138 ElemsPerAddrDecoded = ElemsPerAddrDecoded / 2;
140 for (
int I = 0; I < N; I++) {
142 Ty *Addr =
reinterpret_cast<Ty *
>(addrs[I]);
143 if (
sizeof(Ty) <= 2) {
// Small elements read contiguously per address.
144 for (
int J = 0; J < NumBlkDecoded && J < ElemsPerAddrDecoded; J++)
145 *(Addr + J) = vals[I * NumBlkDecoded + J];
// Wider elements read lane-interleaved.
147 for (
int J = 0; J < NumBlkDecoded && J < ElemsPerAddrDecoded; J++)
148 *(Addr + J) = vals[J * N + I];
153 #endif // __SYCL_DEVICE_ONLY__
// Emulated SVM block load (unaligned variant): reads N consecutive Ty
// elements starting at byte address 'addr' (element I at addr + I*sizeof(Ty))
// into vector V.  The return statement is on a line missing from this chunk.
156 template <
typename Ty,
int N>
157 __ESIMD_INTRIN __ESIMD_DNS::vector_type_t<Ty, N>
158 __esimd_svm_block_ld_unaligned(uint64_t addr)
159 #ifdef __SYCL_DEVICE_ONLY__
163 __ESIMD_DNS::vector_type_t<Ty, N> V;
165 for (
int I = 0; I < N; I++) {
166 Ty *Addr =
reinterpret_cast<Ty *
>(addr + I *
sizeof(Ty));
171 #endif // __SYCL_DEVICE_ONLY__
// Emulated SVM block load: same visible element-by-element copy as the
// unaligned variant above.  Presumably 'addr' must satisfy an alignment
// requirement here (the name distinguishes it from the _unaligned version) —
// TODO confirm; the requirement is not visible in this chunk.
174 template <
typename Ty,
int N>
175 __ESIMD_INTRIN __ESIMD_DNS::vector_type_t<Ty, N>
176 __esimd_svm_block_ld(uint64_t addr)
177 #ifdef __SYCL_DEVICE_ONLY__
181 __ESIMD_DNS::vector_type_t<Ty, N> V;
183 for (
int I = 0; I < N; I++) {
184 Ty *Addr =
reinterpret_cast<Ty *
>(addr + I *
sizeof(Ty));
189 #endif // __SYCL_DEVICE_ONLY__
// Emulated SVM block store: writes the N elements of 'vals' to consecutive
// Ty slots starting at byte address 'addr' (element I at addr + I*sizeof(Ty)).
// The store statement itself is on a line missing from this chunk.
192 template <
typename Ty,
int N>
193 __ESIMD_INTRIN
void __esimd_svm_block_st(uint64_t addr,
194 __ESIMD_DNS::vector_type_t<Ty, N> vals)
195 #ifdef __SYCL_DEVICE_ONLY__
199 for (
int I = 0; I < N; I++) {
200 Ty *Addr =
reinterpret_cast<Ty *
>(addr + I *
sizeof(Ty));
204 #endif // __SYCL_DEVICE_ONLY__
// Emulated oword (block) load from a surface, unaligned variant.  Two paths
// (the selecting condition is on missing lines): an SLM path reading from the
// emulator's SLM buffer, and a buffer path reading from the CM buffer under
// its mutex, with an out-of-bounds check against 'width'.
// NOTE(review): declarations of readBase/width and several control-flow lines
// are missing from this chunk.
207 template <
typename Ty,
int N,
typename SurfIndAliasTy,
int32_t IsModified = 0>
208 __ESIMD_INTRIN __ESIMD_DNS::vector_type_t<Ty, N>
209 __esimd_oword_ld_unaligned(SurfIndAliasTy surf_ind, uint32_t offset)
210 #ifdef __SYCL_DEVICE_ONLY__
214 __ESIMD_DNS::vector_type_t<Ty, N> retv;
215 sycl::detail::ESIMDDeviceInterface *I =
// SLM path: element i comes from SlmBase + offset, advancing by sizeof(Ty).
221 char *SlmBase = I->__cm_emu_get_slm_ptr();
222 for (
int i = 0; i < N; ++i) {
223 Ty *SlmAddr =
reinterpret_cast<Ty *
>(offset + SlmBase);
225 offset +=
sizeof(Ty);
// Buffer path: read under the surface's mutex; stop/skip past 'width'.
231 std::mutex *mutexLock;
233 I->sycl_get_cm_buffer_params_ptr(surf_ind, &readBase, &width, &mutexLock);
235 std::lock_guard<std::mutex> lock(*mutexLock);
237 for (
int idx = 0; idx < N; idx++) {
238 if (offset >= width) {
241 retv[idx] = *((Ty *)(readBase + offset));
243 offset += (uint32_t)
sizeof(Ty);
248 #endif // __SYCL_DEVICE_ONLY__
// Emulated oword (block) store to a surface: mirror of the oword load above.
// SLM path writes into the emulator's SLM buffer; buffer path writes into the
// CM buffer under its mutex, skipping elements whose offset is out of bounds
// (offset >= width).
// NOTE(review): the path-selecting condition and writeBase/width declarations
// are on lines missing from this chunk.
251 template <
typename Ty,
int N,
typename SurfIndAliasTy>
252 __ESIMD_INTRIN
void __esimd_oword_st(SurfIndAliasTy surf_ind, uint32_t offset,
253 __ESIMD_DNS::vector_type_t<Ty, N> vals)
254 #ifdef __SYCL_DEVICE_ONLY__
260 sycl::detail::ESIMDDeviceInterface *I =
// SLM path.
265 char *SlmBase = I->__cm_emu_get_slm_ptr();
266 for (
int i = 0; i < N; ++i) {
267 Ty *SlmAddr =
reinterpret_cast<Ty *
>(offset + SlmBase);
269 offset +=
sizeof(Ty);
// Buffer path: write under the surface's mutex, bounds-checked.
275 std::mutex *mutexLock;
277 I->sycl_get_cm_buffer_params_ptr(surf_ind, &writeBase, &width, &mutexLock);
279 std::lock_guard<std::mutex> lock(*mutexLock);
281 for (
int idx = 0; idx < N; idx++) {
282 if (offset < width) {
283 *((Ty *)(writeBase + offset)) = vals[idx];
287 offset += (uint32_t)
sizeof(Ty);
294 #endif // __SYCL_DEVICE_ONLY__
// Emulated flat-address RGBA gather: iterates channels in the fixed R,G,B,A
// order (ChannelMaskArray), and for each enabled lane loads from
// addrs[I] + Offset, bumping Offset by sizeof(Ty) per channel.  'Next' indexes
// the flat result vector channel-major.
// NOTE(review): the channel-enabled check against Mask, the Offset
// declaration and the return are on lines missing from this chunk.
297 template <
typename Ty,
int N, __ESIMD_NS::rgba_channel_mask Mask>
300 __esimd_svm_gather4_scaled(__ESIMD_DNS::vector_type_t<uint64_t, N> addrs,
301 __ESIMD_DNS::simd_mask_storage_t<N> pred = 1)
302 #ifdef __SYCL_DEVICE_ONLY__
307 unsigned int Next = 0;
310 for (
const auto &channel : ChannelMaskArray) {
312 for (
int I = 0; I < N; I++, Next++) {
314 Ty *Addr =
reinterpret_cast<Ty *
>(addrs[I] + Offset);
319 Offset += (uint64_t)
sizeof(Ty);
324 #endif // __SYCL_DEVICE_ONLY__
// Emulated flat-address RGBA scatter: mirror of __esimd_svm_gather4_scaled —
// walks channels in R,G,B,A order and stores each enabled lane's value to
// addrs[I] + Offset, bumping Offset by sizeof(Ty) per channel.
// NOTE(review): the vals parameter line, Mask check, Offset declaration and
// the store statement are on lines missing from this chunk.
327 template <
typename Ty,
int N, __ESIMD_NS::rgba_channel_mask Mask>
328 __ESIMD_INTRIN
void __esimd_svm_scatter4_scaled(
329 __ESIMD_DNS::vector_type_t<uint64_t, N> addrs,
331 __ESIMD_DNS::simd_mask_storage_t<N> pred = 1)
332 #ifdef __SYCL_DEVICE_ONLY__
337 unsigned int Next = 0;
340 for (
const auto &channel : ChannelMaskArray) {
342 for (
int I = 0; I < N; I++, Next++) {
344 Ty *Addr =
reinterpret_cast<Ty *
>(addrs[I] + Offset);
349 Offset += (uint64_t)
sizeof(Ty);
352 #endif // __SYCL_DEVICE_ONLY__
// Surface-based gather (unmasked, "scaled2" flavor).  Host emulation is not
// implemented: the body only validates the template parameters at compile
// time (N in {1,8,16,32}; TySizeLog2 <= 2 with Scale == 0; non-integral Ty
// only at 4-byte size) and then reports unsupported-on-host.
375 template <
typename Ty,
int N,
typename SurfIndAliasTy,
int TySizeLog2,
377 __ESIMD_INTRIN __ESIMD_DNS::vector_type_t<Ty, N>
378 __esimd_gather_scaled2(SurfIndAliasTy surf_ind, uint32_t global_offset,
379 __ESIMD_DNS::vector_type_t<uint32_t, N> elem_offsets)
380 #ifdef __SYCL_DEVICE_ONLY__
384 static_assert(N == 1 || N == 8 || N == 16 || N == 32);
385 static_assert(TySizeLog2 <= 2 && Scale == 0);
386 static_assert(std::is_integral_v<Ty> || TySizeLog2 == 2);
387 __ESIMD_UNSUPPORTED_ON_HOST;
389 #endif // __SYCL_DEVICE_ONLY__
// Emulated surface-based scatter.  Values are first adjusted to the wire
// element width: OrigSize = 1/2/4 bytes derived from TySizeLog2, with 4-byte
// elements bit-cast and narrower ones value-converted to the matching
// unsigned type.  Then either the SLM path (global_offset must be 0) or the
// CM buffer path (under the surface mutex, writeBase += global_offset)
// stores one adjusted element per enabled lane at its elem_offset.
// NOTE(review): the path-selecting condition, predicate checks, and the
// writeBase/width declarations are on lines missing from this chunk.
415 template <
typename Ty,
int N,
typename SurfIndAliasTy,
int TySizeLog2,
418 __esimd_scatter_scaled(__ESIMD_DNS::simd_mask_storage_t<N> pred,
419 SurfIndAliasTy surf_ind, uint32_t global_offset,
420 __ESIMD_DNS::vector_type_t<uint32_t, N> elem_offsets,
421 __ESIMD_DNS::vector_type_t<Ty, N> vals)
422 #ifdef __SYCL_DEVICE_ONLY__
426 static_assert(N == 1 || N == 8 || N == 16 || N == 32);
427 static_assert(TySizeLog2 <= 2);
428 static_assert(std::is_integral_v<Ty> || TySizeLog2 == 2);
432 constexpr
size_t OrigSize = __ESIMD_DNS::ElemsPerAddrDecoding(TySizeLog2);
433 using RestoredTy = __ESIMD_DNS::uint_type_t<OrigSize>;
435 sycl::detail::ESIMDDeviceInterface *I =
438 __ESIMD_DNS::vector_type_t<RestoredTy, N> TypeAdjustedVals;
439 if constexpr (OrigSize == 4) {
// Same width: reinterpret bits without value conversion.
440 TypeAdjustedVals = __ESIMD_DNS::bitcast<RestoredTy, Ty, N>(vals);
442 static_assert(OrigSize == 1 || OrigSize == 2);
// Narrower wire type: value-convert each lane.
443 TypeAdjustedVals = __ESIMD_DNS::convert_vector<RestoredTy, Ty, N>(vals);
// SLM path: offsets are relative to the SLM base; no global offset allowed.
449 assert(global_offset == 0);
450 char *SlmBase = I->__cm_emu_get_slm_ptr();
451 for (
int i = 0; i < N; ++i) {
454 reinterpret_cast<RestoredTy *
>(elem_offsets[i] + SlmBase);
455 *addr = TypeAdjustedVals[i];
// Buffer path: write under the surface's mutex.
462 std::mutex *mutexLock;
464 I->sycl_get_cm_buffer_params_ptr(surf_ind, &writeBase, &width, &mutexLock);
465 writeBase += global_offset;
467 std::lock_guard<std::mutex> lock(*mutexLock);
469 for (
int idx = 0; idx < N; idx++) {
472 reinterpret_cast<RestoredTy *
>(elem_offsets[idx] + writeBase);
473 *addr = TypeAdjustedVals[idx];
481 #endif // __SYCL_DEVICE_ONLY__
// Emulated zero-operand flat-address atomic (load / inc / dec).  Per lane:
// skips lanes with pred == 0, otherwise performs the atomic through the
// __ESIMD_DNS::atomic_* helpers and records the old value in Oldval.
// NOTE(review): lines between the visible ops (e.g. the 'dec' dispatch and
// Oldval assignment for inc/dec, plus the return) are missing from this
// chunk; the atomic_sub call below is presumably the 'dec' branch.
484 template <__ESIMD_NS::atomic_op Op,
typename Ty,
int N>
485 __ESIMD_INTRIN __ESIMD_DNS::vector_type_t<Ty, N>
486 __esimd_svm_atomic0(__ESIMD_DNS::vector_type_t<uint64_t, N> addrs,
487 __ESIMD_DNS::simd_mask_storage_t<N> pred)
488 #ifdef __SYCL_DEVICE_ONLY__
492 __ESIMD_DNS::vector_type_t<Ty, N> Oldval = 0;
494 for (
int AddrIdx = 0; AddrIdx < N; AddrIdx += 1) {
// Masked-off lanes are skipped (their Oldval stays 0).
495 if (pred[AddrIdx] == 0) {
500 if constexpr (Op == __ESIMD_NS::atomic_op::load) {
501 Oldval[AddrIdx] = __ESIMD_DNS::atomic_load<Ty>((Ty *)addrs[AddrIdx]);
502 }
else if constexpr (Op == __ESIMD_NS::atomic_op::inc) {
504 __ESIMD_DNS::atomic_add<Ty>((Ty *)addrs[AddrIdx],
static_cast<Ty
>(1));
507 __ESIMD_DNS::atomic_sub<Ty>((Ty *)addrs[AddrIdx],
static_cast<Ty
>(1));
512 #endif // __SYCL_DEVICE_ONLY__
// Emulated one-operand flat-address atomic.  Per enabled lane, dispatches at
// compile time on Op: store, add/fadd, sub/fsub, min, max, and, or, xor —
// all via the __ESIMD_DNS::atomic_* helpers with src0[lane] as the operand.
// NOTE(review): several dispatch lines (the add/min/max/bit-op condition
// halves, Oldval assignments and the return) are missing from this chunk.
514 template <__ESIMD_NS::atomic_op Op,
typename Ty,
int N>
515 __ESIMD_INTRIN __ESIMD_DNS::vector_type_t<Ty, N>
516 __esimd_svm_atomic1(__ESIMD_DNS::vector_type_t<uint64_t, N> addrs,
517 __ESIMD_DNS::vector_type_t<Ty, N> src0,
518 __ESIMD_DNS::simd_mask_storage_t<N> pred)
519 #ifdef __SYCL_DEVICE_ONLY__
523 __ESIMD_DNS::vector_type_t<Ty, N> Oldval;
525 for (
int AddrIdx = 0; AddrIdx < N; AddrIdx++) {
// Masked-off lanes are skipped.
526 if (pred[AddrIdx] == 0) {
532 if constexpr (Op == __ESIMD_NS::atomic_op::store) {
534 __ESIMD_DNS::atomic_store<Ty>((Ty *)addrs[AddrIdx], src0[AddrIdx]);
536 (Op == __ESIMD_NS::atomic_op::fadd)) {
538 __ESIMD_DNS::atomic_add<Ty>((Ty *)addrs[AddrIdx], src0[AddrIdx]);
539 }
else if constexpr ((Op == __ESIMD_NS::atomic_op::sub) ||
540 (Op == __ESIMD_NS::atomic_op::fsub)) {
542 __ESIMD_DNS::atomic_sub<Ty>((Ty *)addrs[AddrIdx], src0[AddrIdx]);
543 }
else if constexpr ((Op == __ESIMD_NS::atomic_op::minsint) ||
547 __ESIMD_DNS::atomic_min<Ty>((Ty *)addrs[AddrIdx], src0[AddrIdx]);
548 }
else if constexpr ((Op == __ESIMD_NS::atomic_op::maxsint) ||
552 __ESIMD_DNS::atomic_max<Ty>((Ty *)addrs[AddrIdx], src0[AddrIdx]);
555 __ESIMD_DNS::atomic_and<Ty>((Ty *)addrs[AddrIdx], src0[AddrIdx]);
558 __ESIMD_DNS::atomic_or<Ty>((Ty *)addrs[AddrIdx], src0[AddrIdx]);
561 __ESIMD_DNS::atomic_xor<Ty>((Ty *)addrs[AddrIdx], src0[AddrIdx]);
567 #endif // __SYCL_DEVICE_ONLY__
// Emulated two-operand flat-address atomic: only compare-and-exchange is
// supported (cmpxchg / fcmpxchg, enforced by static_assert).  Per enabled
// lane: Oldval[lane] = atomic_cmpxchg(addr, expected = src0, desired = src1).
// NOTE(review): the return and some closing braces are on lines missing from
// this chunk.
569 template <__ESIMD_NS::atomic_op Op,
typename Ty,
int N>
570 __ESIMD_INTRIN __ESIMD_DNS::vector_type_t<Ty, N>
571 __esimd_svm_atomic2(__ESIMD_DNS::vector_type_t<uint64_t, N> addrs,
572 __ESIMD_DNS::vector_type_t<Ty, N> src0,
573 __ESIMD_DNS::vector_type_t<Ty, N> src1,
574 __ESIMD_DNS::simd_mask_storage_t<N> pred)
575 #ifdef __SYCL_DEVICE_ONLY__
579 __ESIMD_DNS::vector_type_t<Ty, N> Oldval;
581 for (
int AddrIdx = 0; AddrIdx < N; AddrIdx++) {
// Masked-off lanes are skipped.
582 if (pred[AddrIdx] == 0) {
587 static_assert((Op == __ESIMD_NS::atomic_op::cmpxchg) ||
588 (Op == __ESIMD_NS::atomic_op::fcmpxchg));
589 Oldval[AddrIdx] = __ESIMD_DNS::atomic_cmpxchg((Ty *)addrs[AddrIdx],
590 src0[AddrIdx], src1[AddrIdx]);
594 #endif // __SYCL_DEVICE_ONLY__
// Reserves 'size' bytes of shared local memory for the emulated kernel.
// Host-side body is on lines missing from this chunk.
596 __ESIMD_INTRIN
void __esimd_slm_init(uint32_t size)
597 #ifdef __SYCL_DEVICE_ONLY__
603 #endif // __SYCL_DEVICE_ONLY__
// Work-group barrier; host-side body not visible in this chunk.
606 __ESIMD_INTRIN
void __esimd_barrier()
607 #ifdef __SYCL_DEVICE_ONLY__
613 #endif // __SYCL_DEVICE_ONLY__
// Memory fence; 'cntl' selects the fence scope/flush behavior — semantics
// not visible in this chunk, TODO confirm against the ESIMD spec.
616 __ESIMD_INTRIN
void __esimd_fence(uint8_t cntl)
617 #ifdef __SYCL_DEVICE_ONLY__
625 #endif // __SYCL_DEVICE_ONLY__
// Emulated masked surface gather: per enabled lane reads one Ty element.
// SLM path (global_offset must be 0) reads from SlmBase + addrs[i]; buffer
// path reads from readBase + addrs[idx] under the surface's mutex, with
// readBase pre-offset by global_offset.
// NOTE(review): the path-selecting condition, predicate checks, element
// assignments into retv, readBase/width declarations and the return are on
// lines missing from this chunk.
628 template <
typename Ty,
int N,
typename SurfIndAliasTy,
int TySizeLog2,
630 __ESIMD_INTRIN __ESIMD_DNS::vector_type_t<Ty, N>
631 __esimd_gather_scaled(__ESIMD_DNS::simd_mask_storage_t<N> pred,
632 SurfIndAliasTy surf_ind, uint32_t global_offset,
633 __ESIMD_DNS::vector_type_t<uint32_t, N> addrs)
634 #ifdef __SYCL_DEVICE_ONLY__
638 __ESIMD_DNS::vector_type_t<Ty, N> retv = 0;
639 sycl::detail::ESIMDDeviceInterface *I =
// SLM path.
644 assert(global_offset == 0);
645 char *SlmBase = I->__cm_emu_get_slm_ptr();
646 for (
int i = 0; i < N; ++i) {
648 Ty *addr =
reinterpret_cast<Ty *
>(addrs[i] + SlmBase);
// Buffer path, under the surface mutex.
656 std::mutex *mutexLock;
658 I->sycl_get_cm_buffer_params_ptr(surf_ind, &readBase, &width, &mutexLock);
659 readBase += global_offset;
661 std::lock_guard<std::mutex> lock(*mutexLock);
663 for (
int idx = 0; idx < N; idx++) {
665 Ty *addr =
reinterpret_cast<Ty *
>(addrs[idx] + readBase);
676 #endif // __SYCL_DEVICE_ONLY__
// Emulated masked surface gather ("scaled2" flavor, Scale must be 0).
// Reads elements at their wire width: RestoredTy is the unsigned type of
// OrigSize = 1/2/4 bytes (from TySizeLog2).  SLM path reads from
// SlmBase + offsets[idx]; buffer path reads from readBase + offsets[idx]
// under the surface mutex.  The RestoredTy vector is finally bit-cast back
// to Ty when widths match (OrigSize == 4) or value-converted otherwise.
// NOTE(review): the path condition, predicate checks and element assignments
// are on lines missing from this chunk.
698 template <
typename Ty,
int N,
typename SurfIndAliasTy,
int TySizeLog2,
700 __ESIMD_INTRIN __ESIMD_DNS::vector_type_t<Ty, N>
701 __esimd_gather_masked_scaled2(SurfIndAliasTy surf_ind, uint32_t global_offset,
702 __ESIMD_DNS::vector_type_t<uint32_t, N> offsets,
703 __ESIMD_DNS::simd_mask_storage_t<N> pred)
704 #ifdef __SYCL_DEVICE_ONLY__
708 static_assert(Scale == 0);
712 constexpr
size_t OrigSize = __ESIMD_DNS::ElemsPerAddrDecoding(TySizeLog2);
713 using RestoredTy = __ESIMD_DNS::uint_type_t<OrigSize>;
715 __ESIMD_DNS::vector_type_t<RestoredTy, N> retv = 0;
716 sycl::detail::ESIMDDeviceInterface *I =
// SLM path.
721 assert(global_offset == 0);
722 char *SlmBase = I->__cm_emu_get_slm_ptr();
723 for (
int idx = 0; idx < N; ++idx) {
726 reinterpret_cast<RestoredTy *
>(offsets[idx] + SlmBase);
// Buffer path, under the surface mutex.
733 std::mutex *mutexLock;
735 I->sycl_get_cm_buffer_params_ptr(surf_ind, &readBase, &width, &mutexLock);
737 readBase += global_offset;
738 std::lock_guard<std::mutex> lock(*mutexLock);
739 for (
int idx = 0; idx < N; idx++) {
742 reinterpret_cast<RestoredTy *
>(offsets[idx] + readBase);
// Convert the wire-width vector back to the requested element type.
751 if constexpr (OrigSize == 4) {
752 return __ESIMD_DNS::bitcast<Ty, RestoredTy, N>(retv);
754 return __ESIMD_DNS::convert_vector<Ty, RestoredTy, N>(retv);
757 #endif // __SYCL_DEVICE_ONLY__
// Emulated oword (block) load from a surface.  Same structure as the
// _unaligned variant above: SLM path reads from SlmBase + addr; buffer path
// reads N consecutive elements from readBase + addr under the surface mutex.
// Presumably 'addr' must be oword-aligned here (name contrast with
// __esimd_oword_ld_unaligned) — TODO confirm; not visible in this chunk.
// NOTE(review): path condition, readBase/width declarations, loop-bound
// checks and the return are on lines missing from this chunk.
761 template <
typename Ty,
int N,
typename SurfIndAliasTy,
int32_t IsModified = 0>
762 __ESIMD_INTRIN __ESIMD_DNS::vector_type_t<Ty, N>
763 __esimd_oword_ld(SurfIndAliasTy surf_ind, uint32_t addr)
764 #ifdef __SYCL_DEVICE_ONLY__
770 __ESIMD_DNS::vector_type_t<Ty, N> retv;
771 sycl::detail::ESIMDDeviceInterface *I =
// SLM path.
777 char *SlmBase = I->__cm_emu_get_slm_ptr();
778 for (
int i = 0; i < N; ++i) {
779 Ty *SlmAddr =
reinterpret_cast<Ty *
>(addr + SlmBase);
// Buffer path, under the surface mutex.
787 std::mutex *mutexLock;
789 I->sycl_get_cm_buffer_params_ptr(surf_ind, &readBase, &width, &mutexLock);
791 std::lock_guard<std::mutex> lock(*mutexLock);
793 for (
int idx = 0; idx < N; idx++) {
797 retv[idx] = *((Ty *)(readBase + addr));
799 addr += (uint32_t)
sizeof(Ty);
804 #endif // __SYCL_DEVICE_ONLY__
// Emulated masked RGBA gather from a surface: selects the SLM buffer or the
// CM buffer (under its mutex) as ReadBase, applies global_offset, then walks
// channels in R,G,B,A order loading each enabled lane from
// ReadBase + offsets[I], bumping ReadBase by sizeof(Ty) per channel.
// NOTE(review): the template header's first line, the path-selecting
// condition, Mask/predicate checks and the return are on lines missing from
// this chunk.
808 typename SurfIndAliasTy, int16_t Scale = 0>
811 __esimd_gather4_masked_scaled2(
812 SurfIndAliasTy surf_ind,
int global_offset,
813 __ESIMD_DNS::vector_type_t<uint32_t, N> offsets,
814 __ESIMD_DNS::simd_mask_storage_t<N> pred)
815 #ifdef __SYCL_DEVICE_ONLY__
820 sycl::detail::ESIMDDeviceInterface *I =
823 unsigned int Next = 0;
// SLM path base.
826 ReadBase = I->__cm_emu_get_slm_ptr();
// Buffer path base, taken under the surface mutex.
829 std::mutex *mutexLock;
830 I->sycl_get_cm_buffer_params_ptr(surf_ind, &ReadBase, &width, &mutexLock);
831 std::lock_guard<std::mutex> lock(*mutexLock);
834 ReadBase += global_offset;
836 for (
const auto &channel : ChannelMaskArray) {
838 for (
int I = 0; I < N; I++, Next++) {
840 Ty *Addr =
reinterpret_cast<Ty *
>(ReadBase + offsets[I]);
845 ReadBase += (uint64_t)
sizeof(Ty);
850 #endif // __SYCL_DEVICE_ONLY__
// Emulated masked RGBA scatter to a surface: mirror of
// __esimd_gather4_masked_scaled2 — WriteBase is the SLM buffer or the CM
// buffer (under its mutex) plus global_offset; channels are walked in
// R,G,B,A order, storing each enabled lane's value at WriteBase + offsets[I]
// and bumping WriteBase by sizeof(Ty) per channel.
// NOTE(review): the vals parameter line, path-selecting condition,
// Mask/predicate checks and the store statement are on lines missing from
// this chunk.
853 template <
typename Ty,
int N,
typename SurfIndAliasTy,
855 __ESIMD_INTRIN
void __esimd_scatter4_scaled(
856 __ESIMD_DNS::simd_mask_storage_t<N> pred, SurfIndAliasTy surf_ind,
857 int global_offset, __ESIMD_DNS::vector_type_t<uint32_t, N> offsets,
859 #ifdef __SYCL_DEVICE_ONLY__
863 sycl::detail::ESIMDDeviceInterface *I =
866 unsigned int Next = 0;
// SLM path base.
869 WriteBase = I->__cm_emu_get_slm_ptr();
// Buffer path base, taken under the surface mutex.
872 std::mutex *mutexLock;
873 I->sycl_get_cm_buffer_params_ptr(surf_ind, &WriteBase, &width, &mutexLock);
874 std::lock_guard<std::mutex> lock(*mutexLock);
877 WriteBase += global_offset;
879 for (
const auto &channel : ChannelMaskArray) {
881 for (
int I = 0; I < N; I++, Next++) {
883 Ty *Addr =
reinterpret_cast<Ty *
>(WriteBase + offsets[I]);
888 WriteBase += (uint64_t)
sizeof(Ty);
891 #endif // __SYCL_DEVICE_ONLY__
// Emulated zero-operand dword atomic on a surface.  Per lane, dispatches on
// Op in a switch: only atomic increment is visibly implemented
// (retv[i] = atomic_add(p, 1)); the other visible branches report
// unsupported-on-host.
// NOTE(review): the WriteBase declaration/initialization, the predicate
// check, switch header and the return are on lines missing from this chunk.
894 template <__ESIMD_NS::atomic_op Op,
typename Ty,
int N,
typename SurfIndAliasTy>
895 __ESIMD_INTRIN __ESIMD_DNS::vector_type_t<Ty, N>
896 __esimd_dword_atomic0(__ESIMD_DNS::simd_mask_storage_t<N> pred,
897 SurfIndAliasTy surf_ind,
898 __ESIMD_DNS::vector_type_t<uint32_t, N> addrs)
899 #ifdef __SYCL_DEVICE_ONLY__
903 __ESIMD_DNS::vector_type_t<Ty, N> retv;
909 for (
int i = 0; i < N; i++) {
911 Ty *p =
reinterpret_cast<Ty *
>(addrs[i] + WriteBase);
914 case __ESIMD_NS::atomic_op::inc:
915 retv[i] = __ESIMD_DNS::atomic_add<Ty>(p, 1);
918 __ESIMD_UNSUPPORTED_ON_HOST;
923 __ESIMD_UNSUPPORTED_ON_HOST;
927 #endif // __SYCL_DEVICE_ONLY__
// One-operand dword atomic on a surface: not implemented in the host
// emulation — unconditionally reports unsupported-on-host.
929 template <__ESIMD_NS::atomic_op Op,
typename Ty,
int N,
typename SurfIndAliasTy>
930 __ESIMD_INTRIN __ESIMD_DNS::vector_type_t<Ty, N>
931 __esimd_dword_atomic1(__ESIMD_DNS::simd_mask_storage_t<N> pred,
932 SurfIndAliasTy surf_ind,
933 __ESIMD_DNS::vector_type_t<uint32_t, N> addrs,
934 __ESIMD_DNS::vector_type_t<Ty, N> src0)
935 #ifdef __SYCL_DEVICE_ONLY__
939 __ESIMD_UNSUPPORTED_ON_HOST;
941 #endif // __SYCL_DEVICE_ONLY__
// Two-operand dword atomic on a surface (e.g. cmpxchg-style ops): not
// implemented in the host emulation — unconditionally reports
// unsupported-on-host.
943 template <__ESIMD_NS::atomic_op Op,
typename Ty,
int N,
typename SurfIndAliasTy>
944 __ESIMD_INTRIN __ESIMD_DNS::vector_type_t<Ty, N>
945 __esimd_dword_atomic2(__ESIMD_DNS::simd_mask_storage_t<N> pred,
946 SurfIndAliasTy surf_ind,
947 __ESIMD_DNS::vector_type_t<uint32_t, N> addrs,
948 __ESIMD_DNS::vector_type_t<Ty, N> src0,
949 __ESIMD_DNS::vector_type_t<Ty, N> src1)
950 #ifdef __SYCL_DEVICE_ONLY__
954 __ESIMD_UNSUPPORTED_ON_HOST;
956 #endif // __SYCL_DEVICE_ONLY__
// Emulated 2D media-block load: reads an MxN block of Ty from the image at
// pixel offset (x, y), under the image's mutex, clamping/replicating at the
// image edges (negative x, x beyond imgWidth, y beyond imgHeight), then
// flattens the staging matrix 'in' row-major into the result vector 'vals'.
// SLM surfaces are rejected (assertion message visible at original line 989).
// NOTE(review): many original lines are missing from this chunk (the
// interface pointer, R/C loop-bound definitions, edge-case branches, the
// return); comments describe only the visible logic.
973 template <
typename Ty,
int M,
int N,
int Modifier,
typename TACC,
int Plane,
975 __ESIMD_INTRIN __ESIMD_DNS::vector_type_t<Ty, M * N>
976 __esimd_media_ld(TACC handle,
unsigned x,
unsigned y)
977 #ifdef __SYCL_DEVICE_ONLY__
981 __ESIMD_DNS::vector_type_t<Ty, M * N> vals;
986 std::mutex *mutexLock;
989 "__esimd_media_ld cannot access SLM");
992 handle, &readBase, &imgWidth, &imgHeight, &bpp, &mutexLock);
994 std::lock_guard<std::mutex> lock(*mutexLock);
996 int x_pos_a, y_pos_a, offset, index;
// Staging matrix for the block before flattening.
999 std::vector<std::vector<Ty>> in(M, std::vector<Ty>(N));
1002 for (
int i = 0; i <
R; i++) {
1003 for (
int j = 0; j < C; j++) {
// Byte position of element (i, j) within the image.
1004 x_pos_a =
x + j *
sizeof(Ty);
1005 { y_pos_a =
y + i; }
// Right-edge handling when the element would read past imgWidth.
1008 if ((x_pos_a +
sizeof(Ty)) > imgWidth) {
// Bottom-edge clamp: replicate the last row.
1017 if (y_pos_a > imgHeight - 1) {
1018 y_pos_a = imgHeight - 1;
// NOTE(review): this inner 'offset' shadows the 'offset' declared at
// original line 996 — intent unclear, confirm against the original file.
1026 int offset =
x % bpp;
// Left-edge handling for negative x positions.
1029 while (x_pos_a < 0) {
// Right-edge replication: step back one pixel (bpp bytes) while out of
// bounds, byte by byte for the element being assembled.
1035 if (x_pos_a >= imgWidth) {
1037 x_pos_a = x_pos_a - bpp;
1038 for (
uint byte_count = 0; byte_count <
sizeof(Ty); byte_count++) {
1039 if (x_pos_a >= imgWidth) {
1040 x_pos_a = x_pos_a - bpp;
1042 offset = y_pos_a * imgWidth + x_pos_a;
// Fast path: element fits within one pixel — copy whole Ty values.
1051 if (
sizeof(Ty) <= bpp) {
1052 for (
uint bpp_count = 0; j < C && bpp_count < bpp;
1053 j++, bpp_count +=
sizeof(Ty)) {
1054 in[i][j] = *((Ty *)(readBase + offset + bpp_count));
// Slow path: assemble the element byte-by-byte into the staging row.
1061 unsigned char *pTempBase =
1062 ((
unsigned char *)in[i].data()) + j *
sizeof(Ty);
1063 pTempBase[byte_count] = *((
unsigned char *)(readBase + offset));
1066 x_pos_a = x_pos_a + 1;
// In-bounds case: direct whole-element read.
1071 offset = y_pos_a * imgWidth + x_pos_a;
1072 { in[i][j] = *((Ty *)(readBase + offset)); }
// Flatten the MxN staging matrix row-major into the result vector.
1077 for (
auto i = 0, k = 0; i < M; i++) {
1078 for (
auto j = 0; j < N; j++) {
1079 vals[k++] = in[i][j];
1085 #endif // __SYCL_DEVICE_ONLY__
// Emulated 2D media-block store: unpacks 'vals' row-major into an MxN staging
// matrix 'out', then writes each element to the image at pixel offset (x, y)
// under the image's mutex, skipping out-of-bounds positions (negative x/y,
// x past imgWidth, y past the last row).  Requires x and the row byte size
// (N * sizeof(Ty)) to be 4-byte multiples (asserted).
// NOTE(review): the assertion message at original line 1119 says
// "__esimd_media_ld" but this is __esimd_media_st — looks like a stale
// copy/paste; fix the string literal in a follow-up change.
// NOTE(review): several original lines are missing from this chunk (the SLM
// check itself, edge-branch bodies, closing braces).
1101 template <
typename Ty,
int M,
int N,
int Modifier,
typename TACC,
int Plane,
1103 __ESIMD_INTRIN
void __esimd_media_st(TACC handle,
unsigned x,
unsigned y,
1104 __ESIMD_DNS::vector_type_t<Ty, M * N> vals)
1105 #ifdef __SYCL_DEVICE_ONLY__
1109 sycl::detail::ESIMDDeviceInterface *I =
1116 std::mutex *mutexLock;
1119 "__esimd_media_ld cannot access SLM");
1121 I->sycl_get_cm_image_params_ptr(handle, &writeBase, &imgWidth, &imgHeight,
1124 int x_pos_a, y_pos_a, offset;
// Alignment preconditions on the block origin and row size.
1126 assert((x % 4) == 0);
1127 assert((N *
sizeof(Ty)) % 4 == 0);
// Staging matrix filled from the flat input vector.
1130 std::vector<std::vector<Ty>> out(M, std::vector<Ty>(N));
1132 std::lock_guard<std::mutex> lock(*mutexLock);
1134 for (
int i = 0, k = 0; i < M; i++) {
1135 for (
int j = 0; j < N; j++) {
1136 out[i][j] = vals[k++];
// Write each staged element, skipping positions outside the image.
1140 for (
int i = 0; i < M; i++) {
1141 for (
int j = 0; j < N; j++) {
1142 x_pos_a =
x + j *
sizeof(Ty);
1143 { y_pos_a =
y + i; }
1144 if ((
int)x_pos_a < 0) {
1147 if ((
int)y_pos_a < 0) {
1150 if ((
int)(x_pos_a +
sizeof(Ty)) > imgWidth) {
1154 if ((
int)y_pos_a > imgHeight - 1) {
1157 offset = y_pos_a * imgWidth + x_pos_a;
1158 *((Ty *)(writeBase + offset)) = out[i][j];
1165 #endif // __SYCL_DEVICE_ONLY__
// Stateful-memory helper (compiled out under __ESIMD_FORCE_STATELESS_MEM):
// converts a memory object to a 32-bit surface index.  On device this is a
// SPIR-V pointer-to-integer conversion; on host the raw pointer is obtained
// via AccessorPrivateProxy::getPtr.
// NOTE(review): the function's name and signature line are missing from this
// chunk — presumably the __esimd_get_surface_index-style helper; confirm
// against the full file.
1169 #ifndef __ESIMD_FORCE_STATELESS_MEM
1191 template <
typename MemObjTy>
1193 #ifdef __SYCL_DEVICE_ONLY__
1195 return __spirv_ConvertPtrToU<MemObjTy, uint32_t>(
obj);
1197 #else // __SYCL_DEVICE_ONLY__
1200 __ESIMD_DNS::AccessorPrivateProxy::getPtr(
obj));
1202 #endif // __SYCL_DEVICE_ONLY__
1204 #endif // !__ESIMD_FORCE_STATELESS_MEM