// Region-read intrinsic (declaration only). Takes an N-element vector `Input`
// and a 16-bit `Offset`, and yields an M-element vector of T. VStride, Width
// and Stride are compile-time parameters describing the region to read.
// NOTE(review): the template parameter list is cut off after `int Stride,` in
// this listing; some callers in this file pass seven template arguments, so a
// trailing parameter is presumably declared on the elided line - confirm.
65 template <
typename T,
int N,
int M,
int VStride,
int Width,
int Stride,
67 __ESIMD_INTRIN __ESIMD_DNS::vector_type_t<T, M>
68 __esimd_rdregion(__ESIMD_DNS::vector_type_t<T, N> Input, uint16_t Offset);
// Indirect-read intrinsic (declaration only). Takes an N-element vector
// `Input` and an M-element vector of 16-bit offsets, and yields an M-element
// vector of T. The ParentWidth template parameter defaults to 0.
70 template <
typename T,
int N,
int M,
int ParentW
idth = 0>
71 __ESIMD_INTRIN __ESIMD_DNS::vector_type_t<T, M>
72 __esimd_rdindirect(__ESIMD_DNS::vector_type_t<T, N> Input,
73 __ESIMD_DNS::vector_type_t<uint16_t, M> Offset);
// Region-write intrinsic (declaration only). Takes the N-element vector
// `OldVal`, an M-element vector `NewVal`, a 16-bit `Offset` and a
// simd_mask_storage_t<M> `Mask` that defaults to 1; yields an N-element
// vector of T.
// NOTE(review): the template parameter list is cut off after `int Stride,` in
// this listing - a trailing parameter is presumably declared on the elided
// line; confirm against the full file.
122 template <
typename T,
int N,
int M,
int VStride,
int Width,
int Stride,
124 __ESIMD_INTRIN __ESIMD_DNS::vector_type_t<T, N>
125 __esimd_wrregion(__ESIMD_DNS::vector_type_t<T, N> OldVal,
126 __ESIMD_DNS::vector_type_t<T, M> NewVal, uint16_t Offset,
127 __ESIMD_DNS::simd_mask_storage_t<M> Mask = 1);
// Indirect-write intrinsic (declaration only). Takes the N-element vector
// `OldVal`, an M-element vector `NewVal`, an M-element vector of 16-bit
// offsets and a simd_mask_storage_t<M> `Mask` that defaults to 1; yields an
// N-element vector of T. The ParentWidth template parameter defaults to 0.
129 template <
typename T,
int N,
int M,
int ParentW
idth = 0>
130 __ESIMD_INTRIN __ESIMD_DNS::vector_type_t<T, N>
131 __esimd_wrindirect(__ESIMD_DNS::vector_type_t<T, N> OldVal,
132 __ESIMD_DNS::vector_type_t<T, M> NewVal,
133 __ESIMD_DNS::vector_type_t<uint16_t, M> Offset,
134 __ESIMD_DNS::simd_mask_storage_t<M> Mask = 1);
137 namespace __ESIMD_DNS {
// Shorthand alias: __st<T> names the same type as __raw_t<T>.
139 template <
class T>
using __st = __raw_t<T>;
// Reads the sub-vector described by `Region` out of `Base`.
//   BT, BN - element type and element count of the base vector.
//   RTy    - region descriptor type; this function uses its element_type,
//            length, Size_in_bytes, Is_2D, Size_x and Stride_x members, plus
//            the M_offset_x field of the `Region` object.
// Returns a vector of RTy::length elements of __st<RTy::element_type>.
// NOTE(review): several lines of this body (the statement controlled by the
// `if constexpr`, any `else`, and the closing braces) are absent from this
// listing, so the comments below cover only the statements that are visible.
142 template <
typename BT,
int BN,
typename RTy>
143 __ESIMD_DNS::vector_type_t<__st<typename RTy::element_type>,
RTy::length>
144 ESIMD_INLINE readRegion(
145 const __ESIMD_DNS::vector_type_t<__st<BT>, BN> &Base, RTy Region) {
146 using ElemTy = __st<typename RTy::element_type>;
// Bitcast the base vector so it is expressed in elements of ElemTy.
147 auto Base1 = bitcast<ElemTy, __st<BT>, BN>(Base);
// Size of the base vector in bytes.
148 constexpr
int Bytes = BN *
sizeof(BT);
// Case where the region's byte size equals the base's byte size
// (the statement for this branch is elided in this listing).
149 if constexpr (Bytes == RTy::Size_in_bytes)
// Only non-2D regions are accepted on the path below.
153 static_assert(!RTy::Is_2D);
// Element count of the base when counted in ElemTy elements.
154 constexpr
int N = Bytes /
sizeof(ElemTy);
// Number of elements to read, and the stride between them.
156 constexpr
int M = RTy::Size_x;
157 constexpr
int Stride = RTy::Stride_x;
// Region start: M_offset_x scaled by the element size.
// NOTE(review): declared int16_t here, while __esimd_rdregion takes a
// uint16_t Offset and the std::pair overload of readRegion uses uint16_t -
// confirm the signed type is intended.
158 int16_t Offset =
static_cast<int16_t
>(Region.M_offset_x *
sizeof(ElemTy));
// Expressed as a single row: VStride = 0 and Width = M.
160 return __esimd_rdregion<ElemTy, N, M, 0, M, Stride>(Base1, Offset);
// Reads a nested region: `Region.second` (type U) is read from `Base` first,
// then `Region.first` (type T) is read from that intermediate result.
// Returns a vector of T::length elements of __st<T::element_type>.
// NOTE(review): BN1, M1 and R are used below but their definitions fall on
// lines missing from this listing, as do the variable receiving the first
// __esimd_rdregion result, the declaration initialized by the second offset
// expression, and the end of the function (the final statement is truncated).
165 template <
typename BT,
int BN,
typename T,
typename U>
167 __ESIMD_DNS::vector_type_t<__st<typename T::element_type>,
T::length>
168 readRegion(
const __ESIMD_DNS::vector_type_t<__st<BT>, BN> &Base,
169 std::pair<T, U> Region) {
// Shape of the parent region, obtained through shape_type<U>.
171 using PaTy =
typename shape_type<U>::type;
173 using BT1 =
typename PaTy::element_type;
174 using ElemTy = __st<typename T::element_type>;
// Read the parent region out of Base first.
176 auto Base1 = readRegion<BT, BN>(Base, Region.second);
// If T is not 2D, or BN1 * sizeof(BT1) equals T's byte size, a single read of
// Region.first from the parent result is used.
177 if constexpr (!T::Is_2D || BN1 *
sizeof(BT1) == T::Size_in_bytes)
179 return readRegion<BT1, BN1>(Base1, Region.first);
// Past this point T must be 2D and share the parent's element type.
181 static_assert(T::Is_2D);
182 static_assert(std::is_same<ElemTy, __st<BT1>>::value);
// First read: T::Size_y rows, each PaTy::Size_x elements wide with unit
// element stride, spaced T::Stride_y parent rows apart.
185 constexpr
int M = T::Size_y * PaTy::Size_x;
186 constexpr
int VS = T::Stride_y * PaTy::Size_x;
187 constexpr
int W = PaTy::Size_x;
188 constexpr
int HS = 1;
189 constexpr
int ParentWidth = PaTy::Size_x;
// Offset of the first row: M_offset_y rows of PaTy::Size_x elements, scaled
// by the element size.
190 uint16_t Offset =
static_cast<uint16_t
>(Region.first.M_offset_y *
191 PaTy::Size_x *
sizeof(ElemTy));
// NOTE(review): the left-hand side receiving this result is not in the
// listing (presumably `R`, which is used by the final call).
194 __esimd_rdregion<ElemTy, BN1, M, VS, W, HS, ParentWidth>(Base1, Offset);
// Second read: its input has the M elements produced above; rows are
// PaTy::Size_x apart, and T::Size_x elements are taken per row with stride
// T::Stride_x.
197 constexpr
int N1 = M;
199 constexpr
int VS1 = PaTy::Size_x;
200 constexpr
int W1 = T::Size_x;
201 constexpr
int HS1 = T::Stride_x;
// M_offset_x scaled by the element size (the declaration this expression
// initializes is elided in this listing).
203 static_cast<uint16_t
>(Region.first.M_offset_x *
sizeof(ElemTy));
205 return __esimd_rdregion<ElemTy, N1, M1, VS1, W1, HS1, ParentWidth>(R,
// Declaration only: takes a pointer to a const N-element vector of T and
// returns an N-element vector of T by value. Presumably yields the vector
// stored at `ptr`; the definition is not visible here - confirm.
218 template <
typename T,
int N>
219 __ESIMD_INTRIN __ESIMD_DNS::vector_type_t<T, N>
220 __esimd_vload(
const __ESIMD_DNS::vector_type_t<T, N> *ptr);
// Declaration only: takes a pointer to an N-element vector of T and an
// N-element vector `vals` by value; returns nothing. Presumably writes `vals`
// to the location `ptr` points at; the definition is not visible here -
// confirm.
226 template <
typename T,
int N>
227 __ESIMD_INTRIN
void __esimd_vstore(__ESIMD_DNS::vector_type_t<T, N> *ptr,
228 __ESIMD_DNS::vector_type_t<T, N> vals);
// Produces a uint16_t from the N-element vector `src`.
// NOTE(review): only the signature, the __SYCL_DEVICE_ONLY__ guard and the
// loop header survive in this listing. Judging by the name, the result is
// presumably nonzero when at least one element of `src` is nonzero - confirm
// against the full file.
230 template <
typename T,
int N>
231 __ESIMD_INTRIN uint16_t __esimd_any(__ESIMD_DNS::vector_type_t<T, N> src)
232 #ifdef __SYCL_DEVICE_ONLY__
// Iterates over indices 0 .. N-1 (loop body elided in this listing).
236 for (
unsigned int i = 0; i != N; i++) {
242 #endif // __SYCL_DEVICE_ONLY__
// Produces a uint16_t from the N-element vector `src`.
// NOTE(review): only the signature, the __SYCL_DEVICE_ONLY__ guard and the
// loop header survive in this listing. Judging by the name, the result is
// presumably nonzero only when every element of `src` is nonzero - confirm
// against the full file.
244 template <
typename T,
int N>
245 __ESIMD_INTRIN uint16_t __esimd_all(__ESIMD_DNS::vector_type_t<T, N> src)
246 #ifdef __SYCL_DEVICE_ONLY__
// Iterates over indices 0 .. N-1 (loop body elided in this listing).
250 for (
unsigned int i = 0; i != N; i++) {
256 #endif // __SYCL_DEVICE_ONLY__
258 #ifndef __SYCL_DEVICE_ONLY__
// Definition of __esimd_rdregion for builds where __SYCL_DEVICE_ONLY__ is not
// defined. Copies M elements of `Input` into the result as M / Width rows of
// Width elements; the element for row i, column j is taken from
// Input[i * VStride + j * Stride + Offset / sizeof(T)].
// Asserts that `Offset` is a multiple of sizeof(T) and that M is a multiple
// of Width.
// NOTE(review): the declaration of `Index`, the return statement and the
// closing braces are not in this listing, and the template parameter list is
// cut off after `int Stride,`. No check of the computed index against N is
// visible here.
261 template <
typename T,
int N,
int M,
int VStride,
int Width,
int Stride,
263 __ESIMD_INTRIN __ESIMD_DNS::vector_type_t<T, M>
264 __esimd_rdregion(__ESIMD_DNS::vector_type_t<T, N> Input, uint16_t Offset) {
// Convert the byte offset into an element offset.
265 uint16_t EltOffset = Offset /
sizeof(
T);
266 assert(Offset %
sizeof(
T) == 0);
268 int NumRows = M / Width;
269 assert(M % Width == 0);
271 __ESIMD_DNS::vector_type_t<T, M> Result;
// Row-major walk over the region; Index advances once per element copied.
273 for (
int i = 0; i < NumRows; ++i) {
274 for (
int j = 0; j < Width; ++j) {
275 Result[Index++] = Input[i * VStride + j * Stride + EltOffset];
// Definition of __esimd_rdindirect for builds where __SYCL_DEVICE_ONLY__ is
// not defined. For each i in [0, M), result element i is taken from
// Input[Offset[i] / sizeof(T)].
// Asserts that every offset is a multiple of sizeof(T) and that the resulting
// element index is below N.
// NOTE(review): the return statement and closing braces are not in this
// listing.
281 template <
typename T,
int N,
int M,
int ParentW
idth>
282 __ESIMD_INTRIN __ESIMD_DNS::vector_type_t<T, M>
283 __esimd_rdindirect(__ESIMD_DNS::vector_type_t<T, N> Input,
284 __ESIMD_DNS::vector_type_t<uint16_t, M> Offset) {
285 __ESIMD_DNS::vector_type_t<T, M> Result;
286 for (
int i = 0; i < M; ++i) {
// Convert this element's byte offset into an element index.
287 uint16_t EltOffset = Offset[i] /
sizeof(
T);
288 assert(Offset[i] %
sizeof(
T) == 0);
289 assert(EltOffset < N);
290 Result[i] = Input[EltOffset];
// Definition of __esimd_wrregion for builds where __SYCL_DEVICE_ONLY__ is not
// defined. Starts from a copy of `OldVal` and stores elements of `NewVal`
// into it, walking M / Width rows of Width elements; the destination for row
// i, column j is Result[i * VStride + j * Stride + Offset / sizeof(T)].
// Asserts that `Offset` is a multiple of sizeof(T) and that M is a multiple
// of Width.
// NOTE(review): the declaration and update of `Index`, the line immediately
// before the store (where `Mask` would presumably be tested), the return
// statement and the closing braces are not in this listing, and the template
// parameter list is cut off after `int Stride,`. Whether `Mask` gates each
// store cannot be confirmed from the visible lines.
295 template <
typename T,
int N,
int M,
int VStride,
int Width,
int Stride,
297 __ESIMD_INTRIN __ESIMD_DNS::vector_type_t<T, N>
298 __esimd_wrregion(__ESIMD_DNS::vector_type_t<T, N> OldVal,
299 __ESIMD_DNS::vector_type_t<T, M> NewVal, uint16_t Offset,
300 __ESIMD_DNS::simd_mask_storage_t<M> Mask) {
// Convert the byte offset into an element offset.
301 uint16_t EltOffset = Offset /
sizeof(
T);
302 assert(Offset %
sizeof(
T) == 0);
304 int NumRows = M / Width;
305 assert(M % Width == 0);
// Elements that are not overwritten keep their value from OldVal.
307 __ESIMD_DNS::vector_type_t<T, N> Result = OldVal;
309 for (
int i = 0; i < NumRows; ++i) {
310 for (
int j = 0; j < Width; ++j) {
312 Result[i * VStride + j * Stride + EltOffset] = NewVal[Index];
// Definition of __esimd_wrindirect for builds where __SYCL_DEVICE_ONLY__ is
// not defined. Starts from a copy of `OldVal`; for each i in [0, M) the
// visible store writes NewVal[i] to Result[Offset[i] / sizeof(T)].
// Asserts that every offset is a multiple of sizeof(T) and that the resulting
// element index is below N.
// NOTE(review): the line between the loop header and the offset computation
// (where `Mask` would presumably be tested), the return statement and the
// closing braces are not in this listing. Whether `Mask` gates each store
// cannot be confirmed from the visible lines.
319 template <
typename T,
int N,
int M,
int ParentW
idth>
320 __ESIMD_INTRIN __ESIMD_DNS::vector_type_t<T, N>
321 __esimd_wrindirect(__ESIMD_DNS::vector_type_t<T, N> OldVal,
322 __ESIMD_DNS::vector_type_t<T, M> NewVal,
323 __ESIMD_DNS::vector_type_t<uint16_t, M> Offset,
324 __ESIMD_DNS::simd_mask_storage_t<M> Mask) {
// Elements that are not overwritten keep their value from OldVal.
325 __ESIMD_DNS::vector_type_t<T, N> Result = OldVal;
326 for (
int i = 0; i < M; ++i) {
// Convert this element's byte offset into an element index.
328 uint16_t EltOffset = Offset[i] /
sizeof(
T);
329 assert(Offset[i] %
sizeof(
T) == 0);
330 assert(EltOffset < N);
331 Result[EltOffset] = NewVal[i];
337 #endif // __SYCL_DEVICE_ONLY__