// Declaration of the region-read intrinsic __esimd_rdregion.
// It takes an N-element vector `Input` plus a 16-bit `Offset` and yields an
// M-element vector. The enable_if_t guard removes this overload unless
// Width > 0 and M is a multiple of Width.
// The host-side definition further down in this file divides `Offset` by
// sizeof(T), i.e. it treats the offset as a byte count.
// NOTE(review): listing line 66 is absent from this view, so the end of the
// template parameter list (whatever follows `Stride`) is not visible here.
65 template <
typename T,
int N,
int M,
int VStride,
int Width,
int Stride,
67 __ESIMD_INTRIN std::enable_if_t<(Width > 0) && M % Width == 0,
68 __ESIMD_DNS::vector_type_t<T, M>>
69 __esimd_rdregion(__ESIMD_DNS::vector_type_t<T, N> Input, uint16_t Offset);
71 template <
typename T,
int N,
int M,
int ParentW
idth = 0>
72 __ESIMD_INTRIN __ESIMD_DNS::vector_type_t<T, M>
73 __esimd_rdindirect(__ESIMD_DNS::vector_type_t<T, N> Input,
74 __ESIMD_DNS::vector_type_t<uint16_t, M> Offset);
// Declaration of the region-write intrinsic __esimd_wrregion.
// It takes the current N-element vector `OldVal`, an M-element vector
// `NewVal`, a 16-bit `Offset` and an M-lane `Mask` (default argument 1), and
// yields an N-element vector. The enable_if_t guard removes this overload
// unless M <= N, Width > 0 and M is a multiple of Width.
// The host-side definition further down in this file divides `Offset` by
// sizeof(T), i.e. it treats the offset as a byte count.
// NOTE(review): listing line 123 is absent from this view, so the end of the
// template parameter list (whatever follows `Stride`) is not visible here.
122 template <
typename T,
int N,
int M,
int VStride,
int Width,
int Stride,
124 __ESIMD_INTRIN std::enable_if_t<M <= N && (Width > 0) && M % Width == 0,
125 __ESIMD_DNS::vector_type_t<T, N>>
126 __esimd_wrregion(__ESIMD_DNS::vector_type_t<T, N> OldVal,
127 __ESIMD_DNS::vector_type_t<T, M> NewVal, uint16_t Offset,
128 __ESIMD_DNS::simd_mask_storage_t<M> Mask = 1);
130 template <
typename T,
int N,
int M,
int ParentW
idth = 0>
131 __ESIMD_INTRIN std::enable_if_t<M <= N, __ESIMD_DNS::vector_type_t<T, N>>
132 __esimd_wrindirect(__ESIMD_DNS::vector_type_t<T, N> OldVal,
133 __ESIMD_DNS::vector_type_t<T, M> NewVal,
134 __ESIMD_DNS::vector_type_t<uint16_t, M> Offset,
135 __ESIMD_DNS::simd_mask_storage_t<M> Mask = 1);
138 inline namespace _V1 {
139 namespace ext::intel::esimd::detail {
141 template <
class T>
using __st = __raw_t<T>;
// readRegion, single-region overload: reads the elements described by
// `Region` (of type RTy) out of the BN-element vector `Base` and returns a
// vector whose element type is __st<typename RTy::element_type>.
// NOTE(review): listing lines 146, 152-154 and everything after 162 are
// absent from this view, so the remainder of the return type, the body of the
// size-equal branch and the closing braces are not visible here.
144 template <
typename BT,
int BN,
typename RTy>
145 __ESIMD_DNS::vector_type_t<__st<typename RTy::element_type>,
147 readRegion(
const __ESIMD_DNS::vector_type_t<__st<BT>, BN> &Base, RTy Region) {
148 using ElemTy = __st<typename RTy::element_type>;
// Re-express Base in terms of the region's element type via bitcast.
149 auto Base1 = bitcast<ElemTy, __st<BT>, BN>(Base);
// Total size of Base in bytes.
150 constexpr
int Bytes = BN *
sizeof(BT);
// Case 1: the region spans exactly as many bytes as Base
// (the statement executed in this case is not visible in this view).
151 if constexpr (Bytes == RTy::Size_in_bytes)
// Otherwise only non-2D regions are accepted.
155 static_assert(!RTy::Is_2D);
// Number of ElemTy-sized elements that fit in Base.
156 constexpr
int N = Bytes /
sizeof(ElemTy);
// Element count and stride of the region along x.
158 constexpr
int M = RTy::Size_x;
159 constexpr
int Stride = RTy::Stride_x;
// Region start: the x offset scaled by the element size.
160 int16_t Offset =
static_cast<int16_t
>(Region.M_offset_x *
sizeof(ElemTy));
// VStride = 0 and Width = M, i.e. the read is a single row of M elements.
162 return __esimd_rdregion<ElemTy, N, M, 0, M, Stride>(Base1, Offset);
// readRegion, nested-region overload: `Region` is a std::pair<T, U>.
// Region.second is resolved first through a recursive readRegion call, and
// Region.first is then applied to that intermediate result.
// NOTE(review): many listing lines are absent from this view (168, 172, 174,
// 177, 180, 182, 185-186, 194-195, 197-198, 200, 204, 206, 208+). The
// declarations of `BN1`, `M1` and `R`, the target of the second offset
// computation, and the end of the final call are therefore not visible here.
167 template <
typename BT,
int BN,
typename T,
typename U>
169 __ESIMD_DNS::vector_type_t<__st<typename T::element_type>,
T::length>
170 readRegion(
const __ESIMD_DNS::vector_type_t<__st<BT>, BN> &Base,
171 std::pair<T, U> Region) {
// Shape type derived from the second pair member (the parent region).
173 using PaTy =
typename shape_type<U>::type;
175 using BT1 =
typename PaTy::element_type;
176 using ElemTy = __st<typename T::element_type>;
// Read the parent region (Region.second) out of Base first.
178 auto Base1 = readRegion<BT, BN>(Base, Region.second);
// If the top region is not 2D, or spans the same number of bytes as the
// parent result, the single-region overload can handle it directly.
179 if constexpr (!T::Is_2D || BN1 *
sizeof(BT1) == T::Size_in_bytes)
181 return readRegion<BT1, BN1>(Base1, Region.first);
// From here on the top region is 2D and shares the parent's element type.
183 static_assert(T::Is_2D);
184 static_assert(std::is_same_v<ElemTy, __st<BT1>>);
// First read: T::Size_y rows, each PaTy::Size_x elements wide with unit
// horizontal stride; consecutive rows are T::Stride_y * PaTy::Size_x
// elements apart.
187 constexpr
int M = T::Size_y * PaTy::Size_x;
188 constexpr
int VS = T::Stride_y * PaTy::Size_x;
189 constexpr
int W = PaTy::Size_x;
190 constexpr
int HS = 1;
191 constexpr
int ParentWidth = PaTy::Size_x;
// Start of the first read: y offset times the parent row width, scaled by
// the element size.
192 uint16_t Offset =
static_cast<uint16_t
>(Region.first.M_offset_y *
193 PaTy::Size_x *
sizeof(ElemTy));
196 __esimd_rdregion<ElemTy, BN1, M, VS, W, HS, ParentWidth>(Base1, Offset);
// Second read: its input has M elements; rows are PaTy::Size_x elements
// apart, T::Size_x elements wide, with horizontal stride T::Stride_x.
199 constexpr
int N1 = M;
201 constexpr
int VS1 = PaTy::Size_x;
202 constexpr
int W1 = T::Size_x;
203 constexpr
int HS1 = T::Stride_x;
// x offset scaled by the element size (the variable it is stored in is on a
// listing line that is not visible here).
205 static_cast<uint16_t
>(Region.first.M_offset_x *
sizeof(ElemTy));
207 return __esimd_rdregion<ElemTy, N1, M1, VS1, W1, HS1, ParentWidth>(R,
221 template <
typename T,
int N>
222 __ESIMD_INTRIN __ESIMD_DNS::vector_type_t<T, N>
223 __esimd_vload(
const __ESIMD_DNS::vector_type_t<T, N> *ptr);
229 template <
typename T,
int N>
230 __ESIMD_INTRIN
void __esimd_vstore(__ESIMD_DNS::vector_type_t<T, N> *ptr,
231 __ESIMD_DNS::vector_type_t<T, N> vals);
// __esimd_any: takes an N-element vector `src` and returns a uint16_t.
// The visible part of the body is a loop over the N element indices of `src`.
// NOTE(review): listing lines 236-238 and 240+ are absent from this view, so
// the loop body, the returned values, and which preprocessor branch the loop
// belongs to are not visible here. The name suggests an "any element set"
// test - confirm against the full source.
233 template <
typename T,
int N>
234 __ESIMD_INTRIN uint16_t __esimd_any(__ESIMD_DNS::vector_type_t<T, N> src)
235 #ifdef __SYCL_DEVICE_ONLY__
239 for (
unsigned int i = 0; i != N; i++) {
// __esimd_all: takes an N-element vector `src` and returns a uint16_t.
// The visible part of the body is a loop over the N element indices of `src`.
// NOTE(review): listing lines 250-252 and 254+ are absent from this view, so
// the loop body, the returned values, and which preprocessor branch the loop
// belongs to are not visible here. The name suggests an "all elements set"
// test - confirm against the full source.
247 template <
typename T,
int N>
248 __ESIMD_INTRIN uint16_t __esimd_all(__ESIMD_DNS::vector_type_t<T, N> src)
249 #ifdef __SYCL_DEVICE_ONLY__
253 for (
unsigned int i = 0; i != N; i++) {
261 #ifndef __SYCL_DEVICE_ONLY__
// Definition of __esimd_rdregion; it follows an
// `#ifndef __SYCL_DEVICE_ONLY__` line in this listing.
// Gathers M elements from `Input` into `Result`, organised as M / Width rows
// of Width elements: element (i, j) is taken from index
// i * VStride + j * Stride + EltOffset of `Input`.
// NOTE(review): listing lines 265, 276 and 280+ are absent from this view, so
// the end of the template parameter list, the declaration of `Index`, and the
// closing braces / return statement are not visible here.
264 template <
typename T,
int N,
int M,
int VStride,
int Width,
int Stride,
266 __ESIMD_INTRIN std::enable_if_t<(Width > 0) && M % Width == 0,
267 __ESIMD_DNS::vector_type_t<T, M>>
268 __esimd_rdregion(__ESIMD_DNS::vector_type_t<T, N> Input, uint16_t Offset) {
// Offset arrives in bytes; convert it to an element index and require that
// it is element-aligned.
269 uint16_t EltOffset = Offset /
sizeof(T);
270 assert(Offset %
sizeof(T) == 0);
// The M result elements form NumRows rows of Width elements each.
272 int NumRows = M / Width;
273 assert(M % Width == 0);
275 __ESIMD_DNS::vector_type_t<T, M> Result;
// Row-major walk over the region; Index advances once per element read.
277 for (
int i = 0; i < NumRows; ++i) {
278 for (
int j = 0; j < Width; ++j) {
279 Result[Index++] = Input[i * VStride + j * Stride + EltOffset];
// Definition of __esimd_rdindirect; it follows an
// `#ifndef __SYCL_DEVICE_ONLY__` line in this listing.
// For each of the M lanes it converts Offset[i] to an element index and
// copies that element of `Input` into Result[i].
// NOTE(review): the last template parameter is the identifier `ParentWidth`
// broken across two lines by the listing. Listing lines 295+ (closing braces
// and return statement) are absent from this view.
285 template <
typename T,
int N,
int M,
int ParentW
idth>
286 __ESIMD_INTRIN __ESIMD_DNS::vector_type_t<T, M>
287 __esimd_rdindirect(__ESIMD_DNS::vector_type_t<T, N> Input,
288 __ESIMD_DNS::vector_type_t<uint16_t, M> Offset) {
289 __ESIMD_DNS::vector_type_t<T, M> Result;
290 for (
int i = 0; i < M; ++i) {
// Per-lane byte offset -> element index; it must be element-aligned and
// must address an element inside Input.
291 uint16_t EltOffset = Offset[i] /
sizeof(T);
292 assert(Offset[i] %
sizeof(T) == 0);
293 assert(EltOffset < N);
294 Result[i] = Input[EltOffset];
// Definition of __esimd_wrregion; it follows an
// `#ifndef __SYCL_DEVICE_ONLY__` line in this listing.
// Starts from a copy of `OldVal` and writes elements of `NewVal` into it,
// organised as M / Width rows of Width elements: element (i, j) goes to index
// i * VStride + j * Stride + EltOffset of the result.
// NOTE(review): listing lines 300, 313, 316 and 318+ are absent from this
// view, so the end of the template parameter list, the declaration and update
// of `Index`, any use of `Mask`, and the closing braces / return statement
// are not visible here.
299 template <
typename T,
int N,
int M,
int VStride,
int Width,
int Stride,
301 __ESIMD_INTRIN std::enable_if_t<M <= N && (Width > 0) && M % Width == 0,
302 __ESIMD_DNS::vector_type_t<T, N>>
303 __esimd_wrregion(__ESIMD_DNS::vector_type_t<T, N> OldVal,
304 __ESIMD_DNS::vector_type_t<T, M> NewVal, uint16_t Offset,
305 __ESIMD_DNS::simd_mask_storage_t<M> Mask) {
// Offset arrives in bytes; convert it to an element index and require that
// it is element-aligned.
306 uint16_t EltOffset = Offset /
sizeof(T);
307 assert(Offset %
sizeof(T) == 0);
// The M new elements form NumRows rows of Width elements each.
309 int NumRows = M / Width;
310 assert(M % Width == 0);
// Elements that are not overwritten keep their value from OldVal.
312 __ESIMD_DNS::vector_type_t<T, N> Result = OldVal;
314 for (
int i = 0; i < NumRows; ++i) {
315 for (
int j = 0; j < Width; ++j) {
317 Result[i * VStride + j * Stride + EltOffset] = NewVal[Index];
// Definition of __esimd_wrindirect; it follows an
// `#ifndef __SYCL_DEVICE_ONLY__` line in this listing.
// Starts from a copy of `OldVal`; for each of the M lanes it converts
// Offset[i] to an element index and stores NewVal[i] at that index.
// NOTE(review): the last template parameter is the identifier `ParentWidth`
// broken across two lines by the listing. Listing lines 332 and 337+ are
// absent from this view, so any use of `Mask` and the closing braces / return
// statement are not visible here.
324 template <
typename T,
int N,
int M,
int ParentW
idth>
325 __ESIMD_INTRIN std::enable_if_t<M <= N, __ESIMD_DNS::vector_type_t<T, N>>
326 __esimd_wrindirect(__ESIMD_DNS::vector_type_t<T, N> OldVal,
327 __ESIMD_DNS::vector_type_t<T, M> NewVal,
328 __ESIMD_DNS::vector_type_t<uint16_t, M> Offset,
329 __ESIMD_DNS::simd_mask_storage_t<M> Mask) {
// Elements that are not overwritten keep their value from OldVal.
330 __ESIMD_DNS::vector_type_t<T, N> Result = OldVal;
331 for (
int i = 0; i < M; ++i) {
// Per-lane byte offset -> element index; it must be element-aligned and
// must address an element inside the result.
333 uint16_t EltOffset = Offset[i] /
sizeof(T);
334 assert(Offset[i] %
sizeof(T) == 0);
335 assert(EltOffset < N);
336 Result[EltOffset] = NewVal[i];
343 #ifdef __SYCL_DEVICE_ONLY__
346 template <
class To,
class From,
int N>
347 __ESIMD_INTRIN __ESIMD_DNS::vector_type_t<To, N>
348 __esimd_bf_cvt(__ESIMD_DNS::vector_type_t<From, N> Val);
351 #ifdef __SYCL_DEVICE_ONLY__
352 template <
class To,
class From,
int N>
353 __ESIMD_INTRIN __ESIMD_DNS::vector_type_t<To, N>
354 __esimd_tf32_cvt(__ESIMD_DNS::vector_type_t<From, N> Val);
ValueT length(const ValueT *a, const int len)
Calculate the square root of the input array.