// Region-read intrinsic (declaration only): returns an M-element vector
// selected from the N-element vector `Input`. The host-side definition later
// in this file reads element `i * VStride + j * Stride + Offset / sizeof(T)`
// for each row i and column j, with `Width` elements per row.
// NOTE(review): the template parameter list ends at `int Stride,` in this
// listing - a trailing parameter and the closing `>` are not visible. Callers
// below pass a seventh `ParentWidth` argument; confirm against the full header.
65 template <
typename T,
int N,
int M,
int VStride,
int Width,
int Stride,
67 __ESIMD_INTRIN __ESIMD_DNS::vector_type_t<T, M>
68 __esimd_rdregion(__ESIMD_DNS::vector_type_t<T, N> Input, uint16_t Offset);
70 template <
typename T,
int N,
int M,
int ParentW
idth = 0>
71 __ESIMD_INTRIN __ESIMD_DNS::vector_type_t<T, M>
72 __esimd_rdindirect(__ESIMD_DNS::vector_type_t<T, N> Input,
73 __ESIMD_DNS::vector_type_t<uint16_t, M> Offset);
// Region-write intrinsic (declaration only): takes the N-element `OldVal`,
// the M-element `NewVal`, an offset and a mask, and returns an N-element
// vector. Participates in overload resolution only when M <= N
// (std::enable_if_t). `Mask` defaults to 1.
// The host-side definition later in this file starts from a copy of `OldVal`
// and stores NewVal elements at `i * VStride + j * Stride + Offset / sizeof(T)`.
// NOTE(review): the template parameter list ends at `int Stride,` in this
// listing - a trailing parameter and the closing `>` are not visible; confirm
// against the full header.
121 template <
typename T,
int N,
int M,
int VStride,
int Width,
int Stride,
123 __ESIMD_INTRIN std::enable_if_t<M <= N, __ESIMD_DNS::vector_type_t<T, N>>
124 __esimd_wrregion(__ESIMD_DNS::vector_type_t<T, N> OldVal,
125 __ESIMD_DNS::vector_type_t<T, M> NewVal, uint16_t Offset,
126 __ESIMD_DNS::simd_mask_storage_t<M> Mask = 1);
128 template <
typename T,
int N,
int M,
int ParentW
idth = 0>
129 __ESIMD_INTRIN std::enable_if_t<M <= N, __ESIMD_DNS::vector_type_t<T, N>>
130 __esimd_wrindirect(__ESIMD_DNS::vector_type_t<T, N> OldVal,
131 __ESIMD_DNS::vector_type_t<T, M> NewVal,
132 __ESIMD_DNS::vector_type_t<uint16_t, M> Offset,
133 __ESIMD_DNS::simd_mask_storage_t<M> Mask = 1);
136 inline namespace _V1 {
137 namespace ext::intel::esimd::detail {
139 template <
class T>
using __st = __raw_t<T>;
// Reads a 1D region described by `Region` out of the BN-element vector `Base`.
// The base is first bitcast to the region's element type; the read itself is
// delegated to __esimd_rdregion with VStride = 0 and Width = M (a single row).
// NOTE(review): several original lines are missing from this listing (the
// body of the `if constexpr` branch and the closing braces) - confirm against
// the full header before relying on these notes.
142 template <
typename BT,
int BN,
typename RTy>
143 __ESIMD_DNS::vector_type_t<__st<typename RTy::element_type>,
RTy::length>
144 ESIMD_INLINE readRegion(
145 const __ESIMD_DNS::vector_type_t<__st<BT>, BN> &Base, RTy Region) {
146 using ElemTy = __st<typename RTy::element_type>;
// Reinterpret the base vector in terms of the region's element type.
147 auto Base1 = bitcast<ElemTy, __st<BT>, BN>(Base);
// Total size of the base vector in bytes.
148 constexpr
int Bytes = BN *
sizeof(BT);
// The region covers the whole base vector; the branch body is not visible
// here (presumably it returns Base1 unchanged - TODO confirm).
149 if constexpr (Bytes == RTy::Size_in_bytes)
// Past this point only 1D regions are accepted.
153 static_assert(!RTy::Is_2D);
// Number of ElemTy elements in the bitcast base vector.
154 constexpr
int N = Bytes /
sizeof(ElemTy);
// Number of elements to read and the stride between them.
156 constexpr
int M = RTy::Size_x;
157 constexpr
int Stride = RTy::Stride_x;
// Region's x offset scaled by the element size.
// NOTE(review): declared int16_t here, while __esimd_rdregion takes uint16_t
// and the std::pair overload below uses uint16_t - confirm this is intended.
158 int16_t Offset =
static_cast<int16_t
>(Region.M_offset_x *
sizeof(ElemTy));
160 check_rdregion_params<N, M, 0, M, Stride>();
161 return __esimd_rdregion<ElemTy, N, M, 0, M, Stride>(Base1, Offset);
// Reads a nested region: `Region.second` describes the parent region inside
// `Base`, `Region.first` the region to extract from that parent.
// The parent is read first via the other readRegion overload. If the inner
// region is not 2D, or covers the whole parent, the result is a plain
// readRegion on the parent; otherwise two __esimd_rdregion calls are used.
// NOTE(review): this listing is missing several original lines (the
// declarations of `BN1`, `R` and `M1`, the start of the second offset
// declaration, and the end of the final call) - confirm against the full
// header before relying on these notes.
166 template <
typename BT,
int BN,
typename T,
typename U>
168 __ESIMD_DNS::vector_type_t<__st<typename T::element_type>,
T::length>
169 readRegion(
const __ESIMD_DNS::vector_type_t<__st<BT>, BN> &Base,
170 std::pair<T, U> Region) {
// Shape and element type of the parent region.
172 using PaTy =
typename shape_type<U>::type;
174 using BT1 =
typename PaTy::element_type;
175 using ElemTy = __st<typename T::element_type>;
// Read the parent region out of the base vector.
177 auto Base1 = readRegion<BT, BN>(Base, Region.second);
// Simple case: 1D inner region, or inner region as large as the parent.
178 if constexpr (!T::Is_2D || BN1 *
sizeof(BT1) == T::Size_in_bytes)
180 return readRegion<BT1, BN1>(Base1, Region.first);
// 2D case: element types of inner and parent regions must match.
182 static_assert(T::Is_2D);
183 static_assert(std::is_same_v<ElemTy, __st<BT1>>);
// First read: parameters use the inner region's y size/stride/offset, each
// scaled by the parent's row width (PaTy::Size_x), with unit stride in a row.
186 constexpr
int M = T::Size_y * PaTy::Size_x;
187 constexpr
int VS = T::Stride_y * PaTy::Size_x;
188 constexpr
int W = PaTy::Size_x;
189 constexpr
int HS = 1;
190 constexpr
int ParentWidth = PaTy::Size_x;
191 uint16_t Offset =
static_cast<uint16_t
>(Region.first.M_offset_y *
192 PaTy::Size_x *
sizeof(ElemTy));
193 check_rdregion_params<BN1, M, VS, W, HS>();
// NOTE(review): the result of this call is presumably bound to `R`, which is
// used in the final call; the binding itself is not visible here.
195 __esimd_rdregion<ElemTy, BN1, M, VS, W, HS, ParentWidth>(Base1, Offset);
// Second read: parameters use the inner region's x size/stride/offset over
// the M elements produced by the first read.
198 constexpr
int N1 = M;
200 constexpr
int VS1 = PaTy::Size_x;
201 constexpr
int W1 = T::Size_x;
202 constexpr
int HS1 = T::Stride_x;
204 static_cast<uint16_t
>(Region.first.M_offset_x *
sizeof(ElemTy));
205 check_rdregion_params<N1, M1, VS1, W1, HS1>();
207 return __esimd_rdregion<ElemTy, N1, M1, VS1, W1, HS1, ParentWidth>(R,
221 template <
typename T,
int N>
222 __ESIMD_INTRIN __ESIMD_DNS::vector_type_t<T, N>
223 __esimd_vload(
const __ESIMD_DNS::vector_type_t<T, N> *ptr);
229 template <
typename T,
int N>
230 __ESIMD_INTRIN
void __esimd_vstore(__ESIMD_DNS::vector_type_t<T, N> *ptr,
231 __ESIMD_DNS::vector_type_t<T, N> vals);
// __esimd_any: takes an N-element vector of T and returns a uint16_t.
// Only the signature, a device-only preprocessor guard and the head of a loop
// over the N elements are visible in this listing.
// NOTE(review): presumably the device build only declares the intrinsic and
// the host build returns non-zero when any element of `src` is non-zero - the
// loop body is not shown, confirm against the full header.
233 template <
typename T,
int N>
234 __ESIMD_INTRIN uint16_t __esimd_any(__ESIMD_DNS::vector_type_t<T, N> src)
235 #ifdef __SYCL_DEVICE_ONLY__
// Visits each of the N elements of `src` in order.
239 for (
unsigned int i = 0; i != N; i++) {
// __esimd_all: takes an N-element vector of T and returns a uint16_t.
// Only the signature, a device-only preprocessor guard and the head of a loop
// over the N elements are visible in this listing.
// NOTE(review): presumably the device build only declares the intrinsic and
// the host build returns non-zero when every element of `src` is non-zero -
// the loop body is not shown, confirm against the full header.
247 template <
typename T,
int N>
248 __ESIMD_INTRIN uint16_t __esimd_all(__ESIMD_DNS::vector_type_t<T, N> src)
249 #ifdef __SYCL_DEVICE_ONLY__
// Visits each of the N elements of `src` in order.
253 for (
unsigned int i = 0; i != N; i++) {
261 #ifndef __SYCL_DEVICE_ONLY__
// Definition of __esimd_rdregion compiled when __SYCL_DEVICE_ONLY__ is not
// defined. Gathers M elements from `Input`, laid out as M / Width rows of
// `Width` elements: row i, column j comes from element
// `i * VStride + j * Stride + Offset / sizeof(T)`.
// NOTE(review): the tail of the template parameter list, the declaration of
// `Index`, the return statement and the closing braces are missing from this
// listing - confirm against the full header.
264 template <
typename T,
int N,
int M,
int VStride,
int Width,
int Stride,
266 __ESIMD_INTRIN __ESIMD_DNS::vector_type_t<T, M>
267 __esimd_rdregion(__ESIMD_DNS::vector_type_t<T, N> Input, uint16_t Offset) {
// `Offset` must be a whole number of elements; convert it to an element index.
268 uint16_t EltOffset = Offset /
sizeof(T);
269 assert(Offset %
sizeof(T) == 0);
// M must split evenly into rows of `Width` elements.
271 int NumRows = M / Width;
272 assert(M % Width == 0);
274 __ESIMD_DNS::vector_type_t<T, M> Result;
// Fill Result row by row; `Index` advances once per element copied.
276 for (
int i = 0; i < NumRows; ++i) {
277 for (
int j = 0; j < Width; ++j) {
278 Result[Index++] = Input[i * VStride + j * Stride + EltOffset];
// Definition of __esimd_rdindirect compiled when __SYCL_DEVICE_ONLY__ is not
// defined. For each of the M result elements, reads the element of `Input`
// addressed by the matching entry of `Offset` (divided by sizeof(T)).
// NOTE(review): the return statement and closing braces are missing from this
// listing - confirm against the full header.
284 template <
typename T,
int N,
int M,
int ParentW
idth>
285 __ESIMD_INTRIN __ESIMD_DNS::vector_type_t<T, M>
286 __esimd_rdindirect(__ESIMD_DNS::vector_type_t<T, N> Input,
287 __ESIMD_DNS::vector_type_t<uint16_t, M> Offset) {
288 __ESIMD_DNS::vector_type_t<T, M> Result;
289 for (
int i = 0; i < M; ++i) {
// Each offset must be a whole number of elements and stay inside `Input`.
290 uint16_t EltOffset = Offset[i] /
sizeof(T);
291 assert(Offset[i] %
sizeof(T) == 0);
292 assert(EltOffset < N);
293 Result[i] = Input[EltOffset];
// Definition of __esimd_wrregion compiled when __SYCL_DEVICE_ONLY__ is not
// defined. Starts from a copy of `OldVal` and stores elements of `NewVal` as
// M / Width rows of `Width` elements: row i, column j goes to element
// `i * VStride + j * Stride + Offset / sizeof(T)`.
// NOTE(review): the tail of the template parameter list, the declaration and
// update of `Index`, any use of `Mask`, the return statement and the closing
// braces are missing from this listing - confirm against the full header.
298 template <
typename T,
int N,
int M,
int VStride,
int Width,
int Stride,
300 __ESIMD_INTRIN std::enable_if_t<M <= N, __ESIMD_DNS::vector_type_t<T, N>>
301 __esimd_wrregion(__ESIMD_DNS::vector_type_t<T, N> OldVal,
302 __ESIMD_DNS::vector_type_t<T, M> NewVal, uint16_t Offset,
303 __ESIMD_DNS::simd_mask_storage_t<M> Mask) {
// `Offset` must be a whole number of elements; convert it to an element index.
304 uint16_t EltOffset = Offset /
sizeof(T);
305 assert(Offset %
sizeof(T) == 0);
// M must split evenly into rows of `Width` elements.
307 int NumRows = M / Width;
308 assert(M % Width == 0);
// Elements that are never written keep their value from OldVal.
310 __ESIMD_DNS::vector_type_t<T, N> Result = OldVal;
312 for (
int i = 0; i < NumRows; ++i) {
313 for (
int j = 0; j < Width; ++j) {
// NOTE(review): presumably guarded by a Mask check on a line not shown here.
315 Result[i * VStride + j * Stride + EltOffset] = NewVal[Index];
// Definition of __esimd_wrindirect compiled when __SYCL_DEVICE_ONLY__ is not
// defined. Starts from a copy of `OldVal` and stores `NewVal[i]` at the
// element addressed by `Offset[i]` (divided by sizeof(T)).
// NOTE(review): any use of `Mask`, the return statement and the closing braces
// are missing from this listing - confirm against the full header.
322 template <
typename T,
int N,
int M,
int ParentW
idth>
323 __ESIMD_INTRIN std::enable_if_t<M <= N, __ESIMD_DNS::vector_type_t<T, N>>
324 __esimd_wrindirect(__ESIMD_DNS::vector_type_t<T, N> OldVal,
325 __ESIMD_DNS::vector_type_t<T, M> NewVal,
326 __ESIMD_DNS::vector_type_t<uint16_t, M> Offset,
327 __ESIMD_DNS::simd_mask_storage_t<M> Mask) {
// Elements that are never written keep their value from OldVal.
328 __ESIMD_DNS::vector_type_t<T, N> Result = OldVal;
329 for (
int i = 0; i < M; ++i) {
// Each offset must be a whole number of elements and stay inside the vector.
331 uint16_t EltOffset = Offset[i] /
sizeof(T);
332 assert(Offset[i] %
sizeof(T) == 0);
333 assert(EltOffset < N);
// NOTE(review): presumably guarded by a Mask check on a line not shown here.
334 Result[EltOffset] = NewVal[i];
341 #ifdef __SYCL_DEVICE_ONLY__
344 template <
class To,
class From,
int N>
345 __ESIMD_INTRIN __ESIMD_DNS::vector_type_t<To, N>
346 __esimd_bf_cvt(__ESIMD_DNS::vector_type_t<From, N> Val);
349 #ifdef __SYCL_DEVICE_ONLY__
350 template <
class To,
class From,
int N>
351 __ESIMD_INTRIN __ESIMD_DNS::vector_type_t<To, N>
352 __esimd_tf32_cvt(__ESIMD_DNS::vector_type_t<From, N> Val);
ValueT length(const ValueT *a, const int len)
Calculate the square root of the input array.