DPC++ Runtime
Runtime libraries for oneAPI DPC++
intrin.hpp
//==------------ intrin.hpp - DPC++ Explicit SIMD API --------------------==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// Declares Explicit SIMD intrinsics used to implement operations on SIMD
// class objects.
//===----------------------------------------------------------------------===//

#pragma once

#include <assert.h>
#include <cstdint>

// \brief __esimd_rdregion: region access intrinsic.
//
// @param T the element data type, one of i8, i16, i32, i64, half, float,
// double. In particular bool (i1) and pointer types are not allowed.
//
// @param N the input vector size.
//
// @param M the return vector size.
//
// @param VStride the vertical stride in elements between rows.
//
// @param Width the size of each row; non-zero and evenly divides `M`.
//
// @param Stride the horizontal stride in elements within each row.
//
// @param ParentWidth the width of the input vector when viewed as a 2D
// matrix. Ignored if the offset is a constant.
//
// @param Input the input vector.
//
// @param Offset the starting offset in bytes.
//
// @return the region extracted.
//
// This intrinsic computes a vector Result:
//
// \code{.cpp}
// uint16_t EltOffset = Offset / sizeof(T);
// assert(Offset % sizeof(T) == 0);
//
// int NumRows = M / Width;
// assert(M % Width == 0);
//
// int Index = 0;
// for (int i = 0; i < NumRows; ++i) {
//   for (int j = 0; j < Width; ++j) {
//     Result[Index++] = Input[i * VStride + j * Stride + EltOffset];
//   }
// }
// \endcode
//
template <typename T, int N, int M, int VStride, int Width, int Stride,
          int ParentWidth = 0>
__ESIMD_INTRIN __ESIMD_DNS::vector_type_t<T, M>
__esimd_rdregion(__ESIMD_DNS::vector_type_t<T, N> Input, uint16_t Offset);
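
// A minimal usage sketch (the values below are illustrative only, not taken
// from the definitions above): read a 2x4 block out of a 16-element vector
// viewed as a 2x8 matrix, starting at column 2.
//
// \code{.cpp}
// __ESIMD_DNS::vector_type_t<int, 16> In; // viewed as 2 rows x 8 columns
// // VStride=8 steps to the next row, Width=4 elements per row, Stride=1;
// // Offset=2*sizeof(int) starts at column 2, so elements
// // {2,3,4,5, 10,11,12,13} are selected.
// auto Out = __esimd_rdregion<int, 16, 8, /*VStride*/ 8, /*Width*/ 4,
//                             /*Stride*/ 1>(In, 2 * sizeof(int));
// \endcode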

template <typename T, int N, int M, int ParentWidth = 0>
__ESIMD_INTRIN __ESIMD_DNS::vector_type_t<T, M>
__esimd_rdindirect(__ESIMD_DNS::vector_type_t<T, N> Input,
                   __ESIMD_DNS::vector_type_t<uint16_t, M> Offset);

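// __esimd_rdindirect (declared above) is the indirect form of the read: for
// each of the M result elements it returns Input[Offset[i] / sizeof(T)],
// where Offset carries per-element byte offsets (see the host implementation
// further below).
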
// __esimd_wrregion returns a copy of OldVal with the specified region updated.
//
// @param T the element data type, one of i8, i16, i32, i64, half, float,
// double. In particular bool (i1) and pointer types are not allowed.
//
// @param N the return vector size.
//
// @param M the vector size to write.
//
// @param VStride the vertical stride in elements between rows.
//
// @param Width the size of each row; non-zero and evenly divides `M`.
//
// @param Stride the horizontal stride in elements within each row.
//
// @param ParentWidth the width of the input vector when viewed as a 2D
// matrix. Ignored if the offset is a constant.
//
// @param OldVal the vector to write the region into.
//
// @param NewVal the vector to write.
//
// @param Offset the starting offset in bytes.
//
// @return the updated vector with the region modified.
//
// This intrinsic computes a vector Result:
//
// \code{.cpp}
// uint16_t EltOffset = Offset / sizeof(T);
// assert(Offset % sizeof(T) == 0);
//
// int NumRows = M / Width;
// assert(M % Width == 0);
//
// Result = OldVal;
// int Index = 0;
// for (int i = 0; i < NumRows; ++i) {
//   for (int j = 0; j < Width; ++j) {
//     if (Mask[Index])
//       Result[i * VStride + j * Stride + EltOffset] = NewVal[Index];
//     ++Index;
//   }
// }
// \endcode
//
template <typename T, int N, int M, int VStride, int Width, int Stride,
          int ParentWidth = 0>
__ESIMD_INTRIN __ESIMD_DNS::vector_type_t<T, N>
__esimd_wrregion(__ESIMD_DNS::vector_type_t<T, N> OldVal,
                 __ESIMD_DNS::vector_type_t<T, M> NewVal, uint16_t Offset,
                 __ESIMD_DNS::simd_mask_storage_t<M> Mask = 1);
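
// A minimal usage sketch (illustrative values only): overwrite the same 2x4
// block that the __esimd_rdregion sketch above reads, leaving all other
// elements of the 16-element vector unchanged (with the default all-enabled
// mask).
//
// \code{.cpp}
// __ESIMD_DNS::vector_type_t<int, 16> Old; // viewed as 2 rows x 8 columns
// __ESIMD_DNS::vector_type_t<int, 8> New;  // the 2x4 block to write
// auto Out = __esimd_wrregion<int, 16, 8, /*VStride*/ 8, /*Width*/ 4,
//                             /*Stride*/ 1>(Old, New, 2 * sizeof(int));
// \endcode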

template <typename T, int N, int M, int ParentWidth = 0>
__ESIMD_INTRIN __ESIMD_DNS::vector_type_t<T, N>
__esimd_wrindirect(__ESIMD_DNS::vector_type_t<T, N> OldVal,
                   __ESIMD_DNS::vector_type_t<T, M> NewVal,
                   __ESIMD_DNS::vector_type_t<uint16_t, M> Offset,
                   __ESIMD_DNS::simd_mask_storage_t<M> Mask = 1);
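
// __esimd_wrindirect (declared above) is the indirect form of the write: for
// each i with Mask[i] set it stores NewVal[i] into OldVal[Offset[i] / sizeof(T)]
// and returns the updated vector (see the host implementation further below).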

namespace sycl {
__SYCL_INLINE_VER_NAMESPACE(_V1) {
namespace ext::intel::esimd::detail {

template <class T> using __st = __raw_t<T>;

template <typename BT, int BN, typename RTy>
__ESIMD_DNS::vector_type_t<__st<typename RTy::element_type>, RTy::length>
    ESIMD_INLINE readRegion(
        const __ESIMD_DNS::vector_type_t<__st<BT>, BN> &Base, RTy Region) {
  using ElemTy = __st<typename RTy::element_type>;
  auto Base1 = bitcast<ElemTy, __st<BT>, BN>(Base);
  constexpr int Bytes = BN * sizeof(BT);
  if constexpr (Bytes == RTy::Size_in_bytes)
    // This is a no-op format.
    return Base1;
  else {
    static_assert(!RTy::Is_2D);
    constexpr int N = Bytes / sizeof(ElemTy);
    // Access the region information.
    constexpr int M = RTy::Size_x;
    constexpr int Stride = RTy::Stride_x;
    int16_t Offset = static_cast<int16_t>(Region.M_offset_x * sizeof(ElemTy));
    // read-region
    return __esimd_rdregion<ElemTy, N, M, /*VS*/ 0, M, Stride>(Base1, Offset);
  }
}

template <typename BT, int BN, typename T, typename U>
ESIMD_INLINE
    __ESIMD_DNS::vector_type_t<__st<typename T::element_type>, T::length>
    readRegion(const __ESIMD_DNS::vector_type_t<__st<BT>, BN> &Base,
               std::pair<T, U> Region) {
  // parent-region type
  using PaTy = typename shape_type<U>::type;
  constexpr int BN1 = PaTy::length;
  using BT1 = typename PaTy::element_type;
  using ElemTy = __st<typename T::element_type>;
  // Recursively read the base region.
  auto Base1 = readRegion<BT, BN>(Base, Region.second);
  if constexpr (!T::Is_2D || BN1 * sizeof(BT1) == T::Size_in_bytes)
    // 1-D region or format
    return readRegion<BT1, BN1>(Base1, Region.first);
  else {
    static_assert(T::Is_2D);
    static_assert(std::is_same_v<ElemTy, __st<BT1>>);
    // To read a 2D region, we need the parent region.
    // Read full rows with a non-trivial vertical stride and a horizontal
    // stride of 1.
    constexpr int M = T::Size_y * PaTy::Size_x;
    constexpr int VS = T::Stride_y * PaTy::Size_x;
    constexpr int W = PaTy::Size_x;
    constexpr int HS = 1;
    constexpr int ParentWidth = PaTy::Size_x;
    uint16_t Offset = static_cast<uint16_t>(Region.first.M_offset_y *
                                            PaTy::Size_x * sizeof(ElemTy));

    auto R =
        __esimd_rdregion<ElemTy, BN1, M, VS, W, HS, ParentWidth>(Base1, Offset);

    // Read columns with a non-trivial horizontal stride.
    constexpr int N1 = M;
    constexpr int M1 = T::length;
    constexpr int VS1 = PaTy::Size_x;
    constexpr int W1 = T::Size_x;
    constexpr int HS1 = T::Stride_x;
    uint16_t Offset1 =
        static_cast<uint16_t>(Region.first.M_offset_x * sizeof(ElemTy));

    return __esimd_rdregion<ElemTy, N1, M1, VS1, W1, HS1, ParentWidth>(R,
                                                                       Offset1);
  }
}
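
// A worked illustration of the two-pass 2D read above (illustrative numbers):
// assume a 4x8 int parent region (BN1 = 32) and a 2x3 target region with
// Stride_y == 2, Stride_x == 2 and zero offsets. Pass 1 uses M = 2 * 8 = 16,
// VS = 2 * 8 = 16, W = 8, HS = 1, copying parent rows 0 and 2 out
// contiguously. Pass 2 uses N1 = 16, M1 = 6, VS1 = 8, W1 = 3, HS1 = 2,
// keeping every other element of each extracted row.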

} // namespace ext::intel::esimd::detail
} // __SYCL_INLINE_VER_NAMESPACE(_V1)
} // namespace sycl

// vload
//
// Maps to the backend vload intrinsic; used by the compiler to control
// optimization of simd objects.
//
template <typename T, int N>
__ESIMD_INTRIN __ESIMD_DNS::vector_type_t<T, N>
__esimd_vload(const __ESIMD_DNS::vector_type_t<T, N> *ptr);

// vstore
//
// Maps to the backend vstore intrinsic; used by the compiler to control
// optimization of simd objects.
template <typename T, int N>
__ESIMD_INTRIN void __esimd_vstore(__ESIMD_DNS::vector_type_t<T, N> *ptr,
                                   __ESIMD_DNS::vector_type_t<T, N> vals);
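
// A minimal sketch of the intended pairing (hypothetical variable names, not
// part of the API above):
//
// \code{.cpp}
// __ESIMD_DNS::vector_type_t<float, 8> Storage, Vals;
// __esimd_vstore<float, 8>(&Storage, Vals);      // write the simd storage
// auto Copy = __esimd_vload<float, 8>(&Storage); // read the stored value back
// \endcode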

template <typename T, int N>
__ESIMD_INTRIN uint16_t __esimd_any(__ESIMD_DNS::vector_type_t<T, N> src)
#ifdef __SYCL_DEVICE_ONLY__
    ;
#else
{
  for (unsigned int i = 0; i != N; i++) {
    if (src[i] != 0)
      return 1;
  }
  return 0;
}
#endif // __SYCL_DEVICE_ONLY__

template <typename T, int N>
__ESIMD_INTRIN uint16_t __esimd_all(__ESIMD_DNS::vector_type_t<T, N> src)
#ifdef __SYCL_DEVICE_ONLY__
    ;
#else
{
  for (unsigned int i = 0; i != N; i++) {
    if (src[i] == 0)
      return 0;
  }
  return 1;
}
#endif // __SYCL_DEVICE_ONLY__
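
// For example, for src = {0, 3, 0, 0} (N = 4), __esimd_any returns 1 and
// __esimd_all returns 0; an all-zero vector yields 0 from both, and a vector
// with no zero elements yields 1 from both.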

#ifndef __SYCL_DEVICE_ONLY__

// Implementations of ESIMD intrinsics for the SYCL host device.
template <typename T, int N, int M, int VStride, int Width, int Stride,
          int ParentWidth>
__ESIMD_INTRIN __ESIMD_DNS::vector_type_t<T, M>
__esimd_rdregion(__ESIMD_DNS::vector_type_t<T, N> Input, uint16_t Offset) {
  uint16_t EltOffset = Offset / sizeof(T);
  assert(Offset % sizeof(T) == 0);

  int NumRows = M / Width;
  assert(M % Width == 0);

  __ESIMD_DNS::vector_type_t<T, M> Result;
  int Index = 0;
  for (int i = 0; i < NumRows; ++i) {
    for (int j = 0; j < Width; ++j) {
      Result[Index++] = Input[i * VStride + j * Stride + EltOffset];
    }
  }
  return Result;
}

template <typename T, int N, int M, int ParentWidth>
__ESIMD_INTRIN __ESIMD_DNS::vector_type_t<T, M>
__esimd_rdindirect(__ESIMD_DNS::vector_type_t<T, N> Input,
                   __ESIMD_DNS::vector_type_t<uint16_t, M> Offset) {
  __ESIMD_DNS::vector_type_t<T, M> Result;
  for (int i = 0; i < M; ++i) {
    uint16_t EltOffset = Offset[i] / sizeof(T);
    assert(Offset[i] % sizeof(T) == 0);
    assert(EltOffset < N);
    Result[i] = Input[EltOffset];
  }
  return Result;
}

template <typename T, int N, int M, int VStride, int Width, int Stride,
          int ParentWidth>
__ESIMD_INTRIN __ESIMD_DNS::vector_type_t<T, N>
__esimd_wrregion(__ESIMD_DNS::vector_type_t<T, N> OldVal,
                 __ESIMD_DNS::vector_type_t<T, M> NewVal, uint16_t Offset,
                 __ESIMD_DNS::simd_mask_storage_t<M> Mask) {
  uint16_t EltOffset = Offset / sizeof(T);
  assert(Offset % sizeof(T) == 0);

  int NumRows = M / Width;
  assert(M % Width == 0);

  __ESIMD_DNS::vector_type_t<T, N> Result = OldVal;
  int Index = 0;
  for (int i = 0; i < NumRows; ++i) {
    for (int j = 0; j < Width; ++j) {
      if (Mask[Index])
        Result[i * VStride + j * Stride + EltOffset] = NewVal[Index];
      ++Index;
    }
  }
  return Result;
}

template <typename T, int N, int M, int ParentWidth>
__ESIMD_INTRIN __ESIMD_DNS::vector_type_t<T, N>
__esimd_wrindirect(__ESIMD_DNS::vector_type_t<T, N> OldVal,
                   __ESIMD_DNS::vector_type_t<T, M> NewVal,
                   __ESIMD_DNS::vector_type_t<uint16_t, M> Offset,
                   __ESIMD_DNS::simd_mask_storage_t<M> Mask) {
  __ESIMD_DNS::vector_type_t<T, N> Result = OldVal;
  for (int i = 0; i < M; ++i) {
    if (Mask[i]) {
      uint16_t EltOffset = Offset[i] / sizeof(T);
      assert(Offset[i] % sizeof(T) == 0);
      assert(EltOffset < N);
      Result[EltOffset] = NewVal[i];
    }
  }
  return Result;
}
#endif // __SYCL_DEVICE_ONLY__

#ifdef __SYCL_DEVICE_ONLY__
// This intrinsic requires one of the types to be _Float16, which is not
// available on the host, so it cannot be declared there. Callers must emulate
// it on the host.
template <class To, class From, int N>
__ESIMD_INTRIN __ESIMD_DNS::vector_type_t<To, N>
__esimd_bf_cvt(__ESIMD_DNS::vector_type_t<From, N> Val);
#endif // __SYCL_DEVICE_ONLY__

#ifdef __SYCL_DEVICE_ONLY__
template <class To, class From, int N>
__ESIMD_INTRIN __ESIMD_DNS::vector_type_t<To, N>
__esimd_tf32_cvt(__ESIMD_DNS::vector_type_t<From, N> Val);
#endif // __SYCL_DEVICE_ONLY__