DPC++ Runtime
Runtime libraries for oneAPI DPC++
intrin.hpp
Go to the documentation of this file.
1 //==------------ intrin.hpp - DPC++ Explicit SIMD API --------------------==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 // Declares Explicit SIMD intrinsics used to implement working with
9 // the SIMD classes objects.
10 //===----------------------------------------------------------------------===//
11 
12 #pragma once
13 
15 
19 
20 #include <assert.h>
21 #include <cstdint>
22 
23 // \brief __esimd_rdregion: region access intrinsic.
24 //
25 // @param T the element data type, one of i8, i16, i32, i64, half, float,
26 // double. In particular bool (i1) and pointer types are not allowed.
27 //
28 // @param N the input vector size.
29 //
30 // @param M the return vector size.
31 //
32 // @param VStride the vertical stride in elements between rows.
33 //
34 // @param Width the size or each row, non-zero and even divides `M`.
35 //
36 // @param Stride horizontal stride in elements within each row.
37 //
38 // @param ParentWidth the width of the input vector when viewed as a 2D
39 // matrix. Ignored if offset is a constant.
40 //
41 // @param Input the input vector
42 //
43 // @param Offset the starting offset in bytes.
44 //
45 // @return the region extracted.
46 //
47 // This intrinsic computes a vector Result:
48 //
49 // \code{.cpp}
50 // uint16_t EltOffset = Offset / sizeof(T);
51 // assert(Offset % sizeof(T) == 0);
52 //
53 // int NumRows = M / Width;
54 // assert(M % Width == 0);
55 //
56 // int Index = 0;
57 // for (int i = 0; i < NumRows; ++i) {
58 // for (int j = 0; j < Width; ++j) {
59 // Result[Index++] = Input[i * VStride + j * Stride +
60 // EltOffset];
61 // }
62 // }
63 // \endcode
64 //
65 template <typename T, int N, int M, int VStride, int Width, int Stride,
66  int ParentWidth = 0>
67 __ESIMD_INTRIN __ESIMD_DNS::vector_type_t<T, M>
68 __esimd_rdregion(__ESIMD_DNS::vector_type_t<T, N> Input, uint16_t Offset);
69 
70 template <typename T, int N, int M, int ParentWidth = 0>
71 __ESIMD_INTRIN __ESIMD_DNS::vector_type_t<T, M>
72 __esimd_rdindirect(__ESIMD_DNS::vector_type_t<T, N> Input,
73  __ESIMD_DNS::vector_type_t<uint16_t, M> Offset);
74 
75 // __esimd_wrregion returns the updated vector with the region updated.
76 //
77 // @param T the element data type, one of i8, i16, i32, i64, half, float,
78 // double. In particular bool (i1) and pointer types are not allowed.
79 //
80 // @param N the return vector size.
81 //
82 // @param M the vector size to write.
83 //
84 // @param VStride the vertical stride in elements between rows.
85 //
86 // @param Width the size or each row, non-zero and even divides `M`.
87 //
88 // @param Stride horizontal stride in elements within each row.
89 //
90 // @param ParentWidth the width of the input vector when viewed as a 2D
91 // matrix. Ignored if offset is a constant.
92 //
93 // @param OldVal the vector to write region into.
94 //
95 // @param NewVal the vector to write.
96 //
97 // @param Offset the starting offset in bytes.
98 //
99 // @return the updated vector with the region modifided.
100 //
101 // This intrinsic computes a vector Result:
102 //
103 // \code{.cpp}
104 // uint16_t EltOffset = Offset / sizeof(T);
105 // assert(Offset % sizeof(T) == 0);
106 //
107 // int NumRows = M / Width;
108 // assert(M % Width == 0);
109 //
110 // Result = OldValue;
111 // int Index = 0;
112 // for (int i = 0; i < NumRows; ++i) {
113 // for (int j = 0; j < Width; ++j) {
114 // if (Mask[Index])
115 // Result[i * VStride + j * Stride + EltOffset] =
116 // NewVal[Index];
117 // ++Index;
118 // }
119 // }
120 // \endcode
121 //
122 template <typename T, int N, int M, int VStride, int Width, int Stride,
123  int ParentWidth = 0>
124 __ESIMD_INTRIN __ESIMD_DNS::vector_type_t<T, N>
125 __esimd_wrregion(__ESIMD_DNS::vector_type_t<T, N> OldVal,
126  __ESIMD_DNS::vector_type_t<T, M> NewVal, uint16_t Offset,
127  __ESIMD_DNS::simd_mask_storage_t<M> Mask = 1);
128 
129 template <typename T, int N, int M, int ParentWidth = 0>
130 __ESIMD_INTRIN __ESIMD_DNS::vector_type_t<T, N>
131 __esimd_wrindirect(__ESIMD_DNS::vector_type_t<T, N> OldVal,
132  __ESIMD_DNS::vector_type_t<T, M> NewVal,
133  __ESIMD_DNS::vector_type_t<uint16_t, M> Offset,
134  __ESIMD_DNS::simd_mask_storage_t<M> Mask = 1);
135 
137 namespace __ESIMD_DNS {
138 
139 template <class T> using __st = __raw_t<T>;
140 
142 template <typename BT, int BN, typename RTy>
143 __ESIMD_DNS::vector_type_t<__st<typename RTy::element_type>, RTy::length>
144  ESIMD_INLINE readRegion(
145  const __ESIMD_DNS::vector_type_t<__st<BT>, BN> &Base, RTy Region) {
146  using ElemTy = __st<typename RTy::element_type>;
147  auto Base1 = bitcast<ElemTy, __st<BT>, BN>(Base);
148  constexpr int Bytes = BN * sizeof(BT);
149  if constexpr (Bytes == RTy::Size_in_bytes)
150  // This is a no-op format.
151  return Base1;
152  else {
153  static_assert(!RTy::Is_2D);
154  constexpr int N = Bytes / sizeof(ElemTy);
155  // Access the region information.
156  constexpr int M = RTy::Size_x;
157  constexpr int Stride = RTy::Stride_x;
158  int16_t Offset = static_cast<int16_t>(Region.M_offset_x * sizeof(ElemTy));
159  // read-region
160  return __esimd_rdregion<ElemTy, N, M, /*VS*/ 0, M, Stride>(Base1, Offset);
161  }
162 }
163 
165 template <typename BT, int BN, typename T, typename U>
166 ESIMD_INLINE
167  __ESIMD_DNS::vector_type_t<__st<typename T::element_type>, T::length>
168  readRegion(const __ESIMD_DNS::vector_type_t<__st<BT>, BN> &Base,
169  std::pair<T, U> Region) {
170  // parent-region type
171  using PaTy = typename shape_type<U>::type;
172  constexpr int BN1 = PaTy::length;
173  using BT1 = typename PaTy::element_type;
174  using ElemTy = __st<typename T::element_type>;
175  // Recursively read the base
176  auto Base1 = readRegion<BT, BN>(Base, Region.second);
177  if constexpr (!T::Is_2D || BN1 * sizeof(BT1) == T::Size_in_bytes)
178  // 1-D region or format
179  return readRegion<BT1, BN1>(Base1, Region.first);
180  else {
181  static_assert(T::Is_2D);
182  static_assert(std::is_same<ElemTy, __st<BT1>>::value);
183  // To read a 2D region, we need the parent region
184  // Read full rows with non-trivial vertical and horizontal stride = 1.
185  constexpr int M = T::Size_y * PaTy::Size_x;
186  constexpr int VS = T::Stride_y * PaTy::Size_x;
187  constexpr int W = PaTy::Size_x;
188  constexpr int HS = 1;
189  constexpr int ParentWidth = PaTy::Size_x;
190  uint16_t Offset = static_cast<uint16_t>(Region.first.M_offset_y *
191  PaTy::Size_x * sizeof(ElemTy));
192 
193  auto R =
194  __esimd_rdregion<ElemTy, BN1, M, VS, W, HS, ParentWidth>(Base1, Offset);
195 
196  // Read columns with non-trivial horizontal stride.
197  constexpr int N1 = M;
198  constexpr int M1 = T::length;
199  constexpr int VS1 = PaTy::Size_x;
200  constexpr int W1 = T::Size_x;
201  constexpr int HS1 = T::Stride_x;
202  uint16_t Offset1 =
203  static_cast<uint16_t>(Region.first.M_offset_x * sizeof(ElemTy));
204 
205  return __esimd_rdregion<ElemTy, N1, M1, VS1, W1, HS1, ParentWidth>(R,
206  Offset1);
207  }
208 }
209 
210 } // namespace __ESIMD_DNS
211 } // __SYCL_INLINE_NAMESPACE(cl)
212 
213 // vload
214 //
215 // map to the backend vload intrinsic, used by compiler to control
216 // optimization on simd object
217 //
218 template <typename T, int N>
219 __ESIMD_INTRIN __ESIMD_DNS::vector_type_t<T, N>
220 __esimd_vload(const __ESIMD_DNS::vector_type_t<T, N> *ptr);
221 
222 // vstore
223 //
224 // map to the backend vstore intrinsic, used by compiler to control
225 // optimization on simd object
226 template <typename T, int N>
227 __ESIMD_INTRIN void __esimd_vstore(__ESIMD_DNS::vector_type_t<T, N> *ptr,
228  __ESIMD_DNS::vector_type_t<T, N> vals);
229 
230 template <typename T, int N>
231 __ESIMD_INTRIN uint16_t __esimd_any(__ESIMD_DNS::vector_type_t<T, N> src)
232 #ifdef __SYCL_DEVICE_ONLY__
233  ;
234 #else
235 {
236  for (unsigned int i = 0; i != N; i++) {
237  if (src[i] != 0)
238  return 1;
239  }
240  return 0;
241 }
242 #endif // __SYCL_DEVICE_ONLY__
243 
244 template <typename T, int N>
245 __ESIMD_INTRIN uint16_t __esimd_all(__ESIMD_DNS::vector_type_t<T, N> src)
246 #ifdef __SYCL_DEVICE_ONLY__
247  ;
248 #else
249 {
250  for (unsigned int i = 0; i != N; i++) {
251  if (src[i] == 0)
252  return 0;
253  }
254  return 1;
255 }
256 #endif // __SYCL_DEVICE_ONLY__
257 
258 #ifndef __SYCL_DEVICE_ONLY__
259 
260 // Implementations of ESIMD intrinsics for the SYCL host device
261 template <typename T, int N, int M, int VStride, int Width, int Stride,
262  int ParentWidth>
263 __ESIMD_INTRIN __ESIMD_DNS::vector_type_t<T, M>
264 __esimd_rdregion(__ESIMD_DNS::vector_type_t<T, N> Input, uint16_t Offset) {
265  uint16_t EltOffset = Offset / sizeof(T);
266  assert(Offset % sizeof(T) == 0);
267 
268  int NumRows = M / Width;
269  assert(M % Width == 0);
270 
271  __ESIMD_DNS::vector_type_t<T, M> Result;
272  int Index = 0;
273  for (int i = 0; i < NumRows; ++i) {
274  for (int j = 0; j < Width; ++j) {
275  Result[Index++] = Input[i * VStride + j * Stride + EltOffset];
276  }
277  }
278  return Result;
279 }
280 
281 template <typename T, int N, int M, int ParentWidth>
282 __ESIMD_INTRIN __ESIMD_DNS::vector_type_t<T, M>
283 __esimd_rdindirect(__ESIMD_DNS::vector_type_t<T, N> Input,
284  __ESIMD_DNS::vector_type_t<uint16_t, M> Offset) {
285  __ESIMD_DNS::vector_type_t<T, M> Result;
286  for (int i = 0; i < M; ++i) {
287  uint16_t EltOffset = Offset[i] / sizeof(T);
288  assert(Offset[i] % sizeof(T) == 0);
289  assert(EltOffset < N);
290  Result[i] = Input[EltOffset];
291  }
292  return Result;
293 }
294 
295 template <typename T, int N, int M, int VStride, int Width, int Stride,
296  int ParentWidth>
297 __ESIMD_INTRIN __ESIMD_DNS::vector_type_t<T, N>
298 __esimd_wrregion(__ESIMD_DNS::vector_type_t<T, N> OldVal,
299  __ESIMD_DNS::vector_type_t<T, M> NewVal, uint16_t Offset,
300  __ESIMD_DNS::simd_mask_storage_t<M> Mask) {
301  uint16_t EltOffset = Offset / sizeof(T);
302  assert(Offset % sizeof(T) == 0);
303 
304  int NumRows = M / Width;
305  assert(M % Width == 0);
306 
307  __ESIMD_DNS::vector_type_t<T, N> Result = OldVal;
308  int Index = 0;
309  for (int i = 0; i < NumRows; ++i) {
310  for (int j = 0; j < Width; ++j) {
311  if (Mask[Index])
312  Result[i * VStride + j * Stride + EltOffset] = NewVal[Index];
313  ++Index;
314  }
315  }
316  return Result;
317 }
318 
319 template <typename T, int N, int M, int ParentWidth>
320 __ESIMD_INTRIN __ESIMD_DNS::vector_type_t<T, N>
321 __esimd_wrindirect(__ESIMD_DNS::vector_type_t<T, N> OldVal,
322  __ESIMD_DNS::vector_type_t<T, M> NewVal,
323  __ESIMD_DNS::vector_type_t<uint16_t, M> Offset,
324  __ESIMD_DNS::simd_mask_storage_t<M> Mask) {
325  __ESIMD_DNS::vector_type_t<T, N> Result = OldVal;
326  for (int i = 0; i < M; ++i) {
327  if (Mask[i]) {
328  uint16_t EltOffset = Offset[i] / sizeof(T);
329  assert(Offset[i] % sizeof(T) == 0);
330  assert(EltOffset < N);
331  Result[EltOffset] = NewVal[i];
332  }
333  }
334  return Result;
335 }
336 
337 #endif // __SYCL_DEVICE_ONLY__
338 
T
util.hpp
cl::sycl::ext::intel::esimd::rgba_channel::R
@ R
cl::sycl::length
float length(T p) __NOEXC
Definition: builtins.hpp:1032
cl
We provide new interfaces for matrix muliply in this patch:
Definition: access.hpp:13
types.hpp
common.hpp
__SYCL_INLINE_NAMESPACE
#define __SYCL_INLINE_NAMESPACE(X)
Definition: defines_elementary.hpp:12