DPC++ Runtime
Runtime libraries for oneAPI DPC++
common.hpp
Go to the documentation of this file.
1 //==---------------- common.hpp - DPC++ Explicit SIMD API ----------------==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 // Common definitions used in experimental Explicit SIMD APIs.
9 //===----------------------------------------------------------------------===//
10 
11 #pragma once
12 
16 
17 #include <cstdint>
18 #include <type_traits>
19 
20 namespace sycl {
22 namespace ext::intel::experimental::esimd {
23 
26 
27 using argument_type
28  __SYCL_DEPRECATED("use sycl::ext::intel::esimd::xmx::dpas_argument_type") =
30 
33 enum class lsc_scope : uint8_t {
34  group = 0,
35  local = 1,
36  tile = 2,
37  gpu = 3,
38  gpus = 4,
39  system = 5,
40  sysacq = 6,
41 };
42 
45 enum class lsc_fence_op : uint8_t {
46  none = 0,
47  evict = 1,
48  invalidate = 2,
49  discard = 3,
50  clean = 4,
51  flushl3 = 5,
53 };
54 
57 enum class lsc_memory_kind : uint8_t {
58  untyped_global = 0,
60  typed_global = 2,
61  shared_local = 3,
62 };
63 
65 enum class lsc_data_size : uint8_t {
66  default_size = 0,
67  u8 = 1,
68  u16 = 2,
69  u32 = 3,
70  u64 = 4,
71  u8u32 = 5,
72  u16u32 = 6,
73  u16u32h = 7,
74 };
75 
76 namespace detail {
77 
78 enum class lsc_vector_size : uint8_t {
79  n1 = 1,
80  n2 = 2,
81  n3 = 3,
82  n4 = 4,
83  n8 = 5,
84  n16 = 6,
85  n32 = 7,
86  n64 = 8,
87 };
88 
89 enum class lsc_data_order : uint8_t {
90  nontranspose = 1,
91  transpose = 2,
92 };
93 
94 template <lsc_vector_size VS> constexpr void check_lsc_vector_size() {
95  static_assert(VS == lsc_vector_size::n1 || VS == lsc_vector_size::n2 ||
96  VS == lsc_vector_size::n3 || VS == lsc_vector_size::n4 ||
97  VS == lsc_vector_size::n8 || VS == lsc_vector_size::n16 ||
98  VS == lsc_vector_size::n64 || VS == lsc_vector_size::n32,
99  "Unsupported vector size");
100 }
101 
102 template <int VS> constexpr void check_lsc_vector_size() {
103  static_assert(VS == 1 || VS == 2 || VS == 3 || VS == 4 || VS == 8 ||
104  VS == 16 || VS == 32 || VS == 64,
105  "Unsupported vector size");
106 }
107 
108 template <typename T, lsc_data_size DS> constexpr void check_lsc_data_size() {
109  static_assert(DS != lsc_data_size::default_size || sizeof(T) == 1 ||
110  sizeof(T) == 2 || sizeof(T) == 4 || sizeof(T) == 8,
111  "Unsupported data type");
112  static_assert(
113  DS == lsc_data_size::default_size ||
114  (sizeof(T) == 1 &&
115  (DS == lsc_data_size::u8 || DS == lsc_data_size::u8u32)) ||
116  (sizeof(T) == 2 &&
117  (DS == lsc_data_size::u16 || DS == lsc_data_size::u16u32 ||
118  DS == lsc_data_size::u16u32h)) ||
119  (sizeof(T) == 4 &&
120  (DS == lsc_data_size::u32 || DS == lsc_data_size::u8u32 ||
121  DS == lsc_data_size::u16u32 || DS == lsc_data_size::u16u32h)) ||
122  (sizeof(T) == 8 && DS == lsc_data_size::u64),
123  "Data type does not match data size");
124 }
125 
126 template <lsc_vector_size VS> constexpr uint8_t to_int() {
127  check_lsc_vector_size<VS>();
128  switch (VS) {
129  case lsc_vector_size::n1:
130  return 1;
131  case lsc_vector_size::n2:
132  return 2;
133  case lsc_vector_size::n3:
134  return 3;
135  case lsc_vector_size::n4:
136  return 4;
137  case lsc_vector_size::n8:
138  return 8;
139  case lsc_vector_size::n16:
140  return 16;
141  case lsc_vector_size::n32:
142  return 32;
143  case lsc_vector_size::n64:
144  return 64;
145  default:
146  return 1;
147  }
148 }
149 
150 template <int VS> constexpr lsc_vector_size to_lsc_vector_size() {
151  check_lsc_vector_size<VS>();
152  switch (VS) {
153  case 1:
154  return lsc_vector_size::n1;
155  case 2:
156  return lsc_vector_size::n2;
157  case 3:
158  return lsc_vector_size::n3;
159  case 4:
160  return lsc_vector_size::n4;
161  case 8:
162  return lsc_vector_size::n8;
163  case 16:
164  return lsc_vector_size::n16;
165  case 32:
166  return lsc_vector_size::n32;
167  case 64:
168  return lsc_vector_size::n64;
169  default:
170  return lsc_vector_size::n1;
171  }
172 }
173 
174 template <typename T, lsc_data_size DS>
176  check_lsc_data_size<T, DS>();
177  if (DS != lsc_data_size::default_size)
178  return DS;
179  else if (sizeof(T) == 1)
180  return lsc_data_size::u8;
181  else if (sizeof(T) == 2)
182  return lsc_data_size::u16;
183  else if (sizeof(T) == 4)
184  return lsc_data_size::u32;
185  else if (sizeof(T) == 8)
186  return lsc_data_size::u64;
187  else
188  return DS;
189 }
190 
192  if (DS == lsc_data_size::u8)
193  return lsc_data_size::u8u32;
194  if (DS == lsc_data_size::u16)
195  return lsc_data_size::u16u32;
196  return DS;
197 }
198 
199 template <typename T> struct lsc_expand_type {
200  using type = std::conditional_t<
201  sizeof(T) <= 4,
202  std::conditional_t<std::is_signed<T>::value, int32_t, uint32_t>,
203  std::conditional_t<std::is_signed<T>::value, int64_t, uint64_t>>;
204 };
205 
206 template <typename T> struct lsc_bitcast_type {
207 public:
208  using type = std::conditional_t<
209  sizeof(T) == 1, uint8_t,
211  sizeof(T) == 2, uint16_t,
212  std::conditional_t<sizeof(T) == 4, uint32_t,
213  std::conditional_t<sizeof(T) == 8, uint64_t, T>>>>;
214 };
215 
216 } // namespace detail
217 
219 enum class cache_hint : uint8_t {
220  none = 0,
221  uncached = 1,
222  cached = 2,
223  write_back = 3,
224  write_through = 4,
225  streaming = 5,
226  read_invalidate = 6
227 };
228 
229 namespace detail {
230 
231 template <cache_hint Hint> class cache_hint_wrap {
232  template <cache_hint...> struct is_one_of_t;
233  template <cache_hint Last>
234  struct is_one_of_t<Last>
235  : std::conditional<Last == Hint, std::true_type, std::false_type>::type {
236  };
237  template <cache_hint Head, cache_hint... Tail>
238  struct is_one_of_t<Head, Tail...>
239  : std::conditional<Head == Hint, std::true_type,
240  is_one_of_t<Tail...>>::type {};
241 
242 public:
243  constexpr operator cache_hint() const { return Hint; }
244  template <cache_hint... Hints> constexpr bool is_one_of() const {
245  return is_one_of_t<Hints...>::value;
246  }
247 };
248 
249 constexpr bool are_both(cache_hint First, cache_hint Second, cache_hint Val) {
250  return First == Val && Second == Val;
251 }
252 
253 enum class lsc_action { prefetch, load, store, atomic };
254 
255 template <lsc_action Action, cache_hint L1, cache_hint L3>
256 constexpr void check_lsc_cache_hint() {
257  constexpr auto L1H = cache_hint_wrap<L1>{};
258  constexpr auto L3H = cache_hint_wrap<L3>{};
259  if constexpr (Action == lsc_action::prefetch) {
260  static_assert(
261  L1H.template is_one_of<cache_hint::cached, cache_hint::uncached,
262  cache_hint::streaming>() &&
263  L3H.template is_one_of<cache_hint::cached,
264  cache_hint::uncached>() &&
265  !are_both(L1H, L3H, cache_hint::uncached),
266  "unsupported cache hint");
267  } else if constexpr (Action == lsc_action::load) {
268  static_assert(
269  are_both(L1H, L3H, cache_hint::none) ||
270  (L1H.template is_one_of<cache_hint::uncached, cache_hint::cached,
271  cache_hint::streaming>() &&
272  L3H.template is_one_of<cache_hint::uncached,
273  cache_hint::cached>()) ||
274  (L1H == cache_hint::read_invalidate && L3H == cache_hint::cached),
275  "unsupported cache hint");
276  } else if constexpr (Action == lsc_action::store) {
277  static_assert(are_both(L1H, L3H, cache_hint::none) ||
278  are_both(L1H, L3H, cache_hint::write_back) ||
279  (L1H.template is_one_of<cache_hint::uncached,
280  cache_hint::write_through,
281  cache_hint::streaming>() &&
282  L3H.template is_one_of<cache_hint::uncached,
283  cache_hint::write_back>()),
284  "unsupported cache hint");
285  } else if constexpr (Action == lsc_action::atomic) {
286  static_assert(are_both(L1H, L3H, cache_hint::none) ||
287  (L1H == cache_hint::uncached &&
288  L3H.template is_one_of<cache_hint::uncached,
289  cache_hint::write_back>()),
290  "unsupported cache hint");
291  }
292 }
293 
294 } // namespace detail
295 
297 enum class split_barrier_action : uint8_t {
298  wait = 0, // split barrier wait
299  signal = 1, // split barrier signal
300 };
301 
303 
304 } // namespace ext::intel::experimental::esimd
305 } // __SYCL_INLINE_VER_NAMESPACE(_V1)
306 } // namespace sycl
sycl::_V1::ext::intel::experimental::esimd::lsc_memory_kind::shared_local
@ shared_local
typed global memory
sycl::_V1::ext::intel::experimental::esimd::lsc_memory_kind::untyped_global_low_pri
@ untyped_global_low_pri
untyped global memory
sycl::_V1::ext::intel::experimental::esimd::lsc_data_size::u64
@ u64
common.hpp
sycl::_V1::ext::intel::experimental::esimd::lsc_scope::tile
@ tile
flush out to the local scope
sycl::_V1::ext::intel::experimental::esimd::lsc_fence_op::none
@ none
sycl::_V1::ext::intel::experimental::prefetch
prefetch_impl< _B > prefetch
Definition: fpga_lsu.hpp:45
sycl::_V1::ext::intel::experimental::esimd::detail::lsc_vector_size::n3
@ n3
sycl::_V1::ext::intel::experimental::esimd::detail::lsc_vector_size::n2
@ n2
__SYCL_INLINE_VER_NAMESPACE
#define __SYCL_INLINE_VER_NAMESPACE(X)
Definition: defines_elementary.hpp:11
common.hpp
sycl::_V1::ext::intel::experimental::esimd::detail::check_lsc_data_size
constexpr void check_lsc_data_size()
Definition: common.hpp:108
defines_elementary.hpp
sycl::_V1::detail::int64_t
conditional< sizeof(long)==8, long, long long >::type int64_t
Definition: kernel_desc.hpp:34
sycl::_V1::ext::intel::experimental::esimd::lsc_data_size::u32
@ u32
sycl::_V1::ext::intel::experimental::esimd::cache_hint::write_through
@ write_through
sycl::_V1::ext::intel::experimental::esimd::detail::lsc_vector_size::n32
@ n32
sycl
---— Error handling, matching OpenCL plugin semantics.
Definition: access.hpp:14
sycl::_V1::ext::intel::experimental::esimd::detail::are_both
constexpr bool are_both(cache_hint First, cache_hint Second, cache_hint Val)
Definition: common.hpp:249
sycl::_V1::ext::intel::experimental::esimd::lsc_data_size::u16
@ u16
sycl::_V1::ext::intel::experimental::prefetch_impl
Definition: fpga_lsu.hpp:34
sycl::_V1::ext::intel::experimental::esimd::lsc_scope::gpu
@ gpu
tile, flush out to several DSSs
sycl::_V1::ext::intel::experimental::esimd::detail::lsc_bitcast_type
Definition: common.hpp:206
sycl::_V1::ext::intel::experimental::esimd::detail::expand_data_size
constexpr lsc_data_size expand_data_size(lsc_data_size DS)
Definition: common.hpp:191
sycl::_V1::ext::intel::experimental::esimd::lsc_scope::sysacq
@ sysacq
the entire system memory space
sycl::_V1::ext::intel::experimental::esimd::lsc_data_size::u16u32
@ u16u32
load 8b, zero extend to 32b; store the opposite
__SYCL_DEPRECATED
#define __SYCL_DEPRECATED(message)
Definition: defines_elementary.hpp:46
sycl::_V1::ext::intel::experimental::esimd::detail::check_lsc_vector_size
constexpr void check_lsc_vector_size()
Definition: common.hpp:94
sycl::_V1::ext::intel::experimental::esimd::detail::lsc_action
lsc_action
Definition: common.hpp:253
sycl::_V1::ext::intel::experimental::esimd::lsc_memory_kind
lsc_memory_kind
The specific LSC shared function to fence with lsc_fence Supported platforms: DG2,...
Definition: common.hpp:57
sycl::_V1::ext::intel::experimental::esimd::lsc_fence_op::invalidate
@ invalidate
dirty lines evicted and invalidated from L1
sycl::_V1::ext::intel::experimental::esimd::cache_hint::streaming
@ streaming
sycl::_V1::ext::intel::experimental::esimd::detail::lsc_vector_size::n1
@ n1
sycl::_V1::ext::intel::experimental::esimd::detail::lsc_vector_size::n16
@ n16
sycl::_V1::ext::intel::experimental::esimd::cache_hint::write_back
@ write_back
sycl::_V1::ext::intel::experimental::esimd::lsc_fence_op
lsc_fence_op
The lsc_fence operation to apply to caches Supported platforms: DG2, PVC.
Definition: common.hpp:45
sycl::_V1::ext::intel::experimental::esimd::wait
__ESIMD_API std::enable_if_t<(sizeof(T) *N >=2)> wait(sycl::ext::intel::esimd::simd< T, N > value)
Create explicit scoreboard dependency to avoid device code motion across this call and preserve the v...
Definition: memory.hpp:307
sycl::_V1::ext::intel::experimental::esimd::detail::lsc_data_order
lsc_data_order
Definition: common.hpp:89
sycl::_V1::ext::intel::experimental::esimd::detail::to_int
constexpr uint8_t to_int()
Definition: common.hpp:126
sycl::_V1::ext::intel::experimental::esimd::detail::finalize_data_size
constexpr lsc_data_size finalize_data_size()
Definition: common.hpp:175
sycl::_V1::ext::intel::experimental::esimd::lsc_data_size::u16u32h
@ u16u32h
load 16b, zero extend to 32b; store the opposite
sycl::_V1::ext::intel::experimental::esimd::lsc_scope::system
@ system
all GPUs in the system, flush out to memory shared by all GPUs
sycl::_V1::ext::intel::experimental::esimd::detail::to_lsc_vector_size
constexpr lsc_vector_size to_lsc_vector_size()
Definition: common.hpp:150
sycl::_V1::ext::intel::experimental::esimd::lsc_memory_kind::untyped_global
@ untyped_global
sycl::_V1::ext::intel::experimental::esimd::detail::lsc_vector_size
lsc_vector_size
Definition: common.hpp:78
sycl::_V1::ext::intel::experimental::esimd::detail::lsc_vector_size::n64
@ n64
sycl::_V1::ext::intel::experimental::esimd::detail::lsc_data_order::nontranspose
@ nontranspose
sycl::_V1::ext::intel::experimental::esimd::lsc_data_size::u8u32
@ u8u32
sycl::_V1::ext::intel::experimental::esimd::cache_hint::read_invalidate
@ read_invalidate
sycl::_V1::ext::intel::experimental::esimd::detail::lsc_expand_type::type
std::conditional_t< sizeof(T)<=4, std::conditional_t< std::is_signed< T >::value, int32_t, uint32_t >, std::conditional_t< std::is_signed< T >::value, int64_t, uint64_t > > type
Definition: common.hpp:203
sycl::_V1::ext::intel::experimental::esimd::lsc_fence_op::clean
@ clean
direct and clean lines are discarded w/o eviction
sycl::_V1::ext::intel::experimental::esimd::detail::lsc_data_order::transpose
@ transpose
sycl::_V1::ext::intel::experimental::esimd::detail::check_lsc_cache_hint
constexpr void check_lsc_cache_hint()
Definition: common.hpp:256
sycl::_V1::ext::intel::experimental::esimd::cache_hint::uncached
@ uncached
sycl::_V1::ext::intel::experimental::esimd::cache_hint::cached
@ cached
sycl::_V1::ext::intel::experimental::esimd::lsc_scope
lsc_scope
The scope that lsc_fence operation should apply to Supported platforms: DG2, PVC.
Definition: common.hpp:33
sycl::_V1::ext::intel::experimental::esimd::lsc_fence_op::flushl3
@ flushl3
dirty lines are written to memory, but retained in cache in clean state
sycl::_V1::ext::intel::experimental::esimd::split_barrier_action
split_barrier_action
Represents a split barrier action.
Definition: common.hpp:297
sycl::_V1::ext::intel::experimental::esimd::split_barrier_action::signal
@ signal
sycl::_V1::ext::intel::experimental::esimd::detail::lsc_vector_size::n8
@ n8
sycl::_V1::ext::intel::esimd::xmx::dpas_argument_type
dpas_argument_type
Describes the element types in the input matrices.
Definition: common.hpp:22
sycl::_V1::ext::intel::experimental::esimd::cache_hint
cache_hint
L1 or L3 cache hint kinds.
Definition: common.hpp:219
sycl::_V1::ext::intel::experimental::esimd::lsc_memory_kind::typed_global
@ typed_global
low-priority untyped global memory
sycl::_V1::ext::intel::experimental::esimd::detail::lsc_vector_size::n4
@ n4
sycl::_V1::ext::intel::experimental::esimd::detail::lsc_expand_type
Definition: common.hpp:199
sycl::_V1::ext::intel::experimental::esimd::lsc_scope::local
@ local
flush out to the threadgroup's scope
sycl::_V1::detail::conditional_t
typename std::conditional< B, T, F >::type conditional_t
Definition: stl_type_traits.hpp:27
sycl::_V1::ext::intel::experimental::esimd::lsc_scope::gpus
@ gpus
entire GPU, flush out to the GPUs LLC
sycl::_V1::ext::intel::experimental::esimd::lsc_data_size
lsc_data_size
Data size or format to read or store.
Definition: common.hpp:65
sycl::_V1::ext::intel::experimental::esimd::detail::lsc_bitcast_type::type
std::conditional_t< sizeof(T)==1, uint8_t, std::conditional_t< sizeof(T)==2, uint16_t, std::conditional_t< sizeof(T)==4, uint32_t, std::conditional_t< sizeof(T)==8, uint64_t, T > >> > type
Definition: common.hpp:213
sycl::_V1::ext::intel::experimental::esimd::lsc_fence_op::evict
@ evict
no operation
sycl::_V1::ext::intel::experimental::esimd::lsc_fence_op::discard
@ discard
invalidate all clean lines
sycl::_V1::group
Definition: helpers.hpp:30
sycl::_V1::ext::intel::experimental::esimd::detail::cache_hint_wrap::is_one_of
constexpr bool is_one_of() const
Definition: common.hpp:244
sycl::_V1::ext::intel::experimental::esimd::lsc_data_size::default_size
@ default_size
sycl::_V1::ext::intel::experimental::esimd::detail::cache_hint_wrap
Definition: common.hpp:231