DPC++ Runtime
Runtime libraries for oneAPI DPC++
common.hpp
Go to the documentation of this file.
1 //==---------------- common.hpp - DPC++ Explicit SIMD API ----------------==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 // Common definitions used in experimental Explicit SIMD APIs.
9 //===----------------------------------------------------------------------===//
10 
11 #pragma once
12 
16 
17 #include <cstdint>
18 #include <type_traits>
19 
20 namespace sycl {
22 namespace ext::intel::experimental::esimd {
23 
26 
27 using argument_type
28  __SYCL_DEPRECATED("use sycl::ext::intel::esimd::xmx::dpas_argument_type") =
30 
33 enum class lsc_scope : uint8_t {
34  group = 0,
35  local = 1,
36  tile = 2,
37  gpu = 3,
38  gpus = 4,
39  system = 5,
40  sysacq = 6,
41 };
42 
45 enum class lsc_fence_op : uint8_t {
46  none = 0,
47  evict = 1,
48  invalidate = 2,
49  discard = 3,
50  clean = 4,
51  flushl3 = 5,
53 };
54 
57 enum class lsc_memory_kind : uint8_t {
58  untyped_global = 0,
60  typed_global = 2,
61  shared_local = 3,
62 };
63 
65 enum class lsc_data_size : uint8_t {
66  default_size = 0,
67  u8 = 1,
68  u16 = 2,
69  u32 = 3,
70  u64 = 4,
71  u8u32 = 5,
72  u16u32 = 6,
73  u16u32h = 7,
74 };
75 
76 namespace detail {
77 
78 enum class lsc_vector_size : uint8_t {
79  n1 = 1,
80  n2 = 2,
81  n3 = 3,
82  n4 = 4,
83  n8 = 5,
84  n16 = 6,
85  n32 = 7,
86  n64 = 8,
87 };
88 
89 enum class lsc_data_order : uint8_t {
90  nontranspose = 1,
91  transpose = 2,
92 };
93 
94 template <lsc_vector_size VS> constexpr void check_lsc_vector_size() {
95  static_assert(VS == lsc_vector_size::n1 || VS == lsc_vector_size::n2 ||
96  VS == lsc_vector_size::n3 || VS == lsc_vector_size::n4 ||
97  VS == lsc_vector_size::n8 || VS == lsc_vector_size::n16 ||
98  VS == lsc_vector_size::n64 || VS == lsc_vector_size::n32,
99  "Unsupported vector size");
100 }
101 
102 template <int VS> constexpr void check_lsc_vector_size() {
103  static_assert(VS == 1 || VS == 2 || VS == 3 || VS == 4 || VS == 8 ||
104  VS == 16 || VS == 32 || VS == 64,
105  "Unsupported vector size");
106 }
107 
108 template <typename T, lsc_data_size DS> constexpr void check_lsc_data_size() {
109  static_assert(DS != lsc_data_size::default_size || sizeof(T) == 1 ||
110  sizeof(T) == 2 || sizeof(T) == 4 || sizeof(T) == 8,
111  "Unsupported data type");
112 }
113 
114 template <lsc_vector_size VS> constexpr uint8_t to_int() {
115  check_lsc_vector_size<VS>();
116  switch (VS) {
117  case lsc_vector_size::n1:
118  return 1;
119  case lsc_vector_size::n2:
120  return 2;
121  case lsc_vector_size::n3:
122  return 3;
123  case lsc_vector_size::n4:
124  return 4;
125  case lsc_vector_size::n8:
126  return 8;
127  case lsc_vector_size::n16:
128  return 16;
129  case lsc_vector_size::n32:
130  return 32;
131  case lsc_vector_size::n64:
132  return 64;
133  default:
134  return 1;
135  }
136 }
137 
138 template <int VS> constexpr lsc_vector_size to_lsc_vector_size() {
139  check_lsc_vector_size<VS>();
140  switch (VS) {
141  case 1:
142  return lsc_vector_size::n1;
143  case 2:
144  return lsc_vector_size::n2;
145  case 3:
146  return lsc_vector_size::n3;
147  case 4:
148  return lsc_vector_size::n4;
149  case 8:
150  return lsc_vector_size::n8;
151  case 16:
152  return lsc_vector_size::n16;
153  case 32:
154  return lsc_vector_size::n32;
155  case 64:
156  return lsc_vector_size::n64;
157  default:
158  return lsc_vector_size::n1;
159  }
160 }
161 
162 template <typename T, lsc_data_size DS>
164  check_lsc_data_size<T, DS>();
165  if (DS != lsc_data_size::default_size)
166  return DS;
167  else if (sizeof(T) == 1)
168  return lsc_data_size::u8;
169  else if (sizeof(T) == 2)
170  return lsc_data_size::u16;
171  else if (sizeof(T) == 4)
172  return lsc_data_size::u32;
173  else if (sizeof(T) == 8)
174  return lsc_data_size::u64;
175  else
176  return DS;
177 }
178 
180  if (DS == lsc_data_size::u8)
181  return lsc_data_size::u8u32;
182  if (DS == lsc_data_size::u16)
183  return lsc_data_size::u16u32;
184  return DS;
185 }
186 
187 template <typename T> struct lsc_expand_type {
188  using type = typename std::conditional<sizeof(T) < 4, uint32_t, T>::type;
189 };
190 
191 template <typename T> struct lsc_bitcast_type {
192 private:
193  using _type1 = typename std::conditional<sizeof(T) == 2, uint16_t, T>::type;
194  using _type2 = typename std::conditional<sizeof(T) == 1, uint8_t, T>::type;
195 
196 public:
197  using type =
198  typename std::conditional<sizeof(_type2) == 1, _type2, _type1>::type;
199 };
200 
201 } // namespace detail
202 
204 enum class cache_hint : uint8_t {
205  none = 0,
206  uncached = 1,
207  cached = 2,
208  write_back = 3,
209  write_through = 4,
210  streaming = 5,
211  read_invalidate = 6
212 };
213 
214 namespace detail {
215 
216 template <cache_hint Hint> class cache_hint_wrap {
217  template <cache_hint...> class is_one_of_t;
218  template <cache_hint Last>
219  struct is_one_of_t<Last>
220  : std::conditional<Last == Hint, std::true_type, std::false_type>::type {
221  };
222  template <cache_hint Head, cache_hint... Tail>
223  struct is_one_of_t<Head, Tail...>
224  : std::conditional<Head == Hint, std::true_type,
225  is_one_of_t<Tail...>>::type {};
226 
227 public:
228  constexpr operator cache_hint() const { return Hint; }
229  template <cache_hint... Hints> constexpr bool is_one_of() const {
230  return is_one_of_t<Hints...>::value;
231  }
232 };
233 
234 constexpr bool are_both(cache_hint First, cache_hint Second, cache_hint Val) {
235  return First == Val && Second == Val;
236 }
237 
238 enum class lsc_action { prefetch, load, store, atomic };
239 
240 template <lsc_action Action, cache_hint L1, cache_hint L3>
241 constexpr void check_lsc_cache_hint() {
242  constexpr auto L1H = cache_hint_wrap<L1>{};
243  constexpr auto L3H = cache_hint_wrap<L3>{};
244  if constexpr (Action == lsc_action::prefetch) {
245  static_assert(
246  L1H.template is_one_of<cache_hint::cached, cache_hint::uncached,
247  cache_hint::streaming>() &&
248  L3H.template is_one_of<cache_hint::cached,
249  cache_hint::uncached>() &&
250  !are_both(L1H, L3H, cache_hint::uncached),
251  "unsupported cache hint");
252  } else if constexpr (Action == lsc_action::load) {
253  static_assert(
254  are_both(L1H, L3H, cache_hint::none) ||
255  (L1H.template is_one_of<cache_hint::uncached, cache_hint::cached,
256  cache_hint::streaming>() &&
257  L3H.template is_one_of<cache_hint::uncached,
258  cache_hint::cached>()) ||
259  (L1H == cache_hint::read_invalidate && L3H == cache_hint::cached),
260  "unsupported cache hint");
261  } else if constexpr (Action == lsc_action::store) {
262  static_assert(are_both(L1H, L3H, cache_hint::none) ||
263  are_both(L1H, L3H, cache_hint::write_back) ||
264  (L1H.template is_one_of<cache_hint::uncached,
265  cache_hint::write_through,
266  cache_hint::streaming>() &&
267  L3H.template is_one_of<cache_hint::uncached,
268  cache_hint::write_back>()),
269  "unsupported cache hint");
270  } else if constexpr (Action == lsc_action::atomic) {
271  static_assert(are_both(L1H, L3H, cache_hint::none) ||
272  (L1H == cache_hint::uncached &&
273  L3H.template is_one_of<cache_hint::uncached,
274  cache_hint::write_back>()),
275  "unsupported cache hint");
276  }
277 }
278 
279 } // namespace detail
280 
282 enum class split_barrier_action : uint8_t {
283  wait = 0, // split barrier wait
284  signal = 1, // split barrier signal
285 };
286 
288 
289 } // namespace ext::intel::experimental::esimd
290 } // __SYCL_INLINE_VER_NAMESPACE(_V1)
291 } // namespace sycl
sycl::_V1::ext::intel::experimental::esimd::lsc_memory_kind::shared_local
@ shared_local
typed global memory
sycl::_V1::ext::intel::experimental::esimd::lsc_memory_kind::untyped_global_low_pri
@ untyped_global_low_pri
untyped global memory
sycl::_V1::ext::intel::experimental::esimd::lsc_data_size::u64
@ u64
common.hpp
sycl::_V1::ext::intel::experimental::esimd::lsc_scope::tile
@ tile
flush out to the local scope
sycl::_V1::ext::intel::experimental::esimd::lsc_fence_op::none
@ none
sycl::_V1::ext::intel::experimental::prefetch
prefetch_impl< _B > prefetch
Definition: fpga_lsu.hpp:47
sycl::_V1::ext::intel::experimental::esimd::detail::lsc_vector_size::n3
@ n3
sycl::_V1::ext::intel::experimental::esimd::detail::lsc_vector_size::n2
@ n2
__SYCL_INLINE_VER_NAMESPACE
#define __SYCL_INLINE_VER_NAMESPACE(X)
Definition: defines_elementary.hpp:13
common.hpp
sycl::_V1::ext::intel::experimental::esimd::detail::check_lsc_data_size
constexpr void check_lsc_data_size()
Definition: common.hpp:108
defines_elementary.hpp
sycl::_V1::ext::intel::experimental::esimd::lsc_data_size::u32
@ u32
sycl::_V1::ext::intel::experimental::esimd::cache_hint::write_through
@ write_through
sycl::_V1::ext::intel::experimental::esimd::detail::lsc_vector_size::n32
@ n32
sycl
---— Error handling, matching OpenCL plugin semantics.
Definition: access.hpp:13
sycl::_V1::ext::intel::experimental::esimd::detail::are_both
constexpr bool are_both(cache_hint First, cache_hint Second, cache_hint Val)
Definition: common.hpp:234
sycl::_V1::ext::intel::experimental::esimd::lsc_data_size::u16
@ u16
sycl::_V1::ext::intel::experimental::prefetch_impl
Definition: fpga_lsu.hpp:36
sycl::_V1::ext::intel::experimental::esimd::lsc_scope::gpu
@ gpu
tile, flush out to several DSSs
sycl::_V1::ext::intel::experimental::esimd::detail::lsc_bitcast_type
Definition: common.hpp:191
sycl::_V1::ext::intel::experimental::esimd::detail::expand_data_size
constexpr lsc_data_size expand_data_size(lsc_data_size DS)
Definition: common.hpp:179
sycl::_V1::ext::intel::experimental::esimd::lsc_scope::sysacq
@ sysacq
the entire system memory space
sycl::_V1::ext::intel::experimental::esimd::lsc_data_size::u16u32
@ u16u32
load 8b, zero extend to 32b; store the opposite
__SYCL_DEPRECATED
#define __SYCL_DEPRECATED(message)
Definition: defines_elementary.hpp:45
sycl::_V1::ext::intel::experimental::esimd::detail::check_lsc_vector_size
constexpr void check_lsc_vector_size()
Definition: common.hpp:94
sycl::_V1::ext::intel::experimental::esimd::detail::lsc_action
lsc_action
Definition: common.hpp:238
sycl::_V1::ext::intel::experimental::esimd::detail::lsc_bitcast_type::type
typename std::conditional< sizeof(_type2)==1, _type2, _type1 >::type type
Definition: common.hpp:198
sycl::_V1::ext::intel::experimental::esimd::lsc_memory_kind
lsc_memory_kind
The specific LSC shared function to fence with lsc_fence Supported platforms: DG2,...
Definition: common.hpp:57
sycl::_V1::ext::intel::experimental::esimd::lsc_fence_op::invalidate
@ invalidate
dirty lines evicted and invalidated from L1
sycl::_V1::ext::intel::experimental::esimd::cache_hint::streaming
@ streaming
sycl::_V1::ext::intel::experimental::esimd::detail::lsc_vector_size::n1
@ n1
sycl::_V1::ext::intel::experimental::esimd::detail::lsc_vector_size::n16
@ n16
sycl::_V1::ext::intel::experimental::esimd::cache_hint::write_back
@ write_back
sycl::_V1::ext::intel::experimental::esimd::lsc_fence_op
lsc_fence_op
The lsc_fence operation to apply to caches Supported platforms: DG2, PVC.
Definition: common.hpp:45
sycl::_V1::ext::intel::experimental::esimd::detail::lsc_data_order
lsc_data_order
Definition: common.hpp:89
sycl::_V1::ext::intel::experimental::esimd::detail::to_int
constexpr uint8_t to_int()
Definition: common.hpp:114
sycl::_V1::ext::intel::experimental::esimd::detail::finalize_data_size
constexpr lsc_data_size finalize_data_size()
Definition: common.hpp:163
sycl::_V1::ext::intel::experimental::esimd::lsc_data_size::u16u32h
@ u16u32h
load 16b, zero extend to 32b; store the opposite
sycl::_V1::ext::intel::experimental::esimd::lsc_scope::system
@ system
all GPUs in the system, flush out to memory shared by all GPUs
sycl::_V1::ext::intel::experimental::esimd::detail::to_lsc_vector_size
constexpr lsc_vector_size to_lsc_vector_size()
Definition: common.hpp:138
sycl::_V1::ext::intel::experimental::esimd::lsc_memory_kind::untyped_global
@ untyped_global
sycl::_V1::ext::intel::experimental::esimd::detail::lsc_vector_size
lsc_vector_size
Definition: common.hpp:78
sycl::_V1::ext::intel::experimental::esimd::detail::lsc_vector_size::n64
@ n64
sycl::_V1::ext::intel::experimental::esimd::detail::lsc_data_order::nontranspose
@ nontranspose
sycl::_V1::ext::intel::experimental::esimd::lsc_data_size::u8u32
@ u8u32
sycl::_V1::ext::intel::experimental::esimd::cache_hint::read_invalidate
@ read_invalidate
sycl::_V1::ext::intel::experimental::esimd::lsc_fence_op::clean
@ clean
direct and clean lines are discarded w/o eviction
sycl::_V1::ext::intel::experimental::esimd::detail::lsc_data_order::transpose
@ transpose
sycl::_V1::ext::intel::experimental::esimd::detail::check_lsc_cache_hint
constexpr void check_lsc_cache_hint()
Definition: common.hpp:241
sycl::_V1::ext::intel::experimental::esimd::cache_hint::uncached
@ uncached
sycl::_V1::ext::intel::experimental::esimd::cache_hint::cached
@ cached
sycl::_V1::ext::intel::experimental::esimd::lsc_scope
lsc_scope
The scope that lsc_fence operation should apply to Supported platforms: DG2, PVC.
Definition: common.hpp:33
sycl::_V1::ext::intel::experimental::esimd::lsc_fence_op::flushl3
@ flushl3
dirty lines are written to memory, but retained in cache in clean state
sycl::_V1::ext::intel::experimental::esimd::split_barrier_action
split_barrier_action
Represents a split barrier action.
Definition: common.hpp:282
sycl::_V1::ext::intel::experimental::esimd::split_barrier_action::signal
@ signal
sycl::_V1::ext::intel::experimental::esimd::detail::lsc_vector_size::n8
@ n8
sycl::_V1::ext::intel::esimd::xmx::dpas_argument_type
dpas_argument_type
Definition: common.hpp:19
sycl::_V1::ext::intel::experimental::esimd::cache_hint
cache_hint
L1 or L3 cache hint kinds.
Definition: common.hpp:204
sycl::_V1::ext::intel::experimental::esimd::lsc_memory_kind::typed_global
@ typed_global
low-priority untyped global memory
sycl::_V1::ext::intel::experimental::esimd::detail::lsc_vector_size::n4
@ n4
sycl::_V1::ext::intel::experimental::esimd::split_barrier_action::wait
@ wait
sycl::_V1::ext::intel::experimental::esimd::detail::lsc_expand_type
Definition: common.hpp:187
sycl::_V1::ext::intel::experimental::esimd::lsc_scope::local
@ local
flush out to the threadgroup's scope
sycl::_V1::ext::intel::experimental::esimd::lsc_scope::gpus
@ gpus
entire GPU, flush out to the GPUs LLC
sycl::_V1::ext::intel::experimental::esimd::lsc_data_size
lsc_data_size
Data size or format to read or store.
Definition: common.hpp:65
sycl::_V1::ext::intel::experimental::esimd::lsc_fence_op::evict
@ evict
no operation
sycl::_V1::ext::intel::experimental::esimd::lsc_fence_op::discard
@ discard
invalidate all clean lines
sycl::_V1::ext::intel::experimental::esimd::detail::lsc_expand_type::type
typename std::conditional< sizeof(T)< 4, uint32_t, T >::type type
Definition: common.hpp:188
sycl::_V1::group
Definition: helpers.hpp:29
sycl::_V1::ext::intel::experimental::esimd::detail::cache_hint_wrap::is_one_of
constexpr bool is_one_of() const
Definition: common.hpp:229
sycl::_V1::ext::intel::experimental::esimd::lsc_data_size::default_size
@ default_size
sycl::_V1::ext::intel::experimental::esimd::detail::cache_hint_wrap
Definition: common.hpp:216