DPC++ Runtime
Runtime libraries for oneAPI DPC++
common.hpp
Go to the documentation of this file.
1 //==---------------- common.hpp - DPC++ Explicit SIMD API ----------------==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 // definitions used in Explicit SIMD APIs.
9 //===----------------------------------------------------------------------===//
10 
11 #pragma once
12 
13 #include <sycl/detail/defines.hpp>
14 #include <sycl/exception.hpp>
17 
18 #include <cstdint> // for uint* types
19 #include <type_traits>
20 
22 
23 #ifdef __SYCL_DEVICE_ONLY__
24 #define __ESIMD_UNSUPPORTED_ON_HOST
25 #else // __SYCL_DEVICE_ONLY__
26 #define __ESIMD_UNSUPPORTED_ON_HOST \
27  throw sycl::exception(sycl::errc::feature_not_supported, \
28  "This ESIMD feature is not supported on HOST")
29 #endif // __SYCL_DEVICE_ONLY__
30 
32 
33 namespace sycl {
34 inline namespace _V1 {
35 namespace ext::intel::esimd {
36 
39 
40 using uchar = unsigned char;
41 using ushort = unsigned short;
42 using uint = unsigned int;
43 
46 struct saturation_on_tag : std::true_type {};
47 
49 struct saturation_off_tag : std::false_type {};
50 
52 static inline constexpr saturation_off_tag saturation_off{};
53 
55 static inline constexpr saturation_on_tag saturation_on{};
56 
58 enum class rgba_channel : uint8_t { R, G, B, A };
59 
63 using SurfaceIndex = unsigned int;
64 
66 enum class raw_send_eot : uint8_t {
67  not_eot = 0,
68  eot = 1,
69 };
70 
72 enum class raw_send_sendc : uint8_t {
73  not_sendc = 0,
74  sendc = 1,
75 };
76 
77 namespace detail {
78 
79 // Type used in internal functions to designate SLM access by
80 // providing dummy accessor of this type. Used to make it possible to delegate
81 // implemenations of SLM memory accesses to general surface-based memory
82 // accesses and thus reuse validity checks etc.
84 
85 template <typename T> struct is_saturation_tag {
86  static constexpr bool value =
87  std::is_same_v<T, __ESIMD_NS::saturation_on_tag> ||
88  std::is_same_v<T, __ESIMD_NS::saturation_off_tag>;
89 };
90 
91 template <class T>
93 
95 ESIMD_INLINE constexpr bool isPowerOf2(unsigned int n) {
96  return (n & (n - 1)) == 0;
97 }
98 
102 ESIMD_INLINE constexpr bool isPowerOf2(unsigned int n, unsigned int limit) {
103  return (n & (n - 1)) == 0 && n <= limit;
104 }
105 
106 template <rgba_channel Ch>
107 static inline constexpr uint8_t ch = 1 << static_cast<int>(Ch);
108 static inline constexpr uint8_t chR = ch<rgba_channel::R>;
109 static inline constexpr uint8_t chG = ch<rgba_channel::G>;
110 static inline constexpr uint8_t chB = ch<rgba_channel::B>;
111 static inline constexpr uint8_t chA = ch<rgba_channel::A>;
112 
113 // Shared Local Memory Binding Table Index (aka surface index).
114 static inline constexpr SurfaceIndex SLM_BTI = 254;
115 static inline constexpr SurfaceIndex INVALID_BTI =
116  static_cast<SurfaceIndex>(-1);
117 } // namespace detail
118 
121 enum class rgba_channel_mask : uint8_t {
122  R = detail::chR,
123  G = detail::chG,
125  B = detail::chB,
129  A = detail::chA,
137 };
138 
140  int Pos = static_cast<int>(Ch);
141  return (static_cast<int>(M) & (1 << Pos)) >> Pos;
142 }
143 
149 }
150 
159 enum class atomic_op : uint8_t {
161  add = 0x0,
163  sub = 0x1,
165  inc = 0x2,
167  dec = 0x3,
169  umin = 0x4,
171  umax = 0x5,
173  xchg = 0x6,
175  cmpxchg = 0x7,
177  bit_and = 0x8,
179  bit_or = 0x9,
181  bit_xor = 0xa,
183  smin = 0xb,
185  smax = 0xc,
187  fmax = 0x10,
189  fmin = 0x11,
192  fcmpxchg = 0x12,
193  fcmpwr = fcmpxchg,
195  fadd = 0x13, //
197  fsub = 0x14,
198  load = 0x15,
199  store = 0x16,
202  predec = 0xff,
203 };
204 
205 #undef __ESIMD_USM_DWORD_TO_LSC_MSG
206 
208 
209 namespace detail {
210 template <__ESIMD_NS::atomic_op Op> constexpr bool has_lsc_equivalent() {
211  switch (Op) {
212  case __ESIMD_NS::atomic_op::xchg:
213  case __ESIMD_NS::atomic_op::predec:
214  return false;
215  default:
216  return true;
217  }
218 }
219 
220 template <__ESIMD_NS::atomic_op Op>
222  switch (Op) {
225  case __ESIMD_NS::atomic_op::sub:
226  return __ESIMD_NS::native::lsc::atomic_op::sub;
227  case __ESIMD_NS::atomic_op::inc:
228  return __ESIMD_NS::native::lsc::atomic_op::inc;
231  case __ESIMD_NS::atomic_op::umin:
232  return __ESIMD_NS::native::lsc::atomic_op::umin;
233  case __ESIMD_NS::atomic_op::umax:
234  return __ESIMD_NS::native::lsc::atomic_op::umax;
235  case __ESIMD_NS::atomic_op::cmpxchg:
236  return __ESIMD_NS::native::lsc::atomic_op::cmpxchg;
243  case __ESIMD_NS::atomic_op::smin:
244  return __ESIMD_NS::native::lsc::atomic_op::smin;
245  case __ESIMD_NS::atomic_op::smax:
246  return __ESIMD_NS::native::lsc::atomic_op::smax;
251  case __ESIMD_NS::atomic_op::fcmpxchg:
252  return __ESIMD_NS::native::lsc::atomic_op::fcmpxchg;
253  case __ESIMD_NS::atomic_op::fadd:
254  return __ESIMD_NS::native::lsc::atomic_op::fadd;
255  case __ESIMD_NS::atomic_op::fsub:
256  return __ESIMD_NS::native::lsc::atomic_op::fsub;
257  case __ESIMD_NS::atomic_op::load:
258  return __ESIMD_NS::native::lsc::atomic_op::load;
259  case __ESIMD_NS::atomic_op::store:
260  return __ESIMD_NS::native::lsc::atomic_op::store;
261  default:
262  static_assert(has_lsc_equivalent<Op>() && "Unsupported LSC atomic op");
263  }
264 }
265 
266 template <__ESIMD_NS::native::lsc::atomic_op Op>
268  switch (Op) {
271  case __ESIMD_NS::native::lsc::atomic_op::sub:
272  return __ESIMD_NS::atomic_op::sub;
273  case __ESIMD_NS::native::lsc::atomic_op::inc:
274  return __ESIMD_NS::atomic_op::inc;
277  case __ESIMD_NS::native::lsc::atomic_op::umin:
278  return __ESIMD_NS::atomic_op::umin;
279  case __ESIMD_NS::native::lsc::atomic_op::umax:
280  return __ESIMD_NS::atomic_op::umax;
281  case __ESIMD_NS::native::lsc::atomic_op::cmpxchg:
282  return __ESIMD_NS::atomic_op::cmpxchg;
289  case __ESIMD_NS::native::lsc::atomic_op::smin:
290  return __ESIMD_NS::atomic_op::smin;
291  case __ESIMD_NS::native::lsc::atomic_op::smax:
292  return __ESIMD_NS::atomic_op::smax;
297  case __ESIMD_NS::native::lsc::atomic_op::fcmpxchg:
298  return __ESIMD_NS::atomic_op::fcmpxchg;
299  case __ESIMD_NS::native::lsc::atomic_op::fadd:
300  return __ESIMD_NS::atomic_op::fadd;
301  case __ESIMD_NS::native::lsc::atomic_op::fsub:
302  return __ESIMD_NS::atomic_op::fsub;
303  case __ESIMD_NS::native::lsc::atomic_op::load:
304  return __ESIMD_NS::atomic_op::load;
305  case __ESIMD_NS::native::lsc::atomic_op::store:
306  return __ESIMD_NS::atomic_op::store;
307  }
308 }
309 
310 template <__ESIMD_NS::atomic_op Op> constexpr int get_num_args() {
311  switch (Op) {
312  case __ESIMD_NS::atomic_op::inc:
314  case __ESIMD_NS::atomic_op::load:
315  return 0;
316  case __ESIMD_NS::atomic_op::xchg:
317  case __ESIMD_NS::atomic_op::predec:
318  case __ESIMD_NS::atomic_op::store:
320  case __ESIMD_NS::atomic_op::sub:
321  case __ESIMD_NS::atomic_op::smin:
322  case __ESIMD_NS::atomic_op::smax:
323  case __ESIMD_NS::atomic_op::umin:
324  case __ESIMD_NS::atomic_op::umax:
325  case __ESIMD_NS::atomic_op::fadd:
326  case __ESIMD_NS::atomic_op::fsub:
332  return 1;
333  case __ESIMD_NS::atomic_op::cmpxchg:
334  case __ESIMD_NS::atomic_op::fcmpxchg:
335  return 2;
336  default:
337  return -1; // error
338  }
339 }
340 
341 template <__ESIMD_NS::native::lsc::atomic_op Op> constexpr int get_num_args() {
342  return get_num_args<to_atomic_op<Op>()>();
343 }
344 
345 } // namespace detail
346 
348 enum class cache_hint : uint8_t {
349  none = 0,
351  uncached = 1,
352 
353  // load: cache data to cache;
354  cached = 2,
355 
359  write_back = 3,
360 
363  write_through = 4,
364 
370  streaming = 5,
371 
376  read_invalidate = 6,
377 
378  // TODO: Implement the verification of this enum in check_cache_hint().
384  const_cached = 7
385 };
386 
389 enum class fence_scope : uint8_t {
392  group = 0,
393 
396  local = 1,
397 
400  tile = 2,
401 
404  gpu = 3,
405 
408  gpus = 4,
409 
412  system = 5,
413 
418  system_acquire = 6
419 };
420 
423 enum class fence_flush_op : uint8_t {
424  none = 0,
425  evict = 1,
426  invalidate = 2,
427 
428  // enum with the value 3 is reserved;
429 
430  clean = 4
432 };
433 
436 enum class memory_kind : uint8_t {
437  global = 0,
438  // enum with the value 1 is reserved;
439  image = 2,
440  local = 3,
441 };
442 
444 enum class cache_level : uint8_t { L1 = 1, L2 = 2, L3 = 3 };
445 
446 namespace detail {
447 
449 enum class lsc_data_size : uint8_t {
450  default_size = 0,
451  u8 = 1,
452  u16 = 2,
453  u32 = 3,
454  u64 = 4,
455  u8u32 = 5,
456  u16u32 = 6,
457  u16u32h = 7,
458 };
459 
460 template <typename T, lsc_data_size DS> constexpr void check_lsc_data_size() {
461  static_assert(DS != lsc_data_size::default_size || sizeof(T) == 1 ||
462  sizeof(T) == 2 || sizeof(T) == 4 || sizeof(T) == 8,
463  "Unsupported data type");
464  static_assert(
466  (sizeof(T) == 1 &&
467  (DS == lsc_data_size::u8 || DS == lsc_data_size::u8u32)) ||
468  (sizeof(T) == 2 &&
469  (DS == lsc_data_size::u16 || DS == lsc_data_size::u16u32 ||
470  DS == lsc_data_size::u16u32h)) ||
471  (sizeof(T) == 4 &&
472  (DS == lsc_data_size::u32 || DS == lsc_data_size::u8u32 ||
474  (sizeof(T) == 8 && DS == lsc_data_size::u64),
475  "Data type does not match data size");
476 }
477 
478 template <typename T, lsc_data_size DS>
480  check_lsc_data_size<T, DS>();
481  if (DS != lsc_data_size::default_size)
482  return DS;
483  else if (sizeof(T) == 1)
484  return lsc_data_size::u8;
485  else if (sizeof(T) == 2)
486  return lsc_data_size::u16;
487  else if (sizeof(T) == 4)
488  return lsc_data_size::u32;
489  else if (sizeof(T) == 8)
490  return lsc_data_size::u64;
491  else
492  return DS;
493 }
494 
495 enum class lsc_vector_size : uint8_t {
496  n1 = 1,
497  n2 = 2,
498  n3 = 3,
499  n4 = 4,
500  n8 = 5,
501  n16 = 6,
502  n32 = 7,
503  n64 = 8,
504 };
505 
506 template <int VS> constexpr void check_lsc_vector_size() {
507  static_assert(VS == 1 || VS == 2 || VS == 3 || VS == 4 || VS == 8 ||
508  VS == 16 || VS == 32 || VS == 64,
509  "Unsupported vector size");
510 }
511 
512 template <lsc_vector_size VS> constexpr void check_lsc_vector_size() {
513  static_assert(VS == lsc_vector_size::n1 || VS == lsc_vector_size::n2 ||
514  VS == lsc_vector_size::n3 || VS == lsc_vector_size::n4 ||
515  VS == lsc_vector_size::n8 || VS == lsc_vector_size::n16 ||
517  "Unsupported vector size");
518 }
519 
520 template <lsc_vector_size VS> constexpr uint8_t to_int() {
521  check_lsc_vector_size<VS>();
522  switch (VS) {
523  case lsc_vector_size::n1:
524  return 1;
525  case lsc_vector_size::n2:
526  return 2;
527  case lsc_vector_size::n3:
528  return 3;
529  case lsc_vector_size::n4:
530  return 4;
531  case lsc_vector_size::n8:
532  return 8;
534  return 16;
536  return 32;
538  return 64;
539  default:
540  return 1;
541  }
542 }
543 
544 template <int VS> constexpr lsc_vector_size to_lsc_vector_size() {
545  check_lsc_vector_size<VS>();
546  switch (VS) {
547  case 1:
548  return lsc_vector_size::n1;
549  case 2:
550  return lsc_vector_size::n2;
551  case 3:
552  return lsc_vector_size::n3;
553  case 4:
554  return lsc_vector_size::n4;
555  case 8:
556  return lsc_vector_size::n8;
557  case 16:
558  return lsc_vector_size::n16;
559  case 32:
560  return lsc_vector_size::n32;
561  case 64:
562  return lsc_vector_size::n64;
563  default:
564  return lsc_vector_size::n1;
565  }
566 }
567 
568 enum class lsc_data_order : uint8_t {
569  nontranspose = 1,
570  transpose = 2,
571 };
572 
573 template <cache_hint Hint> class cache_hint_wrap {
574  template <cache_hint...> struct is_one_of_t;
575  template <cache_hint Last>
576  struct is_one_of_t<Last>
577  : std::conditional_t<Last == Hint, std::true_type, std::false_type> {};
578  template <cache_hint Head, cache_hint... Tail>
579  struct is_one_of_t<Head, Tail...>
580  : std::conditional_t<Head == Hint, std::true_type, is_one_of_t<Tail...>> {
581  };
582 
583 public:
584  constexpr operator cache_hint() const { return Hint; }
585  template <cache_hint... Hints> constexpr bool is_one_of() const {
586  return is_one_of_t<Hints...>::value;
587  }
588 };
589 
590 constexpr bool are_both(cache_hint First, cache_hint Second, cache_hint Val) {
591  return First == Val && Second == Val;
592 }
593 
595 
596 template <cache_action Action, cache_hint L1Hint, cache_hint L2Hint>
598  constexpr auto L1H = cache_hint_wrap<L1Hint>{};
599  constexpr auto L2H = cache_hint_wrap<L2Hint>{};
600  if constexpr (Action == cache_action::prefetch) {
601  static_assert(
602  L1H.template is_one_of<cache_hint::cached, cache_hint::uncached,
604  L2H.template is_one_of<cache_hint::cached,
606  !are_both(L1H, L2H, cache_hint::uncached),
607  "unsupported cache hint");
608  } else if constexpr (Action == cache_action::load) {
609  static_assert(
610  are_both(L1H, L2H, cache_hint::none) ||
611  (L1H.template is_one_of<cache_hint::uncached, cache_hint::cached,
613  L2H.template is_one_of<cache_hint::uncached,
614  cache_hint::cached>()) ||
616  "unsupported cache hint");
617  } else if constexpr (Action == cache_action::store) {
618  static_assert(are_both(L1H, L2H, cache_hint::none) ||
619  are_both(L1H, L2H, cache_hint::write_back) ||
620  (L1H.template is_one_of<cache_hint::uncached,
623  L2H.template is_one_of<cache_hint::uncached,
625  "unsupported cache hint");
626  } else if constexpr (Action == cache_action::atomic) {
627  static_assert(are_both(L1H, L2H, cache_hint::none) ||
628  (L1H == cache_hint::uncached &&
629  L2H.template is_one_of<cache_hint::uncached,
631  "unsupported cache hint");
632  }
633 }
634 
636  if (DS == lsc_data_size::u8)
637  return lsc_data_size::u8u32;
638  if (DS == lsc_data_size::u16)
639  return lsc_data_size::u16u32;
640  return DS;
641 }
642 
643 template <typename T> struct lsc_expand_type {
644  using type = std::conditional_t<
645  sizeof(T) <= 4,
646  std::conditional_t<std::is_signed_v<T>, int32_t, uint32_t>,
647  std::conditional_t<std::is_signed_v<T>, int64_t, uint64_t>>;
648 };
649 
650 } // namespace detail
651 
652 } // namespace ext::intel::esimd
653 } // namespace _V1
654 } // namespace sycl
Defines a shared image data.
Definition: image.hpp:444
constexpr int is_channel_enabled(rgba_channel_mask M, rgba_channel Ch)
Definition: common.hpp:139
rgba_channel
Represents a pixel's channel.
Definition: common.hpp:58
raw_send_eot
Specify if end of thread should be set.
Definition: common.hpp:66
rgba_channel_mask
Represents a pixel's channel mask - all possible combinations of enabled channels.
Definition: common.hpp:121
raw_send_sendc
Specify if sendc should be used.
Definition: common.hpp:72
unsigned int SurfaceIndex
Surface index type.
Definition: common.hpp:63
static constexpr saturation_off_tag saturation_off
Type tag object representing "saturation off" behavior.
Definition: common.hpp:52
constexpr int get_num_channels_enabled(rgba_channel_mask M)
Definition: common.hpp:144
unsigned short ushort
Definition: common.hpp:41
static constexpr saturation_on_tag saturation_on
Type tag object representing "saturation on" behavior.
Definition: common.hpp:55
atomic_op
Represents an atomic operation.
Definition: common.hpp:159
@ umin
Minimum: *addr = min(*addr, src0).
@ fsub
ACM/PVC: Subtraction (floating point): *addr = *addr - src0.
@ dec
Decrement: *addr = *addr - 1.
@ add
Addition: *addr = *addr + src0.
@ smin
Minimum (signed integer): *addr = min(*addr, src0).
@ cmpxchg
Compare and exchange. if (*addr == src0) *sddr = src1;
@ fmax
ACM/PVC: Minimum (floating point): *addr = min(*addr, src0).
@ fadd
ACM/PVC: Addition (floating point): *addr = *addr + src0.
@ umax
Maximum: *addr = max(*addr, src0).
@ sub
Subtraction: *addr = *addr - src0.
@ xchg
Exchange. *addr == src0;
@ fmin
ACM/PVC: Maximum (floating point): *addr = max(*addr, src0).
@ predec
Decrement: *addr = *addr - 1.
@ smax
Maximum (signed integer): *addr = max(*addr, src0).
@ inc
Increment: *addr = *addr + 1.
@ fcmpxchg
ACM/PVC: Compare and exchange (floating point).
atomic_op
LSC atomic operation codes.
Definition: common.hpp:39
void add(const void *DeviceGlobalPtr, const char *UniqueId)
conditional< sizeof(long)==8, long, long long >::type int64_t
Definition: kernel_desc.hpp:35
constexpr sycl::ext::intel::esimd::atomic_op to_atomic_op()
Definition: common.hpp:267
constexpr bool has_lsc_equivalent()
Definition: common.hpp:210
constexpr lsc_vector_size to_lsc_vector_size()
Definition: common.hpp:544
constexpr void check_lsc_data_size()
Definition: common.hpp:460
static constexpr SurfaceIndex SLM_BTI
Definition: common.hpp:114
constexpr sycl::ext::intel::esimd::native::lsc::atomic_op to_lsc_atomic_op()
Definition: common.hpp:221
static constexpr uint8_t chG
Definition: common.hpp:109
static constexpr uint8_t chR
Definition: common.hpp:108
constexpr lsc_data_size finalize_data_size()
Definition: common.hpp:479
static constexpr uint8_t chB
Definition: common.hpp:110
constexpr bool are_both(cache_hint First, cache_hint Second, cache_hint Val)
Definition: common.hpp:590
static constexpr uint8_t ch
Definition: common.hpp:107
static constexpr uint8_t chA
Definition: common.hpp:111
lsc_data_size
Data size or format to read or store.
Definition: common.hpp:449
@ u16u32h
load 16b, zero extend to 32b; store the opposite
@ u16u32
load 8b, zero extend to 32b; store the opposite
constexpr void check_lsc_vector_size()
Definition: common.hpp:506
constexpr ESIMD_INLINE bool isPowerOf2(unsigned int n)
Check if a given 32 bit positive integer is a power of 2 at compile time.
Definition: common.hpp:95
constexpr lsc_data_size expand_data_size(lsc_data_size DS)
Definition: common.hpp:635
static constexpr SurfaceIndex INVALID_BTI
Definition: common.hpp:115
cache_level
L1, L2 or L3 cache hint levels. L3 is reserved for future use.
Definition: common.hpp:444
cache_hint
L1, L2 or L3 cache hints.
Definition: common.hpp:348
@ read_invalidate
load: asserts that the cache line containing the data will not be read again until it’s overwritten,...
@ const_cached
load, L2 cache only, next gen GPU after Xe required: asserts that the L2 cache line containing the da...
@ write_through
store: immediately write data to the subsequent furthest cache, marking the cache line in the current...
@ write_back
store: write data into cache level and mark the cache line as "dirty".
@ streaming
load: cache data to cache using the evict-first policy to minimize cache pollution caused by temporar...
@ uncached
load/store/atomic: do not cache data to cache;
fence_scope
The scope that fence() operation should apply to.
Definition: common.hpp:389
@ gpu
Wait until all previous memory transactions from this thread are observed in the local GPU.
@ tile
Wait until all previous memory transactions from this thread are observed in the local tile.
@ gpus
Wait until all previous memory transactions from this thread are observed across all GPUs in the syst...
@ system_acquire
Global memory data-port only: for GPUs that do not follow PCIe Write ordering for downstream writes t...
@ system
Global memory data-port only: wait until all previous memory transactions from this thread are observ...
@ local
Wait until all previous memory transactions from this thread are observed within the local sub-slice.
fence_flush_op
The cache flush operation to apply to caches after fence() is complete.
Definition: common.hpp:423
@ clean
R/W and RO: invalidate all clean lines;.
@ invalidate
R/W: evict dirty lines; R/W and RO: invalidate clean lines.
memory_kind
The target memory kind for fence() operation.
Definition: common.hpp:436
@ local
image (also known as typed global memory)
std::bit_or< T > bit_or
Definition: functional.hpp:22
std::bit_xor< T > bit_xor
Definition: functional.hpp:23
std::bit_and< T > bit_and
Definition: functional.hpp:24
std::enable_if_t< detail::is_vgenfloat_v< T >, T > fmin(T x, typename T::element_type y)
std::bit_and< T > bit_and
Definition: functional.hpp:20
std::bit_xor< T > bit_xor
Definition: functional.hpp:22
std::enable_if_t< detail::is_vgenfloat_v< T >, T > fmax(T x, typename T::element_type y)
std::bit_or< T > bit_or
Definition: functional.hpp:21
constexpr stream_manipulator dec
Definition: stream.hpp:784
Definition: access.hpp:18
std::conditional_t< sizeof(T)<=4, std::conditional_t< std::is_signed_v< T >, int32_t, uint32_t >, std::conditional_t< std::is_signed_v< T >, int64_t, uint64_t > > type
Definition: common.hpp:647
This type tag represents "saturation off" behavior.
Definition: common.hpp:49
Gen hardware supports applying saturation to results of certain operations.
Definition: common.hpp:46