DPC++ Runtime
Runtime libraries for oneAPI DPC++
common.hpp
Go to the documentation of this file.
1 //==---------------- common.hpp - DPC++ Explicit SIMD API ----------------==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 // definitions used in Explicit SIMD APIs.
9 //===----------------------------------------------------------------------===//
10 
11 #pragma once
12 
13 #include <sycl/detail/defines.hpp>
14 #include <sycl/exception.hpp>
18 
19 #include <cstdint> // for uint* types
20 #include <type_traits>
21 
23 
24 #ifdef __SYCL_DEVICE_ONLY__
25 #define __ESIMD_UNSUPPORTED_ON_HOST
26 #else // __SYCL_DEVICE_ONLY__
27 #define __ESIMD_UNSUPPORTED_ON_HOST \
28  throw sycl::exception(sycl::errc::feature_not_supported, \
29  "This ESIMD feature is not supported on HOST")
30 #endif // __SYCL_DEVICE_ONLY__
31 
33 
34 namespace sycl {
35 inline namespace _V1 {
36 namespace ext::intel::esimd {
37 
40 
41 using uchar = unsigned char;
42 using ushort = unsigned short;
43 using uint = unsigned int;
44 
47 struct saturation_on_tag : std::true_type {};
48 
50 struct saturation_off_tag : std::false_type {};
51 
53 static inline constexpr saturation_off_tag saturation_off{};
54 
56 static inline constexpr saturation_on_tag saturation_on{};
57 
59 enum class rgba_channel : uint8_t { R, G, B, A };
60 
64 using SurfaceIndex = unsigned int;
65 
67 enum class raw_send_eot : uint8_t {
68  not_eot = 0,
69  eot = 1,
70 };
71 
73 enum class raw_send_sendc : uint8_t {
74  not_sendc = 0,
75  sendc = 1,
76 };
77 
78 namespace detail {
79 
80 // Type used in internal functions to designate SLM access by
81 // providing dummy accessor of this type. Used to make it possible to delegate
82 // implemenations of SLM memory accesses to general surface-based memory
83 // accesses and thus reuse validity checks etc.
85 
86 template <typename T> struct is_saturation_tag {
87  static constexpr bool value =
88  std::is_same_v<T, __ESIMD_NS::saturation_on_tag> ||
89  std::is_same_v<T, __ESIMD_NS::saturation_off_tag>;
90 };
91 
92 template <class T>
94 
96 ESIMD_INLINE constexpr bool isPowerOf2(unsigned int n) {
97  return (n & (n - 1)) == 0;
98 }
99 
103 ESIMD_INLINE constexpr bool isPowerOf2(unsigned int n, unsigned int limit) {
104  return (n & (n - 1)) == 0 && n <= limit;
105 }
106 
107 template <rgba_channel Ch>
108 static inline constexpr uint8_t ch = 1 << static_cast<int>(Ch);
109 static inline constexpr uint8_t chR = ch<rgba_channel::R>;
110 static inline constexpr uint8_t chG = ch<rgba_channel::G>;
111 static inline constexpr uint8_t chB = ch<rgba_channel::B>;
112 static inline constexpr uint8_t chA = ch<rgba_channel::A>;
113 
114 // Shared Local Memory Binding Table Index (aka surface index).
115 static inline constexpr SurfaceIndex SLM_BTI = 254;
116 static inline constexpr SurfaceIndex INVALID_BTI =
117  static_cast<SurfaceIndex>(-1);
118 } // namespace detail
119 
122 enum class rgba_channel_mask : uint8_t {
123  R = detail::chR,
124  G = detail::chG,
126  B = detail::chB,
130  A = detail::chA,
138 };
139 
141  int Pos = static_cast<int>(Ch);
142  return (static_cast<int>(M) & (1 << Pos)) >> Pos;
143 }
144 
150 }
151 
160 enum class atomic_op : uint8_t {
162  add = 0x0,
164  sub = 0x1,
166  inc = 0x2,
168  dec = 0x3,
170  umin = 0x4,
172  umax = 0x5,
174  xchg = 0x6,
176  cmpxchg = 0x7,
178  bit_and = 0x8,
180  bit_or = 0x9,
182  bit_xor = 0xa,
184  smin = 0xb,
186  smax = 0xc,
188  fmax = 0x10,
190  fmin = 0x11,
193  fcmpxchg = 0x12,
194  fcmpwr = fcmpxchg,
196  fadd = 0x13, //
198  fsub = 0x14,
199  load = 0x15,
200  store = 0x16,
201 };
202 
203 #undef __ESIMD_USM_DWORD_TO_LSC_MSG
204 
206 
207 namespace detail {
208 template <__ESIMD_NS::atomic_op Op> constexpr bool has_lsc_equivalent() {
209  switch (Op) {
210  case __ESIMD_NS::atomic_op::xchg:
211  return false;
212  default:
213  return true;
214  }
215 }
216 
217 template <__ESIMD_NS::atomic_op Op>
219  switch (Op) {
222  case __ESIMD_NS::atomic_op::sub:
223  return __ESIMD_NS::native::lsc::atomic_op::sub;
224  case __ESIMD_NS::atomic_op::inc:
225  return __ESIMD_NS::native::lsc::atomic_op::inc;
232  case __ESIMD_NS::atomic_op::cmpxchg:
233  return __ESIMD_NS::native::lsc::atomic_op::cmpxchg;
240  case __ESIMD_NS::atomic_op::smin:
241  return __ESIMD_NS::native::lsc::atomic_op::smin;
242  case __ESIMD_NS::atomic_op::smax:
243  return __ESIMD_NS::native::lsc::atomic_op::smax;
248  case __ESIMD_NS::atomic_op::fcmpxchg:
249  return __ESIMD_NS::native::lsc::atomic_op::fcmpxchg;
250  case __ESIMD_NS::atomic_op::fadd:
251  return __ESIMD_NS::native::lsc::atomic_op::fadd;
252  case __ESIMD_NS::atomic_op::fsub:
253  return __ESIMD_NS::native::lsc::atomic_op::fsub;
254  case __ESIMD_NS::atomic_op::load:
255  return __ESIMD_NS::native::lsc::atomic_op::load;
256  case __ESIMD_NS::atomic_op::store:
257  return __ESIMD_NS::native::lsc::atomic_op::store;
258  default:
259  static_assert(has_lsc_equivalent<Op>() && "Unsupported LSC atomic op");
260  }
261 }
262 
263 template <__ESIMD_NS::native::lsc::atomic_op Op>
265  switch (Op) {
268  case __ESIMD_NS::native::lsc::atomic_op::sub:
269  return __ESIMD_NS::atomic_op::sub;
270  case __ESIMD_NS::native::lsc::atomic_op::inc:
271  return __ESIMD_NS::atomic_op::inc;
278  case __ESIMD_NS::native::lsc::atomic_op::cmpxchg:
279  return __ESIMD_NS::atomic_op::cmpxchg;
286  case __ESIMD_NS::native::lsc::atomic_op::smin:
287  return __ESIMD_NS::atomic_op::smin;
288  case __ESIMD_NS::native::lsc::atomic_op::smax:
289  return __ESIMD_NS::atomic_op::smax;
294  case __ESIMD_NS::native::lsc::atomic_op::fcmpxchg:
295  return __ESIMD_NS::atomic_op::fcmpxchg;
296  case __ESIMD_NS::native::lsc::atomic_op::fadd:
297  return __ESIMD_NS::atomic_op::fadd;
298  case __ESIMD_NS::native::lsc::atomic_op::fsub:
299  return __ESIMD_NS::atomic_op::fsub;
300  case __ESIMD_NS::native::lsc::atomic_op::load:
301  return __ESIMD_NS::atomic_op::load;
302  case __ESIMD_NS::native::lsc::atomic_op::store:
303  return __ESIMD_NS::atomic_op::store;
304  }
305 }
306 
307 template <__ESIMD_NS::atomic_op Op> constexpr int get_num_args() {
308  switch (Op) {
309  case __ESIMD_NS::atomic_op::inc:
311  case __ESIMD_NS::atomic_op::load:
312  return 0;
313  case __ESIMD_NS::atomic_op::xchg:
314  case __ESIMD_NS::atomic_op::store:
316  case __ESIMD_NS::atomic_op::sub:
317  case __ESIMD_NS::atomic_op::smin:
318  case __ESIMD_NS::atomic_op::smax:
321  case __ESIMD_NS::atomic_op::fadd:
322  case __ESIMD_NS::atomic_op::fsub:
328  return 1;
329  case __ESIMD_NS::atomic_op::cmpxchg:
330  case __ESIMD_NS::atomic_op::fcmpxchg:
331  return 2;
332  default:
333  return -1; // error
334  }
335 }
336 
337 template <__ESIMD_NS::native::lsc::atomic_op Op> constexpr int get_num_args() {
338  return get_num_args<to_atomic_op<Op>()>();
339 }
340 
341 } // namespace detail
342 
345 enum class fence_scope : uint8_t {
348  group = 0,
349 
352  local = 1,
353 
356  tile = 2,
357 
360  gpu = 3,
361 
364  gpus = 4,
365 
368  system = 5,
369 
374  system_acquire = 6
375 };
376 
379 enum class fence_flush_op : uint8_t {
380  none = 0,
381  evict = 1,
382  invalidate = 2,
383 
384  // enum with the value 3 is reserved;
385 
386  clean = 4
388 };
389 
392 enum class memory_kind : uint8_t {
393  global = 0,
394  // enum with the value 1 is reserved;
395  image = 2,
396  local = 3,
397 };
398 
399 namespace detail {
400 
402 enum class lsc_data_size : uint8_t {
403  default_size = 0,
404  u8 = 1,
405  u16 = 2,
406  u32 = 3,
407  u64 = 4,
408  u8u32 = 5,
409  u16u32 = 6,
410  u16u32h = 7,
411 };
412 
413 template <typename T, lsc_data_size DS> constexpr void check_lsc_data_size() {
414  static_assert(DS != lsc_data_size::default_size || sizeof(T) == 1 ||
415  sizeof(T) == 2 || sizeof(T) == 4 || sizeof(T) == 8,
416  "Unsupported data type");
417  static_assert(
419  (sizeof(T) == 1 &&
420  (DS == lsc_data_size::u8 || DS == lsc_data_size::u8u32)) ||
421  (sizeof(T) == 2 &&
422  (DS == lsc_data_size::u16 || DS == lsc_data_size::u16u32 ||
423  DS == lsc_data_size::u16u32h)) ||
424  (sizeof(T) == 4 &&
425  (DS == lsc_data_size::u32 || DS == lsc_data_size::u8u32 ||
427  (sizeof(T) == 8 && DS == lsc_data_size::u64),
428  "Data type does not match data size");
429 }
430 
431 template <typename T, lsc_data_size DS>
433  check_lsc_data_size<T, DS>();
434  if (DS != lsc_data_size::default_size)
435  return DS;
436  else if (sizeof(T) == 1)
437  return lsc_data_size::u8;
438  else if (sizeof(T) == 2)
439  return lsc_data_size::u16;
440  else if (sizeof(T) == 4)
441  return lsc_data_size::u32;
442  else if (sizeof(T) == 8)
443  return lsc_data_size::u64;
444  else
445  return DS;
446 }
447 
448 enum class lsc_vector_size : uint8_t {
449  n1 = 1,
450  n2 = 2,
451  n3 = 3,
452  n4 = 4,
453  n8 = 5,
454  n16 = 6,
455  n32 = 7,
456  n64 = 8,
457 };
458 
459 template <int VS> constexpr void check_lsc_vector_size() {
460  static_assert(VS == 1 || VS == 2 || VS == 3 || VS == 4 || VS == 8 ||
461  VS == 16 || VS == 32 || VS == 64,
462  "Unsupported vector size");
463 }
464 
465 template <lsc_vector_size VS> constexpr void check_lsc_vector_size() {
466  static_assert(VS == lsc_vector_size::n1 || VS == lsc_vector_size::n2 ||
467  VS == lsc_vector_size::n3 || VS == lsc_vector_size::n4 ||
468  VS == lsc_vector_size::n8 || VS == lsc_vector_size::n16 ||
470  "Unsupported vector size");
471 }
472 
473 template <lsc_vector_size VS> constexpr uint8_t to_int() {
474  check_lsc_vector_size<VS>();
475  switch (VS) {
476  case lsc_vector_size::n1:
477  return 1;
478  case lsc_vector_size::n2:
479  return 2;
480  case lsc_vector_size::n3:
481  return 3;
482  case lsc_vector_size::n4:
483  return 4;
484  case lsc_vector_size::n8:
485  return 8;
487  return 16;
489  return 32;
491  return 64;
492  default:
493  return 1;
494  }
495 }
496 
497 template <int VS> constexpr lsc_vector_size to_lsc_vector_size() {
498  check_lsc_vector_size<VS>();
499  switch (VS) {
500  case 1:
501  return lsc_vector_size::n1;
502  case 2:
503  return lsc_vector_size::n2;
504  case 3:
505  return lsc_vector_size::n3;
506  case 4:
507  return lsc_vector_size::n4;
508  case 8:
509  return lsc_vector_size::n8;
510  case 16:
511  return lsc_vector_size::n16;
512  case 32:
513  return lsc_vector_size::n32;
514  case 64:
515  return lsc_vector_size::n64;
516  default:
517  return lsc_vector_size::n1;
518  }
519 }
520 
521 enum class lsc_data_order : uint8_t {
522  nontranspose = 1,
523  transpose = 2,
524 };
525 
526 template <cache_hint Hint> class cache_hint_wrap {
527  template <cache_hint...> struct is_one_of_t;
528  template <cache_hint Last>
529  struct is_one_of_t<Last>
530  : std::conditional_t<Last == Hint, std::true_type, std::false_type> {};
531  template <cache_hint Head, cache_hint... Tail>
532  struct is_one_of_t<Head, Tail...>
533  : std::conditional_t<Head == Hint, std::true_type, is_one_of_t<Tail...>> {
534  };
535 
536 public:
537  constexpr operator cache_hint() const { return Hint; }
538  template <cache_hint... Hints> constexpr bool is_one_of() const {
539  return is_one_of_t<Hints...>::value;
540  }
541 };
542 
543 template <cache_hint Val>
544 constexpr bool are_all(cache_hint First, cache_hint Second) {
545  return First == Val && Second == Val;
546 }
547 
549 
550 template <typename PropertyListT> constexpr bool has_cache_hints() {
551  constexpr cache_hint L1H =
552  getPropertyValue<PropertyListT, cache_hint_L1_key>(cache_hint::none);
553  constexpr cache_hint L2H =
554  getPropertyValue<PropertyListT, cache_hint_L2_key>(cache_hint::none);
555  return L1H != cache_hint::none || L2H != cache_hint::none;
556 }
557 
558 // Verifies cache-hint properties from 'PropertyListT`. The parameter 'Action'
559 // specifies the usage context.
560 template <cache_action Action, typename PropertyListT>
562  constexpr auto L1H =
564  cache_hint::none)>{};
565  constexpr auto L2H =
567  cache_hint::none)>{};
568  if constexpr (Action == cache_action::prefetch) {
569  static_assert(
570  L1H.template is_one_of<cache_hint::cached, cache_hint::uncached,
572  L2H.template is_one_of<cache_hint::cached,
574  !are_all<cache_hint::uncached>(L1H, L2H),
575  "unsupported cache hint");
576  } else if constexpr (Action == cache_action::load) {
577  static_assert(
578  are_all<cache_hint::none>(L1H, L2H) ||
579  (L1H.template is_one_of<cache_hint::uncached, cache_hint::cached,
581  L2H.template is_one_of<cache_hint::uncached,
582  cache_hint::cached>()) ||
584  "unsupported cache hint");
585  } else if constexpr (Action == cache_action::store) {
586  static_assert(are_all<cache_hint::none>(L1H, L2H) ||
587  are_all<cache_hint::write_back>(L1H, L2H) ||
588  (L1H.template is_one_of<cache_hint::uncached,
591  L2H.template is_one_of<cache_hint::uncached,
593  "unsupported cache hint");
594  } else if constexpr (Action == cache_action::atomic) {
595  static_assert(are_all<cache_hint::none>(L1H, L2H) ||
596  (L1H == cache_hint::uncached &&
597  L2H.template is_one_of<cache_hint::uncached,
599  "unsupported cache hint");
600  }
601 }
602 
604  if (DS == lsc_data_size::u8)
605  return lsc_data_size::u8u32;
606  if (DS == lsc_data_size::u16)
607  return lsc_data_size::u16u32;
608  return DS;
609 }
610 
611 template <typename T> struct lsc_expand_type {
612  using type = std::conditional_t<
613  sizeof(T) <= 4,
614  std::conditional_t<std::is_signed_v<T>, int32_t, uint32_t>,
615  std::conditional_t<std::is_signed_v<T>, int64_t, uint64_t>>;
616 };
617 
618 } // namespace detail
619 
620 } // namespace ext::intel::esimd
621 } // namespace _V1
622 } // namespace sycl
Defines a shared image data.
Definition: image.hpp:449
constexpr int is_channel_enabled(rgba_channel_mask M, rgba_channel Ch)
Definition: common.hpp:140
rgba_channel
Represents a pixel's channel.
Definition: common.hpp:59
raw_send_eot
Specify if end of thread should be set.
Definition: common.hpp:67
rgba_channel_mask
Represents a pixel's channel mask - all possible combinations of enabled channels.
Definition: common.hpp:122
raw_send_sendc
Specify if sendc should be used.
Definition: common.hpp:73
unsigned int SurfaceIndex
Surface index type.
Definition: common.hpp:64
static constexpr saturation_off_tag saturation_off
Type tag object representing "saturation off" behavior.
Definition: common.hpp:53
constexpr int get_num_channels_enabled(rgba_channel_mask M)
Definition: common.hpp:145
unsigned short ushort
Definition: common.hpp:42
static constexpr saturation_on_tag saturation_on
Type tag object representing "saturation on" behavior.
Definition: common.hpp:56
atomic_op
Represents an atomic operation.
Definition: common.hpp:160
@ umin
Minimum: *addr = min(*addr, src0).
@ fsub
ACM/PVC: Subtraction (floating point): *addr = *addr - src0.
@ dec
Decrement: *addr = *addr - 1.
@ add
Addition: *addr = *addr + src0.
@ smin
Minimum (signed integer): *addr = min(*addr, src0).
@ cmpxchg
Compare and exchange. if (*addr == src0) *sddr = src1;
@ fmax
ACM/PVC: Minimum (floating point): *addr = min(*addr, src0).
@ fadd
ACM/PVC: Addition (floating point): *addr = *addr + src0.
@ umax
Maximum: *addr = max(*addr, src0).
@ sub
Subtraction: *addr = *addr - src0.
@ xchg
Exchange. *addr == src0;
@ fmin
ACM/PVC: Maximum (floating point): *addr = max(*addr, src0).
@ smax
Maximum (signed integer): *addr = max(*addr, src0).
@ inc
Increment: *addr = *addr + 1.
@ fcmpxchg
ACM/PVC: Compare and exchange (floating point).
atomic_op
LSC atomic operation codes.
Definition: common.hpp:39
void add(const void *DeviceGlobalPtr, const char *UniqueId)
conditional< sizeof(long)==8, long, long long >::type int64_t
Definition: kernel_desc.hpp:35
constexpr sycl::ext::intel::esimd::atomic_op to_atomic_op()
Definition: common.hpp:264
constexpr bool has_lsc_equivalent()
Definition: common.hpp:208
constexpr lsc_vector_size to_lsc_vector_size()
Definition: common.hpp:497
constexpr void check_lsc_data_size()
Definition: common.hpp:413
static constexpr SurfaceIndex SLM_BTI
Definition: common.hpp:115
constexpr sycl::ext::intel::esimd::native::lsc::atomic_op to_lsc_atomic_op()
Definition: common.hpp:218
static constexpr uint8_t chG
Definition: common.hpp:110
static constexpr uint8_t chR
Definition: common.hpp:109
constexpr lsc_data_size finalize_data_size()
Definition: common.hpp:432
static constexpr uint8_t chB
Definition: common.hpp:111
static constexpr uint8_t ch
Definition: common.hpp:108
static constexpr uint8_t chA
Definition: common.hpp:112
constexpr bool are_all(cache_hint First, cache_hint Second)
Definition: common.hpp:544
lsc_data_size
Data size or format to read or store.
Definition: common.hpp:402
@ u16u32h
load 16b, zero extend to 32b; store the opposite
@ u16u32
load 8b, zero extend to 32b; store the opposite
constexpr void check_lsc_vector_size()
Definition: common.hpp:459
constexpr ESIMD_INLINE bool isPowerOf2(unsigned int n)
Check if a given 32 bit positive integer is a power of 2 at compile time.
Definition: common.hpp:96
constexpr lsc_data_size expand_data_size(lsc_data_size DS)
Definition: common.hpp:603
static constexpr SurfaceIndex INVALID_BTI
Definition: common.hpp:116
cache_hint
L1, L2 or L3 cache hints.
@ read_invalidate
load: asserts that the cache line containing the data will not be read again until it’s overwritten,...
@ write_through
store: immediately write data to the subsequent furthest cache, marking the cache line in the current...
@ write_back
store: write data into cache level and mark the cache line as "dirty".
@ streaming
load: cache data to cache using the evict-first policy to minimize cache pollution caused by temporar...
@ uncached
load/store/atomic: do not cache data to cache;
fence_scope
The scope that fence() operation should apply to.
Definition: common.hpp:345
@ gpu
Wait until all previous memory transactions from this thread are observed in the local GPU.
@ tile
Wait until all previous memory transactions from this thread are observed in the local tile.
@ gpus
Wait until all previous memory transactions from this thread are observed across all GPUs in the syst...
@ system_acquire
Global memory data-port only: for GPUs that do not follow PCIe Write ordering for downstream writes t...
@ system
Global memory data-port only: wait until all previous memory transactions from this thread are observ...
@ local
Wait until all previous memory transactions from this thread are observed within the local sub-slice.
fence_flush_op
The cache flush operation to apply to caches after fence() is complete.
Definition: common.hpp:379
@ clean
R/W and RO: invalidate all clean lines;.
@ invalidate
R/W: evict dirty lines; R/W and RO: invalidate clean lines.
memory_kind
The target memory kind for fence() operation.
Definition: common.hpp:392
@ local
image (also known as typed global memory)
std::bit_or< T > bit_or
Definition: functional.hpp:22
std::enable_if_t< detail::is_bf16_storage_type< T >::value, T > fmax(T x, T y)
std::enable_if_t< detail::is_bf16_storage_type< T >::value, T > fmin(T x, T y)
std::bit_xor< T > bit_xor
Definition: functional.hpp:23
std::bit_and< T > bit_and
Definition: functional.hpp:24
std::bit_and< T > bit_and
Definition: functional.hpp:20
std::bit_xor< T > bit_xor
Definition: functional.hpp:22
std::bit_or< T > bit_or
Definition: functional.hpp:21
constexpr stream_manipulator dec
Definition: stream.hpp:785
Definition: access.hpp:18
std::conditional_t< sizeof(T)<=4, std::conditional_t< std::is_signed_v< T >, int32_t, uint32_t >, std::conditional_t< std::is_signed_v< T >, int64_t, uint64_t > > type
Definition: common.hpp:615
This type tag represents "saturation off" behavior.
Definition: common.hpp:50
Gen hardware supports applying saturation to results of certain operations.
Definition: common.hpp:47