DPC++ Runtime
Runtime libraries for oneAPI DPC++
common.hpp
Go to the documentation of this file.
1 //==---------------- common.hpp - DPC++ Explicit SIMD API ----------------==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 // definitions used in Explicit SIMD APIs.
9 //===----------------------------------------------------------------------===//
10 
11 #pragma once
12 
13 #include <sycl/detail/defines.hpp>
14 #include <sycl/exception.hpp>
18 
19 #include <cstdint> // for uint* types
20 #include <type_traits>
21 
23 
24 #ifdef __SYCL_DEVICE_ONLY__
25 #define __ESIMD_UNSUPPORTED_ON_HOST
26 #else // __SYCL_DEVICE_ONLY__
27 #define __ESIMD_UNSUPPORTED_ON_HOST \
28  throw sycl::exception(sycl::errc::feature_not_supported, \
29  "This ESIMD feature is not supported on HOST")
30 #endif // __SYCL_DEVICE_ONLY__
31 
33 
34 namespace sycl {
35 inline namespace _V1 {
36 namespace ext::intel::esimd {
37 
40 
41 using uchar = unsigned char;
42 using ushort = unsigned short;
43 using uint = unsigned int;
44 
47 struct saturation_on_tag : std::true_type {};
48 
50 struct saturation_off_tag : std::false_type {};
51 
53 static inline constexpr saturation_off_tag saturation_off{};
54 
56 static inline constexpr saturation_on_tag saturation_on{};
57 
59 enum class rgba_channel : uint8_t { R, G, B, A };
60 
64 using SurfaceIndex = unsigned int;
65 
67 enum class raw_send_eot : uint8_t {
68  not_eot = 0,
69  eot = 1,
70 };
71 
73 enum class raw_send_sendc : uint8_t {
74  not_sendc = 0,
75  sendc = 1,
76 };
77 
78 namespace detail {
79 
80 // Type used in internal functions to designate SLM access by
81 // providing dummy accessor of this type. Used to make it possible to delegate
82 // implemenations of SLM memory accesses to general surface-based memory
83 // accesses and thus reuse validity checks etc.
85 
86 template <typename T> struct is_saturation_tag {
87  static constexpr bool value =
88  std::is_same_v<T, __ESIMD_NS::saturation_on_tag> ||
89  std::is_same_v<T, __ESIMD_NS::saturation_off_tag>;
90 };
91 
92 template <class T>
94 
96 ESIMD_INLINE constexpr bool isPowerOf2(unsigned int n) {
97  return (n & (n - 1)) == 0;
98 }
99 
103 ESIMD_INLINE constexpr bool isPowerOf2(unsigned int n, unsigned int limit) {
104  return (n & (n - 1)) == 0 && n <= limit;
105 }
106 
107 template <rgba_channel Ch>
108 static inline constexpr uint8_t ch = 1 << static_cast<int>(Ch);
109 static inline constexpr uint8_t chR = ch<rgba_channel::R>;
110 static inline constexpr uint8_t chG = ch<rgba_channel::G>;
111 static inline constexpr uint8_t chB = ch<rgba_channel::B>;
112 static inline constexpr uint8_t chA = ch<rgba_channel::A>;
113 
114 // Shared Local Memory Binding Table Index (aka surface index).
115 static inline constexpr SurfaceIndex SLM_BTI = 254;
116 static inline constexpr SurfaceIndex INVALID_BTI =
117  static_cast<SurfaceIndex>(-1);
118 } // namespace detail
119 
122 enum class rgba_channel_mask : uint8_t {
123  R = detail::chR,
124  G = detail::chG,
126  B = detail::chB,
130  A = detail::chA,
138 };
139 
141  int Pos = static_cast<int>(Ch);
142  return (static_cast<int>(M) & (1 << Pos)) >> Pos;
143 }
144 
150 }
151 
160 enum class atomic_op : uint8_t {
162  add = 0x0,
164  sub = 0x1,
166  inc = 0x2,
168  dec = 0x3,
170  umin = 0x4,
172  umax = 0x5,
174  xchg = 0x6,
176  cmpxchg = 0x7,
178  bit_and = 0x8,
180  bit_or = 0x9,
182  bit_xor = 0xa,
184  smin = 0xb,
186  smax = 0xc,
188  fmax = 0x10,
190  fmin = 0x11,
193  fcmpxchg = 0x12,
194  fcmpwr = fcmpxchg,
196  fadd = 0x13, //
198  fsub = 0x14,
199  load = 0x15,
200  store = 0x16,
203  predec = 0xff,
204 };
205 
206 #undef __ESIMD_USM_DWORD_TO_LSC_MSG
207 
209 
210 namespace detail {
211 template <__ESIMD_NS::atomic_op Op> constexpr bool has_lsc_equivalent() {
212  switch (Op) {
213  case __ESIMD_NS::atomic_op::xchg:
214  case __ESIMD_NS::atomic_op::predec:
215  return false;
216  default:
217  return true;
218  }
219 }
220 
221 template <__ESIMD_NS::atomic_op Op>
223  switch (Op) {
226  case __ESIMD_NS::atomic_op::sub:
227  return __ESIMD_NS::native::lsc::atomic_op::sub;
228  case __ESIMD_NS::atomic_op::inc:
229  return __ESIMD_NS::native::lsc::atomic_op::inc;
232  case __ESIMD_NS::atomic_op::umin:
233  return __ESIMD_NS::native::lsc::atomic_op::umin;
234  case __ESIMD_NS::atomic_op::umax:
235  return __ESIMD_NS::native::lsc::atomic_op::umax;
236  case __ESIMD_NS::atomic_op::cmpxchg:
237  return __ESIMD_NS::native::lsc::atomic_op::cmpxchg;
244  case __ESIMD_NS::atomic_op::smin:
245  return __ESIMD_NS::native::lsc::atomic_op::smin;
246  case __ESIMD_NS::atomic_op::smax:
247  return __ESIMD_NS::native::lsc::atomic_op::smax;
252  case __ESIMD_NS::atomic_op::fcmpxchg:
253  return __ESIMD_NS::native::lsc::atomic_op::fcmpxchg;
254  case __ESIMD_NS::atomic_op::fadd:
255  return __ESIMD_NS::native::lsc::atomic_op::fadd;
256  case __ESIMD_NS::atomic_op::fsub:
257  return __ESIMD_NS::native::lsc::atomic_op::fsub;
258  case __ESIMD_NS::atomic_op::load:
259  return __ESIMD_NS::native::lsc::atomic_op::load;
260  case __ESIMD_NS::atomic_op::store:
261  return __ESIMD_NS::native::lsc::atomic_op::store;
262  default:
263  static_assert(has_lsc_equivalent<Op>() && "Unsupported LSC atomic op");
264  }
265 }
266 
267 template <__ESIMD_NS::native::lsc::atomic_op Op>
269  switch (Op) {
272  case __ESIMD_NS::native::lsc::atomic_op::sub:
273  return __ESIMD_NS::atomic_op::sub;
274  case __ESIMD_NS::native::lsc::atomic_op::inc:
275  return __ESIMD_NS::atomic_op::inc;
278  case __ESIMD_NS::native::lsc::atomic_op::umin:
279  return __ESIMD_NS::atomic_op::umin;
280  case __ESIMD_NS::native::lsc::atomic_op::umax:
281  return __ESIMD_NS::atomic_op::umax;
282  case __ESIMD_NS::native::lsc::atomic_op::cmpxchg:
283  return __ESIMD_NS::atomic_op::cmpxchg;
290  case __ESIMD_NS::native::lsc::atomic_op::smin:
291  return __ESIMD_NS::atomic_op::smin;
292  case __ESIMD_NS::native::lsc::atomic_op::smax:
293  return __ESIMD_NS::atomic_op::smax;
298  case __ESIMD_NS::native::lsc::atomic_op::fcmpxchg:
299  return __ESIMD_NS::atomic_op::fcmpxchg;
300  case __ESIMD_NS::native::lsc::atomic_op::fadd:
301  return __ESIMD_NS::atomic_op::fadd;
302  case __ESIMD_NS::native::lsc::atomic_op::fsub:
303  return __ESIMD_NS::atomic_op::fsub;
304  case __ESIMD_NS::native::lsc::atomic_op::load:
305  return __ESIMD_NS::atomic_op::load;
306  case __ESIMD_NS::native::lsc::atomic_op::store:
307  return __ESIMD_NS::atomic_op::store;
308  }
309 }
310 
311 template <__ESIMD_NS::atomic_op Op> constexpr int get_num_args() {
312  switch (Op) {
313  case __ESIMD_NS::atomic_op::inc:
315  case __ESIMD_NS::atomic_op::load:
316  return 0;
317  case __ESIMD_NS::atomic_op::xchg:
318  case __ESIMD_NS::atomic_op::predec:
319  case __ESIMD_NS::atomic_op::store:
321  case __ESIMD_NS::atomic_op::sub:
322  case __ESIMD_NS::atomic_op::smin:
323  case __ESIMD_NS::atomic_op::smax:
324  case __ESIMD_NS::atomic_op::umin:
325  case __ESIMD_NS::atomic_op::umax:
326  case __ESIMD_NS::atomic_op::fadd:
327  case __ESIMD_NS::atomic_op::fsub:
333  return 1;
334  case __ESIMD_NS::atomic_op::cmpxchg:
335  case __ESIMD_NS::atomic_op::fcmpxchg:
336  return 2;
337  default:
338  return -1; // error
339  }
340 }
341 
342 template <__ESIMD_NS::native::lsc::atomic_op Op> constexpr int get_num_args() {
343  return get_num_args<to_atomic_op<Op>()>();
344 }
345 
346 } // namespace detail
347 
350 enum class fence_scope : uint8_t {
353  group = 0,
354 
357  local = 1,
358 
361  tile = 2,
362 
365  gpu = 3,
366 
369  gpus = 4,
370 
373  system = 5,
374 
379  system_acquire = 6
380 };
381 
384 enum class fence_flush_op : uint8_t {
385  none = 0,
386  evict = 1,
387  invalidate = 2,
388 
389  // enum with the value 3 is reserved;
390 
391  clean = 4
393 };
394 
397 enum class memory_kind : uint8_t {
398  global = 0,
399  // enum with the value 1 is reserved;
400  image = 2,
401  local = 3,
402 };
403 
404 namespace detail {
405 
407 enum class lsc_data_size : uint8_t {
408  default_size = 0,
409  u8 = 1,
410  u16 = 2,
411  u32 = 3,
412  u64 = 4,
413  u8u32 = 5,
414  u16u32 = 6,
415  u16u32h = 7,
416 };
417 
418 template <typename T, lsc_data_size DS> constexpr void check_lsc_data_size() {
419  static_assert(DS != lsc_data_size::default_size || sizeof(T) == 1 ||
420  sizeof(T) == 2 || sizeof(T) == 4 || sizeof(T) == 8,
421  "Unsupported data type");
422  static_assert(
424  (sizeof(T) == 1 &&
425  (DS == lsc_data_size::u8 || DS == lsc_data_size::u8u32)) ||
426  (sizeof(T) == 2 &&
427  (DS == lsc_data_size::u16 || DS == lsc_data_size::u16u32 ||
428  DS == lsc_data_size::u16u32h)) ||
429  (sizeof(T) == 4 &&
430  (DS == lsc_data_size::u32 || DS == lsc_data_size::u8u32 ||
432  (sizeof(T) == 8 && DS == lsc_data_size::u64),
433  "Data type does not match data size");
434 }
435 
436 template <typename T, lsc_data_size DS>
438  check_lsc_data_size<T, DS>();
439  if (DS != lsc_data_size::default_size)
440  return DS;
441  else if (sizeof(T) == 1)
442  return lsc_data_size::u8;
443  else if (sizeof(T) == 2)
444  return lsc_data_size::u16;
445  else if (sizeof(T) == 4)
446  return lsc_data_size::u32;
447  else if (sizeof(T) == 8)
448  return lsc_data_size::u64;
449  else
450  return DS;
451 }
452 
453 enum class lsc_vector_size : uint8_t {
454  n1 = 1,
455  n2 = 2,
456  n3 = 3,
457  n4 = 4,
458  n8 = 5,
459  n16 = 6,
460  n32 = 7,
461  n64 = 8,
462 };
463 
464 template <int VS> constexpr void check_lsc_vector_size() {
465  static_assert(VS == 1 || VS == 2 || VS == 3 || VS == 4 || VS == 8 ||
466  VS == 16 || VS == 32 || VS == 64,
467  "Unsupported vector size");
468 }
469 
470 template <lsc_vector_size VS> constexpr void check_lsc_vector_size() {
471  static_assert(VS == lsc_vector_size::n1 || VS == lsc_vector_size::n2 ||
472  VS == lsc_vector_size::n3 || VS == lsc_vector_size::n4 ||
473  VS == lsc_vector_size::n8 || VS == lsc_vector_size::n16 ||
475  "Unsupported vector size");
476 }
477 
478 template <lsc_vector_size VS> constexpr uint8_t to_int() {
479  check_lsc_vector_size<VS>();
480  switch (VS) {
481  case lsc_vector_size::n1:
482  return 1;
483  case lsc_vector_size::n2:
484  return 2;
485  case lsc_vector_size::n3:
486  return 3;
487  case lsc_vector_size::n4:
488  return 4;
489  case lsc_vector_size::n8:
490  return 8;
492  return 16;
494  return 32;
496  return 64;
497  default:
498  return 1;
499  }
500 }
501 
502 template <int VS> constexpr lsc_vector_size to_lsc_vector_size() {
503  check_lsc_vector_size<VS>();
504  switch (VS) {
505  case 1:
506  return lsc_vector_size::n1;
507  case 2:
508  return lsc_vector_size::n2;
509  case 3:
510  return lsc_vector_size::n3;
511  case 4:
512  return lsc_vector_size::n4;
513  case 8:
514  return lsc_vector_size::n8;
515  case 16:
516  return lsc_vector_size::n16;
517  case 32:
518  return lsc_vector_size::n32;
519  case 64:
520  return lsc_vector_size::n64;
521  default:
522  return lsc_vector_size::n1;
523  }
524 }
525 
526 enum class lsc_data_order : uint8_t {
527  nontranspose = 1,
528  transpose = 2,
529 };
530 
531 template <cache_hint Hint> class cache_hint_wrap {
532  template <cache_hint...> struct is_one_of_t;
533  template <cache_hint Last>
534  struct is_one_of_t<Last>
535  : std::conditional_t<Last == Hint, std::true_type, std::false_type> {};
536  template <cache_hint Head, cache_hint... Tail>
537  struct is_one_of_t<Head, Tail...>
538  : std::conditional_t<Head == Hint, std::true_type, is_one_of_t<Tail...>> {
539  };
540 
541 public:
542  constexpr operator cache_hint() const { return Hint; }
543  template <cache_hint... Hints> constexpr bool is_one_of() const {
544  return is_one_of_t<Hints...>::value;
545  }
546 };
547 
548 template <cache_hint Val>
549 constexpr bool are_all(cache_hint First, cache_hint Second) {
550  return First == Val && Second == Val;
551 }
552 
554 
555 template <typename PropertyListT> constexpr bool has_cache_hints() {
556  constexpr cache_hint L1H =
557  getPropertyValue<PropertyListT, cache_hint_L1_key>(cache_hint::none);
558  constexpr cache_hint L2H =
559  getPropertyValue<PropertyListT, cache_hint_L2_key>(cache_hint::none);
560  return L1H != cache_hint::none || L2H != cache_hint::none;
561 }
562 
563 // Verifies cache-hint properties from 'PropertyListT`. The parameter 'Action'
564 // specifies the usage context.
565 template <cache_action Action, typename PropertyListT>
567  constexpr auto L1H =
569  cache_hint::none)>{};
570  constexpr auto L2H =
572  cache_hint::none)>{};
573  if constexpr (Action == cache_action::prefetch) {
574  static_assert(
575  L1H.template is_one_of<cache_hint::cached, cache_hint::uncached,
577  L2H.template is_one_of<cache_hint::cached,
579  !are_all<cache_hint::uncached>(L1H, L2H),
580  "unsupported cache hint");
581  } else if constexpr (Action == cache_action::load) {
582  static_assert(
583  are_all<cache_hint::none>(L1H, L2H) ||
584  (L1H.template is_one_of<cache_hint::uncached, cache_hint::cached,
586  L2H.template is_one_of<cache_hint::uncached,
587  cache_hint::cached>()) ||
589  "unsupported cache hint");
590  } else if constexpr (Action == cache_action::store) {
591  static_assert(are_all<cache_hint::none>(L1H, L2H) ||
592  are_all<cache_hint::write_back>(L1H, L2H) ||
593  (L1H.template is_one_of<cache_hint::uncached,
596  L2H.template is_one_of<cache_hint::uncached,
598  "unsupported cache hint");
599  } else if constexpr (Action == cache_action::atomic) {
600  static_assert(are_all<cache_hint::none>(L1H, L2H) ||
601  (L1H == cache_hint::uncached &&
602  L2H.template is_one_of<cache_hint::uncached,
604  "unsupported cache hint");
605  }
606 }
607 
609  if (DS == lsc_data_size::u8)
610  return lsc_data_size::u8u32;
611  if (DS == lsc_data_size::u16)
612  return lsc_data_size::u16u32;
613  return DS;
614 }
615 
616 template <typename T> struct lsc_expand_type {
617  using type = std::conditional_t<
618  sizeof(T) <= 4,
619  std::conditional_t<std::is_signed_v<T>, int32_t, uint32_t>,
620  std::conditional_t<std::is_signed_v<T>, int64_t, uint64_t>>;
621 };
622 
623 } // namespace detail
624 
625 } // namespace ext::intel::esimd
626 } // namespace _V1
627 } // namespace sycl
Defines a shared image data.
Definition: image.hpp:443
constexpr int is_channel_enabled(rgba_channel_mask M, rgba_channel Ch)
Definition: common.hpp:140
rgba_channel
Represents a pixel's channel.
Definition: common.hpp:59
raw_send_eot
Specify if end of thread should be set.
Definition: common.hpp:67
rgba_channel_mask
Represents a pixel's channel mask - all possible combinations of enabled channels.
Definition: common.hpp:122
raw_send_sendc
Specify if sendc should be used.
Definition: common.hpp:73
unsigned int SurfaceIndex
Surface index type.
Definition: common.hpp:64
static constexpr saturation_off_tag saturation_off
Type tag object representing "saturation off" behavior.
Definition: common.hpp:53
constexpr int get_num_channels_enabled(rgba_channel_mask M)
Definition: common.hpp:145
unsigned short ushort
Definition: common.hpp:42
static constexpr saturation_on_tag saturation_on
Type tag object representing "saturation on" behavior.
Definition: common.hpp:56
atomic_op
Represents an atomic operation.
Definition: common.hpp:160
@ umin
Minimum: *addr = min(*addr, src0).
@ fsub
ACM/PVC: Subtraction (floating point): *addr = *addr - src0.
@ dec
Decrement: *addr = *addr - 1.
@ add
Addition: *addr = *addr + src0.
@ smin
Minimum (signed integer): *addr = min(*addr, src0).
@ cmpxchg
Compare and exchange. if (*addr == src0) *sddr = src1;
@ fmax
ACM/PVC: Minimum (floating point): *addr = min(*addr, src0).
@ fadd
ACM/PVC: Addition (floating point): *addr = *addr + src0.
@ umax
Maximum: *addr = max(*addr, src0).
@ sub
Subtraction: *addr = *addr - src0.
@ xchg
Exchange. *addr == src0;
@ fmin
ACM/PVC: Maximum (floating point): *addr = max(*addr, src0).
@ predec
Decrement: *addr = *addr - 1.
@ smax
Maximum (signed integer): *addr = max(*addr, src0).
@ inc
Increment: *addr = *addr + 1.
@ fcmpxchg
ACM/PVC: Compare and exchange (floating point).
atomic_op
LSC atomic operation codes.
Definition: common.hpp:39
void add(const void *DeviceGlobalPtr, const char *UniqueId)
conditional< sizeof(long)==8, long, long long >::type int64_t
Definition: kernel_desc.hpp:35
constexpr sycl::ext::intel::esimd::atomic_op to_atomic_op()
Definition: common.hpp:268
constexpr bool has_lsc_equivalent()
Definition: common.hpp:211
constexpr lsc_vector_size to_lsc_vector_size()
Definition: common.hpp:502
constexpr void check_lsc_data_size()
Definition: common.hpp:418
static constexpr SurfaceIndex SLM_BTI
Definition: common.hpp:115
constexpr sycl::ext::intel::esimd::native::lsc::atomic_op to_lsc_atomic_op()
Definition: common.hpp:222
static constexpr uint8_t chG
Definition: common.hpp:110
static constexpr uint8_t chR
Definition: common.hpp:109
constexpr lsc_data_size finalize_data_size()
Definition: common.hpp:437
static constexpr uint8_t chB
Definition: common.hpp:111
static constexpr uint8_t ch
Definition: common.hpp:108
static constexpr uint8_t chA
Definition: common.hpp:112
constexpr bool are_all(cache_hint First, cache_hint Second)
Definition: common.hpp:549
lsc_data_size
Data size or format to read or store.
Definition: common.hpp:407
@ u16u32h
load 16b, zero extend to 32b; store the opposite
@ u16u32
load 8b, zero extend to 32b; store the opposite
constexpr void check_lsc_vector_size()
Definition: common.hpp:464
constexpr ESIMD_INLINE bool isPowerOf2(unsigned int n)
Check if a given 32 bit positive integer is a power of 2 at compile time.
Definition: common.hpp:96
constexpr lsc_data_size expand_data_size(lsc_data_size DS)
Definition: common.hpp:608
static constexpr SurfaceIndex INVALID_BTI
Definition: common.hpp:116
cache_hint
L1, L2 or L3 cache hints.
@ read_invalidate
load: asserts that the cache line containing the data will not be read again until it’s overwritten,...
@ write_through
store: immediately write data to the subsequent furthest cache, marking the cache line in the current...
@ write_back
store: write data into cache level and mark the cache line as "dirty".
@ streaming
load: cache data to cache using the evict-first policy to minimize cache pollution caused by temporar...
@ uncached
load/store/atomic: do not cache data to cache;
fence_scope
The scope that fence() operation should apply to.
Definition: common.hpp:350
@ gpu
Wait until all previous memory transactions from this thread are observed in the local GPU.
@ tile
Wait until all previous memory transactions from this thread are observed in the local tile.
@ gpus
Wait until all previous memory transactions from this thread are observed across all GPUs in the syst...
@ system_acquire
Global memory data-port only: for GPUs that do not follow PCIe Write ordering for downstream writes t...
@ system
Global memory data-port only: wait until all previous memory transactions from this thread are observ...
@ local
Wait until all previous memory transactions from this thread are observed within the local sub-slice.
fence_flush_op
The cache flush operation to apply to caches after fence() is complete.
Definition: common.hpp:384
@ clean
R/W and RO: invalidate all clean lines;.
@ invalidate
R/W: evict dirty lines; R/W and RO: invalidate clean lines.
memory_kind
The target memory kind for fence() operation.
Definition: common.hpp:397
@ local
image (also known as typed global memory)
std::bit_or< T > bit_or
Definition: functional.hpp:22
std::enable_if_t< detail::is_bf16_storage_type< T >::value, T > fmax(T x, T y)
std::enable_if_t< detail::is_bf16_storage_type< T >::value, T > fmin(T x, T y)
std::bit_xor< T > bit_xor
Definition: functional.hpp:23
std::bit_and< T > bit_and
Definition: functional.hpp:24
std::bit_and< T > bit_and
Definition: functional.hpp:20
std::bit_xor< T > bit_xor
Definition: functional.hpp:22
std::bit_or< T > bit_or
Definition: functional.hpp:21
constexpr stream_manipulator dec
Definition: stream.hpp:784
Definition: access.hpp:18
std::conditional_t< sizeof(T)<=4, std::conditional_t< std::is_signed_v< T >, int32_t, uint32_t >, std::conditional_t< std::is_signed_v< T >, int64_t, uint64_t > > type
Definition: common.hpp:620
This type tag represents "saturation off" behavior.
Definition: common.hpp:50
Gen hardware supports applying saturation to results of certain operations.
Definition: common.hpp:47