20 #include <type_traits>
24 #ifdef __SYCL_DEVICE_ONLY__
25 #define __ESIMD_UNSUPPORTED_ON_HOST
27 #define __ESIMD_UNSUPPORTED_ON_HOST \
28 throw sycl::exception(sycl::errc::feature_not_supported, \
29 "This ESIMD feature is not supported on HOST")
35 inline namespace _V1 {
36 namespace ext::intel::esimd {
88 std::is_same_v<T, __ESIMD_NS::saturation_on_tag> ||
89 std::is_same_v<T, __ESIMD_NS::saturation_off_tag>;
97 return (n & (n - 1)) == 0;
103 ESIMD_INLINE constexpr
bool isPowerOf2(
unsigned int n,
unsigned int limit) {
104 return (n & (n - 1)) == 0 && n <= limit;
// Bit flag for a single RGBA channel: the bit index equals the channel's
// numeric enum value, so flags for several channels can be OR-ed together
// into a channel mask.
107 template <rgba_channel Ch>
108 static inline constexpr uint8_t
ch = 1 <<
static_cast<int>(Ch);
// Per-channel convenience flags for the R, G, B and A channels.
109 static inline constexpr uint8_t
chR = ch<rgba_channel::R>;
110 static inline constexpr uint8_t
chG = ch<rgba_channel::G>;
111 static inline constexpr uint8_t
chB = ch<rgba_channel::B>;
112 static inline constexpr uint8_t
chA = ch<rgba_channel::A>;
141 int Pos =
static_cast<int>(Ch);
142 return (
static_cast<int>(M) & (1 << Pos)) >> Pos;
203 #undef __ESIMD_USM_DWORD_TO_LSC_MSG
210 case __ESIMD_NS::atomic_op::xchg:
217 template <__ESIMD_NS::atomic_op Op>
222 case __ESIMD_NS::atomic_op::sub:
223 return __ESIMD_NS::native::lsc::atomic_op::sub;
224 case __ESIMD_NS::atomic_op::inc:
225 return __ESIMD_NS::native::lsc::atomic_op::inc;
232 case __ESIMD_NS::atomic_op::cmpxchg:
233 return __ESIMD_NS::native::lsc::atomic_op::cmpxchg;
240 case __ESIMD_NS::atomic_op::smin:
241 return __ESIMD_NS::native::lsc::atomic_op::smin;
242 case __ESIMD_NS::atomic_op::smax:
243 return __ESIMD_NS::native::lsc::atomic_op::smax;
248 case __ESIMD_NS::atomic_op::fcmpxchg:
249 return __ESIMD_NS::native::lsc::atomic_op::fcmpxchg;
250 case __ESIMD_NS::atomic_op::fadd:
251 return __ESIMD_NS::native::lsc::atomic_op::fadd;
252 case __ESIMD_NS::atomic_op::fsub:
253 return __ESIMD_NS::native::lsc::atomic_op::fsub;
254 case __ESIMD_NS::atomic_op::load:
255 return __ESIMD_NS::native::lsc::atomic_op::load;
256 case __ESIMD_NS::atomic_op::store:
257 return __ESIMD_NS::native::lsc::atomic_op::store;
259 static_assert(has_lsc_equivalent<Op>() &&
"Unsupported LSC atomic op");
263 template <__ESIMD_NS::native::lsc::atomic_op Op>
268 case __ESIMD_NS::native::lsc::atomic_op::sub:
269 return __ESIMD_NS::atomic_op::sub;
270 case __ESIMD_NS::native::lsc::atomic_op::inc:
271 return __ESIMD_NS::atomic_op::inc;
278 case __ESIMD_NS::native::lsc::atomic_op::cmpxchg:
279 return __ESIMD_NS::atomic_op::cmpxchg;
286 case __ESIMD_NS::native::lsc::atomic_op::smin:
287 return __ESIMD_NS::atomic_op::smin;
288 case __ESIMD_NS::native::lsc::atomic_op::smax:
289 return __ESIMD_NS::atomic_op::smax;
294 case __ESIMD_NS::native::lsc::atomic_op::fcmpxchg:
295 return __ESIMD_NS::atomic_op::fcmpxchg;
296 case __ESIMD_NS::native::lsc::atomic_op::fadd:
297 return __ESIMD_NS::atomic_op::fadd;
298 case __ESIMD_NS::native::lsc::atomic_op::fsub:
299 return __ESIMD_NS::atomic_op::fsub;
300 case __ESIMD_NS::native::lsc::atomic_op::load:
301 return __ESIMD_NS::atomic_op::load;
302 case __ESIMD_NS::native::lsc::atomic_op::store:
303 return __ESIMD_NS::atomic_op::store;
309 case __ESIMD_NS::atomic_op::inc:
311 case __ESIMD_NS::atomic_op::load:
313 case __ESIMD_NS::atomic_op::xchg:
314 case __ESIMD_NS::atomic_op::store:
316 case __ESIMD_NS::atomic_op::sub:
317 case __ESIMD_NS::atomic_op::smin:
318 case __ESIMD_NS::atomic_op::smax:
321 case __ESIMD_NS::atomic_op::fadd:
322 case __ESIMD_NS::atomic_op::fsub:
329 case __ESIMD_NS::atomic_op::cmpxchg:
330 case __ESIMD_NS::atomic_op::fcmpxchg:
337 template <__ESIMD_NS::native::lsc::atomic_op Op> constexpr
int get_num_args() {
338 return get_num_args<to_atomic_op<Op>()>();
415 sizeof(T) == 2 ||
sizeof(T) == 4 ||
sizeof(T) == 8,
416 "Unsupported data type");
428 "Data type does not match data size");
431 template <
typename T, lsc_data_size DS>
433 check_lsc_data_size<T, DS>();
436 else if (
sizeof(T) == 1)
438 else if (
sizeof(T) == 2)
440 else if (
sizeof(T) == 4)
442 else if (
sizeof(T) == 8)
460 static_assert(VS == 1 || VS == 2 || VS == 3 || VS == 4 || VS == 8 ||
461 VS == 16 || VS == 32 || VS == 64,
462 "Unsupported vector size");
470 "Unsupported vector size");
473 template <lsc_vector_size VS> constexpr uint8_t
to_int() {
474 check_lsc_vector_size<VS>();
498 check_lsc_vector_size<VS>();
528 template <cache_h
int Last>
529 struct is_one_of_t<Last>
530 : std::conditional_t<Last == Hint, std::true_type, std::false_type> {};
532 struct is_one_of_t<Head, Tail...>
533 : std::conditional_t<Head == Hint, std::true_type, is_one_of_t<Tail...>> {
539 return is_one_of_t<Hints...>::value;
543 template <cache_h
int Val>
545 return First == Val && Second == Val;
560 template <cache_action Action,
typename PropertyListT>
574 !are_all<cache_hint::uncached>(L1H, L2H),
575 "unsupported cache hint");
578 are_all<cache_hint::none>(L1H, L2H) ||
584 "unsupported cache hint");
586 static_assert(are_all<cache_hint::none>(L1H, L2H) ||
587 are_all<cache_hint::write_back>(L1H, L2H) ||
593 "unsupported cache hint");
595 static_assert(are_all<cache_hint::none>(L1H, L2H) ||
599 "unsupported cache hint");
612 using type = std::conditional_t<
614 std::conditional_t<std::is_signed_v<T>, int32_t, uint32_t>,
615 std::conditional_t<std::is_signed_v<T>,
int64_t, uint64_t>>;
constexpr bool is_one_of() const
Defines a shared image data.
constexpr int is_channel_enabled(rgba_channel_mask M, rgba_channel Ch)
rgba_channel
Represents a pixel's channel.
raw_send_eot
Specify if end of thread should be set.
rgba_channel_mask
Represents a pixel's channel mask - all possible combinations of enabled channels.
raw_send_sendc
Specify if sendc should be used.
unsigned int SurfaceIndex
Surface index type.
static constexpr saturation_off_tag saturation_off
Type tag object representing "saturation off" behavior.
constexpr int get_num_channels_enabled(rgba_channel_mask M)
static constexpr saturation_on_tag saturation_on
Type tag object representing "saturation on" behavior.
atomic_op
Represents an atomic operation.
@ umin
Minimum (unsigned integer): *addr = min(*addr, src0).
@ fsub
ACM/PVC: Subtraction (floating point): *addr = *addr - src0.
@ dec
Decrement: *addr = *addr - 1.
@ add
Addition: *addr = *addr + src0.
@ smin
Minimum (signed integer): *addr = min(*addr, src0).
@ cmpxchg
Compare and exchange. if (*addr == src0) *addr = src1;
@ fmax
ACM/PVC: Maximum (floating point): *addr = max(*addr, src0).
@ fadd
ACM/PVC: Addition (floating point): *addr = *addr + src0.
@ umax
Maximum (unsigned integer): *addr = max(*addr, src0).
@ sub
Subtraction: *addr = *addr - src0.
@ xchg
Exchange. *addr = src0;
@ fmin
ACM/PVC: Minimum (floating point): *addr = min(*addr, src0).
@ smax
Maximum (signed integer): *addr = max(*addr, src0).
@ inc
Increment: *addr = *addr + 1.
@ fcmpxchg
ACM/PVC: Compare and exchange (floating point).
atomic_op
LSC atomic operation codes.
void add(const void *DeviceGlobalPtr, const char *UniqueId)
conditional< sizeof(long)==8, long, long long >::type int64_t
constexpr sycl::ext::intel::esimd::atomic_op to_atomic_op()
constexpr bool has_lsc_equivalent()
constexpr lsc_vector_size to_lsc_vector_size()
constexpr void check_lsc_data_size()
static constexpr SurfaceIndex SLM_BTI
constexpr sycl::ext::intel::esimd::native::lsc::atomic_op to_lsc_atomic_op()
constexpr bool is_saturation_tag_v
static constexpr uint8_t chG
static constexpr uint8_t chR
constexpr uint8_t to_int()
constexpr lsc_data_size finalize_data_size()
static constexpr uint8_t chB
static constexpr uint8_t ch
static constexpr uint8_t chA
constexpr bool are_all(cache_hint First, cache_hint Second)
lsc_data_size
Data size or format to read or store.
@ u16u32h
load 16b into the high 16 bits of each 32b element; store the high 16b
@ u16u32
load 16b, zero extend to 32b; store the opposite
constexpr void check_lsc_vector_size()
constexpr bool has_cache_hints()
constexpr int get_num_args()
constexpr ESIMD_INLINE bool isPowerOf2(unsigned int n)
Check if a given 32 bit positive integer is a power of 2 at compile time.
constexpr lsc_data_size expand_data_size(lsc_data_size DS)
static constexpr SurfaceIndex INVALID_BTI
cache_hint
L1, L2 or L3 cache hints.
@ read_invalidate
load: asserts that the cache line containing the data will not be read again until it’s overwritten,...
@ write_through
store: immediately write data to the subsequent furthest cache, marking the cache line in the current...
@ write_back
store: write data into cache level and mark the cache line as "dirty".
@ streaming
load: cache data to cache using the evict-first policy to minimize cache pollution caused by temporar...
@ uncached
load/store/atomic: do not cache data to cache;
fence_scope
The scope that fence() operation should apply to.
@ gpu
Wait until all previous memory transactions from this thread are observed in the local GPU.
@ tile
Wait until all previous memory transactions from this thread are observed in the local tile.
@ gpus
Wait until all previous memory transactions from this thread are observed across all GPUs in the syst...
@ system_acquire
Global memory data-port only: for GPUs that do not follow PCIe Write ordering for downstream writes t...
@ system
Global memory data-port only: wait until all previous memory transactions from this thread are observ...
@ local
Wait until all previous memory transactions from this thread are observed within the local sub-slice.
fence_flush_op
The cache flush operation to apply to caches after fence() is complete.
@ clean
R/W and RO: invalidate all clean lines;.
@ invalidate
R/W: evict dirty lines; R/W and RO: invalidate clean lines.
memory_kind
The target memory kind for fence() operation.
@ local
shared local memory (SLM)
unsigned umin(Tp x, Tp y)
unsigned umax(Tp x, Tp y)
std::enable_if_t< detail::is_bf16_storage_type< T >::value, T > fmax(T x, T y)
std::enable_if_t< detail::is_bf16_storage_type< T >::value, T > fmin(T x, T y)
std::bit_xor< T > bit_xor
std::bit_and< T > bit_and
std::bit_and< T > bit_and
std::bit_xor< T > bit_xor
constexpr stream_manipulator dec
static constexpr bool value
std::conditional_t< sizeof(T)<=4, std::conditional_t< std::is_signed_v< T >, int32_t, uint32_t >, std::conditional_t< std::is_signed_v< T >, int64_t, uint64_t > > type
This type tag represents "saturation off" behavior.
Gen hardware supports applying saturation to results of certain operations.