20 #include <type_traits>
24 #ifdef __SYCL_DEVICE_ONLY__
25 #define __ESIMD_UNSUPPORTED_ON_HOST
27 #define __ESIMD_UNSUPPORTED_ON_HOST \
28 throw sycl::exception(sycl::errc::feature_not_supported, \
29 "This ESIMD feature is not supported on HOST")
35 inline namespace _V1 {
36 namespace ext::intel::esimd {
88 std::is_same_v<T, __ESIMD_NS::saturation_on_tag> ||
89 std::is_same_v<T, __ESIMD_NS::saturation_off_tag>;
97 return (n & (n - 1)) == 0;
103 ESIMD_INLINE constexpr
bool isPowerOf2(
unsigned int n,
unsigned int limit) {
104 return (n & (n - 1)) == 0 && n <= limit;
107 template <rgba_channel Ch>
108 static inline constexpr uint8_t
ch = 1 <<
static_cast<int>(Ch);
109 static inline constexpr uint8_t
chR = ch<rgba_channel::R>;
110 static inline constexpr uint8_t
chG = ch<rgba_channel::G>;
111 static inline constexpr uint8_t
chB = ch<rgba_channel::B>;
112 static inline constexpr uint8_t
chA = ch<rgba_channel::A>;
141 int Pos =
static_cast<int>(Ch);
142 return (
static_cast<int>(M) & (1 << Pos)) >> Pos;
206 #undef __ESIMD_USM_DWORD_TO_LSC_MSG
213 case __ESIMD_NS::atomic_op::xchg:
214 case __ESIMD_NS::atomic_op::predec:
221 template <__ESIMD_NS::atomic_op Op>
226 case __ESIMD_NS::atomic_op::sub:
227 return __ESIMD_NS::native::lsc::atomic_op::sub;
228 case __ESIMD_NS::atomic_op::inc:
229 return __ESIMD_NS::native::lsc::atomic_op::inc;
232 case __ESIMD_NS::atomic_op::umin:
233 return __ESIMD_NS::native::lsc::atomic_op::umin;
234 case __ESIMD_NS::atomic_op::umax:
235 return __ESIMD_NS::native::lsc::atomic_op::umax;
236 case __ESIMD_NS::atomic_op::cmpxchg:
237 return __ESIMD_NS::native::lsc::atomic_op::cmpxchg;
244 case __ESIMD_NS::atomic_op::smin:
245 return __ESIMD_NS::native::lsc::atomic_op::smin;
246 case __ESIMD_NS::atomic_op::smax:
247 return __ESIMD_NS::native::lsc::atomic_op::smax;
252 case __ESIMD_NS::atomic_op::fcmpxchg:
253 return __ESIMD_NS::native::lsc::atomic_op::fcmpxchg;
254 case __ESIMD_NS::atomic_op::fadd:
255 return __ESIMD_NS::native::lsc::atomic_op::fadd;
256 case __ESIMD_NS::atomic_op::fsub:
257 return __ESIMD_NS::native::lsc::atomic_op::fsub;
258 case __ESIMD_NS::atomic_op::load:
259 return __ESIMD_NS::native::lsc::atomic_op::load;
260 case __ESIMD_NS::atomic_op::store:
261 return __ESIMD_NS::native::lsc::atomic_op::store;
263 static_assert(has_lsc_equivalent<Op>() &&
"Unsupported LSC atomic op");
267 template <__ESIMD_NS::native::lsc::atomic_op Op>
272 case __ESIMD_NS::native::lsc::atomic_op::sub:
273 return __ESIMD_NS::atomic_op::sub;
274 case __ESIMD_NS::native::lsc::atomic_op::inc:
275 return __ESIMD_NS::atomic_op::inc;
278 case __ESIMD_NS::native::lsc::atomic_op::umin:
279 return __ESIMD_NS::atomic_op::umin;
280 case __ESIMD_NS::native::lsc::atomic_op::umax:
281 return __ESIMD_NS::atomic_op::umax;
282 case __ESIMD_NS::native::lsc::atomic_op::cmpxchg:
283 return __ESIMD_NS::atomic_op::cmpxchg;
290 case __ESIMD_NS::native::lsc::atomic_op::smin:
291 return __ESIMD_NS::atomic_op::smin;
292 case __ESIMD_NS::native::lsc::atomic_op::smax:
293 return __ESIMD_NS::atomic_op::smax;
298 case __ESIMD_NS::native::lsc::atomic_op::fcmpxchg:
299 return __ESIMD_NS::atomic_op::fcmpxchg;
300 case __ESIMD_NS::native::lsc::atomic_op::fadd:
301 return __ESIMD_NS::atomic_op::fadd;
302 case __ESIMD_NS::native::lsc::atomic_op::fsub:
303 return __ESIMD_NS::atomic_op::fsub;
304 case __ESIMD_NS::native::lsc::atomic_op::load:
305 return __ESIMD_NS::atomic_op::load;
306 case __ESIMD_NS::native::lsc::atomic_op::store:
307 return __ESIMD_NS::atomic_op::store;
313 case __ESIMD_NS::atomic_op::inc:
315 case __ESIMD_NS::atomic_op::load:
317 case __ESIMD_NS::atomic_op::xchg:
318 case __ESIMD_NS::atomic_op::predec:
319 case __ESIMD_NS::atomic_op::store:
321 case __ESIMD_NS::atomic_op::sub:
322 case __ESIMD_NS::atomic_op::smin:
323 case __ESIMD_NS::atomic_op::smax:
324 case __ESIMD_NS::atomic_op::umin:
325 case __ESIMD_NS::atomic_op::umax:
326 case __ESIMD_NS::atomic_op::fadd:
327 case __ESIMD_NS::atomic_op::fsub:
334 case __ESIMD_NS::atomic_op::cmpxchg:
335 case __ESIMD_NS::atomic_op::fcmpxchg:
342 template <__ESIMD_NS::native::lsc::atomic_op Op> constexpr
int get_num_args() {
343 return get_num_args<to_atomic_op<Op>()>();
420 sizeof(T) == 2 ||
sizeof(T) == 4 ||
sizeof(T) == 8,
421 "Unsupported data type");
433 "Data type does not match data size");
436 template <
typename T, lsc_data_size DS>
438 check_lsc_data_size<T, DS>();
441 else if (
sizeof(T) == 1)
443 else if (
sizeof(T) == 2)
445 else if (
sizeof(T) == 4)
447 else if (
sizeof(T) == 8)
465 static_assert(VS == 1 || VS == 2 || VS == 3 || VS == 4 || VS == 8 ||
466 VS == 16 || VS == 32 || VS == 64,
467 "Unsupported vector size");
475 "Unsupported vector size");
478 template <lsc_vector_size VS> constexpr uint8_t
to_int() {
479 check_lsc_vector_size<VS>();
503 check_lsc_vector_size<VS>();
533 template <cache_h
int Last>
534 struct is_one_of_t<Last>
535 : std::conditional_t<Last == Hint, std::true_type, std::false_type> {};
537 struct is_one_of_t<Head, Tail...>
538 : std::conditional_t<Head == Hint, std::true_type, is_one_of_t<Tail...>> {
544 return is_one_of_t<Hints...>::value;
548 template <cache_h
int Val>
550 return First == Val && Second == Val;
565 template <cache_action Action,
typename PropertyListT>
579 !are_all<cache_hint::uncached>(L1H, L2H),
580 "unsupported cache hint");
583 are_all<cache_hint::none>(L1H, L2H) ||
589 "unsupported cache hint");
591 static_assert(are_all<cache_hint::none>(L1H, L2H) ||
592 are_all<cache_hint::write_back>(L1H, L2H) ||
598 "unsupported cache hint");
600 static_assert(are_all<cache_hint::none>(L1H, L2H) ||
604 "unsupported cache hint");
617 using type = std::conditional_t<
619 std::conditional_t<std::is_signed_v<T>, int32_t, uint32_t>,
620 std::conditional_t<std::is_signed_v<T>,
int64_t, uint64_t>>;
constexpr bool is_one_of() const
Defines a shared image data.
constexpr int is_channel_enabled(rgba_channel_mask M, rgba_channel Ch)
rgba_channel
Represents a pixel's channel.
raw_send_eot
Specify if end of thread should be set.
rgba_channel_mask
Represents a pixel's channel mask - all possible combinations of enabled channels.
raw_send_sendc
Specify if sendc should be used.
unsigned int SurfaceIndex
Surface index type.
static constexpr saturation_off_tag saturation_off
Type tag object representing "saturation off" behavior.
constexpr int get_num_channels_enabled(rgba_channel_mask M)
static constexpr saturation_on_tag saturation_on
Type tag object representing "saturation on" behavior.
atomic_op
Represents an atomic operation.
@ umin
Minimum: *addr = min(*addr, src0).
@ fsub
ACM/PVC: Subtraction (floating point): *addr = *addr - src0.
@ dec
Decrement: *addr = *addr - 1.
@ add
Addition: *addr = *addr + src0.
@ smin
Minimum (signed integer): *addr = min(*addr, src0).
@ cmpxchg
Compare and exchange. if (*addr == src0) *sddr = src1;
@ fmax
ACM/PVC: Minimum (floating point): *addr = min(*addr, src0).
@ fadd
ACM/PVC: Addition (floating point): *addr = *addr + src0.
@ umax
Maximum: *addr = max(*addr, src0).
@ sub
Subtraction: *addr = *addr - src0.
@ xchg
Exchange. *addr == src0;
@ fmin
ACM/PVC: Maximum (floating point): *addr = max(*addr, src0).
@ predec
Decrement: *addr = *addr - 1.
@ smax
Maximum (signed integer): *addr = max(*addr, src0).
@ inc
Increment: *addr = *addr + 1.
@ fcmpxchg
ACM/PVC: Compare and exchange (floating point).
atomic_op
LSC atomic operation codes.
void add(const void *DeviceGlobalPtr, const char *UniqueId)
conditional< sizeof(long)==8, long, long long >::type int64_t
constexpr sycl::ext::intel::esimd::atomic_op to_atomic_op()
constexpr bool has_lsc_equivalent()
constexpr lsc_vector_size to_lsc_vector_size()
constexpr void check_lsc_data_size()
static constexpr SurfaceIndex SLM_BTI
constexpr sycl::ext::intel::esimd::native::lsc::atomic_op to_lsc_atomic_op()
constexpr bool is_saturation_tag_v
static constexpr uint8_t chG
static constexpr uint8_t chR
constexpr uint8_t to_int()
constexpr lsc_data_size finalize_data_size()
static constexpr uint8_t chB
static constexpr uint8_t ch
static constexpr uint8_t chA
constexpr bool are_all(cache_hint First, cache_hint Second)
lsc_data_size
Data size or format to read or store.
@ u16u32h
load 16b, zero extend to 32b; store the opposite
@ u16u32
load 8b, zero extend to 32b; store the opposite
constexpr void check_lsc_vector_size()
constexpr bool has_cache_hints()
constexpr int get_num_args()
constexpr ESIMD_INLINE bool isPowerOf2(unsigned int n)
Check if a given 32 bit positive integer is a power of 2 at compile time.
constexpr lsc_data_size expand_data_size(lsc_data_size DS)
static constexpr SurfaceIndex INVALID_BTI
cache_hint
L1, L2 or L3 cache hints.
@ read_invalidate
load: asserts that the cache line containing the data will not be read again until it’s overwritten,...
@ write_through
store: immediately write data to the subsequent furthest cache, marking the cache line in the current...
@ write_back
store: write data into cache level and mark the cache line as "dirty".
@ streaming
load: cache data to cache using the evict-first policy to minimize cache pollution caused by temporar...
@ uncached
load/store/atomic: do not cache data to cache;
fence_scope
The scope that fence() operation should apply to.
@ gpu
Wait until all previous memory transactions from this thread are observed in the local GPU.
@ tile
Wait until all previous memory transactions from this thread are observed in the local tile.
@ gpus
Wait until all previous memory transactions from this thread are observed across all GPUs in the syst...
@ system_acquire
Global memory data-port only: for GPUs that do not follow PCIe Write ordering for downstream writes t...
@ system
Global memory data-port only: wait until all previous memory transactions from this thread are observ...
@ local
Wait until all previous memory transactions from this thread are observed within the local sub-slice.
fence_flush_op
The cache flush operation to apply to caches after fence() is complete.
@ clean
R/W and RO: invalidate all clean lines;.
@ invalidate
R/W: evict dirty lines; R/W and RO: invalidate clean lines.
memory_kind
The target memory kind for fence() operation.
@ local
image (also known as typed global memory)
std::enable_if_t< detail::is_bf16_storage_type< T >::value, T > fmax(T x, T y)
std::enable_if_t< detail::is_bf16_storage_type< T >::value, T > fmin(T x, T y)
std::bit_xor< T > bit_xor
std::bit_and< T > bit_and
std::bit_and< T > bit_and
std::bit_xor< T > bit_xor
constexpr stream_manipulator dec
static constexpr bool value
std::conditional_t< sizeof(T)<=4, std::conditional_t< std::is_signed_v< T >, int32_t, uint32_t >, std::conditional_t< std::is_signed_v< T >, int64_t, uint64_t > > type
This type tag represents "saturation off" behavior.
Gen hardware supports applying saturation to results of certain operations.