38 return __ESIMD_ENS::cache_hint::uncached;
40 return __ESIMD_ENS::cache_hint::cached;
42 return __ESIMD_ENS::cache_hint::write_back;
44 return __ESIMD_ENS::cache_hint::write_through;
46 return __ESIMD_ENS::cache_hint::streaming;
48 return __ESIMD_ENS::cache_hint::read_invalidate;
58 return __ESIMD_ENS::lsc_data_size::default_size;
64 return __ESIMD_ENS::lsc_data_size::u8u32;
66 return __ESIMD_ENS::lsc_data_size::u16u32;
68 return __ESIMD_ENS::lsc_data_size::u16u32h;
79 return __ESIMD_ENS::lsc_memory_kind::untyped_global;
81 return __ESIMD_ENS::lsc_memory_kind::untyped_global_low_pri;
83 return __ESIMD_ENS::lsc_memory_kind::typed_global;
85 return __ESIMD_ENS::lsc_memory_kind::shared_local;
96 return __ESIMD_ENS::lsc_fence_op::evict;
98 return __ESIMD_ENS::lsc_fence_op::invalidate;
100 return __ESIMD_ENS::lsc_fence_op::discard;
102 return __ESIMD_ENS::lsc_fence_op::clean;
104 return __ESIMD_ENS::lsc_fence_op::flushl3;
114 return __ESIMD_ENS::lsc_scope::group;
116 return __ESIMD_ENS::lsc_scope::local;
121 return __ESIMD_ENS::lsc_scope::system;
123 return __ESIMD_ENS::lsc_scope::sysacq;
141 return __ESIMD_NS::atomic_op::cmpxchg;
142#pragma clang diagnostic push
143#pragma clang diagnostic ignored "-Wdeprecated-declarations"
149 return __ESIMD_NS::atomic_op::fcmpwr;
150#pragma clang diagnostic pop
152 return __ESIMD_NS::atomic_op::bit_and;
154 return __ESIMD_NS::atomic_op::bit_or;
156 return __ESIMD_NS::atomic_op::bit_xor;
183template <
typename Ty, uint8_t NElts = 1,
210template <
typename Ty, uint8_t NElts = 1,
219 (T *)p + (offset /
sizeof(T)));
241template <
typename Ty, uint8_t NElts = 1,
244 int N,
typename Toffset = uint32_t>
250 Ty>::template check_restriction<NElts, N>(offsets,
253 return __ESIMD_ENS::lsc_gather<T, NElts,
276template <
typename Ty, uint8_t NElts = 1,
280 Ty *p, uint64_t offset = 0) {
284 Ty>::template check_restriction<NElts>(offset,
287 return __ESIMD_ENS::lsc_block_load<T, NElts,
291 (T *)p + (offset /
sizeof(T)));
312template <
typename Ty, uint8_t NElts = 1,
315 int N,
typename Toffset = uint32_t>
322 (T *)p, offsets, vals, pred);
341template <
typename Ty, uint8_t NElts = 1,
347 __ESIMD_ENS::lsc_block_store<T, NElts,
351 (T *)p + (offset /
sizeof(T)), vals);
368template <
atomic_op Op,
typename T,
int N,
374 "The internal types are not yet supported!");
396template <
atomic_op Op,
typename T,
int N,
403 "The internal types are not yet supported!");
426template <
atomic_op Op,
typename T,
int N,
433 "The internal types are not yet supported!");
438 p, offsets, src0, src1, pred);
442template <u
int32_t SLMSize>
444 if constexpr (SLMSize != 0) { __ESIMD_NS::slm_init(SLMSize); }
462template <
typename Ty, uint8_t NElts = 1,
469 Ty>::template check_restriction<NElts, N>(offsets));
471 return __ESIMD_ENS::lsc_slm_gather<T, NElts,
473 xetla_cvt<uint64_t, uint32_t>(offsets), pred);
489template <
typename Ty, uint8_t NElts = 1,
495 Ty>::template check_restriction<NElts>((uint64_t)offset));
497 return __ESIMD_ENS::lsc_slm_block_load<T, NElts,
516template <
typename Ty, uint8_t NElts = 1,
523 NElts, N, uint32_t>(offsets));
525 __ESIMD_ENS::lsc_slm_scatter<T, NElts,
542template <
typename Ty, uint8_t NElts = 1,
549 Ty>::template check_restriction<NElts>(offset));
551 __ESIMD_ENS::lsc_slm_block_store<T, NElts,
566template <
atomic_op Op,
typename T,
int N,
571 "The internal types are not yet supported!");
589template <
atomic_op Op,
typename T,
int N,
595 "The internal types are not yet supported!");
614template <
atomic_op Op,
typename T,
int N,
620 "The internal types are not yet supported!");
624 offsets, src0, src1, pred);
#define __XETLA_API
Definition common.hpp:43
#define DEBUG_INVOKE(level,...)
Definition debug.hpp:180
typename native_type< T >::type native_type_t
Return the native data type of T.
Definition base_types.hpp:106
__ESIMD_NS::simd< native_type_t< Ty >, N > xetla_vector
wrapper for xetla_vector.
Definition base_types.hpp:149
__ESIMD_NS::simd_mask< N > xetla_mask
wrapper for xetla_mask.
Definition base_types.hpp:165
__XETLA_API xetla_vector< T, N > xetla_atomic_local(xetla_vector< uint32_t, N > offsets, xetla_mask< N > pred)
SLM scattered atomic (0 src).
Definition memory.hpp:568
__XETLA_API void xetla_fence(xetla_mask< N > pred=1)
Memory fence.
Definition memory.hpp:638
__XETLA_API void xetla_prefetch_global(Ty *p, xetla_vector< uint32_t, N > offsets, xetla_mask< N > pred=1)
Stateless scattered prefetch.
Definition memory.hpp:187
__XETLA_API void xetla_local_init()
Declare per-work-group slm size.
Definition memory.hpp:443
__XETLA_API xetla_vector< Ty, N *NElts > xetla_load_global(Ty *p, xetla_vector< Toffset, N > offsets, xetla_mask< N > pred=1)
Stateless scattered load.
Definition memory.hpp:245
__XETLA_API xetla_vector< Ty, N *NElts > xetla_load_local(xetla_vector< uint32_t, N > offsets, xetla_mask< N > pred=1)
SLM scattered load.
Definition memory.hpp:464
__XETLA_API xetla_vector< T, N > xetla_atomic_global(T *p, xetla_vector< uint32_t, N > offsets, xetla_mask< N > pred)
Stateless scattered atomic (0 src).
Definition memory.hpp:371
__XETLA_API void xetla_store_local(xetla_vector< uint32_t, N > offsets, xetla_vector< Ty, N *NElts > vals, xetla_mask< N > pred=1)
SLM scattered store.
Definition memory.hpp:518
__XETLA_API void xetla_store_global(Ty *p, xetla_vector< Toffset, N > offsets, xetla_vector< Ty, N *NElts > vals, xetla_mask< N > pred=1)
Stateless scattered store.
Definition memory.hpp:316
constexpr __ESIMD_ENS::lsc_scope get_fence_scope(gpu::xetla::fence_scope fs)
lookup table for fence scope.
Definition memory.hpp:111
constexpr __ESIMD_NS::atomic_op get_atomic_op(gpu::xetla::atomic_op ao)
lookup table for atomic op.
Definition memory.hpp:130
constexpr __ESIMD_ENS::lsc_data_size get_data_size(gpu::xetla::data_size ds)
lookup table for data size.
Definition memory.hpp:55
constexpr __ESIMD_ENS::lsc_fence_op get_fence_op(gpu::xetla::fence_op fo)
lookup table for fence op.
Definition memory.hpp:92
constexpr __ESIMD_ENS::lsc_memory_kind get_memory_kind(gpu::xetla::memory_kind mk)
lookup table for memory kind.
Definition memory.hpp:75
constexpr __ESIMD_ENS::cache_hint get_cache_hint(gpu::xetla::cache_hint ch)
lookup table for cache hint.
Definition memory.hpp:34
Definition arch_config.hpp:24
cache_hint
L1 or L2 cache hint kinds.
Definition common.hpp:89
data_size
Data size or format to read or store.
Definition common.hpp:100
@ u16u32h
load 16b into high 16 of each 32b; store the high 16
@ u16u32
load 16b, zero extend to 32b; store the opposite
fence_op
The xetla_fence operation to apply to caches.
Definition common.hpp:120
@ clean
dirty lines are written to memory, but retained in cache
@ flushl2
flush only the L2 cache
@ discard
dirty and clean lines are discarded w/o eviction
@ invalidate
invalidate all clean lines
fence_scope
The scope that xetla_fence operation should apply to.
Definition common.hpp:130
@ gpu
entire GPU, flush out to the GPUs LLC
@ tile
tile, flush out to several DSSs
@ gpus
all GPUs in the system, flush out to memory shared by all GPUs
@ sysacq
the entire system memory space with system-acquire semantics
@ system
the entire system memory space
@ local
flush out to the local scope
memory_kind
The specific LSC shared function to fence with xetla_fence.
Definition common.hpp:112
@ typed_global
typed global memory
@ untyped_global_low_pri
low-priority untyped global memory
@ shared_local
shared local memory
atomic_op
Represents an atomic operation.
Definition common.hpp:142
@ umin
Atomic store the unsigned int min of src1 and memory data and return the old value....
@ fsub
Atomic float subtract of src1 from memory data and return the old value. see
@ bit_or
Atomic store the bitwise OR of src1 and memory data and return the old value. see
@ iadd
Atomic signed int add of src1 to memory data and return the old value. see
@ smin
Atomic store the signed int min of src1 and memory data and return the old value. see
@ cmpxchg
Atomic bit-compare src1_X and memory data and replace if equal with src1_Y. Returns the old value....
@ fmax
Atomic store the float max of src1 and memory data and return the old value. see
@ fadd
Atomic float add of src1 to memory data and return the old value. see
@ idec
Atomic decrement of memory data and return the old value. see
@ umax
Atomic store the unsigned int max of src1 and memory data and return the old value....
@ store
Atomic store untyped data to memory. see
@ fmin
Atomic store the float min of src1 and memory data and return the old value. see
@ bit_and
Atomic store the bitwise AND of src1 and memory data and return the old value. see
@ iinc
Atomic increment of memory data and return the old value. see
@ smax
Atomic store the signed int max of src1 and memory data and return the old value. see
@ bit_xor
Atomic store the bitwise XOR of src1 and memory data and return the old value. see
@ isub
Atomic signed int subtract of src1 from memory data and return the old value. see
@ fcmpxchg
Atomic float compare src1_X and memory data and replace if equal with src1_Y. Returns the old value....
@ load
Atomic read of the memory data value, without modifying the data. see
Definition limitation.hpp:31
Used to check if the type is xetla internal data type.
Definition base_types.hpp:67