33template <u
int32_t element_size>
35 static_assert(element_size == 1 || element_size == 2 || element_size == 4
37 "element_size not supported!");
38 switch (element_size) {
48template <lsc_action Action, cache_h
int L1H, cache_h
int L2H, gpu_arch arch_tag>
49constexpr std::enable_if_t<arch_tag == gpu_arch::Xe, void>
58 "cache hint type not supported!");
71 "unsupported cache hint!");
84 "unsupported cache hint!");
91 "unsupported cache hint!");
95template <cache_h
int L1H, cache_h
int L2H, gpu_arch arch_tag>
96constexpr std::enable_if_t<arch_tag == gpu_arch::Xe, uint32_t>
98 check_lsc_cache_hint<lsc_action::load, L1H, L2H, arch_tag>();
113template <cache_h
int L1H, cache_h
int L2H, gpu_arch arch_tag>
114constexpr std::enable_if_t<arch_tag == gpu_arch::Xe, uint32_t>
116 check_lsc_cache_hint<lsc_action::prefetch, L1H, L2H, arch_tag>();
128template <cache_h
int L1H, cache_h
int L2H, gpu_arch arch_tag>
129constexpr std::enable_if_t<arch_tag == gpu_arch::Xe, uint32_t>
131 check_lsc_cache_hint<lsc_action::store, L1H, L2H, arch_tag>();
146template <cache_h
int L1H, cache_h
int L2H, gpu_arch arch_tag>
147constexpr std::enable_if_t<arch_tag == gpu_arch::Xe, uint32_t>
149 check_lsc_cache_hint<lsc_action::atomic, L1H, L2H, arch_tag>();
164template <u
int32_t num_channel>
166 static_assert(num_channel == 1 || num_channel == 2 || num_channel == 4
167 || num_channel == 8 || num_channel == 16
168 || num_channel == 32,
169 "num_channel not supported!");
170 switch (num_channel) {
180template <atomic_op Op>
184 "Other atomic op didn't added");
246template <
typename kernel_t>
248 xetla_nbarrier_init<kernel_t::get_barrier_count()>();
249 xetla_local_init<kernel_t::get_slm_size()>();
255template <u
int32_t slm_size, u
int32_t nbarrier_count>
257 xetla_nbarrier_init<nbarrier_count>();
258 xetla_local_init<slm_size>();
constexpr uint32_t get_execSize_code()
Definition common.hpp:165
constexpr std::enable_if_t< arch_tag==gpu_arch::Xe, uint32_t > get_store_cache_hint_code()
Definition common.hpp:130
constexpr std::enable_if_t< arch_tag==gpu_arch::Xe, uint32_t > get_atomic_cache_hint_code()
Definition common.hpp:148
constexpr std::enable_if_t< arch_tag==gpu_arch::Xe, uint32_t > get_load_cache_hint_code()
Definition common.hpp:97
constexpr std::enable_if_t< arch_tag==gpu_arch::Xe, uint32_t > get_prefetch_cache_hint_code()
Definition common.hpp:115
constexpr std::enable_if_t< arch_tag==gpu_arch::Xe, void > check_lsc_cache_hint()
Definition common.hpp:50
constexpr uint32_t get_atomic_opcode()
Definition common.hpp:181
lsc_action
Definition common.hpp:46
constexpr uint32_t get_element_size_code()
Get the element size code object.
Definition common.hpp:34
Definition arch_config.hpp:24
post_kind
Definition common.hpp:229
reg_layout
Tile layout in register. linear: linear layout with one tile; tiled: 2d block stacked in raster order; v...
Definition common.hpp:209
@ vnni_tiled_col_major
This is the VNNI tiled format, but each block is stored in column-major order.
mma_engine
Definition common.hpp:225
pre_kind
Definition common.hpp:237
@ iadd
Atomic signed int add of src1 from memory data and return the old value. see
@ fmax
Atomically stores the float max of src1 and the memory data, and returns the old value. see
@ fadd
Atomic float add of src1 from memory data and return the old value. see
@ store
Atomic store untyped data to memory. see
@ load
Atomic read of the memory data value, without modifying the data. see
memory_op
Definition common.hpp:227
tdesc_update_dir
Definition common.hpp:228
offset_mode
Definition common.hpp:238
void slm_barrier_init()
Initialize the local memory size and named barrier count with kernel_t.
Definition common.hpp:247
store_op
Definition common.hpp:218