28 template <
typename payload_t>
32 && (payload_t::tile_desc::tile_size_y != 1)
37 && (payload_t::tile_desc::tile_size_y == 1)
60 using dtype =
typename payload_t::dtype;
61 static constexpr uint32_t num_tdesc = payload_t::num_tdesc;
63 = payload.tdesc_prefetch.xetla_format<uint32_t, num_tdesc, 16>();
66 for (uint32_t i = 0; i < num_tdesc; i++) {
67 xetla_tprefetch_global<dtype, L1, L2, payload_t::arch_tag>(
85 using dtype =
typename payload_t::dtype;
86 using tile_desc =
typename payload_t::tile_desc;
87 using prefetch_dtype =
typename payload_t::prefetch_dtype;
88 constexpr uint32_t prefetch_len
89 = tile_desc::tile_size_x / payload_t::scale_factor;
90 if constexpr (prefetch_len >= 64) {
92 for (uint32_t j = 0; j < prefetch_len / 64; j++) {
93 uint32_t offset_x = j * 64 * payload_t::scale_factor;
94 uint32_t address_offset = offset_x *
sizeof(dtype);
97 payload.base_ptr, payload.base_offset + address_offset);
100 constexpr uint32_t tail_len = prefetch_len % 64;
101 uint32_t tail_offset = prefetch_len / 64 * 64 * payload_t::scale_factor;
102 detail::process_1d_tail<tail_len, 32, L1, L2, payload_t>(
103 payload, tail_offset);
#define __XETLA_API
Definition common.hpp:43
__XETLA_API void xetla_prefetch_global(Ty *p, xetla_vector< uint32_t, N > offsets, xetla_mask< N > pred=1)
Stateless scattered prefetch.
Definition memory.hpp:187
Definition limitation.hpp:457
__XETLA_API std::enable_if_t< detail::check_prefetch_type< payload_t >::is_global_2d_xe > tile_prefetch(payload_t &payload)
Prefetch function: data located in global memory is prefetched to cache,...
Definition prefetch_xe.hpp:59
cache_hint
L1 or L2 cache hint kinds.
Definition common.hpp:89
Definition prefetch_xe.hpp:29
static constexpr bool is_global_2d_xe
Definition prefetch_xe.hpp:31
static constexpr bool is_local_xe
Definition prefetch_xe.hpp:41
static constexpr bool is_global_block_1d_xe
Definition prefetch_xe.hpp:36