XeTLA v0.3.6
Intel® Xe Templates for Linear Algebra - API Definition Document
 
Loading...
Searching...
No Matches
prefetch_xe.hpp
Go to the documentation of this file.
1/*******************************************************************************
2* Copyright (c) 2022-2023 Intel Corporation
3*
4* Licensed under the Apache License, Version 2.0 (the "License");
5* you may not use this file except in compliance with the License.
6* You may obtain a copy of the License at
7*
8* http://www.apache.org/licenses/LICENSE-2.0
9*
10* Unless required by applicable law or agreed to in writing, software
11* distributed under the License is distributed on an "AS IS" BASIS,
12* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13* See the License for the specific language governing permissions and
14* limitations under the License.
15*******************************************************************************/
16
19
20#pragma once
21
22#include "subgroup/tile/api.hpp"
25
26namespace gpu::xetla::subgroup {
27namespace detail {
28template <typename payload_t>
30 static constexpr bool is_global_2d_xe
31 = ((payload_t::memory_space == mem_space::global)
32 && (payload_t::tile_desc::tile_size_y != 1)
33 && (payload_t::arch_tag == gpu_arch::Xe));
34
35 static constexpr bool is_global_block_1d_xe
36 = ((payload_t::memory_space == mem_space::global)
37 && (payload_t::tile_desc::tile_size_y == 1)
38 && (payload_t::arch_tag == gpu_arch::Xe));
39
40 static constexpr bool is_local_xe
41 = ((payload_t::memory_space == mem_space::local)
42 && (payload_t::arch_tag == gpu_arch::Xe));
43};
44
45} // namespace detail
46
55template <cache_hint L1 = cache_hint::cached,
56 cache_hint L2 = cache_hint::cached, typename payload_t>
57__XETLA_API typename std::enable_if_t<
59tile_prefetch(payload_t &payload) {
60 using dtype = typename payload_t::dtype;
61 static constexpr uint32_t num_tdesc = payload_t::num_tdesc;
62 auto tdesc_2d
63 = payload.tdesc_prefetch.xetla_format<uint32_t, num_tdesc, 16>();
64
65#pragma unroll
66 for (uint32_t i = 0; i < num_tdesc; i++) {
67 xetla_tprefetch_global<dtype, L1, L2, payload_t::arch_tag>(
68 tdesc_2d.row(i));
69 }
70}
71
80template <cache_hint L1 = cache_hint::cached,
81 cache_hint L2 = cache_hint::cached, typename payload_t>
82__XETLA_API typename std::enable_if_t<
84tile_prefetch(payload_t &payload) {
85 using dtype = typename payload_t::dtype;
86 using tile_desc = typename payload_t::tile_desc;
87 using prefetch_dtype = typename payload_t::prefetch_dtype;
88 constexpr uint32_t prefetch_len
89 = tile_desc::tile_size_x / payload_t::scale_factor;
90 if constexpr (prefetch_len >= 64) {
91#pragma unroll
92 for (uint32_t j = 0; j < prefetch_len / 64; j++) {
93 uint32_t offset_x = j * 64 * payload_t::scale_factor;
94 uint32_t address_offset = offset_x * sizeof(dtype);
96 L1, L2>(
97 payload.base_ptr, payload.base_offset + address_offset);
98 }
99 }
100 constexpr uint32_t tail_len = prefetch_len % 64;
101 uint32_t tail_offset = prefetch_len / 64 * 64 * payload_t::scale_factor;
102 detail::process_1d_tail<tail_len, 32, L1, L2, payload_t>(
103 payload, tail_offset);
104}
105
112template <cache_hint L1 = cache_hint::cached,
113 cache_hint L2 = cache_hint::cached, typename payload_t>
114__XETLA_API typename std::enable_if_t<
116tile_prefetch([[maybe_unused]] payload_t &payload) {}
117
118} // namespace gpu::xetla::subgroup
#define __XETLA_API
Definition common.hpp:43
__XETLA_API void xetla_prefetch_global(Ty *p, xetla_vector< uint32_t, N > offsets, xetla_mask< N > pred=1)
Stateless scattered prefetch.
Definition memory.hpp:187
Definition limitation.hpp:457
__XETLA_API std::enable_if_t< detail::check_prefetch_type< payload_t >::is_global_2d_xe > tile_prefetch(payload_t &payload)
Prefetch function: data located in global memory is prefetched into the cache,...
Definition prefetch_xe.hpp:59
cache_hint
L1 or L2 cache hint kinds.
Definition common.hpp:89
static constexpr bool is_global_2d_xe
Definition prefetch_xe.hpp:31
static constexpr bool is_local_xe
Definition prefetch_xe.hpp:41
static constexpr bool is_global_block_1d_xe
Definition prefetch_xe.hpp:36
C++ API.