xetla/prefetch__xe_8hpp_source.html

/*******************************************************************************

* Copyright (c) 2022-2023 Intel Corporation

*

* Licensed under the Apache License, Version 2.0 (the "License");

* you may not use this file except in compliance with the License.

* You may obtain a copy of the License at

*

*     http://www.apache.org/licenses/LICENSE-2.0

*

* Unless required by applicable law or agreed to in writing, software

* distributed under the License is distributed on an "AS IS" BASIS,

* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

* See the License for the specific language governing permissions and

* limitations under the License.

*******************************************************************************/


#pragma once


#include "subgroup/tile/api.hpp"

#include "subgroup/tile/impl/op_function.hpp"

#include "subgroup/tile/impl/payload_xe.hpp"


namespace gpu::xetla::subgroup {

namespace detail {

/// @brief Compile-time classification of a payload type.
/// The three flags below are the conditions used to enable the matching
/// tile_prefetch overload for a given payload.
/// @tparam payload_t Payload type; it is queried for memory_space, arch_tag
/// and (for the global cases) tile_desc::tile_size_y.
template <typename payload_t>
struct check_prefetch_type {
    /// Xe payload in global memory whose tile height differs from 1.
    static constexpr bool is_global_2d_xe
            = (payload_t::arch_tag == gpu_arch::Xe)
            && (payload_t::memory_space == mem_space::global)
            && (payload_t::tile_desc::tile_size_y != 1);

    /// Xe payload in global memory whose tile height is exactly 1.
    static constexpr bool is_global_block_1d_xe
            = (payload_t::arch_tag == gpu_arch::Xe)
            && (payload_t::memory_space == mem_space::global)
            && (payload_t::tile_desc::tile_size_y == 1);

    /// Xe payload in local memory (tile shape is not inspected).
    static constexpr bool is_local_xe
            = (payload_t::arch_tag == gpu_arch::Xe)
            && (payload_t::memory_space == mem_space::local);
};


} // namespace detail


/// @brief Prefetch for a payload classified as a global-memory 2D tile on Xe.
/// Participates in overload resolution only when
/// detail::check_prefetch_type<payload_t>::is_global_2d_xe is true.
/// @tparam L1 Cache hint forwarded as the L1 hint (default: cache_hint::cached).
/// @tparam L2 Cache hint forwarded as the L2 hint (default: cache_hint::cached).
/// @tparam payload_t Payload type; must provide dtype, num_tdesc, arch_tag
/// and a tdesc_prefetch member.
/// @param payload Payload whose tdesc_prefetch rows are each passed to
/// xetla_tprefetch_global.
template <cache_hint L1 = cache_hint::cached,
        cache_hint L2 = cache_hint::cached, typename payload_t>
__XETLA_API typename std::enable_if_t<
        detail::check_prefetch_type<payload_t>::is_global_2d_xe>
tile_prefetch(payload_t &payload) {
    using elem_t = typename payload_t::dtype;
    constexpr uint32_t desc_count = payload_t::num_tdesc;

    // Reinterpret the prefetch descriptors as desc_count rows of 16 uint32_t.
    auto desc_rows
            = payload.tdesc_prefetch.xetla_format<uint32_t, desc_count, 16>();

    // One prefetch request per descriptor row.
#pragma unroll
    for (uint32_t row = 0; row < desc_count; ++row) {
        xetla_tprefetch_global<elem_t, L1, L2, payload_t::arch_tag>(
                desc_rows.row(row));
    }
}


/// @brief Prefetch for a payload classified as a global-memory 1D block on Xe.
/// Participates in overload resolution only when
/// detail::check_prefetch_type<payload_t>::is_global_block_1d_xe is true.
/// The row of tile_size_x / scale_factor prefetch elements is covered by
/// full 64-element requests first; whatever is left over is delegated to
/// detail::process_1d_tail, starting with a base length of 32.
/// @tparam L1 Cache hint forwarded as the L1 hint (default: cache_hint::cached).
/// @tparam L2 Cache hint forwarded as the L2 hint (default: cache_hint::cached).
/// @tparam payload_t Payload type; must provide dtype, tile_desc,
/// prefetch_dtype, scale_factor, base_ptr and base_offset.
/// @param payload Payload supplying the base pointer and base offset.
template <cache_hint L1 = cache_hint::cached,
        cache_hint L2 = cache_hint::cached, typename payload_t>
__XETLA_API typename std::enable_if_t<
        detail::check_prefetch_type<payload_t>::is_global_block_1d_xe>
tile_prefetch(payload_t &payload) {
    using dtype = typename payload_t::dtype;
    using tile_desc = typename payload_t::tile_desc;
    using prefetch_dtype = typename payload_t::prefetch_dtype;

    // Number of prefetch_dtype elements handled by one full request.
    constexpr uint32_t block_len = 64;
    constexpr uint32_t total_len
            = tile_desc::tile_size_x / payload_t::scale_factor;
    constexpr uint32_t full_blocks = total_len / block_len;
    constexpr uint32_t remainder_len = total_len % block_len;

    if constexpr (full_blocks > 0) {
#pragma unroll
        for (uint32_t blk = 0; blk < full_blocks; blk++) {
            // Offset in dtype elements, then scaled by sizeof(dtype).
            uint32_t elem_offset = blk * block_len * payload_t::scale_factor;
            uint32_t scaled_offset = elem_offset * sizeof(dtype);
            xetla_prefetch_global<prefetch_dtype, block_len,
                    data_size::default_size, L1, L2>(
                    payload.base_ptr, payload.base_offset + scaled_offset);
        }
    }

    // The tail begins right after the last full block.
    uint32_t remainder_offset
            = full_blocks * block_len * payload_t::scale_factor;
    detail::process_1d_tail<remainder_len, 32, L1, L2, payload_t>(
            payload, remainder_offset);
}


/// @brief tile_prefetch overload for payloads classified as local memory on Xe.
/// Participates in overload resolution only when
/// detail::check_prefetch_type<payload_t>::is_local_xe is true.
/// The body is intentionally empty: no prefetch request is issued and the
/// payload is left untouched.
/// @tparam L1 Cache hint (unused by this overload).
/// @tparam L2 Cache hint (unused by this overload).
/// @tparam payload_t Payload type.
/// @param payload Ignored.
template <cache_hint L1 = cache_hint::cached,

        cache_hint L2 = cache_hint::cached, typename payload_t>

__XETLA_API typename std::enable_if_t<

        detail::check_prefetch_type<payload_t>::is_local_xe>

tile_prefetch([[maybe_unused]] payload_t &payload) {}


} // namespace gpu::xetla::subgroup

__XETLA_API
#define __XETLA_API
Definition common.hpp:43

gpu::xetla::xetla_prefetch_global
__XETLA_API void xetla_prefetch_global(Ty *p, xetla_vector< uint32_t, N > offsets, xetla_mask< N > pred=1)
Stateless scattered prefetch.
Definition memory.hpp:187

gpu::xetla::subgroup
Definition limitation.hpp:457

gpu::xetla::subgroup::tile_prefetch
__XETLA_API std::enable_if_t< detail::check_prefetch_type< payload_t >::is_global_2d_xe > tile_prefetch(payload_t &payload)
This is the prefetch function, in which data located in global memory is prefetched to the cache,...
Definition prefetch_xe.hpp:59

gpu::xetla::cache_hint
cache_hint
L1 or L2 cache hint kinds.
Definition common.hpp:89

gpu::xetla::cache_hint::cached
@ cached

gpu::xetla::data_size::default_size
@ default_size

gpu::xetla::mem_space::global
@ global

gpu::xetla::mem_space::local
@ local

gpu::xetla::gpu_arch::Xe
@ Xe

op_function.hpp
C++ API.

payload_xe.hpp
C++ API.

gpu::xetla::subgroup::detail::check_prefetch_type
Definition prefetch_xe.hpp:29

gpu::xetla::subgroup::detail::check_prefetch_type::is_global_2d_xe
static constexpr bool is_global_2d_xe
Definition prefetch_xe.hpp:31

gpu::xetla::subgroup::detail::check_prefetch_type::is_local_xe
static constexpr bool is_local_xe
Definition prefetch_xe.hpp:41

gpu::xetla::subgroup::detail::check_prefetch_type::is_global_block_1d_xe
static constexpr bool is_global_block_1d_xe
Definition prefetch_xe.hpp:36

api.hpp
C++ API.