XeTLA v0.3.6
Intel® Xe Templates for Linear Algebra - API Definition Document
 
Loading...
Searching...
No Matches
unaligned_xe.hpp
Go to the documentation of this file.
1/*******************************************************************************
2* Copyright (c) 2022-2023 Intel Corporation
3*
4* Licensed under the Apache License, Version 2.0 (the "License");
5* you may not use this file except in compliance with the License.
6* You may obtain a copy of the License at
7*
8* http://www.apache.org/licenses/LICENSE-2.0
9*
10* Unless required by applicable law or agreed to in writing, software
11* distributed under the License is distributed on an "AS IS" BASIS,
12* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13* See the License for the specific language governing permissions and
14* limitations under the License.
15*******************************************************************************/
16
19
20#pragma once
21
25
26namespace gpu::xetla::group {
27
30
/// Partial specialization of epilogue_t for epilogue_policy_unaligned,
/// selected only when arch_tag_ == gpu_arch::Xe (via std::enable_if_t).
/// Its call operator converts an accumulator tile into a matC tile and stores
/// it through a memory payload built from the C memory descriptor.
///
/// @tparam tile_shape_ Supplies work_group_t, wg_size_x/y, sg_tile_size_x/y and
///         wg_tile_size_x/y used below.
/// @tparam mem_desc_c_t_ Memory descriptor type for C; supplies dtype, layout,
///         space, is_local and update_coord().
/// @tparam arch_tag_ Architecture tag; must equal gpu_arch::Xe for this
///         specialization to participate.
32template <typename tile_shape_, typename mem_desc_c_t_, gpu_arch arch_tag_>
33class epilogue_t<epilogue_policy_unaligned<arch_tag_>, tile_shape_,
34 mem_desc_c_t_, std::enable_if_t<((arch_tag_ == gpu_arch::Xe))>> {
35public:
 /// Re-exported template arguments.
37 using tile_shape = tile_shape_;
38 using mem_desc_c_t = mem_desc_c_t_;
39 static constexpr gpu_arch arch_tag = arch_tag_;
 /// This epilogue declares that it uses no barriers.
40 static constexpr uint32_t barrier_count = 0;
 /// wg_tile_size_x * wg_tile_size_y when mem_desc_c_t::is_local is true,
 /// otherwise 0.
 // NOTE(review): the product is not scaled by sizeof(dtype_c); confirm
 // whether the intended unit is elements or bytes.
41 static constexpr uint32_t slm_size = mem_desc_c_t::is_local
42 ? tile_shape::wg_tile_size_x * tile_shape::wg_tile_size_y
43 : 0;
 /// Empty argument pack; this policy takes no runtime arguments.
45 struct arguments_t {};
46
47private:
48 using work_group_t = typename tile_shape::work_group_t;
 // Sub-group tile extents: m comes from the y size, n from the x size.
49 static constexpr uint32_t sg_tile_m = tile_shape::sg_tile_size_y;
50 static constexpr uint32_t sg_tile_n = tile_shape::sg_tile_size_x;
51 static constexpr uint32_t wg_size_x = tile_shape::wg_size_x;
52 static constexpr uint32_t wg_size_y = tile_shape::wg_size_y;
53 using dtype_c = typename mem_desc_c_t::dtype;
54 static constexpr mem_layout mem_layout_c = mem_desc_c_t::layout;
55 static constexpr mem_space mem_space_c = mem_desc_c_t::space;
56
 /// Applies this sub-group's tile offset to the C memory descriptor.
 /// Splits g.get_id() into an x index (id % wg_size_x) and a y index
 /// (id / wg_size_x), scales them by sg_tile_n / sg_tile_m, and passes the
 /// n offset followed by the m offset to mem_desc_c.update_coord().
 /// @param g Work-group object queried for the caller's id.
 /// @param mem_desc_c Descriptor modified in place.
 // NOTE(review): presumably update_coord() shifts the descriptor's
 // coordinates by the given offsets -- confirm against mem_desc_t.
58 __XETLA_API static void update_sg_tile_tdesc(
59 work_group_t &g, mem_desc_c_t &mem_desc_c) {
60 int32_t sg_idx = g.get_id() % wg_size_x;
61 int32_t sg_idy = g.get_id() / wg_size_x;
62 int32_t tile_offset_n = sg_idx * sg_tile_n;
63 int32_t tile_offset_m = sg_idy * sg_tile_m;
64 mem_desc_c.update_coord(tile_offset_n, tile_offset_m);
65 }
66
67public:
 /// Message type used to build the C payload in operator().
 // NOTE(review): the initializer of msg_type_c is not visible in this
 // listing (listing lines 69-70 are absent) -- consult the original header.
68 static constexpr msg_type msg_type_c
71
 /// Default epilogue: convert matAcc to the C tile type and store it.
 /// @tparam matAcc_t Accumulator tile type; supplies tile_size_x/y and
 ///         block_size_x/y for the C tile descriptor.
 /// @param g Work-group object, used to locate this sub-group's tile.
 /// @param matAcc Accumulator tile used as the conversion source.
 /// @param mem_desc_c C memory descriptor; taken by value, so the coordinate
 ///        update below affects only this call's copy.
 /// @param args Unused by this policy.
 /// @param slm_base Unused by this policy.
 /// @param nbarrier_base Unused by this policy.
81 template <typename matAcc_t>
82 __XETLA_API KERNEL_FUNC void operator()(work_group_t &g, matAcc_t &matAcc,
83 mem_desc_c_t mem_desc_c, [[maybe_unused]] arguments_t args = {},
84 [[maybe_unused]] uint32_t slm_base = 0,
85 [[maybe_unused]] uint32_t nbarrier_base = 0) {
 // The C tile descriptor mirrors the accumulator's tile/block sizes and
 // uses reg_layout::tiled.
86 using matC_tile_desc_t = subgroup::tile_desc_t<matAcc_t::tile_size_x,
87 matAcc_t::tile_size_y, matAcc_t::block_size_x,
88 matAcc_t::block_size_y, reg_layout::tiled>;
 // NOTE(review): matC_t is used below but its alias declaration is not
 // visible in this listing (listing line 89 is absent).
90 using matC_payload_t = subgroup::mem_payload_t<mem_desc_c_t,
91 matC_tile_desc_t, msg_type_c, arch_tag>;
 // Move the local descriptor copy to this sub-group's tile before the
 // payload is constructed from it.
92 update_sg_tile_tdesc(g, mem_desc_c);
93 matC_t matC;
94 matC_payload_t matC_payload(mem_desc_c);
 // Element-wise conversion with matC as destination and matAcc as source.
95 subgroup::elemwise_cvt(matC, matAcc);
 // Store matC through the payload with the streaming / write_back cache
 // hints (presumably the two cache levels' hints -- confirm against
 // tile_store).
96 subgroup::tile_store<cache_hint::streaming, cache_hint::write_back>(
97 matC, matC_payload);
98 }
99};
100
102
103} // namespace gpu::xetla::group
__XETLA_API KERNEL_FUNC void operator()(work_group_t &g, matAcc_t &matAcc, mem_desc_c_t mem_desc_c, arguments_t args={}, uint32_t slm_base=0, uint32_t nbarrier_base=0)
Default epilogue.
Definition unaligned_xe.hpp:82
Is the epilogue functor.
Definition api.hpp:35
#define __XETLA_API
Definition common.hpp:43
C++ API.
C++ API.
#define KERNEL_FUNC
KERNEL_FUNC macro.
Definition common.hpp:39
Definition limitation.hpp:607
__XETLA_API std::enable_if_t<(T_src::register_layout !=reg_layout::linear) &&(T_dst::register_layout !=reg_layout::linear) &&is_same_layout< T_dst, T_src >::value &&(!is_floating_to_integer< T_dst, T_src >::value)> elemwise_cvt(T_dst &dst, T_src &src)
Performs element-wise data conversion; the src and dst tiles should have the same layout.
Definition op_function.hpp:40
mem_space
Definition common.hpp:77
gpu_arch
Definition common.hpp:73
msg_type
Definition common.hpp:78
mem_layout
Definition common.hpp:76
Epilogue policy for storing unaligned C.
Definition epilogue_policy.hpp:65
Describes the memory information.
Definition api.hpp:44
Describes the tile information of a sub-matrix.
Definition api.hpp:64
A struct that contains some register file.
Definition api.hpp:99