23template <
typename dtype_, u
int32_t N>
33 template <
typename matAcc_t,
typename coord_t>
36 [[maybe_unused]] uint32_t slm_base = 0,
37 [[maybe_unused]] uint32_t nbarrier_base = 0) {
38 using dtype_acc =
typename matAcc_t::dtype;
40 constexpr uint32_t elems = 8 * 16;
41 constexpr uint32_t rounds = matAcc_t::tile_elems / elems;
43 for (uint32_t r = 0; r < rounds; ++r) {
45 auto x = matAcc.reg.xetla_select<elems, 1>(elems * r);
47 for (uint32_t i = 0; i < N; ++i) {
49 res +=
static_cast<dtype_acc
>(args.
coeff[i]);
53 constexpr uint32_t remained_elems = matAcc_t::tile_elems % elems;
54 if constexpr (remained_elems != 0) {
56 auto x = matAcc.reg.xetla_select<remained_elems, 1>(
57 elems * (matAcc_t::tile_elems / elems));
59 for (uint32_t i = 0; i < N; ++i) {
61 res +=
static_cast<dtype_acc
>(args.
coeff[i]);
#define __XETLA_API
Definition common.hpp:43
__ESIMD_NS::simd< native_type_t< Ty >, N > xetla_vector
wrapper for xetla_vector.
Definition base_types.hpp:149
#define KERNEL_FUNC
KERNEL_FUNC macro.
Definition common.hpp:39
Definition limitation.hpp:457
Definition gemm_polynomial.hpp:28
coeff_t coeff
Definition gemm_polynomial.hpp:29
arguments_t(coeff_t coeff_)
Definition gemm_polynomial.hpp:31
Definition gemm_polynomial.hpp:24
dtype_ dtype
Definition gemm_polynomial.hpp:25
__XETLA_API KERNEL_FUNC void operator()(matAcc_t &matAcc, coord_t coord, arguments_t args, uint32_t slm_base=0, uint32_t nbarrier_base=0)
Definition gemm_polynomial.hpp:34
xetla_vector< dtype, N > coeff_t
Definition gemm_polynomial.hpp:26