XeTLA v0.3.6
Intel® Xe Templates for Linear Algebra - API Definition Document
 
Loading...
Searching...
No Matches
gpu::xetla::group::mask_gen_t< dtype_mask_, wg_tile_n_, wg_tile_m_, sg_tile_n_, sg_tile_m_, random_simd_, arch_ > Struct Template Reference

#include <dropout_mask_gen.hpp>

Classes

struct  arguments_t
 

Public Types

using dtype_mask = dtype_mask_
 
using load_store_attr = typename arch_attr_t< arch_ >::template load_store_attr< msg_type::block_2d >
 
using mask_out_tile_desc_t = subgroup::tile_desc_t< tile_size_x, tile_size_y, block_size_x, block_size_y, reg_layout::tiled >
 
using mask_out_tile_t = subgroup::tile_t< dtype_mask, mask_out_tile_desc_t >
 
using mask_out_payload_t = subgroup::mem_payload_t< mem_desc_t< dtype_mask, mem_layout::row_major, mem_space::global >, mask_out_tile_desc_t,(sg_tile_m==1) ? msg_type::block_1d :msg_type::block_2d, gpu_arch::Xe >
 

Public Member Functions

__XETLA_API KERNEL_FUNC void operator() (arguments_t *args, uint32_t wg_idx, uint32_t wg_idy, uint32_t sg_idx, uint32_t sg_idy, uint32_t linear_idx)
 

Static Public Attributes

static constexpr uint32_t wg_tile_n = wg_tile_n_
 
static constexpr uint32_t wg_tile_m = wg_tile_m_
 
static constexpr uint32_t sg_tile_n = sg_tile_n_
 
static constexpr uint32_t sg_tile_m = sg_tile_m_
 
static constexpr uint32_t random_simd = random_simd_
 
static constexpr uint32_t wg_size_x = (wg_tile_n + sg_tile_n - 1) / sg_tile_n
 
static constexpr uint32_t wg_size_y = (wg_tile_m + sg_tile_m - 1) / sg_tile_m
 
static constexpr uint32_t max_store_width_in_bytes = load_store_attr::max_store_width_in_bytes
 
static constexpr uint32_t max_store_width_in_elem = max_store_width_in_bytes / sizeof(dtype_mask)
 
static constexpr uint32_t max_store_height_in_elem = load_store_attr::max_store_height_in_elem
 
static constexpr uint32_t tile_size_x = sg_tile_n
 
static constexpr uint32_t tile_size_y = sg_tile_m
 
static constexpr uint32_t block_size_x
 
static constexpr uint32_t block_size_y
 
static constexpr uint32_t tile_size = tile_size_x * tile_size_y
 

Detailed Description

template<typename dtype_mask_, uint32_t wg_tile_n_, uint32_t wg_tile_m_, uint32_t sg_tile_n_, uint32_t sg_tile_m_, uint32_t random_simd_ = 16, gpu_arch arch_ = gpu_arch::Xe>
struct gpu::xetla::group::mask_gen_t< dtype_mask_, wg_tile_n_, wg_tile_m_, sg_tile_n_, sg_tile_m_, random_simd_, arch_ >
Template Parameters
dtype_mask_
wg_tile_n_
wg_tile_m_
sg_tile_n_
sg_tile_m_
random_simd_
arch_

Member Typedef Documentation

◆ dtype_mask

template<typename dtype_mask_ , uint32_t wg_tile_n_, uint32_t wg_tile_m_, uint32_t sg_tile_n_, uint32_t sg_tile_m_, uint32_t random_simd_ = 16, gpu_arch arch_ = gpu_arch::Xe>
using gpu::xetla::group::mask_gen_t< dtype_mask_, wg_tile_n_, wg_tile_m_, sg_tile_n_, sg_tile_m_, random_simd_, arch_ >::dtype_mask = dtype_mask_

◆ load_store_attr

template<typename dtype_mask_ , uint32_t wg_tile_n_, uint32_t wg_tile_m_, uint32_t sg_tile_n_, uint32_t sg_tile_m_, uint32_t random_simd_ = 16, gpu_arch arch_ = gpu_arch::Xe>
using gpu::xetla::group::mask_gen_t< dtype_mask_, wg_tile_n_, wg_tile_m_, sg_tile_n_, sg_tile_m_, random_simd_, arch_ >::load_store_attr = typename arch_attr_t< arch_>::template load_store_attr<msg_type::block_2d>

◆ mask_out_payload_t

template<typename dtype_mask_ , uint32_t wg_tile_n_, uint32_t wg_tile_m_, uint32_t sg_tile_n_, uint32_t sg_tile_m_, uint32_t random_simd_ = 16, gpu_arch arch_ = gpu_arch::Xe>
using gpu::xetla::group::mask_gen_t< dtype_mask_, wg_tile_n_, wg_tile_m_, sg_tile_n_, sg_tile_m_, random_simd_, arch_ >::mask_out_payload_t = subgroup::mem_payload_t< mem_desc_t<dtype_mask, mem_layout::row_major, mem_space::global>, mask_out_tile_desc_t, (sg_tile_m == 1) ? msg_type::block_1d : msg_type::block_2d, gpu_arch::Xe>

◆ mask_out_tile_desc_t

template<typename dtype_mask_ , uint32_t wg_tile_n_, uint32_t wg_tile_m_, uint32_t sg_tile_n_, uint32_t sg_tile_m_, uint32_t random_simd_ = 16, gpu_arch arch_ = gpu_arch::Xe>
using gpu::xetla::group::mask_gen_t< dtype_mask_, wg_tile_n_, wg_tile_m_, sg_tile_n_, sg_tile_m_, random_simd_, arch_ >::mask_out_tile_desc_t = subgroup::tile_desc_t<tile_size_x, tile_size_y, block_size_x, block_size_y, reg_layout::tiled>

◆ mask_out_tile_t

template<typename dtype_mask_ , uint32_t wg_tile_n_, uint32_t wg_tile_m_, uint32_t sg_tile_n_, uint32_t sg_tile_m_, uint32_t random_simd_ = 16, gpu_arch arch_ = gpu_arch::Xe>
using gpu::xetla::group::mask_gen_t< dtype_mask_, wg_tile_n_, wg_tile_m_, sg_tile_n_, sg_tile_m_, random_simd_, arch_ >::mask_out_tile_t = subgroup::tile_t<dtype_mask, mask_out_tile_desc_t>

Member Function Documentation

◆ operator()()

template<typename dtype_mask_ , uint32_t wg_tile_n_, uint32_t wg_tile_m_, uint32_t sg_tile_n_, uint32_t sg_tile_m_, uint32_t random_simd_ = 16, gpu_arch arch_ = gpu_arch::Xe>
__XETLA_API KERNEL_FUNC void gpu::xetla::group::mask_gen_t< dtype_mask_, wg_tile_n_, wg_tile_m_, sg_tile_n_, sg_tile_m_, random_simd_, arch_ >::operator() ( arguments_t * args,
uint32_t  wg_idx,
uint32_t  wg_idy,
uint32_t  sg_idx,
uint32_t  sg_idy,
uint32_t  linear_idx 
)
inline
Parameters
args
wg_idx
wg_idy
sg_idx
sg_idy
linear_idx
Returns

Member Data Documentation

◆ block_size_x

template<typename dtype_mask_ , uint32_t wg_tile_n_, uint32_t wg_tile_m_, uint32_t sg_tile_n_, uint32_t sg_tile_m_, uint32_t random_simd_ = 16, gpu_arch arch_ = gpu_arch::Xe>
constexpr uint32_t gpu::xetla::group::mask_gen_t< dtype_mask_, wg_tile_n_, wg_tile_m_, sg_tile_n_, sg_tile_m_, random_simd_, arch_ >::block_size_x
static constexpr
Initial value:
static constexpr uint32_t tile_size_x
Definition dropout_mask_gen.hpp:72
static constexpr uint32_t max_store_width_in_elem
Definition dropout_mask_gen.hpp:69
Definition common.hpp:80

◆ block_size_y

template<typename dtype_mask_ , uint32_t wg_tile_n_, uint32_t wg_tile_m_, uint32_t sg_tile_n_, uint32_t sg_tile_m_, uint32_t random_simd_ = 16, gpu_arch arch_ = gpu_arch::Xe>
constexpr uint32_t gpu::xetla::group::mask_gen_t< dtype_mask_, wg_tile_n_, wg_tile_m_, sg_tile_n_, sg_tile_m_, random_simd_, arch_ >::block_size_y
static constexpr
Initial value:
static constexpr uint32_t tile_size_y
Definition dropout_mask_gen.hpp:73
static constexpr uint32_t max_store_height_in_elem
Definition dropout_mask_gen.hpp:71

◆ max_store_height_in_elem

template<typename dtype_mask_ , uint32_t wg_tile_n_, uint32_t wg_tile_m_, uint32_t sg_tile_n_, uint32_t sg_tile_m_, uint32_t random_simd_ = 16, gpu_arch arch_ = gpu_arch::Xe>
constexpr uint32_t gpu::xetla::group::mask_gen_t< dtype_mask_, wg_tile_n_, wg_tile_m_, sg_tile_n_, sg_tile_m_, random_simd_, arch_ >::max_store_height_in_elem = load_store_attr::max_store_height_in_elem
static constexpr

◆ max_store_width_in_bytes

template<typename dtype_mask_ , uint32_t wg_tile_n_, uint32_t wg_tile_m_, uint32_t sg_tile_n_, uint32_t sg_tile_m_, uint32_t random_simd_ = 16, gpu_arch arch_ = gpu_arch::Xe>
constexpr uint32_t gpu::xetla::group::mask_gen_t< dtype_mask_, wg_tile_n_, wg_tile_m_, sg_tile_n_, sg_tile_m_, random_simd_, arch_ >::max_store_width_in_bytes = load_store_attr::max_store_width_in_bytes
static constexpr

◆ max_store_width_in_elem

template<typename dtype_mask_ , uint32_t wg_tile_n_, uint32_t wg_tile_m_, uint32_t sg_tile_n_, uint32_t sg_tile_m_, uint32_t random_simd_ = 16, gpu_arch arch_ = gpu_arch::Xe>
constexpr uint32_t gpu::xetla::group::mask_gen_t< dtype_mask_, wg_tile_n_, wg_tile_m_, sg_tile_n_, sg_tile_m_, random_simd_, arch_ >::max_store_width_in_elem = max_store_width_in_bytes / sizeof(dtype_mask)
static constexpr

◆ random_simd

template<typename dtype_mask_ , uint32_t wg_tile_n_, uint32_t wg_tile_m_, uint32_t sg_tile_n_, uint32_t sg_tile_m_, uint32_t random_simd_ = 16, gpu_arch arch_ = gpu_arch::Xe>
constexpr uint32_t gpu::xetla::group::mask_gen_t< dtype_mask_, wg_tile_n_, wg_tile_m_, sg_tile_n_, sg_tile_m_, random_simd_, arch_ >::random_simd = random_simd_
static constexpr

◆ sg_tile_m

template<typename dtype_mask_ , uint32_t wg_tile_n_, uint32_t wg_tile_m_, uint32_t sg_tile_n_, uint32_t sg_tile_m_, uint32_t random_simd_ = 16, gpu_arch arch_ = gpu_arch::Xe>
constexpr uint32_t gpu::xetla::group::mask_gen_t< dtype_mask_, wg_tile_n_, wg_tile_m_, sg_tile_n_, sg_tile_m_, random_simd_, arch_ >::sg_tile_m = sg_tile_m_
static constexpr

◆ sg_tile_n

template<typename dtype_mask_ , uint32_t wg_tile_n_, uint32_t wg_tile_m_, uint32_t sg_tile_n_, uint32_t sg_tile_m_, uint32_t random_simd_ = 16, gpu_arch arch_ = gpu_arch::Xe>
constexpr uint32_t gpu::xetla::group::mask_gen_t< dtype_mask_, wg_tile_n_, wg_tile_m_, sg_tile_n_, sg_tile_m_, random_simd_, arch_ >::sg_tile_n = sg_tile_n_
static constexpr

◆ tile_size

template<typename dtype_mask_ , uint32_t wg_tile_n_, uint32_t wg_tile_m_, uint32_t sg_tile_n_, uint32_t sg_tile_m_, uint32_t random_simd_ = 16, gpu_arch arch_ = gpu_arch::Xe>
constexpr uint32_t gpu::xetla::group::mask_gen_t< dtype_mask_, wg_tile_n_, wg_tile_m_, sg_tile_n_, sg_tile_m_, random_simd_, arch_ >::tile_size = tile_size_x * tile_size_y
static constexpr

◆ tile_size_x

template<typename dtype_mask_ , uint32_t wg_tile_n_, uint32_t wg_tile_m_, uint32_t sg_tile_n_, uint32_t sg_tile_m_, uint32_t random_simd_ = 16, gpu_arch arch_ = gpu_arch::Xe>
constexpr uint32_t gpu::xetla::group::mask_gen_t< dtype_mask_, wg_tile_n_, wg_tile_m_, sg_tile_n_, sg_tile_m_, random_simd_, arch_ >::tile_size_x = sg_tile_n
static constexpr

◆ tile_size_y

template<typename dtype_mask_ , uint32_t wg_tile_n_, uint32_t wg_tile_m_, uint32_t sg_tile_n_, uint32_t sg_tile_m_, uint32_t random_simd_ = 16, gpu_arch arch_ = gpu_arch::Xe>
constexpr uint32_t gpu::xetla::group::mask_gen_t< dtype_mask_, wg_tile_n_, wg_tile_m_, sg_tile_n_, sg_tile_m_, random_simd_, arch_ >::tile_size_y = sg_tile_m
static constexpr

◆ wg_size_x

template<typename dtype_mask_ , uint32_t wg_tile_n_, uint32_t wg_tile_m_, uint32_t sg_tile_n_, uint32_t sg_tile_m_, uint32_t random_simd_ = 16, gpu_arch arch_ = gpu_arch::Xe>
constexpr uint32_t gpu::xetla::group::mask_gen_t< dtype_mask_, wg_tile_n_, wg_tile_m_, sg_tile_n_, sg_tile_m_, random_simd_, arch_ >::wg_size_x = (wg_tile_n + sg_tile_n - 1) / sg_tile_n
static constexpr

◆ wg_size_y

template<typename dtype_mask_ , uint32_t wg_tile_n_, uint32_t wg_tile_m_, uint32_t sg_tile_n_, uint32_t sg_tile_m_, uint32_t random_simd_ = 16, gpu_arch arch_ = gpu_arch::Xe>
constexpr uint32_t gpu::xetla::group::mask_gen_t< dtype_mask_, wg_tile_n_, wg_tile_m_, sg_tile_n_, sg_tile_m_, random_simd_, arch_ >::wg_size_y = (wg_tile_m + sg_tile_m - 1) / sg_tile_m
static constexpr

◆ wg_tile_m

template<typename dtype_mask_ , uint32_t wg_tile_n_, uint32_t wg_tile_m_, uint32_t sg_tile_n_, uint32_t sg_tile_m_, uint32_t random_simd_ = 16, gpu_arch arch_ = gpu_arch::Xe>
constexpr uint32_t gpu::xetla::group::mask_gen_t< dtype_mask_, wg_tile_n_, wg_tile_m_, sg_tile_n_, sg_tile_m_, random_simd_, arch_ >::wg_tile_m = wg_tile_m_
static constexpr

◆ wg_tile_n

template<typename dtype_mask_ , uint32_t wg_tile_n_, uint32_t wg_tile_m_, uint32_t sg_tile_n_, uint32_t sg_tile_m_, uint32_t random_simd_ = 16, gpu_arch arch_ = gpu_arch::Xe>
constexpr uint32_t gpu::xetla::group::mask_gen_t< dtype_mask_, wg_tile_n_, wg_tile_m_, sg_tile_n_, sg_tile_m_, random_simd_, arch_ >::wg_tile_n = wg_tile_n_
static constexpr