XeTLA v0.3.6
Intel® Xe Templates for Linear Algebra - API Definition Document
 
Loading...
Searching...
No Matches
gpu::xetla::kernel Namespace Reference

Namespaces

namespace  detail
 

Classes

class  batch_gemm_t
 
class  block_2d
 
class  block_2d< gpu_arch::Xe, T >
 
struct  data_transformer_attr_t
 
struct  default_gemm_config_t
 
struct  default_gemm_t
 
struct  dispatch_policy_default
 Default GEMM_UNIVERSAL implementation. More...
 
struct  dispatch_policy_int4_dequantize_kslicing
 4bit kslicing GEMM implementation. More...
 
struct  dispatch_policy_kslicing
 Kslicing GEMM_UNIVERSAL implementation. More...
 
struct  dispatch_policy_stream_k
 StreamK GEMM implementation. More...
 
class  gemm_universal_t
 GEMM_UNIVERSAL functor. More...
 
class  gemm_universal_t< dispatch_policy_default< group_swizzle_ >, gemm_t_, epilogue_t_, std::enable_if_t<(group_swizzle_::arch_tag==gpu_arch::Xe)> >
 Default GEMM_UNIVERSAL functor, specialized for Xe architecture. More...
 
class  gemm_universal_t< dispatch_policy_int4_dequantize_kslicing< group_swizzle_, num_global_kslicing_, num_local_kslicing_ >, gemm_t_, epilogue_t_ >
 Is the GEMM functor, specialized for the 4-bit matB kslicing dispatch policy and the Xe architecture. More...
 
class  gemm_universal_t< dispatch_policy_kslicing< group_swizzle_, num_global_kslicing_, num_local_kslicing_ >, gemm_t_, epilogue_t_, std::enable_if_t<(group_swizzle_::arch_tag==gpu_arch::Xe)> >
 Is the gemm_universal functor, specialized for the kslicing dispatch policy and the Xe architecture. More...
 
class  gemm_universal_t< dispatch_policy_stream_k< gpu_arch::Xe >, gemm_t_, epilogue_t_ >
 StreamK GEMM_UNIVERSAL functor, specialized for Xe architecture. More...
 
class  general_1d
 
class  general_1d< gpu_arch::Xe, T >
 
struct  group_swizzle_default
 Default GROUP_SWIZZLE implementation. More...
 
struct  group_swizzle_snake
 GROUP_SWIZZLE implementation of snake curve. More...
 
struct  layer_norm_attr_t
 Sets up attribute of the layer norm. More...
 
struct  layer_norm_bwd_t
 
struct  layer_norm_bwd_t< dtype_x_, dtype_y_, dtype_weight_, dtype_acc_, layer_norm_attr_, gpu_arch::Xe, ln_bwd_fused_op_ >
 
struct  layer_norm_fwd_t
 
struct  layer_norm_fwd_t< dtype_x_, dtype_y_, dtype_weight_, dtype_acc_, layer_norm_attr_, store_for_bwd_, gpu_arch::Xe, ln_fwd_fused_op_ >
 
class  multi_layer_perceptron_t
 
struct  row_reduction_attr_t
 Sets up attribute of the row reduction. More...
 
struct  xetla_data_transformer
 Is the data_transformer functor. More...
 
struct  xetla_data_transformer< dtype_in_, dtype_out_, dtype_compute_, data_transformer_attr_, mem_layout_in_, need_fp8_op, gpu_arch::Xe >
 Is the data_transformer functor for Xe. Each time, each thread will load sg_tile_m x sg_tile_n data into registers and perform the data conversion. More...
 
struct  xetla_mha_attn_reg_bwd_t
 
struct  xetla_mha_attn_reg_fwd_t
 
struct  xetla_mha_core_attn_bwd_t
 
struct  xetla_mha_core_attn_fwd_t
 
struct  xetla_row_reduction_t
 Is the row_reduction functor. More...
 
struct  xetla_row_reduction_t< dtype_in_, dtype_out_, dtype_acc_, reduction_attr_, gpu_arch::Xe, fused_op_t_ >
 Is the row_reduction functor for Xe. The idea is that threads in a group will cooperatively process matrix_m x wg_tile_n. More...
 

Typedefs

using param_kslicing_g1l1_t = default_param_t::template update_t< elem_v_t< tune_key::global_kslicing_ratio, 1UL, uint32_t >, elem_v_t< tune_key::local_kslicing_ratio, 1UL, uint32_t >, elem_t_t< tune_key::wg_tile_shape, shape< 256, 256 > >, elem_v_t< tune_key::wg_tile_k, 32UL, uint32_t >, elem_t_t< tune_key::sg_tile_shape, shape< 64, 32 > >, elem_v_t< tune_key::dispatch_policy, tune_key_value::dispatch_policy_kslicing > >
 
using param_kslicing_g2l1_t = default_param_t::template update_t< elem_v_t< tune_key::global_kslicing_ratio, 2UL, uint32_t >, elem_v_t< tune_key::local_kslicing_ratio, 1UL, uint32_t >, elem_t_t< tune_key::wg_tile_shape, shape< 256, 256 > >, elem_v_t< tune_key::wg_tile_k, 32UL, uint32_t >, elem_t_t< tune_key::sg_tile_shape, shape< 64, 32 > >, elem_v_t< tune_key::dispatch_policy, tune_key_value::dispatch_policy_kslicing > >
 
using param_kslicing_g1l2_t = default_param_t::template update_t< elem_v_t< tune_key::global_kslicing_ratio, 1UL, uint32_t >, elem_v_t< tune_key::local_kslicing_ratio, 2UL, uint32_t >, elem_t_t< tune_key::wg_tile_shape, shape< 128, 64 > >, elem_v_t< tune_key::wg_tile_k, 32UL, uint32_t >, elem_t_t< tune_key::sg_tile_shape, shape< 32, 16 > >, elem_v_t< tune_key::dispatch_policy, tune_key_value::dispatch_policy_kslicing > >
 

Typedef Documentation

◆ param_kslicing_g1l1_t

◆ param_kslicing_g1l2_t

◆ param_kslicing_g2l1_t