|
| class | batch_gemm_t |
| |
| class | block_2d |
| |
| class | block_2d< gpu_arch::Xe, T > |
| |
| struct | data_transformer_attr_t |
| |
| struct | default_gemm_config_t |
| |
| struct | default_gemm_t |
| |
| struct | dispatch_policy_default |
| | Default GEMM_UNIVERSAL implementation. More...
|
| |
| struct | dispatch_policy_int4_dequantize_kslicing |
| | 4bit kslicing GEMM implementation. More...
|
| |
| struct | dispatch_policy_kslicing |
| | Kslicing GEMM_UNIVERSAL implementation. More...
|
| |
| struct | dispatch_policy_stream_k |
| | StreamK GEMM implementation. More...
|
| |
| class | gemm_universal_t |
| | GEMM_UNIVERSAL functor. More...
|
| |
| class | gemm_universal_t< dispatch_policy_default< group_swizzle_ >, gemm_t_, epilogue_t_, std::enable_if_t<(group_swizzle_::arch_tag==gpu_arch::Xe)> > |
| | Default GEMM_UNIVERSAL functor, specialized for Xe architecture. More...
|
| |
| class | gemm_universal_t< dispatch_policy_int4_dequantize_kslicing< group_swizzle_, num_global_kslicing_, num_local_kslicing_ >, gemm_t_, epilogue_t_ > |
| | GEMM functor, specialized for the 4-bit matB kslicing dispatch policy and the Xe architecture. More...
|
| |
| class | gemm_universal_t< dispatch_policy_kslicing< group_swizzle_, num_global_kslicing_, num_local_kslicing_ >, gemm_t_, epilogue_t_, std::enable_if_t<(group_swizzle_::arch_tag==gpu_arch::Xe)> > |
| | The gemm_universal functor, specialized for the kslicing dispatch policy and the Xe architecture. More...
|
| |
| class | gemm_universal_t< dispatch_policy_stream_k< gpu_arch::Xe >, gemm_t_, epilogue_t_ > |
| | StreamK GEMM_UNIVERSAL functor, specialized for Xe architecture. More...
|
| |
| class | general_1d |
| |
| class | general_1d< gpu_arch::Xe, T > |
| |
| struct | group_swizzle_default |
| | Default GROUP_SWIZZLE implementation. More...
|
| |
| struct | group_swizzle_snake |
| | GROUP_SWIZZLE implementation of snake curve. More...
|
| |
| struct | layer_norm_attr_t |
| | Sets up the attributes of the layer norm. More...
|
| |
| struct | layer_norm_bwd_t |
| |
| struct | layer_norm_bwd_t< dtype_x_, dtype_y_, dtype_weight_, dtype_acc_, layer_norm_attr_, gpu_arch::Xe, ln_bwd_fused_op_ > |
| |
| struct | layer_norm_fwd_t |
| |
| struct | layer_norm_fwd_t< dtype_x_, dtype_y_, dtype_weight_, dtype_acc_, layer_norm_attr_, store_for_bwd_, gpu_arch::Xe, ln_fwd_fused_op_ > |
| |
| class | multi_layer_perceptron_t |
| |
| struct | row_reduction_attr_t |
| | Sets up the attributes of the row reduction. More...
|
| |
| struct | xetla_data_transformer |
| | Is the data_transformer functor. More...
|
| |
| struct | xetla_data_transformer< dtype_in_, dtype_out_, dtype_compute_, data_transformer_attr_, mem_layout_in_, need_fp8_op, gpu_arch::Xe > |
| | The data_transformer functor for Xe. Each time, each thread loads sg_tile_m x sg_tile_n data into registers and converts the data. More...
|
| |
| struct | xetla_mha_attn_reg_bwd_t |
| |
| struct | xetla_mha_attn_reg_fwd_t |
| |
| struct | xetla_mha_core_attn_bwd_t |
| |
| struct | xetla_mha_core_attn_fwd_t |
| |
| struct | xetla_row_reduction_t |
| | Is the row_reduction functor. More...
|
| |
| struct | xetla_row_reduction_t< dtype_in_, dtype_out_, dtype_acc_, reduction_attr_, gpu_arch::Xe, fused_op_t_ > |
| | The row_reduction functor for Xe. The idea is that the threads in a group cooperatively process matrix_m x wg_tile_n. More...
|
| |
|
| using | param_kslicing_g1l1_t = default_param_t::template update_t< elem_v_t< tune_key::global_kslicing_ratio, 1UL, uint32_t >, elem_v_t< tune_key::local_kslicing_ratio, 1UL, uint32_t >, elem_t_t< tune_key::wg_tile_shape, shape< 256, 256 > >, elem_v_t< tune_key::wg_tile_k, 32UL, uint32_t >, elem_t_t< tune_key::sg_tile_shape, shape< 64, 32 > >, elem_v_t< tune_key::dispatch_policy, tune_key_value::dispatch_policy_kslicing > > |
| |
| using | param_kslicing_g2l1_t = default_param_t::template update_t< elem_v_t< tune_key::global_kslicing_ratio, 2UL, uint32_t >, elem_v_t< tune_key::local_kslicing_ratio, 1UL, uint32_t >, elem_t_t< tune_key::wg_tile_shape, shape< 256, 256 > >, elem_v_t< tune_key::wg_tile_k, 32UL, uint32_t >, elem_t_t< tune_key::sg_tile_shape, shape< 64, 32 > >, elem_v_t< tune_key::dispatch_policy, tune_key_value::dispatch_policy_kslicing > > |
| |
| using | param_kslicing_g1l2_t = default_param_t::template update_t< elem_v_t< tune_key::global_kslicing_ratio, 1UL, uint32_t >, elem_v_t< tune_key::local_kslicing_ratio, 2UL, uint32_t >, elem_t_t< tune_key::wg_tile_shape, shape< 128, 64 > >, elem_v_t< tune_key::wg_tile_k, 32UL, uint32_t >, elem_t_t< tune_key::sg_tile_shape, shape< 32, 16 > >, elem_v_t< tune_key::dispatch_policy, tune_key_value::dispatch_policy_kslicing > > |
| |