XeTLA v0.3.6
Intel® Xe Templates for Linear Algebra - API Definition Document
 
Loading...
Searching...
No Matches
gpu::xetla::detail Namespace Reference

Typedefs

using param_dtype_bf16_bf16_bf16 = dict_t< elem_t_t< tune_key::data_type_a, bf16 >, elem_t_t< tune_key::data_type_b, bf16 >, elem_t_t< tune_key::data_type_c, bf16 > >
 
using param_memalignment_8_8_8 = dict_t< elem_v_t< tune_key::memory_alignment_a, 8UL, uint32_t >, elem_v_t< tune_key::memory_alignment_b, 8UL, uint32_t >, elem_v_t< tune_key::memory_alignment_c, 8UL, uint32_t > >
 
using param_memlayout_rrr = dict_t< elem_v_t< tune_key::memory_layout_a, mem_layout::row_major >, elem_v_t< tune_key::memory_layout_b, mem_layout::row_major >, elem_v_t< tune_key::memory_layout_c, mem_layout::row_major > >
 
using param_memspace_ggg = dict_t< elem_v_t< tune_key::memory_space_a, mem_space::global >, elem_v_t< tune_key::memory_space_b, mem_space::global >, elem_v_t< tune_key::memory_space_c, mem_space::global > >
 
using param_performance_default = dict_t< elem_v_t< tune_key::wg_tile_k, 32UL, uint32_t >, elem_v_t< tune_key::prefetch_distance, 3UL, uint32_t >, elem_v_t< tune_key::periodic_sync_interval, 8UL, uint32_t > >
 
using param_runtime_default = dict_t< elem_v_t< tune_key::pre_processing, tune_key_value::pre_processing_default >, elem_v_t< tune_key::mma_engine, mma_engine::xmx >, elem_v_t< tune_key::gpu_arch, gpu_arch::Xe >, elem_t_t< tune_key::epilogue_policy, group::epilogue_policy_default< gpu_arch::Xe > >, elem_v_t< tune_key::dispatch_policy, tune_key_value::dispatch_policy_default >, elem_t_t< tune_key::group_swizzle_policy, kernel::group_swizzle_default< gpu_arch::Xe > > >
 

Enumerations

enum class  lsc_action : uint8_t { prefetch , load , store , atomic }
 

Functions

template<typename dtype >
constexpr gpu::xetla::argument_type mma_argument_type ()
 Converts a regular data type to the corresponding dpas argument type.
 
template<>
constexpr gpu::xetla::argument_type mma_argument_type< tf32 > ()
 
template<>
constexpr gpu::xetla::argument_type mma_argument_type< float > ()
 
template<>
constexpr gpu::xetla::argument_type mma_argument_type< int8_t > ()
 
template<>
constexpr gpu::xetla::argument_type mma_argument_type< uint8_t > ()
 
template<>
constexpr gpu::xetla::argument_type mma_argument_type< bf16 > ()
 
template<>
constexpr gpu::xetla::argument_type mma_argument_type< fp16 > ()
 
template<gpu::xetla::argument_type arg_type>
constexpr __ESIMD_NS::xmx::dpas_argument_type get_argument_type ()
 lookup table for dpas argument type
 
constexpr __ESIMD_ENS::cache_hint get_cache_hint (gpu::xetla::cache_hint ch)
 lookup table for cache hint.
 
constexpr __ESIMD_ENS::lsc_data_size get_data_size (gpu::xetla::data_size ds)
 lookup table for data size.
 
constexpr __ESIMD_ENS::lsc_memory_kind get_memory_kind (gpu::xetla::memory_kind mk)
 lookup table for memory kind.
 
constexpr __ESIMD_ENS::lsc_fence_op get_fence_op (gpu::xetla::fence_op fo)
 lookup table for fence op.
 
constexpr __ESIMD_ENS::lsc_scope get_fence_scope (gpu::xetla::fence_scope fs)
 lookup table for fence scope.
 
constexpr __ESIMD_NS::atomic_op get_atomic_op (gpu::xetla::atomic_op ao)
 lookup table for atomic op.
 
template<uint32_t element_size>
constexpr uint32_t get_element_size_code ()
 Get the element size code object.
 
template<lsc_action Action, cache_hint L1H, cache_hint L2H, gpu_arch arch_tag>
constexpr std::enable_if_t< arch_tag==gpu_arch::Xe, void > check_lsc_cache_hint ()
 
template<cache_hint L1H, cache_hint L2H, gpu_arch arch_tag>
constexpr std::enable_if_t< arch_tag==gpu_arch::Xe, uint32_t > get_load_cache_hint_code ()
 
template<cache_hint L1H, cache_hint L2H, gpu_arch arch_tag>
constexpr std::enable_if_t< arch_tag==gpu_arch::Xe, uint32_t > get_prefetch_cache_hint_code ()
 
template<cache_hint L1H, cache_hint L2H, gpu_arch arch_tag>
constexpr std::enable_if_t< arch_tag==gpu_arch::Xe, uint32_t > get_store_cache_hint_code ()
 
template<cache_hint L1H, cache_hint L2H, gpu_arch arch_tag>
constexpr std::enable_if_t< arch_tag==gpu_arch::Xe, uint32_t > get_atomic_cache_hint_code ()
 
template<uint32_t num_channel>
constexpr uint32_t get_execSize_code ()
 
template<atomic_op Op>
constexpr uint32_t get_atomic_opcode ()
 
__XETLA_API void xetla_set_tensor_base_address (xetla_tdescriptor_ref desc, uint64_t base_address)
 
__XETLA_API void xetla_set_tensor_base_address (xetla_tdescriptor_ref desc, uint32_t base_address)
 
__XETLA_API uint64_t xetla_get_tensor_base_address (xetla_tdescriptor desc)
 
__XETLA_API void xetla_set_tensor_width_x (xetla_tdescriptor_ref desc, uint32_t width_x)
 
__XETLA_API uint32_t xetla_get_tensor_width_x (xetla_tdescriptor desc)
 
__XETLA_API void xetla_set_tensor_width_y (xetla_tdescriptor_ref desc, uint32_t width_y)
 
__XETLA_API uint32_t xetla_get_tensor_width_y (xetla_tdescriptor desc)
 
__XETLA_API void xetla_set_tensor_pitch_x (xetla_tdescriptor_ref desc, uint32_t pitch_x)
 
__XETLA_API uint32_t xetla_get_tensor_pitch_x (xetla_tdescriptor desc)
 
__XETLA_API void xetla_set_tensor_offset_x (xetla_tdescriptor_ref desc, int32_t offset_x)
 
__XETLA_API int32_t xetla_get_tensor_offset_x (xetla_tdescriptor desc)
 
__XETLA_API void xetla_set_tensor_offset_y (xetla_tdescriptor_ref desc, int32_t offset_y)
 
__XETLA_API int32_t xetla_get_tensor_offset_y (xetla_tdescriptor desc)
 
__XETLA_API void xetla_set_block_widthx_widthy_arrlen (xetla_tdescriptor_ref desc, uint32_t block_widthx_widthy_arrlen)
 
__XETLA_API uint8_t xetla_get_block_width_x (xetla_tdescriptor desc)
 
__XETLA_API uint8_t xetla_get_block_width_y (xetla_tdescriptor desc)
 
__XETLA_API uint8_t xetla_get_block_array_len (xetla_tdescriptor desc)
 

Typedef Documentation

◆ param_dtype_bf16_bf16_bf16

◆ param_memalignment_8_8_8

◆ param_memlayout_rrr

◆ param_memspace_ggg

◆ param_performance_default

◆ param_runtime_default

Enumeration Type Documentation

◆ lsc_action

enum class gpu::xetla::detail::lsc_action : uint8_t
strong
Enumerator
prefetch 
load 
store 
atomic 

Function Documentation

◆ check_lsc_cache_hint()

template<lsc_action Action, cache_hint L1H, cache_hint L2H, gpu_arch arch_tag>
constexpr std::enable_if_t< arch_tag==gpu_arch::Xe, void > gpu::xetla::detail::check_lsc_cache_hint ( )
constexpr

◆ get_argument_type()

template<gpu::xetla::argument_type arg_type>
constexpr __ESIMD_NS::xmx::dpas_argument_type gpu::xetla::detail::get_argument_type ( )
constexpr

lookup table for dpas argument type

◆ get_atomic_cache_hint_code()

template<cache_hint L1H, cache_hint L2H, gpu_arch arch_tag>
constexpr std::enable_if_t< arch_tag==gpu_arch::Xe, uint32_t > gpu::xetla::detail::get_atomic_cache_hint_code ( )
constexpr

◆ get_atomic_op()

constexpr __ESIMD_NS::atomic_op gpu::xetla::detail::get_atomic_op ( gpu::xetla::atomic_op  ao)
constexpr

lookup table for atomic op.

◆ get_atomic_opcode()

template<atomic_op Op>
constexpr uint32_t gpu::xetla::detail::get_atomic_opcode ( )
constexpr

◆ get_cache_hint()

constexpr __ESIMD_ENS::cache_hint gpu::xetla::detail::get_cache_hint ( gpu::xetla::cache_hint  ch)
constexpr

lookup table for cache hint.

◆ get_data_size()

constexpr __ESIMD_ENS::lsc_data_size gpu::xetla::detail::get_data_size ( gpu::xetla::data_size  ds)
constexpr

lookup table for data size.

◆ get_element_size_code()

template<uint32_t element_size>
constexpr uint32_t gpu::xetla::detail::get_element_size_code ( )
constexpr

Get the element size code object.

Parameters
element_size
Returns
constexpr uint32_t

◆ get_execSize_code()

template<uint32_t num_channel>
constexpr uint32_t gpu::xetla::detail::get_execSize_code ( )
constexpr

◆ get_fence_op()

constexpr __ESIMD_ENS::lsc_fence_op gpu::xetla::detail::get_fence_op ( gpu::xetla::fence_op  fo)
constexpr

lookup table for fence op.

◆ get_fence_scope()

constexpr __ESIMD_ENS::lsc_scope gpu::xetla::detail::get_fence_scope ( gpu::xetla::fence_scope  fs)
constexpr

lookup table for fence scope.

◆ get_load_cache_hint_code()

template<cache_hint L1H, cache_hint L2H, gpu_arch arch_tag>
constexpr std::enable_if_t< arch_tag==gpu_arch::Xe, uint32_t > gpu::xetla::detail::get_load_cache_hint_code ( )
constexpr

◆ get_memory_kind()

constexpr __ESIMD_ENS::lsc_memory_kind gpu::xetla::detail::get_memory_kind ( gpu::xetla::memory_kind  mk)
constexpr

lookup table for memory kind.

◆ get_prefetch_cache_hint_code()

template<cache_hint L1H, cache_hint L2H, gpu_arch arch_tag>
constexpr std::enable_if_t< arch_tag==gpu_arch::Xe, uint32_t > gpu::xetla::detail::get_prefetch_cache_hint_code ( )
constexpr

◆ get_store_cache_hint_code()

template<cache_hint L1H, cache_hint L2H, gpu_arch arch_tag>
constexpr std::enable_if_t< arch_tag==gpu_arch::Xe, uint32_t > gpu::xetla::detail::get_store_cache_hint_code ( )
constexpr

◆ mma_argument_type()

template<typename dtype >
constexpr gpu::xetla::argument_type gpu::xetla::detail::mma_argument_type ( )
constexpr

Converts a regular data type to the corresponding dpas argument type.

◆ mma_argument_type< bf16 >()

◆ mma_argument_type< float >()

template<>
constexpr gpu::xetla::argument_type gpu::xetla::detail::mma_argument_type< float > ( )
constexpr

◆ mma_argument_type< fp16 >()

◆ mma_argument_type< int8_t >()

template<>
constexpr gpu::xetla::argument_type gpu::xetla::detail::mma_argument_type< int8_t > ( )
constexpr

◆ mma_argument_type< tf32 >()

◆ mma_argument_type< uint8_t >()

template<>
constexpr gpu::xetla::argument_type gpu::xetla::detail::mma_argument_type< uint8_t > ( )
constexpr

◆ xetla_get_block_array_len()

__XETLA_API uint8_t gpu::xetla::detail::xetla_get_block_array_len ( xetla_tdescriptor  desc)

◆ xetla_get_block_width_x()

__XETLA_API uint8_t gpu::xetla::detail::xetla_get_block_width_x ( xetla_tdescriptor  desc)

◆ xetla_get_block_width_y()

__XETLA_API uint8_t gpu::xetla::detail::xetla_get_block_width_y ( xetla_tdescriptor  desc)

◆ xetla_get_tensor_base_address()

__XETLA_API uint64_t gpu::xetla::detail::xetla_get_tensor_base_address ( xetla_tdescriptor  desc)

◆ xetla_get_tensor_offset_x()

__XETLA_API int32_t gpu::xetla::detail::xetla_get_tensor_offset_x ( xetla_tdescriptor  desc)

◆ xetla_get_tensor_offset_y()

__XETLA_API int32_t gpu::xetla::detail::xetla_get_tensor_offset_y ( xetla_tdescriptor  desc)

◆ xetla_get_tensor_pitch_x()

__XETLA_API uint32_t gpu::xetla::detail::xetla_get_tensor_pitch_x ( xetla_tdescriptor  desc)

◆ xetla_get_tensor_width_x()

__XETLA_API uint32_t gpu::xetla::detail::xetla_get_tensor_width_x ( xetla_tdescriptor  desc)

◆ xetla_get_tensor_width_y()

__XETLA_API uint32_t gpu::xetla::detail::xetla_get_tensor_width_y ( xetla_tdescriptor  desc)

◆ xetla_set_block_widthx_widthy_arrlen()

__XETLA_API void gpu::xetla::detail::xetla_set_block_widthx_widthy_arrlen ( xetla_tdescriptor_ref  desc,
uint32_t  block_widthx_widthy_arrlen 
)

◆ xetla_set_tensor_base_address() [1/2]

__XETLA_API void gpu::xetla::detail::xetla_set_tensor_base_address ( xetla_tdescriptor_ref  desc,
uint32_t  base_address 
)

◆ xetla_set_tensor_base_address() [2/2]

__XETLA_API void gpu::xetla::detail::xetla_set_tensor_base_address ( xetla_tdescriptor_ref  desc,
uint64_t  base_address 
)

◆ xetla_set_tensor_offset_x()

__XETLA_API void gpu::xetla::detail::xetla_set_tensor_offset_x ( xetla_tdescriptor_ref  desc,
int32_t  offset_x 
)

◆ xetla_set_tensor_offset_y()

__XETLA_API void gpu::xetla::detail::xetla_set_tensor_offset_y ( xetla_tdescriptor_ref  desc,
int32_t  offset_y 
)

◆ xetla_set_tensor_pitch_x()

__XETLA_API void gpu::xetla::detail::xetla_set_tensor_pitch_x ( xetla_tdescriptor_ref  desc,
uint32_t  pitch_x 
)

◆ xetla_set_tensor_width_x()

__XETLA_API void gpu::xetla::detail::xetla_set_tensor_width_x ( xetla_tdescriptor_ref  desc,
uint32_t  width_x 
)

◆ xetla_set_tensor_width_y()

__XETLA_API void gpu::xetla::detail::xetla_set_tensor_width_y ( xetla_tdescriptor_ref  desc,
uint32_t  width_y 
)