Typedefs
using	param_dtype_bf16_bf16_bf16 = dict_t< elem_t_t< tune_key::data_type_a, bf16 >, elem_t_t< tune_key::data_type_b, bf16 >, elem_t_t< tune_key::data_type_c, bf16 > >

using	param_memalignment_8_8_8 = dict_t< elem_v_t< tune_key::memory_alignment_a, 8UL, uint32_t >, elem_v_t< tune_key::memory_alignment_b, 8UL, uint32_t >, elem_v_t< tune_key::memory_alignment_c, 8UL, uint32_t > >

using	param_memlayout_rrr = dict_t< elem_v_t< tune_key::memory_layout_a, mem_layout::row_major >, elem_v_t< tune_key::memory_layout_b, mem_layout::row_major >, elem_v_t< tune_key::memory_layout_c, mem_layout::row_major > >

using	param_memspace_ggg = dict_t< elem_v_t< tune_key::memory_space_a, mem_space::global >, elem_v_t< tune_key::memory_space_b, mem_space::global >, elem_v_t< tune_key::memory_space_c, mem_space::global > >

using	param_performance_default = dict_t< elem_v_t< tune_key::wg_tile_k, 32UL, uint32_t >, elem_v_t< tune_key::prefetch_distance, 3UL, uint32_t >, elem_v_t< tune_key::periodic_sync_interval, 8UL, uint32_t > >

using	param_runtime_default = dict_t< elem_v_t< tune_key::pre_processing, tune_key_value::pre_processing_default >, elem_v_t< tune_key::mma_engine, mma_engine::xmx >, elem_v_t< tune_key::gpu_arch, gpu_arch::Xe >, elem_t_t< tune_key::epilogue_policy, group::epilogue_policy_default< gpu_arch::Xe > >, elem_v_t< tune_key::dispatch_policy, tune_key_value::dispatch_policy_default >, elem_t_t< tune_key::group_swizzle_policy, kernel::group_swizzle_default< gpu_arch::Xe > > >

Enumerations
enum class	lsc_action : uint8_t { prefetch , load , store , atomic }

Functions
template<typename dtype >
constexpr gpu::xetla::argument_type	mma_argument_type ()
	Converts a normal data type to the corresponding dpas argument type.

template<>
constexpr gpu::xetla::argument_type	mma_argument_type< tf32 > ()

template<>
constexpr gpu::xetla::argument_type	mma_argument_type< float > ()

template<>
constexpr gpu::xetla::argument_type	mma_argument_type< int8_t > ()

template<>
constexpr gpu::xetla::argument_type	mma_argument_type< uint8_t > ()

template<>
constexpr gpu::xetla::argument_type	mma_argument_type< bf16 > ()

template<>
constexpr gpu::xetla::argument_type	mma_argument_type< fp16 > ()

template<gpu::xetla::argument_type arg_type>
constexpr __ESIMD_NS::xmx::dpas_argument_type	get_argument_type ()
	lookup table for dpas argument type

constexpr __ESIMD_ENS::cache_hint	get_cache_hint (gpu::xetla::cache_hint ch)
	lookup table for cache hint.

constexpr __ESIMD_ENS::lsc_data_size	get_data_size (gpu::xetla::data_size ds)
	lookup table for data size.

constexpr __ESIMD_ENS::lsc_memory_kind	get_memory_kind (gpu::xetla::memory_kind mk)
	lookup table for memory kind.

constexpr __ESIMD_ENS::lsc_fence_op	get_fence_op (gpu::xetla::fence_op fo)
	lookup table for fence op.

constexpr __ESIMD_ENS::lsc_scope	get_fence_scope (gpu::xetla::fence_scope fs)
	lookup table for fence scope.

constexpr __ESIMD_NS::atomic_op	get_atomic_op (gpu::xetla::atomic_op ao)
	lookup table for atomic op.

template<uint32_t element_size>
constexpr uint32_t	get_element_size_code ()
	Get the element size code for the given element_size.

template<lsc_action Action, cache_hint L1H, cache_hint L2H, gpu_arch arch_tag>
constexpr std::enable_if_t< arch_tag==gpu_arch::Xe, void >	check_lsc_cache_hint ()

template<cache_hint L1H, cache_hint L2H, gpu_arch arch_tag>
constexpr std::enable_if_t< arch_tag==gpu_arch::Xe, uint32_t >	get_load_cache_hint_code ()

template<cache_hint L1H, cache_hint L2H, gpu_arch arch_tag>
constexpr std::enable_if_t< arch_tag==gpu_arch::Xe, uint32_t >	get_prefetch_cache_hint_code ()

template<cache_hint L1H, cache_hint L2H, gpu_arch arch_tag>
constexpr std::enable_if_t< arch_tag==gpu_arch::Xe, uint32_t >	get_store_cache_hint_code ()

template<cache_hint L1H, cache_hint L2H, gpu_arch arch_tag>
constexpr std::enable_if_t< arch_tag==gpu_arch::Xe, uint32_t >	get_atomic_cache_hint_code ()

template<uint32_t num_channel>
constexpr uint32_t	get_execSize_code ()

template<atomic_op Op>
constexpr uint32_t	get_atomic_opcode ()

__XETLA_API void	xetla_set_tensor_base_address (xetla_tdescriptor_ref desc, uint64_t base_address)

__XETLA_API void	xetla_set_tensor_base_address (xetla_tdescriptor_ref desc, uint32_t base_address)

__XETLA_API uint64_t	xetla_get_tensor_base_address (xetla_tdescriptor desc)

__XETLA_API void	xetla_set_tensor_width_x (xetla_tdescriptor_ref desc, uint32_t width_x)

__XETLA_API uint32_t	xetla_get_tensor_width_x (xetla_tdescriptor desc)

__XETLA_API void	xetla_set_tensor_width_y (xetla_tdescriptor_ref desc, uint32_t width_y)

__XETLA_API uint32_t	xetla_get_tensor_width_y (xetla_tdescriptor desc)

__XETLA_API void	xetla_set_tensor_pitch_x (xetla_tdescriptor_ref desc, uint32_t pitch_x)

__XETLA_API uint32_t	xetla_get_tensor_pitch_x (xetla_tdescriptor desc)

__XETLA_API void	xetla_set_tensor_offset_x (xetla_tdescriptor_ref desc, int32_t offset_x)

__XETLA_API int32_t	xetla_get_tensor_offset_x (xetla_tdescriptor desc)

__XETLA_API void	xetla_set_tensor_offset_y (xetla_tdescriptor_ref desc, int32_t offset_y)

__XETLA_API int32_t	xetla_get_tensor_offset_y (xetla_tdescriptor desc)

__XETLA_API void	xetla_set_block_widthx_widthy_arrlen (xetla_tdescriptor_ref desc, uint32_t block_widthx_widthy_arrlen)

__XETLA_API uint8_t	xetla_get_block_width_x (xetla_tdescriptor desc)

__XETLA_API uint8_t	xetla_get_block_width_y (xetla_tdescriptor desc)

__XETLA_API uint8_t	xetla_get_block_array_len (xetla_tdescriptor desc)

Typedef Documentation

◆ param_dtype_bf16_bf16_bf16

using gpu::xetla::detail::param_dtype_bf16_bf16_bf16 = dict_t< elem_t_t< tune_key::data_type_a, bf16 >, elem_t_t< tune_key::data_type_b, bf16 >, elem_t_t< tune_key::data_type_c, bf16 > >

◆ param_memalignment_8_8_8

using gpu::xetla::detail::param_memalignment_8_8_8 = dict_t< elem_v_t< tune_key::memory_alignment_a, 8UL, uint32_t >, elem_v_t< tune_key::memory_alignment_b, 8UL, uint32_t >, elem_v_t< tune_key::memory_alignment_c, 8UL, uint32_t > >

◆ param_memlayout_rrr

using gpu::xetla::detail::param_memlayout_rrr = dict_t< elem_v_t< tune_key::memory_layout_a, mem_layout::row_major >, elem_v_t< tune_key::memory_layout_b, mem_layout::row_major >, elem_v_t< tune_key::memory_layout_c, mem_layout::row_major > >

◆ param_memspace_ggg

using gpu::xetla::detail::param_memspace_ggg = dict_t< elem_v_t< tune_key::memory_space_a, mem_space::global >, elem_v_t< tune_key::memory_space_b, mem_space::global >, elem_v_t< tune_key::memory_space_c, mem_space::global > >

◆ param_performance_default

using gpu::xetla::detail::param_performance_default = dict_t< elem_v_t< tune_key::wg_tile_k, 32UL, uint32_t >, elem_v_t< tune_key::prefetch_distance, 3UL, uint32_t >, elem_v_t< tune_key::periodic_sync_interval, 8UL, uint32_t > >

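◆ param_runtime_default

using gpu::xetla::detail::param_runtime_default = dict_t< elem_v_t< tune_key::pre_processing, tune_key_value::pre_processing_default >, elem_v_t< tune_key::mma_engine, mma_engine::xmx >, elem_v_t< tune_key::gpu_arch, gpu_arch::Xe >, elem_t_t< tune_key::epilogue_policy, group::epilogue_policy_default< gpu_arch::Xe > >, elem_v_t< tune_key::dispatch_policy, tune_key_value::dispatch_policy_default >, elem_t_t< tune_key::group_swizzle_policy, kernel::group_swizzle_default< gpu_arch::Xe > > >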

Enumeration Type Documentation

◆ lsc_action

enum class gpu::xetla::detail::lsc_action : uint8_t

strong

Enumerator
prefetch
load
store
atomic

Function Documentation

◆ check_lsc_cache_hint()

template<lsc_action Action, cache_hint L1H, cache_hint L2H, gpu_arch arch_tag>

constexpr std::enable_if_t< arch_tag==gpu_arch::Xe, void > gpu::xetla::detail::check_lsc_cache_hint ( )

constexpr

◆ get_argument_type()

template<gpu::xetla::argument_type arg_type>

constexpr __ESIMD_NS::xmx::dpas_argument_type gpu::xetla::detail::get_argument_type ( )

constexpr

lookup table for dpas argument type

◆ get_atomic_cache_hint_code()

template<cache_hint L1H, cache_hint L2H, gpu_arch arch_tag>

constexpr std::enable_if_t< arch_tag==gpu_arch::Xe, uint32_t > gpu::xetla::detail::get_atomic_cache_hint_code ( )

constexpr

◆ get_atomic_op()

constexpr __ESIMD_NS::atomic_op gpu::xetla::detail::get_atomic_op ( gpu::xetla::atomic_op ao )

constexpr

lookup table for atomic op.

◆ get_atomic_opcode()

template<atomic_op Op>

constexpr uint32_t gpu::xetla::detail::get_atomic_opcode ( )

constexpr

◆ get_cache_hint()

constexpr __ESIMD_ENS::cache_hint gpu::xetla::detail::get_cache_hint ( gpu::xetla::cache_hint ch )

constexpr

lookup table for cache hint.

◆ get_data_size()

constexpr __ESIMD_ENS::lsc_data_size gpu::xetla::detail::get_data_size ( gpu::xetla::data_size ds )

constexpr

lookup table for data size.

◆ get_element_size_code()

template<uint32_t element_size>

constexpr uint32_t gpu::xetla::detail::get_element_size_code ( )

constexpr

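Get the element size code for the given element_size.

Template Parameters

element_size: the element size to encode, passed as a uint32_t template argument (units are not stated on this page).

Returns: the element size code, as a constexpr uint32_t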

◆ get_execSize_code()

template<uint32_t num_channel>

constexpr uint32_t gpu::xetla::detail::get_execSize_code ( )

constexpr

◆ get_fence_op()

constexpr __ESIMD_ENS::lsc_fence_op gpu::xetla::detail::get_fence_op ( gpu::xetla::fence_op fo )

constexpr

lookup table for fence op.

◆ get_fence_scope()

constexpr __ESIMD_ENS::lsc_scope gpu::xetla::detail::get_fence_scope ( gpu::xetla::fence_scope fs )

constexpr

lookup table for fence scope.

◆ get_load_cache_hint_code()

template<cache_hint L1H, cache_hint L2H, gpu_arch arch_tag>

constexpr std::enable_if_t< arch_tag==gpu_arch::Xe, uint32_t > gpu::xetla::detail::get_load_cache_hint_code ( )

constexpr

◆ get_memory_kind()

constexpr __ESIMD_ENS::lsc_memory_kind gpu::xetla::detail::get_memory_kind ( gpu::xetla::memory_kind mk )

constexpr

lookup table for memory kind.

◆ get_prefetch_cache_hint_code()

template<cache_hint L1H, cache_hint L2H, gpu_arch arch_tag>

constexpr std::enable_if_t< arch_tag==gpu_arch::Xe, uint32_t > gpu::xetla::detail::get_prefetch_cache_hint_code ( )

constexpr

◆ get_store_cache_hint_code()

template<cache_hint L1H, cache_hint L2H, gpu_arch arch_tag>

constexpr std::enable_if_t< arch_tag==gpu_arch::Xe, uint32_t > gpu::xetla::detail::get_store_cache_hint_code ( )

constexpr

◆ mma_argument_type()

template<typename dtype >

constexpr gpu::xetla::argument_type gpu::xetla::detail::mma_argument_type ( )

constexpr

Converts a normal data type to the corresponding dpas argument type.

◆ mma_argument_type< bf16 >()

template<>

constexpr gpu::xetla::argument_type gpu::xetla::detail::mma_argument_type< bf16 > ( )

constexpr

◆ mma_argument_type< float >()

template<>

constexpr gpu::xetla::argument_type gpu::xetla::detail::mma_argument_type< float > ( )

constexpr

◆ mma_argument_type< fp16 >()

template<>

constexpr gpu::xetla::argument_type gpu::xetla::detail::mma_argument_type< fp16 > ( )

constexpr

◆ mma_argument_type< int8_t >()

template<>

constexpr gpu::xetla::argument_type gpu::xetla::detail::mma_argument_type< int8_t > ( )

constexpr

◆ mma_argument_type< tf32 >()

template<>

constexpr gpu::xetla::argument_type gpu::xetla::detail::mma_argument_type< tf32 > ( )

constexpr

◆ mma_argument_type< uint8_t >()

template<>

constexpr gpu::xetla::argument_type gpu::xetla::detail::mma_argument_type< uint8_t > ( )

constexpr

◆ xetla_get_block_array_len()

__XETLA_API uint8_t gpu::xetla::detail::xetla_get_block_array_len ( xetla_tdescriptor desc )

◆ xetla_get_block_width_x()

__XETLA_API uint8_t gpu::xetla::detail::xetla_get_block_width_x ( xetla_tdescriptor desc )

◆ xetla_get_block_width_y()

__XETLA_API uint8_t gpu::xetla::detail::xetla_get_block_width_y ( xetla_tdescriptor desc )

◆ xetla_get_tensor_base_address()

__XETLA_API uint64_t gpu::xetla::detail::xetla_get_tensor_base_address ( xetla_tdescriptor desc )

◆ xetla_get_tensor_offset_x()

__XETLA_API int32_t gpu::xetla::detail::xetla_get_tensor_offset_x ( xetla_tdescriptor desc )

◆ xetla_get_tensor_offset_y()

__XETLA_API int32_t gpu::xetla::detail::xetla_get_tensor_offset_y ( xetla_tdescriptor desc )

◆ xetla_get_tensor_pitch_x()

__XETLA_API uint32_t gpu::xetla::detail::xetla_get_tensor_pitch_x ( xetla_tdescriptor desc )

◆ xetla_get_tensor_width_x()

__XETLA_API uint32_t gpu::xetla::detail::xetla_get_tensor_width_x ( xetla_tdescriptor desc )

◆ xetla_get_tensor_width_y()

__XETLA_API uint32_t gpu::xetla::detail::xetla_get_tensor_width_y ( xetla_tdescriptor desc )

◆ xetla_set_block_widthx_widthy_arrlen()

__XETLA_API void gpu::xetla::detail::xetla_set_block_widthx_widthy_arrlen	(	xetla_tdescriptor_ref	desc,
		uint32_t	block_widthx_widthy_arrlen
	)

◆ xetla_set_tensor_base_address() [1/2]

__XETLA_API void gpu::xetla::detail::xetla_set_tensor_base_address	(	xetla_tdescriptor_ref	desc,
		uint32_t	base_address
	)

◆ xetla_set_tensor_base_address() [2/2]

__XETLA_API void gpu::xetla::detail::xetla_set_tensor_base_address	(	xetla_tdescriptor_ref	desc,
		uint64_t	base_address
	)

◆ xetla_set_tensor_offset_x()

__XETLA_API void gpu::xetla::detail::xetla_set_tensor_offset_x	(	xetla_tdescriptor_ref	desc,
		int32_t	offset_x
	)

◆ xetla_set_tensor_offset_y()

__XETLA_API void gpu::xetla::detail::xetla_set_tensor_offset_y	(	xetla_tdescriptor_ref	desc,
		int32_t	offset_y
	)

◆ xetla_set_tensor_pitch_x()

__XETLA_API void gpu::xetla::detail::xetla_set_tensor_pitch_x	(	xetla_tdescriptor_ref	desc,
		uint32_t	pitch_x
	)

◆ xetla_set_tensor_width_x()

__XETLA_API void gpu::xetla::detail::xetla_set_tensor_width_x	(	xetla_tdescriptor_ref	desc,
		uint32_t	width_x
	)

◆ xetla_set_tensor_width_y()

__XETLA_API void gpu::xetla::detail::xetla_set_tensor_width_y	(	xetla_tdescriptor_ref	desc,
		uint32_t	width_y
	)

Typedefs

Enumerations

Functions

Typedef Documentation

◆ param_dtype_bf16_bf16_bf16

◆ param_memalignment_8_8_8

◆ param_memlayout_rrr

◆ param_memspace_ggg

◆ param_performance_default

◆ param_runtime_default

Enumeration Type Documentation

◆ lsc_action

Function Documentation

◆ check_lsc_cache_hint()

◆ get_argument_type()

◆ get_atomic_cache_hint_code()

◆ get_atomic_op()

◆ get_atomic_opcode()

◆ get_cache_hint()

◆ get_data_size()

◆ get_element_size_code()

◆ get_execSize_code()

◆ get_fence_op()

◆ get_fence_scope()

◆ get_load_cache_hint_code()

◆ get_memory_kind()

◆ get_prefetch_cache_hint_code()

◆ get_store_cache_hint_code()

◆ mma_argument_type()

◆ mma_argument_type< bf16 >()

◆ mma_argument_type< float >()

◆ mma_argument_type< fp16 >()

◆ mma_argument_type< int8_t >()

◆ mma_argument_type< tf32 >()

◆ mma_argument_type< uint8_t >()

◆ xetla_get_block_array_len()

◆ xetla_get_block_width_x()

◆ xetla_get_block_width_y()

◆ xetla_get_tensor_base_address()

◆ xetla_get_tensor_offset_x()

◆ xetla_get_tensor_offset_y()

◆ xetla_get_tensor_pitch_x()

◆ xetla_get_tensor_width_x()

◆ xetla_get_tensor_width_y()

◆ xetla_set_block_widthx_widthy_arrlen()

◆ xetla_set_tensor_base_address() [1/2]

◆ xetla_set_tensor_base_address() [2/2]

◆ xetla_set_tensor_offset_x()

◆ xetla_set_tensor_offset_y()

◆ xetla_set_tensor_pitch_x()

◆ xetla_set_tensor_width_x()

◆ xetla_set_tensor_width_y()