C++ API. More...

#include "common/utils/common.hpp"
#include "common/utils/limitation.hpp"
#include "common/utils/tensor_descriptor.hpp"

Include dependency graph for raw_send_load_store.hpp:

This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Namespaces
namespace	gpu

namespace	gpu::xetla

Functions
template<typename Ty , uint32_t block_width = 1, uint32_t block_height = 1, uint8_t array_len = 1>
__XETLA_API void	gpu::xetla::xetla_fill_tdesc (xetla_tdescriptor_ref tdesc, Ty *p, int tensor_width, int tensor_height, int tensor_pitch, int offset_x, int offset_y)
	Tensor descriptor construction (global memory version).

template<typename Ty >
__XETLA_API void	gpu::xetla::xetla_fill_tdesc (xetla_tdescriptor_ref tdesc, uint32_t base_address, int tensor_width, int tensor_height, int tensor_pitch, int offset_x, int offset_y)
	Tensor descriptor construction (local memory version).

template<typename Ty , uint32_t block_width = 1, uint32_t block_height = 1, uint8_t array_len = 1>
__XETLA_API xetla_tdescriptor	gpu::xetla::xetla_get_tdesc (Ty *p, int tensor_width, int tensor_height, int tensor_pitch, int offset_x, int offset_y)
	Generate a new tensor descriptor (global memory version).

template<typename Ty >
__XETLA_API xetla_tdescriptor	gpu::xetla::xetla_get_tdesc (uint32_t base_address, int tensor_width, int tensor_height, int tensor_pitch, int offset_x, int offset_y)
	Generate a new tensor descriptor (local memory version).

__XETLA_API void	gpu::xetla::xetla_update_tdesc_offsetx (xetla_tdescriptor_ref tdesc, int32_t doffset_x)
	Update the x coordinate in the given tensor descriptor.

__XETLA_API void	gpu::xetla::xetla_update_tdesc_offsety (xetla_tdescriptor_ref tdesc, int32_t doffset_y)
	Update the y coordinate in the given tensor descriptor.

template<typename Ty , uint32_t N, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, bool transpose = false, bool transform = false, gpu_arch arch_tag = gpu_arch::Xe>
__XETLA_API std::enable_if_t< arch_tag==gpu_arch::Xe, xetla_vector< Ty, N > >	gpu::xetla::xetla_tload_global (xetla_tdescriptor tdesc)
	Tensor load API.

template<typename Ty , uint32_t N, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, gpu_arch arch_tag = gpu_arch::Xe>
__XETLA_API std::enable_if_t< arch_tag==gpu_arch::Xe, void >	gpu::xetla::xetla_tstore_global (xetla_tdescriptor tdesc, xetla_vector< Ty, N > data)
	Tensor store API.

template<typename Ty , cache_hint L1H = cache_hint::cached, cache_hint L2H = cache_hint::cached, gpu_arch arch_tag = gpu_arch::Xe>
__XETLA_API std::enable_if_t< arch_tag==gpu_arch::Xe, void >	gpu::xetla::xetla_tprefetch_global (xetla_tdescriptor tdesc)
	Tensor prefetch API.

template<typename Ty , uint32_t N, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, atomic_op Op, gpu_arch arch_tag = gpu_arch::Xe, typename Toffset = uint32_t>
__XETLA_API std::enable_if_t< arch_tag==gpu_arch::Xe, void >	gpu::xetla::xetla_tatomic_store_global (uint64_t base_address, xetla_vector< Toffset, N > offset, xetla_vector< Ty, N > data, xetla_mask< N > pred=1)
	Tensor atomic store API.

Detailed Description

C++ API.

Namespaces

Functions

Detailed Description