XeTLA v0.3.6
Intel® Xe Templates for Linear Algebra - API Definition Document
 
Loading...
Searching...
No Matches
raw_send_load_store.hpp File Reference

C++ API. More...

Include dependency graph for raw_send_load_store.hpp:
This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Namespaces

namespace  gpu
 
namespace  gpu::xetla
 

Functions

template<typename Ty , uint32_t block_width = 1, uint32_t block_height = 1, uint8_t array_len = 1>
__XETLA_API void gpu::xetla::xetla_fill_tdesc (xetla_tdescriptor_ref tdesc, Ty *p, int tensor_width, int tensor_height, int tensor_pitch, int offset_x, int offset_y)
 Tensor descriptor construction (global memory version).
 
template<typename Ty >
__XETLA_API void gpu::xetla::xetla_fill_tdesc (xetla_tdescriptor_ref tdesc, uint32_t base_address, int tensor_width, int tensor_height, int tensor_pitch, int offset_x, int offset_y)
 Tensor descriptor construction (local memory version).
 
template<typename Ty , uint32_t block_width = 1, uint32_t block_height = 1, uint8_t array_len = 1>
__XETLA_API xetla_tdescriptor gpu::xetla::xetla_get_tdesc (Ty *p, int tensor_width, int tensor_height, int tensor_pitch, int offset_x, int offset_y)
 Generate a new tensor descriptor (global memory version).
 
template<typename Ty >
__XETLA_API xetla_tdescriptor gpu::xetla::xetla_get_tdesc (uint32_t base_address, int tensor_width, int tensor_height, int tensor_pitch, int offset_x, int offset_y)
 Generate a new tensor descriptor (local memory version).
 
__XETLA_API void gpu::xetla::xetla_update_tdesc_offsetx (xetla_tdescriptor_ref tdesc, int32_t doffset_x)
 Update the x coordinate in the given tensor descriptor.
 
__XETLA_API void gpu::xetla::xetla_update_tdesc_offsety (xetla_tdescriptor_ref tdesc, int32_t doffset_y)
 Update the y coordinate in the given tensor descriptor.
 
template<typename Ty , uint32_t N, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, bool transpose = false, bool transform = false, gpu_arch arch_tag = gpu_arch::Xe>
__XETLA_API std::enable_if_t< arch_tag==gpu_arch::Xe, xetla_vector< Ty, N > > gpu::xetla::xetla_tload_global (xetla_tdescriptor tdesc)
 Tensor load API.
 
template<typename Ty , uint32_t N, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, gpu_arch arch_tag = gpu_arch::Xe>
__XETLA_API std::enable_if_t< arch_tag==gpu_arch::Xe, void > gpu::xetla::xetla_tstore_global (xetla_tdescriptor tdesc, xetla_vector< Ty, N > data)
 Tensor store API.
 
template<typename Ty , cache_hint L1H = cache_hint::cached, cache_hint L2H = cache_hint::cached, gpu_arch arch_tag = gpu_arch::Xe>
__XETLA_API std::enable_if_t< arch_tag==gpu_arch::Xe, void > gpu::xetla::xetla_tprefetch_global (xetla_tdescriptor tdesc)
 Tensor prefetch API.
 
template<typename Ty , uint32_t N, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, atomic_op Op, gpu_arch arch_tag = gpu_arch::Xe, typename Toffset = uint32_t>
__XETLA_API std::enable_if_t< arch_tag==gpu_arch::Xe, void > gpu::xetla::xetla_tatomic_store_global (uint64_t base_address, xetla_vector< Toffset, N > offset, xetla_vector< Ty, N > data, xetla_mask< N > pred=1)
 Tensor atomic store API.
 

Detailed Description

C++ API.