C++ API. More...
#include "common/utils/common.hpp"
#include "common/utils/limitation.hpp"
#include "common/utils/tensor_descriptor.hpp"

Go to the source code of this file.
Namespaces | |
| namespace | gpu |
| namespace | gpu::xetla |
Functions | |
| template<typename Ty , uint32_t block_width = 1, uint32_t block_height = 1, uint8_t array_len = 1> | |
| __XETLA_API void | gpu::xetla::xetla_fill_tdesc (xetla_tdescriptor_ref tdesc, Ty *p, int tensor_width, int tensor_height, int tensor_pitch, int offset_x, int offset_y) |
| Tensor descriptor construction (global memory version). | |
| template<typename Ty > | |
| __XETLA_API void | gpu::xetla::xetla_fill_tdesc (xetla_tdescriptor_ref tdesc, uint32_t base_address, int tensor_width, int tensor_height, int tensor_pitch, int offset_x, int offset_y) |
| Tensor descriptor construction (local memory version). | |
| template<typename Ty , uint32_t block_width = 1, uint32_t block_height = 1, uint8_t array_len = 1> | |
| __XETLA_API xetla_tdescriptor | gpu::xetla::xetla_get_tdesc (Ty *p, int tensor_width, int tensor_height, int tensor_pitch, int offset_x, int offset_y) |
| Generate a new tensor descriptor (global memory version). | |
| template<typename Ty > | |
| __XETLA_API xetla_tdescriptor | gpu::xetla::xetla_get_tdesc (uint32_t base_address, int tensor_width, int tensor_height, int tensor_pitch, int offset_x, int offset_y) |
| Generate a new tensor descriptor (local memory version). | |
| __XETLA_API void | gpu::xetla::xetla_update_tdesc_offsetx (xetla_tdescriptor_ref tdesc, int32_t doffset_x) |
| Update the x coordinate in the given tensor descriptor. | |
| __XETLA_API void | gpu::xetla::xetla_update_tdesc_offsety (xetla_tdescriptor_ref tdesc, int32_t doffset_y) |
| Update the y coordinate in the given tensor descriptor. | |
| template<typename Ty , uint32_t N, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, bool transpose = false, bool transform = false, gpu_arch arch_tag = gpu_arch::Xe> | |
| __XETLA_API std::enable_if_t< arch_tag==gpu_arch::Xe, xetla_vector< Ty, N > > | gpu::xetla::xetla_tload_global (xetla_tdescriptor tdesc) |
| Tensor load API. | |
| template<typename Ty , uint32_t N, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, gpu_arch arch_tag = gpu_arch::Xe> | |
| __XETLA_API std::enable_if_t< arch_tag==gpu_arch::Xe, void > | gpu::xetla::xetla_tstore_global (xetla_tdescriptor tdesc, xetla_vector< Ty, N > data) |
| Tensor store API. | |
| template<typename Ty , cache_hint L1H = cache_hint::cached, cache_hint L2H = cache_hint::cached, gpu_arch arch_tag = gpu_arch::Xe> | |
| __XETLA_API std::enable_if_t< arch_tag==gpu_arch::Xe, void > | gpu::xetla::xetla_tprefetch_global (xetla_tdescriptor tdesc) |
| Tensor prefetch API. | |
| template<typename Ty , uint32_t N, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, atomic_op Op, gpu_arch arch_tag = gpu_arch::Xe, typename Toffset = uint32_t> | |
| __XETLA_API std::enable_if_t< arch_tag==gpu_arch::Xe, void > | gpu::xetla::xetla_tatomic_store_global (uint64_t base_address, xetla_vector< Toffset, N > offset, xetla_vector< Ty, N > data, xetla_mask< N > pred=1) |
| Tensor atomic store API. | |
C++ API.