|
| template<typename Ty , uint32_t block_width = 1, uint32_t block_height = 1, uint8_t array_len = 1> |
| __XETLA_API void | gpu::xetla::xetla_fill_tdesc (xetla_tdescriptor_ref tdesc, Ty *p, int tensor_width, int tensor_height, int tensor_pitch, int offset_x, int offset_y) |
| | Tensor descriptor construction (global memory version).
|
| |
| template<typename Ty > |
| __XETLA_API void | gpu::xetla::xetla_fill_tdesc (xetla_tdescriptor_ref tdesc, uint32_t base_address, int tensor_width, int tensor_height, int tensor_pitch, int offset_x, int offset_y) |
| | Tensor descriptor construction (local memory version).
|
| |
| template<typename Ty , uint32_t block_width = 1, uint32_t block_height = 1, uint8_t array_len = 1> |
| __XETLA_API xetla_tdescriptor | gpu::xetla::xetla_get_tdesc (Ty *p, int tensor_width, int tensor_height, int tensor_pitch, int offset_x, int offset_y) |
| | Generate a new tensor descriptor (global memory version).
|
| |
| template<typename Ty > |
| __XETLA_API xetla_tdescriptor | gpu::xetla::xetla_get_tdesc (uint32_t base_address, int tensor_width, int tensor_height, int tensor_pitch, int offset_x, int offset_y) |
| | Generate a new tensor descriptor (local memory version).
|
| |
| __XETLA_API void | gpu::xetla::xetla_update_tdesc_offsetx (xetla_tdescriptor_ref tdesc, int32_t doffset_x) |
| | Update the x coordinate in the given tensor descriptor.
|
| |
| __XETLA_API void | gpu::xetla::xetla_update_tdesc_offsety (xetla_tdescriptor_ref tdesc, int32_t doffset_y) |
| | Update the y coordinate in the given tensor descriptor.
|
| |
| template<typename Ty , uint32_t N, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, bool transpose = false, bool transform = false, gpu_arch arch_tag = gpu_arch::Xe> |
| __XETLA_API std::enable_if_t< arch_tag==gpu_arch::Xe, xetla_vector< Ty, N > > | gpu::xetla::xetla_tload_global (xetla_tdescriptor tdesc) |
| | Tensor load API.
|
| |
| template<typename Ty , uint32_t N, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, gpu_arch arch_tag = gpu_arch::Xe> |
| __XETLA_API std::enable_if_t< arch_tag==gpu_arch::Xe, void > | gpu::xetla::xetla_tstore_global (xetla_tdescriptor tdesc, xetla_vector< Ty, N > data) |
| | Tensor store API.
|
| |
| template<typename Ty , cache_hint L1H = cache_hint::cached, cache_hint L2H = cache_hint::cached, gpu_arch arch_tag = gpu_arch::Xe> |
| __XETLA_API std::enable_if_t< arch_tag==gpu_arch::Xe, void > | gpu::xetla::xetla_tprefetch_global (xetla_tdescriptor tdesc) |
| | Tensor prefetch API.
|
| |
| template<typename Ty , uint32_t N, cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none, atomic_op Op, gpu_arch arch_tag = gpu_arch::Xe, typename Toffset = uint32_t> |
| __XETLA_API std::enable_if_t< arch_tag==gpu_arch::Xe, void > | gpu::xetla::xetla_tatomic_store_global (uint64_t base_address, xetla_vector< Toffset, N > offset, xetla_vector< Ty, N > data, xetla_mask< N > pred=1) |
| | Tensor atomic store API.
|
| |
Implements the tensor load/store functionality using raw send instructions.
template<typename Ty , uint32_t block_width = 1, uint32_t block_height = 1, uint8_t array_len = 1>
| __XETLA_API void gpu::xetla::xetla_fill_tdesc |
( |
xetla_tdescriptor_ref |
tdesc, |
|
|
Ty * |
p, |
|
|
int |
tensor_width, |
|
|
int |
tensor_height, |
|
|
int |
tensor_pitch, |
|
|
int |
offset_x, |
|
|
int |
offset_y |
|
) |
| |
Tensor descriptor construction (global memory version).
Constructs a tensor descriptor based on the given arguments. Check here for more details.
- Template Parameters
-
| Ty | is the data type per element. |
| block_width | is the width of the block to be loaded. |
| block_height | is the height of the block to be loaded. |
| array_len | is the array length of the block to be loaded. |
- Parameters
-
| tdesc | [in|out] is a reference to the tensor descriptor. |
| p | [in] is the base address pointer of the tensor. |
| tensor_width | [in] is the width of the tensor. |
| tensor_height | [in] is the height of the tensor. |
| tensor_pitch | [in] is the pitch (physical width of the tensor in memory). |
| offset_x | [in] is the x coordinate of the start point. |
| offset_y | [in] is the y coordinate of the start point. |
template<typename Ty , uint32_t block_width = 1, uint32_t block_height = 1, uint8_t array_len = 1>
| __XETLA_API xetla_tdescriptor gpu::xetla::xetla_get_tdesc |
( |
Ty * |
p, |
|
|
int |
tensor_width, |
|
|
int |
tensor_height, |
|
|
int |
tensor_pitch, |
|
|
int |
offset_x, |
|
|
int |
offset_y |
|
) |
| |
Generate a new tensor descriptor (global memory version).
Generates a tensor descriptor based on the given arguments. Check here for more details.
- Template Parameters
-
| Ty | is the data type per element. |
| block_width | is the width of the block to be loaded. |
| block_height | is the height of the block to be loaded. |
| array_len | is the array length of the block to be loaded. |
- Parameters
-
| p | [in] is the base address pointer of the tensor. |
| tensor_width | [in] is the width of the tensor. |
| tensor_height | [in] is the height of the tensor. |
| tensor_pitch | [in] is the pitch (physical width of the tensor in memory). |
| offset_x | [in] is the x coordinate of the start point. |
| offset_y | [in] is the y coordinate of the start point. |
- Returns
- return a new tensor