XeTLA v0.3.6
Intel® Xe Templates for Linear Algebra - API Definition Document
 
Loading...
Searching...
No Matches
common.hpp File Reference

C++ API. More...

Include dependency graph for common.hpp:
This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Namespaces

namespace  gpu
 
namespace  gpu::xetla
 
namespace  gpu::xetla::detail
 

Enumerations

enum class  gpu::xetla::detail::lsc_action : uint8_t { gpu::xetla::detail::prefetch , gpu::xetla::detail::load , gpu::xetla::detail::store , gpu::xetla::detail::atomic }
 
enum class  gpu::xetla::reg_layout : uint8_t {
  gpu::xetla::linear = 0 , gpu::xetla::tiled = 1 , gpu::xetla::vnni_tiled = 2 , gpu::xetla::transpose_tiled = 3 ,
  gpu::xetla::vnni_tiled_col_major = 4
}
 Tile layout in register. linear: linear layout with one tile; tiled: 2d block stacked in raster order; vnni_tiled: vnni pack with 2d block, and 2d block stacked in raster order. For dword and qword, there is no impact; for word, two rows are interleaved, i.e. More...
 
enum class  gpu::xetla::store_op : uint8_t {
  gpu::xetla::normal = 0 , gpu::xetla::atomic_fadd = 1 , gpu::xetla::atomic_iadd = 2 , gpu::xetla::scattered_transpose = 3 ,
  gpu::xetla::block_1d = 4
}
 
enum class  gpu::xetla::mma_engine : uint8_t { gpu::xetla::xmx = 0 , gpu::xetla::fpu = 1 }
 
enum class  gpu::xetla::memory_op : uint8_t { gpu::xetla::load = 0 , gpu::xetla::store = 1 }
 
enum class  gpu::xetla::tdesc_update_dir : uint8_t { gpu::xetla::x_dir = 0 , gpu::xetla::y_dir = 1 }
 
enum class  gpu::xetla::post_kind : uint8_t {
  gpu::xetla::none = 0 , gpu::xetla::relu = 1 , gpu::xetla::gelu = 2 , gpu::xetla::gelu_bwd_w = 3 ,
  gpu::xetla::sigmoid = 4 , gpu::xetla::tanh = 5
}
 
enum class  gpu::xetla::pre_kind : uint8_t { gpu::xetla::none = 0 , gpu::xetla::bias_add = 1 , gpu::xetla::res_add = 2 }
 
enum class  gpu::xetla::offset_mode : uint8_t { gpu::xetla::const_offset = 0 , gpu::xetla::cyclic_offset = 1 , gpu::xetla::acyclic_offset = 2 }
 

Functions

template<uint32_t element_size>
constexpr uint32_t gpu::xetla::detail::get_element_size_code ()
 Get the element size code object.
 
template<lsc_action Action, cache_hint L1H, cache_hint L2H, gpu_arch arch_tag>
constexpr std::enable_if_t< arch_tag==gpu_arch::Xe, void > gpu::xetla::detail::check_lsc_cache_hint ()
 
template<cache_hint L1H, cache_hint L2H, gpu_arch arch_tag>
constexpr std::enable_if_t< arch_tag==gpu_arch::Xe, uint32_t > gpu::xetla::detail::get_load_cache_hint_code ()
 
template<cache_hint L1H, cache_hint L2H, gpu_arch arch_tag>
constexpr std::enable_if_t< arch_tag==gpu_arch::Xe, uint32_t > gpu::xetla::detail::get_prefetch_cache_hint_code ()
 
template<cache_hint L1H, cache_hint L2H, gpu_arch arch_tag>
constexpr std::enable_if_t< arch_tag==gpu_arch::Xe, uint32_t > gpu::xetla::detail::get_store_cache_hint_code ()
 
template<cache_hint L1H, cache_hint L2H, gpu_arch arch_tag>
constexpr std::enable_if_t< arch_tag==gpu_arch::Xe, uint32_t > gpu::xetla::detail::get_atomic_cache_hint_code ()
 
template<uint32_t num_channel>
constexpr uint32_t gpu::xetla::detail::get_execSize_code ()
 
template<atomic_op Op>
constexpr uint32_t gpu::xetla::detail::get_atomic_opcode ()
 
template<typename kernel_t >
void gpu::xetla::slm_barrier_init ()
 Initialize the local memory size and named barrier count with kernel_t.
 
template<uint32_t slm_size, uint32_t nbarrier_count>
void gpu::xetla::slm_barrier_init ()
 Initialize the local memory size and named barrier count.
 

Detailed Description

C++ API.