DPC++ Runtime
Runtime libraries for oneAPI DPC++
matrix-aot-amx.hpp File Reference
#include <CL/sycl/detail/defines_elementary.hpp>
#include <immintrin.h>
Include dependency graph for matrix-aot-amx.hpp:

Go to the source code of this file.

Classes

class  cl::sycl::ext::intel::detail::submatrix< T >
 
struct  cl::sycl::ext::intel::detail::elems_per_dword< T >
 
struct  cl::sycl::ext::intel::experimental::matrix::joint_matrix< Group, T, NumRows, NumCols, Layout, Enabled >
 
struct  cl::sycl::ext::intel::experimental::matrix::joint_matrix< Group, T, NumRows, NumCols, Layout, typename std::enable_if<!((NumRows<=tile_size) &&(NumCols *sizeof(T)/4<=tile_size) &&(Layout !=matrix_layout::col_major))>::type >
 
struct  cl::sycl::ext::intel::experimental::matrix::joint_matrix< Group, T, NumRows, NumCols, Layout, typename std::enable_if<(NumRows<=tile_size) &&(NumCols *sizeof(T)/4<=tile_size)>::type >
 

Namespaces

 cl
 We provide new interfaces for matrix multiply in this patch:
 
 cl::sycl
 
 cl::sycl::ext
 
 cl::sycl::ext::intel
 
 cl::sycl::ext::intel::detail
 
 cl::sycl::ext::intel::experimental
 
 cl::sycl::ext::intel::experimental::matrix
 

Macros

#define ELEMS_PER_DWORD(TYPE, NUM)
 

Enumerations

enum  cl::sycl::ext::intel::experimental::matrix::matrix_layout { cl::sycl::ext::intel::experimental::matrix::matrix_layout::row_major, cl::sycl::ext::intel::experimental::matrix::matrix_layout::col_major, cl::sycl::ext::intel::experimental::matrix::matrix_layout::packed_a, cl::sycl::ext::intel::experimental::matrix::matrix_layout::packed_b }
 

Functions

static _tile1024i cl::sycl::ext::intel::experimental::matrix::tileloadd64_internal (short row, short col, char *buf, size_t stride)
 
static _tile1024i cl::sycl::ext::intel::experimental::matrix::tdpbssd_internal (unsigned short m, unsigned short n, unsigned short k, _tile1024i dst, _tile1024i src1, _tile1024i src2)
 
static _tile1024i cl::sycl::ext::intel::experimental::matrix::tdpbf16ps_internal (unsigned short m, unsigned short n, unsigned short k, _tile1024i dst, _tile1024i src1, _tile1024i src2)
 
static void cl::sycl::ext::intel::experimental::matrix::tilestored64_internal (short row, short col, char *buf, size_t stride, _tile1024i tile)
 
template<typename Group , typename T , size_t NumRows, size_t NumCols, matrix::matrix_layout Layout>
static __SYCL_ALWAYS_INLINE std::enable_if<(NumRows > matrix::tile_size)||(NumCols *sizeof(T)/4 > matrix::tile_size), void >::type cl::sycl::ext::intel::detail::submatrix_load (detail::submatrix< T > &sub_m, matrix::joint_matrix< Group, T, NumRows, NumCols, Layout > jm, uint32_t row, uint32_t col, size_t stride, matrix::matrix_layout layout, bool shouldreload)
 
template<typename Group , typename T , size_t NumRows, size_t NumCols, matrix::matrix_layout Layout>
static __SYCL_ALWAYS_INLINE std::enable_if<(NumRows<=matrix::tile_size) &&(NumCols *sizeof(T)/4<=matrix::tile_size), void >::type cl::sycl::ext::intel::detail::submatrix_load (detail::submatrix< T > &sub_m, matrix::joint_matrix< Group, T, NumRows, NumCols, Layout > &jm, uint32_t row, uint32_t col, size_t stride, matrix::matrix_layout layout, bool shouldreload)
 
static __SYCL_ALWAYS_INLINE void cl::sycl::ext::intel::detail::submatrix_mad (detail::submatrix< int8_t > &sub_ma, detail::submatrix< int8_t > &sub_mb, detail::submatrix< int32_t > &sub_mc)
 
static __SYCL_ALWAYS_INLINE void cl::sycl::ext::intel::detail::submatrix_mad (detail::submatrix< unsigned short > &sub_ma, detail::submatrix< unsigned short > &sub_mb, detail::submatrix< float > &sub_mc)
 
template<typename Group , typename T , size_t NumRows, size_t NumCols>
static __SYCL_ALWAYS_INLINE std::enable_if<(NumRows > matrix::tile_size)||(NumCols *sizeof(T)/4 > matrix::tile_size), void >::type cl::sycl::ext::intel::detail::submatrix_store (detail::submatrix< T > &sub_m, matrix::joint_matrix< Group, T, NumRows, NumCols > &jm, uint32_t row, uint32_t col, size_t stride, matrix::matrix_layout layout, bool shouldreload)
 
template<typename Group , typename T , size_t NumRows, size_t NumCols>
static __SYCL_ALWAYS_INLINE std::enable_if<(NumRows<=matrix::tile_size) &&(NumCols *sizeof(T)/4<=matrix::tile_size), void >::type cl::sycl::ext::intel::detail::submatrix_store (detail::submatrix< T > &sub_m, matrix::joint_matrix< Group, T, NumRows, NumCols > &jm, uint32_t row, uint32_t col, size_t stride, matrix::matrix_layout layout, bool shouldreload)
 
template<typename Group , typename T , size_t NumRows, size_t NumCols, matrix_layout Layout, access::address_space Space>
__SYCL_ALWAYS_INLINE std::enable_if<(NumRows > tile_size)||(NumCols *sizeof(T)/4 > tile_size), void >::type cl::sycl::ext::intel::experimental::matrix::joint_matrix_load (Group sg, joint_matrix< Group, T, NumRows, NumCols, Layout > &jm, multi_ptr< T, Space > src, size_t stride, matrix_layout layout)
 
template<typename Group , typename T , size_t NumRows, size_t NumCols, matrix_layout Layout, access::address_space Space>
__SYCL_ALWAYS_INLINE std::enable_if<(NumRows<=tile_size) &&(NumCols *sizeof(T)/4<=tile_size), void >::type cl::sycl::ext::intel::experimental::matrix::joint_matrix_load (Group sg, joint_matrix< Group, T, NumRows, NumCols, Layout > &jm, multi_ptr< T, Space > src, size_t stride, matrix_layout layout)
 
template<typename Group , typename T , size_t NumRows, size_t NumCols, matrix_layout Layout, access::address_space Space>
__SYCL_ALWAYS_INLINE std::enable_if<(NumRows > tile_size)||(NumCols *sizeof(T)/4 > tile_size), void >::type cl::sycl::ext::intel::experimental::matrix::joint_matrix_store (Group sg, joint_matrix< Group, T, NumRows, NumCols, Layout > &jm, multi_ptr< T, Space > dst, size_t stride, matrix_layout layout)
 
template<typename Group , typename T , size_t NumRows, size_t NumCols, matrix_layout Layout, access::address_space Space>
__SYCL_ALWAYS_INLINE std::enable_if<(NumRows<=tile_size) &&(NumCols *sizeof(T)/4<=tile_size), void >::type cl::sycl::ext::intel::experimental::matrix::joint_matrix_store (Group sg, joint_matrix< Group, T, NumRows, NumCols, Layout > &jm, multi_ptr< T, Space > dst, size_t stride, matrix_layout layout)
 
template<typename Group , typename T1 , typename T2 , size_t NumRowsA, size_t NumColsA, size_t NumRowsB, size_t NumColsB, size_t NumRowsC, size_t NumColsC, matrix_layout LayoutA, matrix_layout LayoutB, matrix_layout LayoutC>
__SYCL_ALWAYS_INLINE std::enable_if<((std::is_same< T1, int8_t >::value &&std::is_same< T2, int32_t >::value)||(std::is_same< T1, unsigned short >::value &&std::is_same< T2, float >::value)) &&(LayoutA==matrix_layout::row_major) &&(LayoutB==matrix_layout::packed_b) &&(LayoutC==matrix_layout::row_major), joint_matrix< Group, T2, NumRowsC, NumColsC, LayoutC > >::type cl::sycl::ext::intel::experimental::matrix::joint_matrix_mad (Group sg, joint_matrix< Group, T1, NumRowsA, NumColsA, LayoutA > &jmA, joint_matrix< Group, T1, NumRowsB, NumColsB, LayoutB > &jmB, joint_matrix< Group, T2, NumRowsC, NumColsC, LayoutC > &jmC)
 

Variables

constexpr size_t cl::sycl::ext::intel::detail::dynamic_extent = std::numeric_limits<size_t>::max()
 
constexpr size_t cl::sycl::ext::intel::experimental::matrix::tile_size = 16
 

Macro Definition Documentation

◆ ELEMS_PER_DWORD

#define ELEMS_PER_DWORD(TYPE, NUM)
Value:
template <> struct elems_per_dword<TYPE> { \
static constexpr size_t value = NUM; \
};

Definition at line 56 of file matrix-aot-amx.hpp.