13 #if defined(__SYCL_DEVICE_ONLY__)
14 #if defined(__NVPTX__)
16 #elif defined(__gfx90a__)
33 #include <type_traits>
36 inline namespace _V1 {
39 namespace experimental {
// The joint_matrix class template: a group-shared 2D matrix fragment
// parameterized on element type T, its role (use::a / use::b / use::accumulator),
// and compile-time dimensions Rows x Cols.
// NOTE(review): this excerpt is elided — interior lines (the per-backend impl
// members selected by the #if chain below, and the #endif lines) are not visible.
42 template <
typename Group,
typename T,
use Use,
size_t Rows,
size_t Cols,
// Per-backend storage: CUDA, HIP (gfx90a MFMA), or SPIR-V each provide their
// own implementation type; the bodies are elided from this view.
46 #if defined(__SYCL_DEVICE_ONLY__)
47 #if defined(__NVPTX__)
50 #elif defined(__HIP_PLATFORM_AMD_MFMA__)
53 #elif defined(__SPIR__) || defined(__SPIRV__)
// Any other device backend is rejected at compile time.
58 static_assert(
false,
"The joint_matrix API is only supported by the Intel, "
59 "CUDA and HIP (GFX90A) backends");
// Device compilation attaches IR attributes so the compiler/runtime can
// introspect the matrix type/use/shape of this kernel.
63 #if defined(__SYCL_DEVICE_ONLY__)
64 [[__sycl_detail__::add_ir_attributes_function(
65 "sycl-joint-matrix-type",
"sycl-joint-matrix-use",
66 "sycl-joint-matrix-rows",
"sycl-joint-matrix-cols",
67 sycl::detail::convertTypeToMatrixTypeString<T>(),
// Host fallback: joint_matrix has no host implementation.
71 #ifndef __SYCL_DEVICE_ONLY__
72 throw runtime_error(
"joint matrix is not supported on host device.",
73 PI_ERROR_INVALID_DEVICE);
76 #ifdef __SYCL_DEVICE_ONLY__
77 #if defined(__SPIR__) || defined(__SPIRV__)
// joint_matrix_apply (unary form): invokes `lambda` element-wise on each
// work-item-local element of matrix `jm`, writing results back in place.
// NOTE(review): elided excerpt — loop closing braces and the SPIR-V-path
// surrounding lines are not visible here.
84 template <
typename Group,
typename T,
use Use,
size_t M,
size_t N,
89 #if defined(__SYCL_DEVICE_ONLY__)
// CUDA / HIP path: iterate the work-item's marray slice directly.
90 #if defined(__NVPTX__) || defined(__HIP_PLATFORM_AMD_MFMA__)
92 for (
int i = 0; i < jm.matrix_impl.wi_marray.size(); i++) {
93 lambda(jm.matrix_impl.wi_marray[i]);
// SPIR-V path: elements are accessed through wi_data with a per-type
// storage_element_type (e.g. for tf32-style storage — see detail traits).
96 using storage_element_type =
98 T>::storage_element_type;
100 for (
int i = 0; i < wi_data_c.length(); i++) {
101 storage_element_type element = wi_data_c[i];
// Write the (lambda-modified, presumably — call site elided) element back.
103 wi_data_c[i] = element;
// Host fallback: not supported.
109 std::ignore = lambda;
110 throw runtime_error(
"joint matrix is not supported on host device.",
111 PI_ERROR_INVALID_DEVICE);
// joint_matrix_apply (binary form): invokes `lambda(src_elem, dest_elem)` for
// each element pair of `jmsrc`/`jmdest`, writing the mutated dest element back.
// NOTE(review): elided excerpt — signature line and loop closers not visible.
116 template <
typename Group,
typename T,
use Use,
size_t M,
size_t N,
117 layout Layout,
typename F>
122 #if defined(__SYCL_DEVICE_ONLY__)
// CUDA / HIP path: element-wise over the work-item marrays of both matrices.
123 #if defined(__NVPTX__) || defined(__HIP_PLATFORM_AMD_MFMA__)
125 for (
int i = 0; i < jmsrc.matrix_impl.wi_marray.size(); i++) {
126 lambda(jmsrc.matrix_impl.wi_marray[i], jmdest.matrix_impl.wi_marray[i]);
// SPIR-V path: go through wi_data and the storage element type.
129 using storage_element_type =
131 T>::storage_element_type;
134 for (
int i = 0; i < wi_data_c.length(); i++) {
135 storage_element_type elementsrc = wi_data_c[i];
136 storage_element_type elementdest = wi_data_d[i];
137 lambda(elementsrc, elementdest);
// Only the destination is written back; the source is read-only here.
138 wi_data_d[i] = elementdest;
// Host fallback: not supported.
144 std::ignore = jmdest;
145 std::ignore = lambda;
146 throw runtime_error(
"joint matrix is not supported on host device.",
147 PI_ERROR_INVALID_DEVICE);
// joint_matrix_fill: broadcasts scalar `v` (of type T2, converted to the
// matrix storage element type) into every element of `res`.
152 template <
typename Group,
typename T,
size_t NumRows,
size_t NumCols,
use Use,
153 layout Layout,
typename T2>
158 #if defined(__SYCL_DEVICE_ONLY__)
// CUDA / HIP path: marray assignment broadcasts the scalar.
159 #if defined(__NVPTX__) || defined(__HIP_PLATFORM_AMD_MFMA__)
160 res.matrix_impl.wi_marray = v;
// SPIR-V path: construct the matrix via __spirv_CompositeConstruct from the
// converted scalar (surrounding lines elided in this view).
162 using storage_element_type =
164 T>::storage_element_type;
166 __spirv_CompositeConstruct<storage_element_type, T, NumRows, NumCols,
169 static_cast<storage_element_type
>(v));
// Host fallback: not supported.
174 throw runtime_error(
"joint matrix is not supported on host device.",
175 PI_ERROR_INVALID_DEVICE);
// joint_matrix_load (accumulator overload, multi_ptr source): loads an
// accumulator matrix with dynamic layout from `src` with the given `stride`.
// Enabled only when S matches remove_const_t<T>.
// NOTE(review): elided excerpt — the leading `template <` line and the
// static_assert condition on Space are outside this view.
180 typename Group,
typename S,
typename T,
size_t NumRows,
size_t NumCols,
182 std::enable_if_t<std::is_same<S, std::remove_const_t<T>>::value,
bool> =
187 sycl::ext::oneapi::experimental::matrix::layout::dynamic> &res,
190 #if defined(__SYCL_DEVICE_ONLY__)
// Private address space is rejected (message text visible below).
192 "Joint Matrix doesn't support load from private memory!");
193 #if defined(__NVPTX__)
195 sycl::ext::oneapi::detail::load_accumulator_cuda(res.matrix_impl, src, stride,
197 #elif defined(__HIP_PLATFORM_AMD_MFMA__)
// SPIR-V path: decorate the pointer for its address space, then call the
// JointMatrixLoadINTEL intrinsic (argument tail elided).
202 using DecorT =
typename sycl::detail::DecoratedType<T, Space>::type;
203 DecorT *Ptr = sycl::detail::getDecorated<DecorT>(src);
204 res.spvm = __spirv_JointMatrixLoadINTEL<
205 DecorT, S, NumRows, NumCols,
// Host fallback: not supported.
215 std::ignore = stride;
216 std::ignore = Layout;
217 throw runtime_error(
"joint matrix is not supported on host device.",
218 PI_ERROR_INVALID_DEVICE);
// joint_matrix_load (multiplicand overload, multi_ptr source): loads an
// A/B-use matrix with a compile-time layout. Enabled when S == T (modulo
// const), or when S is precision::tf32 and T is float (tf32 is loaded from
// float storage).
223 typename Group,
typename S,
typename T,
use Use,
size_t NumRows,
226 std::enable_if_t<std::is_same<S, std::remove_const_t<T>>::value ||
227 (std::is_same<S, precision::tf32>::value &&
228 std::is_same<std::remove_const_t<T>,
float>::value),
234 #if defined(__SYCL_DEVICE_ONLY__)
// Private address space is rejected.
236 "Joint Matrix doesn't support load from private memory!");
237 #if defined(__NVPTX__)
239 sycl::ext::oneapi::detail::load_multiplicand_cuda<S, T, NumRows, NumCols, Use,
241 res.matrix_impl, src, stride);
242 #elif defined(__HIP_PLATFORM_AMD_MFMA__)
244 NumCols, Use, Layout, Space>(
245 res.matrix_impl, src, stride, sg);
// SPIR-V path: decorated pointer + JointMatrixLoadINTEL (tail elided).
248 using DecorT =
typename sycl::detail::DecoratedType<T, Space>::type;
249 DecorT *Ptr = sycl::detail::getDecorated<DecorT>(src);
251 __spirv_JointMatrixLoadINTEL<DecorT, S, NumRows, NumCols,
// Host fallback: not supported.
261 std::ignore = stride;
262 throw runtime_error(
"joint matrix is not supported on host device.",
263 PI_ERROR_INVALID_DEVICE);
// joint_matrix_load (accumulator overload, annotated/raw-pointer source with a
// compile-time property list). Only the SPIR-V (Intel) backend supports this
// pointer form; CUDA and HIP callers are told to use the multi_ptr overload.
267 template <
typename Group,
typename S,
typename T,
size_t NumRows,
268 size_t NumCols,
typename PropertyListT,
269 std::enable_if_t<std::is_same<S, std::remove_const_t<T>>::value,
274 sycl::ext::oneapi::experimental::matrix::layout::dynamic> &res,
277 #if defined(__SYCL_DEVICE_ONLY__)
278 #if defined(__NVPTX__)
280 throw runtime_error(
"Use joint_matrix_load on multi_ptr on Nvidia device.",
281 PI_ERROR_INVALID_DEVICE);
282 #elif defined(__HIP_PLATFORM_AMD_MFMA__)
283 throw runtime_error(
"Use joint_matrix_load on multi_ptr on AMD device.",
284 PI_ERROR_INVALID_DEVICE);
// SPIR-V path (intrinsic argument list elided in this view).
288 res.spvm = __spirv_JointMatrixLoadINTEL<
// Host fallback: not supported.
298 std::ignore = stride;
299 std::ignore = Layout;
300 throw runtime_error(
"joint matrix is not supported on host device.",
301 PI_ERROR_INVALID_DEVICE);
// joint_matrix_load (multiplicand overload, annotated/raw-pointer source).
// Same enable_if as the multi_ptr multiplicand overload (S == T, or tf32 from
// float); only the SPIR-V backend supports this pointer form.
// NOTE(review): elided excerpt — the leading `template <` line is not visible.
306 typename Group,
typename S,
typename T,
use Use,
size_t NumRows,
308 std::enable_if_t<std::is_same<S, std::remove_const_t<T>>::value ||
309 (std::is_same<S, precision::tf32>::value &&
310 std::is_same<std::remove_const_t<T>,
float>::value),
316 #if defined(__SYCL_DEVICE_ONLY__)
317 #if defined(__NVPTX__)
319 throw runtime_error(
"Use joint_matrix_load on multi_ptr on Nvidia device.",
320 PI_ERROR_INVALID_DEVICE);
321 #elif defined(__HIP_PLATFORM_AMD_MFMA__)
322 throw runtime_error(
"Use joint_matrix_load on multi_ptr on AMD device.",
323 PI_ERROR_INVALID_DEVICE);
// SPIR-V path: note T is used directly here (no DecorT) — the raw-pointer
// form presumably carries its address space already; tail elided.
328 __spirv_JointMatrixLoadINTEL<T, S, NumRows, NumCols,
// Host fallback: not supported.
338 std::ignore = stride;
339 throw runtime_error(
"joint matrix is not supported on host device.",
340 PI_ERROR_INVALID_DEVICE);
// joint_matrix_store (multi_ptr destination): stores an accumulator matrix
// (dynamic layout) to `dst` with the given `stride` and runtime `Layout`.
344 template <
typename Group,
typename T,
size_t NumRows,
size_t NumCols,
349 sycl::ext::oneapi::experimental::matrix::layout::dynamic>
353 #if defined(__SYCL_DEVICE_ONLY__)
// Private address space is rejected.
355 "Joint Matrix doesn't support store to private memory!");
356 #if defined(__NVPTX__)
358 sycl::ext::oneapi::detail::joint_matrix_store_cuda<T, NumRows, NumCols,
360 src.matrix_impl, dst, stride, Layout);
361 #elif defined(__HIP_PLATFORM_AMD_MFMA__)
363 Space>(src.matrix_impl, dst,
// SPIR-V path: decorated pointer + JointMatrixStoreINTEL (tail elided).
367 using DecorT =
typename sycl::detail::DecoratedType<T, Space>::type;
368 DecorT *Ptr = sycl::detail::getDecorated<DecorT>(dst);
369 __spirv_JointMatrixStoreINTEL<
370 DecorT, T, NumRows, NumCols,
// Host fallback: not supported.
380 std::ignore = stride;
381 std::ignore = Layout;
382 throw runtime_error(
"joint matrix is not supported on host device.",
383 PI_ERROR_INVALID_DEVICE);
// joint_matrix_store (annotated/raw-pointer destination with property list).
// Only the SPIR-V backend supports this form; CUDA and HIP callers are
// directed to the multi_ptr overload, mirroring the load overloads above.
387 template <
typename Group,
typename T,
size_t NumRows,
size_t NumCols,
388 typename PropertyListT>
392 sycl::ext::oneapi::experimental::matrix::layout::dynamic>
396 #if defined(__SYCL_DEVICE_ONLY__)
397 #if defined(__NVPTX__)
399 throw runtime_error(
"Use joint_matrix_store on multi_ptr on Nvidia device.",
400 PI_ERROR_INVALID_DEVICE);
401 #elif defined(__HIP_PLATFORM_AMD_MFMA__)
402 throw runtime_error(
"Use joint_matrix_store on multi_ptr on AMD device.",
403 PI_ERROR_INVALID_DEVICE);
// SPIR-V path (intrinsic argument list elided in this view).
407 __spirv_JointMatrixStoreINTEL<
// Host fallback: not supported.
417 std::ignore = stride;
418 std::ignore = Layout;
419 throw runtime_error(
"joint matrix is not supported on host device.",
420 PI_ERROR_INVALID_DEVICE);
// joint_matrix_mad: D = A * B + C for MxK (A), KxN (B), MxN (C, D).
// Device IR attributes expose the mad operand types and shape for
// compiler/runtime introspection.
424 template <
typename Group,
typename Ta,
typename Tb,
typename Tc,
typename Td,
425 std::size_t M, std::size_t K, std::size_t N,
layout LayoutA,
427 #if defined(__SYCL_DEVICE_ONLY__)
428 [[__sycl_detail__::add_ir_attributes_function(
429 "sycl-joint-matrix-mad-type-A",
"sycl-joint-matrix-mad-type-B",
430 "sycl-joint-matrix-mad-type-C",
"sycl-joint-matrix-mad-type-D",
431 "sycl-joint-matrix-mad-size-M",
"sycl-joint-matrix-mad-size-K",
432 "sycl-joint-matrix-mad-size-N",
433 sycl::detail::convertTypeToMatrixTypeString<Ta>(),
434 sycl::detail::convertTypeToMatrixTypeString<Tb>(),
435 sycl::detail::convertTypeToMatrixTypeString<Tc>(),
436 sycl::detail::convertTypeToMatrixTypeString<Td>(), M, K, N)]]
442 sycl::ext::oneapi::experimental::matrix::layout::dynamic> &D,
446 sycl::ext::oneapi::experimental::matrix::layout::dynamic>
448 #if defined(__SYCL_DEVICE_ONLY__)
// CUDA path: requires Ta == Tb (checked via if constexpr + runtime assert).
449 #if defined(__NVPTX__)
450 if constexpr (std::is_same<Ta, Tb>::value) {
451 sycl::ext::oneapi::detail::joint_matrix_mad_cuda<Ta, Tc, Td, M, K, N,
453 D.matrix_impl, A.matrix_impl, B.matrix_impl, C.matrix_impl);
455 assert(
false &&
"Ta != Tb : In the CUDA backend joint_matrix_mad "
456 "requires that joint_matrix data types Ta and Tb match");
// HIP path: same Ta == Tb requirement.
458 #elif defined(__HIP_PLATFORM_AMD_MFMA__)
459 if constexpr (std::is_same<Ta, Tb>::value) {
462 D.matrix_impl, A.matrix_impl, B.matrix_impl, C.matrix_impl);
464 assert(
false &&
"Ta != Tb : In the HIP backend joint_matrix_mad "
465 "requires that joint_matrix data types Ta and Tb match");
// SPIR-V path: dispatch on operand signedness to the matching Mad intrinsic.
// bf16-as-uint16 with float accumulator uses the plain Mad intrinsic —
// checked FIRST so it is not misrouted to the unsigned (UU) variant below.
468 if constexpr (std::is_same<Ta, uint16_t>::value &&
469 std::is_same<Tb, uint16_t>::value &&
470 std::is_same<Tc, float>::value)
471 D.spvm = __spirv_JointMatrixMadINTEL(A.spvm, B.spvm, C.spvm);
472 else if constexpr (std::is_unsigned<Ta>::value && std::is_unsigned<Tb>::value)
473 D.spvm = __spirv_JointMatrixUUMadINTEL(A.spvm, B.spvm, C.spvm);
474 else if constexpr (std::is_signed<Ta>::value && std::is_unsigned<Tb>::value)
475 D.spvm = __spirv_JointMatrixSUMadINTEL(A.spvm, B.spvm, C.spvm);
476 else if constexpr (std::is_unsigned<Ta>::value && std::is_signed<Tb>::value)
477 D.spvm = __spirv_JointMatrixUSMadINTEL(A.spvm, B.spvm, C.spvm);
// Default (signed/signed and floating-point types): plain Mad.
479 D.spvm = __spirv_JointMatrixMadINTEL(A.spvm, B.spvm, C.spvm);
// Host fallback: not supported.
486 throw runtime_error(
"joint matrix is not supported on host device.",
487 PI_ERROR_INVALID_DEVICE);
// joint_matrix_copy: element-wise copy from `src` (type T1) to `dst` (type
// T2), converting each element through dst's storage element type.
491 template <
typename Group,
typename T1,
typename T2,
size_t Rows,
size_t Cols,
496 #if defined(__SYCL_DEVICE_ONLY__)
// CUDA / HIP path: whole-marray assignment (implicit per-element conversion).
497 #if defined(__NVPTX__) || defined(__HIP_PLATFORM_AMD_MFMA__)
499 dst.matrix_impl.wi_marray = src.matrix_impl.wi_marray;
// SPIR-V path: per-element static_cast through the storage element type.
501 using storage_element_type =
503 T2>::storage_element_type;
506 for (
int i = 0; i < wi_data_c.length(); i++) {
507 wi_data_dst[i] =
static_cast<storage_element_type
>(wi_data_c[i]);
// Host fallback: not supported.
514 throw runtime_error(
"joint matrix is not supported on host device.",
515 PI_ERROR_INVALID_DEVICE);
// round_to_tf32 body: rounds a float to tf32 precision (19 significant
// mantissa bits dropped to 10) while keeping the float representation.
// NOTE(review): the function signature line is elided from this view.
522 #if defined(__SYCL_DEVICE_ONLY__)
// CUDA: hardware round-to-nearest tf32 conversion, bitcast back to float.
523 #if defined(__NVPTX__)
524 int32_t tmp_int = __nvvm_f2tf32_rna(
a);
525 return __nvvm_bitcast_i2f(tmp_int);
// SPIR-V: dedicated Intel rounding intrinsic.
527 return __spirv_RoundFToTF32INTEL(
a);
// Host/other: emulate by masking off the low 13 mantissa bits (truncation,
// not round-to-nearest — behavior may differ slightly from the device paths).
530 uint32_t tmp_uint =
reinterpret_cast<const uint32_t &
>(
a);
532 tmp_uint &= 0xFFFFE000u;
534 std::memcpy(&ret, &tmp_uint,
sizeof(
float));
// joint_matrix_prefetch: hints the hardware to prefetch an NumRows x NumCols
// tile at Ptr. SPIR-V (Intel) only; Nvidia and AMD report unsupported.
539 template <
size_t NumRows,
size_t NumCols,
typename Group,
typename T,
545 #if defined(__SYCL_DEVICE_ONLY__)
546 #if defined(__NVPTX__)
550 "joint_matrix_prefetch is not supported on Nvidia device.",
551 PI_ERROR_INVALID_DEVICE);
552 #elif defined(__HIP_PLATFORM_AMD_MFMA__)
555 throw runtime_error(
"joint_matrix_prefetch is not supported on AMD device.",
556 PI_ERROR_INVALID_DEVICE);
// SPIR-V path: read the prefetch_hint property (cache level, presumably —
// usage elided) and issue the cooperative-matrix prefetch intrinsic.
559 auto prop =
properties.template get_property<prefetch_hint_key>();
563 __spirv_CooperativeMatrixPrefetchINTEL<T>(
564 Ptr, coordX, coordY, NumRows, NumCols,
// Host fallback: not supported.
571 std::ignore = stride;
572 std::ignore = Layout;
574 throw runtime_error(
"joint matrix is not supported on host device.",
575 PI_ERROR_INVALID_DEVICE);
// NOTE(review): the lines below appear to be an auto-extracted index of
// declarations/signatures (doxygen-style dump), not compilable code — they
// lack bodies and trailing semicolons. Left untouched; verify against the
// original header before treating them as source.
#define __SYCL_ALWAYS_INLINE
__SYCL_ALWAYS_INLINE __spv::MatrixLayout joint_matrix_layout_to_spv(sycl::ext::oneapi::experimental::matrix::layout Layout)
constexpr const char * convertMatrixUseEnumToString(ext::oneapi::experimental::matrix::use Use)
void joint_matrix_store_hip(const joint_matrix_hip< T, sycl::ext::oneapi::experimental::matrix::use::accumulator, M, N, sycl::ext::oneapi::experimental::matrix::layout::dynamic > &src, multi_ptr< T, Space, IsDecorated > dst, size_t stride, sycl::ext::oneapi::experimental::matrix::layout layout, Group &sg)
void joint_matrix_mad_hip(joint_matrix_hip< Tc, sycl::ext::oneapi::experimental::matrix::use::accumulator, M, N, sycl::ext::oneapi::experimental::matrix::layout::dynamic > &D, const joint_matrix_hip< Tm, sycl::ext::oneapi::experimental::matrix::use::a, M, K, LayoutA > &A, const joint_matrix_hip< Tm, sycl::ext::oneapi::experimental::matrix::use::b, K, N, LayoutB > &B, const joint_matrix_hip< Tc, sycl::ext::oneapi::experimental::matrix::use::accumulator, M, N, sycl::ext::oneapi::experimental::matrix::layout::dynamic > &C)
void load_accumulator_hip(joint_matrix_hip< S, sycl::ext::oneapi::experimental::matrix::use::accumulator, M, N, sycl::ext::oneapi::experimental::matrix::layout::dynamic > &res, multi_ptr< T, Space, IsDecorated > src, size_t stride, sycl::ext::oneapi::experimental::matrix::layout layout, Group &sg)
void load_multiplicand_hip(joint_matrix_hip< S, Use, M, N, Layout > &res, multi_ptr< T, Space, IsDecorated > src, size_t stride, Group &sg)
decltype(auto) __SYCL_ALWAYS_INLINE get_wi_data(Group sg, sycl::ext::oneapi::experimental::matrix::joint_matrix< Group, T, Use, Rows, Cols, Layout > &jm)
__SYCL_ALWAYS_INLINE float round_to_tf32(const float &a)
void joint_matrix_copy(Group sg, joint_matrix< Group, T1, Use1, Rows, Cols, Layout1 > &src, joint_matrix< Group, T2, Use2, Rows, Cols, Layout2 > &dst)
__SYCL_ALWAYS_INLINE void joint_matrix_load(Group sg, joint_matrix< Group, S, use::accumulator, NumRows, NumCols, sycl::ext::oneapi::experimental::matrix::layout::dynamic > &res, multi_ptr< T, Space, IsDecorated > src, size_t stride, sycl::ext::oneapi::experimental::matrix::layout Layout)
__SYCL_ALWAYS_INLINE void joint_matrix_fill(Group, joint_matrix< Group, T, Use, NumRows, NumCols, Layout > &res, const T2 &v)
__SYCL_ALWAYS_INLINE void joint_matrix_store(Group sg, const joint_matrix< Group, T, use::accumulator, NumRows, NumCols, sycl::ext::oneapi::experimental::matrix::layout::dynamic > &src, multi_ptr< T, Space, IsDecorated > dst, size_t stride, sycl::ext::oneapi::experimental::matrix::layout Layout)
__SYCL_ALWAYS_INLINE void joint_matrix_mad(Group, joint_matrix< Group, Td, use::accumulator, M, N, sycl::ext::oneapi::experimental::matrix::layout::dynamic > &D, const joint_matrix< Group, Ta, use::a, M, K, LayoutA > &A, const joint_matrix< Group, Tb, use::b, K, N, LayoutB > &B, const joint_matrix< Group, Tc, use::accumulator, M, N, sycl::ext::oneapi::experimental::matrix::layout::dynamic > &C)
__SYCL_ALWAYS_INLINE void joint_matrix_prefetch(Group sg, T *Ptr, size_t stride, sycl::ext::oneapi::experimental::matrix::layout Layout, Properties properties={})
__SYCL_ALWAYS_INLINE void joint_matrix_apply(Group sg, joint_matrix< Group, T, Use, M, N, Layout > &jm, F &&lambda)
annotated_arg & operator=(annotated_arg &)=default
decltype(properties{}) empty_properties_t