xetla/math__general_8hpp_source.html

/*******************************************************************************

* Copyright (c) 2022-2023 Intel Corporation

*

* Licensed under the Apache License, Version 2.0 (the "License");

* you may not use this file except in compliance with the License.

* You may obtain a copy of the License at

*

*     http://www.apache.org/licenses/LICENSE-2.0

*

* Unless required by applicable law or agreed to in writing, software

* distributed under the License is distributed on an "AS IS" BASIS,

* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

* See the License for the specific language governing permissions and

* limitations under the License.

*******************************************************************************/


#pragma once


#include "common/core/base_ops.hpp"

#include "common/core/base_types.hpp"

#include "common/core/common.hpp"

#pragma clang diagnostic push

#pragma clang diagnostic ignored "-Wunused-parameter"

namespace gpu::xetla {


/// @brief Get absolute value (vector version) with a separate result element type.
/// Thin wrapper that forwards to __ESIMD_NS::abs<T0, T1, SZ>.
/// @tparam T0 Element type of the returned vector.
/// @tparam T1 Element type of the input vector.
/// @tparam SZ Number of elements in both vectors.
/// @param src0 Input vector.
/// @return The vector produced by __ESIMD_NS::abs, with SZ elements of type T0.
template <typename T0, typename T1, int SZ>
__XETLA_API xetla_vector<T0, SZ> xetla_abs(xetla_vector<T1, SZ> src0) {
    // Neither the source nor the destination element type may be an
    // xetla internal type.
    constexpr bool uses_internal_type
            = is_internal_type<T0>::value || is_internal_type<T1>::value;
    static_assert(
            !uses_internal_type, "The internal types are not yet supported!");
    return __ESIMD_NS::abs<T0, T1, SZ>(src0);
}


/// @brief Get absolute value (scalar version) with a separate result type.
/// This overload only participates in overload resolution when T0 and T1
/// differ after stripping const; it forwards to __ESIMD_NS::abs<T0, T1>.
/// @tparam T0 Result type (returned with const removed).
/// @tparam T1 Type of the input scalar.
/// @param src0 Input scalar.
/// @return The value produced by __ESIMD_NS::abs, as remove_const_t<T0>.
template <typename T0, typename T1>
std::enable_if_t<!std::is_same<remove_const_t<T0>, remove_const_t<T1>>::value,
        remove_const_t<T0>>
        __XETLA_API xetla_abs(T1 src0) {
    // Both types must be non-internal.
    static_assert(
            !is_internal_type<T0>::value && !is_internal_type<T1>::value,
            "The internal types are not yet supported!");
    return __ESIMD_NS::abs<T0, T1>(src0);
}


/// @brief Get absolute value (vector version), result type equal to input type.
/// Forwards to __ESIMD_NS::abs<T1, SZ>.
/// @tparam T1 Element type of the input and returned vectors.
/// @tparam SZ Number of elements in the vector.
/// @param src0 Input vector.
/// @return The vector produced by __ESIMD_NS::abs.
template <typename T1, int SZ>
__XETLA_API xetla_vector<T1, SZ> xetla_abs(xetla_vector<T1, SZ> src0) {
    constexpr bool is_internal = is_internal_type<T1>::value;
    static_assert(!is_internal, "The internal types are not yet supported!");
    return __ESIMD_NS::abs<T1, SZ>(src0);
}


/// @brief Get absolute value (scalar version), result type equal to input type.
/// Forwards to __ESIMD_NS::abs<T1>.
/// @tparam T1 Type of the input scalar.
/// @param src0 Input scalar.
/// @return The value produced by __ESIMD_NS::abs, with const removed from T1.
template <typename T1>
__XETLA_API typename std::remove_const<T1>::type xetla_abs(T1 src0) {
    constexpr bool is_internal = is_internal_type<T1>::value;
    static_assert(!is_internal, "The internal types are not yet supported!");
    return __ESIMD_NS::abs<T1>(src0);
}


/// @brief Selects component-wise the maximum of the two vectors.
/// Forwards to __ESIMD_NS::max<T, SZ>, passing Sat::value as the saturation flag.
/// @tparam T Element type of the vectors.
/// @tparam SZ Number of elements in the vectors.
/// @tparam Sat Saturation tag type; defaults to xetla_saturation_off_tag.
/// @param src0 First input vector.
/// @param src1 Second input vector.
/// @param sat Tag object; only its type is used.
/// @return The vector produced by __ESIMD_NS::max.
template <typename T, int SZ, typename Sat = xetla_saturation_off_tag>
__XETLA_API xetla_vector<T, SZ> xetla_max(
        xetla_vector<T, SZ> src0, xetla_vector<T, SZ> src1, Sat sat = {}) {
    constexpr bool is_internal = is_internal_type<T>::value;
    static_assert(!is_internal, "The internal types are not yet supported!");
    return __ESIMD_NS::max<T, SZ>(src0, src1, Sat::value);
}


/// @brief Maximum of a vector and a scalar.
/// Forwards to __ESIMD_NS::max<T, SZ>, passing Sat::value as the saturation flag.
/// @tparam T Element type of the vector and type of the scalar.
/// @tparam SZ Number of elements in the vector.
/// @tparam Sat Saturation tag type; defaults to xetla_saturation_off_tag.
/// @param src0 Input vector.
/// @param src1 Input scalar.
/// @param sat Tag object; only its type is used.
/// @return The vector produced by __ESIMD_NS::max.
template <typename T, int SZ, typename Sat = xetla_saturation_off_tag>
__XETLA_API xetla_vector<T, SZ> xetla_max(
        xetla_vector<T, SZ> src0, T src1, Sat sat = {}) {
    constexpr bool is_internal = is_internal_type<T>::value;
    static_assert(!is_internal, "The internal types are not yet supported!");
    return __ESIMD_NS::max<T, SZ>(src0, src1, Sat::value);
}


/// @brief Maximum of a scalar and a vector.
/// Forwards to __ESIMD_NS::max<T, SZ>, passing Sat::value as the saturation flag.
/// @tparam T Type of the scalar and element type of the vector.
/// @tparam SZ Number of elements in the vector.
/// @tparam Sat Saturation tag type; defaults to xetla_saturation_off_tag.
/// @param src0 Input scalar.
/// @param src1 Input vector.
/// @param sat Tag object; only its type is used.
/// @return The vector produced by __ESIMD_NS::max.
template <typename T, int SZ, typename Sat = xetla_saturation_off_tag>
__XETLA_API xetla_vector<T, SZ> xetla_max(
        T src0, xetla_vector<T, SZ> src1, Sat sat = {}) {
    constexpr bool is_internal = is_internal_type<T>::value;
    static_assert(!is_internal, "The internal types are not yet supported!");
    return __ESIMD_NS::max<T, SZ>(src0, src1, Sat::value);
}


/// @brief Maximum of two scalars.
/// Forwards to __ESIMD_NS::max<T>, passing Sat::value as the saturation flag.
/// @tparam T Type of both scalars and of the result.
/// @tparam Sat Saturation tag type; defaults to xetla_saturation_off_tag.
/// @param src0 First input scalar.
/// @param src1 Second input scalar.
/// @param sat Tag object; only its type is used.
/// @return The value produced by __ESIMD_NS::max.
template <typename T, typename Sat = xetla_saturation_off_tag>
__XETLA_API T xetla_max(T src0, T src1, Sat sat = {}) {
    constexpr bool is_internal = is_internal_type<T>::value;
    static_assert(!is_internal, "The internal types are not yet supported!");
    return __ESIMD_NS::max<T>(src0, src1, Sat::value);
}


/// @brief Selects component-wise the minimum of the two vectors.
/// Forwards to __ESIMD_NS::min<T, SZ>, passing Sat::value as the saturation flag.
/// @tparam T Element type of the vectors.
/// @tparam SZ Number of elements in the vectors.
/// @tparam Sat Saturation tag type; defaults to xetla_saturation_off_tag.
/// @param src0 First input vector.
/// @param src1 Second input vector.
/// @param sat Tag object; only its type is used.
/// @return The vector produced by __ESIMD_NS::min.
template <typename T, int SZ, typename Sat = xetla_saturation_off_tag>
__XETLA_API xetla_vector<T, SZ> xetla_min(
        xetla_vector<T, SZ> src0, xetla_vector<T, SZ> src1, Sat sat = {}) {
    constexpr bool is_internal = is_internal_type<T>::value;
    static_assert(!is_internal, "The internal types are not yet supported!");
    return __ESIMD_NS::min<T, SZ>(src0, src1, Sat::value);
}


/// @brief Minimum of a vector and a scalar.
/// Forwards to __ESIMD_NS::min<T, SZ>, passing Sat::value as the saturation flag.
/// @tparam T Element type of the vector and type of the scalar.
/// @tparam SZ Number of elements in the vector.
/// @tparam Sat Saturation tag type; defaults to xetla_saturation_off_tag.
/// @param src0 Input vector.
/// @param src1 Input scalar.
/// @param sat Tag object; only its type is used.
/// @return The vector produced by __ESIMD_NS::min.
template <typename T, int SZ, typename Sat = xetla_saturation_off_tag>
__XETLA_API xetla_vector<T, SZ> xetla_min(
        xetla_vector<T, SZ> src0, T src1, Sat sat = {}) {
    constexpr bool is_internal = is_internal_type<T>::value;
    static_assert(!is_internal, "The internal types are not yet supported!");
    return __ESIMD_NS::min<T, SZ>(src0, src1, Sat::value);
}


/// @brief Minimum of a scalar and a vector.
/// Forwards to __ESIMD_NS::min<T, SZ>, passing Sat::value as the saturation flag.
/// @tparam T Type of the scalar and element type of the vector.
/// @tparam SZ Number of elements in the vector.
/// @tparam Sat Saturation tag type; defaults to xetla_saturation_off_tag.
/// @param src0 Input scalar.
/// @param src1 Input vector.
/// @param sat Tag object; only its type is used.
/// @return The vector produced by __ESIMD_NS::min.
template <typename T, int SZ, typename Sat = xetla_saturation_off_tag>
__XETLA_API xetla_vector<T, SZ> xetla_min(
        T src0, xetla_vector<T, SZ> src1, Sat sat = {}) {
    constexpr bool is_internal = is_internal_type<T>::value;
    static_assert(!is_internal, "The internal types are not yet supported!");
    return __ESIMD_NS::min<T, SZ>(src0, src1, Sat::value);
}


/// @brief Minimum of two scalars.
/// Forwards to __ESIMD_NS::min<T>, passing Sat::value as the saturation flag.
/// @tparam T Type of both scalars and of the result.
/// @tparam Sat Saturation tag type; defaults to xetla_saturation_off_tag.
/// @param src0 First input scalar.
/// @param src1 Second input scalar.
/// @param sat Tag object; only its type is used.
/// @return The value produced by __ESIMD_NS::min.
template <typename T, typename Sat = xetla_saturation_off_tag>
__XETLA_API T xetla_min(T src0, T src1, Sat sat = {}) {
    constexpr bool is_internal = is_internal_type<T>::value;
    static_assert(!is_internal, "The internal types are not yet supported!");
    return __ESIMD_NS::min<T>(src0, src1, Sat::value);
}


/// @brief Calculate exponent value for each element of the input vector,
/// the base is e. Forwards to __ESIMD_NS::exp<T, SZ>.
/// @tparam T Element type; must be float or fp16 (const allowed).
/// @tparam SZ Number of elements in the vector.
/// @tparam Sat Saturation tag type; defaults to xetla_saturation_off_tag.
/// @param src Input vector.
/// @param sat Tag object; only its type is used.
/// @return The vector produced by __ESIMD_NS::exp.
template <class T, int SZ, typename Sat = xetla_saturation_off_tag>
__XETLA_API xetla_vector<T, SZ> xetla_exp(
        xetla_vector<T, SZ> src, Sat sat = {}) {
    using elem_t = remove_const_t<T>;
    static_assert(
            std::is_same_v<elem_t, float> || std::is_same_v<elem_t, fp16>,
            "Only support fp32 and fp16");
    return __ESIMD_NS::exp<T, SZ>(src, Sat::value);
}


/// @brief Calculate the base-e exponent of a scalar.
/// Forwards to __ESIMD_NS::exp<T>.
/// @tparam T Scalar type; must be float or fp16 (const allowed).
/// @tparam Sat Saturation tag type; defaults to xetla_saturation_off_tag.
/// @param src Input scalar.
/// @param sat Tag object; only its type is used.
/// @return The value produced by __ESIMD_NS::exp.
template <class T, typename Sat = xetla_saturation_off_tag>
__XETLA_API T xetla_exp(T src, Sat sat = {}) {
    using elem_t = remove_const_t<T>;
    static_assert(
            std::is_same_v<elem_t, float> || std::is_same_v<elem_t, fp16>,
            "Only support fp32 and fp16");
    return __ESIMD_NS::exp<T>(src, Sat::value);
}


/// @brief Calculate exponent value for each element of the input vector,
/// the base is 2. Forwards to __ESIMD_NS::exp2<T, SZ>.
/// @tparam T Element type; must be float or fp16 (const allowed).
/// @tparam SZ Number of elements in the vector.
/// @tparam Sat Saturation tag type; defaults to xetla_saturation_off_tag.
/// @param src Input vector.
/// @param sat Tag object; only its type is used.
/// @return The vector produced by __ESIMD_NS::exp2.
template <class T, int SZ, typename Sat = xetla_saturation_off_tag>
__XETLA_API xetla_vector<T, SZ> xetla_exp2(
        xetla_vector<T, SZ> src, Sat sat = {}) {
    using elem_t = remove_const_t<T>;
    static_assert(
            std::is_same_v<elem_t, float> || std::is_same_v<elem_t, fp16>,
            "Only support fp32 and fp16");
    return __ESIMD_NS::exp2<T, SZ>(src, Sat::value);
}


/// @brief Calculate the base-2 exponent of a scalar.
/// Forwards to __ESIMD_NS::exp2<T>.
/// @tparam T Scalar type; must be float or fp16 (const allowed).
/// @tparam Sat Saturation tag type; defaults to xetla_saturation_off_tag.
/// @param src Input scalar.
/// @param sat Tag object; only its type is used.
/// @return The value produced by __ESIMD_NS::exp2.
template <class T, typename Sat = xetla_saturation_off_tag>
__XETLA_API T xetla_exp2(T src, Sat sat = {}) {
    using elem_t = remove_const_t<T>;
    static_assert(
            std::is_same_v<elem_t, float> || std::is_same_v<elem_t, fp16>,
            "Only support fp32 and fp16");
    return __ESIMD_NS::exp2<T>(src, Sat::value);
}


/// @brief Calculate the inversion of each element of the input vector.
/// Forwards to __ESIMD_NS::inv.
/// @tparam T Element type; must be float or fp16 (const allowed).
/// @tparam SZ Number of elements in the vector.
/// @tparam Sat Saturation tag type; defaults to xetla_saturation_off_tag.
/// @param src Input vector.
/// @param sat Tag object; only its type is used.
/// @return The vector produced by __ESIMD_NS::inv.
template <typename T, int SZ, typename Sat = xetla_saturation_off_tag>
__XETLA_API xetla_vector<T, SZ> xetla_inv(
        xetla_vector<T, SZ> src, Sat sat = {}) {
    using elem_t = remove_const_t<T>;
    static_assert(
            std::is_same_v<elem_t, float> || std::is_same_v<elem_t, fp16>,
            "Only support fp32 and fp16");
    return __ESIMD_NS::inv(src, Sat::value);
}


/// @brief Calculate the inversion of a scalar.
/// Forwards to __ESIMD_NS::inv.
/// @tparam T Scalar type; must be float or fp16 (const allowed).
/// @tparam Sat Saturation tag type; defaults to xetla_saturation_off_tag.
/// @param src Input scalar.
/// @param sat Tag object; only its type is used.
/// @return The value produced by __ESIMD_NS::inv.
template <typename T, typename Sat = xetla_saturation_off_tag>
__XETLA_API T xetla_inv(T src, Sat sat = {}) {
    using elem_t = remove_const_t<T>;
    static_assert(
            std::is_same_v<elem_t, float> || std::is_same_v<elem_t, fp16>,
            "Only support fp32 and fp16");
    return __ESIMD_NS::inv(src, Sat::value);
}


/// @brief Calculate the square root of each element of the input vector.
/// Forwards to __ESIMD_NS::sqrt.
/// @tparam T Element type; must be float or fp16 (const allowed).
/// @tparam SZ Number of elements in the vector.
/// @tparam Sat Saturation tag type; defaults to xetla_saturation_off_tag.
/// @param src Input vector.
/// @param sat Tag object; only its type is used.
/// @return The vector produced by __ESIMD_NS::sqrt.
template <typename T, int SZ, typename Sat = xetla_saturation_off_tag>
__XETLA_API xetla_vector<T, SZ> xetla_sqrt(
        xetla_vector<T, SZ> src, Sat sat = {}) {
    using elem_t = remove_const_t<T>;
    static_assert(
            std::is_same_v<elem_t, float> || std::is_same_v<elem_t, fp16>,
            "Only support fp32 and fp16");
    return __ESIMD_NS::sqrt(src, Sat::value);
}


/// @brief Calculate the square root of a scalar.
/// Forwards to __ESIMD_NS::sqrt.
/// @tparam T Scalar type; must be float or fp16 (const allowed).
/// @tparam Sat Saturation tag type; defaults to xetla_saturation_off_tag.
/// @param src Input scalar.
/// @param sat Tag object; only its type is used.
/// @return The value produced by __ESIMD_NS::sqrt.
template <typename T, typename Sat = xetla_saturation_off_tag>
__XETLA_API T xetla_sqrt(T src, Sat sat = {}) {
    using elem_t = remove_const_t<T>;
    static_assert(
            std::is_same_v<elem_t, float> || std::is_same_v<elem_t, fp16>,
            "Only support fp32 and fp16");
    return __ESIMD_NS::sqrt(src, Sat::value);
}


/// @brief Calculate the square root of each element of the input vector
/// via __ESIMD_NS::sqrt_ieee.
/// @tparam T Element type; must be float or double (const allowed).
/// @tparam SZ Number of elements in the vector.
/// @tparam Sat Saturation tag type; defaults to xetla_saturation_off_tag.
/// @param src Input vector.
/// @param sat Tag object; only its type is used.
/// @return The vector produced by __ESIMD_NS::sqrt_ieee.
template <typename T, int SZ, typename Sat = xetla_saturation_off_tag>
__XETLA_API xetla_vector<T, SZ> xetla_sqrt_ieee(
        xetla_vector<T, SZ> src, Sat sat = {}) {
    // The accepted types are float and double, so the diagnostic must name
    // fp32/fp64 (it previously claimed fp16, which this check rejects).
    static_assert((std::is_same<remove_const_t<T>, float>::value)
                    || (std::is_same<remove_const_t<T>, double>::value),
            "Only support fp32 and fp64");
    return __ESIMD_NS::sqrt_ieee(src, Sat::value);
}


/// @brief Calculate the square root of a scalar via __ESIMD_NS::sqrt_ieee.
/// @tparam T Scalar type; must be float or double (const allowed).
/// @tparam Sat Saturation tag type; defaults to xetla_saturation_off_tag.
/// @param src Input scalar.
/// @param sat Tag object; only its type is used.
/// @return The value produced by __ESIMD_NS::sqrt_ieee.
template <typename T, typename Sat = xetla_saturation_off_tag>
__XETLA_API T xetla_sqrt_ieee(T src, Sat sat = {}) {
    // The accepted types are float and double, so the diagnostic must name
    // fp32/fp64 (it previously claimed fp16, which this check rejects).
    static_assert((std::is_same<remove_const_t<T>, float>::value)
                    || (std::is_same<remove_const_t<T>, double>::value),
            "Only support fp32 and fp64");
    return __ESIMD_NS::sqrt_ieee(src, Sat::value);
}


/// @brief Calculate the inversion of square root of each element of the
/// input vector. Forwards to __ESIMD_NS::rsqrt.
/// @tparam T Element type; must be float or fp16 (const allowed).
/// @tparam SZ Number of elements in the vector.
/// @tparam Sat Saturation tag type; defaults to xetla_saturation_off_tag.
/// @param src Input vector.
/// @param sat Tag object; only its type is used.
/// @return The vector produced by __ESIMD_NS::rsqrt.
template <typename T, int SZ, typename Sat = xetla_saturation_off_tag>
__XETLA_API xetla_vector<T, SZ> xetla_rsqrt(
        xetla_vector<T, SZ> src, Sat sat = {}) {
    using elem_t = remove_const_t<T>;
    static_assert(
            std::is_same_v<elem_t, float> || std::is_same_v<elem_t, fp16>,
            "Only support fp32 and fp16");
    return __ESIMD_NS::rsqrt(src, Sat::value);
}


/// @brief Calculate the inversion of square root of a scalar.
/// Forwards to __ESIMD_NS::rsqrt.
/// @tparam T Scalar type; must be float or fp16 (const allowed).
/// @tparam Sat Saturation tag type; defaults to xetla_saturation_off_tag.
/// @param src Input scalar.
/// @param sat Tag object; only its type is used.
/// @return The value produced by __ESIMD_NS::rsqrt.
template <typename T, typename Sat = xetla_saturation_off_tag>
__XETLA_API T xetla_rsqrt(T src, Sat sat = {}) {
    using elem_t = remove_const_t<T>;
    static_assert(
            std::is_same_v<elem_t, float> || std::is_same_v<elem_t, fp16>,
            "Only support fp32 and fp16");
    return __ESIMD_NS::rsqrt(src, Sat::value);
}


/// @brief Calculate the tanh (vector version).
/// Each element is computed as (exp(2x) - 1) / (exp(2x) + 1); elements whose
/// input is >= 10 are then overwritten with 1. The vector is processed in
/// chunks of flag_elems (128) elements, followed by one chunk for any
/// remaining SZ % 128 elements.
/// @tparam T Element type; must be float (const allowed).
/// @tparam SZ Number of elements in the vector.
/// @param src Input vector.
/// @return Vector of SZ elements holding the tanh results.
template <typename T, int SZ>

__XETLA_API xetla_vector<T, SZ> xetla_tanh(xetla_vector<T, SZ> src) {

    static_assert(std::is_same<remove_const_t<T>, float>::value,

            "Only support fp32! ");

    // Chunk size used for each masked sub-vector operation.
    // NOTE(review): presumably bounded by the widest mask/flag the hardware
    // handles at once (8 * 16 lanes) - confirm.
    constexpr uint32_t flag_elems = 8 * 16;

    // Left uninitialized on purpose: the two sections below together write
    // every element [0, SZ).
    xetla_vector<T, SZ> ret;

    // Full chunks of flag_elems elements.
    if constexpr (SZ / flag_elems > 0) {

#pragma unroll

        for (uint32_t i = 0; i < SZ / flag_elems; i++) {

            auto src_sub = src.xetla_select<flag_elems, 1>(i * flag_elems);

            // Selection into ret; the assignment and merge below update ret
            // through it.
            auto ret_sub = ret.xetla_select<flag_elems, 1>(i * flag_elems);

            // Lanes whose input is >= 10 get the constant 1 instead of the
            // computed ratio.
            // NOTE(review): presumably guards against inf/inf once exp(2x)
            // overflows, tanh being ~1 there anyway - confirm.
            xetla_mask<flag_elems> mask = src_sub >= 10;

            xetla_vector<T, flag_elems> exp2x

                    = xetla_exp<T, flag_elems>(src_sub * 2.f);

            ret_sub = (exp2x - 1.f) / (exp2x + 1.f);

            xetla_vector<T, flag_elems> ones(1);

            ret_sub.xetla_merge(ones, mask);

        }

    }


    // Tail: the last SZ % flag_elems elements, handled with the same formula.
    if constexpr (SZ % flag_elems != 0) {

        // First element index not covered by the full-chunk loop above.
        constexpr uint32_t start_pos = SZ / flag_elems * flag_elems;

        constexpr uint32_t remain_elems = SZ % flag_elems;


        auto src_sub = src.xetla_select<remain_elems, 1>(start_pos);

        auto ret_sub = ret.xetla_select<remain_elems, 1>(start_pos);

        xetla_mask<remain_elems> mask = src_sub >= 10;

        xetla_vector<T, remain_elems> exp2x

                = xetla_exp<T, remain_elems>(src_sub * 2.f);

        ret_sub = (exp2x - 1.f) / (exp2x + 1.f);

        xetla_vector<T, remain_elems> ones(1);

        ret_sub.xetla_merge(ones, mask);

    }


    return ret;

}


/// @brief Calculate the tanh (scalar version).
/// Computes (exp(2x) - 1) / (exp(2x) + 1) and returns 1 instead whenever
/// the input is >= 10.
/// @tparam T Scalar type; must be float (const allowed).
/// @param src Input scalar.
/// @return 1 when src >= 10, otherwise the computed ratio.
template <typename T>
__XETLA_API T xetla_tanh(T src) {
    static_assert(
            std::is_same_v<remove_const_t<T>, float>, "Only support fp32! ");
    T e2x = xetla_exp<T>(src * 2.f);
    T ratio = (e2x - 1.f) / (e2x + 1.f);
    if (src >= 10) { return 1; }
    return ratio;
}


/// @brief Add two unsigned integer vectors, return the result and in-place
/// update the carry. Forwards to __ESIMD_ENS::addc.
/// @tparam T Element type; must be uint32_t (const allowed).
/// @tparam SZ Number of elements in the vectors.
/// @param src0 First input vector.
/// @param src1 Second input vector.
/// @param carry Output reference that receives the carry vector.
/// @return The vector returned by __ESIMD_ENS::addc.
template <typename T, int SZ>
__XETLA_API xetla_vector<T, SZ> xetla_add_c(xetla_vector<T, SZ> src0,
        xetla_vector<T, SZ> src1, xetla_vector_ref<T, SZ> __REF__ carry) {
    static_assert(std::is_same_v<remove_const_t<T>, uint32_t>,
            "For addc, only uint32_t is supported");
    // addc writes the carry into a plain vector; it is copied out to the
    // caller's reference afterwards.
    xetla_vector<T, SZ> carry_out;
    xetla_vector<T, SZ> sum = __ESIMD_ENS::addc(carry_out, src0, src1);
    carry = carry_out;
    return sum;
}


/// @brief Add an unsigned integer vector and a scalar, return the result
/// and in-place update the carry. Forwards to __ESIMD_ENS::addc.
/// @tparam T Element/scalar type; must be uint32_t (const allowed).
/// @tparam SZ Number of elements in the vector.
/// @param src0 Input vector.
/// @param src1 Input scalar.
/// @param carry Output reference that receives the carry vector.
/// @return The vector returned by __ESIMD_ENS::addc.
template <typename T, int SZ>
__XETLA_API xetla_vector<T, SZ> xetla_add_c(xetla_vector<T, SZ> src0, T src1,
        xetla_vector_ref<T, SZ> __REF__ carry) {
    static_assert(std::is_same_v<remove_const_t<T>, uint32_t>,
            "For addc, only uint32_t is supported");
    // addc writes the carry into a plain vector; it is copied out to the
    // caller's reference afterwards.
    xetla_vector<T, SZ> carry_out;
    xetla_vector<T, SZ> sum = __ESIMD_ENS::addc(carry_out, src0, src1);
    carry = carry_out;
    return sum;
}


/// @brief Multiply src0 with src1, return the hi part and in-place update
/// the lo part. Forwards to __ESIMD_ENS::imul<T0, T1, T2, SZ>.
/// @tparam T0 Element type of the hi/lo result vectors.
/// @tparam T1 Element type of the vector operand.
/// @tparam T2 Type of the second operand.
/// @tparam SZ Number of elements in the vectors.
/// @param lo Output reference that receives the lo part.
/// @param src0 First operand (vector).
/// @param src1 Second operand.
/// @return The hi part returned by __ESIMD_ENS::imul.
template <typename T0, typename T1, typename T2, int SZ>
__XETLA_API xetla_vector<T0, SZ> xetla_imul(xetla_vector_ref<T0, SZ> __REF__ lo,
        xetla_vector<T1, SZ> src0, T2 src1) {
    // imul writes the low part into a plain vector; it is copied out to the
    // caller's reference afterwards.
    xetla_vector<T0, SZ> low_part;
    xetla_vector<T0, SZ> high_part
            = __ESIMD_ENS::imul<T0, T1, T2, SZ>(low_part, src0, src1);
    lo = low_part;
    return high_part;
}


/// @brief Performs reduction over elements of the input vector.
/// Dispatches at compile time on BinaryOperation to the matching ESIMD
/// reduction (detail::sum, detail::prod, hmin or hmax).
/// @tparam T0 Type of the reduction result.
/// @tparam T1 Element type of the input vector.
/// @tparam SZ Number of elements in the input vector.
/// @tparam BinaryOperation Reduction kind: sum, prod, min or max.
/// @param v Input vector.
/// @return The scalar produced by the selected ESIMD reduction.
template <typename T0, typename T1, int SZ, reduce_op BinaryOperation>
__XETLA_API T0 xetla_reduce(xetla_vector<T1, SZ> v) {
    // Reject any other operation at compile time; otherwise every branch
    // below would be discarded and control would fall off the end of a
    // non-void function.
    static_assert(BinaryOperation == reduce_op::sum
                    || BinaryOperation == reduce_op::prod
                    || BinaryOperation == reduce_op::min
                    || BinaryOperation == reduce_op::max,
            "Unsupported reduce_op for xetla_reduce");
    if constexpr (BinaryOperation == reduce_op::sum) {
        return __ESIMD_NS::detail::sum<T0, T1, SZ>(v);
    } else if constexpr (BinaryOperation == reduce_op::prod) {
        return __ESIMD_NS::detail::prod<T0, T1, SZ>(v);
    } else if constexpr (BinaryOperation == reduce_op::min) {
        return __ESIMD_NS::hmin<T0, T1, SZ>(v);
    } else if constexpr (BinaryOperation == reduce_op::max) {
        return __ESIMD_NS::hmax<T0, T1, SZ>(v);
    }
}


/// @brief Get rounded value. Forwards to __ESIMD_NS::rnde<T, SZ>.
/// @tparam T Element type of the vector; must not be an internal type.
/// @tparam SZ Number of elements in the vector.
/// @param src0 Input vector.
/// @return The vector produced by __ESIMD_NS::rnde.
template <typename T, int SZ>
__XETLA_API xetla_vector<T, SZ> xetla_rnde(xetla_vector<T, SZ> src0) {
    constexpr bool is_internal = is_internal_type<T>::value;
    static_assert(!is_internal, "The internal types are not yet supported!");
    return __ESIMD_NS::rnde<T, SZ>(src0);
}


/// @brief Adds two vectors, optionally saturating the result.
/// The sum is computed in the source element type T0. With the default
/// xetla_saturation_off_tag it is converted to xetla_vector<T1, SZ>;
/// with any other tag it goes through __ESIMD_NS::saturate<T1, T0, SZ>.
/// @tparam T1 Element type of the returned vector.
/// @tparam T0 Element type of the input vectors.
/// @tparam SZ Number of elements in the vectors.
/// @tparam Sat Saturation tag type; defaults to xetla_saturation_off_tag.
/// @param src0 First input vector.
/// @param src1 Second input vector.
/// @param sat Tag object; only its type is used.
/// @return The converted or saturated sum.
template <typename T1, typename T0, int SZ,
        typename Sat = xetla_saturation_off_tag>
__XETLA_API xetla_vector<T1, SZ> xetla_add(
        xetla_vector<T0, SZ> src0, xetla_vector<T0, SZ> src1, Sat sat = {}) {
    static_assert(
            !is_internal_type<T0>::value && !is_internal_type<T1>::value,
            "The internal types are not yet supported!");
    xetla_vector<T0, SZ> sum = src0 + src1;
    if constexpr (!std::is_same_v<Sat, xetla_saturation_off_tag>) {
        return __ESIMD_NS::saturate<T1, T0, SZ>(sum);
    } else {
        return xetla_vector<T1, SZ>(sum);
    }
}


/// @brief Saturation function. Forwards to __ESIMD_NS::saturate<T1, T0, SZ>.
/// @tparam T1 Element type of the returned vector.
/// @tparam T0 Element type of the input vector.
/// @tparam SZ Number of elements in the vectors.
/// @param src Input vector.
/// @return The vector produced by __ESIMD_NS::saturate.
template <typename T1, typename T0, int SZ>
__XETLA_API xetla_vector<T1, SZ> xetla_sat(xetla_vector<T0, SZ> src) {
    constexpr bool uses_internal_type
            = is_internal_type<T0>::value || is_internal_type<T1>::value;
    static_assert(
            !uses_internal_type, "The internal types are not yet supported!");
    return __ESIMD_NS::saturate<T1, T0, SZ>(src);
}


} // namespace gpu::xetla

#pragma clang diagnostic pop

base_ops.hpp
C++ API.

base_types.hpp
C++ API.

common.hpp
C++ API.

remove_const_t
typename std::remove_const< T >::type remove_const_t
Definition common.hpp:26

__XETLA_API
#define __XETLA_API
Definition common.hpp:43

gpu::xetla::xetla_vector_ref
Workaround for ESIMD vector(1D) ref type.
Definition base_types.hpp:187

__REF__
#define __REF__
Workaround for ESIMD reference usage.
Definition base_types.hpp:177

gpu::xetla::fp16
sycl::half fp16
xetla fp16 data type.
Definition base_types.hpp:43

gpu::xetla::xetla_vector
__ESIMD_NS::simd< native_type_t< Ty >, N > xetla_vector
wrapper for xetla_vector.
Definition base_types.hpp:149

gpu::xetla::xetla_mask
__ESIMD_NS::simd_mask< N > xetla_mask
wrapper for xetla_mask.
Definition base_types.hpp:165

gpu::xetla::xetla_reduce
__XETLA_API T0 xetla_reduce(xetla_vector< T1, SZ > v)
Performs reduction over elements of the input vector.
Definition math_general.hpp:520

gpu::xetla::xetla_abs
__XETLA_API xetla_vector< T0, SZ > xetla_abs(xetla_vector< T1, SZ > src0)
Get absolute value (vector version)
Definition math_general.hpp:39

gpu::xetla::xetla_max
__XETLA_API xetla_vector< T, SZ > xetla_max(xetla_vector< T, SZ > src0, xetla_vector< T, SZ > src1, Sat sat={})
Selects component-wise the maximum of the two vectors.
Definition math_general.hpp:97

gpu::xetla::xetla_exp
__XETLA_API xetla_vector< T, SZ > xetla_exp(xetla_vector< T, SZ > src, Sat sat={})
Calculate exponent value for each element of the input vector, the base is e.
Definition math_general.hpp:232

gpu::xetla::xetla_min
__XETLA_API xetla_vector< T, SZ > xetla_min(xetla_vector< T, SZ > src0, xetla_vector< T, SZ > src1, Sat sat={})
Selects component-wise the minimum of the two vectors.
Definition math_general.hpp:166

gpu::xetla::xetla_sat
__XETLA_API xetla_vector< T1, SZ > xetla_sat(xetla_vector< T0, SZ > src)
Saturation function.
Definition math_general.hpp:574

gpu::xetla::xetla_sqrt
__XETLA_API xetla_vector< T, SZ > xetla_sqrt(xetla_vector< T, SZ > src, Sat sat={})
Calculate the square root, i.e. sqrt(src).
Definition math_general.hpp:319

gpu::xetla::xetla_imul
__XETLA_API xetla_vector< T0, SZ > xetla_imul(xetla_vector_ref< T0, SZ > __REF__ lo, xetla_vector< T1, SZ > src0, T2 src1)
Multiply src0 with src1, return the hi part and in-place update the lo part.
Definition math_general.hpp:498

gpu::xetla::xetla_exp2
__XETLA_API xetla_vector< T, SZ > xetla_exp2(xetla_vector< T, SZ > src, Sat sat={})
Calculate exponent value for each element of the input vector, the base is 2.
Definition math_general.hpp:262

gpu::xetla::xetla_tanh
__XETLA_API xetla_vector< T, SZ > xetla_tanh(xetla_vector< T, SZ > src)
Calculate the tanh (vector version).
Definition math_general.hpp:402

gpu::xetla::xetla_rsqrt
__XETLA_API xetla_vector< T, SZ > xetla_rsqrt(xetla_vector< T, SZ > src, Sat sat={})
Calculate the inversion of square root, i.e. 1 / sqrt(src).
Definition math_general.hpp:375

gpu::xetla::xetla_add_c
__XETLA_API xetla_vector< T, SZ > xetla_add_c(xetla_vector< T, SZ > src0, xetla_vector< T, SZ > src1, xetla_vector_ref< T, SZ > __REF__ carry)
Add two unsigned integer vectors, return the result and in-place update the carry.
Definition math_general.hpp:460

gpu::xetla::xetla_inv
__XETLA_API xetla_vector< T, SZ > xetla_inv(xetla_vector< T, SZ > src, Sat sat={})
Calculate the inversion, i.e. 1 / src.
Definition math_general.hpp:291

gpu::xetla::xetla_sqrt_ieee
__XETLA_API xetla_vector< T, SZ > xetla_sqrt_ieee(xetla_vector< T, SZ > src, Sat sat={})
Calculate the square root (IEEE variant), i.e. sqrt(src).
Definition math_general.hpp:347

gpu::xetla::xetla_add
__XETLA_API xetla_vector< T1, SZ > xetla_add(xetla_vector< T0, SZ > src0, xetla_vector< T0, SZ > src1, Sat sat={})
Adds two vectors with saturation. The source operands must both be of floating-point type.
Definition math_general.hpp:556

gpu::xetla::xetla_rnde
__XETLA_API xetla_vector< T, SZ > xetla_rnde(xetla_vector< T, SZ > src0)
Get rounded value.
Definition math_general.hpp:538

gpu::xetla
Definition arch_config.hpp:24

gpu::xetla::reduce_op::sum
@ sum

gpu::xetla::reduce_op::max
@ max

gpu::xetla::reduce_op::prod
@ prod

gpu::xetla::reduce_op::min
@ min

gpu::xetla::is_internal_type
Used to check if the type is xetla internal data type.
Definition base_types.hpp:67

gpu::xetla::is_internal_type::value
static constexpr bool value
Definition base_types.hpp:68