16 namespace ext::intel::math {
24 return sycl::fma(x, y, z);
107 sycl::half2
hadd2(sycl::half2 x, sycl::half2 y) {
return x + y; }
110 return sycl::clamp((x + y), sycl::half2{0.f, 0.f}, sycl::half2{1.f, 1.f});
113 sycl::half2
hfma2(sycl::half2 x, sycl::half2 y, sycl::half2 z) {
114 return sycl::fma(x, y, z);
117 sycl::half2
hfma2_sat(sycl::half2 x, sycl::half2 y, sycl::half2 z) {
118 return sycl::clamp(sycl::fma(x, y, z), sycl::half2{0.f, 0.f},
119 sycl::half2{1.f, 1.f});
122 sycl::half2
hmul2(sycl::half2 x, sycl::half2 y) {
return x * y; }
125 return sycl::clamp((x * y), sycl::half2{0.f, 0.f}, sycl::half2{1.f, 1.f});
128 sycl::half2
h2div(sycl::half2 x, sycl::half2 y) {
return x / y; }
130 sycl::half2
hneg2(sycl::half2 x) {
return -x; }
132 sycl::half2
hsub2(sycl::half2 x, sycl::half2 y) {
return x - y; }
135 return sycl::clamp((x - y), sycl::half2{0.f, 0.f}, sycl::half2{1.f, 1.f});
138 bool hbeq2(sycl::half2 x, sycl::half2 y) {
139 return heq(x.s0(), y.s0()) &&
heq(x.s1(), y.s1());
142 bool hbequ2(sycl::half2 x, sycl::half2 y) {
143 return hequ(x.s0(), y.s0()) &&
hequ(x.s1(), y.s1());
146 bool hbge2(sycl::half2 x, sycl::half2 y) {
147 return hge(x.s0(), y.s0()) &&
hge(x.s1(), y.s1());
150 bool hbgeu2(sycl::half2 x, sycl::half2 y) {
151 return hgeu(x.s0(), y.s0()) &&
hgeu(x.s1(), y.s1());
154 bool hbgt2(sycl::half2 x, sycl::half2 y) {
155 return hgt(x.s0(), y.s0()) &&
hgt(x.s1(), y.s1());
158 bool hbgtu2(sycl::half2 x, sycl::half2 y) {
159 return hgtu(x.s0(), y.s0()) &&
hgtu(x.s1(), y.s1());
162 bool hble2(sycl::half2 x, sycl::half2 y) {
163 return hle(x.s0(), y.s0()) &&
hle(x.s1(), y.s1());
166 bool hbleu2(sycl::half2 x, sycl::half2 y) {
167 return hleu(x.s0(), y.s0()) &&
hleu(x.s1(), y.s1());
170 bool hblt2(sycl::half2 x, sycl::half2 y) {
171 return hlt(x.s0(), y.s0()) &&
hlt(x.s1(), y.s1());
174 bool hbltu2(sycl::half2 x, sycl::half2 y) {
175 return hltu(x.s0(), y.s0()) &&
hltu(x.s1(), y.s1());
178 bool hbne2(sycl::half2 x, sycl::half2 y) {
179 return hne(x.s0(), y.s0()) &&
hne(x.s1(), y.s1());
182 bool hbneu2(sycl::half2 x, sycl::half2 y) {
183 return hneu(x.s0(), y.s0()) &&
hneu(x.s1(), y.s1());
186 sycl::half2
heq2(sycl::half2 x, sycl::half2 y) {
187 return sycl::half2{(
heq(x.s0(), y.s0()) ? 1.0f : 0.f),
188 (
heq(x.s1(), y.s1()) ? 1.0f : 0.f)};
191 sycl::half2
hequ2(sycl::half2 x, sycl::half2 y) {
192 return sycl::half2{(
hequ(x.s0(), y.s0()) ? 1.0f : 0.f),
193 (
hequ(x.s1(), y.s1()) ? 1.0f : 0.f)};
196 sycl::half2
hge2(sycl::half2 x, sycl::half2 y) {
197 return sycl::half2{(
hge(x.s0(), y.s0()) ? 1.0f : 0.f),
198 (
hge(x.s1(), y.s1()) ? 1.0f : 0.f)};
201 sycl::half2
hgeu2(sycl::half2 x, sycl::half2 y) {
202 return sycl::half2{(
hgeu(x.s0(), y.s0()) ? 1.0f : 0.f),
203 (
hgeu(x.s1(), y.s1()) ? 1.0f : 0.f)};
206 sycl::half2
hgt2(sycl::half2 x, sycl::half2 y) {
207 return sycl::half2{(
hgt(x.s0(), y.s0()) ? 1.0f : 0.f),
208 (
hgt(x.s1(), y.s1()) ? 1.0f : 0.f)};
211 sycl::half2
hgtu2(sycl::half2 x, sycl::half2 y) {
212 return sycl::half2{(
hgtu(x.s0(), y.s0()) ? 1.0f : 0.f),
213 (
hgtu(x.s1(), y.s1()) ? 1.0f : 0.f)};
216 sycl::half2
hle2(sycl::half2 x, sycl::half2 y) {
217 return sycl::half2{(
hle(x.s0(), y.s0()) ? 1.0f : 0.f),
218 (
hle(x.s1(), y.s1()) ? 1.0f : 0.f)};
221 sycl::half2
hleu2(sycl::half2 x, sycl::half2 y) {
222 return sycl::half2{(
hleu(x.s0(), y.s0()) ? 1.0f : 0.f),
223 (
hleu(x.s1(), y.s1()) ? 1.0f : 0.f)};
226 sycl::half2
hlt2(sycl::half2 x, sycl::half2 y) {
227 return sycl::half2{(
hlt(x.s0(), y.s0()) ? 1.0f : 0.f),
228 (
hlt(x.s1(), y.s1()) ? 1.0f : 0.f)};
231 sycl::half2
hltu2(sycl::half2 x, sycl::half2 y) {
232 return sycl::half2{(
hltu(x.s0(), y.s0()) ? 1.0f : 0.f),
233 (
hltu(x.s1(), y.s1()) ? 1.0f : 0.f)};
237 return sycl::half2{(
hisnan(x.s0()) ? 1.0f : 0.f),
238 (
hisnan(x.s1()) ? 1.0f : 0.f)};
241 sycl::half2
hne2(sycl::half2 x, sycl::half2 y) {
242 return sycl::half2{(
hne(x.s0(), y.s0()) ? 1.0f : 0.f),
243 (
hne(x.s1(), y.s1()) ? 1.0f : 0.f)};
246 sycl::half2
hneu2(sycl::half2 x, sycl::half2 y) {
247 return sycl::half2{(
hneu(x.s0(), y.s0()) ? 1.0f : 0.f),
248 (
hneu(x.s1(), y.s1()) ? 1.0f : 0.f)};
257 return sycl::fmax(x, y);
260 sycl::half2
hmax2(sycl::half2 x, sycl::half2 y) {
261 return sycl::half2{
hmax(x.s0(), y.s0()),
hmax(x.s1(), y.s1())};
274 return sycl::fmin(x, y);
277 sycl::half2
hmin2(sycl::half2 x, sycl::half2 y) {
278 return sycl::half2{
hmin(x.s0(), y.s0()),
hmin(x.s1(), y.s1())};
285 sycl::half2
hcmadd(sycl::half2 x, sycl::half2 y, sycl::half2 z) {
286 return sycl::half2{x.s0() * y.s0() - x.s1() * y.s1() + z.s0(),
287 x.s0() * y.s1() + x.s1() * y.s0() + z.s1()};
301 sycl::half2
hfma2_relu(sycl::half2 x, sycl::half2 y, sycl::half2 z) {
302 sycl::half2
r = sycl::fma(x, y, z);
303 if (!
hisnan(
r.s0()) &&
r.s0() < 0.f)
305 if (!
hisnan(
r.s1()) &&
r.s1() < 0.f)
#define __SYCL_INLINE_VER_NAMESPACE(X)
sycl::half habs(sycl::half x)
sycl::half2 hfma2_relu(sycl::half2 x, sycl::half2 y, sycl::half2 z)
sycl::half hfma_sat(sycl::half x, sycl::half y, sycl::half z)
bool hbleu2(sycl::half2 x, sycl::half2 y)
bool hbgtu2(sycl::half2 x, sycl::half2 y)
sycl::half2 hlt2(sycl::half2 x, sycl::half2 y)
bool hequ(sycl::half x, sycl::half y)
sycl::half hfma(sycl::half x, sycl::half y, sycl::half z)
sycl::half2 hne2(sycl::half2 x, sycl::half2 y)
bool hleu(sycl::half x, sycl::half y)
sycl::half hmul(sycl::half x, sycl::half y)
sycl::half hmul_sat(sycl::half x, sycl::half y)
sycl::half2 hmin2(sycl::half2 x, sycl::half2 y)
sycl::half2 hadd2(sycl::half2 x, sycl::half2 y)
bool hisnan(sycl::half y)
sycl::half2 hgeu2(sycl::half2 x, sycl::half2 y)
sycl::half2 hmax2(sycl::half2 x, sycl::half2 y)
bool hble2(sycl::half2 x, sycl::half2 y)
sycl::half2 hisnan2(sycl::half2 x)
bool hbneu2(sycl::half2 x, sycl::half2 y)
bool hbgt2(sycl::half2 x, sycl::half2 y)
sycl::half2 hsub2(sycl::half2 x, sycl::half2 y)
sycl::half hsub_sat(sycl::half x, sycl::half y)
bool hbltu2(sycl::half2 x, sycl::half2 y)
sycl::half2 hfma2(sycl::half2 x, sycl::half2 y, sycl::half2 z)
sycl::half2 hmin2_nan(sycl::half2 x, sycl::half2 y)
bool hgt(sycl::half x, sycl::half y)
sycl::half2 hneg2(sycl::half2 x)
sycl::half hmax_nan(sycl::half x, sycl::half y)
sycl::half hsub(sycl::half x, sycl::half y)
sycl::half2 hfma2_sat(sycl::half2 x, sycl::half2 y, sycl::half2 z)
sycl::half2 hmul2(sycl::half2 x, sycl::half2 y)
sycl::half2 hleu2(sycl::half2 x, sycl::half2 y)
sycl::half hmax(sycl::half x, sycl::half y)
bool hbequ2(sycl::half2 x, sycl::half2 y)
bool heq(sycl::half x, sycl::half y)
sycl::half2 hle2(sycl::half2 x, sycl::half2 y)
bool hltu(sycl::half x, sycl::half y)
sycl::half2 hequ2(sycl::half2 x, sycl::half2 y)
bool hisinf(sycl::half x)
sycl::half2 hgtu2(sycl::half2 x, sycl::half2 y)
sycl::half hadd_sat(sycl::half x, sycl::half y)
sycl::half2 hltu2(sycl::half2 x, sycl::half2 y)
sycl::half2 hgt2(sycl::half2 x, sycl::half2 y)
sycl::half2 hsub2_sat(sycl::half2 x, sycl::half2 y)
bool hle(sycl::half x, sycl::half y)
sycl::half hneg(sycl::half x)
bool hbge2(sycl::half2 x, sycl::half2 y)
sycl::half hfma_relu(sycl::half x, sycl::half y, sycl::half z)
sycl::half2 hmax2_nan(sycl::half2 x, sycl::half2 y)
bool hgtu(sycl::half x, sycl::half y)
sycl::half2 hge2(sycl::half2 x, sycl::half2 y)
sycl::half2 h2div(sycl::half2 x, sycl::half2 y)
bool hlt(sycl::half x, sycl::half y)
bool hgeu(sycl::half x, sycl::half y)
sycl::half hmin_nan(sycl::half x, sycl::half y)
sycl::half2 hmul2_sat(sycl::half2 x, sycl::half2 y)
bool hblt2(sycl::half2 x, sycl::half2 y)
bool hbeq2(sycl::half2 x, sycl::half2 y)
sycl::half hadd(sycl::half x, sycl::half y)
sycl::half2 hadd2_sat(sycl::half2 x, sycl::half2 y)
sycl::half hmin(sycl::half x, sycl::half y)
bool hbne2(sycl::half2 x, sycl::half2 y)
sycl::half2 hcmadd(sycl::half2 x, sycl::half2 y, sycl::half2 z)
sycl::half hdiv(sycl::half x, sycl::half y)
sycl::half2 heq2(sycl::half2 x, sycl::half2 y)
sycl::half2 hneu2(sycl::half2 x, sycl::half2 y)
bool hne(sycl::half x, sycl::half y)
sycl::half2 habs2(sycl::half2 x)
bool hge(sycl::half x, sycl::half y)
bool hneu(sycl::half x, sycl::half y)
bool hbgeu2(sycl::half2 x, sycl::half2 y)
std::enable_if_t< std::is_same< T, bfloat16 >::value, bool > isnan(T x)
std::enable_if_t< detail::is_bf16_storage_type< T >::value, T > fabs(T x)
sycl::detail::half_impl::half half
---- Error handling, matching OpenCL plugin semantics.