DPC++ Runtime
Runtime libraries for oneAPI DPC++
bfloat16.hpp
Go to the documentation of this file.
1 //==--------- bfloat16.hpp ------- SYCL bfloat16 conversion ----------------==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #pragma once
10 
11 #include <sycl/aliases.hpp> // for half
12 #include <sycl/detail/defines_elementary.hpp> // for __DPCPP_SYCL_EXTERNAL
13 #include <sycl/half_type.hpp> // for half
14 
15 #include <stdint.h> // for uint16_t, uint32_t
16 
17 extern "C" __DPCPP_SYCL_EXTERNAL uint16_t
19 extern "C" __DPCPP_SYCL_EXTERNAL float
21 extern "C" __DPCPP_SYCL_EXTERNAL void
22 __devicelib_ConvertFToBF16INTELVec1(const float *, uint16_t *) noexcept;
23 extern "C" __DPCPP_SYCL_EXTERNAL void
24 __devicelib_ConvertBF16ToFINTELVec1(const uint16_t *, float *) noexcept;
25 extern "C" __DPCPP_SYCL_EXTERNAL void
26 __devicelib_ConvertFToBF16INTELVec2(const float *, uint16_t *) noexcept;
27 extern "C" __DPCPP_SYCL_EXTERNAL void
28 __devicelib_ConvertBF16ToFINTELVec2(const uint16_t *, float *) noexcept;
29 extern "C" __DPCPP_SYCL_EXTERNAL void
30 __devicelib_ConvertFToBF16INTELVec3(const float *, uint16_t *) noexcept;
31 extern "C" __DPCPP_SYCL_EXTERNAL void
32 __devicelib_ConvertBF16ToFINTELVec3(const uint16_t *, float *) noexcept;
33 extern "C" __DPCPP_SYCL_EXTERNAL void
34 __devicelib_ConvertFToBF16INTELVec4(const float *, uint16_t *) noexcept;
35 extern "C" __DPCPP_SYCL_EXTERNAL void
36 __devicelib_ConvertBF16ToFINTELVec4(const uint16_t *, float *) noexcept;
37 extern "C" __DPCPP_SYCL_EXTERNAL void
38 __devicelib_ConvertFToBF16INTELVec8(const float *, uint16_t *) noexcept;
39 extern "C" __DPCPP_SYCL_EXTERNAL void
40 __devicelib_ConvertBF16ToFINTELVec8(const uint16_t *, float *) noexcept;
41 extern "C" __DPCPP_SYCL_EXTERNAL void
43 extern "C" __DPCPP_SYCL_EXTERNAL void
45 
46 namespace sycl {
47 inline namespace _V1 {
48 namespace ext::oneapi {
49 
50 class bfloat16;
51 
52 namespace detail {
53 using Bfloat16StorageT = uint16_t;
56 // Class to convert different data types to Bfloat16
57 // with different rounding modes.
58 class ConvertToBfloat16;
59 
60 template <int N> void BF16VecToFloatVec(const bfloat16 src[N], float dst[N]) {
61 #if defined(__SYCL_DEVICE_ONLY__) && (defined(__SPIR__) || defined(__SPIRV__))
62  const uint16_t *src_i16 = sycl::bit_cast<const uint16_t *>(src);
63  if constexpr (N == 1)
65  else if constexpr (N == 2)
67  else if constexpr (N == 3)
69  else if constexpr (N == 4)
71  else if constexpr (N == 8)
73  else if constexpr (N == 16)
75 #else
76  for (int i = 0; i < N; ++i) {
77  dst[i] = (float)src[i];
78  }
79 #endif
80 }
81 } // namespace detail
82 
83 class bfloat16 {
84 protected:
86 
87  friend inline detail::Bfloat16StorageT
89  friend inline bfloat16
92 
93 public:
94  bfloat16() = default;
95  constexpr bfloat16(const bfloat16 &) = default;
96  constexpr bfloat16(bfloat16 &&) = default;
97  constexpr bfloat16 &operator=(const bfloat16 &rhs) = default;
98  ~bfloat16() = default;
99 
100 private:
101  static detail::Bfloat16StorageT from_float_fallback(const float &a) {
102  // We don't call sycl::isnan because we don't want a data type to depend on
103  // builtins.
104  if (a != a)
105  return 0xffc1;
106 
107  union {
108  uint32_t intStorage;
109  float floatValue;
110  };
111  floatValue = a;
112  // Do RNE and truncate
113  uint32_t roundingBias = ((intStorage >> 16) & 0x1) + 0x00007FFF;
114  return static_cast<uint16_t>((intStorage + roundingBias) >> 16);
115  }
116 
117  // Explicit conversion functions
118  static detail::Bfloat16StorageT from_float(const float &a) {
119 #if defined(__SYCL_DEVICE_ONLY__)
120 #if defined(__NVPTX__)
121 #if (__SYCL_CUDA_ARCH__ >= 800)
123  asm("cvt.rn.bf16.f32 %0, %1;" : "=h"(res) : "f"(a));
124  return res;
125 #else
126  return from_float_fallback(a);
127 #endif
128 #elif defined(__AMDGCN__)
129  return from_float_fallback(a);
130 #else
132 #endif
133 #endif
134  return from_float_fallback(a);
135  }
136 
137  static float to_float(const detail::Bfloat16StorageT &a) {
138 #if defined(__SYCL_DEVICE_ONLY__) && (defined(__SPIR__) || defined(__SPIRV__))
140 #else
141  union {
142  uint32_t intStorage;
143  float floatValue;
144  };
145  intStorage = a << 16;
146  return floatValue;
147 #endif
148  }
149 
150 protected:
151  friend class sycl::vec<bfloat16, 1>;
152  friend class sycl::vec<bfloat16, 2>;
153  friend class sycl::vec<bfloat16, 3>;
154  friend class sycl::vec<bfloat16, 4>;
155  friend class sycl::vec<bfloat16, 8>;
156  friend class sycl::vec<bfloat16, 16>;
157 
158 public:
159  // Implicit conversion from float to bfloat16
160  bfloat16(const float &a) { value = from_float(a); }
161 
162  bfloat16 &operator=(const float &rhs) {
163  value = from_float(rhs);
164  return *this;
165  }
166 
167  // Implicit conversion from sycl::half to bfloat16
168  bfloat16(const sycl::half &a) { value = from_float(a); }
169 
171  value = from_float(rhs);
172  return *this;
173  }
174 
175  // Implicit conversion from bfloat16 to float
176  operator float() const { return to_float(value); }
177 
178  // Implicit conversion from bfloat16 to sycl::half
179  operator sycl::half() const { return to_float(value); }
180 
181  // Logical operators (!,||,&&) are covered if we can cast to bool
182  explicit operator bool() { return to_float(value) != 0.0f; }
183 
184  // Unary minus operator overloading
185  friend bfloat16 operator-(bfloat16 &lhs) {
186 #if defined(__SYCL_DEVICE_ONLY__) && defined(__NVPTX__) && \
187  (__SYCL_CUDA_ARCH__ >= 800)
189  asm("neg.bf16 %0, %1;" : "=h"(res) : "h"(lhs.value));
190  return detail::bitsToBfloat16(res);
191 #elif defined(__SYCL_DEVICE_ONLY__) && (defined(__SPIR__) || defined(__SPIRV__))
193 #else
194  return bfloat16{-to_float(lhs.value)};
195 #endif
196  }
197 
198 // Increment and decrement operators overloading
199 #define OP(op) \
200  friend bfloat16 &operator op(bfloat16 & lhs) { \
201  float f = to_float(lhs.value); \
202  lhs.value = from_float(op f); \
203  return lhs; \
204  } \
205  friend bfloat16 operator op(bfloat16 &lhs, int) { \
206  bfloat16 old = lhs; \
207  operator op(lhs); \
208  return old; \
209  }
210  OP(++)
211  OP(--)
212 #undef OP
213 
214  // Assignment operators overloading
215 #define OP(op) \
216  friend bfloat16 &operator op(bfloat16 & lhs, const bfloat16 & rhs) { \
217  float f = static_cast<float>(lhs); \
218  f op static_cast<float>(rhs); \
219  return lhs = f; \
220  }
221  OP(+=)
222  OP(-=)
223  OP(*=)
224  OP(/=)
225 #undef OP
226 
227 // Binary operators overloading
228 #define OP(type, op) \
229  friend type operator op(const bfloat16 &lhs, const bfloat16 &rhs) { \
230  return type{static_cast<float>(lhs) op static_cast<float>(rhs)}; \
231  } \
232  template <typename T> \
233  friend std::enable_if_t<std::is_convertible_v<T, float>, type> operator op( \
234  const bfloat16 & lhs, const T & rhs) { \
235  return type{static_cast<float>(lhs) op static_cast<float>(rhs)}; \
236  } \
237  template <typename T> \
238  friend std::enable_if_t<std::is_convertible_v<T, float>, type> operator op( \
239  const T & lhs, const bfloat16 & rhs) { \
240  return type{static_cast<float>(lhs) op static_cast<float>(rhs)}; \
241  }
242  OP(bfloat16, +)
243  OP(bfloat16, -)
244  OP(bfloat16, *)
245  OP(bfloat16, /)
246  OP(bool, ==)
247  OP(bool, !=)
248  OP(bool, <)
249  OP(bool, >)
250  OP(bool, <=)
251  OP(bool, >=)
252 #undef OP
253 
254  // Bitwise(|,&,~,^), modulo(%) and shift(<<,>>) operations are not supported
255  // for floating-point types.
256 
257  // Stream Operator << and >>
258  inline friend std::ostream &operator<<(std::ostream &O, bfloat16 const &rhs) {
259  O << static_cast<float>(rhs);
260  return O;
261  }
262 
263  inline friend std::istream &operator>>(std::istream &I, bfloat16 &rhs) {
264  float ValFloat = 0.0f;
265  I >> ValFloat;
266  rhs = ValFloat;
267  return I;
268  }
269 };
270 
271 namespace detail {
272 
273 template <int N> void FloatVecToBF16Vec(float src[N], bfloat16 dst[N]) {
274 #if defined(__SYCL_DEVICE_ONLY__) && (defined(__SPIR__) || defined(__SPIRV__))
275  uint16_t *dst_i16 = sycl::bit_cast<uint16_t *>(dst);
276  if constexpr (N == 1)
278  else if constexpr (N == 2)
280  else if constexpr (N == 3)
282  else if constexpr (N == 4)
284  else if constexpr (N == 8)
286  else if constexpr (N == 16)
288 #else
289  for (int i = 0; i < N; ++i) {
290  // No need to cast as bfloat16 has a assignment op overload that takes
291  // a float.
292  dst[i] = src[i];
293  }
294 #endif
295 }
296 
297 // Helper function for getting the internal representation of a bfloat16.
299  return Value.value;
300 }
301 
302 // Helper function for creating a float16 from a value with the same type as the
303 // internal representation.
305  bfloat16 res;
306  res.value = Value;
307  return res;
308 }
309 
310 // Class to convert different data types to Bfloat16
311 // with different rounding modes.
313 
314  // The automatic rounding mode is RTE.
315  enum SYCLRoundingMode { automatic = 0, rte = 1, rtz = 2, rtp = 3, rtn = 4 };
316 
317  // Function to get the most significant bit position of a number.
318  template <typename Ty> static size_t get_msb_pos(const Ty &x) {
319  assert(x != 0);
320  size_t idx = 0;
321  Ty mask = ((Ty)1 << (sizeof(Ty) * 8 - 1));
322  for (idx = 0; idx < (sizeof(Ty) * 8); ++idx) {
323  if ((x & mask) == mask)
324  break;
325  mask >>= 1;
326  }
327 
328  return (sizeof(Ty) * 8 - 1 - idx);
329  }
330 
331  // Helper function to get BF16 from float with different rounding modes.
332  // Reference:
333  // https://github.com/intel/llvm/blob/sycl/libdevice/imf_bf16.hpp#L30
334  static bfloat16
335  getBFloat16FromFloatWithRoundingMode(const float &f,
336  SYCLRoundingMode roundingMode) {
337 
338  if (roundingMode == SYCLRoundingMode::automatic ||
339  roundingMode == SYCLRoundingMode::rte) {
340  // Use the default rounding mode.
341  return bfloat16{f};
342  } else {
343  uint32_t u32_val = sycl::bit_cast<uint32_t>(f);
344  uint16_t bf16_sign = static_cast<uint16_t>((u32_val >> 31) & 0x1);
345  uint16_t bf16_exp = static_cast<uint16_t>((u32_val >> 23) & 0x7FF);
346  uint32_t f_mant = u32_val & 0x7F'FFFF;
347  uint16_t bf16_mant = static_cast<uint16_t>(f_mant >> 16);
348  // +/-infinity and NAN
349  if (bf16_exp == 0xFF) {
350  if (!f_mant)
351  return bitsToBfloat16(bf16_sign ? 0xFF80 : 0x7F80);
352  else
353  return bitsToBfloat16((bf16_sign << 15) | (bf16_exp << 7) |
354  bf16_mant);
355  }
356 
357  // +/-0
358  if (!bf16_exp && !f_mant) {
359  return bitsToBfloat16(bf16_sign ? 0x8000 : 0x0);
360  }
361 
362  uint16_t mant_discard = static_cast<uint16_t>(f_mant & 0xFFFF);
363  switch (roundingMode) {
364  case SYCLRoundingMode::rtn:
365  if (bf16_sign && mant_discard)
366  bf16_mant++;
367  break;
368  case SYCLRoundingMode::rtz:
369  break;
370  case SYCLRoundingMode::rtp:
371  if (!bf16_sign && mant_discard)
372  bf16_mant++;
373  break;
374 
375  // Should not reach here. Adding these just to suppress the warning.
377  case SYCLRoundingMode::rte:
378  break;
379  }
380 
381  // if overflow happens, bf16_exp will be 0xFF and bf16_mant will be 0,
382  // infinity will be returned.
383  if (bf16_mant == 0x80) {
384  bf16_mant = 0;
385  bf16_exp++;
386  }
387 
388  return bitsToBfloat16((bf16_sign << 15) | (bf16_exp << 7) | bf16_mant);
389  }
390  }
391 
392  // Helper function to get BF16 from unsigned integral data types
393  // with different rounding modes.
394  // Reference:
395  // https://github.com/intel/llvm/blob/sycl/libdevice/imf_bf16.hpp#L302
396  template <typename T>
397  static bfloat16
398  getBFloat16FromUIntegralWithRoundingMode(T &u,
399  SYCLRoundingMode roundingMode) {
400 
401  size_t msb_pos = get_msb_pos(u);
402  // return half representation for 1
403  if (msb_pos == 0)
404  return bitsToBfloat16(0x3F80);
405 
406  T mant = u & ((static_cast<T>(1) << msb_pos) - 1);
407  // Unsigned integral value can be represented by 1.mant * (2^msb_pos),
408  // msb_pos is also the bit number of mantissa, 0 < msb_pos < sizeof(Ty) * 8,
409  // exponent of bfloat16 precision value range is [-126, 127].
410 
411  uint16_t b_exp = msb_pos;
412  uint16_t b_mant;
413 
414  if (msb_pos <= 7) {
415  // No need to round off if we can losslessly fit the input value in
416  // mantissa of bfloat16.
417  mant <<= (7 - msb_pos);
418  b_mant = static_cast<uint16_t>(mant);
419  } else {
420  b_mant = static_cast<uint16_t>(mant >> (msb_pos - 7));
421  T mant_discard = mant & ((static_cast<T>(1) << (msb_pos - 7)) - 1);
422  T mid = static_cast<T>(1) << (msb_pos - 8);
423  switch (roundingMode) {
425  case SYCLRoundingMode::rte:
426  if ((mant_discard > mid) ||
427  ((mant_discard == mid) && ((b_mant & 0x1) == 0x1)))
428  b_mant++;
429  break;
430  case SYCLRoundingMode::rtp:
431  if (mant_discard)
432  b_mant++;
433  break;
434  case SYCLRoundingMode::rtn:
435  case SYCLRoundingMode::rtz:
436  break;
437  }
438  }
439  if (b_mant == 0x80) {
440  b_exp++;
441  b_mant = 0;
442  }
443 
444  b_exp += 127;
445  return bitsToBfloat16((b_exp << 7) | b_mant);
446  }
447 
448  // Helper function to get BF16 from signed integral data types.
449  // Reference:
450  // https://github.com/intel/llvm/blob/sycl/libdevice/imf_bf16.hpp#L353
451  template <typename T>
452  static bfloat16
453  getBFloat16FromSIntegralWithRoundingMode(T &i,
454  SYCLRoundingMode roundingMode) {
455  // Get unsigned type corresponding to T.
456  typedef typename std::make_unsigned_t<T> UTy;
457 
458  uint16_t b_sign = (i >= 0) ? 0 : 0x8000;
459  UTy ui = (i > 0) ? static_cast<UTy>(i) : static_cast<UTy>(-i);
460  size_t msb_pos = get_msb_pos<UTy>(ui);
461  if (msb_pos == 0)
462  return bitsToBfloat16(b_sign ? 0xBF80 : 0x3F80);
463  UTy mant = ui & ((static_cast<UTy>(1) << msb_pos) - 1);
464 
465  uint16_t b_exp = msb_pos;
466  uint16_t b_mant;
467  if (msb_pos <= 7) {
468  mant <<= (7 - msb_pos);
469  b_mant = static_cast<uint16_t>(mant);
470  } else {
471  b_mant = static_cast<uint16_t>(mant >> (msb_pos - 7));
472  T mant_discard = mant & ((static_cast<T>(1) << (msb_pos - 7)) - 1);
473  T mid = static_cast<T>(1) << (msb_pos - 8);
474  switch (roundingMode) {
476  case SYCLRoundingMode::rte:
477  if ((mant_discard > mid) ||
478  ((mant_discard == mid) && ((b_mant & 0x1) == 0x1)))
479  b_mant++;
480  break;
481  case SYCLRoundingMode::rtp:
482  if (mant_discard && !b_sign)
483  b_mant++;
484  break;
485  case SYCLRoundingMode::rtn:
486  if (mant_discard && b_sign)
487  b_mant++;
488  case SYCLRoundingMode::rtz:
489  break;
490  }
491  }
492 
493  if (b_mant == 0x80) {
494  b_exp++;
495  b_mant = 0;
496  }
497  b_exp += 127;
498  return bitsToBfloat16(b_sign | (b_exp << 7) | b_mant);
499  }
500 
501  // Helper function to get BF16 from double with RTE rounding modes.
502  // Reference:
503  // https://github.com/intel/llvm/blob/sycl/libdevice/imf_bf16.hpp#L79
504  static bfloat16 getBFloat16FromDoubleWithRTE(const double &d) {
505 
506  uint64_t u64_val = sycl::bit_cast<uint64_t>(d);
507  int16_t bf16_sign = (u64_val >> 63) & 0x1;
508  uint16_t fp64_exp = static_cast<uint16_t>((u64_val >> 52) & 0x7FF);
509  uint64_t fp64_mant = (u64_val & 0xF'FFFF'FFFF'FFFF);
510  uint16_t bf16_mant;
511  // handling +/-infinity and NAN for double input
512  if (fp64_exp == 0x7FF) {
513  if (!fp64_mant) {
514  return bf16_sign ? 0xFF80 : 0x7F80;
515  } else {
516  // returns a quiet NaN
517  return 0x7FC0;
518  }
519  }
520 
521  // Subnormal double precision is converted to 0
522  if (fp64_exp == 0) {
523  return bf16_sign ? 0x8000 : 0x0;
524  }
525 
526  fp64_exp -= 1023;
527  // handling overflow, convert to +/-infinity
528  if (static_cast<int16_t>(fp64_exp) > 127) {
529  return bf16_sign ? 0xFF80 : 0x7F80;
530  }
531 
532  // handling underflow
533  if (static_cast<int16_t>(fp64_exp) < -133) {
534  return bf16_sign ? 0x8000 : 0x0;
535  }
536 
537  //-133 <= fp64_exp <= 127, 1.signicand * 2^fp64_exp
538  // For these numbers, they are NOT subnormal double-precision numbers but
539  // will turn into subnormal when converting to bfloat16
540  uint64_t discard_bits;
541  if (static_cast<int16_t>(fp64_exp) < -126) {
542  fp64_mant |= 0x10'0000'0000'0000;
543  fp64_mant >>= -126 - static_cast<int16_t>(fp64_exp) - 1;
544  discard_bits = fp64_mant & 0x3FFF'FFFF'FFFF;
545  bf16_mant = static_cast<uint16_t>(fp64_mant >> 46);
546  if (discard_bits > 0x2000'0000'0000 ||
547  ((discard_bits == 0x2000'0000'0000) && ((bf16_mant & 0x1) == 0x1)))
548  bf16_mant += 1;
549  fp64_exp = 0;
550  if (bf16_mant == 0x80) {
551  bf16_mant = 0;
552  fp64_exp = 1;
553  }
554  return (bf16_sign << 15) | (fp64_exp << 7) | bf16_mant;
555  }
556 
557  // For normal value, discard 45 bits from mantissa
558  discard_bits = fp64_mant & 0x1FFF'FFFF'FFFF;
559  bf16_mant = static_cast<uint16_t>(fp64_mant >> 45);
560  if (discard_bits > 0x1000'0000'0000 ||
561  ((discard_bits == 0x1000'0000'0000) && ((bf16_mant & 0x1) == 0x1)))
562  bf16_mant += 1;
563 
564  if (bf16_mant == 0x80) {
565  if (fp64_exp != 127) {
566  bf16_mant = 0;
567  fp64_exp++;
568  } else {
569  return bf16_sign ? 0xFF80 : 0x7F80;
570  }
571  }
572  fp64_exp += 127;
573 
574  return (bf16_sign << 15) | (fp64_exp << 7) | bf16_mant;
575  }
576 
577 public:
578  template <typename Ty, int rm>
579  static bfloat16 getBfloat16WithRoundingMode(const Ty &a) {
580 
581  if (!a)
582  return bfloat16{0.0f};
583 
584  constexpr SYCLRoundingMode roundingMode = static_cast<SYCLRoundingMode>(rm);
585 
586  // Float.
587  if constexpr (std::is_same_v<Ty, float>) {
588  return getBFloat16FromFloatWithRoundingMode(a, roundingMode);
589  }
590  // Double.
591  else if constexpr (std::is_same_v<Ty, double>) {
592  static_assert(
593  roundingMode == SYCLRoundingMode::automatic ||
594  roundingMode == SYCLRoundingMode::rte,
595  "Only automatic/RTE rounding mode is supported for double type.");
596  return getBFloat16FromDoubleWithRTE(a);
597  }
598  // Half
599  else if constexpr (std::is_same_v<Ty, sycl::half>) {
600  // Convert half to float and then convert to bfloat16.
601  // Conversion of half to float is lossless as the latter
602  // have a wider dynamic range.
603  return getBFloat16FromFloatWithRoundingMode(static_cast<float>(a),
604  roundingMode);
605  }
606  // Unsigned integral types.
607  else if constexpr (std::is_integral_v<Ty> && std::is_unsigned_v<Ty>) {
608  return getBFloat16FromUIntegralWithRoundingMode<Ty>(a, roundingMode);
609  }
610  // Signed integral types.
611  else if constexpr (std::is_integral_v<Ty> && std::is_signed_v<Ty>) {
612  return getBFloat16FromSIntegralWithRoundingMode<Ty>(a, roundingMode);
613  } else {
614  static_assert(std::is_integral_v<Ty> || std::is_floating_point_v<Ty>,
615  "Only integral and floating point types are supported.");
616  }
617  }
618 }; // class ConvertToBfloat16.
619 } // namespace detail
620 
621 } // namespace ext::oneapi
622 
623 } // namespace _V1
624 } // namespace sycl
__DPCPP_SYCL_EXTERNAL void __devicelib_ConvertFToBF16INTELVec4(const float *, uint16_t *) noexcept
__DPCPP_SYCL_EXTERNAL void __devicelib_ConvertFToBF16INTELVec16(const float *, uint16_t *) noexcept
__DPCPP_SYCL_EXTERNAL void __devicelib_ConvertFToBF16INTELVec3(const float *, uint16_t *) noexcept
__DPCPP_SYCL_EXTERNAL void __devicelib_ConvertBF16ToFINTELVec16(const uint16_t *, float *) noexcept
__DPCPP_SYCL_EXTERNAL void __devicelib_ConvertFToBF16INTELVec8(const float *, uint16_t *) noexcept
__DPCPP_SYCL_EXTERNAL void __devicelib_ConvertBF16ToFINTELVec1(const uint16_t *, float *) noexcept
__DPCPP_SYCL_EXTERNAL void __devicelib_ConvertBF16ToFINTELVec2(const uint16_t *, float *) noexcept
__DPCPP_SYCL_EXTERNAL void __devicelib_ConvertFToBF16INTELVec1(const float *, uint16_t *) noexcept
__DPCPP_SYCL_EXTERNAL void __devicelib_ConvertBF16ToFINTELVec3(const uint16_t *, float *) noexcept
__DPCPP_SYCL_EXTERNAL uint16_t __devicelib_ConvertFToBF16INTEL(const float &) noexcept
__DPCPP_SYCL_EXTERNAL void __devicelib_ConvertBF16ToFINTELVec4(const uint16_t *, float *) noexcept
#define OP(op)
Definition: bfloat16.hpp:228
__DPCPP_SYCL_EXTERNAL void __devicelib_ConvertFToBF16INTELVec2(const float *, uint16_t *) noexcept
__DPCPP_SYCL_EXTERNAL float __devicelib_ConvertBF16ToFINTEL(const uint16_t &) noexcept
__DPCPP_SYCL_EXTERNAL void __devicelib_ConvertBF16ToFINTELVec8(const uint16_t *, float *) noexcept
bfloat16(const sycl::half &a)
Definition: bfloat16.hpp:168
constexpr bfloat16(const bfloat16 &)=default
friend bfloat16 operator-(bfloat16 &lhs)
Definition: bfloat16.hpp:185
constexpr bfloat16(bfloat16 &&)=default
friend std::istream & operator>>(std::istream &I, bfloat16 &rhs)
Definition: bfloat16.hpp:263
friend std::ostream & operator<<(std::ostream &O, bfloat16 const &rhs)
Definition: bfloat16.hpp:258
bfloat16 & operator=(const sycl::half &rhs)
Definition: bfloat16.hpp:170
constexpr bfloat16 & operator=(const bfloat16 &rhs)=default
bfloat16 & operator=(const float &rhs)
Definition: bfloat16.hpp:162
detail::Bfloat16StorageT value
Definition: bfloat16.hpp:85
static bfloat16 getBfloat16WithRoundingMode(const Ty &a)
Definition: bfloat16.hpp:579
#define __DPCPP_SYCL_EXTERNAL
sycl::ext::oneapi::bfloat16 bfloat16
void FloatVecToBF16Vec(float src[N], bfloat16 dst[N])
Definition: bfloat16.hpp:273
bfloat16 bitsToBfloat16(const Bfloat16StorageT Value)
Definition: bfloat16.hpp:304
void BF16VecToFloatVec(const bfloat16 src[N], float dst[N])
Definition: bfloat16.hpp:60
Bfloat16StorageT bfloat16ToBits(const bfloat16 &Value)
Definition: bfloat16.hpp:298
sycl::detail::half_impl::half half
Definition: aliases.hpp:101
autodecltype(x) x
Definition: access.hpp:18
_Abi const simd< _Tp, _Abi > & noexcept
Definition: simd.hpp:1324