38 #include <type_traits>
42 inline namespace _V1 {
85 template <
class F,
class T =
void>
86 using EnableIfFP =
typename std::enable_if_t<std::is_same_v<F, float> ||
87 std::is_same_v<F, double> ||
88 std::is_same_v<F, half>,
92 sycl::access::target::device>;
101 sycl::access::target::device>;
115 return ((
static_cast<unsigned>(
static_cast<uint8_t
>(GlobalFlushBuf[WIOffset]))
117 static_cast<uint8_t
>(GlobalFlushBuf[WIOffset + 1]));
126 unsigned WIOffset,
unsigned Offset) {
127 GlobalFlushBuf[WIOffset] =
static_cast<char>((Offset >> 8) & 0xff);
128 GlobalFlushBuf[WIOffset + 1] =
static_cast<char>(Offset & 0xff);
132 unsigned WIOffset,
const char *Str,
unsigned Len,
133 unsigned Padding = 0) {
137 if ((Offset + Len + Padding > FlushBufferSize) ||
138 (WIOffset + Offset + Len + Padding > GlobalFlushBuf.size()))
143 for (
size_t I = 0; I < Padding; ++I, ++Offset)
144 GlobalFlushBuf[WIOffset + Offset] =
' ';
146 for (
size_t I = 0; I < Len; ++I, ++Offset) {
147 GlobalFlushBuf[WIOffset + Offset] = Str[I];
165 template <
typename T>
166 inline std::make_unsigned_t<T>
getAbsVal(
const T Val,
const int Base) {
167 return ((Base == 10) && (Val < 0)) ? -Val : Val;
174 return 'a' + Digit - 10;
178 template <
typename T>
179 inline typename std::enable_if_t<std::is_integral_v<T>,
unsigned>
181 unsigned NumDigits = 0;
192 template <
typename T>
193 inline typename std::enable_if_t<std::is_integral_v<T>,
unsigned>
194 ScalarToStr(
const T &Val,
char *Buf,
unsigned Flags,
int,
int Precision = -1) {
219 if ((Flags &
ShowPos) && Val >= 0)
226 if (Base != 10 && (Flags &
ShowBase)) {
234 const unsigned NumBuf =
integralToBase(AbsVal, Base, Buf + Offset);
237 return Offset + NumBuf;
240 inline unsigned append(
char *Dst,
const char *Src) {
242 for (; Src[Len] !=
'\0'; ++Len)
245 for (
unsigned I = 0; I < Len; ++I)
256 return Internal.I32Val;
259 inline unsigned long long D2I64(
double Val) {
262 unsigned long long I64Val;
265 return Internal.I64Val;
268 template <
typename T>
269 inline typename detail::enable_if_t<
270 std::is_same<T, float>::value || std::is_same<T, double>::value,
bool>
272 if constexpr (
sizeof(Val) == 4) {
273 return (
F2I32(Val) & 0x7fffffff) == 0x7f800000;
274 }
else if constexpr (
sizeof(Val) == 8) {
275 return (
D2I64(Val) & -1ULL >> 1) == 0x7ffULL << 52;
281 template <
typename T>
282 inline typename detail::enable_if_t<
283 std::is_same<T, float>::value || std::is_same<T, double>::value,
bool>
285 if constexpr (
sizeof(Val) == 4) {
286 return (
F2I32(Val) & 0x7fffffff) > 0x7f800000;
287 }
else if constexpr (
sizeof(Val) == 8) {
288 return (
D2I64(Val) & -1ULL >> 1) > 0x7ffULL << 52;
294 template <
typename T>
295 inline typename detail::enable_if_t<
296 std::is_same<T, float>::value || std::is_same<T, double>::value,
bool>
298 if constexpr (
sizeof(Val) == 4) {
299 return F2I32(Val) >> 31;
300 }
else if constexpr (
sizeof(Val) == 8) {
301 return D2I64(Val) >> 63;
307 template <
typename T>
308 typename detail::enable_if_t<
309 std::is_same<T, float>::value || std::is_same<T, double>::value,
unsigned>
316 return append(Buf,
"nan");
324 return append(Buf,
"-inf");
325 return append(Buf,
"inf");
330 template <
typename T>
331 inline typename std::enable_if_t<std::is_same_v<T, half>,
unsigned>
334 return append(Buf,
"nan");
337 const uint16_t Sign =
reinterpret_cast<uint16_t &
>(Val) & 0x8000;
339 const uint16_t Exp16 = (
reinterpret_cast<uint16_t &
>(Val) & 0x7c00) >> 10;
343 return append(Buf,
"-inf");
344 return append(Buf,
"inf");
349 template <
typename T>
351 int Precision,
bool IsSci) {
355 while (AbsVal >= T{10.0}) {
360 while (AbsVal > T{0.0} && AbsVal < T{1.0}) {
365 auto IntegralPart =
static_cast<int>(AbsVal);
366 auto FractionPart = AbsVal - IntegralPart;
371 int P = Precision > 0 ? Precision : 4;
372 size_t FractionLength = Exp + P;
381 for (
unsigned I = 0; I < FractionLength; ++I) {
382 FractionPart *= T{10.0};
383 FractionDigits[I] =
static_cast<int>(FractionPart);
384 FractionPart -=
static_cast<int>(FractionPart);
387 int Carry = FractionPart >
static_cast<T
>(0.5) ? 1 : 0;
390 for (
int I = FractionLength - 1; I >= 0 && Carry; --I) {
391 auto Digit = FractionDigits[I] + Carry;
392 FractionDigits[I] = Digit % 10;
397 IntegralPart += Carry;
398 if (IntegralPart == 10) {
409 Digits[Offset++] =
'.';
412 for (
unsigned I = 0; I < FractionLength; ++I)
415 auto AbsExp = Exp < 0 ? -Exp : Exp;
417 Digits[Offset++] =
'e';
418 Digits[Offset++] = Exp >= 0 ?
'+' :
'-';
423 Digits[Offset++] =
'0';
424 Digits[Offset++] =
'.';
426 Digits[Offset++] =
'0';
432 for (
unsigned I = 0; I < FractionLength; ++I)
439 for (; I < FractionLength && Exp--; ++I)
443 Digits[Offset++] =
'.';
446 for (; I < FractionLength; ++I)
451 while (Digits[Offset - 1] ==
'0')
455 if (Digits[Offset - 1] ==
'.')
462 template <
typename T>
463 inline EnableIfFP<T, unsigned>
464 ScalarToStr(
const T &Val,
char *Buf,
unsigned Flags,
int,
int Precision = -1) {
470 auto AbsVal = Val < 0 ? Neg : Val;
488 template <
typename T>
489 inline typename std::enable_if_t<std::is_integral_v<T>>
491 unsigned WIOffset,
unsigned Flags,
int Width,
const T &Val) {
493 unsigned Len =
ScalarToStr(Val, Digits, Flags, Width);
494 write(GlobalFlushBuf, FlushBufferSize, WIOffset, Digits, Len,
495 (Width > 0 &&
static_cast<unsigned>(Width) > Len)
496 ?
static_cast<unsigned>(Width) - Len
500 template <
typename T>
503 unsigned WIOffset,
unsigned Flags,
int Width,
int Precision,
506 unsigned Len =
ScalarToStr(Val, Digits, Flags, Width, Precision);
507 write(GlobalFlushBuf, FlushBufferSize, WIOffset, Digits, Len,
508 (Width > 0 &&
static_cast<unsigned>(Width) > Len)
509 ?
static_cast<unsigned>(Width) - Len
520 Cur = GlobalOffset[0].load();
522 if (GlobalBuf.get_range().size() - Cur < Size)
526 }
while (!GlobalOffset[0].compare_exchange_strong(Cur, New));
538 if (!
updateOffset(GlobalOffset, GlobalBuf, Offset, Cur))
542 for (
unsigned I = StmtOffset; I < StmtOffset + Offset; I++) {
543 GlobalBuf[Cur++] = GlobalFlushBuf[I];
549 template <
typename T,
int VecLength>
550 typename std::enable_if_t<(VecLength == 1),
unsigned>
553 return ScalarToStr(
static_cast<T
>(Vec.x()), VecStr, Flags, Width, Precision);
556 template <
typename T,
int VecLength>
557 typename std::enable_if_t<(VecLength == 2 || VecLength == 4 || VecLength == 8 ||
563 VecToStr<T, VecLength / 2>(Vec.lo(), VecStr, Flags, Width, Precision);
565 Len +=
VecToStr<T, VecLength / 2>(Vec.hi(), VecStr + Len, Flags, Width,
570 template <
typename T,
int VecLength>
571 typename std::enable_if_t<(VecLength == 3),
unsigned>
574 unsigned Len = VecToStr<T, 2>(Vec.lo(), VecStr, Flags, Width, Precision);
576 Len += VecToStr<T, 1>(Vec.z(), VecStr + Len, Flags, Width, Precision);
580 template <
typename T,
int VecLength>
582 unsigned WIOffset,
unsigned Flags,
int Width,
585 constexpr
size_t MAX_VEC_SIZE =
587 char VecStr[MAX_VEC_SIZE] = {0};
588 unsigned Len = VecToStr<T, VecLength>(Vec, VecStr, Flags, Width, Precision);
589 write(GlobalFlushBuf, FlushBufferSize, WIOffset, VecStr, Len,
590 (Width > 0 && Width > Len) ? Width - Len : 0);
593 template <
int ArrayLength>
598 for (
int I = 0; I < ArrayLength; ++I) {
600 if (I != ArrayLength - 1)
609 template <
int ArrayLength>
611 size_t FlushBufferSize,
unsigned WIOffset,
615 write(GlobalFlushBuf, FlushBufferSize, WIOffset, Buf, Len);
618 template <
int Dimensions>
620 size_t FlushBufferSize,
unsigned WIOffset,
626 Len +=
append(Buf,
"item(");
627 Len +=
append(Buf + Len,
"range: ");
629 Len +=
append(Buf + Len,
", id: ");
631 Len +=
append(Buf + Len,
", offset: ");
634 write(GlobalFlushBuf, FlushBufferSize, WIOffset, Buf, Len);
637 template <
int Dimensions>
639 size_t FlushBufferSize,
unsigned WIOffset,
645 Len +=
append(Buf,
"nd_range(");
646 Len +=
append(Buf + Len,
"global_range: ");
648 Len +=
append(Buf + Len,
", local_range: ");
650 Len +=
append(Buf + Len,
", offset: ");
653 write(GlobalFlushBuf, FlushBufferSize, WIOffset, Buf, Len);
656 template <
int Dimensions>
658 size_t FlushBufferSize,
unsigned WIOffset,
664 Len +=
append(Buf,
"nd_item(");
665 Len +=
append(Buf + Len,
"global_id: ");
667 Len +=
append(Buf + Len,
", local_id: ");
670 write(GlobalFlushBuf, FlushBufferSize, WIOffset, Buf, Len);
673 template <
int Dimensions>
675 size_t FlushBufferSize,
unsigned WIOffset,
681 Len +=
append(Buf,
"group(");
682 Len +=
append(Buf + Len,
"id: ");
684 Len +=
append(Buf + Len,
", global_range: ");
685 Len +=
ArrayToStr(Buf + Len, Group.get_global_range());
686 Len +=
append(Buf + Len,
", local_range: ");
687 Len +=
ArrayToStr(Buf + Len, Group.get_local_range());
688 Len +=
append(Buf + Len,
", group_range: ");
689 Len +=
ArrayToStr(Buf + Len, Group.get_group_range());
691 write(GlobalFlushBuf, FlushBufferSize, WIOffset, Buf, Len);
698 template <
int Dimensions>
701 Len +=
append(Buf,
"item(");
702 for (
int I = 0; I < 2; ++I) {
703 Len +=
append(Buf + Len, I == 0 ?
"range: " :
", id: ");
710 template <
int Dimensions>
712 size_t FlushBufferSize,
unsigned WIOffset,
718 Len +=
append(Buf,
"h_item(");
719 for (
int I = 0; I < 3; ++I) {
720 Len +=
append(Buf + Len, I == 0 ?
"\n global "
721 : I == 1 ?
"\n logical local "
722 :
"\n physical local ");
727 Len +=
append(Buf + Len,
"\n)");
728 write(GlobalFlushBuf, FlushBufferSize, WIOffset, Buf, Len);
733 template <
typename VecT,
typename OperationLeftT,
typename OperationRightT,
734 template <
typename>
class OperationCurrentT,
int... Indexes>
736 VecT, OperationLeftT, OperationRightT, OperationCurrentT, Indexes...>>
742 template <
typename T>
744 typename std::enable_if_t<IsSwizzleOp<T>::value,
801 friend const stream &
operator<<(
const stream &,
811 int width()
const {
return Width_; }
813 friend const stream &
operator<<(
const stream &,
830 :
public detail::OwnerLessBase<stream> {
832 #ifndef __SYCL_DEVICE_ONLY__
834 stream(std::shared_ptr<detail::stream_impl> Impl,
838 : impl{Impl}, GlobalBuf{GlobalBuf}, GlobalOffset{GlobalOffset},
839 GlobalFlushBuf{GlobalFlushBuf} {}
843 #ifdef __SYCL_DEVICE_ONLY__
849 stream(
size_t BufferSize,
size_t MaxStatementSize, handler &CGH);
853 stream(
size_t BufferSize,
size_t MaxStatementSize, handler &CGH,
854 const property_list &PropList);
856 #ifdef __SYCL_DEVICE_ONLY__
860 size_t size() const noexcept {
return GlobalBuf.size(); }
862 size_t get_work_item_buffer_size()
const {
867 "get_size() is deprecated since SYCL 2020. Please use size() instead.")
868 size_t get_size()
const {
return size(); }
871 "2020. Please use get_work_item_buffer_size() instead.")
872 size_t get_max_statement_size()
const {
return get_work_item_buffer_size(); }
874 size_t size() const noexcept;
876 size_t get_work_item_buffer_size() const;
879 "get_size() is deprecated since SYCL 2020. Please
use size()
instead.")
880 size_t get_size() const;
883 "2020. Please
use get_work_item_buffer_size()
instead.")
884 size_t get_max_statement_size() const;
887 size_t get_precision()
const {
return Precision; }
889 size_t get_width()
const {
return Width; }
897 template <
typename propertyT>
bool has_property() const noexcept;
899 template <typename propertyT> propertyT
get_property() const;
902 #ifdef __SYCL_DEVICE_ONLY__
903 char padding[
sizeof(std::shared_ptr<detail::stream_impl>)];
905 std::shared_ptr<detail::stream_impl> impl;
928 mutable unsigned WIOffset = 0;
930 mutable size_t FlushBufferSize;
938 mutable int Precision = -1;
939 mutable int Width = -1;
942 void set_flag(
FmtFlags FormatFlag)
const { Flags |= FormatFlag; }
944 void unset_flag(
FmtFlags FormatFlag)
const { Flags &= ~FormatFlag; }
946 FmtFlags get_flags()
const {
return Flags; }
953 Flags |= FormatFlag & Mask;
998 #ifdef __SYCL_DEVICE_ONLY__
1000 range<detail::GlobalBufDim> GlobalBufAccRange,
1001 range<detail::GlobalBufDim> GlobalBufMemRange,
1002 id<detail::GlobalBufDim> GlobalBufId,
1004 range<detail::GlobalOffsetDim> GlobalOffsetAccRange,
1005 range<detail::GlobalOffsetDim> GlobalOffsetMemRange,
1006 id<detail::GlobalOffsetDim> GlobalOffsetId,
1008 range<detail::GlobalBufDim> GlobalFlushAccRange,
1009 range<detail::GlobalBufDim> GlobalFlushMemRange,
1010 id<detail::GlobalBufDim> GlobalFlushId,
size_t _FlushBufferSize) {
1011 GlobalBuf.__init(GlobalBufPtr, GlobalBufAccRange, GlobalBufMemRange,
1013 GlobalOffset.__init(GlobalOffsetPtr, GlobalOffsetAccRange,
1014 GlobalOffsetMemRange, GlobalOffsetId);
1015 GlobalFlushBuf.__init(GlobalFlushPtr, GlobalFlushAccRange,
1016 GlobalFlushMemRange, GlobalFlushId);
1017 FlushBufferSize = _FlushBufferSize;
1023 WIOffset = GlobalOffset[1].fetch_add(FlushBufferSize);
1039 flushBuffer(GlobalOffset, GlobalBuf, GlobalFlushBuf, WIOffset);
1043 friend class handler;
1045 template <
typename SYCLObjT>
friend class ext::oneapi::weak_object;
1047 friend const stream &
operator<<(
const stream &,
const char);
1048 friend const stream &
operator<<(
const stream &,
const char *);
1049 template <
typename ValueType>
1051 typename std::enable_if_t<std::is_integral_v<ValueType>,
const stream &>
1052 operator<<(
const stream &,
const ValueType &);
1053 friend const stream &
operator<<(
const stream &,
const float &);
1054 friend const stream &
operator<<(
const stream &,
const double &);
1059 friend const stream &
operator<<(
const stream &Out,
1060 const __precision_manipulator__ &RHS);
1062 friend const stream &
operator<<(
const stream &Out,
1063 const __width_manipulator__ &RHS);
1064 template <
typename T,
int Dimensions>
1065 friend const stream &
operator<<(
const stream &Out,
1066 const vec<T, Dimensions> &RHS);
1067 template <
typename T>
1068 friend const stream &
operator<<(
const stream &Out,
const T *RHS);
1069 template <
int Dimensions>
1070 friend const stream &
operator<<(
const stream &Out,
const id<Dimensions> &RHS);
1072 template <
int Dimensions>
1073 friend const stream &
operator<<(
const stream &Out,
1074 const range<Dimensions> &RHS);
1076 template <
int Dimensions>
1077 friend const stream &
operator<<(
const stream &Out,
1078 const item<Dimensions> &RHS);
1080 template <
int Dimensions>
1081 friend const stream &
operator<<(
const stream &Out,
1082 const nd_range<Dimensions> &RHS);
1084 template <
int Dimensions>
1085 friend const stream &
operator<<(
const stream &Out,
1086 const nd_item<Dimensions> &RHS);
1088 template <
int Dimensions>
1089 friend const stream &
operator<<(
const stream &Out,
1090 const group<Dimensions> &RHS);
1092 template <
int Dimensions>
1093 friend const stream &
operator<<(
const stream &Out,
1094 const h_item<Dimensions> &RHS);
1097 #if (!defined(_HAS_STD_BYTE) || _HAS_STD_BYTE != 0)
1099 template <
typename T>
1100 inline std::enable_if_t<std::is_same_v<T, std::byte>,
const stream &>
1102 static_assert(std::is_integral<T>(),
1103 "Convert the byte to a numeric value using std::to_integer");
1108 inline const stream &
operator<<(
const stream &Out,
const char C) {
1109 detail::write(Out.GlobalFlushBuf, Out.FlushBufferSize, Out.WIOffset, &C, 1);
1114 inline const stream &
operator<<(
const stream &Out,
const char *Str) {
1116 for (; Str[Len] !=
'\0'; Len++)
1119 detail::write(Out.GlobalFlushBuf, Out.FlushBufferSize, Out.WIOffset, Str,
1125 inline const stream &
operator<<(
const stream &Out,
const bool &RHS) {
1126 Out << (RHS ?
"true" :
"false");
1131 template <
typename ValueType>
1132 inline typename std::enable_if_t<std::is_integral_v<ValueType>,
const stream &>
1135 Out.get_flags(), Out.get_width(), RHS);
1141 inline const stream &
operator<<(
const stream &Out,
const float &RHS) {
1142 detail::writeFloatingPoint<float>(Out.GlobalFlushBuf, Out.FlushBufferSize,
1143 Out.WIOffset, Out.get_flags(),
1144 Out.get_width(), Out.get_precision(), RHS);
1148 inline const stream &
operator<<(
const stream &Out,
const double &RHS) {
1149 detail::writeFloatingPoint<double>(Out.GlobalFlushBuf, Out.FlushBufferSize,
1150 Out.WIOffset, Out.get_flags(),
1151 Out.get_width(), Out.get_precision(), RHS);
1156 detail::writeFloatingPoint<half>(Out.GlobalFlushBuf, Out.FlushBufferSize,
1157 Out.WIOffset, Out.get_flags(),
1158 Out.get_width(), Out.get_precision(), RHS);
1166 inline const stream &
1173 template <
typename T>
1179 Flags, Out.get_width(),
reinterpret_cast<size_t>(RHS));
1193 Out.Width = RHS.
width();
1202 flushBuffer(Out.GlobalOffset, Out.GlobalBuf, Out.GlobalFlushBuf,
1206 flushBuffer(Out.GlobalOffset, Out.GlobalBuf, Out.GlobalFlushBuf,
1210 Out.set_manipulator(RHS);
1218 template <
typename T,
int VectorLength>
1220 detail::writeVec<T, VectorLength>(Out.GlobalFlushBuf, Out.FlushBufferSize,
1221 Out.WIOffset, Out.get_flags(),
1222 Out.get_width(), Out.get_precision(), RHS);
1228 template <
int Dimensions>
1230 detail::writeArray<Dimensions>(Out.GlobalFlushBuf, Out.FlushBufferSize,
1235 template <
int Dimensions>
1238 detail::writeArray<Dimensions>(Out.GlobalFlushBuf, Out.FlushBufferSize,
1243 template <
int Dimensions>
1246 detail::writeItem<Dimensions>(Out.GlobalFlushBuf, Out.FlushBufferSize,
1251 template <
int Dimensions>
1254 detail::writeNDRange<Dimensions>(Out.GlobalFlushBuf, Out.FlushBufferSize,
1259 template <
int Dimensions>
1262 detail::writeNDItem<Dimensions>(Out.GlobalFlushBuf, Out.FlushBufferSize,
1267 template <
int Dimensions>
1270 detail::writeGroup<Dimensions>(Out.GlobalFlushBuf, Out.FlushBufferSize,
1275 template <
int Dimensions>
1278 detail::writeHItem<Dimensions>(Out.GlobalFlushBuf, Out.FlushBufferSize,
1283 template <
typename T,
typename RT = detail::EnableIfSwizzleVec<T>>
1284 inline const stream &
operator<<(
const stream &Out,
const T &RHS) {
1293 template <>
struct hash<
sycl::stream> {
1295 #ifdef __SYCL_DEVICE_ONLY__
1299 return hash<std::shared_ptr<sycl::detail::stream_impl>>()(