62 template <
class F,
class T =
void>
63 using EnableIfFP =
typename std::enable_if_t<std::is_same_v<F, float> ||
64 std::is_same_v<F, double> ||
65 std::is_same_v<F, half>,
69 sycl::access::target::global_buffer,
70 sycl::access::placeholder::false_t>;
79 sycl::access::target::global_buffer,
80 sycl::access::placeholder::false_t>;
94 return ((
static_cast<unsigned>(
static_cast<uint8_t
>(GlobalFlushBuf[WIOffset]))
96 static_cast<uint8_t
>(GlobalFlushBuf[WIOffset + 1]));
105 unsigned WIOffset,
unsigned Offset) {
106 GlobalFlushBuf[WIOffset] =
static_cast<char>((Offset >> 8) & 0xff);
107 GlobalFlushBuf[WIOffset + 1] =
static_cast<char>(Offset & 0xff);
111 unsigned WIOffset,
const char *Str,
unsigned Len,
112 unsigned Padding = 0) {
116 if ((Offset + Len + Padding > FlushBufferSize) ||
117 (WIOffset + Offset + Len + Padding > GlobalFlushBuf.size()))
122 for (
size_t I = 0; I < Padding; ++I, ++Offset)
123 GlobalFlushBuf[WIOffset + Offset] =
' ';
125 for (
size_t I = 0; I < Len; ++I, ++Offset) {
126 GlobalFlushBuf[WIOffset + Offset] = Str[I];
144 template <
typename T>
145 inline std::make_unsigned_t<T>
getAbsVal(
const T Val,
const int Base) {
146 return ((Base == 10) && (Val < 0)) ? -Val : Val;
153 return 'a' + Digit - 10;
157 template <
typename T>
158 inline typename std::enable_if_t<std::is_integral_v<T>,
unsigned>
160 unsigned NumDigits = 0;
171 template <
typename T>
172 inline typename std::enable_if_t<std::is_integral_v<T>,
unsigned>
173 ScalarToStr(
const T &Val,
char *Buf,
unsigned Flags,
int,
int Precision = -1) {
198 if ((Flags &
ShowPos) && Val >= 0)
205 if (Base != 10 && (Flags &
ShowBase)) {
213 const unsigned NumBuf =
integralToBase(AbsVal, Base, Buf + Offset);
216 return Offset + NumBuf;
219 inline unsigned append(
char *Dst,
const char *Src) {
221 for (; Src[Len] !=
'\0'; ++Len)
224 for (
unsigned I = 0; I < Len; ++I)
229 static inline unsigned F2I32(
float Val) {
235 return Internal.I32Val;
238 static inline unsigned long long D2I64(
double Val) {
241 unsigned long long I64Val;
244 return Internal.I64Val;
247 template <
typename T>
248 inline typename detail::enable_if_t<
249 std::is_same<T, float>::value || std::is_same<T, double>::value,
bool>
251 if constexpr (
sizeof(Val) == 4) {
252 return (
F2I32(Val) & 0x7fffffff) == 0x7f800000;
253 }
else if constexpr (
sizeof(Val) == 8) {
254 return (
D2I64(Val) & -1ULL >> 1) == 0x7ffULL << 52;
260 template <
typename T>
261 inline typename detail::enable_if_t<
262 std::is_same<T, float>::value || std::is_same<T, double>::value,
bool>
264 if constexpr (
sizeof(Val) == 4) {
265 return (
F2I32(Val) & 0x7fffffff) > 0x7f800000;
266 }
else if constexpr (
sizeof(Val) == 8) {
267 return (
D2I64(Val) & -1ULL >> 1) > 0x7ffULL << 52;
273 template <
typename T>
274 inline typename detail::enable_if_t<
275 std::is_same<T, float>::value || std::is_same<T, double>::value,
bool>
277 if constexpr (
sizeof(Val) == 4) {
278 return F2I32(Val) >> 31;
279 }
else if constexpr (
sizeof(Val) == 8) {
280 return D2I64(Val) >> 63;
286 template <
typename T>
287 typename detail::enable_if_t<
288 std::is_same<T, float>::value || std::is_same<T, double>::value,
unsigned>
295 return append(Buf,
"nan");
303 return append(Buf,
"-inf");
304 return append(Buf,
"inf");
309 template <
typename T>
310 inline typename std::enable_if_t<std::is_same_v<T, half>,
unsigned>
313 return append(Buf,
"nan");
316 const uint16_t Sign =
reinterpret_cast<uint16_t &
>(Val) & 0x8000;
318 const uint16_t Exp16 = (
reinterpret_cast<uint16_t &
>(Val) & 0x7c00) >> 10;
322 return append(Buf,
"-inf");
323 return append(Buf,
"inf");
328 template <
typename T>
330 int Precision,
bool IsSci) {
334 while (AbsVal >= T{10.0}) {
339 while (AbsVal > T{0.0} && AbsVal < T{1.0}) {
344 auto IntegralPart =
static_cast<int>(AbsVal);
345 auto FractionPart = AbsVal - IntegralPart;
350 int P = Precision > 0 ? Precision : 4;
351 size_t FractionLength = Exp + P;
360 for (
unsigned I = 0; I < FractionLength; ++I) {
361 FractionPart *= T{10.0};
362 FractionDigits[I] =
static_cast<int>(FractionPart);
363 FractionPart -=
static_cast<int>(FractionPart);
366 int Carry = FractionPart >
static_cast<T
>(0.5) ? 1 : 0;
369 for (
int I = FractionLength - 1; I >= 0 && Carry; --I) {
370 auto Digit = FractionDigits[I] + Carry;
371 FractionDigits[I] = Digit % 10;
376 IntegralPart += Carry;
377 if (IntegralPart == 10) {
388 Digits[Offset++] =
'.';
391 for (
unsigned I = 0; I < FractionLength; ++I)
394 auto AbsExp = Exp < 0 ? -Exp : Exp;
396 Digits[Offset++] =
'e';
397 Digits[Offset++] = Exp >= 0 ?
'+' :
'-';
402 Digits[Offset++] =
'0';
403 Digits[Offset++] =
'.';
405 Digits[Offset++] =
'0';
411 for (
unsigned I = 0; I < FractionLength; ++I)
418 for (; I < FractionLength && Exp--; ++I)
422 Digits[Offset++] =
'.';
425 for (; I < FractionLength; ++I)
430 while (Digits[Offset - 1] ==
'0')
434 if (Digits[Offset - 1] ==
'.')
441 template <
typename T>
442 inline EnableIfFP<T, unsigned>
443 ScalarToStr(
const T &Val,
char *Buf,
unsigned Flags,
int,
int Precision = -1) {
449 auto AbsVal = Val < 0 ? Neg : Val;
467 template <
typename T>
468 inline typename std::enable_if_t<std::is_integral_v<T>>
470 unsigned WIOffset,
unsigned Flags,
int Width,
const T &Val) {
472 unsigned Len =
ScalarToStr(Val, Digits, Flags, Width);
473 write(GlobalFlushBuf, FlushBufferSize, WIOffset, Digits, Len,
474 (Width > 0 &&
static_cast<unsigned>(Width) > Len)
475 ?
static_cast<unsigned>(Width) - Len
479 template <
typename T>
482 unsigned WIOffset,
unsigned Flags,
int Width,
int Precision,
485 unsigned Len =
ScalarToStr(Val, Digits, Flags, Width, Precision);
486 write(GlobalFlushBuf, FlushBufferSize, WIOffset, Digits, Len,
487 (Width > 0 &&
static_cast<unsigned>(Width) > Len)
488 ?
static_cast<unsigned>(Width) - Len
499 Cur = GlobalOffset[0].load();
501 if (GlobalBuf.get_range().size() - Cur < Size)
505 }
while (!GlobalOffset[0].compare_exchange_strong(Cur, New));
517 if (!
updateOffset(GlobalOffset, GlobalBuf, Offset, Cur))
521 for (
unsigned I = StmtOffset; I < StmtOffset + Offset; I++) {
522 GlobalBuf[Cur++] = GlobalFlushBuf[I];
528 template <
typename T,
int VecLength>
529 typename std::enable_if_t<(VecLength == 1),
unsigned>
532 return ScalarToStr(
static_cast<T
>(Vec.x()), VecStr, Flags, Width, Precision);
535 template <
typename T,
int VecLength>
536 typename std::enable_if_t<(VecLength == 2 || VecLength == 4 || VecLength == 8 ||
542 VecToStr<T, VecLength / 2>(Vec.lo(), VecStr, Flags, Width, Precision);
544 Len +=
VecToStr<T, VecLength / 2>(Vec.hi(), VecStr + Len, Flags, Width,
549 template <
typename T,
int VecLength>
550 typename std::enable_if_t<(VecLength == 3),
unsigned>
553 unsigned Len = VecToStr<T, 2>(Vec.lo(), VecStr, Flags, Width, Precision);
555 Len += VecToStr<T, 1>(Vec.z(), VecStr + Len, Flags, Width, Precision);
559 template <
typename T,
int VecLength>
561 unsigned WIOffset,
unsigned Flags,
int Width,
564 constexpr
size_t MAX_VEC_SIZE =
566 char VecStr[MAX_VEC_SIZE] = {0};
567 unsigned Len = VecToStr<T, VecLength>(Vec, VecStr, Flags, Width, Precision);
568 write(GlobalFlushBuf, FlushBufferSize, WIOffset, VecStr, Len,
569 (Width > 0 && Width > Len) ? Width - Len : 0);
572 template <
int ArrayLength>
577 for (
int I = 0; I < ArrayLength; ++I) {
579 if (I != ArrayLength - 1)
588 template <
int ArrayLength>
590 size_t FlushBufferSize,
unsigned WIOffset,
594 write(GlobalFlushBuf, FlushBufferSize, WIOffset, Buf, Len);
597 template <
int Dimensions>
599 size_t FlushBufferSize,
unsigned WIOffset,
605 Len +=
append(Buf,
"item(");
606 Len +=
append(Buf + Len,
"range: ");
608 Len +=
append(Buf + Len,
", id: ");
610 Len +=
append(Buf + Len,
", offset: ");
613 write(GlobalFlushBuf, FlushBufferSize, WIOffset, Buf, Len);
616 template <
int Dimensions>
618 size_t FlushBufferSize,
unsigned WIOffset,
624 Len +=
append(Buf,
"nd_range(");
625 Len +=
append(Buf + Len,
"global_range: ");
627 Len +=
append(Buf + Len,
", local_range: ");
629 Len +=
append(Buf + Len,
", offset: ");
632 write(GlobalFlushBuf, FlushBufferSize, WIOffset, Buf, Len);
635 template <
int Dimensions>
637 size_t FlushBufferSize,
unsigned WIOffset,
643 Len +=
append(Buf,
"nd_item(");
644 Len +=
append(Buf + Len,
"global_id: ");
646 Len +=
append(Buf + Len,
", local_id: ");
649 write(GlobalFlushBuf, FlushBufferSize, WIOffset, Buf, Len);
652 template <
int Dimensions>
654 size_t FlushBufferSize,
unsigned WIOffset,
660 Len +=
append(Buf,
"group(");
661 Len +=
append(Buf + Len,
"id: ");
663 Len +=
append(Buf + Len,
", global_range: ");
664 Len +=
ArrayToStr(Buf + Len, Group.get_global_range());
665 Len +=
append(Buf + Len,
", local_range: ");
666 Len +=
ArrayToStr(Buf + Len, Group.get_local_range());
667 Len +=
append(Buf + Len,
", group_range: ");
668 Len +=
ArrayToStr(Buf + Len, Group.get_group_range());
670 write(GlobalFlushBuf, FlushBufferSize, WIOffset, Buf, Len);
677 template <
int Dimensions>
680 Len +=
append(Buf,
"item(");
681 for (
int I = 0; I < 2; ++I) {
682 Len +=
append(Buf + Len, I == 0 ?
"range: " :
", id: ");
689 template <
int Dimensions>
691 size_t FlushBufferSize,
unsigned WIOffset,
697 Len +=
append(Buf,
"h_item(");
698 for (
int I = 0; I < 3; ++I) {
699 Len +=
append(Buf + Len, I == 0 ?
"\n global "
700 : I == 1 ?
"\n logical local "
701 :
"\n physical local ");
706 Len +=
append(Buf + Len,
"\n)");
707 write(GlobalFlushBuf, FlushBufferSize, WIOffset, Buf, Len);
712 template <
typename VecT,
typename OperationLeftT,
typename OperationRightT,
713 template <
typename>
class OperationCurrentT,
int... Indexes>
715 VecT, OperationLeftT, OperationRightT, OperationCurrentT, Indexes...>>
717 using T =
typename VecT::element_type;
718 using Type =
typename sycl::vec<
T, (
sizeof...(Indexes))>;
721 template <
typename T>
723 typename std::enable_if_t<IsSwizzleOp<T>::value,
780 friend const stream &
operator<<(
const stream &,
790 int width()
const {
return Width_; }
792 friend const stream &
operator<<(
const stream &,
809 :
public detail::OwnerLessBase<stream> {
811 #ifndef __SYCL_DEVICE_ONLY__
813 stream(std::shared_ptr<detail::stream_impl> Impl,
817 : impl{Impl}, GlobalBuf{GlobalBuf}, GlobalOffset{GlobalOffset},
818 GlobalFlushBuf{GlobalFlushBuf} {}
822 #ifdef __SYCL_DEVICE_ONLY__
828 stream(
size_t BufferSize,
size_t MaxStatementSize, handler &CGH);
832 stream(
size_t BufferSize,
size_t MaxStatementSize, handler &CGH,
833 const property_list &PropList);
835 #ifdef __SYCL_DEVICE_ONLY__
839 size_t size() const noexcept {
return GlobalBuf.size(); }
841 size_t get_work_item_buffer_size()
const {
846 "get_size() is deprecated since SYCL 2020. Please use size() instead.")
847 size_t get_size()
const {
return size(); }
850 "2020. Please use get_work_item_buffer_size() instead.")
851 size_t get_max_statement_size()
const {
return get_work_item_buffer_size(); }
853 size_t size() const noexcept;
855 size_t get_work_item_buffer_size() const;
857 __SYCL2020_DEPRECATED(
858 "get_size() is deprecated since SYCL 2020. Please
use size()
instead.")
859 size_t get_size() const;
861 __SYCL2020_DEPRECATED("get_max_statement_size() is deprecated since SYCL "
862 "2020. Please
use get_work_item_buffer_size()
instead.")
863 size_t get_max_statement_size() const;
866 size_t get_precision()
const {
return Precision; }
868 size_t get_width()
const {
return Width; }
876 template <
typename propertyT>
bool has_property() const noexcept;
878 template <typename propertyT> propertyT
get_property() const;
881 #ifdef __SYCL_DEVICE_ONLY__
882 char padding[
sizeof(std::shared_ptr<detail::stream_impl>)];
884 std::shared_ptr<detail::stream_impl> impl;
907 mutable unsigned WIOffset = 0;
909 mutable size_t FlushBufferSize;
917 mutable int Precision = -1;
918 mutable int Width = -1;
921 void set_flag(
FmtFlags FormatFlag)
const { Flags |= FormatFlag; }
923 void unset_flag(
FmtFlags FormatFlag)
const { Flags &= ~FormatFlag; }
925 FmtFlags get_flags()
const {
return Flags; }
932 Flags |= FormatFlag & Mask;
977 #ifdef __SYCL_DEVICE_ONLY__
979 range<detail::GlobalBufDim> GlobalBufAccRange,
980 range<detail::GlobalBufDim> GlobalBufMemRange,
981 id<detail::GlobalBufDim> GlobalBufId,
983 range<detail::GlobalOffsetDim> GlobalOffsetAccRange,
984 range<detail::GlobalOffsetDim> GlobalOffsetMemRange,
985 id<detail::GlobalOffsetDim> GlobalOffsetId,
987 range<detail::GlobalBufDim> GlobalFlushAccRange,
988 range<detail::GlobalBufDim> GlobalFlushMemRange,
989 id<detail::GlobalBufDim> GlobalFlushId,
size_t _FlushBufferSize) {
990 GlobalBuf.__init(GlobalBufPtr, GlobalBufAccRange, GlobalBufMemRange,
992 GlobalOffset.__init(GlobalOffsetPtr, GlobalOffsetAccRange,
993 GlobalOffsetMemRange, GlobalOffsetId);
994 GlobalFlushBuf.__init(GlobalFlushPtr, GlobalFlushAccRange,
995 GlobalFlushMemRange, GlobalFlushId);
996 FlushBufferSize = _FlushBufferSize;
1002 WIOffset = GlobalOffset[1].fetch_add(FlushBufferSize);
1018 flushBuffer(GlobalOffset, GlobalBuf, GlobalFlushBuf, WIOffset);
1022 friend class handler;
1024 template <
typename SYCLObjT>
friend class ext::oneapi::weak_object;
1026 friend const stream &
operator<<(
const stream &,
const char);
1027 friend const stream &
operator<<(
const stream &,
const char *);
1028 template <
typename ValueType>
1030 typename std::enable_if_t<std::is_integral_v<ValueType>,
const stream &>
1031 operator<<(
const stream &,
const ValueType &);
1032 friend const stream &
operator<<(
const stream &,
const float &);
1033 friend const stream &
operator<<(
const stream &,
const double &);
1038 friend const stream &
operator<<(
const stream &Out,
1039 const __precision_manipulator__ &RHS);
1041 friend const stream &
operator<<(
const stream &Out,
1042 const __width_manipulator__ &RHS);
1043 template <
typename T,
int Dimensions>
1044 friend const stream &
operator<<(
const stream &Out,
1045 const vec<T, Dimensions> &RHS);
1046 template <
typename T>
1047 friend const stream &
operator<<(
const stream &Out,
const T *RHS);
1048 template <
int Dimensions>
1049 friend const stream &
operator<<(
const stream &Out,
const id<Dimensions> &RHS);
1051 template <
int Dimensions>
1052 friend const stream &
operator<<(
const stream &Out,
1053 const range<Dimensions> &RHS);
1055 template <
int Dimensions>
1056 friend const stream &
operator<<(
const stream &Out,
1057 const item<Dimensions> &RHS);
1059 template <
int Dimensions>
1060 friend const stream &
operator<<(
const stream &Out,
1061 const nd_range<Dimensions> &RHS);
1063 template <
int Dimensions>
1064 friend const stream &
operator<<(
const stream &Out,
1065 const nd_item<Dimensions> &RHS);
1067 template <
int Dimensions>
1068 friend const stream &
operator<<(
const stream &Out,
1069 const group<Dimensions> &RHS);
1071 template <
int Dimensions>
1072 friend const stream &
operator<<(
const stream &Out,
1073 const h_item<Dimensions> &RHS);
1076 #if (!defined(_HAS_STD_BYTE) || _HAS_STD_BYTE != 0)
1078 template <
typename T>
1079 inline std::enable_if_t<std::is_same_v<T, std::byte>,
const stream &>
1081 static_assert(std::is_integral<T>(),
1082 "Convert the byte to a numeric value using std::to_integer");
1087 inline const stream &
operator<<(
const stream &Out,
const char C) {
1088 detail::write(Out.GlobalFlushBuf, Out.FlushBufferSize, Out.WIOffset, &C, 1);
1093 inline const stream &
operator<<(
const stream &Out,
const char *Str) {
1095 for (; Str[Len] !=
'\0'; Len++)
1098 detail::write(Out.GlobalFlushBuf, Out.FlushBufferSize, Out.WIOffset, Str,
1104 inline const stream &
operator<<(
const stream &Out,
const bool &RHS) {
1105 Out << (RHS ?
"true" :
"false");
1110 template <
typename ValueType>
1111 inline typename std::enable_if_t<std::is_integral_v<ValueType>,
const stream &>
1114 Out.get_flags(), Out.get_width(), RHS);
1120 inline const stream &
operator<<(
const stream &Out,
const float &RHS) {
1121 detail::writeFloatingPoint<float>(Out.GlobalFlushBuf, Out.FlushBufferSize,
1122 Out.WIOffset, Out.get_flags(),
1123 Out.get_width(), Out.get_precision(), RHS);
1127 inline const stream &
operator<<(
const stream &Out,
const double &RHS) {
1128 detail::writeFloatingPoint<double>(Out.GlobalFlushBuf, Out.FlushBufferSize,
1129 Out.WIOffset, Out.get_flags(),
1130 Out.get_width(), Out.get_precision(), RHS);
1135 detail::writeFloatingPoint<half>(Out.GlobalFlushBuf, Out.FlushBufferSize,
1136 Out.WIOffset, Out.get_flags(),
1137 Out.get_width(), Out.get_precision(), RHS);
1145 inline const stream &
1152 template <
typename T>
1158 Flags, Out.get_width(),
reinterpret_cast<size_t>(RHS));
1172 Out.Width = RHS.
width();
1181 flushBuffer(Out.GlobalOffset, Out.GlobalBuf, Out.GlobalFlushBuf,
1185 flushBuffer(Out.GlobalOffset, Out.GlobalBuf, Out.GlobalFlushBuf,
1189 Out.set_manipulator(RHS);
1197 template <
typename T,
int VectorLength>
1199 detail::writeVec<T, VectorLength>(Out.GlobalFlushBuf, Out.FlushBufferSize,
1200 Out.WIOffset, Out.get_flags(),
1201 Out.get_width(), Out.get_precision(), RHS);
1207 template <
int Dimensions>
1209 detail::writeArray<Dimensions>(Out.GlobalFlushBuf, Out.FlushBufferSize,
1214 template <
int Dimensions>
1217 detail::writeArray<Dimensions>(Out.GlobalFlushBuf, Out.FlushBufferSize,
1222 template <
int Dimensions>
1225 detail::writeItem<Dimensions>(Out.GlobalFlushBuf, Out.FlushBufferSize,
1230 template <
int Dimensions>
1233 detail::writeNDRange<Dimensions>(Out.GlobalFlushBuf, Out.FlushBufferSize,
1238 template <
int Dimensions>
1241 detail::writeNDItem<Dimensions>(Out.GlobalFlushBuf, Out.FlushBufferSize,
1246 template <
int Dimensions>
1249 detail::writeGroup<Dimensions>(Out.GlobalFlushBuf, Out.FlushBufferSize,
1254 template <
int Dimensions>
1257 detail::writeHItem<Dimensions>(Out.GlobalFlushBuf, Out.FlushBufferSize,
1262 template <
typename T,
typename RT = detail::EnableIfSwizzleVec<T>>
1272 template <>
struct hash<
sycl::stream> {
1274 #ifdef __SYCL_DEVICE_ONLY__
1278 return hash<std::shared_ptr<sycl::detail::stream_impl>>()(