60 template <
class F,
class T =
void>
63 std::is_same<F, double>::value ||
64 std::is_same<F, half>::value,
68 cl::sycl::access::target::global_buffer,
79 cl::sycl::access::target::global_buffer,
94 return ((
static_cast<unsigned>(
static_cast<uint8_t
>(GlobalFlushBuf[WIOffset]))
96 static_cast<uint8_t
>(GlobalFlushBuf[WIOffset + 1]));
105 unsigned WIOffset,
unsigned Offset) {
106 GlobalFlushBuf[WIOffset] =
static_cast<char>((Offset >> 8) & 0xff);
107 GlobalFlushBuf[WIOffset + 1] =
static_cast<char>(Offset & 0xff);
111 unsigned WIOffset,
const char *Str,
unsigned Len,
112 unsigned Padding = 0) {
116 if ((Offset + Len + Padding > FlushBufferSize) ||
117 (WIOffset + Offset + Len + Padding > GlobalFlushBuf.
size()))
122 for (
size_t I = 0; I < Padding; ++I, ++Offset)
123 GlobalFlushBuf[WIOffset + Offset] =
' ';
125 for (
size_t I = 0; I < Len; ++I, ++Offset) {
126 GlobalFlushBuf[WIOffset + Offset] = Str[I];
144 template <
typename T>
145 inline typename std::make_unsigned<T>::type
getAbsVal(
const T Val,
147 return ((Base == 10) && (Val < 0)) ? -Val : Val;
154 return 'a' + Digit - 10;
158 template <
typename T>
161 unsigned NumDigits = 0;
172 template <
typename T>
174 ScalarToStr(
const T &Val,
char *Buf,
unsigned Flags,
int,
int Precision = -1) {
199 if ((Flags &
ShowPos) && Val >= 0)
206 if (Base != 10 && (Flags &
ShowBase)) {
214 const unsigned NumBuf =
integralToBase(AbsVal, Base, Buf + Offset);
217 return Offset + NumBuf;
220 inline unsigned append(
char *Dst,
const char *Src) {
222 for (; Src[Len] !=
'\0'; ++Len)
225 for (
unsigned I = 0; I < Len; ++I)
230 template <
typename T>
232 std::is_same<T, float>::value || std::is_same<T, double>::value,
unsigned>
235 return append(Buf,
"nan");
238 return append(Buf,
"-inf");
239 return append(Buf,
"inf");
244 template <
typename T>
248 return append(Buf,
"nan");
251 const uint16_t Sign =
reinterpret_cast<uint16_t &
>(Val) & 0x8000;
253 const uint16_t Exp16 = (
reinterpret_cast<uint16_t &
>(Val) & 0x7c00) >> 10;
257 return append(Buf,
"-inf");
258 return append(Buf,
"inf");
263 template <
typename T>
265 int Precision,
bool IsSci) {
269 while (AbsVal >= 10.0) {
274 while (AbsVal > 0.0 && AbsVal < 1.0) {
279 auto IntegralPart =
static_cast<int>(AbsVal);
280 auto FractionPart = AbsVal - IntegralPart;
285 int P = Precision > 0 ? Precision : 4;
286 size_t FractionLength = Exp + P;
295 for (
unsigned I = 0; I < FractionLength; ++I) {
296 FractionPart *= 10.0;
297 FractionDigits[I] =
static_cast<int>(FractionPart);
298 FractionPart -=
static_cast<int>(FractionPart);
301 int Carry = FractionPart >
static_cast<T
>(0.5) ? 1 : 0;
304 for (
int I = FractionLength - 1; I >= 0 && Carry; --I) {
305 auto Digit = FractionDigits[I] + Carry;
306 FractionDigits[I] = Digit % 10;
311 IntegralPart += Carry;
312 if (IntegralPart == 10) {
323 Digits[Offset++] =
'.';
326 for (
unsigned I = 0; I < FractionLength; ++I)
329 auto AbsExp = Exp < 0 ? -Exp : Exp;
331 Digits[Offset++] =
'e';
332 Digits[Offset++] = Exp >= 0 ?
'+' :
'-';
337 Digits[Offset++] =
'0';
338 Digits[Offset++] =
'.';
340 Digits[Offset++] =
'0';
346 for (
unsigned I = 0; I < FractionLength; ++I)
353 for (; I < FractionLength && Exp--; ++I)
357 Digits[Offset++] =
'.';
360 for (; I < FractionLength; ++I)
365 while (Digits[Offset - 1] ==
'0')
369 if (Digits[Offset - 1] ==
'.')
376 template <
typename T>
378 ScalarToStr(
const T &Val,
char *Buf,
unsigned Flags,
int,
int Precision = -1) {
384 auto AbsVal = Val < 0 ? Neg : Val;
402 template <
typename T>
405 unsigned WIOffset,
unsigned Flags,
int Width,
const T &Val) {
407 unsigned Len =
ScalarToStr(Val, Digits, Flags, Width);
408 write(GlobalFlushBuf, FlushBufferSize, WIOffset, Digits, Len,
409 (Width > 0 &&
static_cast<unsigned>(Width) > Len)
410 ?
static_cast<unsigned>(Width) - Len
414 template <
typename T>
417 unsigned WIOffset,
unsigned Flags,
int Width,
int Precision,
420 unsigned Len =
ScalarToStr(Val, Digits, Flags, Width, Precision);
421 write(GlobalFlushBuf, FlushBufferSize, WIOffset, Digits, Len,
422 (Width > 0 &&
static_cast<unsigned>(Width) > Len)
423 ?
static_cast<unsigned>(Width) - Len
434 Cur = GlobalOffset[0].load();
440 }
while (!GlobalOffset[0].compare_exchange_strong(Cur, New));
452 if (!
updateOffset(GlobalOffset, GlobalBuf, Offset, Cur))
456 for (
unsigned I = StmtOffset; I < StmtOffset + Offset; I++) {
457 GlobalBuf[Cur++] = GlobalFlushBuf[I];
463 template <
typename T,
int VecLength>
467 return ScalarToStr(
static_cast<T
>(Vec.x()), VecStr, Flags, Width, Precision);
470 template <
typename T,
int VecLength>
472 VecLength == 8 || VecLength == 16),
477 VecToStr<T, VecLength / 2>(Vec.lo(), VecStr, Flags, Width, Precision);
479 Len +=
VecToStr<T, VecLength / 2>(Vec.hi(), VecStr + Len, Flags, Width,
484 template <
typename T,
int VecLength>
488 unsigned Len = VecToStr<T, 2>(Vec.lo(), VecStr, Flags, Width, Precision);
490 Len += VecToStr<T, 1>(Vec.z(), VecStr + Len, Flags, Width, Precision);
494 template <
typename T,
int VecLength>
496 unsigned WIOffset,
unsigned Flags,
int Width,
499 constexpr
size_t MAX_VEC_SIZE =
501 char VecStr[MAX_VEC_SIZE] = {0};
502 unsigned Len = VecToStr<T, VecLength>(Vec, VecStr, Flags, Width, Precision);
503 write(GlobalFlushBuf, FlushBufferSize, WIOffset, VecStr, Len,
504 (Width > 0 && Width > Len) ? Width - Len : 0);
507 template <
int ArrayLength>
512 for (
int I = 0; I < ArrayLength; ++I) {
514 if (I != ArrayLength - 1)
523 template <
int ArrayLength>
525 size_t FlushBufferSize,
unsigned WIOffset,
529 write(GlobalFlushBuf, FlushBufferSize, WIOffset, Buf, Len);
532 template <
int Dimensions>
534 size_t FlushBufferSize,
unsigned WIOffset,
540 Len +=
append(Buf,
"item(");
541 Len +=
append(Buf + Len,
"range: ");
543 Len +=
append(Buf + Len,
", id: ");
545 Len +=
append(Buf + Len,
", offset: ");
548 write(GlobalFlushBuf, FlushBufferSize, WIOffset, Buf, Len);
551 template <
int Dimensions>
553 size_t FlushBufferSize,
unsigned WIOffset,
559 Len +=
append(Buf,
"nd_range(");
560 Len +=
append(Buf + Len,
"global_range: ");
562 Len +=
append(Buf + Len,
", local_range: ");
564 Len +=
append(Buf + Len,
", offset: ");
567 write(GlobalFlushBuf, FlushBufferSize, WIOffset, Buf, Len);
570 template <
int Dimensions>
572 size_t FlushBufferSize,
unsigned WIOffset,
578 Len +=
append(Buf,
"nd_item(");
579 Len +=
append(Buf + Len,
"global_id: ");
581 Len +=
append(Buf + Len,
", local_id: ");
584 write(GlobalFlushBuf, FlushBufferSize, WIOffset, Buf, Len);
587 template <
int Dimensions>
589 size_t FlushBufferSize,
unsigned WIOffset,
595 Len +=
append(Buf,
"group(");
596 Len +=
append(Buf + Len,
"id: ");
598 Len +=
append(Buf + Len,
", global_range: ");
600 Len +=
append(Buf + Len,
", local_range: ");
602 Len +=
append(Buf + Len,
", group_range: ");
605 write(GlobalFlushBuf, FlushBufferSize, WIOffset, Buf, Len);
612 template <
int Dimensions>
615 Len +=
append(Buf,
"item(");
616 for (
int I = 0; I < 2; ++I) {
617 Len +=
append(Buf + Len, I == 0 ?
"range: " :
", id: ");
624 template <
int Dimensions>
626 size_t FlushBufferSize,
unsigned WIOffset,
632 Len +=
append(Buf,
"h_item(");
633 for (
int I = 0; I < 3; ++I) {
634 Len +=
append(Buf + Len, I == 0 ?
"\n global "
635 : I == 1 ?
"\n logical local "
636 :
"\n physical local ");
641 Len +=
append(Buf + Len,
"\n)");
642 write(GlobalFlushBuf, FlushBufferSize, WIOffset, Buf, Len);
647 template <
typename VecT,
typename OperationLeftT,
typename OperationRightT,
648 template <
typename>
class OperationCurrentT,
int... Indexes>
650 VecT, OperationLeftT, OperationRightT, OperationCurrentT, Indexes...>>
652 using T =
typename VecT::element_type;
656 template <
typename T>
725 int width()
const {
return Width_; }
745 #ifdef __SYCL_DEVICE_ONLY__
751 stream(
size_t BufferSize,
size_t MaxStatementSize,
handler &CGH);
755 stream(
size_t BufferSize,
size_t MaxStatementSize,
handler &CGH,
758 size_t get_size()
const;
760 size_t get_max_statement_size()
const;
772 template <
typename propertyT>
bool has_property() const noexcept;
774 template <typename propertyT> propertyT
get_property() const;
777 #ifdef __SYCL_DEVICE_ONLY__
778 char padding[
sizeof(std::shared_ptr<detail::stream_impl>)];
780 std::shared_ptr<detail::stream_impl> impl;
799 mutable unsigned WIOffset = 0;
804 mutable unsigned Offset = 0;
806 mutable size_t FlushBufferSize;
814 mutable int Precision = -1;
815 mutable int Width = -1;
818 void set_flag(
FmtFlags FormatFlag)
const { Flags |= FormatFlag; }
820 void unset_flag(
FmtFlags FormatFlag)
const { Flags &= ~FormatFlag; }
822 FmtFlags get_flags()
const {
return Flags; }
829 Flags |= FormatFlag & Mask;
874 #ifdef __SYCL_DEVICE_ONLY__
876 range<detail::GlobalBufDim> GlobalBufAccRange,
877 range<detail::GlobalBufDim> GlobalBufMemRange,
878 id<detail::GlobalBufDim> GlobalBufId,
880 range<detail::GlobalOffsetDim> GlobalOffsetAccRange,
881 range<detail::GlobalOffsetDim> GlobalOffsetMemRange,
882 id<detail::GlobalOffsetDim> GlobalOffsetId,
884 range<detail::GlobalBufDim> GlobalFlushAccRange,
885 range<detail::GlobalBufDim> GlobalFlushMemRange,
886 id<detail::GlobalBufDim> GlobalFlushId,
size_t _FlushBufferSize) {
887 GlobalBuf.__init(GlobalBufPtr, GlobalBufAccRange, GlobalBufMemRange,
889 GlobalOffset.__init(GlobalOffsetPtr, GlobalOffsetAccRange,
890 GlobalOffsetMemRange, GlobalOffsetId);
891 GlobalFlushBuf.__init(GlobalFlushPtr, GlobalFlushAccRange,
892 GlobalFlushMemRange, GlobalFlushId);
893 FlushBufferSize = _FlushBufferSize;
899 WIOffset = GlobalOffset[1].fetch_add(FlushBufferSize);
915 flushBuffer(GlobalOffset, GlobalBuf, GlobalFlushBuf, WIOffset);
923 template <
typename ValueType>
938 template <
typename T,
int Dimensions>
941 template <
typename T>
943 template <
int Dimensions>
946 template <
int Dimensions>
950 template <
int Dimensions>
954 template <
int Dimensions>
958 template <
int Dimensions>
962 template <
int Dimensions>
966 template <
int Dimensions>
971 #if __cplusplus >= 201703L && (!defined(_HAS_STD_BYTE) || _HAS_STD_BYTE != 0)
973 template <
typename T>
974 inline std::enable_if_t<std::is_same<T, std::byte>::value,
const stream &>
976 static_assert(std::is_integral<T>(),
977 "Convert the byte to a numeric value using std::to_integer");
979 #endif // __cplusplus >= 201703L
983 detail::write(Out.GlobalFlushBuf, Out.FlushBufferSize, Out.WIOffset, &C, 1);
990 for (; Str[Len] !=
'\0'; Len++)
993 detail::write(Out.GlobalFlushBuf, Out.FlushBufferSize, Out.WIOffset, Str,
1000 Out << (RHS ?
"true" :
"false");
1005 template <
typename ValueType>
1006 inline typename detail::enable_if_t<std::is_integral<ValueType>::value,
1017 detail::writeFloatingPoint<float>(Out.GlobalFlushBuf, Out.FlushBufferSize,
1018 Out.WIOffset, Out.get_flags(),
1024 detail::writeFloatingPoint<double>(Out.GlobalFlushBuf, Out.FlushBufferSize,
1025 Out.WIOffset, Out.get_flags(),
1031 detail::writeFloatingPoint<half>(Out.GlobalFlushBuf, Out.FlushBufferSize,
1032 Out.WIOffset, Out.get_flags(),
1039 template <
typename ElementType, access::address_space Space>
1046 template <
typename T>
1052 Flags, Out.
get_width(),
reinterpret_cast<size_t>(RHS));
1066 Out.Width = RHS.
width();
1075 flushBuffer(Out.GlobalOffset, Out.GlobalBuf, Out.GlobalFlushBuf,
1079 flushBuffer(Out.GlobalOffset, Out.GlobalBuf, Out.GlobalFlushBuf,
1083 Out.set_manipulator(RHS);
1091 template <
typename T,
int VectorLength>
1093 detail::writeVec<T, VectorLength>(Out.GlobalFlushBuf, Out.FlushBufferSize,
1094 Out.WIOffset, Out.get_flags(),
1101 template <
int Dimensions>
1103 detail::writeArray<Dimensions>(Out.GlobalFlushBuf, Out.FlushBufferSize,
1108 template <
int Dimensions>
1111 detail::writeArray<Dimensions>(Out.GlobalFlushBuf, Out.FlushBufferSize,
1116 template <
int Dimensions>
1119 detail::writeItem<Dimensions>(Out.GlobalFlushBuf, Out.FlushBufferSize,
1124 template <
int Dimensions>
1127 detail::writeNDRange<Dimensions>(Out.GlobalFlushBuf, Out.FlushBufferSize,
1132 template <
int Dimensions>
1135 detail::writeNDItem<Dimensions>(Out.GlobalFlushBuf, Out.FlushBufferSize,
1140 template <
int Dimensions>
1143 detail::writeGroup<Dimensions>(Out.GlobalFlushBuf, Out.FlushBufferSize,
1148 template <
int Dimensions>
1151 detail::writeHItem<Dimensions>(Out.GlobalFlushBuf, Out.FlushBufferSize,
1156 template <
typename T,
typename RT = detail::EnableIfSwizzleVec<T>>
1166 template <>
struct hash<
cl::sycl::stream> {
1168 #ifdef __SYCL_DEVICE_ONLY__
1172 return hash<std::shared_ptr<cl::sycl::detail::stream_impl>>()(