DPC++ Runtime
Runtime libraries for oneAPI DPC++
stream.hpp
Go to the documentation of this file.
1 //==----------------- stream.hpp - SYCL standard header file ---------------==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #pragma once
10 
11 #include <sycl/access/access.hpp> // for target, mode, address_space
12 #include <sycl/accessor.hpp> // for accessor
13 #include <sycl/aliases.hpp> // for half
14 #include <sycl/atomic.hpp> // for atomic
15 #include <sycl/builtins.hpp> // for isinf, isnan, signbit
16 #include <sycl/detail/array.hpp> // for array
17 #include <sycl/detail/defines.hpp> // for __SYCL_SPECIAL_CLASS, __S...
18 #include <sycl/detail/defines_elementary.hpp> // for __SYCL2020_DEPRECATED
19 #include <sycl/detail/export.hpp> // for __SYCL_EXPORT
20 #include <sycl/detail/item_base.hpp> // for id, range
21 #include <sycl/detail/owner_less_base.hpp> // for OwnerLessBase
22 #include <sycl/ext/oneapi/bfloat16.hpp> // for bfloat16
23 #include <sycl/group.hpp> // for group
24 #include <sycl/h_item.hpp> // for h_item
25 #include <sycl/half_type.hpp> // for half, operator-, operator<
26 #include <sycl/handler.hpp> // for handler
27 #include <sycl/item.hpp> // for item
28 #include <sycl/nd_item.hpp> // for nd_item
29 #include <sycl/nd_range.hpp> // for nd_range
30 #include <sycl/property_list.hpp> // for property_list
31 #include <sycl/range.hpp> // for range
32 #include <sycl/sub_group.hpp> // for multi_ptr
33 #include <sycl/types.hpp> // for vec, SwizzleOp
34 
35 #include <cstddef> // for size_t, byte
36 #include <memory> // for hash, shared_ptr
37 #include <stdint.h> // for uint16_t, uint8_t
38 #include <type_traits> // for enable_if_t, is_same, fal...
39 #include <variant> // for hash
40 
41 namespace sycl {
42 inline namespace _V1 {
43 
44 namespace detail {
45 
46 class stream_impl;
47 
48 using FmtFlags = unsigned int;
49 
50 // Mapping from stream_manipulator to FmtFlags. Each manipulator corresponds
51 // to the bit in FmtFlags.
52 static constexpr FmtFlags Dec = 0x0001;
53 static constexpr FmtFlags Hex = 0x0002;
54 static constexpr FmtFlags Oct = 0x0004;
55 static constexpr FmtFlags ShowBase = 0x0008;
56 static constexpr FmtFlags ShowPos = 0x0010;
57 static constexpr FmtFlags Fixed = 0x0020;
58 static constexpr FmtFlags Scientific = 0x0040;
59 
60 // Bitmask made of the combination of the base flags. Base flags are mutually
61 // exclusive, this mask is used to clean base field before setting the new
62 // base flag.
63 static constexpr FmtFlags BaseField = Dec | Hex | Oct;
64 
65 // Bitmask made of the combination of the floating point value format flags.
66 // Thease flags are mutually exclusive, this mask is used to clean float field
67 // before setting the new float flag.
68 static constexpr FmtFlags FloatField = Scientific | Fixed;
69 
70 constexpr size_t MAX_FLOATING_POINT_DIGITS = 24;
71 constexpr size_t MAX_INTEGRAL_DIGITS = 23;
72 constexpr const char *VEC_ELEMENT_DELIMITER = ", ";
73 constexpr char VEC_OPEN_BRACE = '{';
74 constexpr char VEC_CLOSE_BRACE = '}';
75 
76 constexpr size_t MAX_DIMENSIONS = 3;
77 
78 // Space for integrals (up to 3), comma and space between the
79 // integrals and enclosing braces.
80 constexpr size_t MAX_ARRAY_SIZE =
82 
83 // First 2 bytes in each work item's flush buffer are reserved for saving
84 // statement offset.
85 constexpr unsigned FLUSH_BUF_OFFSET_SIZE = 2;
86 
87 template <class F, class T = void>
88 using EnableIfFP = typename std::enable_if_t<
89  detail::check_type_in_v<F, float, double, half, ext::oneapi::bfloat16>, T>;
90 
92  sycl::access::target::device>;
93 
98 constexpr static int GlobalBufDim = 1;
99 
100 using GlobalOffsetAccessorT = accessor<unsigned, 1, sycl::access::mode::atomic,
101  sycl::access::target::device>;
102 
107 constexpr static int GlobalOffsetDim = 1;
108 
109 // Read first 2 bytes of flush buffer to get buffer offset.
110 // TODO: Should be optimized to the following:
111 // return *reinterpret_cast<uint16_t *>(&GlobalFlushBuf[WIOffset]);
112 // when an issue with device code compilation using this optimization is fixed.
113 inline unsigned GetFlushBufOffset(const GlobalBufAccessorT &GlobalFlushBuf,
114  unsigned WIOffset) {
115  return ((static_cast<unsigned>(static_cast<uint8_t>(GlobalFlushBuf[WIOffset]))
116  << 8) +
117  static_cast<uint8_t>(GlobalFlushBuf[WIOffset + 1]));
118 }
119 
120 // Write flush buffer's offset into first 2 bytes of that buffer.
121 // TODO: Should be optimized to the following:
122 // *reinterpret_cast<uint16_t *>(&GlobalFlushBuf[WIOffset]) =
123 // static_cast<uint16_t>(Offset);
124 // when an issue with device code compilation using this optimization is fixed.
125 inline void SetFlushBufOffset(GlobalBufAccessorT &GlobalFlushBuf,
126  unsigned WIOffset, unsigned Offset) {
127  GlobalFlushBuf[WIOffset] = static_cast<char>((Offset >> 8) & 0xff);
128  GlobalFlushBuf[WIOffset + 1] = static_cast<char>(Offset & 0xff);
129 }
130 
131 inline void write(GlobalBufAccessorT &GlobalFlushBuf, size_t FlushBufferSize,
132  unsigned WIOffset, const char *Str, unsigned Len,
133  unsigned Padding = 0) {
134  unsigned Offset =
135  GetFlushBufOffset(GlobalFlushBuf, WIOffset) + FLUSH_BUF_OFFSET_SIZE;
136 
137  if ((Offset + Len + Padding > FlushBufferSize) ||
138  (WIOffset + Offset + Len + Padding > GlobalFlushBuf.size()))
139  // TODO: flush here
140  return;
141 
142  // Write padding
143  for (size_t I = 0; I < Padding; ++I, ++Offset)
144  GlobalFlushBuf[WIOffset + Offset] = ' ';
145 
146  for (size_t I = 0; I < Len; ++I, ++Offset) {
147  GlobalFlushBuf[WIOffset + Offset] = Str[I];
148  }
149 
150  SetFlushBufOffset(GlobalFlushBuf, WIOffset, Offset - FLUSH_BUF_OFFSET_SIZE);
151 }
152 
153 inline void reverseBuf(char *Buf, unsigned Len) {
154  int I = Len - 1;
155  int J = 0;
156  while (I > J) {
157  int Temp = Buf[I];
158  Buf[I] = Buf[J];
159  Buf[J] = Temp;
160  I--;
161  J++;
162  }
163 }
164 
165 template <typename T>
166 inline std::make_unsigned_t<T> getAbsVal(const T Val, const int Base) {
167  return ((Base == 10) && (Val < 0)) ? -Val : Val;
168 }
169 
170 inline char digitToChar(const int Digit) {
171  if (Digit < 10) {
172  return '0' + Digit;
173  } else {
174  return 'a' + Digit - 10;
175  }
176 }
177 
178 template <typename T>
179 inline typename std::enable_if_t<std::is_integral_v<T>, unsigned>
180 integralToBase(T Val, int Base, char *Digits) {
181  unsigned NumDigits = 0;
182 
183  do {
184  Digits[NumDigits++] = digitToChar(Val % Base);
185  Val /= Base;
186  } while (Val);
187 
188  return NumDigits;
189 }
190 
191 // Returns number of symbols written to the buffer
192 template <typename T>
193 inline typename std::enable_if_t<std::is_integral_v<T>, unsigned>
194 ScalarToStr(const T &Val, char *Buf, unsigned Flags, int, int Precision = -1) {
195  (void)Precision;
196  int Base = 10;
197 
198  // append base manipulator
199  switch (Flags & BaseField) {
200  case Dec:
201  Base = 10;
202  break;
203  case Hex:
204  Base = 16;
205  break;
206  case Oct:
207  Base = 8;
208  break;
209  default:
210  // default value is 10
211  break;
212  }
213 
214  unsigned Offset = 0;
215 
216  // write '+' to the stream if the base is 10 and the value is non-negative
217  // or write '-' to stream if base is 10 and the value is negative
218  if (Base == 10) {
219  if ((Flags & ShowPos) && Val >= 0)
220  Buf[Offset++] = '+';
221  else if (Val < 0)
222  Buf[Offset++] = '-';
223  }
224 
225  // write 0 or 0x to the stream if base is not 10 and the manipulator is set
226  if (Base != 10 && (Flags & ShowBase)) {
227  Buf[Offset++] = '0';
228  if (Base == 16)
229  Buf[Offset++] = 'x';
230  }
231 
232  auto AbsVal = getAbsVal(Val, Base);
233 
234  const unsigned NumBuf = integralToBase(AbsVal, Base, Buf + Offset);
235 
236  reverseBuf(Buf + Offset, NumBuf);
237  return Offset + NumBuf;
238 }
239 
240 inline unsigned append(char *Dst, const char *Src) {
241  unsigned Len = 0;
242  for (; Src[Len] != '\0'; ++Len)
243  ;
244 
245  for (unsigned I = 0; I < Len; ++I)
246  Dst[I] = Src[I];
247  return Len;
248 }
249 
250 inline unsigned F2I32(float Val) {
251  union {
252  float FVal;
253  unsigned I32Val;
254  } Internal;
255  Internal.FVal = Val;
256  return Internal.I32Val;
257 }
258 
259 inline unsigned long long D2I64(double Val) {
260  union {
261  double DVal;
262  unsigned long long I64Val;
263  } Internal;
264  Internal.DVal = Val;
265  return Internal.I64Val;
266 }
267 
268 template <typename T>
269 inline typename detail::enable_if_t<
270  std::is_same<T, float>::value || std::is_same<T, double>::value, bool>
272  if constexpr (sizeof(Val) == 4) {
273  return (F2I32(Val) & 0x7fffffff) == 0x7f800000;
274  } else if constexpr (sizeof(Val) == 8) {
275  return (D2I64(Val) & -1ULL >> 1) == 0x7ffULL << 52;
276  }
277 
278  return false;
279 }
280 
281 template <typename T>
282 inline typename detail::enable_if_t<
283  std::is_same<T, float>::value || std::is_same<T, double>::value, bool>
285  if constexpr (sizeof(Val) == 4) {
286  return (F2I32(Val) & 0x7fffffff) > 0x7f800000;
287  } else if constexpr (sizeof(Val) == 8) {
288  return (D2I64(Val) & -1ULL >> 1) > 0x7ffULL << 52;
289  }
290 
291  return false;
292 }
293 
294 template <typename T>
295 inline typename detail::enable_if_t<
296  std::is_same<T, float>::value || std::is_same<T, double>::value, bool>
298  if constexpr (sizeof(Val) == 4) {
299  return F2I32(Val) >> 31;
300  } else if constexpr (sizeof(Val) == 8) {
301  return D2I64(Val) >> 63;
302  }
303 
304  return false;
305 }
306 
307 template <typename T>
308 typename detail::enable_if_t<
309  std::is_same<T, float>::value || std::is_same<T, double>::value, unsigned>
310 checkForInfNan(char *Buf, T Val) {
311 #ifdef __FAST_MATH__
312  if (isFastMathNan(Val))
313 #else
314  if (isnan(Val))
315 #endif
316  return append(Buf, "nan");
317 #ifdef __FAST_MATH__
318  if (isFastMathInf(Val)) {
319  if (isFastMathSignBit(Val))
320 #else
321  if (isinf(Val)) {
322  if (signbit(Val))
323 #endif
324  return append(Buf, "-inf");
325  return append(Buf, "inf");
326  }
327  return 0;
328 }
329 
330 template <typename T>
331 inline typename std::enable_if_t<std::is_same_v<T, half>, unsigned>
332 checkForInfNan(char *Buf, T Val) {
333  if (Val != Val)
334  return append(Buf, "nan");
335 
336  // Extract the sign from the bits
337  const uint16_t Sign = sycl::bit_cast<uint16_t>(Val) & 0x8000;
338  // Extract the exponent from the bits
339  const uint16_t Exp16 = (sycl::bit_cast<uint16_t>(Val) & 0x7c00) >> 10;
340 
341  if (Exp16 == 0x1f) {
342  if (Sign)
343  return append(Buf, "-inf");
344  return append(Buf, "inf");
345  }
346  return 0;
347 }
348 
349 template <typename T>
350 inline typename std::enable_if_t<std::is_same_v<T, ext::oneapi::bfloat16>,
351  unsigned>
352 checkForInfNan(char *Buf, T Val) {
353  if (Val != Val)
354  return append(Buf, "nan");
355 
356  // Extract the sign from the bits
357  const uint16_t Sign = sycl::bit_cast<uint16_t>(Val) & 0x8000;
358  // Extract the exponent from the bits
359  const uint16_t Exp16 = (sycl::bit_cast<uint16_t>(Val) & 0x7f80) >> 7;
360 
361  if (Exp16 == 0x7f) {
362  if (Sign)
363  return append(Buf, "-inf");
364  return append(Buf, "inf");
365  }
366  return 0;
367 }
368 
369 template <typename T>
371  int Precision, bool IsSci) {
372  int Exp = 0;
373 
374  // For the case that the value is larger than 10.0
375  while (AbsVal >= T{10.0}) {
376  ++Exp;
377  AbsVal /= T{10.0};
378  }
379  // For the case that the value is less than 1.0
380  while (AbsVal > T{0.0} && AbsVal < T{1.0}) {
381  --Exp;
382  AbsVal *= T{10.0};
383  }
384 
385  auto IntegralPart = static_cast<int>(AbsVal);
386  auto FractionPart = AbsVal - IntegralPart;
387 
388  int FractionDigits[MAX_FLOATING_POINT_DIGITS] = {0};
389 
390  // Exponent
391  int P = Precision > 0 ? Precision : 4;
392  size_t FractionLength = Exp + P;
393 
394  // After normalization integral part contains 1 symbol, also there could be
395  // '.', 'e', sign of the exponent and sign of the number, overall 5 symbols.
396  // So, clamp fraction length if required according to maximum size of the
397  // buffer for floating point number.
398  if (FractionLength > MAX_FLOATING_POINT_DIGITS - 5)
399  FractionLength = MAX_FLOATING_POINT_DIGITS - 5;
400 
401  for (unsigned I = 0; I < FractionLength; ++I) {
402  FractionPart *= T{10.0};
403  FractionDigits[I] = static_cast<int>(FractionPart);
404  FractionPart -= static_cast<int>(FractionPart);
405  }
406 
407  int Carry = FractionPart > static_cast<T>(0.5) ? 1 : 0;
408 
409  // Propagate the Carry
410  for (int I = FractionLength - 1; I >= 0 && Carry; --I) {
411  auto Digit = FractionDigits[I] + Carry;
412  FractionDigits[I] = Digit % 10;
413  Carry = Digit / 10;
414  }
415 
416  // Carry from the fraction part is propagated to integral part
417  IntegralPart += Carry;
418  if (IntegralPart == 10) {
419  IntegralPart = 1;
420  ++Exp;
421  }
422 
423  unsigned Offset = 0;
424 
425  // Assemble the final string correspondingly
426  if (IsSci) { // scientific mode
427  // Append the integral part
428  Digits[Offset++] = digitToChar(IntegralPart);
429  Digits[Offset++] = '.';
430 
431  // Append all fraction
432  for (unsigned I = 0; I < FractionLength; ++I)
433  Digits[Offset++] = digitToChar(FractionDigits[I]);
434 
435  auto AbsExp = Exp < 0 ? -Exp : Exp;
436  // Exponent part
437  Digits[Offset++] = 'e';
438  Digits[Offset++] = Exp >= 0 ? '+' : '-';
439  Digits[Offset++] = digitToChar(AbsExp / 10);
440  Digits[Offset++] = digitToChar(AbsExp % 10);
441  } else { // normal mode
442  if (Exp < 0) {
443  Digits[Offset++] = '0';
444  Digits[Offset++] = '.';
445  while (++Exp)
446  Digits[Offset++] = '0';
447 
448  // Append the integral part
449  Digits[Offset++] = digitToChar(IntegralPart);
450 
451  // Append all fraction
452  for (unsigned I = 0; I < FractionLength; ++I)
453  Digits[Offset++] = digitToChar(FractionDigits[I]);
454  } else {
455  // Append the integral part
456  Digits[Offset++] = digitToChar(IntegralPart);
457  unsigned I = 0;
458  // Append the integral part first
459  for (; I < FractionLength && Exp--; ++I)
460  Digits[Offset++] = digitToChar(FractionDigits[I]);
461 
462  // Put the dot
463  Digits[Offset++] = '.';
464 
465  // Append the rest of fraction part, or the real fraction part
466  for (; I < FractionLength; ++I)
467  Digits[Offset++] = digitToChar(FractionDigits[I]);
468  }
469  // The normal mode requires no tailing zero digit, then we need to first
470  // find the first non-zero digit
471  while (Digits[Offset - 1] == '0')
472  Offset--;
473 
474  // If dot is the last digit, it should be stripped off as well
475  if (Digits[Offset - 1] == '.')
476  Offset--;
477  }
478  return Offset;
479 }
480 
481 // Returns number of symbols written to the buffer
482 template <typename T>
483 inline EnableIfFP<T, unsigned>
484 ScalarToStr(const T &Val, char *Buf, unsigned Flags, int, int Precision = -1) {
485  unsigned Offset = checkForInfNan(Buf, Val);
486  if (Offset)
487  return Offset;
488 
489  T Neg = -Val;
490  auto AbsVal = Val < 0 ? Neg : Val;
491 
492  if (Val < 0) {
493  Buf[Offset++] = '-';
494  } else if (Flags & ShowPos) {
495  Buf[Offset++] = '+';
496  }
497 
498  bool IsSci = false;
499  if (Flags & detail::Scientific)
500  IsSci = true;
501 
502  // TODO: manipulators for floating-point output - hexfloat, fixed
503  Offset += floatingPointToDecStr(AbsVal, Buf + Offset, Precision, IsSci);
504 
505  return Offset;
506 }
507 
508 template <typename T>
509 inline typename std::enable_if_t<std::is_integral_v<T>>
510 writeIntegral(GlobalBufAccessorT &GlobalFlushBuf, size_t FlushBufferSize,
511  unsigned WIOffset, unsigned Flags, int Width, const T &Val) {
512  char Digits[MAX_INTEGRAL_DIGITS] = {0};
513  unsigned Len = ScalarToStr(Val, Digits, Flags, Width);
514  write(GlobalFlushBuf, FlushBufferSize, WIOffset, Digits, Len,
515  (Width > 0 && static_cast<unsigned>(Width) > Len)
516  ? static_cast<unsigned>(Width) - Len
517  : 0);
518 }
519 
520 template <typename T>
521 inline EnableIfFP<T>
522 writeFloatingPoint(GlobalBufAccessorT &GlobalFlushBuf, size_t FlushBufferSize,
523  unsigned WIOffset, unsigned Flags, int Width, int Precision,
524  const T &Val) {
525  char Digits[MAX_FLOATING_POINT_DIGITS] = {0};
526  unsigned Len = ScalarToStr(Val, Digits, Flags, Width, Precision);
527  write(GlobalFlushBuf, FlushBufferSize, WIOffset, Digits, Len,
528  (Width > 0 && static_cast<unsigned>(Width) > Len)
529  ? static_cast<unsigned>(Width) - Len
530  : 0);
531 }
532 
533 // Helper method to update offset in the global buffer atomically according to
534 // the provided size of the data in the flush buffer. Return true if offset is
535 // updated and false in case of overflow.
536 inline bool updateOffset(GlobalOffsetAccessorT &GlobalOffset,
537  GlobalBufAccessorT &GlobalBuf, unsigned Size,
538  unsigned &Cur) {
539  unsigned New;
540  Cur = GlobalOffset[0].load();
541  do {
542  if (GlobalBuf.get_range().size() - Cur < Size)
543  // Overflow
544  return false;
545  New = Cur + Size;
546  } while (!GlobalOffset[0].compare_exchange_strong(Cur, New));
547  return true;
548 }
549 
550 inline void flushBuffer(GlobalOffsetAccessorT &GlobalOffset,
551  GlobalBufAccessorT &GlobalBuf,
552  GlobalBufAccessorT &GlobalFlushBuf, unsigned WIOffset) {
553  unsigned Offset = GetFlushBufOffset(GlobalFlushBuf, WIOffset);
554  if (Offset == 0)
555  return;
556 
557  unsigned Cur = 0;
558  if (!updateOffset(GlobalOffset, GlobalBuf, Offset, Cur))
559  return;
560 
561  unsigned StmtOffset = WIOffset + FLUSH_BUF_OFFSET_SIZE;
562  for (unsigned I = StmtOffset; I < StmtOffset + Offset; I++) {
563  GlobalBuf[Cur++] = GlobalFlushBuf[I];
564  }
565  // Reset the offset in the flush buffer
566  SetFlushBufOffset(GlobalFlushBuf, WIOffset, 0);
567 }
568 
569 template <typename T, int VecLength>
570 typename std::enable_if_t<(VecLength == 1), unsigned>
571 VecToStr(const vec<T, VecLength> &Vec, char *VecStr, unsigned Flags, int Width,
572  int Precision) {
573  return ScalarToStr(static_cast<T>(Vec.x()), VecStr, Flags, Width, Precision);
574 }
575 
576 template <typename T, int VecLength>
577 typename std::enable_if_t<(VecLength == 2 || VecLength == 4 || VecLength == 8 ||
578  VecLength == 16),
579  unsigned>
580 VecToStr(const vec<T, VecLength> &Vec, char *VecStr, unsigned Flags, int Width,
581  int Precision) {
582  unsigned Len =
583  VecToStr<T, VecLength / 2>(Vec.lo(), VecStr, Flags, Width, Precision);
584  Len += append(VecStr + Len, VEC_ELEMENT_DELIMITER);
585  Len += VecToStr<T, VecLength / 2>(Vec.hi(), VecStr + Len, Flags, Width,
586  Precision);
587  return Len;
588 }
589 
590 template <typename T, int VecLength>
591 typename std::enable_if_t<(VecLength == 3), unsigned>
592 VecToStr(const vec<T, VecLength> &Vec, char *VecStr, unsigned Flags, int Width,
593  int Precision) {
594  unsigned Len = VecToStr<T, 2>(Vec.lo(), VecStr, Flags, Width, Precision);
595  Len += append(VecStr + Len, VEC_ELEMENT_DELIMITER);
596  Len += VecToStr<T, 1>(Vec.z(), VecStr + Len, Flags, Width, Precision);
597  return Len;
598 }
599 
600 template <typename T, int VecLength>
601 inline void writeVec(GlobalBufAccessorT &GlobalFlushBuf, size_t FlushBufferSize,
602  unsigned WIOffset, unsigned Flags, int Width,
603  int Precision, const vec<T, VecLength> &Vec) {
604  // Reserve space for vector elements and delimiters
605  constexpr size_t MAX_VEC_SIZE =
606  MAX_FLOATING_POINT_DIGITS * VecLength + (VecLength - 1) * 2;
607  char VecStr[MAX_VEC_SIZE] = {0};
608  unsigned Len = VecToStr<T, VecLength>(Vec, VecStr, Flags, Width, Precision);
609  write(GlobalFlushBuf, FlushBufferSize, WIOffset, VecStr, Len,
610  (Width > 0 && Width > Len) ? Width - Len : 0);
611 }
612 
613 template <int ArrayLength>
614 inline unsigned ArrayToStr(char *Buf, const array<ArrayLength> &Arr) {
615  unsigned Len = 0;
616  Buf[Len++] = VEC_OPEN_BRACE;
617 
618  for (int I = 0; I < ArrayLength; ++I) {
619  Len += ScalarToStr(Arr[I], Buf + Len, 0 /* No flags */, -1, -1);
620  if (I != ArrayLength - 1)
621  Len += append(Buf + Len, VEC_ELEMENT_DELIMITER);
622  }
623 
624  Buf[Len++] = VEC_CLOSE_BRACE;
625 
626  return Len;
627 }
628 
629 template <int ArrayLength>
630 inline void writeArray(GlobalBufAccessorT &GlobalFlushBuf,
631  size_t FlushBufferSize, unsigned WIOffset,
632  const array<ArrayLength> &Arr) {
633  char Buf[MAX_ARRAY_SIZE];
634  unsigned Len = ArrayToStr(Buf, Arr);
635  write(GlobalFlushBuf, FlushBufferSize, WIOffset, Buf, Len);
636 }
637 
638 template <int Dimensions>
639 inline void writeItem(GlobalBufAccessorT &GlobalFlushBuf,
640  size_t FlushBufferSize, unsigned WIOffset,
641  const item<Dimensions> &Item) {
642  // Reserve space for 3 arrays and additional place (40 symbols) for printing
643  // the text
644  char Buf[3 * MAX_ARRAY_SIZE + 40];
645  unsigned Len = 0;
646  Len += append(Buf, "item(");
647  Len += append(Buf + Len, "range: ");
648  Len += ArrayToStr(Buf + Len, Item.get_range());
649  Len += append(Buf + Len, ", id: ");
650  Len += ArrayToStr(Buf + Len, Item.get_id());
651  Len += append(Buf + Len, ", offset: ");
652  Len += ArrayToStr(Buf + Len, Item.get_offset());
653  Buf[Len++] = ')';
654  write(GlobalFlushBuf, FlushBufferSize, WIOffset, Buf, Len);
655 }
656 
657 template <int Dimensions>
658 inline void writeNDRange(GlobalBufAccessorT &GlobalFlushBuf,
659  size_t FlushBufferSize, unsigned WIOffset,
660  const nd_range<Dimensions> &ND_Range) {
661  // Reserve space for 3 arrays and additional place (50 symbols) for printing
662  // the text
663  char Buf[3 * MAX_ARRAY_SIZE + 50];
664  unsigned Len = 0;
665  Len += append(Buf, "nd_range(");
666  Len += append(Buf + Len, "global_range: ");
667  Len += ArrayToStr(Buf + Len, ND_Range.get_global_range());
668  Len += append(Buf + Len, ", local_range: ");
669  Len += ArrayToStr(Buf + Len, ND_Range.get_local_range());
670  Len += append(Buf + Len, ", offset: ");
671  Len += ArrayToStr(Buf + Len, ND_Range.get_offset());
672  Buf[Len++] = ')';
673  write(GlobalFlushBuf, FlushBufferSize, WIOffset, Buf, Len);
674 }
675 
676 template <int Dimensions>
677 inline void writeNDItem(GlobalBufAccessorT &GlobalFlushBuf,
678  size_t FlushBufferSize, unsigned WIOffset,
679  const nd_item<Dimensions> &ND_Item) {
680  // Reserve space for 2 arrays and additional place (40 symbols) for printing
681  // the text
682  char Buf[2 * MAX_ARRAY_SIZE + 40];
683  unsigned Len = 0;
684  Len += append(Buf, "nd_item(");
685  Len += append(Buf + Len, "global_id: ");
686  Len += ArrayToStr(Buf + Len, ND_Item.get_global_id());
687  Len += append(Buf + Len, ", local_id: ");
688  Len += ArrayToStr(Buf + Len, ND_Item.get_local_id());
689  Buf[Len++] = ')';
690  write(GlobalFlushBuf, FlushBufferSize, WIOffset, Buf, Len);
691 }
692 
693 template <int Dimensions>
694 inline void writeGroup(GlobalBufAccessorT &GlobalFlushBuf,
695  size_t FlushBufferSize, unsigned WIOffset,
696  const group<Dimensions> &Group) {
697  // Reserve space for 4 arrays and additional place (60 symbols) for printing
698  // the text
699  char Buf[4 * MAX_ARRAY_SIZE + 60];
700  unsigned Len = 0;
701  Len += append(Buf, "group(");
702  Len += append(Buf + Len, "id: ");
703  Len += ArrayToStr(Buf + Len, Group.get_id());
704  Len += append(Buf + Len, ", global_range: ");
705  Len += ArrayToStr(Buf + Len, Group.get_global_range());
706  Len += append(Buf + Len, ", local_range: ");
707  Len += ArrayToStr(Buf + Len, Group.get_local_range());
708  Len += append(Buf + Len, ", group_range: ");
709  Len += ArrayToStr(Buf + Len, Group.get_group_range());
710  Buf[Len++] = ')';
711  write(GlobalFlushBuf, FlushBufferSize, WIOffset, Buf, Len);
712 }
713 
714 // Space for 2 arrays and additional place (20 symbols) for printing
715 // the text
716 constexpr size_t MAX_ITEM_SIZE = 2 * MAX_ARRAY_SIZE + 20;
717 
718 template <int Dimensions>
719 inline unsigned ItemToStr(char *Buf, const item<Dimensions, false> &Item) {
720  unsigned Len = 0;
721  Len += append(Buf, "item(");
722  for (int I = 0; I < 2; ++I) {
723  Len += append(Buf + Len, I == 0 ? "range: " : ", id: ");
724  Len += ArrayToStr(Buf + Len, I == 0 ? Item.get_range() : Item.get_id());
725  }
726  Buf[Len++] = ')';
727  return Len;
728 }
729 
730 template <int Dimensions>
731 inline void writeHItem(GlobalBufAccessorT &GlobalFlushBuf,
732  size_t FlushBufferSize, unsigned WIOffset,
733  const h_item<Dimensions> &HItem) {
734  // Reserve space for 3 items and additional place (60 symbols) for printing
735  // the text
736  char Buf[3 * MAX_ITEM_SIZE + 60];
737  unsigned Len = 0;
738  Len += append(Buf, "h_item(");
739  for (int I = 0; I < 3; ++I) {
740  Len += append(Buf + Len, I == 0 ? "\n global "
741  : I == 1 ? "\n logical local "
742  : "\n physical local ");
743  Len += ItemToStr(Buf + Len, I == 0 ? HItem.get_global()
744  : I == 1 ? HItem.get_logical_local()
745  : HItem.get_physical_local());
746  }
747  Len += append(Buf + Len, "\n)");
748  write(GlobalFlushBuf, FlushBufferSize, WIOffset, Buf, Len);
749 }
750 
751 template <typename> struct IsSwizzleOp : std::false_type {};
752 
753 template <typename VecT, typename OperationLeftT, typename OperationRightT,
754  template <typename> class OperationCurrentT, int... Indexes>
756  VecT, OperationLeftT, OperationRightT, OperationCurrentT, Indexes...>>
757  : std::true_type {
758  using T = typename VecT::element_type;
759  using Type = typename sycl::vec<T, (sizeof...(Indexes))>;
760 };
761 
762 template <typename T>
764  typename std::enable_if_t<IsSwizzleOp<T>::value,
765  typename IsSwizzleOp<T>::Type>;
766 
767 } // namespace detail
768 
769 enum class stream_manipulator {
770  dec = 0,
771  hex = 1,
772  oct = 2,
773  noshowbase = 3,
774  showbase = 4,
775  noshowpos = 5,
776  showpos = 6,
777  endl = 7,
778  flush = 8,
779  fixed = 9,
780  scientific = 10,
781  hexfloat = 11,
782  defaultfloat = 12
783 };
784 
786 
788 
790 
792 
794 
796 
798 
800 
802 
804 
806 
808 
810 
811 class stream;
812 
814  int Precision_;
815 
816 public:
817  __precision_manipulator__(int Precision) : Precision_(Precision) {}
818 
819  int precision() const { return Precision_; }
820 
821  friend const stream &operator<<(const stream &,
822  const __precision_manipulator__ &);
823 };
824 
826  int Width_;
827 
828 public:
829  __width_manipulator__(int Width) : Width_(Width) {}
830 
831  int width() const { return Width_; }
832 
833  friend const stream &operator<<(const stream &,
834  const __width_manipulator__ &);
835 };
836 
838  return __precision_manipulator__(Precision);
839 }
840 
841 inline __width_manipulator__ setw(int Width) {
842  return __width_manipulator__(Width);
843 }
844 
849 class __SYCL_EXPORT __SYCL_SPECIAL_CLASS __SYCL_TYPE(stream) stream
850  : public detail::OwnerLessBase<stream> {
851 private:
852 #ifndef __SYCL_DEVICE_ONLY__
853  // Constructor for recreating a stream.
854  stream(std::shared_ptr<detail::stream_impl> Impl,
855  detail::GlobalBufAccessorT GlobalBuf,
856  detail::GlobalOffsetAccessorT GlobalOffset,
857  detail::GlobalBufAccessorT GlobalFlushBuf)
858  : impl{Impl}, GlobalBuf{GlobalBuf}, GlobalOffset{GlobalOffset},
859  GlobalFlushBuf{GlobalFlushBuf} {}
860 #endif
861 
862 public:
863 #ifdef __SYCL_DEVICE_ONLY__
864  // Default constructor for objects later initialized with __init member.
865  stream() = default;
866 #endif
867 
868  // Throws exception in case of invalid input parameters
869  stream(size_t BufferSize, size_t MaxStatementSize, handler &CGH);
870 
871  // Property-list constructor variant.
872  // TODO: Merge with other stream constructor and give PropList default value.
873  stream(size_t BufferSize, size_t MaxStatementSize, handler &CGH,
874  const property_list &PropList);
875 
876 #ifdef __SYCL_DEVICE_ONLY__
877  // We need the definitions of these functions in the header for device,
878  // otherwise they are not visible. Also, we cannot use `impl` because it's not
879  // there on the device, so we rely on GlobalBuf/GlobalFlushBuf.
880  size_t size() const noexcept { return GlobalBuf.size(); }
881 
882  size_t get_work_item_buffer_size() const {
883  return GlobalFlushBuf.size() - detail::FLUSH_BUF_OFFSET_SIZE;
884  }
885 
887  "get_size() is deprecated since SYCL 2020. Please use size() instead.")
888  size_t get_size() const { return size(); }
889 
890  __SYCL2020_DEPRECATED("get_max_statement_size() is deprecated since SYCL "
891  "2020. Please use get_work_item_buffer_size() instead.")
892  size_t get_max_statement_size() const { return get_work_item_buffer_size(); }
893 #else
894  size_t size() const noexcept;
895 
896  size_t get_work_item_buffer_size() const;
897 
899  "get_size() is deprecated since SYCL 2020. Please use size() instead.")
900  size_t get_size() const;
901 
902  __SYCL2020_DEPRECATED("get_max_statement_size() is deprecated since SYCL "
903  "2020. Please use get_work_item_buffer_size() instead.")
904  size_t get_max_statement_size() const;
905 #endif
906 
907  size_t get_precision() const { return Precision; }
908 
909  size_t get_width() const { return Width; }
910 
911  stream_manipulator get_stream_mode() const { return Manipulator; }
912 
913  bool operator==(const stream &RHS) const;
914 
915  bool operator!=(const stream &LHS) const;
916 
917  template <typename propertyT> bool has_property() const noexcept {
918  return getPropList().template has_property<propertyT>();
919  }
920 
921  template <typename propertyT> propertyT get_property() const {
922  return getPropList().template get_property<propertyT>();
923  }
924 
925 private:
926 #ifdef __SYCL_DEVICE_ONLY__
927  char padding[sizeof(std::shared_ptr<detail::stream_impl>)];
928 #else
929  std::shared_ptr<detail::stream_impl> impl;
930  template <class Obj>
931  friend const decltype(Obj::impl) &
932  detail::getSyclObjImpl(const Obj &SyclObject);
933 #endif
934 
935  // NOTE: Some members are required for reconstructing the stream, but are not
936  // part of the implementation class. If more members are added, they should
937  // also be added to the weak_object specialization for streams.
938 
939  // Accessor to the global stream buffer. Global buffer contains all output
940  // from the kernel.
941  mutable detail::GlobalBufAccessorT GlobalBuf;
942 
943  // Atomic accessor to the global offset variable. It represents an offset in
944  // the global stream buffer. Since work items will flush data to global buffer
945  // in parallel we need atomic access to this offset.
946  mutable detail::GlobalOffsetAccessorT GlobalOffset;
947 
948  // Accessor to the flush buffer. Each work item writes its
949  // output to a designated section of the flush buffer.
950  mutable detail::GlobalBufAccessorT GlobalFlushBuf;
951 
952  // Offset of the WI's flush buffer in the pool.
953  mutable unsigned WIOffset = 0;
954 
955  mutable size_t FlushBufferSize;
956 
957  // Fields and methods to work with manipulators
958  mutable stream_manipulator Manipulator = defaultfloat;
959 
960  // Type used for format flags
961  using FmtFlags = unsigned int;
962 
963  mutable int Precision = -1;
964  mutable int Width = -1;
965  mutable FmtFlags Flags = 0x0;
966 
967  void set_flag(FmtFlags FormatFlag) const { Flags |= FormatFlag; }
968 
969  void unset_flag(FmtFlags FormatFlag) const { Flags &= ~FormatFlag; }
970 
971  FmtFlags get_flags() const { return Flags; }
972 
973  // This method is used to set the flag for base and float manipulators. These
974  // flags are mutually exclusive and base/float field needs to be cleared
975  // before the setting new flag.
976  void set_flag(FmtFlags FormatFlag, FmtFlags Mask) const {
977  unset_flag(Mask);
978  Flags |= FormatFlag & Mask;
979  }
980 
981  // Set the flags which correspond to the input stream manipulator.
982  void set_manipulator(const stream_manipulator SM) const {
983  switch (SM) {
985  set_flag(detail::Dec, detail::BaseField);
986  break;
988  set_flag(detail::Hex, detail::BaseField);
989  break;
991  set_flag(detail::Oct, detail::BaseField);
992  break;
994  unset_flag(detail::ShowBase);
995  break;
997  set_flag(detail::ShowBase);
998  break;
1000  unset_flag(detail::ShowPos);
1001  break;
1003  set_flag(detail::ShowPos);
1004  break;
1006  set_flag(detail::Fixed, detail::FloatField);
1007  break;
1010  break;
1013  break;
1015  unset_flag(detail::FloatField);
1016  break;
1017  default:
1018  // Unknown manipulator
1019  break;
1020  }
1021  }
1022 
1023 #ifdef __SYCL_DEVICE_ONLY__
1024  void __init(detail::GlobalBufPtrType GlobalBufPtr,
1025  range<detail::GlobalBufDim> GlobalBufAccRange,
1026  range<detail::GlobalBufDim> GlobalBufMemRange,
1027  id<detail::GlobalBufDim> GlobalBufId,
1028  detail::GlobalOffsetPtrType GlobalOffsetPtr,
1029  range<detail::GlobalOffsetDim> GlobalOffsetAccRange,
1030  range<detail::GlobalOffsetDim> GlobalOffsetMemRange,
1031  id<detail::GlobalOffsetDim> GlobalOffsetId,
1032  detail::GlobalBufPtrType GlobalFlushPtr,
1033  range<detail::GlobalBufDim> GlobalFlushAccRange,
1034  range<detail::GlobalBufDim> GlobalFlushMemRange,
1035  id<detail::GlobalBufDim> GlobalFlushId, size_t _FlushBufferSize) {
1036  GlobalBuf.__init(GlobalBufPtr, GlobalBufAccRange, GlobalBufMemRange,
1037  GlobalBufId);
1038  GlobalOffset.__init(GlobalOffsetPtr, GlobalOffsetAccRange,
1039  GlobalOffsetMemRange, GlobalOffsetId);
1040  GlobalFlushBuf.__init(GlobalFlushPtr, GlobalFlushAccRange,
1041  GlobalFlushMemRange, GlobalFlushId);
1042  FlushBufferSize = _FlushBufferSize;
1043  // Calculate offset in the flush buffer for each work item in the global
1044  // work space. We need to avoid calling intrinsics to get global id because
1045  // when stream is used in a single_task kernel this could cause some
1046  // overhead on FPGA target. That is why use global atomic variable to
1047  // calculate offsets.
1048  WIOffset = GlobalOffset[1].fetch_add(FlushBufferSize);
1049 
1050  // Initialize flush subbuffer's offset for each work item on device.
1051  SetFlushBufOffset(GlobalFlushBuf, WIOffset, 0);
1052  }
1053 
1054  void __finalize() {
1055  // Flush data to global buffer if flush buffer is not empty. This could be
1056  // necessary if user hasn't yet flushed data on its own and kernel execution
1057  // is finished
1058  // NOTE: A call to this function will be generated by compiler
1059  flushBuffer(GlobalOffset, GlobalBuf, GlobalFlushBuf, WIOffset);
1060  }
1061 #endif
1062 
1063  friend class handler;
1064 
1065  template <typename SYCLObjT> friend class ext::oneapi::weak_object;
1066 
1067  friend const stream &operator<<(const stream &, const char);
1068  friend const stream &operator<<(const stream &, const char *);
1069  template <typename ValueType>
1070  friend
1071  typename std::enable_if_t<std::is_integral_v<ValueType>, const stream &>
1072  operator<<(const stream &, const ValueType &);
1073  friend const stream &operator<<(const stream &, const float &);
1074  friend const stream &operator<<(const stream &, const double &);
1075  friend const stream &operator<<(const stream &, const half &);
1076  friend const stream &operator<<(const stream &,
1077  const ext::oneapi::bfloat16 &);
1078 
1079  friend const stream &operator<<(const stream &, const stream_manipulator);
1080 
1081  friend const stream &operator<<(const stream &Out,
1082  const __precision_manipulator__ &RHS);
1083 
1084  friend const stream &operator<<(const stream &Out,
1085  const __width_manipulator__ &RHS);
1086  template <typename T, int Dimensions>
1087  friend const stream &operator<<(const stream &Out,
1088  const vec<T, Dimensions> &RHS);
1089  template <typename T>
1090  friend const stream &operator<<(const stream &Out, const T *RHS);
1091  template <int Dimensions>
1092  friend const stream &operator<<(const stream &Out, const id<Dimensions> &RHS);
1093 
1094  template <int Dimensions>
1095  friend const stream &operator<<(const stream &Out,
1096  const range<Dimensions> &RHS);
1097 
1098  template <int Dimensions>
1099  friend const stream &operator<<(const stream &Out,
1100  const item<Dimensions> &RHS);
1101 
1102  template <int Dimensions>
1103  friend const stream &operator<<(const stream &Out,
1104  const nd_range<Dimensions> &RHS);
1105 
1106  template <int Dimensions>
1107  friend const stream &operator<<(const stream &Out,
1108  const nd_item<Dimensions> &RHS);
1109 
1110  template <int Dimensions>
1111  friend const stream &operator<<(const stream &Out,
1112  const group<Dimensions> &RHS);
1113 
1114  template <int Dimensions>
1115  friend const stream &operator<<(const stream &Out,
1116  const h_item<Dimensions> &RHS);
1117 
1118  const property_list &getPropList() const;
1119 };
1120 
1121 #if (!defined(_HAS_STD_BYTE) || _HAS_STD_BYTE != 0)
1122 // Byte (has to be converted to a numeric value)
1123 template <typename T>
1124 inline std::enable_if_t<std::is_same_v<T, std::byte>, const stream &>
1125 operator<<(const stream &, const T &) {
1126  static_assert(std::is_integral<T>(),
1127  "Convert the byte to a numeric value using std::to_integer");
1128 }
1129 #endif
1130 
1131 // Character
1132 inline const stream &operator<<(const stream &Out, const char C) {
1133  detail::write(Out.GlobalFlushBuf, Out.FlushBufferSize, Out.WIOffset, &C, 1);
1134  return Out;
1135 }
1136 
1137 // String
1138 inline const stream &operator<<(const stream &Out, const char *Str) {
1139  unsigned Len = 0;
1140  for (; Str[Len] != '\0'; Len++)
1141  ;
1142 
1143  detail::write(Out.GlobalFlushBuf, Out.FlushBufferSize, Out.WIOffset, Str,
1144  Len);
1145  return Out;
1146 }
1147 
1148 // Boolean
1149 inline const stream &operator<<(const stream &Out, const bool &RHS) {
1150  Out << (RHS ? "true" : "false");
1151  return Out;
1152 }
1153 
1154 // Integral
1155 template <typename ValueType>
1156 inline typename std::enable_if_t<std::is_integral_v<ValueType>, const stream &>
1157 operator<<(const stream &Out, const ValueType &RHS) {
1158  detail::writeIntegral(Out.GlobalFlushBuf, Out.FlushBufferSize, Out.WIOffset,
1159  Out.get_flags(), Out.get_width(), RHS);
1160  return Out;
1161 }
1162 
1163 // Floating points
1164 
1165 inline const stream &operator<<(const stream &Out, const float &RHS) {
1166  detail::writeFloatingPoint<float>(Out.GlobalFlushBuf, Out.FlushBufferSize,
1167  Out.WIOffset, Out.get_flags(),
1168  Out.get_width(), Out.get_precision(), RHS);
1169  return Out;
1170 }
1171 
1172 inline const stream &operator<<(const stream &Out, const double &RHS) {
1173  detail::writeFloatingPoint<double>(Out.GlobalFlushBuf, Out.FlushBufferSize,
1174  Out.WIOffset, Out.get_flags(),
1175  Out.get_width(), Out.get_precision(), RHS);
1176  return Out;
1177 }
1178 
1179 inline const stream &operator<<(const stream &Out, const half &RHS) {
1180  detail::writeFloatingPoint<half>(Out.GlobalFlushBuf, Out.FlushBufferSize,
1181  Out.WIOffset, Out.get_flags(),
1182  Out.get_width(), Out.get_precision(), RHS);
1183  return Out;
1184 }
1185 
1186 inline const stream &operator<<(const stream &Out,
1187  const ext::oneapi::bfloat16 &RHS) {
1188  detail::writeFloatingPoint<ext::oneapi::bfloat16>(
1189  Out.GlobalFlushBuf, Out.FlushBufferSize, Out.WIOffset, Out.get_flags(),
1190  Out.get_width(), Out.get_precision(), RHS);
1191  return Out;
1192 }
1193 
1194 // Pointer
1195 
1196 template <typename ElementType, access::address_space Space,
1197  access::decorated IsDecorated>
1198 inline const stream &
1199 operator<<(const stream &Out,
1201  Out << RHS.get();
1202  return Out;
1203 }
1204 
1205 template <typename T>
1206 const stream &operator<<(const stream &Out, const T *RHS) {
1207  detail::FmtFlags Flags = Out.get_flags();
1208  Flags &= ~detail::BaseField;
1209  Flags |= detail::Hex | detail::ShowBase;
1210  detail::writeIntegral(Out.GlobalFlushBuf, Out.FlushBufferSize, Out.WIOffset,
1211  Flags, Out.get_width(), reinterpret_cast<size_t>(RHS));
1212  return Out;
1213 }
1214 
1215 // Manipulators
1216 
1217 inline const stream &operator<<(const stream &Out,
1218  const __precision_manipulator__ &RHS) {
1219  Out.Precision = RHS.precision();
1220  return Out;
1221 }
1222 
1223 inline const stream &operator<<(const stream &Out,
1224  const __width_manipulator__ &RHS) {
1225  Out.Width = RHS.width();
1226  return Out;
1227 }
1228 
1229 inline const stream &operator<<(const stream &Out,
1230  const stream_manipulator RHS) {
1231  switch (RHS) {
1233  Out << '\n';
1234  flushBuffer(Out.GlobalOffset, Out.GlobalBuf, Out.GlobalFlushBuf,
1235  Out.WIOffset);
1236  break;
1238  flushBuffer(Out.GlobalOffset, Out.GlobalBuf, Out.GlobalFlushBuf,
1239  Out.WIOffset);
1240  break;
1241  default:
1242  Out.set_manipulator(RHS);
1243  break;
1244  }
1245  return Out;
1246 }
1247 
1248 // Vec
1249 
1250 template <typename T, int VectorLength>
1251 const stream &operator<<(const stream &Out, const vec<T, VectorLength> &RHS) {
1252  detail::writeVec<T, VectorLength>(Out.GlobalFlushBuf, Out.FlushBufferSize,
1253  Out.WIOffset, Out.get_flags(),
1254  Out.get_width(), Out.get_precision(), RHS);
1255  return Out;
1256 }
1257 
1258 // SYCL types
1259 
1260 template <int Dimensions>
1261 inline const stream &operator<<(const stream &Out, const id<Dimensions> &RHS) {
1262  detail::writeArray<Dimensions>(Out.GlobalFlushBuf, Out.FlushBufferSize,
1263  Out.WIOffset, RHS);
1264  return Out;
1265 }
1266 
1267 template <int Dimensions>
1268 inline const stream &operator<<(const stream &Out,
1269  const range<Dimensions> &RHS) {
1270  detail::writeArray<Dimensions>(Out.GlobalFlushBuf, Out.FlushBufferSize,
1271  Out.WIOffset, RHS);
1272  return Out;
1273 }
1274 
1275 template <int Dimensions>
1276 inline const stream &operator<<(const stream &Out,
1277  const item<Dimensions> &RHS) {
1278  detail::writeItem<Dimensions>(Out.GlobalFlushBuf, Out.FlushBufferSize,
1279  Out.WIOffset, RHS);
1280  return Out;
1281 }
1282 
1283 template <int Dimensions>
1284 inline const stream &operator<<(const stream &Out,
1285  const nd_range<Dimensions> &RHS) {
1286  detail::writeNDRange<Dimensions>(Out.GlobalFlushBuf, Out.FlushBufferSize,
1287  Out.WIOffset, RHS);
1288  return Out;
1289 }
1290 
1291 template <int Dimensions>
1292 inline const stream &operator<<(const stream &Out,
1293  const nd_item<Dimensions> &RHS) {
1294  detail::writeNDItem<Dimensions>(Out.GlobalFlushBuf, Out.FlushBufferSize,
1295  Out.WIOffset, RHS);
1296  return Out;
1297 }
1298 
1299 template <int Dimensions>
1300 inline const stream &operator<<(const stream &Out,
1301  const group<Dimensions> &RHS) {
1302  detail::writeGroup<Dimensions>(Out.GlobalFlushBuf, Out.FlushBufferSize,
1303  Out.WIOffset, RHS);
1304  return Out;
1305 }
1306 
1307 template <int Dimensions>
1308 inline const stream &operator<<(const stream &Out,
1309  const h_item<Dimensions> &RHS) {
1310  detail::writeHItem<Dimensions>(Out.GlobalFlushBuf, Out.FlushBufferSize,
1311  Out.WIOffset, RHS);
1312  return Out;
1313 }
1314 
1315 template <typename T, typename RT = detail::EnableIfSwizzleVec<T>>
1316 inline const stream &operator<<(const stream &Out, const T &RHS) {
1317  RT V = RHS;
1318  Out << V;
1319  return Out;
1320 }
1321 
1322 } // namespace _V1
1323 } // namespace sycl
1324 namespace std {
1325 template <> struct hash<sycl::stream> {
1326  size_t operator()(const sycl::stream &S) const {
1327 #ifdef __SYCL_DEVICE_ONLY__
1328  (void)S;
1329  return 0;
1330 #else
1331  return hash<std::shared_ptr<sycl::detail::stream_impl>>()(
1333 #endif
1334  }
1335 };
1336 } // namespace std
The file contains implementations of accessor class.
__precision_manipulator__(int Precision)
Definition: stream.hpp:817
Identifies an instance of a group::parallel_for_work_item function object executing at each point in ...
Definition: h_item.hpp:31
item< Dimensions, false > get_logical_local() const
Definition: h_item.hpp:46
item< Dimensions, false > get_global() const
Definition: h_item.hpp:42
item< Dimensions, false > get_physical_local() const
Definition: h_item.hpp:48
A unique identifier of an item in an index space.
Definition: id.hpp:36
Identifies an instance of the function object executing at each point in a range.
Definition: item.hpp:37
id< Dimensions > get_id() const
Definition: item.hpp:55
std::enable_if_t< has_offset, id< Dimensions > > get_offset() const
Definition: item.hpp:81
range< Dimensions > get_range() const
Definition: item.hpp:69
Identifies an instance of the function object executing at each point in an nd_range.
Definition: nd_item.hpp:48
id< Dimensions > get_local_id() const
Definition: nd_item.hpp:83
id< Dimensions > get_global_id() const
Definition: nd_item.hpp:52
Defines the iteration domain of both the work-groups and the overall dispatch.
Definition: nd_range.hpp:22
range< Dimensions > get_global_range() const
Definition: nd_range.hpp:43
range< Dimensions > get_local_range() const
Definition: nd_range.hpp:45
id< Dimensions > get_offset() const
Definition: nd_range.hpp:50
Defines the iteration domain of either a single work-group in a parallel dispatch,...
Definition: range.hpp:26
#define __SYCL_SPECIAL_CLASS
Definition: defines.hpp:29
class __SYCL_EBO __SYCL_SPECIAL_CLASS __SYCL_TYPE(accessor) accessor< DataT
Image accessors.
static constexpr FmtFlags Fixed
Definition: stream.hpp:57
typename detail::DecoratedType< unsigned, GlobalBufAS >::type * GlobalOffsetPtrType
Definition: stream.hpp:106
static constexpr FmtFlags ShowPos
Definition: stream.hpp:56
std::enable_if_t<(VecLength==1), unsigned > VecToStr(const vec< T, VecLength > &Vec, char *VecStr, unsigned Flags, int Width, int Precision)
Definition: stream.hpp:571
decltype(Obj::impl) const & getSyclObjImpl(const Obj &SyclObject)
Definition: impl_utils.hpp:31
constexpr size_t MAX_ARRAY_SIZE
Definition: stream.hpp:80
std::enable_if_t< std::is_integral_v< T > > writeIntegral(GlobalBufAccessorT &GlobalFlushBuf, size_t FlushBufferSize, unsigned WIOffset, unsigned Flags, int Width, const T &Val)
Definition: stream.hpp:510
static constexpr FmtFlags Oct
Definition: stream.hpp:54
constexpr static int GlobalOffsetDim
Definition: stream.hpp:107
constexpr size_t MAX_FLOATING_POINT_DIGITS
Definition: stream.hpp:70
detail::enable_if_t< std::is_same< T, float >::value||std::is_same< T, double >::value, bool > isFastMathSignBit(T Val)
Definition: stream.hpp:297
static constexpr FmtFlags FloatField
Definition: stream.hpp:68
std::enable_if_t< std::is_integral_v< T >, unsigned > ScalarToStr(const T &Val, char *Buf, unsigned Flags, int, int Precision=-1)
Definition: stream.hpp:194
void writeHItem(GlobalBufAccessorT &GlobalFlushBuf, size_t FlushBufferSize, unsigned WIOffset, const h_item< Dimensions > &HItem)
Definition: stream.hpp:731
typename std::enable_if_t< detail::check_type_in_v< F, float, double, half, ext::oneapi::bfloat16 >, T > EnableIfFP
Definition: stream.hpp:89
void writeItem(GlobalBufAccessorT &GlobalFlushBuf, size_t FlushBufferSize, unsigned WIOffset, const item< Dimensions > &Item)
Definition: stream.hpp:639
std::make_unsigned_t< T > getAbsVal(const T Val, const int Base)
Definition: stream.hpp:166
void writeVec(GlobalBufAccessorT &GlobalFlushBuf, size_t FlushBufferSize, unsigned WIOffset, unsigned Flags, int Width, int Precision, const vec< T, VecLength > &Vec)
Definition: stream.hpp:601
typename detail::DecoratedType< char, GlobalBufAS >::type * GlobalBufPtrType
Definition: stream.hpp:97
void writeArray(GlobalBufAccessorT &GlobalFlushBuf, size_t FlushBufferSize, unsigned WIOffset, const array< ArrayLength > &Arr)
Definition: stream.hpp:630
void flushBuffer(GlobalOffsetAccessorT &GlobalOffset, GlobalBufAccessorT &GlobalBuf, GlobalBufAccessorT &GlobalFlushBuf, unsigned WIOffset)
Definition: stream.hpp:550
EnableIfFP< T > writeFloatingPoint(GlobalBufAccessorT &GlobalFlushBuf, size_t FlushBufferSize, unsigned WIOffset, unsigned Flags, int Width, int Precision, const T &Val)
Definition: stream.hpp:522
detail::enable_if_t< std::is_same< T, float >::value||std::is_same< T, double >::value, unsigned > checkForInfNan(char *Buf, T Val)
Definition: stream.hpp:310
void writeNDItem(GlobalBufAccessorT &GlobalFlushBuf, size_t FlushBufferSize, unsigned WIOffset, const nd_item< Dimensions > &ND_Item)
Definition: stream.hpp:677
accessor< unsigned, 1, sycl::access::mode::atomic, sycl::access::target::device > GlobalOffsetAccessorT
Definition: stream.hpp:101
constexpr char VEC_CLOSE_BRACE
Definition: stream.hpp:74
accessor< char, 1, sycl::access::mode::read_write, sycl::access::target::device > GlobalBufAccessorT
Definition: stream.hpp:92
constexpr const char * VEC_ELEMENT_DELIMITER
Definition: stream.hpp:72
unsigned GetFlushBufOffset(const GlobalBufAccessorT &GlobalFlushBuf, unsigned WIOffset)
Definition: stream.hpp:113
std::ostream & operator<<(std::ostream &os, std::optional< T > const &opt)
static constexpr FmtFlags ShowBase
Definition: stream.hpp:55
constexpr size_t MAX_DIMENSIONS
Definition: stream.hpp:76
void reverseBuf(char *Buf, unsigned Len)
Definition: stream.hpp:153
unsigned int FmtFlags
Definition: stream.hpp:48
char digitToChar(const int Digit)
Definition: stream.hpp:170
EnableIfFP< T, unsigned > floatingPointToDecStr(T AbsVal, char *Digits, int Precision, bool IsSci)
Definition: stream.hpp:370
static constexpr FmtFlags Hex
Definition: stream.hpp:53
constexpr char VEC_OPEN_BRACE
Definition: stream.hpp:73
void writeGroup(GlobalBufAccessorT &GlobalFlushBuf, size_t FlushBufferSize, unsigned WIOffset, const group< Dimensions > &Group)
Definition: stream.hpp:694
unsigned ItemToStr(char *Buf, const item< Dimensions, false > &Item)
Definition: stream.hpp:719
unsigned long long D2I64(double Val)
Definition: stream.hpp:259
constexpr static access::address_space GlobalOffsetAS
Definition: stream.hpp:103
void SetFlushBufOffset(GlobalBufAccessorT &GlobalFlushBuf, unsigned WIOffset, unsigned Offset)
Definition: stream.hpp:125
void writeNDRange(GlobalBufAccessorT &GlobalFlushBuf, size_t FlushBufferSize, unsigned WIOffset, const nd_range< Dimensions > &ND_Range)
Definition: stream.hpp:658
static constexpr FmtFlags BaseField
Definition: stream.hpp:63
constexpr size_t MAX_INTEGRAL_DIGITS
Definition: stream.hpp:71
struct __SYCL2020_DEPRECATED("This type isn't device copyable in SYCL 2020") IsDeprecatedDeviceCopyable< T
constexpr static int GlobalBufDim
Definition: stream.hpp:98
constexpr size_t MAX_ITEM_SIZE
Definition: stream.hpp:716
constexpr unsigned FLUSH_BUF_OFFSET_SIZE
Definition: stream.hpp:85
sycl::ext::oneapi::bfloat16 bfloat16
detail::enable_if_t< std::is_same< T, float >::value||std::is_same< T, double >::value, bool > isFastMathInf(T Val)
Definition: stream.hpp:271
static constexpr FmtFlags Scientific
Definition: stream.hpp:58
unsigned append(char *Dst, const char *Src)
Definition: stream.hpp:240
void write(GlobalBufAccessorT &GlobalFlushBuf, size_t FlushBufferSize, unsigned WIOffset, const char *Str, unsigned Len, unsigned Padding=0)
Definition: stream.hpp:131
static constexpr FmtFlags Dec
Definition: stream.hpp:52
typename std::enable_if_t< IsSwizzleOp< T >::value, typename IsSwizzleOp< T >::Type > EnableIfSwizzleVec
Definition: stream.hpp:765
std::enable_if_t< std::is_integral_v< T >, unsigned > integralToBase(T Val, int Base, char *Digits)
Definition: stream.hpp:180
unsigned F2I32(float Val)
Definition: stream.hpp:250
unsigned ArrayToStr(char *Buf, const array< ArrayLength > &Arr)
Definition: stream.hpp:614
bool updateOffset(GlobalOffsetAccessorT &GlobalOffset, GlobalBufAccessorT &GlobalBuf, unsigned Size, unsigned &Cur)
Definition: stream.hpp:536
constexpr static access::address_space GlobalBufAS
Definition: stream.hpp:94
detail::enable_if_t< std::is_same< T, float >::value||std::is_same< T, double >::value, bool > isFastMathNan(T Val)
Definition: stream.hpp:284
bool operator==(const cache_config &lhs, const cache_config &rhs)
bool operator!=(const cache_config &lhs, const cache_config &rhs)
static constexpr bool has_property()
std::enable_if_t< std::is_same_v< T, bfloat16 >, bool > isnan(T x)
static constexpr auto get_property()
constexpr stream_manipulator endl
Definition: stream.hpp:799
__precision_manipulator__ setprecision(int Precision)
Definition: stream.hpp:837
constexpr stream_manipulator fixed
Definition: stream.hpp:803
constexpr stream_manipulator noshowbase
Definition: stream.hpp:791
constexpr mode_tag_t< access_mode::read_write > read_write
Definition: access.hpp:85
constexpr stream_manipulator hex
Definition: stream.hpp:787
constexpr stream_manipulator flush
Definition: stream.hpp:801
__width_manipulator__ setw(int Width)
Definition: stream.hpp:841
std::uint8_t instead
Definition: aliases.hpp:93
constexpr stream_manipulator scientific
Definition: stream.hpp:805
constexpr stream_manipulator noshowpos
Definition: stream.hpp:795
stream_manipulator
Definition: stream.hpp:769
constexpr stream_manipulator showpos
Definition: stream.hpp:797
constexpr stream_manipulator dec
Definition: stream.hpp:785
constexpr stream_manipulator showbase
Definition: stream.hpp:793
constexpr stream_manipulator oct
Definition: stream.hpp:789
sycl::detail::half_impl::half half
Definition: aliases.hpp:101
constexpr stream_manipulator defaultfloat
Definition: stream.hpp:809
constexpr stream_manipulator hexfloat
Definition: stream.hpp:807
Definition: access.hpp:18
_Abi const simd< _Tp, _Abi > & noexcept
Definition: simd.hpp:1324
size_t operator()(const sycl::stream &S) const
Definition: stream.hpp:1326