DPC++ Runtime
Runtime libraries for oneAPI DPC++
stream.hpp
Go to the documentation of this file.
1 //==----------------- stream.hpp - SYCL standard header file ---------------==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #pragma once
10 
11 #include <sycl/access/access.hpp> // for target, mode, address_space
12 #include <sycl/accessor.hpp> // for accessor
13 #include <sycl/aliases.hpp> // for half
14 #include <sycl/atomic.hpp> // for atomic
15 #include <sycl/builtins.hpp> // for isinf, isnan, signbit
16 #include <sycl/detail/array.hpp> // for array
17 #include <sycl/detail/cg.hpp> // for stream_impl
18 #include <sycl/detail/defines.hpp> // for __SYCL_SPECIAL_CLASS, __S...
19 #include <sycl/detail/defines_elementary.hpp> // for __SYCL2020_DEPRECATED
20 #include <sycl/detail/export.hpp> // for __SYCL_EXPORT
21 #include <sycl/detail/item_base.hpp> // for id, range
22 #include <sycl/detail/owner_less_base.hpp> // for OwnerLessBase
23 #include <sycl/group.hpp> // for group
24 #include <sycl/h_item.hpp> // for h_item
25 #include <sycl/half_type.hpp> // for half, operator-, operator<
26 #include <sycl/handler.hpp> // for handler
27 #include <sycl/item.hpp> // for item
28 #include <sycl/nd_item.hpp> // for nd_item
29 #include <sycl/nd_range.hpp> // for nd_range
30 #include <sycl/property_list.hpp> // for property_list
31 #include <sycl/range.hpp> // for range
32 #include <sycl/sub_group.hpp> // for multi_ptr
33 #include <sycl/types.hpp> // for vec, SwizzleOp
34 
35 #include <cstddef> // for size_t, byte
36 #include <memory> // for hash, shared_ptr
37 #include <stdint.h> // for uint16_t, uint8_t
38 #include <type_traits> // for enable_if_t, is_same, fal...
39 #include <variant> // for hash
40 
41 namespace sycl {
42 inline namespace _V1 {
43 
44 namespace detail {
45 
46 using FmtFlags = unsigned int;
47 
48 // Mapping from stream_manipulator to FmtFlags. Each manipulator corresponds
49 // to the bit in FmtFlags.
50 static constexpr FmtFlags Dec = 0x0001;
51 static constexpr FmtFlags Hex = 0x0002;
52 static constexpr FmtFlags Oct = 0x0004;
53 static constexpr FmtFlags ShowBase = 0x0008;
54 static constexpr FmtFlags ShowPos = 0x0010;
55 static constexpr FmtFlags Fixed = 0x0020;
56 static constexpr FmtFlags Scientific = 0x0040;
57 
58 // Bitmask made of the combination of the base flags. Base flags are mutually
59 // exclusive, this mask is used to clean base field before setting the new
60 // base flag.
61 static constexpr FmtFlags BaseField = Dec | Hex | Oct;
62 
63 // Bitmask made of the combination of the floating point value format flags.
64 // Thease flags are mutually exclusive, this mask is used to clean float field
65 // before setting the new float flag.
66 static constexpr FmtFlags FloatField = Scientific | Fixed;
67 
68 constexpr size_t MAX_FLOATING_POINT_DIGITS = 24;
69 constexpr size_t MAX_INTEGRAL_DIGITS = 23;
70 constexpr const char *VEC_ELEMENT_DELIMITER = ", ";
71 constexpr char VEC_OPEN_BRACE = '{';
72 constexpr char VEC_CLOSE_BRACE = '}';
73 
74 constexpr size_t MAX_DIMENSIONS = 3;
75 
76 // Space for integrals (up to 3), comma and space between the
77 // integrals and enclosing braces.
78 constexpr size_t MAX_ARRAY_SIZE =
80 
81 // First 2 bytes in each work item's flush buffer are reserved for saving
82 // statement offset.
83 constexpr unsigned FLUSH_BUF_OFFSET_SIZE = 2;
84 
85 template <class F, class T = void>
86 using EnableIfFP = typename std::enable_if_t<std::is_same_v<F, float> ||
87  std::is_same_v<F, double> ||
88  std::is_same_v<F, half>,
89  T>;
90 
92  sycl::access::target::device>;
93 
96 using GlobalBufPtrType =
98 constexpr static int GlobalBufDim = 1;
99 
100 using GlobalOffsetAccessorT = accessor<unsigned, 1, sycl::access::mode::atomic,
101  sycl::access::target::device>;
102 
105 using GlobalOffsetPtrType =
107 constexpr static int GlobalOffsetDim = 1;
108 
109 // Read first 2 bytes of flush buffer to get buffer offset.
110 // TODO: Should be optimized to the following:
111 // return *reinterpret_cast<uint16_t *>(&GlobalFlushBuf[WIOffset]);
112 // when an issue with device code compilation using this optimization is fixed.
113 inline unsigned GetFlushBufOffset(const GlobalBufAccessorT &GlobalFlushBuf,
114  unsigned WIOffset) {
115  return ((static_cast<unsigned>(static_cast<uint8_t>(GlobalFlushBuf[WIOffset]))
116  << 8) +
117  static_cast<uint8_t>(GlobalFlushBuf[WIOffset + 1]));
118 }
119 
120 // Write flush buffer's offset into first 2 bytes of that buffer.
121 // TODO: Should be optimized to the following:
122 // *reinterpret_cast<uint16_t *>(&GlobalFlushBuf[WIOffset]) =
123 // static_cast<uint16_t>(Offset);
124 // when an issue with device code compilation using this optimization is fixed.
125 inline void SetFlushBufOffset(GlobalBufAccessorT &GlobalFlushBuf,
126  unsigned WIOffset, unsigned Offset) {
127  GlobalFlushBuf[WIOffset] = static_cast<char>((Offset >> 8) & 0xff);
128  GlobalFlushBuf[WIOffset + 1] = static_cast<char>(Offset & 0xff);
129 }
130 
131 inline void write(GlobalBufAccessorT &GlobalFlushBuf, size_t FlushBufferSize,
132  unsigned WIOffset, const char *Str, unsigned Len,
133  unsigned Padding = 0) {
134  unsigned Offset =
135  GetFlushBufOffset(GlobalFlushBuf, WIOffset) + FLUSH_BUF_OFFSET_SIZE;
136 
137  if ((Offset + Len + Padding > FlushBufferSize) ||
138  (WIOffset + Offset + Len + Padding > GlobalFlushBuf.size()))
139  // TODO: flush here
140  return;
141 
142  // Write padding
143  for (size_t I = 0; I < Padding; ++I, ++Offset)
144  GlobalFlushBuf[WIOffset + Offset] = ' ';
145 
146  for (size_t I = 0; I < Len; ++I, ++Offset) {
147  GlobalFlushBuf[WIOffset + Offset] = Str[I];
148  }
149 
150  SetFlushBufOffset(GlobalFlushBuf, WIOffset, Offset - FLUSH_BUF_OFFSET_SIZE);
151 }
152 
153 inline void reverseBuf(char *Buf, unsigned Len) {
154  int I = Len - 1;
155  int J = 0;
156  while (I > J) {
157  int Temp = Buf[I];
158  Buf[I] = Buf[J];
159  Buf[J] = Temp;
160  I--;
161  J++;
162  }
163 }
164 
165 template <typename T>
166 inline std::make_unsigned_t<T> getAbsVal(const T Val, const int Base) {
167  return ((Base == 10) && (Val < 0)) ? -Val : Val;
168 }
169 
170 inline char digitToChar(const int Digit) {
171  if (Digit < 10) {
172  return '0' + Digit;
173  } else {
174  return 'a' + Digit - 10;
175  }
176 }
177 
178 template <typename T>
179 inline typename std::enable_if_t<std::is_integral_v<T>, unsigned>
180 integralToBase(T Val, int Base, char *Digits) {
181  unsigned NumDigits = 0;
182 
183  do {
184  Digits[NumDigits++] = digitToChar(Val % Base);
185  Val /= Base;
186  } while (Val);
187 
188  return NumDigits;
189 }
190 
191 // Returns number of symbols written to the buffer
192 template <typename T>
193 inline typename std::enable_if_t<std::is_integral_v<T>, unsigned>
194 ScalarToStr(const T &Val, char *Buf, unsigned Flags, int, int Precision = -1) {
195  (void)Precision;
196  int Base = 10;
197 
198  // append base manipulator
199  switch (Flags & BaseField) {
200  case Dec:
201  Base = 10;
202  break;
203  case Hex:
204  Base = 16;
205  break;
206  case Oct:
207  Base = 8;
208  break;
209  default:
210  // default value is 10
211  break;
212  }
213 
214  unsigned Offset = 0;
215 
216  // write '+' to the stream if the base is 10 and the value is non-negative
217  // or write '-' to stream if base is 10 and the value is negative
218  if (Base == 10) {
219  if ((Flags & ShowPos) && Val >= 0)
220  Buf[Offset++] = '+';
221  else if (Val < 0)
222  Buf[Offset++] = '-';
223  }
224 
225  // write 0 or 0x to the stream if base is not 10 and the manipulator is set
226  if (Base != 10 && (Flags & ShowBase)) {
227  Buf[Offset++] = '0';
228  if (Base == 16)
229  Buf[Offset++] = 'x';
230  }
231 
232  auto AbsVal = getAbsVal(Val, Base);
233 
234  const unsigned NumBuf = integralToBase(AbsVal, Base, Buf + Offset);
235 
236  reverseBuf(Buf + Offset, NumBuf);
237  return Offset + NumBuf;
238 }
239 
240 inline unsigned append(char *Dst, const char *Src) {
241  unsigned Len = 0;
242  for (; Src[Len] != '\0'; ++Len)
243  ;
244 
245  for (unsigned I = 0; I < Len; ++I)
246  Dst[I] = Src[I];
247  return Len;
248 }
249 
250 inline unsigned F2I32(float Val) {
251  union {
252  float FVal;
253  unsigned I32Val;
254  } Internal;
255  Internal.FVal = Val;
256  return Internal.I32Val;
257 }
258 
259 inline unsigned long long D2I64(double Val) {
260  union {
261  double DVal;
262  unsigned long long I64Val;
263  } Internal;
264  Internal.DVal = Val;
265  return Internal.I64Val;
266 }
267 
268 template <typename T>
269 inline typename detail::enable_if_t<
270  std::is_same<T, float>::value || std::is_same<T, double>::value, bool>
272  if constexpr (sizeof(Val) == 4) {
273  return (F2I32(Val) & 0x7fffffff) == 0x7f800000;
274  } else if constexpr (sizeof(Val) == 8) {
275  return (D2I64(Val) & -1ULL >> 1) == 0x7ffULL << 52;
276  }
277 
278  return false;
279 }
280 
281 template <typename T>
282 inline typename detail::enable_if_t<
283  std::is_same<T, float>::value || std::is_same<T, double>::value, bool>
285  if constexpr (sizeof(Val) == 4) {
286  return (F2I32(Val) & 0x7fffffff) > 0x7f800000;
287  } else if constexpr (sizeof(Val) == 8) {
288  return (D2I64(Val) & -1ULL >> 1) > 0x7ffULL << 52;
289  }
290 
291  return false;
292 }
293 
294 template <typename T>
295 inline typename detail::enable_if_t<
296  std::is_same<T, float>::value || std::is_same<T, double>::value, bool>
298  if constexpr (sizeof(Val) == 4) {
299  return F2I32(Val) >> 31;
300  } else if constexpr (sizeof(Val) == 8) {
301  return D2I64(Val) >> 63;
302  }
303 
304  return false;
305 }
306 
307 template <typename T>
308 typename detail::enable_if_t<
309  std::is_same<T, float>::value || std::is_same<T, double>::value, unsigned>
310 checkForInfNan(char *Buf, T Val) {
311 #ifdef __FAST_MATH__
312  if (isFastMathNan(Val))
313 #else
314  if (isnan(Val))
315 #endif
316  return append(Buf, "nan");
317 #ifdef __FAST_MATH__
318  if (isFastMathInf(Val)) {
319  if (isFastMathSignBit(Val))
320 #else
321  if (isinf(Val)) {
322  if (signbit(Val))
323 #endif
324  return append(Buf, "-inf");
325  return append(Buf, "inf");
326  }
327  return 0;
328 }
329 
330 template <typename T>
331 inline typename std::enable_if_t<std::is_same_v<T, half>, unsigned>
332 checkForInfNan(char *Buf, T Val) {
333  if (Val != Val)
334  return append(Buf, "nan");
335 
336  // Extract the sign from the bits
337  const uint16_t Sign = reinterpret_cast<uint16_t &>(Val) & 0x8000;
338  // Extract the exponent from the bits
339  const uint16_t Exp16 = (reinterpret_cast<uint16_t &>(Val) & 0x7c00) >> 10;
340 
341  if (Exp16 == 0x1f) {
342  if (Sign)
343  return append(Buf, "-inf");
344  return append(Buf, "inf");
345  }
346  return 0;
347 }
348 
349 template <typename T>
351  int Precision, bool IsSci) {
352  int Exp = 0;
353 
354  // For the case that the value is larger than 10.0
355  while (AbsVal >= T{10.0}) {
356  ++Exp;
357  AbsVal /= T{10.0};
358  }
359  // For the case that the value is less than 1.0
360  while (AbsVal > T{0.0} && AbsVal < T{1.0}) {
361  --Exp;
362  AbsVal *= T{10.0};
363  }
364 
365  auto IntegralPart = static_cast<int>(AbsVal);
366  auto FractionPart = AbsVal - IntegralPart;
367 
368  int FractionDigits[MAX_FLOATING_POINT_DIGITS] = {0};
369 
370  // Exponent
371  int P = Precision > 0 ? Precision : 4;
372  size_t FractionLength = Exp + P;
373 
374  // After normalization integral part contains 1 symbol, also there could be
375  // '.', 'e', sign of the exponent and sign of the number, overall 5 symbols.
376  // So, clamp fraction length if required according to maximum size of the
377  // buffer for floating point number.
378  if (FractionLength > MAX_FLOATING_POINT_DIGITS - 5)
379  FractionLength = MAX_FLOATING_POINT_DIGITS - 5;
380 
381  for (unsigned I = 0; I < FractionLength; ++I) {
382  FractionPart *= T{10.0};
383  FractionDigits[I] = static_cast<int>(FractionPart);
384  FractionPart -= static_cast<int>(FractionPart);
385  }
386 
387  int Carry = FractionPart > static_cast<T>(0.5) ? 1 : 0;
388 
389  // Propagate the Carry
390  for (int I = FractionLength - 1; I >= 0 && Carry; --I) {
391  auto Digit = FractionDigits[I] + Carry;
392  FractionDigits[I] = Digit % 10;
393  Carry = Digit / 10;
394  }
395 
396  // Carry from the fraction part is propagated to integral part
397  IntegralPart += Carry;
398  if (IntegralPart == 10) {
399  IntegralPart = 1;
400  ++Exp;
401  }
402 
403  unsigned Offset = 0;
404 
405  // Assemble the final string correspondingly
406  if (IsSci) { // scientific mode
407  // Append the integral part
408  Digits[Offset++] = digitToChar(IntegralPart);
409  Digits[Offset++] = '.';
410 
411  // Append all fraction
412  for (unsigned I = 0; I < FractionLength; ++I)
413  Digits[Offset++] = digitToChar(FractionDigits[I]);
414 
415  auto AbsExp = Exp < 0 ? -Exp : Exp;
416  // Exponent part
417  Digits[Offset++] = 'e';
418  Digits[Offset++] = Exp >= 0 ? '+' : '-';
419  Digits[Offset++] = digitToChar(AbsExp / 10);
420  Digits[Offset++] = digitToChar(AbsExp % 10);
421  } else { // normal mode
422  if (Exp < 0) {
423  Digits[Offset++] = '0';
424  Digits[Offset++] = '.';
425  while (++Exp)
426  Digits[Offset++] = '0';
427 
428  // Append the integral part
429  Digits[Offset++] = digitToChar(IntegralPart);
430 
431  // Append all fraction
432  for (unsigned I = 0; I < FractionLength; ++I)
433  Digits[Offset++] = digitToChar(FractionDigits[I]);
434  } else {
435  // Append the integral part
436  Digits[Offset++] = digitToChar(IntegralPart);
437  unsigned I = 0;
438  // Append the integral part first
439  for (; I < FractionLength && Exp--; ++I)
440  Digits[Offset++] = digitToChar(FractionDigits[I]);
441 
442  // Put the dot
443  Digits[Offset++] = '.';
444 
445  // Append the rest of fraction part, or the real fraction part
446  for (; I < FractionLength; ++I)
447  Digits[Offset++] = digitToChar(FractionDigits[I]);
448  }
449  // The normal mode requires no tailing zero digit, then we need to first
450  // find the first non-zero digit
451  while (Digits[Offset - 1] == '0')
452  Offset--;
453 
454  // If dot is the last digit, it should be stripped off as well
455  if (Digits[Offset - 1] == '.')
456  Offset--;
457  }
458  return Offset;
459 }
460 
461 // Returns number of symbols written to the buffer
462 template <typename T>
463 inline EnableIfFP<T, unsigned>
464 ScalarToStr(const T &Val, char *Buf, unsigned Flags, int, int Precision = -1) {
465  unsigned Offset = checkForInfNan(Buf, Val);
466  if (Offset)
467  return Offset;
468 
469  T Neg = -Val;
470  auto AbsVal = Val < 0 ? Neg : Val;
471 
472  if (Val < 0) {
473  Buf[Offset++] = '-';
474  } else if (Flags & ShowPos) {
475  Buf[Offset++] = '+';
476  }
477 
478  bool IsSci = false;
479  if (Flags & detail::Scientific)
480  IsSci = true;
481 
482  // TODO: manipulators for floating-point output - hexfloat, fixed
483  Offset += floatingPointToDecStr(AbsVal, Buf + Offset, Precision, IsSci);
484 
485  return Offset;
486 }
487 
488 template <typename T>
489 inline typename std::enable_if_t<std::is_integral_v<T>>
490 writeIntegral(GlobalBufAccessorT &GlobalFlushBuf, size_t FlushBufferSize,
491  unsigned WIOffset, unsigned Flags, int Width, const T &Val) {
492  char Digits[MAX_INTEGRAL_DIGITS] = {0};
493  unsigned Len = ScalarToStr(Val, Digits, Flags, Width);
494  write(GlobalFlushBuf, FlushBufferSize, WIOffset, Digits, Len,
495  (Width > 0 && static_cast<unsigned>(Width) > Len)
496  ? static_cast<unsigned>(Width) - Len
497  : 0);
498 }
499 
500 template <typename T>
501 inline EnableIfFP<T>
502 writeFloatingPoint(GlobalBufAccessorT &GlobalFlushBuf, size_t FlushBufferSize,
503  unsigned WIOffset, unsigned Flags, int Width, int Precision,
504  const T &Val) {
505  char Digits[MAX_FLOATING_POINT_DIGITS] = {0};
506  unsigned Len = ScalarToStr(Val, Digits, Flags, Width, Precision);
507  write(GlobalFlushBuf, FlushBufferSize, WIOffset, Digits, Len,
508  (Width > 0 && static_cast<unsigned>(Width) > Len)
509  ? static_cast<unsigned>(Width) - Len
510  : 0);
511 }
512 
513 // Helper method to update offset in the global buffer atomically according to
514 // the provided size of the data in the flush buffer. Return true if offset is
515 // updated and false in case of overflow.
516 inline bool updateOffset(GlobalOffsetAccessorT &GlobalOffset,
517  GlobalBufAccessorT &GlobalBuf, unsigned Size,
518  unsigned &Cur) {
519  unsigned New;
520  Cur = GlobalOffset[0].load();
521  do {
522  if (GlobalBuf.get_range().size() - Cur < Size)
523  // Overflow
524  return false;
525  New = Cur + Size;
526  } while (!GlobalOffset[0].compare_exchange_strong(Cur, New));
527  return true;
528 }
529 
530 inline void flushBuffer(GlobalOffsetAccessorT &GlobalOffset,
531  GlobalBufAccessorT &GlobalBuf,
532  GlobalBufAccessorT &GlobalFlushBuf, unsigned WIOffset) {
533  unsigned Offset = GetFlushBufOffset(GlobalFlushBuf, WIOffset);
534  if (Offset == 0)
535  return;
536 
537  unsigned Cur = 0;
538  if (!updateOffset(GlobalOffset, GlobalBuf, Offset, Cur))
539  return;
540 
541  unsigned StmtOffset = WIOffset + FLUSH_BUF_OFFSET_SIZE;
542  for (unsigned I = StmtOffset; I < StmtOffset + Offset; I++) {
543  GlobalBuf[Cur++] = GlobalFlushBuf[I];
544  }
545  // Reset the offset in the flush buffer
546  SetFlushBufOffset(GlobalFlushBuf, WIOffset, 0);
547 }
548 
549 template <typename T, int VecLength>
550 typename std::enable_if_t<(VecLength == 1), unsigned>
551 VecToStr(const vec<T, VecLength> &Vec, char *VecStr, unsigned Flags, int Width,
552  int Precision) {
553  return ScalarToStr(static_cast<T>(Vec.x()), VecStr, Flags, Width, Precision);
554 }
555 
556 template <typename T, int VecLength>
557 typename std::enable_if_t<(VecLength == 2 || VecLength == 4 || VecLength == 8 ||
558  VecLength == 16),
559  unsigned>
560 VecToStr(const vec<T, VecLength> &Vec, char *VecStr, unsigned Flags, int Width,
561  int Precision) {
562  unsigned Len =
563  VecToStr<T, VecLength / 2>(Vec.lo(), VecStr, Flags, Width, Precision);
564  Len += append(VecStr + Len, VEC_ELEMENT_DELIMITER);
565  Len += VecToStr<T, VecLength / 2>(Vec.hi(), VecStr + Len, Flags, Width,
566  Precision);
567  return Len;
568 }
569 
570 template <typename T, int VecLength>
571 typename std::enable_if_t<(VecLength == 3), unsigned>
572 VecToStr(const vec<T, VecLength> &Vec, char *VecStr, unsigned Flags, int Width,
573  int Precision) {
574  unsigned Len = VecToStr<T, 2>(Vec.lo(), VecStr, Flags, Width, Precision);
575  Len += append(VecStr + Len, VEC_ELEMENT_DELIMITER);
576  Len += VecToStr<T, 1>(Vec.z(), VecStr + Len, Flags, Width, Precision);
577  return Len;
578 }
579 
580 template <typename T, int VecLength>
581 inline void writeVec(GlobalBufAccessorT &GlobalFlushBuf, size_t FlushBufferSize,
582  unsigned WIOffset, unsigned Flags, int Width,
583  int Precision, const vec<T, VecLength> &Vec) {
584  // Reserve space for vector elements and delimiters
585  constexpr size_t MAX_VEC_SIZE =
586  MAX_FLOATING_POINT_DIGITS * VecLength + (VecLength - 1) * 2;
587  char VecStr[MAX_VEC_SIZE] = {0};
588  unsigned Len = VecToStr<T, VecLength>(Vec, VecStr, Flags, Width, Precision);
589  write(GlobalFlushBuf, FlushBufferSize, WIOffset, VecStr, Len,
590  (Width > 0 && Width > Len) ? Width - Len : 0);
591 }
592 
593 template <int ArrayLength>
594 inline unsigned ArrayToStr(char *Buf, const array<ArrayLength> &Arr) {
595  unsigned Len = 0;
596  Buf[Len++] = VEC_OPEN_BRACE;
597 
598  for (int I = 0; I < ArrayLength; ++I) {
599  Len += ScalarToStr(Arr[I], Buf + Len, 0 /* No flags */, -1, -1);
600  if (I != ArrayLength - 1)
601  Len += append(Buf + Len, VEC_ELEMENT_DELIMITER);
602  }
603 
604  Buf[Len++] = VEC_CLOSE_BRACE;
605 
606  return Len;
607 }
608 
609 template <int ArrayLength>
610 inline void writeArray(GlobalBufAccessorT &GlobalFlushBuf,
611  size_t FlushBufferSize, unsigned WIOffset,
612  const array<ArrayLength> &Arr) {
613  char Buf[MAX_ARRAY_SIZE];
614  unsigned Len = ArrayToStr(Buf, Arr);
615  write(GlobalFlushBuf, FlushBufferSize, WIOffset, Buf, Len);
616 }
617 
618 template <int Dimensions>
619 inline void writeItem(GlobalBufAccessorT &GlobalFlushBuf,
620  size_t FlushBufferSize, unsigned WIOffset,
621  const item<Dimensions> &Item) {
622  // Reserve space for 3 arrays and additional place (40 symbols) for printing
623  // the text
624  char Buf[3 * MAX_ARRAY_SIZE + 40];
625  unsigned Len = 0;
626  Len += append(Buf, "item(");
627  Len += append(Buf + Len, "range: ");
628  Len += ArrayToStr(Buf + Len, Item.get_range());
629  Len += append(Buf + Len, ", id: ");
630  Len += ArrayToStr(Buf + Len, Item.get_id());
631  Len += append(Buf + Len, ", offset: ");
632  Len += ArrayToStr(Buf + Len, Item.get_offset());
633  Buf[Len++] = ')';
634  write(GlobalFlushBuf, FlushBufferSize, WIOffset, Buf, Len);
635 }
636 
637 template <int Dimensions>
638 inline void writeNDRange(GlobalBufAccessorT &GlobalFlushBuf,
639  size_t FlushBufferSize, unsigned WIOffset,
640  const nd_range<Dimensions> &ND_Range) {
641  // Reserve space for 3 arrays and additional place (50 symbols) for printing
642  // the text
643  char Buf[3 * MAX_ARRAY_SIZE + 50];
644  unsigned Len = 0;
645  Len += append(Buf, "nd_range(");
646  Len += append(Buf + Len, "global_range: ");
647  Len += ArrayToStr(Buf + Len, ND_Range.get_global_range());
648  Len += append(Buf + Len, ", local_range: ");
649  Len += ArrayToStr(Buf + Len, ND_Range.get_local_range());
650  Len += append(Buf + Len, ", offset: ");
651  Len += ArrayToStr(Buf + Len, ND_Range.get_offset());
652  Buf[Len++] = ')';
653  write(GlobalFlushBuf, FlushBufferSize, WIOffset, Buf, Len);
654 }
655 
656 template <int Dimensions>
657 inline void writeNDItem(GlobalBufAccessorT &GlobalFlushBuf,
658  size_t FlushBufferSize, unsigned WIOffset,
659  const nd_item<Dimensions> &ND_Item) {
660  // Reserve space for 2 arrays and additional place (40 symbols) for printing
661  // the text
662  char Buf[2 * MAX_ARRAY_SIZE + 40];
663  unsigned Len = 0;
664  Len += append(Buf, "nd_item(");
665  Len += append(Buf + Len, "global_id: ");
666  Len += ArrayToStr(Buf + Len, ND_Item.get_global_id());
667  Len += append(Buf + Len, ", local_id: ");
668  Len += ArrayToStr(Buf + Len, ND_Item.get_local_id());
669  Buf[Len++] = ')';
670  write(GlobalFlushBuf, FlushBufferSize, WIOffset, Buf, Len);
671 }
672 
673 template <int Dimensions>
674 inline void writeGroup(GlobalBufAccessorT &GlobalFlushBuf,
675  size_t FlushBufferSize, unsigned WIOffset,
676  const group<Dimensions> &Group) {
677  // Reserve space for 4 arrays and additional place (60 symbols) for printing
678  // the text
679  char Buf[4 * MAX_ARRAY_SIZE + 60];
680  unsigned Len = 0;
681  Len += append(Buf, "group(");
682  Len += append(Buf + Len, "id: ");
683  Len += ArrayToStr(Buf + Len, Group.get_id());
684  Len += append(Buf + Len, ", global_range: ");
685  Len += ArrayToStr(Buf + Len, Group.get_global_range());
686  Len += append(Buf + Len, ", local_range: ");
687  Len += ArrayToStr(Buf + Len, Group.get_local_range());
688  Len += append(Buf + Len, ", group_range: ");
689  Len += ArrayToStr(Buf + Len, Group.get_group_range());
690  Buf[Len++] = ')';
691  write(GlobalFlushBuf, FlushBufferSize, WIOffset, Buf, Len);
692 }
693 
694 // Space for 2 arrays and additional place (20 symbols) for printing
695 // the text
696 constexpr size_t MAX_ITEM_SIZE = 2 * MAX_ARRAY_SIZE + 20;
697 
698 template <int Dimensions>
699 inline unsigned ItemToStr(char *Buf, const item<Dimensions, false> &Item) {
700  unsigned Len = 0;
701  Len += append(Buf, "item(");
702  for (int I = 0; I < 2; ++I) {
703  Len += append(Buf + Len, I == 0 ? "range: " : ", id: ");
704  Len += ArrayToStr(Buf + Len, I == 0 ? Item.get_range() : Item.get_id());
705  }
706  Buf[Len++] = ')';
707  return Len;
708 }
709 
710 template <int Dimensions>
711 inline void writeHItem(GlobalBufAccessorT &GlobalFlushBuf,
712  size_t FlushBufferSize, unsigned WIOffset,
713  const h_item<Dimensions> &HItem) {
714  // Reserve space for 3 items and additional place (60 symbols) for printing
715  // the text
716  char Buf[3 * MAX_ITEM_SIZE + 60];
717  unsigned Len = 0;
718  Len += append(Buf, "h_item(");
719  for (int I = 0; I < 3; ++I) {
720  Len += append(Buf + Len, I == 0 ? "\n global "
721  : I == 1 ? "\n logical local "
722  : "\n physical local ");
723  Len += ItemToStr(Buf + Len, I == 0 ? HItem.get_global()
724  : I == 1 ? HItem.get_logical_local()
725  : HItem.get_physical_local());
726  }
727  Len += append(Buf + Len, "\n)");
728  write(GlobalFlushBuf, FlushBufferSize, WIOffset, Buf, Len);
729 }
730 
731 template <typename> struct IsSwizzleOp : std::false_type {};
732 
733 template <typename VecT, typename OperationLeftT, typename OperationRightT,
734  template <typename> class OperationCurrentT, int... Indexes>
736  VecT, OperationLeftT, OperationRightT, OperationCurrentT, Indexes...>>
737  : std::true_type {
738  using T = typename VecT::element_type;
739  using Type = typename sycl::vec<T, (sizeof...(Indexes))>;
740 };
741 
742 template <typename T>
743 using EnableIfSwizzleVec =
744  typename std::enable_if_t<IsSwizzleOp<T>::value,
746 
747 } // namespace detail
748 
749 enum class stream_manipulator {
750  dec = 0,
751  hex = 1,
752  oct = 2,
753  noshowbase = 3,
754  showbase = 4,
755  noshowpos = 5,
756  showpos = 6,
757  endl = 7,
758  flush = 8,
759  fixed = 9,
760  scientific = 10,
761  hexfloat = 11,
762  defaultfloat = 12
763 };
764 
766 
768 
770 
772 
774 
776 
778 
780 
782 
784 
786 
788 
790 
791 class stream;
792 
794  int Precision_;
795 
796 public:
797  __precision_manipulator__(int Precision) : Precision_(Precision) {}
798 
799  int precision() const { return Precision_; }
800 
801  friend const stream &operator<<(const stream &,
802  const __precision_manipulator__ &);
803 };
804 
806  int Width_;
807 
808 public:
809  __width_manipulator__(int Width) : Width_(Width) {}
810 
811  int width() const { return Width_; }
812 
813  friend const stream &operator<<(const stream &,
814  const __width_manipulator__ &);
815 };
816 
818  return __precision_manipulator__(Precision);
819 }
820 
821 inline __width_manipulator__ setw(int Width) {
822  return __width_manipulator__(Width);
823 }
824 
829 class __SYCL_EXPORT __SYCL_SPECIAL_CLASS __SYCL_TYPE(stream) stream
830  : public detail::OwnerLessBase<stream> {
831 private:
832 #ifndef __SYCL_DEVICE_ONLY__
833  // Constructor for recreating a stream.
834  stream(std::shared_ptr<detail::stream_impl> Impl,
835  detail::GlobalBufAccessorT GlobalBuf,
836  detail::GlobalOffsetAccessorT GlobalOffset,
837  detail::GlobalBufAccessorT GlobalFlushBuf)
838  : impl{Impl}, GlobalBuf{GlobalBuf}, GlobalOffset{GlobalOffset},
839  GlobalFlushBuf{GlobalFlushBuf} {}
840 #endif
841 
842 public:
843 #ifdef __SYCL_DEVICE_ONLY__
844  // Default constructor for objects later initialized with __init member.
845  stream() = default;
846 #endif
847 
848  // Throws exception in case of invalid input parameters
849  stream(size_t BufferSize, size_t MaxStatementSize, handler &CGH);
850 
851  // Property-list constructor variant.
852  // TODO: Merge with other stream constructor and give PropList default value.
853  stream(size_t BufferSize, size_t MaxStatementSize, handler &CGH,
854  const property_list &PropList);
855 
856 #ifdef __SYCL_DEVICE_ONLY__
857  // We need the definitions of these functions in the header for device,
858  // otherwise they are not visible. Also, we cannot use `impl` because it's not
859  // there on the device, so we rely on GlobalBuf/GlobalFlushBuf.
860  size_t size() const noexcept { return GlobalBuf.size(); }
861 
862  size_t get_work_item_buffer_size() const {
863  return GlobalFlushBuf.size() - detail::FLUSH_BUF_OFFSET_SIZE;
864  }
865 
867  "get_size() is deprecated since SYCL 2020. Please use size() instead.")
868  size_t get_size() const { return size(); }
869 
870  __SYCL2020_DEPRECATED("get_max_statement_size() is deprecated since SYCL "
871  "2020. Please use get_work_item_buffer_size() instead.")
872  size_t get_max_statement_size() const { return get_work_item_buffer_size(); }
873 #else
874  size_t size() const noexcept;
875 
876  size_t get_work_item_buffer_size() const;
877 
879  "get_size() is deprecated since SYCL 2020. Please use size() instead.")
880  size_t get_size() const;
881 
882  __SYCL2020_DEPRECATED("get_max_statement_size() is deprecated since SYCL "
883  "2020. Please use get_work_item_buffer_size() instead.")
884  size_t get_max_statement_size() const;
885 #endif
886 
887  size_t get_precision() const { return Precision; }
888 
889  size_t get_width() const { return Width; }
890 
891  stream_manipulator get_stream_mode() const { return Manipulator; }
892 
893  bool operator==(const stream &RHS) const;
894 
895  bool operator!=(const stream &LHS) const;
896 
897  template <typename propertyT> bool has_property() const noexcept;
898 
899  template <typename propertyT> propertyT get_property() const;
900 
901 private:
902 #ifdef __SYCL_DEVICE_ONLY__
903  char padding[sizeof(std::shared_ptr<detail::stream_impl>)];
904 #else
905  std::shared_ptr<detail::stream_impl> impl;
906  template <class Obj>
907  friend decltype(Obj::impl) detail::getSyclObjImpl(const Obj &SyclObject);
908 #endif
909 
910  // NOTE: Some members are required for reconstructing the stream, but are not
911  // part of the implementation class. If more members are added, they should
912  // also be added to the weak_object specialization for streams.
913 
914  // Accessor to the global stream buffer. Global buffer contains all output
915  // from the kernel.
916  mutable detail::GlobalBufAccessorT GlobalBuf;
917 
918  // Atomic accessor to the global offset variable. It represents an offset in
919  // the global stream buffer. Since work items will flush data to global buffer
920  // in parallel we need atomic access to this offset.
921  mutable detail::GlobalOffsetAccessorT GlobalOffset;
922 
923  // Accessor to the flush buffer. Each work item writes its
924  // output to a designated section of the flush buffer.
925  mutable detail::GlobalBufAccessorT GlobalFlushBuf;
926 
927  // Offset of the WI's flush buffer in the pool.
928  mutable unsigned WIOffset = 0;
929 
930  mutable size_t FlushBufferSize;
931 
932  // Fields and methods to work with manipulators
933  mutable stream_manipulator Manipulator = defaultfloat;
934 
935  // Type used for format flags
936  using FmtFlags = unsigned int;
937 
938  mutable int Precision = -1;
939  mutable int Width = -1;
940  mutable FmtFlags Flags = 0x0;
941 
942  void set_flag(FmtFlags FormatFlag) const { Flags |= FormatFlag; }
943 
944  void unset_flag(FmtFlags FormatFlag) const { Flags &= ~FormatFlag; }
945 
946  FmtFlags get_flags() const { return Flags; }
947 
948  // This method is used to set the flag for base and float manipulators. These
949  // flags are mutually exclusive and base/float field needs to be cleared
950  // before the setting new flag.
951  void set_flag(FmtFlags FormatFlag, FmtFlags Mask) const {
952  unset_flag(Mask);
953  Flags |= FormatFlag & Mask;
954  }
955 
956  // Set the flags which correspond to the input stream manipulator.
957  void set_manipulator(const stream_manipulator SM) const {
958  switch (SM) {
960  set_flag(detail::Dec, detail::BaseField);
961  break;
963  set_flag(detail::Hex, detail::BaseField);
964  break;
966  set_flag(detail::Oct, detail::BaseField);
967  break;
969  unset_flag(detail::ShowBase);
970  break;
972  set_flag(detail::ShowBase);
973  break;
975  unset_flag(detail::ShowPos);
976  break;
978  set_flag(detail::ShowPos);
979  break;
982  break;
985  break;
988  break;
990  unset_flag(detail::FloatField);
991  break;
992  default:
993  // Unknown manipulator
994  break;
995  }
996  }
997 
998 #ifdef __SYCL_DEVICE_ONLY__
999  void __init(detail::GlobalBufPtrType GlobalBufPtr,
1000  range<detail::GlobalBufDim> GlobalBufAccRange,
1001  range<detail::GlobalBufDim> GlobalBufMemRange,
1002  id<detail::GlobalBufDim> GlobalBufId,
1003  detail::GlobalOffsetPtrType GlobalOffsetPtr,
1004  range<detail::GlobalOffsetDim> GlobalOffsetAccRange,
1005  range<detail::GlobalOffsetDim> GlobalOffsetMemRange,
1006  id<detail::GlobalOffsetDim> GlobalOffsetId,
1007  detail::GlobalBufPtrType GlobalFlushPtr,
1008  range<detail::GlobalBufDim> GlobalFlushAccRange,
1009  range<detail::GlobalBufDim> GlobalFlushMemRange,
1010  id<detail::GlobalBufDim> GlobalFlushId, size_t _FlushBufferSize) {
1011  GlobalBuf.__init(GlobalBufPtr, GlobalBufAccRange, GlobalBufMemRange,
1012  GlobalBufId);
1013  GlobalOffset.__init(GlobalOffsetPtr, GlobalOffsetAccRange,
1014  GlobalOffsetMemRange, GlobalOffsetId);
1015  GlobalFlushBuf.__init(GlobalFlushPtr, GlobalFlushAccRange,
1016  GlobalFlushMemRange, GlobalFlushId);
1017  FlushBufferSize = _FlushBufferSize;
1018  // Calculate offset in the flush buffer for each work item in the global
1019  // work space. We need to avoid calling intrinsics to get global id because
1020  // when stream is used in a single_task kernel this could cause some
1021  // overhead on FPGA target. That is why use global atomic variable to
1022  // calculate offsets.
1023  WIOffset = GlobalOffset[1].fetch_add(FlushBufferSize);
1024 
1025  // Initialize flush subbuffer's offset for each work item on device.
1026  // Initialization on host device is performed via submition of additional
1027  // host task.
1028  SetFlushBufOffset(GlobalFlushBuf, WIOffset, 0);
1029  }
1030 
1031  void __finalize() {
1032  // Flush data to global buffer if flush buffer is not empty. This could be
1033  // necessary if user hasn't yet flushed data on its own and kernel execution
1034  // is finished
1035  // NOTE: A call to this function will be generated by compiler
1036  // NOTE: In the current implementation user should explicitly flush data on
1037  // the host device. Data is not flushed automatically after kernel execution
1038  // because of the missing feature in scheduler.
1039  flushBuffer(GlobalOffset, GlobalBuf, GlobalFlushBuf, WIOffset);
1040  }
1041 #endif
1042 
1043  friend class handler;
1044 
1045  template <typename SYCLObjT> friend class ext::oneapi::weak_object;
1046 
1047  friend const stream &operator<<(const stream &, const char);
1048  friend const stream &operator<<(const stream &, const char *);
1049  template <typename ValueType>
1050  friend
1051  typename std::enable_if_t<std::is_integral_v<ValueType>, const stream &>
1052  operator<<(const stream &, const ValueType &);
1053  friend const stream &operator<<(const stream &, const float &);
1054  friend const stream &operator<<(const stream &, const double &);
1055  friend const stream &operator<<(const stream &, const half &);
1056 
1057  friend const stream &operator<<(const stream &, const stream_manipulator);
1058 
1059  friend const stream &operator<<(const stream &Out,
1060  const __precision_manipulator__ &RHS);
1061 
1062  friend const stream &operator<<(const stream &Out,
1063  const __width_manipulator__ &RHS);
1064  template <typename T, int Dimensions>
1065  friend const stream &operator<<(const stream &Out,
1066  const vec<T, Dimensions> &RHS);
1067  template <typename T>
1068  friend const stream &operator<<(const stream &Out, const T *RHS);
1069  template <int Dimensions>
1070  friend const stream &operator<<(const stream &Out, const id<Dimensions> &RHS);
1071 
1072  template <int Dimensions>
1073  friend const stream &operator<<(const stream &Out,
1074  const range<Dimensions> &RHS);
1075 
1076  template <int Dimensions>
1077  friend const stream &operator<<(const stream &Out,
1078  const item<Dimensions> &RHS);
1079 
1080  template <int Dimensions>
1081  friend const stream &operator<<(const stream &Out,
1082  const nd_range<Dimensions> &RHS);
1083 
1084  template <int Dimensions>
1085  friend const stream &operator<<(const stream &Out,
1086  const nd_item<Dimensions> &RHS);
1087 
1088  template <int Dimensions>
1089  friend const stream &operator<<(const stream &Out,
1090  const group<Dimensions> &RHS);
1091 
1092  template <int Dimensions>
1093  friend const stream &operator<<(const stream &Out,
1094  const h_item<Dimensions> &RHS);
1095 };
1096 
1097 #if (!defined(_HAS_STD_BYTE) || _HAS_STD_BYTE != 0)
1098 // Byte (has to be converted to a numeric value)
1099 template <typename T>
1100 inline std::enable_if_t<std::is_same_v<T, std::byte>, const stream &>
1101 operator<<(const stream &, const T &) {
1102  static_assert(std::is_integral<T>(),
1103  "Convert the byte to a numeric value using std::to_integer");
1104 }
1105 #endif
1106 
1107 // Character
1108 inline const stream &operator<<(const stream &Out, const char C) {
1109  detail::write(Out.GlobalFlushBuf, Out.FlushBufferSize, Out.WIOffset, &C, 1);
1110  return Out;
1111 }
1112 
1113 // String
1114 inline const stream &operator<<(const stream &Out, const char *Str) {
1115  unsigned Len = 0;
1116  for (; Str[Len] != '\0'; Len++)
1117  ;
1118 
1119  detail::write(Out.GlobalFlushBuf, Out.FlushBufferSize, Out.WIOffset, Str,
1120  Len);
1121  return Out;
1122 }
1123 
1124 // Boolean
1125 inline const stream &operator<<(const stream &Out, const bool &RHS) {
1126  Out << (RHS ? "true" : "false");
1127  return Out;
1128 }
1129 
1130 // Integral
1131 template <typename ValueType>
1132 inline typename std::enable_if_t<std::is_integral_v<ValueType>, const stream &>
1133 operator<<(const stream &Out, const ValueType &RHS) {
1134  detail::writeIntegral(Out.GlobalFlushBuf, Out.FlushBufferSize, Out.WIOffset,
1135  Out.get_flags(), Out.get_width(), RHS);
1136  return Out;
1137 }
1138 
1139 // Floating points
1140 
1141 inline const stream &operator<<(const stream &Out, const float &RHS) {
1142  detail::writeFloatingPoint<float>(Out.GlobalFlushBuf, Out.FlushBufferSize,
1143  Out.WIOffset, Out.get_flags(),
1144  Out.get_width(), Out.get_precision(), RHS);
1145  return Out;
1146 }
1147 
1148 inline const stream &operator<<(const stream &Out, const double &RHS) {
1149  detail::writeFloatingPoint<double>(Out.GlobalFlushBuf, Out.FlushBufferSize,
1150  Out.WIOffset, Out.get_flags(),
1151  Out.get_width(), Out.get_precision(), RHS);
1152  return Out;
1153 }
1154 
1155 inline const stream &operator<<(const stream &Out, const half &RHS) {
1156  detail::writeFloatingPoint<half>(Out.GlobalFlushBuf, Out.FlushBufferSize,
1157  Out.WIOffset, Out.get_flags(),
1158  Out.get_width(), Out.get_precision(), RHS);
1159  return Out;
1160 }
1161 
1162 // Pointer
1163 
1164 template <typename ElementType, access::address_space Space,
1165  access::decorated IsDecorated>
1166 inline const stream &
1167 operator<<(const stream &Out,
1169  Out << RHS.get();
1170  return Out;
1171 }
1172 
1173 template <typename T>
1174 const stream &operator<<(const stream &Out, const T *RHS) {
1175  detail::FmtFlags Flags = Out.get_flags();
1176  Flags &= ~detail::BaseField;
1177  Flags |= detail::Hex | detail::ShowBase;
1178  detail::writeIntegral(Out.GlobalFlushBuf, Out.FlushBufferSize, Out.WIOffset,
1179  Flags, Out.get_width(), reinterpret_cast<size_t>(RHS));
1180  return Out;
1181 }
1182 
1183 // Manipulators
1184 
1185 inline const stream &operator<<(const stream &Out,
1186  const __precision_manipulator__ &RHS) {
1187  Out.Precision = RHS.precision();
1188  return Out;
1189 }
1190 
1191 inline const stream &operator<<(const stream &Out,
1192  const __width_manipulator__ &RHS) {
1193  Out.Width = RHS.width();
1194  return Out;
1195 }
1196 
1197 inline const stream &operator<<(const stream &Out,
1198  const stream_manipulator RHS) {
1199  switch (RHS) {
1201  Out << '\n';
1202  flushBuffer(Out.GlobalOffset, Out.GlobalBuf, Out.GlobalFlushBuf,
1203  Out.WIOffset);
1204  break;
1206  flushBuffer(Out.GlobalOffset, Out.GlobalBuf, Out.GlobalFlushBuf,
1207  Out.WIOffset);
1208  break;
1209  default:
1210  Out.set_manipulator(RHS);
1211  break;
1212  }
1213  return Out;
1214 }
1215 
1216 // Vec
1217 
1218 template <typename T, int VectorLength>
1219 const stream &operator<<(const stream &Out, const vec<T, VectorLength> &RHS) {
1220  detail::writeVec<T, VectorLength>(Out.GlobalFlushBuf, Out.FlushBufferSize,
1221  Out.WIOffset, Out.get_flags(),
1222  Out.get_width(), Out.get_precision(), RHS);
1223  return Out;
1224 }
1225 
1226 // SYCL types
1227 
1228 template <int Dimensions>
1229 inline const stream &operator<<(const stream &Out, const id<Dimensions> &RHS) {
1230  detail::writeArray<Dimensions>(Out.GlobalFlushBuf, Out.FlushBufferSize,
1231  Out.WIOffset, RHS);
1232  return Out;
1233 }
1234 
1235 template <int Dimensions>
1236 inline const stream &operator<<(const stream &Out,
1237  const range<Dimensions> &RHS) {
1238  detail::writeArray<Dimensions>(Out.GlobalFlushBuf, Out.FlushBufferSize,
1239  Out.WIOffset, RHS);
1240  return Out;
1241 }
1242 
1243 template <int Dimensions>
1244 inline const stream &operator<<(const stream &Out,
1245  const item<Dimensions> &RHS) {
1246  detail::writeItem<Dimensions>(Out.GlobalFlushBuf, Out.FlushBufferSize,
1247  Out.WIOffset, RHS);
1248  return Out;
1249 }
1250 
1251 template <int Dimensions>
1252 inline const stream &operator<<(const stream &Out,
1253  const nd_range<Dimensions> &RHS) {
1254  detail::writeNDRange<Dimensions>(Out.GlobalFlushBuf, Out.FlushBufferSize,
1255  Out.WIOffset, RHS);
1256  return Out;
1257 }
1258 
1259 template <int Dimensions>
1260 inline const stream &operator<<(const stream &Out,
1261  const nd_item<Dimensions> &RHS) {
1262  detail::writeNDItem<Dimensions>(Out.GlobalFlushBuf, Out.FlushBufferSize,
1263  Out.WIOffset, RHS);
1264  return Out;
1265 }
1266 
1267 template <int Dimensions>
1268 inline const stream &operator<<(const stream &Out,
1269  const group<Dimensions> &RHS) {
1270  detail::writeGroup<Dimensions>(Out.GlobalFlushBuf, Out.FlushBufferSize,
1271  Out.WIOffset, RHS);
1272  return Out;
1273 }
1274 
1275 template <int Dimensions>
1276 inline const stream &operator<<(const stream &Out,
1277  const h_item<Dimensions> &RHS) {
1278  detail::writeHItem<Dimensions>(Out.GlobalFlushBuf, Out.FlushBufferSize,
1279  Out.WIOffset, RHS);
1280  return Out;
1281 }
1282 
1283 template <typename T, typename RT = detail::EnableIfSwizzleVec<T>>
1284 inline const stream &operator<<(const stream &Out, const T &RHS) {
1285  RT V = RHS;
1286  Out << V;
1287  return Out;
1288 }
1289 
1290 } // namespace _V1
1291 } // namespace sycl
1292 namespace std {
1293 template <> struct hash<sycl::stream> {
1294  size_t operator()(const sycl::stream &S) const {
1295 #ifdef __SYCL_DEVICE_ONLY__
1296  (void)S;
1297  return 0;
1298 #else
1299  return hash<std::shared_ptr<sycl::detail::stream_impl>>()(
1301 #endif
1302  }
1303 };
1304 } // namespace std
sycl::_V1::detail::writeGroup
void writeGroup(GlobalBufAccessorT &GlobalFlushBuf, size_t FlushBufferSize, unsigned WIOffset, const group< Dimensions > &Group)
Definition: stream.hpp:674
sycl::_V1::__width_manipulator__
Definition: stream.hpp:805
sycl::_V1::stream_manipulator::oct
@ oct
sycl::_V1::stream_manipulator::fixed
@ fixed
sycl::_V1::h_item::get_physical_local
item< Dimensions, false > get_physical_local() const
Definition: h_item.hpp:48
sycl::_V1::detail::IsSwizzleOp< sycl::detail::SwizzleOp< VecT, OperationLeftT, OperationRightT, OperationCurrentT, Indexes... > >::Type
typename sycl::vec< T,(sizeof...(Indexes))> Type
Definition: stream.hpp:739
sycl::_V1::detail::MAX_ARRAY_SIZE
constexpr size_t MAX_ARRAY_SIZE
Definition: stream.hpp:78
sycl::_V1::__width_manipulator__::width
int width() const
Definition: stream.hpp:811
property_list.hpp
builtins.hpp
sycl::_V1::endl
constexpr stream_manipulator endl
Definition: stream.hpp:779
sycl::_V1::detail::reverseBuf
void reverseBuf(char *Buf, unsigned Len)
Definition: stream.hpp:153
std::hash< sycl::stream >::operator()
size_t operator()(const sycl::stream &S) const
Definition: stream.hpp:1294
sycl::_V1::instead
std::uint8_t instead
Definition: aliases.hpp:93
cg.hpp
sycl::_V1::nd_item::get_global_id
id< Dimensions > get_global_id() const
Definition: nd_item.hpp:51
sycl::_V1::defaultfloat
constexpr stream_manipulator defaultfloat
Definition: stream.hpp:789
sub_group.hpp
sycl::_V1::ext::intel::experimental::operator==
bool operator==(const cache_config &lhs, const cache_config &rhs)
Definition: kernel_execution_properties.hpp:36
sycl::_V1::detail::updateOffset
bool updateOffset(GlobalOffsetAccessorT &GlobalOffset, GlobalBufAccessorT &GlobalBuf, unsigned Size, unsigned &Cur)
Definition: stream.hpp:516
sycl::_V1::__precision_manipulator__::precision
int precision() const
Definition: stream.hpp:799
sycl::_V1::detail::EnableIfFP
typename std::enable_if_t< std::is_same_v< F, float >||std::is_same_v< F, double >||std::is_same_v< F, half >, T > EnableIfFP
Definition: stream.hpp:89
item.hpp
sycl::_V1::__precision_manipulator__::__precision_manipulator__
__precision_manipulator__(int Precision)
Definition: stream.hpp:797
sycl::_V1::detail::isFastMathInf
detail::enable_if_t< std::is_same< T, float >::value||std::is_same< T, double >::value, bool > isFastMathInf(T Val)
Definition: stream.hpp:271
sycl::_V1::detail::ItemToStr
unsigned ItemToStr(char *Buf, const item< Dimensions, false > &Item)
Definition: stream.hpp:699
sycl::_V1::detail::integralToBase
std::enable_if_t< std::is_integral_v< T >, unsigned > integralToBase(T Val, int Base, char *Digits)
Definition: stream.hpp:180
sycl::_V1::detail::SwizzleOp
Definition: types.hpp:132
sycl::_V1::item::get_range
range< Dimensions > get_range() const
Definition: item.hpp:73
sycl::_V1::detail::MAX_ITEM_SIZE
constexpr size_t MAX_ITEM_SIZE
Definition: stream.hpp:696
sycl::_V1::stream_manipulator::showbase
@ showbase
sycl::_V1::showpos
constexpr stream_manipulator showpos
Definition: stream.hpp:777
types.hpp
sycl::_V1::stream_manipulator::dec
@ dec
sycl::_V1::detail::EnableIfSwizzleVec
typename std::enable_if_t< IsSwizzleOp< T >::value, typename IsSwizzleOp< T >::Type > EnableIfSwizzleVec
Definition: stream.hpp:745
sycl::_V1::stream_manipulator::hex
@ hex
sycl::_V1::detail::GlobalBufAccessorT
accessor< char, 1, sycl::access::mode::read_write, sycl::access::target::device > GlobalBufAccessorT
Definition: stream.hpp:92
sycl::_V1::detail::digitToChar
char digitToChar(const int Digit)
Definition: stream.hpp:170
sycl::_V1::stream_manipulator::defaultfloat
@ defaultfloat
array.hpp
detail
---— Error handling, matching OpenCL plugin semantics.
Definition: common.hpp:44
sycl::_V1::__SYCL_TYPE
class __SYCL_EBO __SYCL_SPECIAL_CLASS __SYCL_TYPE(local_accessor) local_accessor class __SYCL_EBO __SYCL_SPECIAL_CLASS __SYCL_TYPE(accessor) accessor< DataT
Image accessors.
sycl
Definition: access.hpp:18
sycl::_V1::detail::GlobalOffsetAS
constexpr static access::address_space GlobalOffsetAS
Definition: stream.hpp:103
sycl::_V1::hexfloat
constexpr stream_manipulator hexfloat
Definition: stream.hpp:787
sycl::_V1::detail::writeIntegral
std::enable_if_t< std::is_integral_v< T > > writeIntegral(GlobalBufAccessorT &GlobalFlushBuf, size_t FlushBufferSize, unsigned WIOffset, unsigned Flags, int Width, const T &Val)
Definition: stream.hpp:490
__SYCL_SPECIAL_CLASS
#define __SYCL_SPECIAL_CLASS
Definition: defines.hpp:29
access.hpp
sycl::_V1::id
A unique identifier of an item in an index space.
Definition: array.hpp:20
group.hpp
sycl::_V1::nd_range::get_offset
id< Dimensions > get_offset() const
Definition: nd_range.hpp:50
sycl::_V1::detail::writeVec
void writeVec(GlobalBufAccessorT &GlobalFlushBuf, size_t FlushBufferSize, unsigned WIOffset, unsigned Flags, int Width, int Precision, const vec< T, VecLength > &Vec)
Definition: stream.hpp:581
owner_less_base.hpp
sycl::_V1::detail::FmtFlags
unsigned int FmtFlags
Definition: stream.hpp:46
sycl::_V1::detail::GlobalBufDim
constexpr static int GlobalBufDim
Definition: stream.hpp:98
sycl::_V1::item::get_id
id< Dimensions > get_id() const
Definition: item.hpp:59
sycl::_V1::detail::writeFloatingPoint
EnableIfFP< T > writeFloatingPoint(GlobalBufAccessorT &GlobalFlushBuf, size_t FlushBufferSize, unsigned WIOffset, unsigned Flags, int Width, int Precision, const T &Val)
Definition: stream.hpp:502
sycl::_V1::detail::half_impl::half
Definition: half_type.hpp:271
sycl::_V1::detail::Dec
static constexpr FmtFlags Dec
Definition: stream.hpp:50
sycl::_V1::detail::D2I64
unsigned long long D2I64(double Val)
Definition: stream.hpp:259
sycl::_V1::multi_ptr::get
pointer get() const
Definition: multi_ptr.hpp:293
sycl::_V1::detail::MAX_INTEGRAL_DIGITS
constexpr size_t MAX_INTEGRAL_DIGITS
Definition: stream.hpp:69
sycl::_V1::detail::floatingPointToDecStr
EnableIfFP< T, unsigned > floatingPointToDecStr(T AbsVal, char *Digits, int Precision, bool IsSci)
Definition: stream.hpp:350
sycl::_V1::flush
constexpr stream_manipulator flush
Definition: stream.hpp:781
sycl::_V1::detail::write
void write(GlobalBufAccessorT &GlobalFlushBuf, size_t FlushBufferSize, unsigned WIOffset, const char *Str, unsigned Len, unsigned Padding=0)
Definition: stream.hpp:131
sycl::_V1::range
Defines the iteration domain of either a single work-group in a parallel dispatch,...
Definition: buffer.hpp:51
nd_range.hpp
sycl::_V1::stream_manipulator::scientific
@ scientific
sycl::_V1::detail::writeHItem
void writeHItem(GlobalBufAccessorT &GlobalFlushBuf, size_t FlushBufferSize, unsigned WIOffset, const h_item< Dimensions > &HItem)
Definition: stream.hpp:711
export.hpp
sycl::_V1::detail::VEC_OPEN_BRACE
constexpr char VEC_OPEN_BRACE
Definition: stream.hpp:71
sycl::_V1::ext::oneapi::experimental::isnan
std::enable_if_t< std::is_same_v< T, bfloat16 >, bool > isnan(T x)
Definition: bfloat16_math.hpp:35
sycl::_V1::multi_ptr
Provides constructors for address space qualified and non address space qualified pointers to allow i...
Definition: atomic.hpp:34
sycl::_V1::nd_item
Identifies an instance of the function object executing at each point in an nd_range.
Definition: helpers.hpp:36
sycl::_V1::ext::oneapi::experimental::has_property
static constexpr bool has_property()
Definition: annotated_arg.hpp:169
sycl::_V1::nd_range::get_global_range
range< Dimensions > get_global_range() const
Definition: nd_range.hpp:43
sycl::_V1::__width_manipulator__::__width_manipulator__
__width_manipulator__(int Width)
Definition: stream.hpp:809
defines_elementary.hpp
sycl::_V1::detail::append
unsigned append(char *Dst, const char *Src)
Definition: stream.hpp:240
sycl::_V1::detail::GlobalBufAS
constexpr static access::address_space GlobalBufAS
Definition: stream.hpp:94
sycl::_V1::stream_manipulator::noshowbase
@ noshowbase
sycl::_V1::stream_manipulator::flush
@ flush
sycl::_V1::nd_range::get_local_range
range< Dimensions > get_local_range() const
Definition: nd_range.hpp:45
sycl::_V1::stream_manipulator::showpos
@ showpos
sycl::_V1::detail::SetFlushBufOffset
void SetFlushBufOffset(GlobalBufAccessorT &GlobalFlushBuf, unsigned WIOffset, unsigned Offset)
Definition: stream.hpp:125
sycl::_V1::item
Identifies an instance of the function object executing at each point in a range.
Definition: helpers.hpp:32
sycl::_V1::detail::MAX_DIMENSIONS
constexpr size_t MAX_DIMENSIONS
Definition: stream.hpp:74
sycl::_V1::detail::operator<<
std::ostream & operator<<(std::ostream &os, std::optional< T > const &opt)
Definition: device_filter.hpp:27
sycl::_V1::detail::checkForInfNan
detail::enable_if_t< std::is_same< T, float >::value||std::is_same< T, double >::value, unsigned > checkForInfNan(char *Buf, T Val)
Definition: stream.hpp:310
range.hpp
sycl::_V1::oct
constexpr stream_manipulator oct
Definition: stream.hpp:769
sycl::_V1::detail::writeArray
void writeArray(GlobalBufAccessorT &GlobalFlushBuf, size_t FlushBufferSize, unsigned WIOffset, const array< ArrayLength > &Arr)
Definition: stream.hpp:610
sycl::_V1::detail::IsSwizzleOp
Definition: stream.hpp:731
sycl::_V1::detail::MAX_FLOATING_POINT_DIGITS
constexpr size_t MAX_FLOATING_POINT_DIGITS
Definition: stream.hpp:68
sycl::_V1::detail::ArrayToStr
unsigned ArrayToStr(char *Buf, const array< ArrayLength > &Arr)
Definition: stream.hpp:594
defines.hpp
sycl::_V1::detail::VEC_ELEMENT_DELIMITER
constexpr const char * VEC_ELEMENT_DELIMITER
Definition: stream.hpp:70
sycl::_V1::noshowpos
constexpr stream_manipulator noshowpos
Definition: stream.hpp:775
sycl::_V1::detail::writeNDItem
void writeNDItem(GlobalBufAccessorT &GlobalFlushBuf, size_t FlushBufferSize, unsigned WIOffset, const nd_item< Dimensions > &ND_Item)
Definition: stream.hpp:657
sycl::_V1::detail::TargetToAS
Definition: access.hpp:140
sycl::_V1::nd_range
Defines the iteration domain of both the work-groups and the overall dispatch.
Definition: uniform.hpp:30
sycl::_V1::detail::IsSwizzleOp< sycl::detail::SwizzleOp< VecT, OperationLeftT, OperationRightT, OperationCurrentT, Indexes... > >::T
typename VecT::element_type T
Definition: stream.hpp:738
sycl::_V1::detail::ScalarToStr
std::enable_if_t< std::is_integral_v< T >, unsigned > ScalarToStr(const T &Val, char *Buf, unsigned Flags, int, int Precision=-1)
Definition: stream.hpp:194
sycl::_V1::h_item
Identifies an instance of a group::parallel_for_work_item function object executing at each point in ...
Definition: helpers.hpp:37
sycl::_V1::read_write
constexpr mode_tag_t< access_mode::read_write > read_write
Definition: access.hpp:85
sycl::_V1::half
sycl::detail::half_impl::half half
Definition: aliases.hpp:101
sycl::_V1::detail::FLUSH_BUF_OFFSET_SIZE
constexpr unsigned FLUSH_BUF_OFFSET_SIZE
Definition: stream.hpp:83
sycl::_V1::ext::oneapi::experimental::get_property
static constexpr auto get_property()
Definition: annotated_arg.hpp:173
sycl::_V1::element_type
std::conditional_t< std::is_same_v< ElementType, half >, sycl::detail::half_impl::BIsRepresentationT, ElementType > element_type
Definition: multi_ptr.hpp:752
sycl::_V1::detail::GlobalOffsetDim
constexpr static int GlobalOffsetDim
Definition: stream.hpp:107
sycl::_V1::access::decorated
decorated
Definition: access.hpp:63
sycl::_V1::stream_manipulator::noshowpos
@ noshowpos
sycl::_V1::detail::GlobalOffsetPtrType
typename detail::DecoratedType< unsigned, GlobalBufAS >::type * GlobalOffsetPtrType
Definition: stream.hpp:106
sycl::_V1::accessor
Definition: accessor.hpp:246
sycl::_V1::dec
constexpr stream_manipulator dec
Definition: stream.hpp:765
sycl::_V1::item::get_offset
std::enable_if_t< has_offset, id< Dimensions > > get_offset() const
Definition: item.hpp:85
accessor.hpp
sycl::_V1::__precision_manipulator__
Definition: stream.hpp:793
sycl::_V1::detail::ShowPos
static constexpr FmtFlags ShowPos
Definition: stream.hpp:54
sycl::_V1::setw
__width_manipulator__ setw(int Width)
Definition: stream.hpp:821
sycl::_V1::stream_manipulator::endl
@ endl
sycl::_V1::setprecision
__precision_manipulator__ setprecision(int Precision)
Definition: stream.hpp:817
sycl::_V1::detail::VecToStr
std::enable_if_t<(VecLength==1), unsigned > VecToStr(const vec< T, VecLength > &Vec, char *VecStr, unsigned Flags, int Width, int Precision)
Definition: stream.hpp:551
sycl::_V1::detail::Hex
static constexpr FmtFlags Hex
Definition: stream.hpp:51
sycl::_V1::detail::isFastMathNan
detail::enable_if_t< std::is_same< T, float >::value||std::is_same< T, double >::value, bool > isFastMathNan(T Val)
Definition: stream.hpp:284
handler.hpp
sycl::_V1::detail::GlobalOffsetAccessorT
accessor< unsigned, 1, sycl::access::mode::atomic, sycl::access::target::device > GlobalOffsetAccessorT
Definition: stream.hpp:101
sycl::_V1::detail::getAbsVal
std::make_unsigned_t< T > getAbsVal(const T Val, const int Base)
Definition: stream.hpp:166
sycl::_V1::h_item::get_global
item< Dimensions, false > get_global() const
Definition: h_item.hpp:42
sycl::_V1::vec
Provides a cross-patform vector class template that works efficiently on SYCL devices as well as in h...
Definition: aliases.hpp:18
sycl::_V1::detail::Scientific
static constexpr FmtFlags Scientific
Definition: stream.hpp:56
sycl::_V1::detail::VEC_CLOSE_BRACE
constexpr char VEC_CLOSE_BRACE
Definition: stream.hpp:72
std
Definition: accessor.hpp:4139
item_base.hpp
sycl::_V1::detail::FloatField
static constexpr FmtFlags FloatField
Definition: stream.hpp:66
sycl::_V1::hex
constexpr stream_manipulator hex
Definition: stream.hpp:767
sycl::_V1::detail::writeNDRange
void writeNDRange(GlobalBufAccessorT &GlobalFlushBuf, size_t FlushBufferSize, unsigned WIOffset, const nd_range< Dimensions > &ND_Range)
Definition: stream.hpp:638
sycl::_V1::showbase
constexpr stream_manipulator showbase
Definition: stream.hpp:773
half_type.hpp
aliases.hpp
sycl::_V1::ext::oneapi::experimental::matrix::use
use
Definition: matrix-unified-utils.hpp:17
sycl::_V1::detail::__SYCL2020_DEPRECATED
struct __SYCL2020_DEPRECATED("This type isn't device copyable in SYCL 2020") IsDeprecatedDeviceCopyable< T
sycl::_V1::scientific
constexpr stream_manipulator scientific
Definition: stream.hpp:785
sycl::_V1::stream_manipulator::hexfloat
@ hexfloat
sycl::_V1::h_item::get_logical_local
item< Dimensions, false > get_logical_local() const
Definition: h_item.hpp:46
h_item.hpp
sycl::_V1::detail::DecoratedType
Definition: access.hpp:163
nd_item.hpp
sycl::_V1::nd_item::get_local_id
id< Dimensions > get_local_id() const
Definition: nd_item.hpp:65
sycl::_V1::detail::flushBuffer
void flushBuffer(GlobalOffsetAccessorT &GlobalOffset, GlobalBufAccessorT &GlobalBuf, GlobalBufAccessorT &GlobalFlushBuf, unsigned WIOffset)
Definition: stream.hpp:530
sycl::_V1::detail::BaseField
static constexpr FmtFlags BaseField
Definition: stream.hpp:61
sycl::_V1::ext::intel::experimental::operator!=
bool operator!=(const cache_config &lhs, const cache_config &rhs)
Definition: kernel_execution_properties.hpp:40
sycl::_V1::detail::writeItem
void writeItem(GlobalBufAccessorT &GlobalFlushBuf, size_t FlushBufferSize, unsigned WIOffset, const item< Dimensions > &Item)
Definition: stream.hpp:619
sycl::_V1::detail::GetFlushBufOffset
unsigned GetFlushBufOffset(const GlobalBufAccessorT &GlobalFlushBuf, unsigned WIOffset)
Definition: stream.hpp:113
sycl::_V1::stream_manipulator
stream_manipulator
Definition: stream.hpp:749
sycl::_V1::detail::GlobalBufPtrType
typename detail::DecoratedType< char, GlobalBufAS >::type * GlobalBufPtrType
Definition: stream.hpp:97
sycl::_V1::detail::array
Definition: array.hpp:24
sycl::_V1::detail::F2I32
unsigned F2I32(float Val)
Definition: stream.hpp:250
sycl::_V1::detail::Oct
static constexpr FmtFlags Oct
Definition: stream.hpp:52
sycl::_V1::group< Dimensions >
sycl::_V1::detail::ShowBase
static constexpr FmtFlags ShowBase
Definition: stream.hpp:53
atomic.hpp
sycl::_V1::detail::getSyclObjImpl
decltype(Obj::impl) getSyclObjImpl(const Obj &SyclObject)
Definition: impl_utils.hpp:30
sycl::_V1::detail::Fixed
static constexpr FmtFlags Fixed
Definition: stream.hpp:55
sycl::_V1::noshowbase
constexpr stream_manipulator noshowbase
Definition: stream.hpp:771
sycl::_V1::detail::isFastMathSignBit
detail::enable_if_t< std::is_same< T, float >::value||std::is_same< T, double >::value, bool > isFastMathSignBit(T Val)
Definition: stream.hpp:297
sycl::_V1::fixed
constexpr stream_manipulator fixed
Definition: stream.hpp:783
sycl::_V1::access::address_space
address_space
Definition: access.hpp:51