DPC++ Runtime
Runtime libraries for oneAPI DPC++
stream.hpp
Go to the documentation of this file.
1 //==----------------- stream.hpp - SYCL standard header file ---------------==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #pragma once
10 
11 #include <sycl/access/access.hpp> // for target, mode, address_space
12 #include <sycl/accessor.hpp> // for accessor
13 #include <sycl/aliases.hpp> // for half
14 #include <sycl/atomic.hpp> // for atomic
15 #include <sycl/builtins.hpp> // for isinf, isnan, signbit
16 #include <sycl/detail/array.hpp> // for array
17 #include <sycl/detail/cg.hpp> // for stream_impl
18 #include <sycl/detail/defines.hpp> // for __SYCL_SPECIAL_CLASS, __S...
19 #include <sycl/detail/defines_elementary.hpp> // for __SYCL2020_DEPRECATED
20 #include <sycl/detail/export.hpp> // for __SYCL_EXPORT
21 #include <sycl/detail/item_base.hpp> // for id, range
22 #include <sycl/detail/owner_less_base.hpp> // for OwnerLessBase
23 #include <sycl/ext/oneapi/bfloat16.hpp> // for bfloat16
24 #include <sycl/group.hpp> // for group
25 #include <sycl/h_item.hpp> // for h_item
26 #include <sycl/half_type.hpp> // for half, operator-, operator<
27 #include <sycl/handler.hpp> // for handler
28 #include <sycl/item.hpp> // for item
29 #include <sycl/nd_item.hpp> // for nd_item
30 #include <sycl/nd_range.hpp> // for nd_range
31 #include <sycl/property_list.hpp> // for property_list
32 #include <sycl/range.hpp> // for range
33 #include <sycl/sub_group.hpp> // for multi_ptr
34 #include <sycl/types.hpp> // for vec, SwizzleOp
35 
36 #include <cstddef> // for size_t, byte
37 #include <memory> // for hash, shared_ptr
38 #include <stdint.h> // for uint16_t, uint8_t
39 #include <type_traits> // for enable_if_t, is_same, fal...
40 #include <variant> // for hash
41 
42 namespace sycl {
43 inline namespace _V1 {
44 
45 namespace detail {
46 
47 using FmtFlags = unsigned int;
48 
49 // Mapping from stream_manipulator to FmtFlags. Each manipulator corresponds
50 // to the bit in FmtFlags.
51 static constexpr FmtFlags Dec = 0x0001;
52 static constexpr FmtFlags Hex = 0x0002;
53 static constexpr FmtFlags Oct = 0x0004;
54 static constexpr FmtFlags ShowBase = 0x0008;
55 static constexpr FmtFlags ShowPos = 0x0010;
56 static constexpr FmtFlags Fixed = 0x0020;
57 static constexpr FmtFlags Scientific = 0x0040;
58 
59 // Bitmask made of the combination of the base flags. Base flags are mutually
60 // exclusive, this mask is used to clean base field before setting the new
61 // base flag.
62 static constexpr FmtFlags BaseField = Dec | Hex | Oct;
63 
64 // Bitmask made of the combination of the floating point value format flags.
65 // Thease flags are mutually exclusive, this mask is used to clean float field
66 // before setting the new float flag.
67 static constexpr FmtFlags FloatField = Scientific | Fixed;
68 
69 constexpr size_t MAX_FLOATING_POINT_DIGITS = 24;
70 constexpr size_t MAX_INTEGRAL_DIGITS = 23;
71 constexpr const char *VEC_ELEMENT_DELIMITER = ", ";
72 constexpr char VEC_OPEN_BRACE = '{';
73 constexpr char VEC_CLOSE_BRACE = '}';
74 
75 constexpr size_t MAX_DIMENSIONS = 3;
76 
77 // Space for integrals (up to 3), comma and space between the
78 // integrals and enclosing braces.
79 constexpr size_t MAX_ARRAY_SIZE =
81 
82 // First 2 bytes in each work item's flush buffer are reserved for saving
83 // statement offset.
84 constexpr unsigned FLUSH_BUF_OFFSET_SIZE = 2;
85 
86 template <class F, class T = void>
87 using EnableIfFP = typename std::enable_if_t<
88  detail::check_type_in_v<F, float, double, half, ext::oneapi::bfloat16>, T>;
89 
91  sycl::access::target::device>;
92 
97 constexpr static int GlobalBufDim = 1;
98 
99 using GlobalOffsetAccessorT = accessor<unsigned, 1, sycl::access::mode::atomic,
100  sycl::access::target::device>;
101 
106 constexpr static int GlobalOffsetDim = 1;
107 
108 // Read first 2 bytes of flush buffer to get buffer offset.
109 // TODO: Should be optimized to the following:
110 // return *reinterpret_cast<uint16_t *>(&GlobalFlushBuf[WIOffset]);
111 // when an issue with device code compilation using this optimization is fixed.
112 inline unsigned GetFlushBufOffset(const GlobalBufAccessorT &GlobalFlushBuf,
113  unsigned WIOffset) {
114  return ((static_cast<unsigned>(static_cast<uint8_t>(GlobalFlushBuf[WIOffset]))
115  << 8) +
116  static_cast<uint8_t>(GlobalFlushBuf[WIOffset + 1]));
117 }
118 
119 // Write flush buffer's offset into first 2 bytes of that buffer.
120 // TODO: Should be optimized to the following:
121 // *reinterpret_cast<uint16_t *>(&GlobalFlushBuf[WIOffset]) =
122 // static_cast<uint16_t>(Offset);
123 // when an issue with device code compilation using this optimization is fixed.
124 inline void SetFlushBufOffset(GlobalBufAccessorT &GlobalFlushBuf,
125  unsigned WIOffset, unsigned Offset) {
126  GlobalFlushBuf[WIOffset] = static_cast<char>((Offset >> 8) & 0xff);
127  GlobalFlushBuf[WIOffset + 1] = static_cast<char>(Offset & 0xff);
128 }
129 
130 inline void write(GlobalBufAccessorT &GlobalFlushBuf, size_t FlushBufferSize,
131  unsigned WIOffset, const char *Str, unsigned Len,
132  unsigned Padding = 0) {
133  unsigned Offset =
134  GetFlushBufOffset(GlobalFlushBuf, WIOffset) + FLUSH_BUF_OFFSET_SIZE;
135 
136  if ((Offset + Len + Padding > FlushBufferSize) ||
137  (WIOffset + Offset + Len + Padding > GlobalFlushBuf.size()))
138  // TODO: flush here
139  return;
140 
141  // Write padding
142  for (size_t I = 0; I < Padding; ++I, ++Offset)
143  GlobalFlushBuf[WIOffset + Offset] = ' ';
144 
145  for (size_t I = 0; I < Len; ++I, ++Offset) {
146  GlobalFlushBuf[WIOffset + Offset] = Str[I];
147  }
148 
149  SetFlushBufOffset(GlobalFlushBuf, WIOffset, Offset - FLUSH_BUF_OFFSET_SIZE);
150 }
151 
152 inline void reverseBuf(char *Buf, unsigned Len) {
153  int I = Len - 1;
154  int J = 0;
155  while (I > J) {
156  int Temp = Buf[I];
157  Buf[I] = Buf[J];
158  Buf[J] = Temp;
159  I--;
160  J++;
161  }
162 }
163 
164 template <typename T>
165 inline std::make_unsigned_t<T> getAbsVal(const T Val, const int Base) {
166  return ((Base == 10) && (Val < 0)) ? -Val : Val;
167 }
168 
169 inline char digitToChar(const int Digit) {
170  if (Digit < 10) {
171  return '0' + Digit;
172  } else {
173  return 'a' + Digit - 10;
174  }
175 }
176 
177 template <typename T>
178 inline typename std::enable_if_t<std::is_integral_v<T>, unsigned>
179 integralToBase(T Val, int Base, char *Digits) {
180  unsigned NumDigits = 0;
181 
182  do {
183  Digits[NumDigits++] = digitToChar(Val % Base);
184  Val /= Base;
185  } while (Val);
186 
187  return NumDigits;
188 }
189 
190 // Returns number of symbols written to the buffer
191 template <typename T>
192 inline typename std::enable_if_t<std::is_integral_v<T>, unsigned>
193 ScalarToStr(const T &Val, char *Buf, unsigned Flags, int, int Precision = -1) {
194  (void)Precision;
195  int Base = 10;
196 
197  // append base manipulator
198  switch (Flags & BaseField) {
199  case Dec:
200  Base = 10;
201  break;
202  case Hex:
203  Base = 16;
204  break;
205  case Oct:
206  Base = 8;
207  break;
208  default:
209  // default value is 10
210  break;
211  }
212 
213  unsigned Offset = 0;
214 
215  // write '+' to the stream if the base is 10 and the value is non-negative
216  // or write '-' to stream if base is 10 and the value is negative
217  if (Base == 10) {
218  if ((Flags & ShowPos) && Val >= 0)
219  Buf[Offset++] = '+';
220  else if (Val < 0)
221  Buf[Offset++] = '-';
222  }
223 
224  // write 0 or 0x to the stream if base is not 10 and the manipulator is set
225  if (Base != 10 && (Flags & ShowBase)) {
226  Buf[Offset++] = '0';
227  if (Base == 16)
228  Buf[Offset++] = 'x';
229  }
230 
231  auto AbsVal = getAbsVal(Val, Base);
232 
233  const unsigned NumBuf = integralToBase(AbsVal, Base, Buf + Offset);
234 
235  reverseBuf(Buf + Offset, NumBuf);
236  return Offset + NumBuf;
237 }
238 
239 inline unsigned append(char *Dst, const char *Src) {
240  unsigned Len = 0;
241  for (; Src[Len] != '\0'; ++Len)
242  ;
243 
244  for (unsigned I = 0; I < Len; ++I)
245  Dst[I] = Src[I];
246  return Len;
247 }
248 
249 inline unsigned F2I32(float Val) {
250  union {
251  float FVal;
252  unsigned I32Val;
253  } Internal;
254  Internal.FVal = Val;
255  return Internal.I32Val;
256 }
257 
258 inline unsigned long long D2I64(double Val) {
259  union {
260  double DVal;
261  unsigned long long I64Val;
262  } Internal;
263  Internal.DVal = Val;
264  return Internal.I64Val;
265 }
266 
267 template <typename T>
268 inline typename detail::enable_if_t<
269  std::is_same<T, float>::value || std::is_same<T, double>::value, bool>
271  if constexpr (sizeof(Val) == 4) {
272  return (F2I32(Val) & 0x7fffffff) == 0x7f800000;
273  } else if constexpr (sizeof(Val) == 8) {
274  return (D2I64(Val) & -1ULL >> 1) == 0x7ffULL << 52;
275  }
276 
277  return false;
278 }
279 
280 template <typename T>
281 inline typename detail::enable_if_t<
282  std::is_same<T, float>::value || std::is_same<T, double>::value, bool>
284  if constexpr (sizeof(Val) == 4) {
285  return (F2I32(Val) & 0x7fffffff) > 0x7f800000;
286  } else if constexpr (sizeof(Val) == 8) {
287  return (D2I64(Val) & -1ULL >> 1) > 0x7ffULL << 52;
288  }
289 
290  return false;
291 }
292 
293 template <typename T>
294 inline typename detail::enable_if_t<
295  std::is_same<T, float>::value || std::is_same<T, double>::value, bool>
297  if constexpr (sizeof(Val) == 4) {
298  return F2I32(Val) >> 31;
299  } else if constexpr (sizeof(Val) == 8) {
300  return D2I64(Val) >> 63;
301  }
302 
303  return false;
304 }
305 
306 template <typename T>
307 typename detail::enable_if_t<
308  std::is_same<T, float>::value || std::is_same<T, double>::value, unsigned>
309 checkForInfNan(char *Buf, T Val) {
310 #ifdef __FAST_MATH__
311  if (isFastMathNan(Val))
312 #else
313  if (isnan(Val))
314 #endif
315  return append(Buf, "nan");
316 #ifdef __FAST_MATH__
317  if (isFastMathInf(Val)) {
318  if (isFastMathSignBit(Val))
319 #else
320  if (isinf(Val)) {
321  if (signbit(Val))
322 #endif
323  return append(Buf, "-inf");
324  return append(Buf, "inf");
325  }
326  return 0;
327 }
328 
329 template <typename T>
330 inline typename std::enable_if_t<std::is_same_v<T, half>, unsigned>
331 checkForInfNan(char *Buf, T Val) {
332  if (Val != Val)
333  return append(Buf, "nan");
334 
335  // Extract the sign from the bits
336  const uint16_t Sign = sycl::bit_cast<uint16_t>(Val) & 0x8000;
337  // Extract the exponent from the bits
338  const uint16_t Exp16 = (sycl::bit_cast<uint16_t>(Val) & 0x7c00) >> 10;
339 
340  if (Exp16 == 0x1f) {
341  if (Sign)
342  return append(Buf, "-inf");
343  return append(Buf, "inf");
344  }
345  return 0;
346 }
347 
348 template <typename T>
349 inline typename std::enable_if_t<std::is_same_v<T, ext::oneapi::bfloat16>,
350  unsigned>
351 checkForInfNan(char *Buf, T Val) {
352  if (Val != Val)
353  return append(Buf, "nan");
354 
355  // Extract the sign from the bits
356  const uint16_t Sign = sycl::bit_cast<uint16_t>(Val) & 0x8000;
357  // Extract the exponent from the bits
358  const uint16_t Exp16 = (sycl::bit_cast<uint16_t>(Val) & 0x7f80) >> 7;
359 
360  if (Exp16 == 0x7f) {
361  if (Sign)
362  return append(Buf, "-inf");
363  return append(Buf, "inf");
364  }
365  return 0;
366 }
367 
368 template <typename T>
370  int Precision, bool IsSci) {
371  int Exp = 0;
372 
373  // For the case that the value is larger than 10.0
374  while (AbsVal >= T{10.0}) {
375  ++Exp;
376  AbsVal /= T{10.0};
377  }
378  // For the case that the value is less than 1.0
379  while (AbsVal > T{0.0} && AbsVal < T{1.0}) {
380  --Exp;
381  AbsVal *= T{10.0};
382  }
383 
384  auto IntegralPart = static_cast<int>(AbsVal);
385  auto FractionPart = AbsVal - IntegralPart;
386 
387  int FractionDigits[MAX_FLOATING_POINT_DIGITS] = {0};
388 
389  // Exponent
390  int P = Precision > 0 ? Precision : 4;
391  size_t FractionLength = Exp + P;
392 
393  // After normalization integral part contains 1 symbol, also there could be
394  // '.', 'e', sign of the exponent and sign of the number, overall 5 symbols.
395  // So, clamp fraction length if required according to maximum size of the
396  // buffer for floating point number.
397  if (FractionLength > MAX_FLOATING_POINT_DIGITS - 5)
398  FractionLength = MAX_FLOATING_POINT_DIGITS - 5;
399 
400  for (unsigned I = 0; I < FractionLength; ++I) {
401  FractionPart *= T{10.0};
402  FractionDigits[I] = static_cast<int>(FractionPart);
403  FractionPart -= static_cast<int>(FractionPart);
404  }
405 
406  int Carry = FractionPart > static_cast<T>(0.5) ? 1 : 0;
407 
408  // Propagate the Carry
409  for (int I = FractionLength - 1; I >= 0 && Carry; --I) {
410  auto Digit = FractionDigits[I] + Carry;
411  FractionDigits[I] = Digit % 10;
412  Carry = Digit / 10;
413  }
414 
415  // Carry from the fraction part is propagated to integral part
416  IntegralPart += Carry;
417  if (IntegralPart == 10) {
418  IntegralPart = 1;
419  ++Exp;
420  }
421 
422  unsigned Offset = 0;
423 
424  // Assemble the final string correspondingly
425  if (IsSci) { // scientific mode
426  // Append the integral part
427  Digits[Offset++] = digitToChar(IntegralPart);
428  Digits[Offset++] = '.';
429 
430  // Append all fraction
431  for (unsigned I = 0; I < FractionLength; ++I)
432  Digits[Offset++] = digitToChar(FractionDigits[I]);
433 
434  auto AbsExp = Exp < 0 ? -Exp : Exp;
435  // Exponent part
436  Digits[Offset++] = 'e';
437  Digits[Offset++] = Exp >= 0 ? '+' : '-';
438  Digits[Offset++] = digitToChar(AbsExp / 10);
439  Digits[Offset++] = digitToChar(AbsExp % 10);
440  } else { // normal mode
441  if (Exp < 0) {
442  Digits[Offset++] = '0';
443  Digits[Offset++] = '.';
444  while (++Exp)
445  Digits[Offset++] = '0';
446 
447  // Append the integral part
448  Digits[Offset++] = digitToChar(IntegralPart);
449 
450  // Append all fraction
451  for (unsigned I = 0; I < FractionLength; ++I)
452  Digits[Offset++] = digitToChar(FractionDigits[I]);
453  } else {
454  // Append the integral part
455  Digits[Offset++] = digitToChar(IntegralPart);
456  unsigned I = 0;
457  // Append the integral part first
458  for (; I < FractionLength && Exp--; ++I)
459  Digits[Offset++] = digitToChar(FractionDigits[I]);
460 
461  // Put the dot
462  Digits[Offset++] = '.';
463 
464  // Append the rest of fraction part, or the real fraction part
465  for (; I < FractionLength; ++I)
466  Digits[Offset++] = digitToChar(FractionDigits[I]);
467  }
468  // The normal mode requires no tailing zero digit, then we need to first
469  // find the first non-zero digit
470  while (Digits[Offset - 1] == '0')
471  Offset--;
472 
473  // If dot is the last digit, it should be stripped off as well
474  if (Digits[Offset - 1] == '.')
475  Offset--;
476  }
477  return Offset;
478 }
479 
480 // Returns number of symbols written to the buffer
481 template <typename T>
482 inline EnableIfFP<T, unsigned>
483 ScalarToStr(const T &Val, char *Buf, unsigned Flags, int, int Precision = -1) {
484  unsigned Offset = checkForInfNan(Buf, Val);
485  if (Offset)
486  return Offset;
487 
488  T Neg = -Val;
489  auto AbsVal = Val < 0 ? Neg : Val;
490 
491  if (Val < 0) {
492  Buf[Offset++] = '-';
493  } else if (Flags & ShowPos) {
494  Buf[Offset++] = '+';
495  }
496 
497  bool IsSci = false;
498  if (Flags & detail::Scientific)
499  IsSci = true;
500 
501  // TODO: manipulators for floating-point output - hexfloat, fixed
502  Offset += floatingPointToDecStr(AbsVal, Buf + Offset, Precision, IsSci);
503 
504  return Offset;
505 }
506 
507 template <typename T>
508 inline typename std::enable_if_t<std::is_integral_v<T>>
509 writeIntegral(GlobalBufAccessorT &GlobalFlushBuf, size_t FlushBufferSize,
510  unsigned WIOffset, unsigned Flags, int Width, const T &Val) {
511  char Digits[MAX_INTEGRAL_DIGITS] = {0};
512  unsigned Len = ScalarToStr(Val, Digits, Flags, Width);
513  write(GlobalFlushBuf, FlushBufferSize, WIOffset, Digits, Len,
514  (Width > 0 && static_cast<unsigned>(Width) > Len)
515  ? static_cast<unsigned>(Width) - Len
516  : 0);
517 }
518 
519 template <typename T>
520 inline EnableIfFP<T>
521 writeFloatingPoint(GlobalBufAccessorT &GlobalFlushBuf, size_t FlushBufferSize,
522  unsigned WIOffset, unsigned Flags, int Width, int Precision,
523  const T &Val) {
524  char Digits[MAX_FLOATING_POINT_DIGITS] = {0};
525  unsigned Len = ScalarToStr(Val, Digits, Flags, Width, Precision);
526  write(GlobalFlushBuf, FlushBufferSize, WIOffset, Digits, Len,
527  (Width > 0 && static_cast<unsigned>(Width) > Len)
528  ? static_cast<unsigned>(Width) - Len
529  : 0);
530 }
531 
532 // Helper method to update offset in the global buffer atomically according to
533 // the provided size of the data in the flush buffer. Return true if offset is
534 // updated and false in case of overflow.
535 inline bool updateOffset(GlobalOffsetAccessorT &GlobalOffset,
536  GlobalBufAccessorT &GlobalBuf, unsigned Size,
537  unsigned &Cur) {
538  unsigned New;
539  Cur = GlobalOffset[0].load();
540  do {
541  if (GlobalBuf.get_range().size() - Cur < Size)
542  // Overflow
543  return false;
544  New = Cur + Size;
545  } while (!GlobalOffset[0].compare_exchange_strong(Cur, New));
546  return true;
547 }
548 
549 inline void flushBuffer(GlobalOffsetAccessorT &GlobalOffset,
550  GlobalBufAccessorT &GlobalBuf,
551  GlobalBufAccessorT &GlobalFlushBuf, unsigned WIOffset) {
552  unsigned Offset = GetFlushBufOffset(GlobalFlushBuf, WIOffset);
553  if (Offset == 0)
554  return;
555 
556  unsigned Cur = 0;
557  if (!updateOffset(GlobalOffset, GlobalBuf, Offset, Cur))
558  return;
559 
560  unsigned StmtOffset = WIOffset + FLUSH_BUF_OFFSET_SIZE;
561  for (unsigned I = StmtOffset; I < StmtOffset + Offset; I++) {
562  GlobalBuf[Cur++] = GlobalFlushBuf[I];
563  }
564  // Reset the offset in the flush buffer
565  SetFlushBufOffset(GlobalFlushBuf, WIOffset, 0);
566 }
567 
568 template <typename T, int VecLength>
569 typename std::enable_if_t<(VecLength == 1), unsigned>
570 VecToStr(const vec<T, VecLength> &Vec, char *VecStr, unsigned Flags, int Width,
571  int Precision) {
572  return ScalarToStr(static_cast<T>(Vec.x()), VecStr, Flags, Width, Precision);
573 }
574 
575 template <typename T, int VecLength>
576 typename std::enable_if_t<(VecLength == 2 || VecLength == 4 || VecLength == 8 ||
577  VecLength == 16),
578  unsigned>
579 VecToStr(const vec<T, VecLength> &Vec, char *VecStr, unsigned Flags, int Width,
580  int Precision) {
581  unsigned Len =
582  VecToStr<T, VecLength / 2>(Vec.lo(), VecStr, Flags, Width, Precision);
583  Len += append(VecStr + Len, VEC_ELEMENT_DELIMITER);
584  Len += VecToStr<T, VecLength / 2>(Vec.hi(), VecStr + Len, Flags, Width,
585  Precision);
586  return Len;
587 }
588 
589 template <typename T, int VecLength>
590 typename std::enable_if_t<(VecLength == 3), unsigned>
591 VecToStr(const vec<T, VecLength> &Vec, char *VecStr, unsigned Flags, int Width,
592  int Precision) {
593  unsigned Len = VecToStr<T, 2>(Vec.lo(), VecStr, Flags, Width, Precision);
594  Len += append(VecStr + Len, VEC_ELEMENT_DELIMITER);
595  Len += VecToStr<T, 1>(Vec.z(), VecStr + Len, Flags, Width, Precision);
596  return Len;
597 }
598 
599 template <typename T, int VecLength>
600 inline void writeVec(GlobalBufAccessorT &GlobalFlushBuf, size_t FlushBufferSize,
601  unsigned WIOffset, unsigned Flags, int Width,
602  int Precision, const vec<T, VecLength> &Vec) {
603  // Reserve space for vector elements and delimiters
604  constexpr size_t MAX_VEC_SIZE =
605  MAX_FLOATING_POINT_DIGITS * VecLength + (VecLength - 1) * 2;
606  char VecStr[MAX_VEC_SIZE] = {0};
607  unsigned Len = VecToStr<T, VecLength>(Vec, VecStr, Flags, Width, Precision);
608  write(GlobalFlushBuf, FlushBufferSize, WIOffset, VecStr, Len,
609  (Width > 0 && Width > Len) ? Width - Len : 0);
610 }
611 
612 template <int ArrayLength>
613 inline unsigned ArrayToStr(char *Buf, const array<ArrayLength> &Arr) {
614  unsigned Len = 0;
615  Buf[Len++] = VEC_OPEN_BRACE;
616 
617  for (int I = 0; I < ArrayLength; ++I) {
618  Len += ScalarToStr(Arr[I], Buf + Len, 0 /* No flags */, -1, -1);
619  if (I != ArrayLength - 1)
620  Len += append(Buf + Len, VEC_ELEMENT_DELIMITER);
621  }
622 
623  Buf[Len++] = VEC_CLOSE_BRACE;
624 
625  return Len;
626 }
627 
628 template <int ArrayLength>
629 inline void writeArray(GlobalBufAccessorT &GlobalFlushBuf,
630  size_t FlushBufferSize, unsigned WIOffset,
631  const array<ArrayLength> &Arr) {
632  char Buf[MAX_ARRAY_SIZE];
633  unsigned Len = ArrayToStr(Buf, Arr);
634  write(GlobalFlushBuf, FlushBufferSize, WIOffset, Buf, Len);
635 }
636 
637 template <int Dimensions>
638 inline void writeItem(GlobalBufAccessorT &GlobalFlushBuf,
639  size_t FlushBufferSize, unsigned WIOffset,
640  const item<Dimensions> &Item) {
641  // Reserve space for 3 arrays and additional place (40 symbols) for printing
642  // the text
643  char Buf[3 * MAX_ARRAY_SIZE + 40];
644  unsigned Len = 0;
645  Len += append(Buf, "item(");
646  Len += append(Buf + Len, "range: ");
647  Len += ArrayToStr(Buf + Len, Item.get_range());
648  Len += append(Buf + Len, ", id: ");
649  Len += ArrayToStr(Buf + Len, Item.get_id());
650  Len += append(Buf + Len, ", offset: ");
651  Len += ArrayToStr(Buf + Len, Item.get_offset());
652  Buf[Len++] = ')';
653  write(GlobalFlushBuf, FlushBufferSize, WIOffset, Buf, Len);
654 }
655 
656 template <int Dimensions>
657 inline void writeNDRange(GlobalBufAccessorT &GlobalFlushBuf,
658  size_t FlushBufferSize, unsigned WIOffset,
659  const nd_range<Dimensions> &ND_Range) {
660  // Reserve space for 3 arrays and additional place (50 symbols) for printing
661  // the text
662  char Buf[3 * MAX_ARRAY_SIZE + 50];
663  unsigned Len = 0;
664  Len += append(Buf, "nd_range(");
665  Len += append(Buf + Len, "global_range: ");
666  Len += ArrayToStr(Buf + Len, ND_Range.get_global_range());
667  Len += append(Buf + Len, ", local_range: ");
668  Len += ArrayToStr(Buf + Len, ND_Range.get_local_range());
669  Len += append(Buf + Len, ", offset: ");
670  Len += ArrayToStr(Buf + Len, ND_Range.get_offset());
671  Buf[Len++] = ')';
672  write(GlobalFlushBuf, FlushBufferSize, WIOffset, Buf, Len);
673 }
674 
675 template <int Dimensions>
676 inline void writeNDItem(GlobalBufAccessorT &GlobalFlushBuf,
677  size_t FlushBufferSize, unsigned WIOffset,
678  const nd_item<Dimensions> &ND_Item) {
679  // Reserve space for 2 arrays and additional place (40 symbols) for printing
680  // the text
681  char Buf[2 * MAX_ARRAY_SIZE + 40];
682  unsigned Len = 0;
683  Len += append(Buf, "nd_item(");
684  Len += append(Buf + Len, "global_id: ");
685  Len += ArrayToStr(Buf + Len, ND_Item.get_global_id());
686  Len += append(Buf + Len, ", local_id: ");
687  Len += ArrayToStr(Buf + Len, ND_Item.get_local_id());
688  Buf[Len++] = ')';
689  write(GlobalFlushBuf, FlushBufferSize, WIOffset, Buf, Len);
690 }
691 
692 template <int Dimensions>
693 inline void writeGroup(GlobalBufAccessorT &GlobalFlushBuf,
694  size_t FlushBufferSize, unsigned WIOffset,
695  const group<Dimensions> &Group) {
696  // Reserve space for 4 arrays and additional place (60 symbols) for printing
697  // the text
698  char Buf[4 * MAX_ARRAY_SIZE + 60];
699  unsigned Len = 0;
700  Len += append(Buf, "group(");
701  Len += append(Buf + Len, "id: ");
702  Len += ArrayToStr(Buf + Len, Group.get_id());
703  Len += append(Buf + Len, ", global_range: ");
704  Len += ArrayToStr(Buf + Len, Group.get_global_range());
705  Len += append(Buf + Len, ", local_range: ");
706  Len += ArrayToStr(Buf + Len, Group.get_local_range());
707  Len += append(Buf + Len, ", group_range: ");
708  Len += ArrayToStr(Buf + Len, Group.get_group_range());
709  Buf[Len++] = ')';
710  write(GlobalFlushBuf, FlushBufferSize, WIOffset, Buf, Len);
711 }
712 
713 // Space for 2 arrays and additional place (20 symbols) for printing
714 // the text
715 constexpr size_t MAX_ITEM_SIZE = 2 * MAX_ARRAY_SIZE + 20;
716 
717 template <int Dimensions>
718 inline unsigned ItemToStr(char *Buf, const item<Dimensions, false> &Item) {
719  unsigned Len = 0;
720  Len += append(Buf, "item(");
721  for (int I = 0; I < 2; ++I) {
722  Len += append(Buf + Len, I == 0 ? "range: " : ", id: ");
723  Len += ArrayToStr(Buf + Len, I == 0 ? Item.get_range() : Item.get_id());
724  }
725  Buf[Len++] = ')';
726  return Len;
727 }
728 
729 template <int Dimensions>
730 inline void writeHItem(GlobalBufAccessorT &GlobalFlushBuf,
731  size_t FlushBufferSize, unsigned WIOffset,
732  const h_item<Dimensions> &HItem) {
733  // Reserve space for 3 items and additional place (60 symbols) for printing
734  // the text
735  char Buf[3 * MAX_ITEM_SIZE + 60];
736  unsigned Len = 0;
737  Len += append(Buf, "h_item(");
738  for (int I = 0; I < 3; ++I) {
739  Len += append(Buf + Len, I == 0 ? "\n global "
740  : I == 1 ? "\n logical local "
741  : "\n physical local ");
742  Len += ItemToStr(Buf + Len, I == 0 ? HItem.get_global()
743  : I == 1 ? HItem.get_logical_local()
744  : HItem.get_physical_local());
745  }
746  Len += append(Buf + Len, "\n)");
747  write(GlobalFlushBuf, FlushBufferSize, WIOffset, Buf, Len);
748 }
749 
750 template <typename> struct IsSwizzleOp : std::false_type {};
751 
752 template <typename VecT, typename OperationLeftT, typename OperationRightT,
753  template <typename> class OperationCurrentT, int... Indexes>
755  VecT, OperationLeftT, OperationRightT, OperationCurrentT, Indexes...>>
756  : std::true_type {
757  using T = typename VecT::element_type;
758  using Type = typename sycl::vec<T, (sizeof...(Indexes))>;
759 };
760 
761 template <typename T>
763  typename std::enable_if_t<IsSwizzleOp<T>::value,
764  typename IsSwizzleOp<T>::Type>;
765 
766 } // namespace detail
767 
768 enum class stream_manipulator {
769  dec = 0,
770  hex = 1,
771  oct = 2,
772  noshowbase = 3,
773  showbase = 4,
774  noshowpos = 5,
775  showpos = 6,
776  endl = 7,
777  flush = 8,
778  fixed = 9,
779  scientific = 10,
780  hexfloat = 11,
781  defaultfloat = 12
782 };
783 
785 
787 
789 
791 
793 
795 
797 
799 
801 
803 
805 
807 
809 
810 class stream;
811 
813  int Precision_;
814 
815 public:
816  __precision_manipulator__(int Precision) : Precision_(Precision) {}
817 
818  int precision() const { return Precision_; }
819 
820  friend const stream &operator<<(const stream &,
821  const __precision_manipulator__ &);
822 };
823 
825  int Width_;
826 
827 public:
828  __width_manipulator__(int Width) : Width_(Width) {}
829 
830  int width() const { return Width_; }
831 
832  friend const stream &operator<<(const stream &,
833  const __width_manipulator__ &);
834 };
835 
837  return __precision_manipulator__(Precision);
838 }
839 
840 inline __width_manipulator__ setw(int Width) {
841  return __width_manipulator__(Width);
842 }
843 
848 class __SYCL_EXPORT __SYCL_SPECIAL_CLASS __SYCL_TYPE(stream) stream
849  : public detail::OwnerLessBase<stream> {
850 private:
851 #ifndef __SYCL_DEVICE_ONLY__
852  // Constructor for recreating a stream.
853  stream(std::shared_ptr<detail::stream_impl> Impl,
854  detail::GlobalBufAccessorT GlobalBuf,
855  detail::GlobalOffsetAccessorT GlobalOffset,
856  detail::GlobalBufAccessorT GlobalFlushBuf)
857  : impl{Impl}, GlobalBuf{GlobalBuf}, GlobalOffset{GlobalOffset},
858  GlobalFlushBuf{GlobalFlushBuf} {}
859 #endif
860 
861 public:
862 #ifdef __SYCL_DEVICE_ONLY__
863  // Default constructor for objects later initialized with __init member.
864  stream() = default;
865 #endif
866 
867  // Throws exception in case of invalid input parameters
868  stream(size_t BufferSize, size_t MaxStatementSize, handler &CGH);
869 
870  // Property-list constructor variant.
871  // TODO: Merge with other stream constructor and give PropList default value.
872  stream(size_t BufferSize, size_t MaxStatementSize, handler &CGH,
873  const property_list &PropList);
874 
875 #ifdef __SYCL_DEVICE_ONLY__
876  // We need the definitions of these functions in the header for device,
877  // otherwise they are not visible. Also, we cannot use `impl` because it's not
878  // there on the device, so we rely on GlobalBuf/GlobalFlushBuf.
879  size_t size() const noexcept { return GlobalBuf.size(); }
880 
881  size_t get_work_item_buffer_size() const {
882  return GlobalFlushBuf.size() - detail::FLUSH_BUF_OFFSET_SIZE;
883  }
884 
886  "get_size() is deprecated since SYCL 2020. Please use size() instead.")
887  size_t get_size() const { return size(); }
888 
889  __SYCL2020_DEPRECATED("get_max_statement_size() is deprecated since SYCL "
890  "2020. Please use get_work_item_buffer_size() instead.")
891  size_t get_max_statement_size() const { return get_work_item_buffer_size(); }
892 #else
893  size_t size() const noexcept;
894 
895  size_t get_work_item_buffer_size() const;
896 
898  "get_size() is deprecated since SYCL 2020. Please use size() instead.")
899  size_t get_size() const;
900 
901  __SYCL2020_DEPRECATED("get_max_statement_size() is deprecated since SYCL "
902  "2020. Please use get_work_item_buffer_size() instead.")
903  size_t get_max_statement_size() const;
904 #endif
905 
906  size_t get_precision() const { return Precision; }
907 
908  size_t get_width() const { return Width; }
909 
910  stream_manipulator get_stream_mode() const { return Manipulator; }
911 
912  bool operator==(const stream &RHS) const;
913 
914  bool operator!=(const stream &LHS) const;
915 
916  template <typename propertyT> bool has_property() const noexcept;
917 
918  template <typename propertyT> propertyT get_property() const;
919 
920 private:
921 #ifdef __SYCL_DEVICE_ONLY__
922  char padding[sizeof(std::shared_ptr<detail::stream_impl>)];
923 #else
924  std::shared_ptr<detail::stream_impl> impl;
925  template <class Obj>
926  friend decltype(Obj::impl) detail::getSyclObjImpl(const Obj &SyclObject);
927 #endif
928 
929  // NOTE: Some members are required for reconstructing the stream, but are not
930  // part of the implementation class. If more members are added, they should
931  // also be added to the weak_object specialization for streams.
932 
933  // Accessor to the global stream buffer. Global buffer contains all output
934  // from the kernel.
935  mutable detail::GlobalBufAccessorT GlobalBuf;
936 
937  // Atomic accessor to the global offset variable. It represents an offset in
938  // the global stream buffer. Since work items will flush data to global buffer
939  // in parallel we need atomic access to this offset.
940  mutable detail::GlobalOffsetAccessorT GlobalOffset;
941 
942  // Accessor to the flush buffer. Each work item writes its
943  // output to a designated section of the flush buffer.
944  mutable detail::GlobalBufAccessorT GlobalFlushBuf;
945 
946  // Offset of the WI's flush buffer in the pool.
947  mutable unsigned WIOffset = 0;
948 
949  mutable size_t FlushBufferSize;
950 
951  // Fields and methods to work with manipulators
952  mutable stream_manipulator Manipulator = defaultfloat;
953 
954  // Type used for format flags
955  using FmtFlags = unsigned int;
956 
957  mutable int Precision = -1;
958  mutable int Width = -1;
959  mutable FmtFlags Flags = 0x0;
960 
961  void set_flag(FmtFlags FormatFlag) const { Flags |= FormatFlag; }
962 
963  void unset_flag(FmtFlags FormatFlag) const { Flags &= ~FormatFlag; }
964 
965  FmtFlags get_flags() const { return Flags; }
966 
967  // This method is used to set the flag for base and float manipulators. These
968  // flags are mutually exclusive and base/float field needs to be cleared
969  // before the setting new flag.
970  void set_flag(FmtFlags FormatFlag, FmtFlags Mask) const {
971  unset_flag(Mask);
972  Flags |= FormatFlag & Mask;
973  }
974 
975  // Set the flags which correspond to the input stream manipulator.
976  void set_manipulator(const stream_manipulator SM) const {
977  switch (SM) {
979  set_flag(detail::Dec, detail::BaseField);
980  break;
982  set_flag(detail::Hex, detail::BaseField);
983  break;
985  set_flag(detail::Oct, detail::BaseField);
986  break;
988  unset_flag(detail::ShowBase);
989  break;
991  set_flag(detail::ShowBase);
992  break;
994  unset_flag(detail::ShowPos);
995  break;
997  set_flag(detail::ShowPos);
998  break;
1000  set_flag(detail::Fixed, detail::FloatField);
1001  break;
1004  break;
1007  break;
1009  unset_flag(detail::FloatField);
1010  break;
1011  default:
1012  // Unknown manipulator
1013  break;
1014  }
1015  }
1016 
1017 #ifdef __SYCL_DEVICE_ONLY__
1018  void __init(detail::GlobalBufPtrType GlobalBufPtr,
1019  range<detail::GlobalBufDim> GlobalBufAccRange,
1020  range<detail::GlobalBufDim> GlobalBufMemRange,
1021  id<detail::GlobalBufDim> GlobalBufId,
1022  detail::GlobalOffsetPtrType GlobalOffsetPtr,
1023  range<detail::GlobalOffsetDim> GlobalOffsetAccRange,
1024  range<detail::GlobalOffsetDim> GlobalOffsetMemRange,
1025  id<detail::GlobalOffsetDim> GlobalOffsetId,
1026  detail::GlobalBufPtrType GlobalFlushPtr,
1027  range<detail::GlobalBufDim> GlobalFlushAccRange,
1028  range<detail::GlobalBufDim> GlobalFlushMemRange,
1029  id<detail::GlobalBufDim> GlobalFlushId, size_t _FlushBufferSize) {
1030  GlobalBuf.__init(GlobalBufPtr, GlobalBufAccRange, GlobalBufMemRange,
1031  GlobalBufId);
1032  GlobalOffset.__init(GlobalOffsetPtr, GlobalOffsetAccRange,
1033  GlobalOffsetMemRange, GlobalOffsetId);
1034  GlobalFlushBuf.__init(GlobalFlushPtr, GlobalFlushAccRange,
1035  GlobalFlushMemRange, GlobalFlushId);
1036  FlushBufferSize = _FlushBufferSize;
1037  // Calculate offset in the flush buffer for each work item in the global
1038  // work space. We need to avoid calling intrinsics to get global id because
1039  // when stream is used in a single_task kernel this could cause some
1040  // overhead on FPGA target. That is why use global atomic variable to
1041  // calculate offsets.
1042  WIOffset = GlobalOffset[1].fetch_add(FlushBufferSize);
1043 
1044  // Initialize flush subbuffer's offset for each work item on device.
1045  // Initialization on host device is performed via submition of additional
1046  // host task.
1047  SetFlushBufOffset(GlobalFlushBuf, WIOffset, 0);
1048  }
1049 
1050  void __finalize() {
1051  // Flush data to global buffer if flush buffer is not empty. This could be
1052  // necessary if user hasn't yet flushed data on its own and kernel execution
1053  // is finished
1054  // NOTE: A call to this function will be generated by compiler
1055  // NOTE: In the current implementation user should explicitly flush data on
1056  // the host device. Data is not flushed automatically after kernel execution
1057  // because of the missing feature in scheduler.
1058  flushBuffer(GlobalOffset, GlobalBuf, GlobalFlushBuf, WIOffset);
1059  }
1060 #endif
1061 
1062  friend class handler;
1063 
1064  template <typename SYCLObjT> friend class ext::oneapi::weak_object;
1065 
1066  friend const stream &operator<<(const stream &, const char);
1067  friend const stream &operator<<(const stream &, const char *);
1068  template <typename ValueType>
1069  friend
1070  typename std::enable_if_t<std::is_integral_v<ValueType>, const stream &>
1071  operator<<(const stream &, const ValueType &);
1072  friend const stream &operator<<(const stream &, const float &);
1073  friend const stream &operator<<(const stream &, const double &);
1074  friend const stream &operator<<(const stream &, const half &);
1075  friend const stream &operator<<(const stream &,
1076  const ext::oneapi::bfloat16 &);
1077 
1078  friend const stream &operator<<(const stream &, const stream_manipulator);
1079 
1080  friend const stream &operator<<(const stream &Out,
1081  const __precision_manipulator__ &RHS);
1082 
1083  friend const stream &operator<<(const stream &Out,
1084  const __width_manipulator__ &RHS);
1085  template <typename T, int Dimensions>
1086  friend const stream &operator<<(const stream &Out,
1087  const vec<T, Dimensions> &RHS);
1088  template <typename T>
1089  friend const stream &operator<<(const stream &Out, const T *RHS);
1090  template <int Dimensions>
1091  friend const stream &operator<<(const stream &Out, const id<Dimensions> &RHS);
1092 
1093  template <int Dimensions>
1094  friend const stream &operator<<(const stream &Out,
1095  const range<Dimensions> &RHS);
1096 
1097  template <int Dimensions>
1098  friend const stream &operator<<(const stream &Out,
1099  const item<Dimensions> &RHS);
1100 
1101  template <int Dimensions>
1102  friend const stream &operator<<(const stream &Out,
1103  const nd_range<Dimensions> &RHS);
1104 
1105  template <int Dimensions>
1106  friend const stream &operator<<(const stream &Out,
1107  const nd_item<Dimensions> &RHS);
1108 
1109  template <int Dimensions>
1110  friend const stream &operator<<(const stream &Out,
1111  const group<Dimensions> &RHS);
1112 
1113  template <int Dimensions>
1114  friend const stream &operator<<(const stream &Out,
1115  const h_item<Dimensions> &RHS);
1116 };
1117 
1118 #if (!defined(_HAS_STD_BYTE) || _HAS_STD_BYTE != 0)
1119 // Byte (has to be converted to a numeric value)
1120 template <typename T>
1121 inline std::enable_if_t<std::is_same_v<T, std::byte>, const stream &>
1122 operator<<(const stream &, const T &) {
1123  static_assert(std::is_integral<T>(),
1124  "Convert the byte to a numeric value using std::to_integer");
1125 }
1126 #endif
1127 
1128 // Character
1129 inline const stream &operator<<(const stream &Out, const char C) {
1130  detail::write(Out.GlobalFlushBuf, Out.FlushBufferSize, Out.WIOffset, &C, 1);
1131  return Out;
1132 }
1133 
1134 // String
1135 inline const stream &operator<<(const stream &Out, const char *Str) {
1136  unsigned Len = 0;
1137  for (; Str[Len] != '\0'; Len++)
1138  ;
1139 
1140  detail::write(Out.GlobalFlushBuf, Out.FlushBufferSize, Out.WIOffset, Str,
1141  Len);
1142  return Out;
1143 }
1144 
1145 // Boolean
1146 inline const stream &operator<<(const stream &Out, const bool &RHS) {
1147  Out << (RHS ? "true" : "false");
1148  return Out;
1149 }
1150 
1151 // Integral
1152 template <typename ValueType>
1153 inline typename std::enable_if_t<std::is_integral_v<ValueType>, const stream &>
1154 operator<<(const stream &Out, const ValueType &RHS) {
1155  detail::writeIntegral(Out.GlobalFlushBuf, Out.FlushBufferSize, Out.WIOffset,
1156  Out.get_flags(), Out.get_width(), RHS);
1157  return Out;
1158 }
1159 
1160 // Floating points
1161 
1162 inline const stream &operator<<(const stream &Out, const float &RHS) {
1163  detail::writeFloatingPoint<float>(Out.GlobalFlushBuf, Out.FlushBufferSize,
1164  Out.WIOffset, Out.get_flags(),
1165  Out.get_width(), Out.get_precision(), RHS);
1166  return Out;
1167 }
1168 
1169 inline const stream &operator<<(const stream &Out, const double &RHS) {
1170  detail::writeFloatingPoint<double>(Out.GlobalFlushBuf, Out.FlushBufferSize,
1171  Out.WIOffset, Out.get_flags(),
1172  Out.get_width(), Out.get_precision(), RHS);
1173  return Out;
1174 }
1175 
1176 inline const stream &operator<<(const stream &Out, const half &RHS) {
1177  detail::writeFloatingPoint<half>(Out.GlobalFlushBuf, Out.FlushBufferSize,
1178  Out.WIOffset, Out.get_flags(),
1179  Out.get_width(), Out.get_precision(), RHS);
1180  return Out;
1181 }
1182 
1183 inline const stream &operator<<(const stream &Out,
1184  const ext::oneapi::bfloat16 &RHS) {
1185  detail::writeFloatingPoint<ext::oneapi::bfloat16>(
1186  Out.GlobalFlushBuf, Out.FlushBufferSize, Out.WIOffset, Out.get_flags(),
1187  Out.get_width(), Out.get_precision(), RHS);
1188  return Out;
1189 }
1190 
1191 // Pointer
1192 
1193 template <typename ElementType, access::address_space Space,
1194  access::decorated IsDecorated>
1195 inline const stream &
1196 operator<<(const stream &Out,
1198  Out << RHS.get();
1199  return Out;
1200 }
1201 
1202 template <typename T>
1203 const stream &operator<<(const stream &Out, const T *RHS) {
1204  detail::FmtFlags Flags = Out.get_flags();
1205  Flags &= ~detail::BaseField;
1206  Flags |= detail::Hex | detail::ShowBase;
1207  detail::writeIntegral(Out.GlobalFlushBuf, Out.FlushBufferSize, Out.WIOffset,
1208  Flags, Out.get_width(), reinterpret_cast<size_t>(RHS));
1209  return Out;
1210 }
1211 
1212 // Manipulators
1213 
1214 inline const stream &operator<<(const stream &Out,
1215  const __precision_manipulator__ &RHS) {
1216  Out.Precision = RHS.precision();
1217  return Out;
1218 }
1219 
1220 inline const stream &operator<<(const stream &Out,
1221  const __width_manipulator__ &RHS) {
1222  Out.Width = RHS.width();
1223  return Out;
1224 }
1225 
1226 inline const stream &operator<<(const stream &Out,
1227  const stream_manipulator RHS) {
1228  switch (RHS) {
1230  Out << '\n';
1231  flushBuffer(Out.GlobalOffset, Out.GlobalBuf, Out.GlobalFlushBuf,
1232  Out.WIOffset);
1233  break;
1235  flushBuffer(Out.GlobalOffset, Out.GlobalBuf, Out.GlobalFlushBuf,
1236  Out.WIOffset);
1237  break;
1238  default:
1239  Out.set_manipulator(RHS);
1240  break;
1241  }
1242  return Out;
1243 }
1244 
1245 // Vec
1246 
1247 template <typename T, int VectorLength>
1248 const stream &operator<<(const stream &Out, const vec<T, VectorLength> &RHS) {
1249  detail::writeVec<T, VectorLength>(Out.GlobalFlushBuf, Out.FlushBufferSize,
1250  Out.WIOffset, Out.get_flags(),
1251  Out.get_width(), Out.get_precision(), RHS);
1252  return Out;
1253 }
1254 
1255 // SYCL types
1256 
1257 template <int Dimensions>
1258 inline const stream &operator<<(const stream &Out, const id<Dimensions> &RHS) {
1259  detail::writeArray<Dimensions>(Out.GlobalFlushBuf, Out.FlushBufferSize,
1260  Out.WIOffset, RHS);
1261  return Out;
1262 }
1263 
1264 template <int Dimensions>
1265 inline const stream &operator<<(const stream &Out,
1266  const range<Dimensions> &RHS) {
1267  detail::writeArray<Dimensions>(Out.GlobalFlushBuf, Out.FlushBufferSize,
1268  Out.WIOffset, RHS);
1269  return Out;
1270 }
1271 
1272 template <int Dimensions>
1273 inline const stream &operator<<(const stream &Out,
1274  const item<Dimensions> &RHS) {
1275  detail::writeItem<Dimensions>(Out.GlobalFlushBuf, Out.FlushBufferSize,
1276  Out.WIOffset, RHS);
1277  return Out;
1278 }
1279 
1280 template <int Dimensions>
1281 inline const stream &operator<<(const stream &Out,
1282  const nd_range<Dimensions> &RHS) {
1283  detail::writeNDRange<Dimensions>(Out.GlobalFlushBuf, Out.FlushBufferSize,
1284  Out.WIOffset, RHS);
1285  return Out;
1286 }
1287 
1288 template <int Dimensions>
1289 inline const stream &operator<<(const stream &Out,
1290  const nd_item<Dimensions> &RHS) {
1291  detail::writeNDItem<Dimensions>(Out.GlobalFlushBuf, Out.FlushBufferSize,
1292  Out.WIOffset, RHS);
1293  return Out;
1294 }
1295 
1296 template <int Dimensions>
1297 inline const stream &operator<<(const stream &Out,
1298  const group<Dimensions> &RHS) {
1299  detail::writeGroup<Dimensions>(Out.GlobalFlushBuf, Out.FlushBufferSize,
1300  Out.WIOffset, RHS);
1301  return Out;
1302 }
1303 
1304 template <int Dimensions>
1305 inline const stream &operator<<(const stream &Out,
1306  const h_item<Dimensions> &RHS) {
1307  detail::writeHItem<Dimensions>(Out.GlobalFlushBuf, Out.FlushBufferSize,
1308  Out.WIOffset, RHS);
1309  return Out;
1310 }
1311 
1312 template <typename T, typename RT = detail::EnableIfSwizzleVec<T>>
1313 inline const stream &operator<<(const stream &Out, const T &RHS) {
1314  RT V = RHS;
1315  Out << V;
1316  return Out;
1317 }
1318 
1319 } // namespace _V1
1320 } // namespace sycl
1321 namespace std {
1322 template <> struct hash<sycl::stream> {
1323  size_t operator()(const sycl::stream &S) const {
1324 #ifdef __SYCL_DEVICE_ONLY__
1325  (void)S;
1326  return 0;
1327 #else
1328  return hash<std::shared_ptr<sycl::detail::stream_impl>>()(
1330 #endif
1331  }
1332 };
1333 } // namespace std
The file contains implementations of accessor class.
__precision_manipulator__(int Precision)
Definition: stream.hpp:816
Identifies an instance of a group::parallel_for_work_item function object executing at each point in ...
Definition: h_item.hpp:31
item< Dimensions, false > get_logical_local() const
Definition: h_item.hpp:46
item< Dimensions, false > get_global() const
Definition: h_item.hpp:42
item< Dimensions, false > get_physical_local() const
Definition: h_item.hpp:48
A unique identifier of an item in an index space.
Definition: id.hpp:36
Identifies an instance of the function object executing at each point in a range.
Definition: item.hpp:37
id< Dimensions > get_id() const
Definition: item.hpp:55
std::enable_if_t< has_offset, id< Dimensions > > get_offset() const
Definition: item.hpp:81
range< Dimensions > get_range() const
Definition: item.hpp:69
Identifies an instance of the function object executing at each point in an nd_range.
Definition: nd_item.hpp:48
id< Dimensions > get_local_id() const
Definition: nd_item.hpp:85
id< Dimensions > get_global_id() const
Definition: nd_item.hpp:52
Defines the iteration domain of both the work-groups and the overall dispatch.
Definition: nd_range.hpp:22
range< Dimensions > get_global_range() const
Definition: nd_range.hpp:43
range< Dimensions > get_local_range() const
Definition: nd_range.hpp:45
id< Dimensions > get_offset() const
Definition: nd_range.hpp:50
Defines the iteration domain of either a single work-group in a parallel dispatch,...
Definition: range.hpp:26
Provides a cross-patform vector class template that works efficiently on SYCL devices as well as in h...
Definition: types.hpp:284
#define __SYCL_SPECIAL_CLASS
Definition: defines.hpp:29
class __SYCL_EBO __SYCL_SPECIAL_CLASS __SYCL_TYPE(local_accessor) local_accessor class __SYCL_EBO __SYCL_SPECIAL_CLASS __SYCL_TYPE(accessor) accessor< DataT
Image accessors.
static constexpr FmtFlags Fixed
Definition: stream.hpp:56
typename detail::DecoratedType< unsigned, GlobalBufAS >::type * GlobalOffsetPtrType
Definition: stream.hpp:105
static constexpr FmtFlags ShowPos
Definition: stream.hpp:55
std::enable_if_t<(VecLength==1), unsigned > VecToStr(const vec< T, VecLength > &Vec, char *VecStr, unsigned Flags, int Width, int Precision)
Definition: stream.hpp:570
constexpr size_t MAX_ARRAY_SIZE
Definition: stream.hpp:79
std::enable_if_t< std::is_integral_v< T > > writeIntegral(GlobalBufAccessorT &GlobalFlushBuf, size_t FlushBufferSize, unsigned WIOffset, unsigned Flags, int Width, const T &Val)
Definition: stream.hpp:509
static constexpr FmtFlags Oct
Definition: stream.hpp:53
constexpr static int GlobalOffsetDim
Definition: stream.hpp:106
constexpr size_t MAX_FLOATING_POINT_DIGITS
Definition: stream.hpp:69
detail::enable_if_t< std::is_same< T, float >::value||std::is_same< T, double >::value, bool > isFastMathSignBit(T Val)
Definition: stream.hpp:296
static constexpr FmtFlags FloatField
Definition: stream.hpp:67
std::enable_if_t< std::is_integral_v< T >, unsigned > ScalarToStr(const T &Val, char *Buf, unsigned Flags, int, int Precision=-1)
Definition: stream.hpp:193
void writeHItem(GlobalBufAccessorT &GlobalFlushBuf, size_t FlushBufferSize, unsigned WIOffset, const h_item< Dimensions > &HItem)
Definition: stream.hpp:730
typename std::enable_if_t< detail::check_type_in_v< F, float, double, half, ext::oneapi::bfloat16 >, T > EnableIfFP
Definition: stream.hpp:88
void writeItem(GlobalBufAccessorT &GlobalFlushBuf, size_t FlushBufferSize, unsigned WIOffset, const item< Dimensions > &Item)
Definition: stream.hpp:638
std::make_unsigned_t< T > getAbsVal(const T Val, const int Base)
Definition: stream.hpp:165
void writeVec(GlobalBufAccessorT &GlobalFlushBuf, size_t FlushBufferSize, unsigned WIOffset, unsigned Flags, int Width, int Precision, const vec< T, VecLength > &Vec)
Definition: stream.hpp:600
typename detail::DecoratedType< char, GlobalBufAS >::type * GlobalBufPtrType
Definition: stream.hpp:96
void writeArray(GlobalBufAccessorT &GlobalFlushBuf, size_t FlushBufferSize, unsigned WIOffset, const array< ArrayLength > &Arr)
Definition: stream.hpp:629
void flushBuffer(GlobalOffsetAccessorT &GlobalOffset, GlobalBufAccessorT &GlobalBuf, GlobalBufAccessorT &GlobalFlushBuf, unsigned WIOffset)
Definition: stream.hpp:549
EnableIfFP< T > writeFloatingPoint(GlobalBufAccessorT &GlobalFlushBuf, size_t FlushBufferSize, unsigned WIOffset, unsigned Flags, int Width, int Precision, const T &Val)
Definition: stream.hpp:521
detail::enable_if_t< std::is_same< T, float >::value||std::is_same< T, double >::value, unsigned > checkForInfNan(char *Buf, T Val)
Definition: stream.hpp:309
void writeNDItem(GlobalBufAccessorT &GlobalFlushBuf, size_t FlushBufferSize, unsigned WIOffset, const nd_item< Dimensions > &ND_Item)
Definition: stream.hpp:676
accessor< unsigned, 1, sycl::access::mode::atomic, sycl::access::target::device > GlobalOffsetAccessorT
Definition: stream.hpp:100
constexpr char VEC_CLOSE_BRACE
Definition: stream.hpp:73
accessor< char, 1, sycl::access::mode::read_write, sycl::access::target::device > GlobalBufAccessorT
Definition: stream.hpp:91
constexpr const char * VEC_ELEMENT_DELIMITER
Definition: stream.hpp:71
unsigned GetFlushBufOffset(const GlobalBufAccessorT &GlobalFlushBuf, unsigned WIOffset)
Definition: stream.hpp:112
std::ostream & operator<<(std::ostream &os, std::optional< T > const &opt)
static constexpr FmtFlags ShowBase
Definition: stream.hpp:54
constexpr size_t MAX_DIMENSIONS
Definition: stream.hpp:75
void reverseBuf(char *Buf, unsigned Len)
Definition: stream.hpp:152
decltype(Obj::impl) getSyclObjImpl(const Obj &SyclObject)
Definition: impl_utils.hpp:30
unsigned int FmtFlags
Definition: stream.hpp:47
char digitToChar(const int Digit)
Definition: stream.hpp:169
EnableIfFP< T, unsigned > floatingPointToDecStr(T AbsVal, char *Digits, int Precision, bool IsSci)
Definition: stream.hpp:369
static constexpr FmtFlags Hex
Definition: stream.hpp:52
constexpr char VEC_OPEN_BRACE
Definition: stream.hpp:72
void writeGroup(GlobalBufAccessorT &GlobalFlushBuf, size_t FlushBufferSize, unsigned WIOffset, const group< Dimensions > &Group)
Definition: stream.hpp:693
unsigned ItemToStr(char *Buf, const item< Dimensions, false > &Item)
Definition: stream.hpp:718
unsigned long long D2I64(double Val)
Definition: stream.hpp:258
constexpr static access::address_space GlobalOffsetAS
Definition: stream.hpp:102
void SetFlushBufOffset(GlobalBufAccessorT &GlobalFlushBuf, unsigned WIOffset, unsigned Offset)
Definition: stream.hpp:124
void writeNDRange(GlobalBufAccessorT &GlobalFlushBuf, size_t FlushBufferSize, unsigned WIOffset, const nd_range< Dimensions > &ND_Range)
Definition: stream.hpp:657
static constexpr FmtFlags BaseField
Definition: stream.hpp:62
constexpr size_t MAX_INTEGRAL_DIGITS
Definition: stream.hpp:70
struct __SYCL2020_DEPRECATED("This type isn't device copyable in SYCL 2020") IsDeprecatedDeviceCopyable< T
constexpr static int GlobalBufDim
Definition: stream.hpp:97
constexpr size_t MAX_ITEM_SIZE
Definition: stream.hpp:715
constexpr unsigned FLUSH_BUF_OFFSET_SIZE
Definition: stream.hpp:84
detail::enable_if_t< std::is_same< T, float >::value||std::is_same< T, double >::value, bool > isFastMathInf(T Val)
Definition: stream.hpp:270
static constexpr FmtFlags Scientific
Definition: stream.hpp:57
unsigned append(char *Dst, const char *Src)
Definition: stream.hpp:239
void write(GlobalBufAccessorT &GlobalFlushBuf, size_t FlushBufferSize, unsigned WIOffset, const char *Str, unsigned Len, unsigned Padding=0)
Definition: stream.hpp:130
static constexpr FmtFlags Dec
Definition: stream.hpp:51
typename std::enable_if_t< IsSwizzleOp< T >::value, typename IsSwizzleOp< T >::Type > EnableIfSwizzleVec
Definition: stream.hpp:764
std::enable_if_t< std::is_integral_v< T >, unsigned > integralToBase(T Val, int Base, char *Digits)
Definition: stream.hpp:179
unsigned F2I32(float Val)
Definition: stream.hpp:249
unsigned ArrayToStr(char *Buf, const array< ArrayLength > &Arr)
Definition: stream.hpp:613
bool updateOffset(GlobalOffsetAccessorT &GlobalOffset, GlobalBufAccessorT &GlobalBuf, unsigned Size, unsigned &Cur)
Definition: stream.hpp:535
constexpr static access::address_space GlobalBufAS
Definition: stream.hpp:93
detail::enable_if_t< std::is_same< T, float >::value||std::is_same< T, double >::value, bool > isFastMathNan(T Val)
Definition: stream.hpp:283
bool operator==(const cache_config &lhs, const cache_config &rhs)
bool operator!=(const cache_config &lhs, const cache_config &rhs)
static constexpr bool has_property()
std::enable_if_t< std::is_same_v< T, bfloat16 >, bool > isnan(T x)
static constexpr auto get_property()
constexpr stream_manipulator endl
Definition: stream.hpp:798
__precision_manipulator__ setprecision(int Precision)
Definition: stream.hpp:836
constexpr stream_manipulator fixed
Definition: stream.hpp:802
constexpr stream_manipulator noshowbase
Definition: stream.hpp:790
constexpr mode_tag_t< access_mode::read_write > read_write
Definition: access.hpp:85
constexpr stream_manipulator hex
Definition: stream.hpp:786
constexpr stream_manipulator flush
Definition: stream.hpp:800
__width_manipulator__ setw(int Width)
Definition: stream.hpp:840
std::uint8_t instead
Definition: aliases.hpp:93
constexpr stream_manipulator scientific
Definition: stream.hpp:804
constexpr stream_manipulator noshowpos
Definition: stream.hpp:794
stream_manipulator
Definition: stream.hpp:768
constexpr stream_manipulator showpos
Definition: stream.hpp:796
constexpr stream_manipulator dec
Definition: stream.hpp:784
constexpr stream_manipulator showbase
Definition: stream.hpp:792
constexpr stream_manipulator oct
Definition: stream.hpp:788
sycl::detail::half_impl::half half
Definition: aliases.hpp:101
constexpr stream_manipulator defaultfloat
Definition: stream.hpp:808
constexpr stream_manipulator hexfloat
Definition: stream.hpp:806
Definition: access.hpp:18
_Abi const simd< _Tp, _Abi > & noexcept
Definition: simd.hpp:1324
size_t operator()(const sycl::stream &S) const
Definition: stream.hpp:1323