DPC++ Runtime
Runtime libraries for oneAPI DPC++
spirv_ops.hpp
Go to the documentation of this file.
1 //==----------- spirv_ops.hpp --- SPIRV operations -------------------------==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #pragma once
10 
11 #include <CL/__spirv/spirv_types.hpp> // for Scope, __ocl_event_t
12 #include <sycl/detail/defines_elementary.hpp> // for __DPCPP_SYCL_EXTERNAL
13 #include <sycl/detail/export.hpp> // for __SYCL_EXPORT
14 
15 #include <stddef.h> // for size_t
16 #include <stdint.h> // for uint32_t
17 #include <type_traits>
18 
19 // Convergent attribute
20 #ifdef __SYCL_DEVICE_ONLY__
21 #define __SYCL_CONVERGENT__ __attribute__((convergent))
22 #else
23 #define __SYCL_CONVERGENT__
24 #endif
25 
26 #ifdef __SYCL_DEVICE_ONLY__
27 
28 extern __DPCPP_SYCL_EXTERNAL float __spirv_RoundFToTF32INTEL(float a);
29 
30 #ifndef __SPIRV_USE_COOPERATIVE_MATRIX
31 template <typename T, typename Tp, std::size_t R, std::size_t C,
34  __spv::Scope::Flag S = __spv::Scope::Flag::Subgroup>
37  __spirv_JointMatrixLoadINTEL(T *Ptr, std::size_t Stride,
38  __spv::MatrixLayout Layout = L,
39  __spv::Scope::Flag Sc = S, int MemOperand = 0);
40 
41 template <typename T, typename Tp, std::size_t R, std::size_t C,
44  __spv::Scope::Flag S = __spv::Scope::Flag::Subgroup>
45 extern __DPCPP_SYCL_EXTERNAL void __spirv_JointMatrixStoreINTEL(
47  std::size_t Stride, __spv::MatrixLayout Layout = L,
48  __spv::Scope::Flag Sc = S, int MemOperand = 0);
49 
50 template <typename T, typename Tp, std::size_t R, std::size_t C,
53  __spv::Scope::Flag S = __spv::Scope::Flag::Subgroup>
56  __spirv_CooperativeMatrixConstructCheckedINTEL(int32_t CoordX,
57  int32_t CoordY,
58  uint32_t Height,
59  uint32_t Width,
60  const T Value);
61 
62 template <typename T, typename Tp, std::size_t R, std::size_t C,
65  __spv::Scope::Flag S = __spv::Scope::Flag::Subgroup>
68  __spirv_CooperativeMatrixLoadCheckedINTEL(
69  T *Ptr, int32_t CoordX, int32_t CoordY, __spv::MatrixLayout Layout = L,
70  uint32_t Height = 0, uint32_t Width = 0, std::size_t Stride = 0,
71  int MemOperand = 0);
72 
73 template <typename T, typename Tp, std::size_t R, std::size_t C,
76  __spv::Scope::Flag S = __spv::Scope::Flag::Subgroup>
77 extern __DPCPP_SYCL_EXTERNAL void __spirv_CooperativeMatrixStoreCheckedINTEL(
78  T *Ptr, int32_t CoordX, int32_t CoordY,
80  __spv::MatrixLayout Layout = L, uint32_t Height = 0, uint32_t Width = 0,
81  std::size_t Stride = 0, int MemOperand = 0);
82 
83 template <typename TA, typename TB, typename TC, std::size_t M, std::size_t K,
84  std::size_t N, __spv::MatrixUse UA, __spv::MatrixUse UB,
89  __spv::Scope::Flag S = __spv::Scope::Flag::Subgroup>
92  __spirv_JointMatrixMadINTEL(
96  __spv::Scope::Flag Sc = __spv::Scope::Flag::Subgroup);
97 
98 template <typename T1, typename T2, typename T3, std::size_t M, std::size_t K,
99  std::size_t N, __spv::MatrixUse UA, __spv::MatrixUse UB,
100  __spv::MatrixUse UC,
104  __spv::Scope::Flag S = __spv::Scope::Flag::Subgroup>
107  __spirv_JointMatrixUUMadINTEL(
111  __spv::Scope::Flag Sc = __spv::Scope::Flag::Subgroup);
112 
113 template <typename T1, typename T2, typename T3, std::size_t M, std::size_t K,
114  std::size_t N, __spv::MatrixUse UA, __spv::MatrixUse UB,
115  __spv::MatrixUse UC,
119  __spv::Scope::Flag S = __spv::Scope::Flag::Subgroup>
122  __spirv_JointMatrixUSMadINTEL(
126  __spv::Scope::Flag Sc = __spv::Scope::Flag::Subgroup);
127 
128 template <typename T1, typename T2, typename T3, std::size_t M, std::size_t K,
129  std::size_t N, __spv::MatrixUse UA, __spv::MatrixUse UB,
130  __spv::MatrixUse UC,
134  __spv::Scope::Flag S = __spv::Scope::Flag::Subgroup>
137  __spirv_JointMatrixSUMadINTEL(
141  __spv::Scope::Flag Sc = __spv::Scope::Flag::Subgroup);
142 
143 template <typename T, typename Tp, std::size_t R, std::size_t C,
146  __spv::Scope::Flag S = __spv::Scope::Flag::Subgroup>
149  __spirv_CompositeConstruct(const T v);
150 
151 template <typename T, std::size_t R, std::size_t C, __spv::MatrixUse U,
153  __spv::Scope::Flag S = __spv::Scope::Flag::Subgroup>
154 extern __DPCPP_SYCL_EXTERNAL __ocl_vec_t<uint32_t, 2>
155 __spirv_JointMatrixGetElementCoordINTEL(
157 
158 template <typename T, std::size_t R, std::size_t C, __spv::MatrixUse U,
160  __spv::Scope::Flag S = __spv::Scope::Flag::Subgroup>
161 extern __DPCPP_SYCL_EXTERNAL size_t __spirv_JointMatrixWorkItemLengthINTEL(
163 
164 template <typename Ts, typename T, std::size_t R, std::size_t C,
167  __spv::Scope::Flag S = __spv::Scope::Flag::Subgroup>
168 extern __DPCPP_SYCL_EXTERNAL Ts __spirv_VectorExtractDynamic(
170 
171 template <typename Ts, typename T, std::size_t R, std::size_t C,
174  __spv::Scope::Flag S = __spv::Scope::Flag::Subgroup>
176 __spirv_VectorInsertDynamic(__spv::__spirv_JointMatrixINTEL<T, R, C, L, S, U> *,
177  Ts val, size_t i);
178 #else // __SPIRV_USE_COOPERATIVE_MATRIX
179 template <typename T, typename Tp, std::size_t R, std::size_t C,
182  __spv::Scope::Flag S = __spv::Scope::Flag::Subgroup>
184  __spv::__spirv_CooperativeMatrixKHR<Tp, S, R, C, U> *
185  __spirv_CooperativeMatrixLoadKHR(T *Ptr, __spv::MatrixLayout Layout = L,
186  std::size_t Stride = 0,
187  int MemOperand = 0);
188 
189 template <typename T, typename Tp, std::size_t R, std::size_t C,
192  __spv::Scope::Flag S = __spv::Scope::Flag::Subgroup>
193 extern __DPCPP_SYCL_EXTERNAL void __spirv_CooperativeMatrixStoreKHR(
194  T *Ptr, __spv::__spirv_CooperativeMatrixKHR<Tp, S, R, C, U> *Object,
195  __spv::MatrixLayout Layout = L, std::size_t Stride = 0, int MemOperand = 0);
196 
197 template <typename T, std::size_t R, std::size_t C, __spv::MatrixUse U,
199  __spv::Scope::Flag S = __spv::Scope::Flag::Subgroup>
200 extern __DPCPP_SYCL_EXTERNAL size_t __spirv_CooperativeMatrixLengthKHR(
201  __spv::__spirv_CooperativeMatrixKHR<T, S, R, C, U> *);
202 
203 template <typename T, typename Tp, std::size_t R, std::size_t C,
206  __spv::Scope::Flag S = __spv::Scope::Flag::Subgroup>
208  __spv::__spirv_CooperativeMatrixKHR<Tp, S, R, C, U> *
209  __spirv_CooperativeMatrixConstructCheckedINTEL(const T Value, size_t Height,
210  size_t Stride, size_t Width,
211  size_t CoordX,
212  size_t CoordY);
213 
214 template <typename T, typename Tp, std::size_t R, std::size_t C,
217  __spv::Scope::Flag S = __spv::Scope::Flag::Subgroup>
219  __spv::__spirv_CooperativeMatrixKHR<Tp, S, R, C, U> *
220  __spirv_CooperativeMatrixLoadCheckedINTEL(T *Ptr, std::size_t Stride,
221  size_t Height, size_t Width,
222  size_t CoordX, size_t CoordY,
223  __spv::MatrixLayout Layout = L,
224  int MemOperand = 0);
225 
226 template <typename T, typename Tp, std::size_t R, std::size_t C,
229  __spv::Scope::Flag S = __spv::Scope::Flag::Subgroup>
230 extern __DPCPP_SYCL_EXTERNAL void __spirv_CooperativeMatrixStoreCheckedINTEL(
231  T *Ptr, __spv::__spirv_CooperativeMatrixKHR<Tp, S, R, C, U> *Object,
232  std::size_t Stride, size_t Height, size_t Width, size_t CoordX,
233  size_t CoordY, __spv::MatrixLayout Layout = L, int MemOperand = 0);
234 
235 template <typename TA, typename TB, typename TC, std::size_t M, std::size_t K,
236  std::size_t N, __spv::MatrixUse UA, __spv::MatrixUse UB,
237  __spv::MatrixUse UC,
241  __spv::Scope::Flag S = __spv::Scope::Flag::Subgroup>
243  __spv::__spirv_CooperativeMatrixKHR<TC, S, M, N, UC> *
244  __spirv_CooperativeMatrixMulAddKHR(
245  __spv::__spirv_CooperativeMatrixKHR<TA, S, M, K, UA> *A,
246  __spv::__spirv_CooperativeMatrixKHR<TB, S, K, N, UB> *B,
247  __spv::__spirv_CooperativeMatrixKHR<TC, S, M, N, UC> *C,
248  size_t Operands = 0);
249 
250 template <typename T, typename Tp, std::size_t R, std::size_t C,
253  __spv::Scope::Flag S = __spv::Scope::Flag::Subgroup>
255  __spv::__spirv_CooperativeMatrixKHR<Tp, S, R, C, U> *
256  __spirv_CompositeConstruct(const T v);
257 
258 // TODO: replace with __spirv_CooperativeMatrixGetElementCoordINTEL when ready
259 template <typename T, std::size_t R, std::size_t C, __spv::MatrixUse U,
261  __spv::Scope::Flag S = __spv::Scope::Flag::Subgroup>
262 extern __DPCPP_SYCL_EXTERNAL __ocl_vec_t<uint32_t, 2>
263 __spirv_JointMatrixGetElementCoordINTEL(
264  __spv::__spirv_CooperativeMatrixKHR<T, S, R, C, U> *, size_t i);
265 
266 // AccessChain followed by load/store serves to extract/insert and element
267 // from/to the matrix
268 template <typename Ts, typename T, std::size_t R, std::size_t C,
270  __spv::Scope::Flag S = __spv::Scope::Flag::Subgroup>
271 extern __DPCPP_SYCL_EXTERNAL Ts *
272 __spirv_AccessChain(__spv::__spirv_CooperativeMatrixKHR<T, S, R, C, U> **,
273  size_t i);
274 
275 template <typename T, typename Tp, std::size_t R, std::size_t C,
278  __spv::Scope::Flag S = __spv::Scope::Flag::Subgroup>
280  __spv::__spirv_CooperativeMatrixKHR<Tp, S, R, C, U> *
281  __spirv_CooperativeMatrixConstructCheckedINTEL(int32_t CoordX,
282  int32_t CoordY,
283  uint32_t Height,
284  uint32_t Width,
285  const T Value);
286 
287 template <typename T, typename Tp, std::size_t R, std::size_t C,
290  __spv::Scope::Flag S = __spv::Scope::Flag::Subgroup>
292  __spv::__spirv_CooperativeMatrixKHR<Tp, S, R, C, U> *
293  __spirv_CooperativeMatrixLoadCheckedINTEL(
294  T *Ptr, int32_t CoordX, int32_t CoordY, __spv::MatrixLayout Layout = L,
295  uint32_t Height = 0, uint32_t Width = 0, std::size_t Stride = 0,
296  int MemOperand = 0);
297 
298 template <typename T, typename Tp, std::size_t R, std::size_t C,
301  __spv::Scope::Flag S = __spv::Scope::Flag::Subgroup>
302 extern __DPCPP_SYCL_EXTERNAL void __spirv_CooperativeMatrixStoreCheckedINTEL(
303  T *Ptr, int32_t CoordX, int32_t CoordY,
304  __spv::__spirv_CooperativeMatrixKHR<Tp, S, R, C, U> *Object,
305  __spv::MatrixLayout Layout = L, uint32_t Height = 0, uint32_t Width = 0,
306  std::size_t Stride = 0, int MemOperand = 0);
307 #endif // __SPIRV_USE_COOPERATIVE_MATRIX
308 
309 template <typename T>
310 extern __DPCPP_SYCL_EXTERNAL void __spirv_CooperativeMatrixPrefetchINTEL(
311  T *Ptr, uint32_t NumRows, uint32_t NumCols, unsigned int CacheLevel,
312  __spv::MatrixLayout Layout, size_t Stride);
313 
314 #ifndef __SPIRV_BUILTIN_DECLARATIONS__
315 #error \
316  "SPIR-V built-ins are not available. Please set -fdeclare-spirv-builtins flag."
317 #endif
318 
319 template <typename RetT, typename ImageT>
320 extern __DPCPP_SYCL_EXTERNAL RetT __spirv_ImageQueryFormat(ImageT);
321 
322 template <typename RetT, typename ImageT>
323 extern __DPCPP_SYCL_EXTERNAL RetT __spirv_ImageQueryOrder(ImageT);
324 
325 template <typename RetT, typename ImageT>
326 extern __DPCPP_SYCL_EXTERNAL RetT __spirv_ImageQuerySize(ImageT);
327 
328 template <typename ImageT, typename CoordT, typename ValT>
329 extern __DPCPP_SYCL_EXTERNAL void __spirv_ImageWrite(ImageT, CoordT, ValT);
330 
331 template <class RetT, typename ImageT, typename TempArgT>
332 extern __DPCPP_SYCL_EXTERNAL RetT __spirv_ImageRead(ImageT, TempArgT);
333 
334 template <class RetT, typename ImageT, typename TempArgT>
335 extern __DPCPP_SYCL_EXTERNAL RetT __spirv_ImageFetch(ImageT, TempArgT);
336 
337 template <class RetT, typename ImageT, typename TempArgT>
338 extern __DPCPP_SYCL_EXTERNAL RetT __spirv_SampledImageFetch(ImageT, TempArgT);
339 
340 template <class RetT, typename ImageT, typename TempArgT>
341 extern __DPCPP_SYCL_EXTERNAL RetT __spirv_ImageArrayFetch(ImageT, TempArgT,
342  int);
343 
344 template <class RetT, typename ImageT, typename TempArgT>
345 extern __DPCPP_SYCL_EXTERNAL RetT __spirv_SampledImageArrayFetch(ImageT,
346  TempArgT, int);
347 
348 template <class RetT, typename ImageT, typename TempArgT>
349 extern __DPCPP_SYCL_EXTERNAL RetT __spirv_ImageArrayRead(ImageT, TempArgT, int);
350 
351 template <typename ImageT, typename CoordT, typename ValT>
352 extern __DPCPP_SYCL_EXTERNAL void __spirv_ImageArrayWrite(ImageT, CoordT, int,
353  ValT);
354 
355 template <typename ImageT, typename SampledType>
356 extern __DPCPP_SYCL_EXTERNAL SampledType __spirv_SampledImage(ImageT,
358 
359 template <typename SampledType, typename TempRetT, typename TempArgT>
360 extern __DPCPP_SYCL_EXTERNAL TempRetT
361 __spirv_ImageSampleExplicitLod(SampledType, TempArgT, int, float);
362 
363 template <typename SampledType, typename TempRetT, typename TempArgT>
364 extern __DPCPP_SYCL_EXTERNAL TempRetT
365 __spirv_ImageSampleExplicitLod(SampledType, TempArgT, int, TempArgT, TempArgT);
366 
367 template <typename SampledType, typename TempRetT, typename TempArgT>
368 extern __DPCPP_SYCL_EXTERNAL TempRetT __spirv_ImageSampleCubemap(SampledType,
369  TempArgT);
370 
371 template <typename RetT, class HandleT>
372 extern __DPCPP_SYCL_EXTERNAL RetT __spirv_ConvertHandleToImageINTEL(HandleT);
373 
374 template <typename RetT, class HandleT>
375 extern __DPCPP_SYCL_EXTERNAL RetT __spirv_ConvertHandleToSamplerINTEL(HandleT);
376 
377 template <typename RetT, class HandleT>
379  RetT __spirv_ConvertHandleToSampledImageINTEL(HandleT);
380 
381 #define __SYCL_OpGroupAsyncCopyGlobalToLocal __spirv_GroupAsyncCopy
382 #define __SYCL_OpGroupAsyncCopyLocalToGlobal __spirv_GroupAsyncCopy
383 
384 // Atomic SPIR-V builtins
385 #define __SPIRV_ATOMIC_LOAD(AS, Type) \
386  extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicLoad( \
387  AS const Type *P, __spv::Scope::Flag S, \
388  __spv::MemorySemanticsMask::Flag O);
389 #define __SPIRV_ATOMIC_STORE(AS, Type) \
390  extern __DPCPP_SYCL_EXTERNAL void __spirv_AtomicStore( \
391  AS Type *P, __spv::Scope::Flag S, __spv::MemorySemanticsMask::Flag O, \
392  Type V);
393 #define __SPIRV_ATOMIC_EXCHANGE(AS, Type) \
394  extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicExchange( \
395  AS Type *P, __spv::Scope::Flag S, __spv::MemorySemanticsMask::Flag O, \
396  Type V);
397 #define __SPIRV_ATOMIC_CMP_EXCHANGE(AS, Type) \
398  extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicCompareExchange( \
399  AS Type *P, __spv::Scope::Flag S, __spv::MemorySemanticsMask::Flag E, \
400  __spv::MemorySemanticsMask::Flag U, Type V, Type C);
401 #define __SPIRV_ATOMIC_IADD(AS, Type) \
402  extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicIAdd( \
403  AS Type *P, __spv::Scope::Flag S, __spv::MemorySemanticsMask::Flag O, \
404  Type V);
405 #define __SPIRV_ATOMIC_ISUB(AS, Type) \
406  extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicISub( \
407  AS Type *P, __spv::Scope::Flag S, __spv::MemorySemanticsMask::Flag O, \
408  Type V);
409 #define __SPIRV_ATOMIC_FADD(AS, Type) \
410  extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicFAddEXT( \
411  AS Type *P, __spv::Scope::Flag S, __spv::MemorySemanticsMask::Flag O, \
412  Type V);
413 #define __SPIRV_ATOMIC_SMIN(AS, Type) \
414  extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicSMin( \
415  AS Type *P, __spv::Scope::Flag S, __spv::MemorySemanticsMask::Flag O, \
416  Type V);
417 #define __SPIRV_ATOMIC_UMIN(AS, Type) \
418  extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicUMin( \
419  AS Type *P, __spv::Scope::Flag S, __spv::MemorySemanticsMask::Flag O, \
420  Type V);
421 #define __SPIRV_ATOMIC_FMIN(AS, Type) \
422  extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicFMinEXT( \
423  AS Type *P, __spv::Scope::Flag S, __spv::MemorySemanticsMask::Flag O, \
424  Type V);
425 #define __SPIRV_ATOMIC_SMAX(AS, Type) \
426  extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicSMax( \
427  AS Type *P, __spv::Scope::Flag S, __spv::MemorySemanticsMask::Flag O, \
428  Type V);
429 #define __SPIRV_ATOMIC_UMAX(AS, Type) \
430  extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicUMax( \
431  AS Type *P, __spv::Scope::Flag S, __spv::MemorySemanticsMask::Flag O, \
432  Type V);
433 #define __SPIRV_ATOMIC_FMAX(AS, Type) \
434  extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicFMaxEXT( \
435  AS Type *P, __spv::Scope::Flag S, __spv::MemorySemanticsMask::Flag O, \
436  Type V);
437 #define __SPIRV_ATOMIC_AND(AS, Type) \
438  extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicAnd( \
439  AS Type *P, __spv::Scope::Flag S, __spv::MemorySemanticsMask::Flag O, \
440  Type V);
441 #define __SPIRV_ATOMIC_OR(AS, Type) \
442  extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicOr( \
443  AS Type *P, __spv::Scope::Flag S, __spv::MemorySemanticsMask::Flag O, \
444  Type V);
445 #define __SPIRV_ATOMIC_XOR(AS, Type) \
446  extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicXor( \
447  AS Type *P, __spv::Scope::Flag S, __spv::MemorySemanticsMask::Flag O, \
448  Type V);
449 
450 #define __SPIRV_ATOMIC_FLOAT(AS, Type) \
451  __SPIRV_ATOMIC_FADD(AS, Type) \
452  __SPIRV_ATOMIC_FMIN(AS, Type) \
453  __SPIRV_ATOMIC_FMAX(AS, Type) \
454  __SPIRV_ATOMIC_LOAD(AS, Type) \
455  __SPIRV_ATOMIC_STORE(AS, Type) \
456  __SPIRV_ATOMIC_EXCHANGE(AS, Type)
457 
458 #define __SPIRV_ATOMIC_BASE(AS, Type) \
459  __SPIRV_ATOMIC_FLOAT(AS, Type) \
460  __SPIRV_ATOMIC_CMP_EXCHANGE(AS, Type) \
461  __SPIRV_ATOMIC_IADD(AS, Type) \
462  __SPIRV_ATOMIC_ISUB(AS, Type) \
463  __SPIRV_ATOMIC_AND(AS, Type) \
464  __SPIRV_ATOMIC_OR(AS, Type) \
465  __SPIRV_ATOMIC_XOR(AS, Type)
466 
467 #define __SPIRV_ATOMIC_SIGNED(AS, Type) \
468  __SPIRV_ATOMIC_BASE(AS, Type) \
469  __SPIRV_ATOMIC_SMIN(AS, Type) \
470  __SPIRV_ATOMIC_SMAX(AS, Type)
471 
472 #define __SPIRV_ATOMIC_UNSIGNED(AS, Type) \
473  __SPIRV_ATOMIC_BASE(AS, Type) \
474  __SPIRV_ATOMIC_UMIN(AS, Type) \
475  __SPIRV_ATOMIC_UMAX(AS, Type)
476 
477 // Helper atomic operations which select correct signed/unsigned version
478 // of atomic min/max based on the type
479 #define __SPIRV_ATOMIC_MINMAX(AS, Op) \
480  template <typename T> \
481  typename std::enable_if_t< \
482  std::is_integral<T>::value && std::is_signed<T>::value, T> \
483  __spirv_Atomic##Op(AS T *Ptr, __spv::Scope::Flag Memory, \
484  __spv::MemorySemanticsMask::Flag Semantics, \
485  T Value) { \
486  return __spirv_AtomicS##Op(Ptr, Memory, Semantics, Value); \
487  } \
488  template <typename T> \
489  typename std::enable_if_t< \
490  std::is_integral<T>::value && !std::is_signed<T>::value, T> \
491  __spirv_Atomic##Op(AS T *Ptr, __spv::Scope::Flag Memory, \
492  __spv::MemorySemanticsMask::Flag Semantics, \
493  T Value) { \
494  return __spirv_AtomicU##Op(Ptr, Memory, Semantics, Value); \
495  } \
496  template <typename T> \
497  typename std::enable_if_t<std::is_floating_point<T>::value, T> \
498  __spirv_Atomic##Op(AS T *Ptr, __spv::Scope::Flag Memory, \
499  __spv::MemorySemanticsMask::Flag Semantics, \
500  T Value) { \
501  return __spirv_AtomicF##Op##EXT(Ptr, Memory, Semantics, Value); \
502  }
503 
504 #define __SPIRV_ATOMICS(macro, Arg) \
505  macro(__attribute__((opencl_global)), Arg) \
506  macro(__attribute__((opencl_local)), Arg) macro(, Arg)
507 
508 __SPIRV_ATOMICS(__SPIRV_ATOMIC_FLOAT, _Float16)
509 __SPIRV_ATOMICS(__SPIRV_ATOMIC_FLOAT, float)
510 __SPIRV_ATOMICS(__SPIRV_ATOMIC_FLOAT, double)
511 __SPIRV_ATOMICS(__SPIRV_ATOMIC_SIGNED, int)
512 __SPIRV_ATOMICS(__SPIRV_ATOMIC_SIGNED, long)
513 __SPIRV_ATOMICS(__SPIRV_ATOMIC_SIGNED, long long)
514 __SPIRV_ATOMICS(__SPIRV_ATOMIC_UNSIGNED, unsigned int)
515 __SPIRV_ATOMICS(__SPIRV_ATOMIC_UNSIGNED, unsigned long)
516 __SPIRV_ATOMICS(__SPIRV_ATOMIC_UNSIGNED, unsigned long long)
517 __SPIRV_ATOMICS(__SPIRV_ATOMIC_MINMAX, Min)
518 __SPIRV_ATOMICS(__SPIRV_ATOMIC_MINMAX, Max)
519 
520 #undef __SPIRV_ATOMICS
521 #undef __SPIRV_ATOMIC_AND
522 #undef __SPIRV_ATOMIC_BASE
523 #undef __SPIRV_ATOMIC_CMP_EXCHANGE
524 #undef __SPIRV_ATOMIC_EXCHANGE
525 #undef __SPIRV_ATOMIC_FADD
526 #undef __SPIRV_ATOMIC_FLOAT
527 #undef __SPIRV_ATOMIC_FMAX
528 #undef __SPIRV_ATOMIC_FMIN
529 #undef __SPIRV_ATOMIC_IADD
530 #undef __SPIRV_ATOMIC_ISUB
531 #undef __SPIRV_ATOMIC_LOAD
532 #undef __SPIRV_ATOMIC_MINMAX
533 #undef __SPIRV_ATOMIC_OR
534 #undef __SPIRV_ATOMIC_SIGNED
535 #undef __SPIRV_ATOMIC_SMAX
536 #undef __SPIRV_ATOMIC_SMIN
537 #undef __SPIRV_ATOMIC_STORE
538 #undef __SPIRV_ATOMIC_UMAX
539 #undef __SPIRV_ATOMIC_UMIN
540 #undef __SPIRV_ATOMIC_UNSIGNED
541 #undef __SPIRV_ATOMIC_XOR
542 
543 template <typename dataT>
544 extern __attribute__((opencl_global)) dataT *
545 __SYCL_GenericCastToPtrExplicit_ToGlobal(void *Ptr) noexcept {
546  return (__attribute__((opencl_global)) dataT *)
547  __spirv_GenericCastToPtrExplicit_ToGlobal(
549 }
550 
551 template <typename dataT>
552 extern const __attribute__((opencl_global)) dataT *
553 __SYCL_GenericCastToPtrExplicit_ToGlobal(const void *Ptr) noexcept {
554  return (const __attribute__((opencl_global)) dataT *)
555  __spirv_GenericCastToPtrExplicit_ToGlobal(
557 }
558 
559 template <typename dataT>
560 extern volatile __attribute__((opencl_global)) dataT *
561 __SYCL_GenericCastToPtrExplicit_ToGlobal(volatile void *Ptr) noexcept {
562  return (volatile __attribute__((opencl_global)) dataT *)
563  __spirv_GenericCastToPtrExplicit_ToGlobal(
565 }
566 
567 template <typename dataT>
568 extern const volatile __attribute__((opencl_global)) dataT *
569 __SYCL_GenericCastToPtrExplicit_ToGlobal(const volatile void *Ptr) noexcept {
570  return (const volatile __attribute__((opencl_global)) dataT *)
571  __spirv_GenericCastToPtrExplicit_ToGlobal(
573 }
574 
575 template <typename dataT>
576 extern __attribute__((opencl_local)) dataT *
577 __SYCL_GenericCastToPtrExplicit_ToLocal(void *Ptr) noexcept {
578  return (__attribute__((opencl_local)) dataT *)
579  __spirv_GenericCastToPtrExplicit_ToLocal(Ptr,
581 }
582 
583 template <typename dataT>
584 extern const __attribute__((opencl_local)) dataT *
585 __SYCL_GenericCastToPtrExplicit_ToLocal(const void *Ptr) noexcept {
586  return (const __attribute__((opencl_local)) dataT *)
587  __spirv_GenericCastToPtrExplicit_ToLocal(Ptr,
589 }
590 
591 template <typename dataT>
592 extern volatile __attribute__((opencl_local)) dataT *
593 __SYCL_GenericCastToPtrExplicit_ToLocal(volatile void *Ptr) noexcept {
594  return (volatile __attribute__((opencl_local)) dataT *)
595  __spirv_GenericCastToPtrExplicit_ToLocal(Ptr,
597 }
598 
599 template <typename dataT>
600 extern const volatile __attribute__((opencl_local)) dataT *
601 __SYCL_GenericCastToPtrExplicit_ToLocal(const volatile void *Ptr) noexcept {
602  return (const volatile __attribute__((opencl_local)) dataT *)
603  __spirv_GenericCastToPtrExplicit_ToLocal(Ptr,
605 }
606 
607 template <typename dataT>
608 extern __attribute__((opencl_private)) dataT *
609 __SYCL_GenericCastToPtrExplicit_ToPrivate(void *Ptr) noexcept {
610  return (__attribute__((opencl_private)) dataT *)
611  __spirv_GenericCastToPtrExplicit_ToPrivate(Ptr,
613 }
614 
615 template <typename dataT>
616 extern const __attribute__((opencl_private)) dataT *
617 __SYCL_GenericCastToPtrExplicit_ToPrivate(const void *Ptr) noexcept {
618  return (const __attribute__((opencl_private)) dataT *)
619  __spirv_GenericCastToPtrExplicit_ToPrivate(Ptr,
621 }
622 
623 template <typename dataT>
624 extern volatile __attribute__((opencl_private)) dataT *
625 __SYCL_GenericCastToPtrExplicit_ToPrivate(volatile void *Ptr) noexcept {
626  return (volatile __attribute__((opencl_private)) dataT *)
627  __spirv_GenericCastToPtrExplicit_ToPrivate(Ptr,
629 }
630 
631 template <typename dataT>
632 extern const volatile __attribute__((opencl_private)) dataT *
633 __SYCL_GenericCastToPtrExplicit_ToPrivate(const volatile void *Ptr) noexcept {
634  return (const volatile __attribute__((opencl_private)) dataT *)
635  __spirv_GenericCastToPtrExplicit_ToPrivate(Ptr,
637 }
638 
639 template <typename dataT>
640 extern __attribute__((opencl_global)) dataT *
641 __SYCL_GenericCastToPtr_ToGlobal(void *Ptr) noexcept {
642  return (__attribute__((opencl_global)) dataT *)
643  __spirv_GenericCastToPtr_ToGlobal(Ptr,
645 }
646 
647 template <typename dataT>
648 extern const __attribute__((opencl_global)) dataT *
649 __SYCL_GenericCastToPtr_ToGlobal(const void *Ptr) noexcept {
650  return (const __attribute__((opencl_global)) dataT *)
651  __spirv_GenericCastToPtr_ToGlobal(Ptr,
653 }
654 
655 template <typename dataT>
656 extern volatile __attribute__((opencl_global)) dataT *
657 __SYCL_GenericCastToPtr_ToGlobal(volatile void *Ptr) noexcept {
658  return (volatile __attribute__((opencl_global)) dataT *)
659  __spirv_GenericCastToPtr_ToGlobal(Ptr,
661 }
662 
663 template <typename dataT>
664 extern const volatile __attribute__((opencl_global)) dataT *
665 __SYCL_GenericCastToPtr_ToGlobal(const volatile void *Ptr) noexcept {
666  return (const volatile __attribute__((opencl_global)) dataT *)
667  __spirv_GenericCastToPtr_ToGlobal(Ptr,
669 }
670 
671 template <typename dataT>
672 extern __attribute__((opencl_local)) dataT *
673 __SYCL_GenericCastToPtr_ToLocal(void *Ptr) noexcept {
674  return (__attribute__((opencl_local)) dataT *)
675  __spirv_GenericCastToPtr_ToLocal(Ptr, __spv::StorageClass::Workgroup);
676 }
677 
678 template <typename dataT>
679 extern const __attribute__((opencl_local)) dataT *
680 __SYCL_GenericCastToPtr_ToLocal(const void *Ptr) noexcept {
681  return (const __attribute__((opencl_local)) dataT *)
682  __spirv_GenericCastToPtr_ToLocal(Ptr, __spv::StorageClass::Workgroup);
683 }
684 
685 template <typename dataT>
686 extern volatile __attribute__((opencl_local)) dataT *
687 __SYCL_GenericCastToPtr_ToLocal(volatile void *Ptr) noexcept {
688  return (volatile __attribute__((opencl_local)) dataT *)
689  __spirv_GenericCastToPtr_ToLocal(Ptr, __spv::StorageClass::Workgroup);
690 }
691 
692 template <typename dataT>
693 extern const volatile __attribute__((opencl_local)) dataT *
694 __SYCL_GenericCastToPtr_ToLocal(const volatile void *Ptr) noexcept {
695  return (const volatile __attribute__((opencl_local)) dataT *)
696  __spirv_GenericCastToPtr_ToLocal(Ptr, __spv::StorageClass::Workgroup);
697 }
698 
699 template <typename dataT>
700 extern __attribute__((opencl_private)) dataT *
701 __SYCL_GenericCastToPtr_ToPrivate(void *Ptr) noexcept {
702  return (__attribute__((opencl_private)) dataT *)
703  __spirv_GenericCastToPtr_ToPrivate(Ptr, __spv::StorageClass::Function);
704 }
705 
706 template <typename dataT>
707 extern const __attribute__((opencl_private)) dataT *
708 __SYCL_GenericCastToPtr_ToPrivate(const void *Ptr) noexcept {
709  return (const __attribute__((opencl_private)) dataT *)
710  __spirv_GenericCastToPtr_ToPrivate(Ptr, __spv::StorageClass::Function);
711 }
712 
713 template <typename dataT>
714 extern volatile __attribute__((opencl_private)) dataT *
715 __SYCL_GenericCastToPtr_ToPrivate(volatile void *Ptr) noexcept {
716  return (volatile __attribute__((opencl_private)) dataT *)
717  __spirv_GenericCastToPtr_ToPrivate(Ptr, __spv::StorageClass::Function);
718 }
719 
720 template <typename dataT>
721 extern const volatile __attribute__((opencl_private)) dataT *
722 __SYCL_GenericCastToPtr_ToPrivate(const volatile void *Ptr) noexcept {
723  return (const volatile __attribute__((opencl_private)) dataT *)
724  __spirv_GenericCastToPtr_ToPrivate(Ptr, __spv::StorageClass::Function);
725 }
726 
727 template <typename dataT>
729 __spirv_SubgroupShuffleINTEL(dataT Data, uint32_t InvocationId) noexcept;
730 template <typename dataT>
732 __spirv_SubgroupShuffleDownINTEL(dataT Current, dataT Next,
733  uint32_t Delta) noexcept;
734 template <typename dataT>
736 __spirv_SubgroupShuffleUpINTEL(dataT Previous, dataT Current,
737  uint32_t Delta) noexcept;
738 template <typename dataT>
740 __spirv_SubgroupShuffleXorINTEL(dataT Data, uint32_t Value) noexcept;
741 
742 template <typename dataT>
744 __spirv_SubgroupBlockReadINTEL(const __attribute__((opencl_global))
745  uint8_t *Ptr) noexcept;
746 
747 template <typename dataT>
749 __spirv_SubgroupBlockWriteINTEL(__attribute__((opencl_global)) uint8_t *Ptr,
750  dataT Data) noexcept;
751 
752 template <typename dataT>
754 __spirv_SubgroupBlockReadINTEL(const __attribute__((opencl_global))
755  uint16_t *Ptr) noexcept;
756 
757 template <typename dataT>
759 __spirv_SubgroupBlockWriteINTEL(__attribute__((opencl_global)) uint16_t *Ptr,
760  dataT Data) noexcept;
761 
762 template <typename dataT>
764 __spirv_SubgroupBlockReadINTEL(const __attribute__((opencl_global))
765  uint32_t *Ptr) noexcept;
766 
767 template <typename dataT>
769 __spirv_SubgroupBlockWriteINTEL(__attribute__((opencl_global)) uint32_t *Ptr,
770  dataT Data) noexcept;
771 
772 template <typename dataT>
774 __spirv_SubgroupBlockReadINTEL(const __attribute__((opencl_global))
775  uint64_t *Ptr) noexcept;
776 
777 template <typename dataT>
779 __spirv_SubgroupBlockWriteINTEL(__attribute__((opencl_global)) uint64_t *Ptr,
780  dataT Data) noexcept;
781 template <int W, int rW>
782 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<rW>
783 __spirv_FixedSqrtINTEL(sycl::detail::ap_int<W> a, bool S, int32_t I, int32_t rI,
784  int32_t Quantization = 0, int32_t Overflow = 0) noexcept;
785 template <int W, int rW>
786 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<rW>
787 __spirv_FixedRecipINTEL(sycl::detail::ap_int<W> a, bool S, int32_t I,
788  int32_t rI, int32_t Quantization = 0,
789  int32_t Overflow = 0) noexcept;
790 template <int W, int rW>
791 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<rW>
792 __spirv_FixedRsqrtINTEL(sycl::detail::ap_int<W> a, bool S, int32_t I,
793  int32_t rI, int32_t Quantization = 0,
794  int32_t Overflow = 0) noexcept;
795 template <int W, int rW>
796 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<rW>
797 __spirv_FixedSinINTEL(sycl::detail::ap_int<W> a, bool S, int32_t I, int32_t rI,
798  int32_t Quantization = 0, int32_t Overflow = 0) noexcept;
799 template <int W, int rW>
800 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<rW>
801 __spirv_FixedCosINTEL(sycl::detail::ap_int<W> a, bool S, int32_t I, int32_t rI,
802  int32_t Quantization = 0, int32_t Overflow = 0) noexcept;
803 template <int W, int rW>
804 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<2 * rW>
805 __spirv_FixedSinCosINTEL(sycl::detail::ap_int<W> a, bool S, int32_t I,
806  int32_t rI, int32_t Quantization = 0,
807  int32_t Overflow = 0) noexcept;
808 template <int W, int rW>
809 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<rW>
810 __spirv_FixedSinPiINTEL(sycl::detail::ap_int<W> a, bool S, int32_t I,
811  int32_t rI, int32_t Quantization = 0,
812  int32_t Overflow = 0) noexcept;
813 template <int W, int rW>
814 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<rW>
815 __spirv_FixedCosPiINTEL(sycl::detail::ap_int<W> a, bool S, int32_t I,
816  int32_t rI, int32_t Quantization = 0,
817  int32_t Overflow = 0) noexcept;
818 template <int W, int rW>
819 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<2 * rW>
820 __spirv_FixedSinCosPiINTEL(sycl::detail::ap_int<W> a, bool S, int32_t I,
821  int32_t rI, int32_t Quantization = 0,
822  int32_t Overflow = 0) noexcept;
823 template <int W, int rW>
824 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<rW>
825 __spirv_FixedLogINTEL(sycl::detail::ap_int<W> a, bool S, int32_t I, int32_t rI,
826  int32_t Quantization = 0, int32_t Overflow = 0) noexcept;
827 template <int W, int rW>
828 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<rW>
829 __spirv_FixedExpINTEL(sycl::detail::ap_int<W> a, bool S, int32_t I, int32_t rI,
830  int32_t Quantization = 0, int32_t Overflow = 0) noexcept;
831 
832 // In the following built-ins width of arbitrary precision integer type for
833 // a floating point variable should be equal to sum of corresponding
834 // exponent width E, mantissa width M and 1 for sign bit. I.e. WA = EA + MA + 1.
835 template <int WA, int Wout>
836 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<Wout>
837 __spirv_ArbitraryFloatCastINTEL(sycl::detail::ap_int<WA> A, int32_t MA,
838  int32_t Mout, int32_t EnableSubnormals = 0,
839  int32_t RoundingMode = 0,
840  int32_t RoundingAccuracy = 0) noexcept;
841 
842 template <int WA, int Wout>
843 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<Wout>
844 __spirv_ArbitraryFloatCastFromIntINTEL(sycl::detail::ap_int<WA> A, int32_t Mout,
845  bool FromSign = false,
846  int32_t EnableSubnormals = 0,
847  int32_t RoundingMode = 0,
848  int32_t RoundingAccuracy = 0) noexcept;
849 
850 template <int WA, int Wout>
851 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<Wout>
852 __spirv_ArbitraryFloatCastToIntINTEL(sycl::detail::ap_int<WA> A, int32_t MA,
853  bool ToSign = false,
854  int32_t EnableSubnormals = 0,
855  int32_t RoundingMode = 0,
856  int32_t RoundingAccuracy = 0) noexcept;
857 
858 template <int WA, int WB, int Wout>
859 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<Wout>
860 __spirv_ArbitraryFloatAddINTEL(sycl::detail::ap_int<WA> A, int32_t MA,
861  sycl::detail::ap_int<WB> B, int32_t MB,
862  int32_t Mout, int32_t EnableSubnormals = 0,
863  int32_t RoundingMode = 0,
864  int32_t RoundingAccuracy = 0) noexcept;
865 
866 template <int WA, int WB, int Wout>
867 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<Wout>
868 __spirv_ArbitraryFloatSubINTEL(sycl::detail::ap_int<WA> A, int32_t MA,
869  sycl::detail::ap_int<WB> B, int32_t MB,
870  int32_t Mout, int32_t EnableSubnormals = 0,
871  int32_t RoundingMode = 0,
872  int32_t RoundingAccuracy = 0) noexcept;
873 
874 template <int WA, int WB, int Wout>
875 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<Wout>
876 __spirv_ArbitraryFloatMulINTEL(sycl::detail::ap_int<WA> A, int32_t MA,
877  sycl::detail::ap_int<WB> B, int32_t MB,
878  int32_t Mout, int32_t EnableSubnormals = 0,
879  int32_t RoundingMode = 0,
880  int32_t RoundingAccuracy = 0) noexcept;
881 
882 template <int WA, int WB, int Wout>
883 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<Wout>
884 __spirv_ArbitraryFloatDivINTEL(sycl::detail::ap_int<WA> A, int32_t MA,
885  sycl::detail::ap_int<WB> B, int32_t MB,
886  int32_t Mout, int32_t EnableSubnormals = 0,
887  int32_t RoundingMode = 0,
888  int32_t RoundingAccuracy = 0) noexcept;
889 
890 // Comparison built-ins don't use Subnormal Support, Rounding Mode and
891 // Rounding Accuracy.
892 template <int WA, int WB>
893 extern __DPCPP_SYCL_EXTERNAL bool
894 __spirv_ArbitraryFloatGTINTEL(sycl::detail::ap_int<WA> A, int32_t MA,
895  sycl::detail::ap_int<WB> B, int32_t MB) noexcept;
896 
897 template <int WA, int WB>
898 extern __DPCPP_SYCL_EXTERNAL bool
899 __spirv_ArbitraryFloatGEINTEL(sycl::detail::ap_int<WA> A, int32_t MA,
900  sycl::detail::ap_int<WB> B, int32_t MB) noexcept;
901 
902 template <int WA, int WB>
903 extern __DPCPP_SYCL_EXTERNAL bool
904 __spirv_ArbitraryFloatLTINTEL(sycl::detail::ap_int<WA> A, int32_t MA,
905  sycl::detail::ap_int<WB> B, int32_t MB) noexcept;
906 
907 template <int WA, int WB>
908 extern __DPCPP_SYCL_EXTERNAL bool
909 __spirv_ArbitraryFloatLEINTEL(sycl::detail::ap_int<WA> A, int32_t MA,
910  sycl::detail::ap_int<WB> B, int32_t MB) noexcept;
911 
912 template <int WA, int WB>
913 extern __DPCPP_SYCL_EXTERNAL bool
914 __spirv_ArbitraryFloatEQINTEL(sycl::detail::ap_int<WA> A, int32_t MA,
915  sycl::detail::ap_int<WB> B, int32_t MB) noexcept;
916 
917 template <int WA, int Wout>
918 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<Wout>
919 __spirv_ArbitraryFloatRecipINTEL(sycl::detail::ap_int<WA> A, int32_t MA,
920  int32_t Mout, int32_t EnableSubnormals = 0,
921  int32_t RoundingMode = 0,
922  int32_t RoundingAccuracy = 0) noexcept;
923 
924 template <int WA, int Wout>
925 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<Wout>
926 __spirv_ArbitraryFloatRSqrtINTEL(sycl::detail::ap_int<WA> A, int32_t MA,
927  int32_t Mout, int32_t EnableSubnormals = 0,
928  int32_t RoundingMode = 0,
929  int32_t RoundingAccuracy = 0) noexcept;
930 
931 template <int WA, int Wout>
932 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<Wout>
933 __spirv_ArbitraryFloatCbrtINTEL(sycl::detail::ap_int<WA> A, int32_t MA,
934  int32_t Mout, int32_t EnableSubnormals = 0,
935  int32_t RoundingMode = 0,
936  int32_t RoundingAccuracy = 0) noexcept;
937 
938 template <int WA, int WB, int Wout>
939 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<Wout>
940 __spirv_ArbitraryFloatHypotINTEL(sycl::detail::ap_int<WA> A, int32_t MA,
941  sycl::detail::ap_int<WB> B, int32_t MB,
942  int32_t Mout, int32_t EnableSubnormals = 0,
943  int32_t RoundingMode = 0,
944  int32_t RoundingAccuracy = 0) noexcept;
945 
946 template <int WA, int Wout>
947 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<Wout>
948 __spirv_ArbitraryFloatSqrtINTEL(sycl::detail::ap_int<WA> A, int32_t MA,
949  int32_t Mout, int32_t EnableSubnormals = 0,
950  int32_t RoundingMode = 0,
951  int32_t RoundingAccuracy = 0) noexcept;
952 
953 template <int WA, int Wout>
954 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<Wout>
955 __spirv_ArbitraryFloatLogINTEL(sycl::detail::ap_int<WA> A, int32_t MA,
956  int32_t Mout, int32_t EnableSubnormals = 0,
957  int32_t RoundingMode = 0,
958  int32_t RoundingAccuracy = 0) noexcept;
959 
960 template <int WA, int Wout>
961 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<Wout>
962 __spirv_ArbitraryFloatLog2INTEL(sycl::detail::ap_int<WA> A, int32_t MA,
963  int32_t Mout, int32_t EnableSubnormals = 0,
964  int32_t RoundingMode = 0,
965  int32_t RoundingAccuracy = 0) noexcept;
966 
967 template <int WA, int Wout>
968 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<Wout>
969 __spirv_ArbitraryFloatLog10INTEL(sycl::detail::ap_int<WA> A, int32_t MA,
970  int32_t Mout, int32_t EnableSubnormals = 0,
971  int32_t RoundingMode = 0,
972  int32_t RoundingAccuracy = 0) noexcept;
973 
974 template <int WA, int Wout>
975 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<Wout>
976 __spirv_ArbitraryFloatLog1pINTEL(sycl::detail::ap_int<WA> A, int32_t MA,
977  int32_t Mout, int32_t EnableSubnormals = 0,
978  int32_t RoundingMode = 0,
979  int32_t RoundingAccuracy = 0) noexcept;
980 
981 template <int WA, int Wout>
982 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<Wout>
983 __spirv_ArbitraryFloatExpINTEL(sycl::detail::ap_int<WA> A, int32_t MA,
984  int32_t Mout, int32_t EnableSubnormals = 0,
985  int32_t RoundingMode = 0,
986  int32_t RoundingAccuracy = 0) noexcept;
987 
988 template <int WA, int Wout>
989 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<Wout>
990 __spirv_ArbitraryFloatExp2INTEL(sycl::detail::ap_int<WA> A, int32_t MA,
991  int32_t Mout, int32_t EnableSubnormals = 0,
992  int32_t RoundingMode = 0,
993  int32_t RoundingAccuracy = 0) noexcept;
994 
995 template <int WA, int Wout>
996 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<Wout>
997 __spirv_ArbitraryFloatExp10INTEL(sycl::detail::ap_int<WA> A, int32_t MA,
998  int32_t Mout, int32_t EnableSubnormals = 0,
999  int32_t RoundingMode = 0,
1000  int32_t RoundingAccuracy = 0) noexcept;
1001 
1002 template <int WA, int Wout>
1003 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<Wout>
1004 __spirv_ArbitraryFloatExpm1INTEL(sycl::detail::ap_int<WA> A, int32_t MA,
1005  int32_t Mout, int32_t EnableSubnormals = 0,
1006  int32_t RoundingMode = 0,
1007  int32_t RoundingAccuracy = 0) noexcept;
1008 
1009 template <int WA, int Wout>
1010 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<Wout>
1011 __spirv_ArbitraryFloatSinINTEL(sycl::detail::ap_int<WA> A, int32_t MA,
1012  int32_t Mout, int32_t EnableSubnormals = 0,
1013  int32_t RoundingMode = 0,
1014  int32_t RoundingAccuracy = 0) noexcept;
1015 
1016 template <int WA, int Wout>
1017 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<Wout>
1018 __spirv_ArbitraryFloatCosINTEL(sycl::detail::ap_int<WA> A, int32_t MA,
1019  int32_t Mout, int32_t EnableSubnormals = 0,
1020  int32_t RoundingMode = 0,
1021  int32_t RoundingAccuracy = 0) noexcept;
1022 
1023 // Result value contains both values of sine and cosine and so has the size of
1024 // 2 * Wout where Wout is equal to (1 + Eout + Mout).
1025 template <int WA, int Wout>
1026 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<2 * Wout>
1027 __spirv_ArbitraryFloatSinCosINTEL(sycl::detail::ap_int<WA> A, int32_t MA,
1028  int32_t Mout, int32_t EnableSubnormals = 0,
1029  int32_t RoundingMode = 0,
1030  int32_t RoundingAccuracy = 0) noexcept;
1031 
1032 template <int WA, int Wout>
1033 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<Wout>
1034 __spirv_ArbitraryFloatSinPiINTEL(sycl::detail::ap_int<WA> A, int32_t MA,
1035  int32_t Mout, int32_t EnableSubnormals = 0,
1036  int32_t RoundingMode = 0,
1037  int32_t RoundingAccuracy = 0) noexcept;
1038 
1039 template <int WA, int Wout>
1040 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<Wout>
1041 __spirv_ArbitraryFloatCosPiINTEL(sycl::detail::ap_int<WA> A, int32_t MA,
1042  int32_t Mout, int32_t EnableSubnormals = 0,
1043  int32_t RoundingMode = 0,
1044  int32_t RoundingAccuracy = 0) noexcept;
1045 
1046 // Result value contains both values of sine(A*pi) and cosine(A*pi) and so has
1047 // the size of 2 * Wout where Wout is equal to (1 + Eout + Mout).
1048 template <int WA, int Wout>
1049 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<2 * Wout>
1050 __spirv_ArbitraryFloatSinCosPiINTEL(sycl::detail::ap_int<WA> A, int32_t MA,
1051  int32_t Mout, int32_t EnableSubnormals = 0,
1052  int32_t RoundingMode = 0,
1053  int32_t RoundingAccuracy = 0) noexcept;
1054 
1055 template <int WA, int Wout>
1056 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<Wout>
1057 __spirv_ArbitraryFloatASinINTEL(sycl::detail::ap_int<WA> A, int32_t MA,
1058  int32_t Mout, int32_t EnableSubnormals = 0,
1059  int32_t RoundingMode = 0,
1060  int32_t RoundingAccuracy = 0) noexcept;
1061 
1062 template <int WA, int Wout>
1063 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<Wout>
1064 __spirv_ArbitraryFloatASinPiINTEL(sycl::detail::ap_int<WA> A, int32_t MA,
1065  int32_t Mout, int32_t EnableSubnormals = 0,
1066  int32_t RoundingMode = 0,
1067  int32_t RoundingAccuracy = 0) noexcept;
1068 
1069 template <int WA, int Wout>
1070 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<Wout>
1071 __spirv_ArbitraryFloatACosINTEL(sycl::detail::ap_int<WA> A, int32_t MA,
1072  int32_t Mout, int32_t EnableSubnormals = 0,
1073  int32_t RoundingMode = 0,
1074  int32_t RoundingAccuracy = 0) noexcept;
1075 
1076 template <int WA, int Wout>
1077 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<Wout>
1078 __spirv_ArbitraryFloatACosPiINTEL(sycl::detail::ap_int<WA> A, int32_t MA,
1079  int32_t Mout, int32_t EnableSubnormals = 0,
1080  int32_t RoundingMode = 0,
1081  int32_t RoundingAccuracy = 0) noexcept;
1082 
1083 template <int WA, int Wout>
1084 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<Wout>
1085 __spirv_ArbitraryFloatATanINTEL(sycl::detail::ap_int<WA> A, int32_t MA,
1086  int32_t Mout, int32_t EnableSubnormals = 0,
1087  int32_t RoundingMode = 0,
1088  int32_t RoundingAccuracy = 0) noexcept;
1089 
1090 template <int WA, int Wout>
1091 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<Wout>
1092 __spirv_ArbitraryFloatATanPiINTEL(sycl::detail::ap_int<WA> A, int32_t MA,
1093  int32_t Mout, int32_t EnableSubnormals = 0,
1094  int32_t RoundingMode = 0,
1095  int32_t RoundingAccuracy = 0) noexcept;
1096 
1097 template <int WA, int WB, int Wout>
1098 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<Wout>
1099 __spirv_ArbitraryFloatATan2INTEL(sycl::detail::ap_int<WA> A, int32_t MA,
1100  sycl::detail::ap_int<WB> B, int32_t MB,
1101  int32_t Mout, int32_t EnableSubnormals = 0,
1102  int32_t RoundingMode = 0,
1103  int32_t RoundingAccuracy = 0) noexcept;
1104 
1105 template <int WA, int WB, int Wout>
1106 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<Wout>
1107 __spirv_ArbitraryFloatPowINTEL(sycl::detail::ap_int<WA> A, int32_t MA,
1108  sycl::detail::ap_int<WB> B, int32_t MB,
1109  int32_t Mout, int32_t EnableSubnormals = 0,
1110  int32_t RoundingMode = 0,
1111  int32_t RoundingAccuracy = 0) noexcept;
1112 
1113 template <int WA, int WB, int Wout>
1114 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<Wout>
1115 __spirv_ArbitraryFloatPowRINTEL(sycl::detail::ap_int<WA> A, int32_t MA,
1116  sycl::detail::ap_int<WB> B, int32_t MB,
1117  int32_t Mout, int32_t EnableSubnormals = 0,
1118  int32_t RoundingMode = 0,
1119  int32_t RoundingAccuracy = 0) noexcept;
1120 
1121 // PowN built-in calculates `A^B` where `A` is arbitrary precision floating
1122 // point number and `B` is signed or unsigned arbitrary precision integer,
1123 // i.e. its width doesn't depend on sum of exponent and mantissa.
1124 template <int WA, int WB, int Wout>
1125 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<Wout>
1126 __spirv_ArbitraryFloatPowNINTEL(sycl::detail::ap_int<WA> A, int32_t MA,
1127  sycl::detail::ap_int<WB> B, bool SignOfB,
1128  int32_t Mout, int32_t EnableSubnormals = 0,
1129  int32_t RoundingMode = 0,
1130  int32_t RoundingAccuracy = 0) noexcept;
1131 
1132 template <typename dataT>
1133 extern __DPCPP_SYCL_EXTERNAL int32_t
1134 __spirv_ReadPipe(__ocl_RPipeTy<dataT> Pipe, dataT *Data, int32_t Size,
1135  int32_t Alignment) noexcept;
1136 template <typename dataT>
1137 extern __DPCPP_SYCL_EXTERNAL int32_t
1138 __spirv_WritePipe(__ocl_WPipeTy<dataT> Pipe, const dataT *Data, int32_t Size,
1139  int32_t Alignment) noexcept;
1140 template <typename dataT>
1141 extern __DPCPP_SYCL_EXTERNAL void
1142 __spirv_ReadPipeBlockingINTEL(__ocl_RPipeTy<dataT> Pipe, dataT *Data,
1143  int32_t Size, int32_t Alignment) noexcept;
1144 template <typename dataT>
1145 extern __DPCPP_SYCL_EXTERNAL void
1146 __spirv_WritePipeBlockingINTEL(__ocl_WPipeTy<dataT> Pipe, const dataT *Data,
1147  int32_t Size, int32_t Alignment) noexcept;
1148 template <typename dataT>
1149 extern __DPCPP_SYCL_EXTERNAL __ocl_RPipeTy<dataT>
1150 __spirv_CreatePipeFromPipeStorage_read(
1151  const ConstantPipeStorage *Storage) noexcept;
1152 template <typename dataT>
1153 extern __DPCPP_SYCL_EXTERNAL __ocl_WPipeTy<dataT>
1154 __spirv_CreatePipeFromPipeStorage_write(
1155  const ConstantPipeStorage *Storage) noexcept;
1156 
1157 extern __DPCPP_SYCL_EXTERNAL void
1158 __spirv_ocl_prefetch(const __attribute__((opencl_global)) char *Ptr,
1159  size_t NumBytes) noexcept;
1160 
1161 extern __DPCPP_SYCL_EXTERNAL float
1162  __spirv_ConvertBF16ToFINTEL(uint16_t) noexcept;
1163 extern __DPCPP_SYCL_EXTERNAL uint16_t
1164 __spirv_ConvertFToBF16INTEL(float) noexcept;
1165 template <int N>
1166 extern __DPCPP_SYCL_EXTERNAL __ocl_vec_t<float, N>
1167  __spirv_ConvertBF16ToFINTEL(__ocl_vec_t<uint16_t, N>) noexcept;
1168 template <int N>
1169 extern __DPCPP_SYCL_EXTERNAL __ocl_vec_t<uint16_t, N>
1170  __spirv_ConvertFToBF16INTEL(__ocl_vec_t<float, N>) noexcept;
1171 
1173  __SYCL_EXPORT __ocl_vec_t<uint32_t, 4>
1174  __spirv_GroupNonUniformBallot(uint32_t Execution, bool Predicate) noexcept;
1175 
1176 // TODO: I'm not 100% sure that these NonUniform instructions should be
1177 // convergent Following precedent set for GroupNonUniformBallot above
1178 __SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT uint32_t
1179 __spirv_GroupNonUniformBallotBitCount(__spv::Scope::Flag, int,
1180  __ocl_vec_t<uint32_t, 4>) noexcept;
1181 
1182 __SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT int
1183  __spirv_GroupNonUniformBallotFindLSB(__spv::Scope::Flag,
1184  __ocl_vec_t<uint32_t, 4>) noexcept;
1185 
1186 template <typename ValueT, typename IdT>
1187 __SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT ValueT
1188  __spirv_GroupNonUniformBroadcast(__spv::Scope::Flag, ValueT, IdT);
1189 
1190 template <typename ValueT, typename IdT>
1191 __SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT ValueT
1192  __spirv_GroupNonUniformShuffle(__spv::Scope::Flag, ValueT, IdT) noexcept;
1193 
1194 __SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT bool
1195 __spirv_GroupNonUniformAll(__spv::Scope::Flag, bool);
1196 
1197 __SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT bool
1198 __spirv_GroupNonUniformAny(__spv::Scope::Flag, bool);
1199 
1200 template <typename ValueT>
1201 __SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT ValueT
1202 __spirv_GroupNonUniformSMin(__spv::Scope::Flag, unsigned int, ValueT);
1203 
1204 template <typename ValueT>
1205 __SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT ValueT
1206 __spirv_GroupNonUniformUMin(__spv::Scope::Flag, unsigned int, ValueT);
1207 
1208 template <typename ValueT>
1209 __SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT ValueT
1210 __spirv_GroupNonUniformFMin(__spv::Scope::Flag, unsigned int, ValueT);
1211 
1212 template <typename ValueT>
1213 __SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT ValueT
1214 __spirv_GroupNonUniformSMax(__spv::Scope::Flag, unsigned int, ValueT);
1215 
1216 template <typename ValueT>
1217 __SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT ValueT
1218 __spirv_GroupNonUniformUMax(__spv::Scope::Flag, unsigned int, ValueT);
1219 
1220 template <typename ValueT>
1221 __SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT ValueT
1222 __spirv_GroupNonUniformFMax(__spv::Scope::Flag, unsigned int, ValueT);
1223 
1224 template <typename ValueT>
1225 __SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT ValueT
1226 __spirv_GroupNonUniformIAdd(__spv::Scope::Flag, unsigned int, ValueT);
1227 
1228 template <typename ValueT>
1229 __SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT ValueT
1230 __spirv_GroupNonUniformFAdd(__spv::Scope::Flag, unsigned int, ValueT);
1231 
1232 template <typename ValueT>
1233 __SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT ValueT
1234 __spirv_GroupNonUniformIMul(__spv::Scope::Flag, unsigned int, ValueT);
1235 
1236 template <typename ValueT>
1237 __SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT ValueT
1238 __spirv_GroupNonUniformFMul(__spv::Scope::Flag, unsigned int, ValueT);
1239 
1240 template <typename ValueT>
1241 __SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT ValueT
1242 __spirv_GroupNonUniformBitwiseOr(__spv::Scope::Flag, unsigned int, ValueT);
1243 
1244 template <typename ValueT>
1245 __SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT ValueT
1246 __spirv_GroupNonUniformBitwiseXor(__spv::Scope::Flag, unsigned int, ValueT);
1247 
1248 template <typename ValueT>
1249 __SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT ValueT
1250 __spirv_GroupNonUniformBitwiseAnd(__spv::Scope::Flag, unsigned int, ValueT);
1251 
1252 template <typename ValueT>
1253 __SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT ValueT
1254 __spirv_GroupNonUniformLogicalOr(__spv::Scope::Flag, unsigned int, ValueT);
1255 
1256 template <typename ValueT>
1257 __SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT ValueT
1258 __spirv_GroupNonUniformLogicalAnd(__spv::Scope::Flag, unsigned int, ValueT);
1259 
1260 template <typename ValueT>
1261 __SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT ValueT
1262 __spirv_GroupNonUniformSMin(__spv::Scope::Flag, unsigned int, ValueT,
1263  unsigned int);
1264 
1265 template <typename ValueT>
1266 __SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT ValueT
1267 __spirv_GroupNonUniformUMin(__spv::Scope::Flag, unsigned int, ValueT,
1268  unsigned int);
1269 
1270 template <typename ValueT>
1271 __SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT ValueT
1272 __spirv_GroupNonUniformFMin(__spv::Scope::Flag, unsigned int, ValueT,
1273  unsigned int);
1274 
1275 template <typename ValueT>
1276 __SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT ValueT
1277 __spirv_GroupNonUniformSMax(__spv::Scope::Flag, unsigned int, ValueT,
1278  unsigned int);
1279 
1280 template <typename ValueT>
1281 __SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT ValueT
1282 __spirv_GroupNonUniformUMax(__spv::Scope::Flag, unsigned int, ValueT,
1283  unsigned int);
1284 
1285 template <typename ValueT>
1286 __SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT ValueT
1287 __spirv_GroupNonUniformFMax(__spv::Scope::Flag, unsigned int, ValueT,
1288  unsigned int);
1289 
1290 template <typename ValueT>
1291 __SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT ValueT
1292 __spirv_GroupNonUniformIAdd(__spv::Scope::Flag, unsigned int, ValueT,
1293  unsigned int);
1294 
1295 template <typename ValueT>
1296 __SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT ValueT
1297 __spirv_GroupNonUniformFAdd(__spv::Scope::Flag, unsigned int, ValueT,
1298  unsigned int);
1299 
1300 template <typename ValueT>
1301 __SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT ValueT
1302 __spirv_GroupNonUniformIMul(__spv::Scope::Flag, unsigned int, ValueT,
1303  unsigned int);
1304 
1305 template <typename ValueT>
1306 __SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT ValueT
1307 __spirv_GroupNonUniformFMul(__spv::Scope::Flag, unsigned int, ValueT,
1308  unsigned int);
1309 
1310 template <typename ValueT>
1311 __SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT ValueT
1312 __spirv_GroupNonUniformBitwiseOr(__spv::Scope::Flag, unsigned int, ValueT,
1313  unsigned int);
1314 
1315 template <typename ValueT>
1316 __SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT ValueT
1317 __spirv_GroupNonUniformBitwiseXor(__spv::Scope::Flag, unsigned int, ValueT,
1318  unsigned int);
1319 
1320 template <typename ValueT>
1321 __SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT ValueT
1322 __spirv_GroupNonUniformBitwiseAnd(__spv::Scope::Flag, unsigned int, ValueT,
1323  unsigned int);
1324 
1325 template <typename ValueT>
1326 __SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT ValueT
1327 __spirv_GroupNonUniformLogicalOr(__spv::Scope::Flag, unsigned int, ValueT,
1328  unsigned int);
1329 
1330 template <typename ValueT>
1331 __SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT ValueT
1332 __spirv_GroupNonUniformLogicalAnd(__spv::Scope::Flag, unsigned int, ValueT,
1333  unsigned int);
1334 
1335 extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT void
1336 __clc_BarrierInitialize(int64_t *state, int32_t expected_count) noexcept;
1337 
1338 extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT void
1339 __clc_BarrierInvalidate(int64_t *state) noexcept;
1340 
1341 extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT int64_t
1342 __clc_BarrierArrive(int64_t *state) noexcept;
1343 
1344 extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT int64_t
1345 __clc_BarrierArriveAndDrop(int64_t *state) noexcept;
1346 
1347 extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT int64_t
1348 __clc_BarrierArriveNoComplete(int64_t *state, int32_t count) noexcept;
1349 
1350 extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT int64_t
1351 __clc_BarrierArriveAndDropNoComplete(int64_t *state, int32_t count) noexcept;
1352 
1353 extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT void
1354 __clc_BarrierCopyAsyncArrive(int64_t *state) noexcept;
1355 
1356 extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT void
1357 __clc_BarrierCopyAsyncArriveNoInc(int64_t *state) noexcept;
1358 
1359 __SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT void
1360 __clc_BarrierWait(int64_t *state, int64_t arrival) noexcept;
1361 
1362 extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT bool
1363 __clc_BarrierTestWait(int64_t *state, int64_t arrival) noexcept;
1364 
1365 __SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT void
1366 __clc_BarrierArriveAndWait(int64_t *state) noexcept;
1367 
1368 #ifdef __SYCL_USE_VARIADIC_SPIRV_OCL_PRINTF__
1369 extern __DPCPP_SYCL_EXTERNAL int
1370 __spirv_ocl_printf(const __attribute__((opencl_constant)) char *Format, ...);
1371 extern __DPCPP_SYCL_EXTERNAL int __spirv_ocl_printf(const char *Format, ...);
1372 #else
1373 template <typename... Args>
1374 extern __DPCPP_SYCL_EXTERNAL int
1375 __spirv_ocl_printf(const __attribute__((opencl_constant)) char *Format,
1376  Args... args);
1377 template <typename... Args>
1378 extern __DPCPP_SYCL_EXTERNAL int __spirv_ocl_printf(const char *Format,
1379  Args... args);
1380 #endif
1381 
1382 // Native builtin extension
1383 
1384 extern __DPCPP_SYCL_EXTERNAL float __clc_native_tanh(float);
1385 extern __DPCPP_SYCL_EXTERNAL __ocl_vec_t<float, 2>
1386  __clc_native_tanh(__ocl_vec_t<float, 2>);
1387 extern __DPCPP_SYCL_EXTERNAL __ocl_vec_t<float, 3>
1388  __clc_native_tanh(__ocl_vec_t<float, 3>);
1389 extern __DPCPP_SYCL_EXTERNAL __ocl_vec_t<float, 4>
1390  __clc_native_tanh(__ocl_vec_t<float, 4>);
1391 extern __DPCPP_SYCL_EXTERNAL __ocl_vec_t<float, 8>
1392  __clc_native_tanh(__ocl_vec_t<float, 8>);
1393 extern __DPCPP_SYCL_EXTERNAL __ocl_vec_t<float, 16>
1394  __clc_native_tanh(__ocl_vec_t<float, 16>);
1395 
1396 extern __DPCPP_SYCL_EXTERNAL _Float16 __clc_native_tanh(_Float16);
1397 extern __DPCPP_SYCL_EXTERNAL __ocl_vec_t<_Float16, 2>
1398  __clc_native_tanh(__ocl_vec_t<_Float16, 2>);
1399 extern __DPCPP_SYCL_EXTERNAL __ocl_vec_t<_Float16, 3>
1400  __clc_native_tanh(__ocl_vec_t<_Float16, 3>);
1401 extern __DPCPP_SYCL_EXTERNAL __ocl_vec_t<_Float16, 4>
1402  __clc_native_tanh(__ocl_vec_t<_Float16, 4>);
1403 extern __DPCPP_SYCL_EXTERNAL __ocl_vec_t<_Float16, 8>
1404  __clc_native_tanh(__ocl_vec_t<_Float16, 8>);
1405 extern __DPCPP_SYCL_EXTERNAL __ocl_vec_t<_Float16, 16>
1406  __clc_native_tanh(__ocl_vec_t<_Float16, 16>);
1407 
1408 extern __DPCPP_SYCL_EXTERNAL _Float16 __clc_native_exp2(_Float16);
1409 extern __DPCPP_SYCL_EXTERNAL __ocl_vec_t<_Float16, 2>
1410  __clc_native_exp2(__ocl_vec_t<_Float16, 2>);
1411 extern __DPCPP_SYCL_EXTERNAL __ocl_vec_t<_Float16, 3>
1412  __clc_native_exp2(__ocl_vec_t<_Float16, 3>);
1413 extern __DPCPP_SYCL_EXTERNAL __ocl_vec_t<_Float16, 4>
1414  __clc_native_exp2(__ocl_vec_t<_Float16, 4>);
1415 extern __DPCPP_SYCL_EXTERNAL __ocl_vec_t<_Float16, 8>
1416  __clc_native_exp2(__ocl_vec_t<_Float16, 8>);
1417 extern __DPCPP_SYCL_EXTERNAL __ocl_vec_t<_Float16, 16>
1418  __clc_native_exp2(__ocl_vec_t<_Float16, 16>);
1419 
1420 #define __CLC_BF16(...) \
1421  extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT __VA_ARGS__ __clc_fabs( \
1422  __VA_ARGS__) noexcept; \
1423  extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT __VA_ARGS__ __clc_fmin( \
1424  __VA_ARGS__, __VA_ARGS__) noexcept; \
1425  extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT __VA_ARGS__ __clc_fmax( \
1426  __VA_ARGS__, __VA_ARGS__) noexcept; \
1427  extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT __VA_ARGS__ __clc_fma( \
1428  __VA_ARGS__, __VA_ARGS__, __VA_ARGS__) noexcept;
1429 
1430 #define __CLC_BF16_SCAL_VEC(TYPE) \
1431  __CLC_BF16(TYPE) \
1432  __CLC_BF16(__ocl_vec_t<TYPE, 2>) \
1433  __CLC_BF16(__ocl_vec_t<TYPE, 3>) \
1434  __CLC_BF16(__ocl_vec_t<TYPE, 4>) \
1435  __CLC_BF16(__ocl_vec_t<TYPE, 8>) \
1436  __CLC_BF16(__ocl_vec_t<TYPE, 16>)
1437 
1438 __CLC_BF16_SCAL_VEC(uint16_t)
1439 __CLC_BF16_SCAL_VEC(uint32_t)
1440 
1441 #undef __CLC_BF16_SCAL_VEC
1442 #undef __CLC_BF16
1443 
1444 extern __DPCPP_SYCL_EXTERNAL int32_t __spirv_BuiltInGlobalHWThreadIDINTEL();
1445 extern __DPCPP_SYCL_EXTERNAL int32_t __spirv_BuiltInSubDeviceIDINTEL();
1446 extern __DPCPP_SYCL_EXTERNAL uint64_t __spirv_ReadClockKHR(int);
1447 
1448 template <typename from, typename to>
1449 extern __DPCPP_SYCL_EXTERNAL
1450  std::enable_if_t<std::is_integral_v<to> && std::is_unsigned_v<to>, to>
1451  __spirv_ConvertPtrToU(from val) noexcept;
1452 
1453 template <typename RetT, typename... ArgsT>
1454 extern __DPCPP_SYCL_EXTERNAL __spv::__spirv_TaskSequenceINTEL *
1455 __spirv_TaskSequenceCreateINTEL(RetT (*f)(ArgsT...), int Pipelined = -1,
1456  int ClusterMode = -1,
1457  unsigned int ResponseCapacity = 0,
1458  unsigned int InvocationCapacity = 0) noexcept;
1459 
1460 template <typename... ArgsT>
1461 extern __DPCPP_SYCL_EXTERNAL void
1462 __spirv_TaskSequenceAsyncINTEL(__spv::__spirv_TaskSequenceINTEL *TaskSequence,
1463  ArgsT... Args) noexcept;
1464 
1465 template <typename RetT>
1466 extern __DPCPP_SYCL_EXTERNAL RetT __spirv_TaskSequenceGetINTEL(
1467  __spv::__spirv_TaskSequenceINTEL *TaskSequence) noexcept;
1468 
1469 extern __DPCPP_SYCL_EXTERNAL void __spirv_TaskSequenceReleaseINTEL(
1470  __spv::__spirv_TaskSequenceINTEL *TaskSequence) noexcept;
1471 
1472 #else // if !__SYCL_DEVICE_ONLY__
1473 
1474 template <typename dataT>
1477  const dataT *Src, size_t NumElements,
1478  size_t Stride, __ocl_event_t) noexcept {
1479  for (size_t i = 0; i < NumElements; i++) {
1480  Dest[i] = Src[i * Stride];
1481  }
1482  // A real instance of the class is not needed, return dummy pointer.
1483  return nullptr;
1484 }
1485 
1486 template <typename dataT>
1489  const dataT *Src, size_t NumElements,
1490  size_t Stride, __ocl_event_t) noexcept {
1491  for (size_t i = 0; i < NumElements; i++) {
1492  Dest[i * Stride] = Src[i];
1493  }
1494  // A real instance of the class is not needed, return dummy pointer.
1495  return nullptr;
1496 }
1497 
1498 extern __SYCL_EXPORT void __spirv_ocl_prefetch(const char *Ptr,
1499  size_t NumBytes) noexcept;
1500 
1501 __SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT void
1503  uint32_t Semantics) noexcept;
1504 
1505 __SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT void
1506 __spirv_MemoryBarrier(__spv::Scope Memory, uint32_t Semantics) noexcept;
1507 
1508 __SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT void
1509 __spirv_GroupWaitEvents(__spv::Scope Execution, uint32_t NumEvents,
1510  __ocl_event_t *WaitEvents) noexcept;
1511 #endif // !__SYCL_DEVICE_ONLY__
#define __DPCPP_SYCL_EXTERNAL
conditional< sizeof(long)==8, long, long long >::type int64_t
Definition: kernel_desc.hpp:51
__attribute__((always_inline)) auto invoke_simd(sycl
The invoke_simd free function invokes a SIMD function using all work-items in a sub_group.
Definition: access.hpp:18
__SYCL_CONVERGENT__ __ocl_event_t __SYCL_OpGroupAsyncCopyGlobalToLocal(__spv::Scope::Flag, dataT *Dest, const dataT *Src, size_t NumElements, size_t Stride, __ocl_event_t) noexcept
Definition: spirv_ops.hpp:1476
__SYCL_CONVERGENT__ __DPCPP_SYCL_EXTERNAL void __spirv_ControlBarrier(__spv::Scope Execution, __spv::Scope Memory, uint32_t Semantics) noexcept
Definition: spirv_ops.cpp:27
__SYCL_CONVERGENT__ __DPCPP_SYCL_EXTERNAL void __spirv_MemoryBarrier(__spv::Scope Memory, uint32_t Semantics) noexcept
Definition: spirv_ops.cpp:37
__SYCL_CONVERGENT__ __DPCPP_SYCL_EXTERNAL void __spirv_GroupWaitEvents(__spv::Scope Execution, uint32_t NumEvents, __ocl_event_t *WaitEvents) noexcept
Definition: spirv_ops.cpp:19
#define __SYCL_CONVERGENT__
Definition: spirv_ops.hpp:23
void __spirv_ocl_prefetch(const char *Ptr, size_t NumBytes) noexcept
Definition: spirv_ops.cpp:48
__SYCL_CONVERGENT__ __ocl_event_t __SYCL_OpGroupAsyncCopyLocalToGlobal(__spv::Scope::Flag, dataT *Dest, const dataT *Src, size_t NumElements, size_t Stride, __ocl_event_t) noexcept
Definition: spirv_ops.hpp:1488
void * __ocl_event_t
void * __ocl_sampler_t
_Abi const simd< _Tp, _Abi > & noexcept
Definition: simd.hpp:1324