DPC++ Runtime
Runtime libraries for oneAPI DPC++
spirv_ops.hpp
Go to the documentation of this file.
1 //==----------- spirv_ops.hpp --- SPIRV operations -------------------------==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #pragma once
10 
11 #include <CL/__spirv/spirv_types.hpp> // for Scope, __ocl_event_t
12 #include <sycl/detail/defines_elementary.hpp> // for __DPCPP_SYCL_EXTERNAL
13 #include <sycl/detail/export.hpp> // for __SYCL_EXPORT
14 
15 #include <stddef.h> // for size_t
16 #include <stdint.h> // for uint32_t
17 #include <type_traits>
18 
19 // Convergent attribute
20 #ifdef __SYCL_DEVICE_ONLY__
21 #define __SYCL_CONVERGENT__ __attribute__((convergent))
22 #else
23 #define __SYCL_CONVERGENT__
24 #endif
25 
26 #ifdef __SYCL_DEVICE_ONLY__
27 
28 extern __DPCPP_SYCL_EXTERNAL float __spirv_RoundFToTF32INTEL(float a);
29 
30 template <typename T, typename Tp, std::size_t R, std::size_t C,
33  __spv::Scope::Flag S = __spv::Scope::Flag::Subgroup>
36  __spirv_JointMatrixLoadINTEL(T *Ptr, std::size_t Stride,
37  __spv::MatrixLayout Layout = L,
38  __spv::Scope::Flag Sc = S, int MemOperand = 0);
39 
40 template <typename T, typename Tp, std::size_t R, std::size_t C,
43  __spv::Scope::Flag S = __spv::Scope::Flag::Subgroup>
44 extern __DPCPP_SYCL_EXTERNAL void __spirv_JointMatrixStoreINTEL(
46  std::size_t Stride, __spv::MatrixLayout Layout = L,
47  __spv::Scope::Flag Sc = S, int MemOperand = 0);
48 
49 template <typename T, typename Tp, std::size_t R, std::size_t C,
52  __spv::Scope::Flag S = __spv::Scope::Flag::Subgroup>
55  __spirv_CooperativeMatrixConstructCheckedINTEL(int32_t CoordX,
56  int32_t CoordY,
57  uint32_t Height,
58  uint32_t Width,
59  const T Value);
60 
61 template <typename T, typename Tp, std::size_t R, std::size_t C,
64  __spv::Scope::Flag S = __spv::Scope::Flag::Subgroup>
67  __spirv_CooperativeMatrixLoadCheckedINTEL(
68  T *Ptr, int32_t CoordX, int32_t CoordY, __spv::MatrixLayout Layout = L,
69  uint32_t Height = 0, uint32_t Width = 0, std::size_t Stride = 0,
70  int MemOperand = 0);
71 
72 template <typename T, typename Tp, std::size_t R, std::size_t C,
75  __spv::Scope::Flag S = __spv::Scope::Flag::Subgroup>
76 extern __DPCPP_SYCL_EXTERNAL void __spirv_CooperativeMatrixStoreCheckedINTEL(
77  T *Ptr, int32_t CoordX, int32_t CoordY,
79  __spv::MatrixLayout Layout = L, uint32_t Height = 0, uint32_t Width = 0,
80  std::size_t Stride = 0, int MemOperand = 0);
81 
82 template <typename TA, typename TB, typename TC, std::size_t M, std::size_t K,
83  std::size_t N, __spv::MatrixUse UA, __spv::MatrixUse UB,
88  __spv::Scope::Flag S = __spv::Scope::Flag::Subgroup>
91  __spirv_JointMatrixMadINTEL(
95  __spv::Scope::Flag Sc = __spv::Scope::Flag::Subgroup);
96 
97 template <typename T1, typename T2, typename T3, std::size_t M, std::size_t K,
98  std::size_t N, __spv::MatrixUse UA, __spv::MatrixUse UB,
103  __spv::Scope::Flag S = __spv::Scope::Flag::Subgroup>
106  __spirv_JointMatrixUUMadINTEL(
110  __spv::Scope::Flag Sc = __spv::Scope::Flag::Subgroup);
111 
112 template <typename T1, typename T2, typename T3, std::size_t M, std::size_t K,
113  std::size_t N, __spv::MatrixUse UA, __spv::MatrixUse UB,
114  __spv::MatrixUse UC,
118  __spv::Scope::Flag S = __spv::Scope::Flag::Subgroup>
121  __spirv_JointMatrixUSMadINTEL(
125  __spv::Scope::Flag Sc = __spv::Scope::Flag::Subgroup);
126 
127 template <typename T1, typename T2, typename T3, std::size_t M, std::size_t K,
128  std::size_t N, __spv::MatrixUse UA, __spv::MatrixUse UB,
129  __spv::MatrixUse UC,
133  __spv::Scope::Flag S = __spv::Scope::Flag::Subgroup>
136  __spirv_JointMatrixSUMadINTEL(
140  __spv::Scope::Flag Sc = __spv::Scope::Flag::Subgroup);
141 
142 template <typename T, typename Tp, std::size_t R, std::size_t C,
145  __spv::Scope::Flag S = __spv::Scope::Flag::Subgroup>
148  __spirv_CompositeConstruct(const T v);
149 
150 template <typename T, std::size_t R, std::size_t C, __spv::MatrixUse U,
152  __spv::Scope::Flag S = __spv::Scope::Flag::Subgroup>
153 extern __DPCPP_SYCL_EXTERNAL __ocl_vec_t<uint32_t, 2>
154 __spirv_JointMatrixGetElementCoordINTEL(
156 
157 template <typename T, std::size_t R, std::size_t C, __spv::MatrixUse U,
159  __spv::Scope::Flag S = __spv::Scope::Flag::Subgroup>
160 extern __DPCPP_SYCL_EXTERNAL size_t __spirv_JointMatrixWorkItemLengthINTEL(
162 
163 template <typename Ts, typename T, std::size_t R, std::size_t C,
166  __spv::Scope::Flag S = __spv::Scope::Flag::Subgroup>
167 extern __DPCPP_SYCL_EXTERNAL Ts __spirv_VectorExtractDynamic(
169 
170 template <typename Ts, typename T, std::size_t R, std::size_t C,
173  __spv::Scope::Flag S = __spv::Scope::Flag::Subgroup>
175 __spirv_VectorInsertDynamic(__spv::__spirv_JointMatrixINTEL<T, R, C, L, S, U> *,
176  Ts val, size_t i);
177 
178 template <typename T>
179 extern __DPCPP_SYCL_EXTERNAL void __spirv_CooperativeMatrixPrefetchINTEL(
180  T *Ptr, uint32_t NumRows, uint32_t NumCols, unsigned int CacheLevel,
181  __spv::MatrixLayout Layout, size_t Stride);
182 
183 #ifndef __SPIRV_BUILTIN_DECLARATIONS__
184 #error \
185  "SPIR-V built-ins are not available. Please set -fdeclare-spirv-builtins flag."
186 #endif
187 
188 template <typename RetT, typename ImageT>
189 extern __DPCPP_SYCL_EXTERNAL RetT __spirv_ImageQueryFormat(ImageT);
190 
191 template <typename RetT, typename ImageT>
192 extern __DPCPP_SYCL_EXTERNAL RetT __spirv_ImageQueryOrder(ImageT);
193 
194 template <typename RetT, typename ImageT>
195 extern __DPCPP_SYCL_EXTERNAL RetT __spirv_ImageQuerySize(ImageT);
196 
197 template <typename ImageT, typename CoordT, typename ValT>
198 extern __DPCPP_SYCL_EXTERNAL void __spirv_ImageWrite(ImageT, CoordT, ValT);
199 
200 template <class RetT, typename ImageT, typename TempArgT>
201 extern __DPCPP_SYCL_EXTERNAL RetT __spirv_ImageRead(ImageT, TempArgT);
202 
203 template <class RetT, typename ImageT, typename TempArgT>
204 extern __DPCPP_SYCL_EXTERNAL RetT __spirv_ImageFetch(ImageT, TempArgT);
205 
206 template <class RetT, typename ImageT, typename TempArgT>
207 extern __DPCPP_SYCL_EXTERNAL RetT __spirv_SampledImageFetch(ImageT, TempArgT);
208 
209 template <class RetT, typename ImageT, typename TempArgT>
210 extern __DPCPP_SYCL_EXTERNAL RetT __spirv_ImageArrayFetch(ImageT, TempArgT,
211  int);
212 
213 template <class RetT, typename ImageT, typename TempArgT>
214 extern __DPCPP_SYCL_EXTERNAL RetT __spirv_SampledImageArrayFetch(ImageT,
215  TempArgT, int);
216 
217 template <class RetT, typename ImageT, typename TempArgT>
218 extern __DPCPP_SYCL_EXTERNAL RetT __spirv_ImageArrayRead(ImageT, TempArgT, int);
219 
220 template <typename ImageT, typename CoordT, typename ValT>
221 extern __DPCPP_SYCL_EXTERNAL void __spirv_ImageArrayWrite(ImageT, CoordT, int,
222  ValT);
223 
224 template <typename ImageT, typename SampledType>
225 extern __DPCPP_SYCL_EXTERNAL SampledType __spirv_SampledImage(ImageT,
227 
228 template <typename SampledType, typename TempRetT, typename TempArgT>
229 extern __DPCPP_SYCL_EXTERNAL TempRetT
230 __spirv_ImageSampleExplicitLod(SampledType, TempArgT, int, float);
231 
232 template <typename SampledType, typename TempRetT, typename TempArgT>
233 extern __DPCPP_SYCL_EXTERNAL TempRetT
234 __spirv_ImageSampleExplicitLod(SampledType, TempArgT, int, TempArgT, TempArgT);
235 
236 template <typename SampledType, typename TempRetT, typename TempArgT>
237 extern __DPCPP_SYCL_EXTERNAL TempRetT __spirv_ImageSampleCubemap(SampledType,
238  TempArgT);
239 
240 template <typename RetT, class HandleT>
241 extern __DPCPP_SYCL_EXTERNAL RetT __spirv_ConvertHandleToImageINTEL(HandleT);
242 
243 template <typename RetT, class HandleT>
244 extern __DPCPP_SYCL_EXTERNAL RetT __spirv_ConvertHandleToSamplerINTEL(HandleT);
245 
246 template <typename RetT, class HandleT>
248  RetT __spirv_ConvertHandleToSampledImageINTEL(HandleT);
249 
250 #define __SYCL_OpGroupAsyncCopyGlobalToLocal __spirv_GroupAsyncCopy
251 #define __SYCL_OpGroupAsyncCopyLocalToGlobal __spirv_GroupAsyncCopy
252 
253 // Atomic SPIR-V builtins
254 #define __SPIRV_ATOMIC_LOAD(AS, Type) \
255  extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicLoad( \
256  AS const Type *P, __spv::Scope::Flag S, \
257  __spv::MemorySemanticsMask::Flag O);
258 #define __SPIRV_ATOMIC_STORE(AS, Type) \
259  extern __DPCPP_SYCL_EXTERNAL void __spirv_AtomicStore( \
260  AS Type *P, __spv::Scope::Flag S, __spv::MemorySemanticsMask::Flag O, \
261  Type V);
262 #define __SPIRV_ATOMIC_EXCHANGE(AS, Type) \
263  extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicExchange( \
264  AS Type *P, __spv::Scope::Flag S, __spv::MemorySemanticsMask::Flag O, \
265  Type V);
266 #define __SPIRV_ATOMIC_CMP_EXCHANGE(AS, Type) \
267  extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicCompareExchange( \
268  AS Type *P, __spv::Scope::Flag S, __spv::MemorySemanticsMask::Flag E, \
269  __spv::MemorySemanticsMask::Flag U, Type V, Type C);
270 #define __SPIRV_ATOMIC_IADD(AS, Type) \
271  extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicIAdd( \
272  AS Type *P, __spv::Scope::Flag S, __spv::MemorySemanticsMask::Flag O, \
273  Type V);
274 #define __SPIRV_ATOMIC_ISUB(AS, Type) \
275  extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicISub( \
276  AS Type *P, __spv::Scope::Flag S, __spv::MemorySemanticsMask::Flag O, \
277  Type V);
278 #define __SPIRV_ATOMIC_FADD(AS, Type) \
279  extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicFAddEXT( \
280  AS Type *P, __spv::Scope::Flag S, __spv::MemorySemanticsMask::Flag O, \
281  Type V);
282 #define __SPIRV_ATOMIC_SMIN(AS, Type) \
283  extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicSMin( \
284  AS Type *P, __spv::Scope::Flag S, __spv::MemorySemanticsMask::Flag O, \
285  Type V);
286 #define __SPIRV_ATOMIC_UMIN(AS, Type) \
287  extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicUMin( \
288  AS Type *P, __spv::Scope::Flag S, __spv::MemorySemanticsMask::Flag O, \
289  Type V);
290 #define __SPIRV_ATOMIC_FMIN(AS, Type) \
291  extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicFMinEXT( \
292  AS Type *P, __spv::Scope::Flag S, __spv::MemorySemanticsMask::Flag O, \
293  Type V);
294 #define __SPIRV_ATOMIC_SMAX(AS, Type) \
295  extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicSMax( \
296  AS Type *P, __spv::Scope::Flag S, __spv::MemorySemanticsMask::Flag O, \
297  Type V);
298 #define __SPIRV_ATOMIC_UMAX(AS, Type) \
299  extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicUMax( \
300  AS Type *P, __spv::Scope::Flag S, __spv::MemorySemanticsMask::Flag O, \
301  Type V);
302 #define __SPIRV_ATOMIC_FMAX(AS, Type) \
303  extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicFMaxEXT( \
304  AS Type *P, __spv::Scope::Flag S, __spv::MemorySemanticsMask::Flag O, \
305  Type V);
306 #define __SPIRV_ATOMIC_AND(AS, Type) \
307  extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicAnd( \
308  AS Type *P, __spv::Scope::Flag S, __spv::MemorySemanticsMask::Flag O, \
309  Type V);
310 #define __SPIRV_ATOMIC_OR(AS, Type) \
311  extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicOr( \
312  AS Type *P, __spv::Scope::Flag S, __spv::MemorySemanticsMask::Flag O, \
313  Type V);
314 #define __SPIRV_ATOMIC_XOR(AS, Type) \
315  extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicXor( \
316  AS Type *P, __spv::Scope::Flag S, __spv::MemorySemanticsMask::Flag O, \
317  Type V);
318 
319 #define __SPIRV_ATOMIC_FLOAT(AS, Type) \
320  __SPIRV_ATOMIC_FADD(AS, Type) \
321  __SPIRV_ATOMIC_FMIN(AS, Type) \
322  __SPIRV_ATOMIC_FMAX(AS, Type) \
323  __SPIRV_ATOMIC_LOAD(AS, Type) \
324  __SPIRV_ATOMIC_STORE(AS, Type) \
325  __SPIRV_ATOMIC_EXCHANGE(AS, Type)
326 
327 #define __SPIRV_ATOMIC_BASE(AS, Type) \
328  __SPIRV_ATOMIC_FLOAT(AS, Type) \
329  __SPIRV_ATOMIC_CMP_EXCHANGE(AS, Type) \
330  __SPIRV_ATOMIC_IADD(AS, Type) \
331  __SPIRV_ATOMIC_ISUB(AS, Type) \
332  __SPIRV_ATOMIC_AND(AS, Type) \
333  __SPIRV_ATOMIC_OR(AS, Type) \
334  __SPIRV_ATOMIC_XOR(AS, Type)
335 
336 #define __SPIRV_ATOMIC_SIGNED(AS, Type) \
337  __SPIRV_ATOMIC_BASE(AS, Type) \
338  __SPIRV_ATOMIC_SMIN(AS, Type) \
339  __SPIRV_ATOMIC_SMAX(AS, Type)
340 
341 #define __SPIRV_ATOMIC_UNSIGNED(AS, Type) \
342  __SPIRV_ATOMIC_BASE(AS, Type) \
343  __SPIRV_ATOMIC_UMIN(AS, Type) \
344  __SPIRV_ATOMIC_UMAX(AS, Type)
345 
346 // Helper atomic operations which select correct signed/unsigned version
347 // of atomic min/max based on the type
348 #define __SPIRV_ATOMIC_MINMAX(AS, Op) \
349  template <typename T> \
350  typename std::enable_if_t< \
351  std::is_integral<T>::value && std::is_signed<T>::value, T> \
352  __spirv_Atomic##Op(AS T *Ptr, __spv::Scope::Flag Memory, \
353  __spv::MemorySemanticsMask::Flag Semantics, \
354  T Value) { \
355  return __spirv_AtomicS##Op(Ptr, Memory, Semantics, Value); \
356  } \
357  template <typename T> \
358  typename std::enable_if_t< \
359  std::is_integral<T>::value && !std::is_signed<T>::value, T> \
360  __spirv_Atomic##Op(AS T *Ptr, __spv::Scope::Flag Memory, \
361  __spv::MemorySemanticsMask::Flag Semantics, \
362  T Value) { \
363  return __spirv_AtomicU##Op(Ptr, Memory, Semantics, Value); \
364  } \
365  template <typename T> \
366  typename std::enable_if_t<std::is_floating_point<T>::value, T> \
367  __spirv_Atomic##Op(AS T *Ptr, __spv::Scope::Flag Memory, \
368  __spv::MemorySemanticsMask::Flag Semantics, \
369  T Value) { \
370  return __spirv_AtomicF##Op##EXT(Ptr, Memory, Semantics, Value); \
371  }
372 
373 #define __SPIRV_ATOMICS(macro, Arg) \
374  macro(__attribute__((opencl_global)), Arg) \
375  macro(__attribute__((opencl_local)), Arg) macro(, Arg)
376 
377 __SPIRV_ATOMICS(__SPIRV_ATOMIC_FLOAT, float)
378 __SPIRV_ATOMICS(__SPIRV_ATOMIC_FLOAT, double)
379 __SPIRV_ATOMICS(__SPIRV_ATOMIC_SIGNED, int)
380 __SPIRV_ATOMICS(__SPIRV_ATOMIC_SIGNED, long)
381 __SPIRV_ATOMICS(__SPIRV_ATOMIC_SIGNED, long long)
382 __SPIRV_ATOMICS(__SPIRV_ATOMIC_UNSIGNED, unsigned int)
383 __SPIRV_ATOMICS(__SPIRV_ATOMIC_UNSIGNED, unsigned long)
384 __SPIRV_ATOMICS(__SPIRV_ATOMIC_UNSIGNED, unsigned long long)
385 __SPIRV_ATOMICS(__SPIRV_ATOMIC_MINMAX, Min)
386 __SPIRV_ATOMICS(__SPIRV_ATOMIC_MINMAX, Max)
387 
388 #undef __SPIRV_ATOMICS
389 #undef __SPIRV_ATOMIC_AND
390 #undef __SPIRV_ATOMIC_BASE
391 #undef __SPIRV_ATOMIC_CMP_EXCHANGE
392 #undef __SPIRV_ATOMIC_EXCHANGE
393 #undef __SPIRV_ATOMIC_FADD
394 #undef __SPIRV_ATOMIC_FLOAT
395 #undef __SPIRV_ATOMIC_FMAX
396 #undef __SPIRV_ATOMIC_FMIN
397 #undef __SPIRV_ATOMIC_IADD
398 #undef __SPIRV_ATOMIC_ISUB
399 #undef __SPIRV_ATOMIC_LOAD
400 #undef __SPIRV_ATOMIC_MINMAX
401 #undef __SPIRV_ATOMIC_OR
402 #undef __SPIRV_ATOMIC_SIGNED
403 #undef __SPIRV_ATOMIC_SMAX
404 #undef __SPIRV_ATOMIC_SMIN
405 #undef __SPIRV_ATOMIC_STORE
406 #undef __SPIRV_ATOMIC_UMAX
407 #undef __SPIRV_ATOMIC_UMIN
408 #undef __SPIRV_ATOMIC_UNSIGNED
409 #undef __SPIRV_ATOMIC_XOR
410 
411 template <typename dataT>
412 extern __attribute__((opencl_global)) dataT *
413 __SYCL_GenericCastToPtrExplicit_ToGlobal(void *Ptr) noexcept {
414  return (__attribute__((opencl_global)) dataT *)
415  __spirv_GenericCastToPtrExplicit_ToGlobal(
417 }
418 
419 template <typename dataT>
420 extern const __attribute__((opencl_global)) dataT *
421 __SYCL_GenericCastToPtrExplicit_ToGlobal(const void *Ptr) noexcept {
422  return (const __attribute__((opencl_global)) dataT *)
423  __spirv_GenericCastToPtrExplicit_ToGlobal(
425 }
426 
427 template <typename dataT>
428 extern volatile __attribute__((opencl_global)) dataT *
429 __SYCL_GenericCastToPtrExplicit_ToGlobal(volatile void *Ptr) noexcept {
430  return (volatile __attribute__((opencl_global)) dataT *)
431  __spirv_GenericCastToPtrExplicit_ToGlobal(
433 }
434 
435 template <typename dataT>
436 extern const volatile __attribute__((opencl_global)) dataT *
437 __SYCL_GenericCastToPtrExplicit_ToGlobal(const volatile void *Ptr) noexcept {
438  return (const volatile __attribute__((opencl_global)) dataT *)
439  __spirv_GenericCastToPtrExplicit_ToGlobal(
441 }
442 
443 template <typename dataT>
444 extern __attribute__((opencl_local)) dataT *
445 __SYCL_GenericCastToPtrExplicit_ToLocal(void *Ptr) noexcept {
446  return (__attribute__((opencl_local)) dataT *)
447  __spirv_GenericCastToPtrExplicit_ToLocal(Ptr,
449 }
450 
451 template <typename dataT>
452 extern const __attribute__((opencl_local)) dataT *
453 __SYCL_GenericCastToPtrExplicit_ToLocal(const void *Ptr) noexcept {
454  return (const __attribute__((opencl_local)) dataT *)
455  __spirv_GenericCastToPtrExplicit_ToLocal(Ptr,
457 }
458 
459 template <typename dataT>
460 extern volatile __attribute__((opencl_local)) dataT *
461 __SYCL_GenericCastToPtrExplicit_ToLocal(volatile void *Ptr) noexcept {
462  return (volatile __attribute__((opencl_local)) dataT *)
463  __spirv_GenericCastToPtrExplicit_ToLocal(Ptr,
465 }
466 
467 template <typename dataT>
468 extern const volatile __attribute__((opencl_local)) dataT *
469 __SYCL_GenericCastToPtrExplicit_ToLocal(const volatile void *Ptr) noexcept {
470  return (const volatile __attribute__((opencl_local)) dataT *)
471  __spirv_GenericCastToPtrExplicit_ToLocal(Ptr,
473 }
474 
475 template <typename dataT>
476 extern __attribute__((opencl_private)) dataT *
477 __SYCL_GenericCastToPtrExplicit_ToPrivate(void *Ptr) noexcept {
478  return (__attribute__((opencl_private)) dataT *)
479  __spirv_GenericCastToPtrExplicit_ToPrivate(Ptr,
481 }
482 
483 template <typename dataT>
484 extern const __attribute__((opencl_private)) dataT *
485 __SYCL_GenericCastToPtrExplicit_ToPrivate(const void *Ptr) noexcept {
486  return (const __attribute__((opencl_private)) dataT *)
487  __spirv_GenericCastToPtrExplicit_ToPrivate(Ptr,
489 }
490 
491 template <typename dataT>
492 extern volatile __attribute__((opencl_private)) dataT *
493 __SYCL_GenericCastToPtrExplicit_ToPrivate(volatile void *Ptr) noexcept {
494  return (volatile __attribute__((opencl_private)) dataT *)
495  __spirv_GenericCastToPtrExplicit_ToPrivate(Ptr,
497 }
498 
499 template <typename dataT>
500 extern const volatile __attribute__((opencl_private)) dataT *
501 __SYCL_GenericCastToPtrExplicit_ToPrivate(const volatile void *Ptr) noexcept {
502  return (const volatile __attribute__((opencl_private)) dataT *)
503  __spirv_GenericCastToPtrExplicit_ToPrivate(Ptr,
505 }
506 
507 template <typename dataT>
508 extern __attribute__((opencl_global)) dataT *
509 __SYCL_GenericCastToPtr_ToGlobal(void *Ptr) noexcept {
510  return (__attribute__((opencl_global)) dataT *)
511  __spirv_GenericCastToPtr_ToGlobal(Ptr,
513 }
514 
515 template <typename dataT>
516 extern const __attribute__((opencl_global)) dataT *
517 __SYCL_GenericCastToPtr_ToGlobal(const void *Ptr) noexcept {
518  return (const __attribute__((opencl_global)) dataT *)
519  __spirv_GenericCastToPtr_ToGlobal(Ptr,
521 }
522 
523 template <typename dataT>
524 extern volatile __attribute__((opencl_global)) dataT *
525 __SYCL_GenericCastToPtr_ToGlobal(volatile void *Ptr) noexcept {
526  return (volatile __attribute__((opencl_global)) dataT *)
527  __spirv_GenericCastToPtr_ToGlobal(Ptr,
529 }
530 
531 template <typename dataT>
532 extern const volatile __attribute__((opencl_global)) dataT *
533 __SYCL_GenericCastToPtr_ToGlobal(const volatile void *Ptr) noexcept {
534  return (const volatile __attribute__((opencl_global)) dataT *)
535  __spirv_GenericCastToPtr_ToGlobal(Ptr,
537 }
538 
539 template <typename dataT>
540 extern __attribute__((opencl_local)) dataT *
541 __SYCL_GenericCastToPtr_ToLocal(void *Ptr) noexcept {
542  return (__attribute__((opencl_local)) dataT *)
543  __spirv_GenericCastToPtr_ToLocal(Ptr, __spv::StorageClass::Workgroup);
544 }
545 
546 template <typename dataT>
547 extern const __attribute__((opencl_local)) dataT *
548 __SYCL_GenericCastToPtr_ToLocal(const void *Ptr) noexcept {
549  return (const __attribute__((opencl_local)) dataT *)
550  __spirv_GenericCastToPtr_ToLocal(Ptr, __spv::StorageClass::Workgroup);
551 }
552 
553 template <typename dataT>
554 extern volatile __attribute__((opencl_local)) dataT *
555 __SYCL_GenericCastToPtr_ToLocal(volatile void *Ptr) noexcept {
556  return (volatile __attribute__((opencl_local)) dataT *)
557  __spirv_GenericCastToPtr_ToLocal(Ptr, __spv::StorageClass::Workgroup);
558 }
559 
560 template <typename dataT>
561 extern const volatile __attribute__((opencl_local)) dataT *
562 __SYCL_GenericCastToPtr_ToLocal(const volatile void *Ptr) noexcept {
563  return (const volatile __attribute__((opencl_local)) dataT *)
564  __spirv_GenericCastToPtr_ToLocal(Ptr, __spv::StorageClass::Workgroup);
565 }
566 
567 template <typename dataT>
568 extern __attribute__((opencl_private)) dataT *
569 __SYCL_GenericCastToPtr_ToPrivate(void *Ptr) noexcept {
570  return (__attribute__((opencl_private)) dataT *)
571  __spirv_GenericCastToPtr_ToPrivate(Ptr, __spv::StorageClass::Function);
572 }
573 
574 template <typename dataT>
575 extern const __attribute__((opencl_private)) dataT *
576 __SYCL_GenericCastToPtr_ToPrivate(const void *Ptr) noexcept {
577  return (const __attribute__((opencl_private)) dataT *)
578  __spirv_GenericCastToPtr_ToPrivate(Ptr, __spv::StorageClass::Function);
579 }
580 
581 template <typename dataT>
582 extern volatile __attribute__((opencl_private)) dataT *
583 __SYCL_GenericCastToPtr_ToPrivate(volatile void *Ptr) noexcept {
584  return (volatile __attribute__((opencl_private)) dataT *)
585  __spirv_GenericCastToPtr_ToPrivate(Ptr, __spv::StorageClass::Function);
586 }
587 
588 template <typename dataT>
589 extern const volatile __attribute__((opencl_private)) dataT *
590 __SYCL_GenericCastToPtr_ToPrivate(const volatile void *Ptr) noexcept {
591  return (const volatile __attribute__((opencl_private)) dataT *)
592  __spirv_GenericCastToPtr_ToPrivate(Ptr, __spv::StorageClass::Function);
593 }
594 
595 template <typename dataT>
597 __spirv_SubgroupShuffleINTEL(dataT Data, uint32_t InvocationId) noexcept;
598 template <typename dataT>
600 __spirv_SubgroupShuffleDownINTEL(dataT Current, dataT Next,
601  uint32_t Delta) noexcept;
602 template <typename dataT>
604 __spirv_SubgroupShuffleUpINTEL(dataT Previous, dataT Current,
605  uint32_t Delta) noexcept;
606 template <typename dataT>
608 __spirv_SubgroupShuffleXorINTEL(dataT Data, uint32_t Value) noexcept;
609 
610 template <typename dataT>
612 __spirv_SubgroupBlockReadINTEL(const __attribute__((opencl_global))
613  uint8_t *Ptr) noexcept;
614 
615 template <typename dataT>
617 __spirv_SubgroupBlockWriteINTEL(__attribute__((opencl_global)) uint8_t *Ptr,
618  dataT Data) noexcept;
619 
620 template <typename dataT>
622 __spirv_SubgroupBlockReadINTEL(const __attribute__((opencl_global))
623  uint16_t *Ptr) noexcept;
624 
625 template <typename dataT>
627 __spirv_SubgroupBlockWriteINTEL(__attribute__((opencl_global)) uint16_t *Ptr,
628  dataT Data) noexcept;
629 
630 template <typename dataT>
632 __spirv_SubgroupBlockReadINTEL(const __attribute__((opencl_global))
633  uint32_t *Ptr) noexcept;
634 
635 template <typename dataT>
637 __spirv_SubgroupBlockWriteINTEL(__attribute__((opencl_global)) uint32_t *Ptr,
638  dataT Data) noexcept;
639 
640 template <typename dataT>
642 __spirv_SubgroupBlockReadINTEL(const __attribute__((opencl_global))
643  uint64_t *Ptr) noexcept;
644 
645 template <typename dataT>
647 __spirv_SubgroupBlockWriteINTEL(__attribute__((opencl_global)) uint64_t *Ptr,
648  dataT Data) noexcept;
649 template <int W, int rW>
650 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<rW>
651 __spirv_FixedSqrtINTEL(sycl::detail::ap_int<W> a, bool S, int32_t I, int32_t rI,
652  int32_t Quantization = 0, int32_t Overflow = 0) noexcept;
653 template <int W, int rW>
654 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<rW>
655 __spirv_FixedRecipINTEL(sycl::detail::ap_int<W> a, bool S, int32_t I,
656  int32_t rI, int32_t Quantization = 0,
657  int32_t Overflow = 0) noexcept;
658 template <int W, int rW>
659 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<rW>
660 __spirv_FixedRsqrtINTEL(sycl::detail::ap_int<W> a, bool S, int32_t I,
661  int32_t rI, int32_t Quantization = 0,
662  int32_t Overflow = 0) noexcept;
663 template <int W, int rW>
664 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<rW>
665 __spirv_FixedSinINTEL(sycl::detail::ap_int<W> a, bool S, int32_t I, int32_t rI,
666  int32_t Quantization = 0, int32_t Overflow = 0) noexcept;
667 template <int W, int rW>
668 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<rW>
669 __spirv_FixedCosINTEL(sycl::detail::ap_int<W> a, bool S, int32_t I, int32_t rI,
670  int32_t Quantization = 0, int32_t Overflow = 0) noexcept;
671 template <int W, int rW>
672 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<2 * rW>
673 __spirv_FixedSinCosINTEL(sycl::detail::ap_int<W> a, bool S, int32_t I,
674  int32_t rI, int32_t Quantization = 0,
675  int32_t Overflow = 0) noexcept;
676 template <int W, int rW>
677 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<rW>
678 __spirv_FixedSinPiINTEL(sycl::detail::ap_int<W> a, bool S, int32_t I,
679  int32_t rI, int32_t Quantization = 0,
680  int32_t Overflow = 0) noexcept;
681 template <int W, int rW>
682 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<rW>
683 __spirv_FixedCosPiINTEL(sycl::detail::ap_int<W> a, bool S, int32_t I,
684  int32_t rI, int32_t Quantization = 0,
685  int32_t Overflow = 0) noexcept;
686 template <int W, int rW>
687 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<2 * rW>
688 __spirv_FixedSinCosPiINTEL(sycl::detail::ap_int<W> a, bool S, int32_t I,
689  int32_t rI, int32_t Quantization = 0,
690  int32_t Overflow = 0) noexcept;
691 template <int W, int rW>
692 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<rW>
693 __spirv_FixedLogINTEL(sycl::detail::ap_int<W> a, bool S, int32_t I, int32_t rI,
694  int32_t Quantization = 0, int32_t Overflow = 0) noexcept;
695 template <int W, int rW>
696 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<rW>
697 __spirv_FixedExpINTEL(sycl::detail::ap_int<W> a, bool S, int32_t I, int32_t rI,
698  int32_t Quantization = 0, int32_t Overflow = 0) noexcept;
699 
700 // In the following built-ins width of arbitrary precision integer type for
701 // a floating point variable should be equal to sum of corresponding
702 // exponent width E, mantissa width M and 1 for sign bit. I.e. WA = EA + MA + 1.
703 template <int WA, int Wout>
704 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<Wout>
705 __spirv_ArbitraryFloatCastINTEL(sycl::detail::ap_int<WA> A, int32_t MA,
706  int32_t Mout, int32_t EnableSubnormals = 0,
707  int32_t RoundingMode = 0,
708  int32_t RoundingAccuracy = 0) noexcept;
709 
710 template <int WA, int Wout>
711 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<Wout>
712 __spirv_ArbitraryFloatCastFromIntINTEL(sycl::detail::ap_int<WA> A, int32_t Mout,
713  bool FromSign = false,
714  int32_t EnableSubnormals = 0,
715  int32_t RoundingMode = 0,
716  int32_t RoundingAccuracy = 0) noexcept;
717 
718 template <int WA, int Wout>
719 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<Wout>
720 __spirv_ArbitraryFloatCastToIntINTEL(sycl::detail::ap_int<WA> A, int32_t MA,
721  bool ToSign = false,
722  int32_t EnableSubnormals = 0,
723  int32_t RoundingMode = 0,
724  int32_t RoundingAccuracy = 0) noexcept;
725 
726 template <int WA, int WB, int Wout>
727 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<Wout>
728 __spirv_ArbitraryFloatAddINTEL(sycl::detail::ap_int<WA> A, int32_t MA,
729  sycl::detail::ap_int<WB> B, int32_t MB,
730  int32_t Mout, int32_t EnableSubnormals = 0,
731  int32_t RoundingMode = 0,
732  int32_t RoundingAccuracy = 0) noexcept;
733 
734 template <int WA, int WB, int Wout>
735 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<Wout>
736 __spirv_ArbitraryFloatSubINTEL(sycl::detail::ap_int<WA> A, int32_t MA,
737  sycl::detail::ap_int<WB> B, int32_t MB,
738  int32_t Mout, int32_t EnableSubnormals = 0,
739  int32_t RoundingMode = 0,
740  int32_t RoundingAccuracy = 0) noexcept;
741 
742 template <int WA, int WB, int Wout>
743 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<Wout>
744 __spirv_ArbitraryFloatMulINTEL(sycl::detail::ap_int<WA> A, int32_t MA,
745  sycl::detail::ap_int<WB> B, int32_t MB,
746  int32_t Mout, int32_t EnableSubnormals = 0,
747  int32_t RoundingMode = 0,
748  int32_t RoundingAccuracy = 0) noexcept;
749 
750 template <int WA, int WB, int Wout>
751 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<Wout>
752 __spirv_ArbitraryFloatDivINTEL(sycl::detail::ap_int<WA> A, int32_t MA,
753  sycl::detail::ap_int<WB> B, int32_t MB,
754  int32_t Mout, int32_t EnableSubnormals = 0,
755  int32_t RoundingMode = 0,
756  int32_t RoundingAccuracy = 0) noexcept;
757 
758 // Comparison built-ins don't use Subnormal Support, Rounding Mode and
759 // Rounding Accuracy.
760 template <int WA, int WB>
761 extern __DPCPP_SYCL_EXTERNAL bool
762 __spirv_ArbitraryFloatGTINTEL(sycl::detail::ap_int<WA> A, int32_t MA,
763  sycl::detail::ap_int<WB> B, int32_t MB) noexcept;
764 
765 template <int WA, int WB>
766 extern __DPCPP_SYCL_EXTERNAL bool
767 __spirv_ArbitraryFloatGEINTEL(sycl::detail::ap_int<WA> A, int32_t MA,
768  sycl::detail::ap_int<WB> B, int32_t MB) noexcept;
769 
770 template <int WA, int WB>
771 extern __DPCPP_SYCL_EXTERNAL bool
772 __spirv_ArbitraryFloatLTINTEL(sycl::detail::ap_int<WA> A, int32_t MA,
773  sycl::detail::ap_int<WB> B, int32_t MB) noexcept;
774 
775 template <int WA, int WB>
776 extern __DPCPP_SYCL_EXTERNAL bool
777 __spirv_ArbitraryFloatLEINTEL(sycl::detail::ap_int<WA> A, int32_t MA,
778  sycl::detail::ap_int<WB> B, int32_t MB) noexcept;
779 
780 template <int WA, int WB>
781 extern __DPCPP_SYCL_EXTERNAL bool
782 __spirv_ArbitraryFloatEQINTEL(sycl::detail::ap_int<WA> A, int32_t MA,
783  sycl::detail::ap_int<WB> B, int32_t MB) noexcept;
784 
785 template <int WA, int Wout>
786 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<Wout>
787 __spirv_ArbitraryFloatRecipINTEL(sycl::detail::ap_int<WA> A, int32_t MA,
788  int32_t Mout, int32_t EnableSubnormals = 0,
789  int32_t RoundingMode = 0,
790  int32_t RoundingAccuracy = 0) noexcept;
791 
792 template <int WA, int Wout>
793 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<Wout>
794 __spirv_ArbitraryFloatRSqrtINTEL(sycl::detail::ap_int<WA> A, int32_t MA,
795  int32_t Mout, int32_t EnableSubnormals = 0,
796  int32_t RoundingMode = 0,
797  int32_t RoundingAccuracy = 0) noexcept;
798 
799 template <int WA, int Wout>
800 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<Wout>
801 __spirv_ArbitraryFloatCbrtINTEL(sycl::detail::ap_int<WA> A, int32_t MA,
802  int32_t Mout, int32_t EnableSubnormals = 0,
803  int32_t RoundingMode = 0,
804  int32_t RoundingAccuracy = 0) noexcept;
805 
806 template <int WA, int WB, int Wout>
807 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<Wout>
808 __spirv_ArbitraryFloatHypotINTEL(sycl::detail::ap_int<WA> A, int32_t MA,
809  sycl::detail::ap_int<WB> B, int32_t MB,
810  int32_t Mout, int32_t EnableSubnormals = 0,
811  int32_t RoundingMode = 0,
812  int32_t RoundingAccuracy = 0) noexcept;
813 
814 template <int WA, int Wout>
815 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<Wout>
816 __spirv_ArbitraryFloatSqrtINTEL(sycl::detail::ap_int<WA> A, int32_t MA,
817  int32_t Mout, int32_t EnableSubnormals = 0,
818  int32_t RoundingMode = 0,
819  int32_t RoundingAccuracy = 0) noexcept;
820 
821 template <int WA, int Wout>
822 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<Wout>
823 __spirv_ArbitraryFloatLogINTEL(sycl::detail::ap_int<WA> A, int32_t MA,
824  int32_t Mout, int32_t EnableSubnormals = 0,
825  int32_t RoundingMode = 0,
826  int32_t RoundingAccuracy = 0) noexcept;
827 
828 template <int WA, int Wout>
829 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<Wout>
830 __spirv_ArbitraryFloatLog2INTEL(sycl::detail::ap_int<WA> A, int32_t MA,
831  int32_t Mout, int32_t EnableSubnormals = 0,
832  int32_t RoundingMode = 0,
833  int32_t RoundingAccuracy = 0) noexcept;
834 
835 template <int WA, int Wout>
836 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<Wout>
837 __spirv_ArbitraryFloatLog10INTEL(sycl::detail::ap_int<WA> A, int32_t MA,
838  int32_t Mout, int32_t EnableSubnormals = 0,
839  int32_t RoundingMode = 0,
840  int32_t RoundingAccuracy = 0) noexcept;
841 
842 template <int WA, int Wout>
843 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<Wout>
844 __spirv_ArbitraryFloatLog1pINTEL(sycl::detail::ap_int<WA> A, int32_t MA,
845  int32_t Mout, int32_t EnableSubnormals = 0,
846  int32_t RoundingMode = 0,
847  int32_t RoundingAccuracy = 0) noexcept;
848 
849 template <int WA, int Wout>
850 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<Wout>
851 __spirv_ArbitraryFloatExpINTEL(sycl::detail::ap_int<WA> A, int32_t MA,
852  int32_t Mout, int32_t EnableSubnormals = 0,
853  int32_t RoundingMode = 0,
854  int32_t RoundingAccuracy = 0) noexcept;
855 
856 template <int WA, int Wout>
857 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<Wout>
858 __spirv_ArbitraryFloatExp2INTEL(sycl::detail::ap_int<WA> A, int32_t MA,
859  int32_t Mout, int32_t EnableSubnormals = 0,
860  int32_t RoundingMode = 0,
861  int32_t RoundingAccuracy = 0) noexcept;
862 
863 template <int WA, int Wout>
864 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<Wout>
865 __spirv_ArbitraryFloatExp10INTEL(sycl::detail::ap_int<WA> A, int32_t MA,
866  int32_t Mout, int32_t EnableSubnormals = 0,
867  int32_t RoundingMode = 0,
868  int32_t RoundingAccuracy = 0) noexcept;
869 
870 template <int WA, int Wout>
871 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<Wout>
872 __spirv_ArbitraryFloatExpm1INTEL(sycl::detail::ap_int<WA> A, int32_t MA,
873  int32_t Mout, int32_t EnableSubnormals = 0,
874  int32_t RoundingMode = 0,
875  int32_t RoundingAccuracy = 0) noexcept;
876 
877 template <int WA, int Wout>
878 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<Wout>
879 __spirv_ArbitraryFloatSinINTEL(sycl::detail::ap_int<WA> A, int32_t MA,
880  int32_t Mout, int32_t EnableSubnormals = 0,
881  int32_t RoundingMode = 0,
882  int32_t RoundingAccuracy = 0) noexcept;
883 
884 template <int WA, int Wout>
885 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<Wout>
886 __spirv_ArbitraryFloatCosINTEL(sycl::detail::ap_int<WA> A, int32_t MA,
887  int32_t Mout, int32_t EnableSubnormals = 0,
888  int32_t RoundingMode = 0,
889  int32_t RoundingAccuracy = 0) noexcept;
890 
891 // Result value contains both values of sine and cosine and so has the size of
892 // 2 * Wout where Wout is equal to (1 + Eout + Mout).
893 template <int WA, int Wout>
894 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<2 * Wout>
895 __spirv_ArbitraryFloatSinCosINTEL(sycl::detail::ap_int<WA> A, int32_t MA,
896  int32_t Mout, int32_t EnableSubnormals = 0,
897  int32_t RoundingMode = 0,
898  int32_t RoundingAccuracy = 0) noexcept;
899 
900 template <int WA, int Wout>
901 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<Wout>
902 __spirv_ArbitraryFloatSinPiINTEL(sycl::detail::ap_int<WA> A, int32_t MA,
903  int32_t Mout, int32_t EnableSubnormals = 0,
904  int32_t RoundingMode = 0,
905  int32_t RoundingAccuracy = 0) noexcept;
906 
907 template <int WA, int Wout>
908 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<Wout>
909 __spirv_ArbitraryFloatCosPiINTEL(sycl::detail::ap_int<WA> A, int32_t MA,
910  int32_t Mout, int32_t EnableSubnormals = 0,
911  int32_t RoundingMode = 0,
912  int32_t RoundingAccuracy = 0) noexcept;
913 
914 // Result value contains both values of sine(A*pi) and cosine(A*pi) and so has
915 // the size of 2 * Wout where Wout is equal to (1 + Eout + Mout).
916 template <int WA, int Wout>
917 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<2 * Wout>
918 __spirv_ArbitraryFloatSinCosPiINTEL(sycl::detail::ap_int<WA> A, int32_t MA,
919  int32_t Mout, int32_t EnableSubnormals = 0,
920  int32_t RoundingMode = 0,
921  int32_t RoundingAccuracy = 0) noexcept;
922 
923 template <int WA, int Wout>
924 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<Wout>
925 __spirv_ArbitraryFloatASinINTEL(sycl::detail::ap_int<WA> A, int32_t MA,
926  int32_t Mout, int32_t EnableSubnormals = 0,
927  int32_t RoundingMode = 0,
928  int32_t RoundingAccuracy = 0) noexcept;
929 
930 template <int WA, int Wout>
931 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<Wout>
932 __spirv_ArbitraryFloatASinPiINTEL(sycl::detail::ap_int<WA> A, int32_t MA,
933  int32_t Mout, int32_t EnableSubnormals = 0,
934  int32_t RoundingMode = 0,
935  int32_t RoundingAccuracy = 0) noexcept;
936 
937 template <int WA, int Wout>
938 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<Wout>
939 __spirv_ArbitraryFloatACosINTEL(sycl::detail::ap_int<WA> A, int32_t MA,
940  int32_t Mout, int32_t EnableSubnormals = 0,
941  int32_t RoundingMode = 0,
942  int32_t RoundingAccuracy = 0) noexcept;
943 
944 template <int WA, int Wout>
945 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<Wout>
946 __spirv_ArbitraryFloatACosPiINTEL(sycl::detail::ap_int<WA> A, int32_t MA,
947  int32_t Mout, int32_t EnableSubnormals = 0,
948  int32_t RoundingMode = 0,
949  int32_t RoundingAccuracy = 0) noexcept;
950 
951 template <int WA, int Wout>
952 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<Wout>
953 __spirv_ArbitraryFloatATanINTEL(sycl::detail::ap_int<WA> A, int32_t MA,
954  int32_t Mout, int32_t EnableSubnormals = 0,
955  int32_t RoundingMode = 0,
956  int32_t RoundingAccuracy = 0) noexcept;
957 
958 template <int WA, int Wout>
959 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<Wout>
960 __spirv_ArbitraryFloatATanPiINTEL(sycl::detail::ap_int<WA> A, int32_t MA,
961  int32_t Mout, int32_t EnableSubnormals = 0,
962  int32_t RoundingMode = 0,
963  int32_t RoundingAccuracy = 0) noexcept;
964 
965 template <int WA, int WB, int Wout>
966 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<Wout>
967 __spirv_ArbitraryFloatATan2INTEL(sycl::detail::ap_int<WA> A, int32_t MA,
968  sycl::detail::ap_int<WB> B, int32_t MB,
969  int32_t Mout, int32_t EnableSubnormals = 0,
970  int32_t RoundingMode = 0,
971  int32_t RoundingAccuracy = 0) noexcept;
972 
973 template <int WA, int WB, int Wout>
974 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<Wout>
975 __spirv_ArbitraryFloatPowINTEL(sycl::detail::ap_int<WA> A, int32_t MA,
976  sycl::detail::ap_int<WB> B, int32_t MB,
977  int32_t Mout, int32_t EnableSubnormals = 0,
978  int32_t RoundingMode = 0,
979  int32_t RoundingAccuracy = 0) noexcept;
980 
981 template <int WA, int WB, int Wout>
982 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<Wout>
983 __spirv_ArbitraryFloatPowRINTEL(sycl::detail::ap_int<WA> A, int32_t MA,
984  sycl::detail::ap_int<WB> B, int32_t MB,
985  int32_t Mout, int32_t EnableSubnormals = 0,
986  int32_t RoundingMode = 0,
987  int32_t RoundingAccuracy = 0) noexcept;
988 
989 // PowN built-in calculates `A^B` where `A` is arbitrary precision floating
990 // point number and `B` is signed or unsigned arbitrary precision integer,
991 // i.e. its width doesn't depend on sum of exponent and mantissa.
992 template <int WA, int WB, int Wout>
993 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<Wout>
994 __spirv_ArbitraryFloatPowNINTEL(sycl::detail::ap_int<WA> A, int32_t MA,
995  sycl::detail::ap_int<WB> B, bool SignOfB,
996  int32_t Mout, int32_t EnableSubnormals = 0,
997  int32_t RoundingMode = 0,
998  int32_t RoundingAccuracy = 0) noexcept;
999 
1000 template <typename dataT>
1001 extern __DPCPP_SYCL_EXTERNAL int32_t
1002 __spirv_ReadPipe(__ocl_RPipeTy<dataT> Pipe, dataT *Data, int32_t Size,
1003  int32_t Alignment) noexcept;
1004 template <typename dataT>
1005 extern __DPCPP_SYCL_EXTERNAL int32_t
1006 __spirv_WritePipe(__ocl_WPipeTy<dataT> Pipe, const dataT *Data, int32_t Size,
1007  int32_t Alignment) noexcept;
1008 template <typename dataT>
1009 extern __DPCPP_SYCL_EXTERNAL void
1010 __spirv_ReadPipeBlockingINTEL(__ocl_RPipeTy<dataT> Pipe, dataT *Data,
1011  int32_t Size, int32_t Alignment) noexcept;
1012 template <typename dataT>
1013 extern __DPCPP_SYCL_EXTERNAL void
1014 __spirv_WritePipeBlockingINTEL(__ocl_WPipeTy<dataT> Pipe, const dataT *Data,
1015  int32_t Size, int32_t Alignment) noexcept;
1016 template <typename dataT>
1017 extern __DPCPP_SYCL_EXTERNAL __ocl_RPipeTy<dataT>
1018 __spirv_CreatePipeFromPipeStorage_read(
1019  const ConstantPipeStorage *Storage) noexcept;
1020 template <typename dataT>
1021 extern __DPCPP_SYCL_EXTERNAL __ocl_WPipeTy<dataT>
1022 __spirv_CreatePipeFromPipeStorage_write(
1023  const ConstantPipeStorage *Storage) noexcept;
1024 
1025 extern __DPCPP_SYCL_EXTERNAL void
1026 __spirv_ocl_prefetch(const __attribute__((opencl_global)) char *Ptr,
1027  size_t NumBytes) noexcept;
1028 
1029 extern __DPCPP_SYCL_EXTERNAL float
1030  __spirv_ConvertBF16ToFINTEL(uint16_t) noexcept;
1031 extern __DPCPP_SYCL_EXTERNAL uint16_t
1032 __spirv_ConvertFToBF16INTEL(float) noexcept;
1033 template <int N>
1034 extern __DPCPP_SYCL_EXTERNAL __ocl_vec_t<float, N>
1035  __spirv_ConvertBF16ToFINTEL(__ocl_vec_t<uint16_t, N>) noexcept;
1036 template <int N>
1037 extern __DPCPP_SYCL_EXTERNAL __ocl_vec_t<uint16_t, N>
1038  __spirv_ConvertFToBF16INTEL(__ocl_vec_t<float, N>) noexcept;
1039 
1041  __SYCL_EXPORT __ocl_vec_t<uint32_t, 4>
1042  __spirv_GroupNonUniformBallot(uint32_t Execution, bool Predicate) noexcept;
1043 
1044 // TODO: I'm not 100% sure that these NonUniform instructions should be
1045 // convergent Following precedent set for GroupNonUniformBallot above
1046 __SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT uint32_t
1047 __spirv_GroupNonUniformBallotBitCount(__spv::Scope::Flag, int,
1048  __ocl_vec_t<uint32_t, 4>) noexcept;
1049 
1050 __SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT int
1051  __spirv_GroupNonUniformBallotFindLSB(__spv::Scope::Flag,
1052  __ocl_vec_t<uint32_t, 4>) noexcept;
1053 
1054 template <typename ValueT, typename IdT>
1055 __SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT ValueT
1056  __spirv_GroupNonUniformBroadcast(__spv::Scope::Flag, ValueT, IdT);
1057 
1058 template <typename ValueT, typename IdT>
1059 __SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT ValueT
1060  __spirv_GroupNonUniformShuffle(__spv::Scope::Flag, ValueT, IdT) noexcept;
1061 
1062 __SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT bool
1063 __spirv_GroupNonUniformAll(__spv::Scope::Flag, bool);
1064 
1065 __SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT bool
1066 __spirv_GroupNonUniformAny(__spv::Scope::Flag, bool);
1067 
1068 template <typename ValueT>
1069 __SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT ValueT
1070 __spirv_GroupNonUniformSMin(__spv::Scope::Flag, unsigned int, ValueT);
1071 
1072 template <typename ValueT>
1073 __SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT ValueT
1074 __spirv_GroupNonUniformUMin(__spv::Scope::Flag, unsigned int, ValueT);
1075 
1076 template <typename ValueT>
1077 __SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT ValueT
1078 __spirv_GroupNonUniformFMin(__spv::Scope::Flag, unsigned int, ValueT);
1079 
1080 template <typename ValueT>
1081 __SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT ValueT
1082 __spirv_GroupNonUniformSMax(__spv::Scope::Flag, unsigned int, ValueT);
1083 
1084 template <typename ValueT>
1085 __SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT ValueT
1086 __spirv_GroupNonUniformUMax(__spv::Scope::Flag, unsigned int, ValueT);
1087 
1088 template <typename ValueT>
1089 __SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT ValueT
1090 __spirv_GroupNonUniformFMax(__spv::Scope::Flag, unsigned int, ValueT);
1091 
1092 template <typename ValueT>
1093 __SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT ValueT
1094 __spirv_GroupNonUniformIAdd(__spv::Scope::Flag, unsigned int, ValueT);
1095 
1096 template <typename ValueT>
1097 __SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT ValueT
1098 __spirv_GroupNonUniformFAdd(__spv::Scope::Flag, unsigned int, ValueT);
1099 
1100 template <typename ValueT>
1101 __SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT ValueT
1102 __spirv_GroupNonUniformIMul(__spv::Scope::Flag, unsigned int, ValueT);
1103 
1104 template <typename ValueT>
1105 __SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT ValueT
1106 __spirv_GroupNonUniformFMul(__spv::Scope::Flag, unsigned int, ValueT);
1107 
1108 template <typename ValueT>
1109 __SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT ValueT
1110 __spirv_GroupNonUniformBitwiseOr(__spv::Scope::Flag, unsigned int, ValueT);
1111 
1112 template <typename ValueT>
1113 __SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT ValueT
1114 __spirv_GroupNonUniformBitwiseXor(__spv::Scope::Flag, unsigned int, ValueT);
1115 
1116 template <typename ValueT>
1117 __SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT ValueT
1118 __spirv_GroupNonUniformBitwiseAnd(__spv::Scope::Flag, unsigned int, ValueT);
1119 
1120 template <typename ValueT>
1121 __SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT ValueT
1122 __spirv_GroupNonUniformLogicalOr(__spv::Scope::Flag, unsigned int, ValueT);
1123 
1124 template <typename ValueT>
1125 __SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT ValueT
1126 __spirv_GroupNonUniformLogicalAnd(__spv::Scope::Flag, unsigned int, ValueT);
1127 
1128 template <typename ValueT>
1129 __SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT ValueT
1130 __spirv_GroupNonUniformSMin(__spv::Scope::Flag, unsigned int, ValueT,
1131  unsigned int);
1132 
1133 template <typename ValueT>
1134 __SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT ValueT
1135 __spirv_GroupNonUniformUMin(__spv::Scope::Flag, unsigned int, ValueT,
1136  unsigned int);
1137 
1138 template <typename ValueT>
1139 __SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT ValueT
1140 __spirv_GroupNonUniformFMin(__spv::Scope::Flag, unsigned int, ValueT,
1141  unsigned int);
1142 
1143 template <typename ValueT>
1144 __SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT ValueT
1145 __spirv_GroupNonUniformSMax(__spv::Scope::Flag, unsigned int, ValueT,
1146  unsigned int);
1147 
1148 template <typename ValueT>
1149 __SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT ValueT
1150 __spirv_GroupNonUniformUMax(__spv::Scope::Flag, unsigned int, ValueT,
1151  unsigned int);
1152 
1153 template <typename ValueT>
1154 __SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT ValueT
1155 __spirv_GroupNonUniformFMax(__spv::Scope::Flag, unsigned int, ValueT,
1156  unsigned int);
1157 
1158 template <typename ValueT>
1159 __SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT ValueT
1160 __spirv_GroupNonUniformIAdd(__spv::Scope::Flag, unsigned int, ValueT,
1161  unsigned int);
1162 
1163 template <typename ValueT>
1164 __SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT ValueT
1165 __spirv_GroupNonUniformFAdd(__spv::Scope::Flag, unsigned int, ValueT,
1166  unsigned int);
1167 
1168 template <typename ValueT>
1169 __SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT ValueT
1170 __spirv_GroupNonUniformIMul(__spv::Scope::Flag, unsigned int, ValueT,
1171  unsigned int);
1172 
1173 template <typename ValueT>
1174 __SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT ValueT
1175 __spirv_GroupNonUniformFMul(__spv::Scope::Flag, unsigned int, ValueT,
1176  unsigned int);
1177 
1178 template <typename ValueT>
1179 __SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT ValueT
1180 __spirv_GroupNonUniformBitwiseOr(__spv::Scope::Flag, unsigned int, ValueT,
1181  unsigned int);
1182 
1183 template <typename ValueT>
1184 __SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT ValueT
1185 __spirv_GroupNonUniformBitwiseXor(__spv::Scope::Flag, unsigned int, ValueT,
1186  unsigned int);
1187 
1188 template <typename ValueT>
1189 __SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT ValueT
1190 __spirv_GroupNonUniformBitwiseAnd(__spv::Scope::Flag, unsigned int, ValueT,
1191  unsigned int);
1192 
1193 template <typename ValueT>
1194 __SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT ValueT
1195 __spirv_GroupNonUniformLogicalOr(__spv::Scope::Flag, unsigned int, ValueT,
1196  unsigned int);
1197 
1198 template <typename ValueT>
1199 __SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT ValueT
1200 __spirv_GroupNonUniformLogicalAnd(__spv::Scope::Flag, unsigned int, ValueT,
1201  unsigned int);
1202 
1203 extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT void
1204 __clc_BarrierInitialize(int64_t *state, int32_t expected_count) noexcept;
1205 
1206 extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT void
1207 __clc_BarrierInvalidate(int64_t *state) noexcept;
1208 
1209 extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT int64_t
1210 __clc_BarrierArrive(int64_t *state) noexcept;
1211 
1212 extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT int64_t
1213 __clc_BarrierArriveAndDrop(int64_t *state) noexcept;
1214 
1215 extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT int64_t
1216 __clc_BarrierArriveNoComplete(int64_t *state, int32_t count) noexcept;
1217 
1218 extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT int64_t
1219 __clc_BarrierArriveAndDropNoComplete(int64_t *state, int32_t count) noexcept;
1220 
1221 extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT void
1222 __clc_BarrierCopyAsyncArrive(int64_t *state) noexcept;
1223 
1224 extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT void
1225 __clc_BarrierCopyAsyncArriveNoInc(int64_t *state) noexcept;
1226 
1227 __SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT void
1228 __clc_BarrierWait(int64_t *state, int64_t arrival) noexcept;
1229 
1230 extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT bool
1231 __clc_BarrierTestWait(int64_t *state, int64_t arrival) noexcept;
1232 
1233 __SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT void
1234 __clc_BarrierArriveAndWait(int64_t *state) noexcept;
1235 
1236 #ifdef __SYCL_USE_VARIADIC_SPIRV_OCL_PRINTF__
1237 extern __DPCPP_SYCL_EXTERNAL int
1238 __spirv_ocl_printf(const __attribute__((opencl_constant)) char *Format, ...);
1239 extern __DPCPP_SYCL_EXTERNAL int __spirv_ocl_printf(const char *Format, ...);
1240 #else
1241 template <typename... Args>
1242 extern __DPCPP_SYCL_EXTERNAL int
1243 __spirv_ocl_printf(const __attribute__((opencl_constant)) char *Format,
1244  Args... args);
1245 template <typename... Args>
1246 extern __DPCPP_SYCL_EXTERNAL int __spirv_ocl_printf(const char *Format,
1247  Args... args);
1248 #endif
1249 
1250 // Native builtin extension
1251 
1252 extern __DPCPP_SYCL_EXTERNAL float __clc_native_tanh(float);
1253 extern __DPCPP_SYCL_EXTERNAL __ocl_vec_t<float, 2>
1254  __clc_native_tanh(__ocl_vec_t<float, 2>);
1255 extern __DPCPP_SYCL_EXTERNAL __ocl_vec_t<float, 3>
1256  __clc_native_tanh(__ocl_vec_t<float, 3>);
1257 extern __DPCPP_SYCL_EXTERNAL __ocl_vec_t<float, 4>
1258  __clc_native_tanh(__ocl_vec_t<float, 4>);
1259 extern __DPCPP_SYCL_EXTERNAL __ocl_vec_t<float, 8>
1260  __clc_native_tanh(__ocl_vec_t<float, 8>);
1261 extern __DPCPP_SYCL_EXTERNAL __ocl_vec_t<float, 16>
1262  __clc_native_tanh(__ocl_vec_t<float, 16>);
1263 
1264 extern __DPCPP_SYCL_EXTERNAL _Float16 __clc_native_tanh(_Float16);
1265 extern __DPCPP_SYCL_EXTERNAL __ocl_vec_t<_Float16, 2>
1266  __clc_native_tanh(__ocl_vec_t<_Float16, 2>);
1267 extern __DPCPP_SYCL_EXTERNAL __ocl_vec_t<_Float16, 3>
1268  __clc_native_tanh(__ocl_vec_t<_Float16, 3>);
1269 extern __DPCPP_SYCL_EXTERNAL __ocl_vec_t<_Float16, 4>
1270  __clc_native_tanh(__ocl_vec_t<_Float16, 4>);
1271 extern __DPCPP_SYCL_EXTERNAL __ocl_vec_t<_Float16, 8>
1272  __clc_native_tanh(__ocl_vec_t<_Float16, 8>);
1273 extern __DPCPP_SYCL_EXTERNAL __ocl_vec_t<_Float16, 16>
1274  __clc_native_tanh(__ocl_vec_t<_Float16, 16>);
1275 
1276 extern __DPCPP_SYCL_EXTERNAL _Float16 __clc_native_exp2(_Float16);
1277 extern __DPCPP_SYCL_EXTERNAL __ocl_vec_t<_Float16, 2>
1278  __clc_native_exp2(__ocl_vec_t<_Float16, 2>);
1279 extern __DPCPP_SYCL_EXTERNAL __ocl_vec_t<_Float16, 3>
1280  __clc_native_exp2(__ocl_vec_t<_Float16, 3>);
1281 extern __DPCPP_SYCL_EXTERNAL __ocl_vec_t<_Float16, 4>
1282  __clc_native_exp2(__ocl_vec_t<_Float16, 4>);
1283 extern __DPCPP_SYCL_EXTERNAL __ocl_vec_t<_Float16, 8>
1284  __clc_native_exp2(__ocl_vec_t<_Float16, 8>);
1285 extern __DPCPP_SYCL_EXTERNAL __ocl_vec_t<_Float16, 16>
1286  __clc_native_exp2(__ocl_vec_t<_Float16, 16>);
1287 
1288 #define __CLC_BF16(...) \
1289  extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT __VA_ARGS__ __clc_fabs( \
1290  __VA_ARGS__) noexcept; \
1291  extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT __VA_ARGS__ __clc_fmin( \
1292  __VA_ARGS__, __VA_ARGS__) noexcept; \
1293  extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT __VA_ARGS__ __clc_fmax( \
1294  __VA_ARGS__, __VA_ARGS__) noexcept; \
1295  extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT __VA_ARGS__ __clc_fma( \
1296  __VA_ARGS__, __VA_ARGS__, __VA_ARGS__) noexcept;
1297 
1298 #define __CLC_BF16_SCAL_VEC(TYPE) \
1299  __CLC_BF16(TYPE) \
1300  __CLC_BF16(__ocl_vec_t<TYPE, 2>) \
1301  __CLC_BF16(__ocl_vec_t<TYPE, 3>) \
1302  __CLC_BF16(__ocl_vec_t<TYPE, 4>) \
1303  __CLC_BF16(__ocl_vec_t<TYPE, 8>) \
1304  __CLC_BF16(__ocl_vec_t<TYPE, 16>)
1305 
1306 __CLC_BF16_SCAL_VEC(uint16_t)
1307 __CLC_BF16_SCAL_VEC(uint32_t)
1308 
1309 #undef __CLC_BF16_SCAL_VEC
1310 #undef __CLC_BF16
1311 
1312 extern __DPCPP_SYCL_EXTERNAL int32_t __spirv_BuiltInGlobalHWThreadIDINTEL();
1313 extern __DPCPP_SYCL_EXTERNAL int32_t __spirv_BuiltInSubDeviceIDINTEL();
1314 extern __DPCPP_SYCL_EXTERNAL uint64_t __spirv_ReadClockKHR(int);
1315 
1316 template <typename from, typename to>
1317 extern __DPCPP_SYCL_EXTERNAL
1318  std::enable_if_t<std::is_integral_v<to> && std::is_unsigned_v<to>, to>
1319  __spirv_ConvertPtrToU(from val) noexcept;
1320 
1321 template <typename RetT, typename... ArgsT>
1322 extern __DPCPP_SYCL_EXTERNAL __spv::__spirv_TaskSequenceINTEL *
1323 __spirv_TaskSequenceCreateINTEL(RetT (*f)(ArgsT...), int Pipelined = -1,
1324  int ClusterMode = -1,
1325  unsigned int ResponseCapacity = 0,
1326  unsigned int InvocationCapacity = 0) noexcept;
1327 
1328 template <typename... ArgsT>
1329 extern __DPCPP_SYCL_EXTERNAL void
1330 __spirv_TaskSequenceAsyncINTEL(__spv::__spirv_TaskSequenceINTEL *TaskSequence,
1331  ArgsT... Args) noexcept;
1332 
1333 template <typename RetT>
1334 extern __DPCPP_SYCL_EXTERNAL RetT __spirv_TaskSequenceGetINTEL(
1335  __spv::__spirv_TaskSequenceINTEL *TaskSequence) noexcept;
1336 
1337 extern __DPCPP_SYCL_EXTERNAL void __spirv_TaskSequenceReleaseINTEL(
1338  __spv::__spirv_TaskSequenceINTEL *TaskSequence) noexcept;
1339 
1340 #else // if !__SYCL_DEVICE_ONLY__
1341 
1342 template <typename dataT>
1345  const dataT *Src, size_t NumElements,
1346  size_t Stride, __ocl_event_t) noexcept {
1347  for (size_t i = 0; i < NumElements; i++) {
1348  Dest[i] = Src[i * Stride];
1349  }
1350  // A real instance of the class is not needed, return dummy pointer.
1351  return nullptr;
1352 }
1353 
1354 template <typename dataT>
1357  const dataT *Src, size_t NumElements,
1358  size_t Stride, __ocl_event_t) noexcept {
1359  for (size_t i = 0; i < NumElements; i++) {
1360  Dest[i * Stride] = Src[i];
1361  }
1362  // A real instance of the class is not needed, return dummy pointer.
1363  return nullptr;
1364 }
1365 
1366 extern __SYCL_EXPORT void __spirv_ocl_prefetch(const char *Ptr,
1367  size_t NumBytes) noexcept;
1368 
1369 __SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT void
1371  uint32_t Semantics) noexcept;
1372 
1373 __SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT void
1374 __spirv_MemoryBarrier(__spv::Scope Memory, uint32_t Semantics) noexcept;
1375 
1376 __SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT void
1377 __spirv_GroupWaitEvents(__spv::Scope Execution, uint32_t NumEvents,
1378  __ocl_event_t *WaitEvents) noexcept;
1379 #endif // !__SYCL_DEVICE_ONLY__
#define __DPCPP_SYCL_EXTERNAL
conditional< sizeof(long)==8, long, long long >::type int64_t
Definition: kernel_desc.hpp:35
__attribute__((always_inline)) auto invoke_simd(sycl
The invoke_simd free function invokes a SIMD function using all work-items in a sub_group.
Definition: access.hpp:18
__SYCL_CONVERGENT__ __ocl_event_t __SYCL_OpGroupAsyncCopyGlobalToLocal(__spv::Scope::Flag, dataT *Dest, const dataT *Src, size_t NumElements, size_t Stride, __ocl_event_t) noexcept
Definition: spirv_ops.hpp:1344
__SYCL_CONVERGENT__ __DPCPP_SYCL_EXTERNAL void __spirv_ControlBarrier(__spv::Scope Execution, __spv::Scope Memory, uint32_t Semantics) noexcept
Definition: spirv_ops.cpp:27
__SYCL_CONVERGENT__ __DPCPP_SYCL_EXTERNAL void __spirv_MemoryBarrier(__spv::Scope Memory, uint32_t Semantics) noexcept
Definition: spirv_ops.cpp:37
__SYCL_CONVERGENT__ __DPCPP_SYCL_EXTERNAL void __spirv_GroupWaitEvents(__spv::Scope Execution, uint32_t NumEvents, __ocl_event_t *WaitEvents) noexcept
Definition: spirv_ops.cpp:19
#define __SYCL_CONVERGENT__
Definition: spirv_ops.hpp:23
void __spirv_ocl_prefetch(const char *Ptr, size_t NumBytes) noexcept
Definition: spirv_ops.cpp:48
__SYCL_CONVERGENT__ __ocl_event_t __SYCL_OpGroupAsyncCopyLocalToGlobal(__spv::Scope::Flag, dataT *Dest, const dataT *Src, size_t NumElements, size_t Stride, __ocl_event_t) noexcept
Definition: spirv_ops.hpp:1356
void * __ocl_event_t
void * __ocl_sampler_t
_Abi const simd< _Tp, _Abi > & noexcept
Definition: simd.hpp:1324