DPC++ Runtime
Runtime libraries for oneAPI DPC++
spirv_ops.hpp
Go to the documentation of this file.
1 //==----------- spirv_ops.hpp --- SPIRV operations -------------------------==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #pragma once
10 
11 #include <CL/__spirv/spirv_types.hpp> // for Scope, __ocl_event_t
12 #include <sycl/detail/defines_elementary.hpp> // for __DPCPP_SYCL_EXTERNAL
13 #include <sycl/detail/export.hpp> // for __SYCL_EXPORT
14 
15 #include <stddef.h> // for size_t
16 #include <stdint.h> // for uint32_t
17 
18 // Convergent attribute
19 #ifdef __SYCL_DEVICE_ONLY__
20 #define __SYCL_CONVERGENT__ __attribute__((convergent))
21 #else
22 #define __SYCL_CONVERGENT__
23 #endif
24 
25 #ifdef __SYCL_DEVICE_ONLY__
26 
27 #if (SYCL_EXT_ONEAPI_MATRIX_VERSION > 1)
28 extern __DPCPP_SYCL_EXTERNAL float __spirv_RoundFToTF32INTEL(float a);
29 template <typename T, typename Tp, std::size_t R, std::size_t C,
32  __spv::Scope::Flag S = __spv::Scope::Flag::Subgroup>
35  __spirv_JointMatrixLoadINTEL(T *Ptr, std::size_t Stride,
36  __spv::MatrixLayout Layout = L,
37  __spv::Scope::Flag Sc = S, int MemOperand = 0);
38 
39 template <typename T, typename Tp, std::size_t R, std::size_t C,
42  __spv::Scope::Flag S = __spv::Scope::Flag::Subgroup>
43 extern __DPCPP_SYCL_EXTERNAL void __spirv_JointMatrixStoreINTEL(
45  std::size_t Stride, __spv::MatrixLayout Layout = L,
46  __spv::Scope::Flag Sc = S, int MemOperand = 0);
47 
48 template <typename T1, typename T2, std::size_t M, std::size_t K, std::size_t N,
53  __spv::Scope::Flag S = __spv::Scope::Flag::Subgroup>
56  __spirv_JointMatrixMadINTEL(
60  __spv::Scope::Flag Sc = __spv::Scope::Flag::Subgroup);
61 
62 template <typename T1, typename T2, typename T3, std::size_t M, std::size_t K,
63  std::size_t N, __spv::MatrixUse UA, __spv::MatrixUse UB,
68  __spv::Scope::Flag S = __spv::Scope::Flag::Subgroup>
71  __spirv_JointMatrixUUMadINTEL(
75  __spv::Scope::Flag Sc = __spv::Scope::Flag::Subgroup);
76 
77 template <typename T1, typename T2, typename T3, std::size_t M, std::size_t K,
78  std::size_t N, __spv::MatrixUse UA, __spv::MatrixUse UB,
83  __spv::Scope::Flag S = __spv::Scope::Flag::Subgroup>
86  __spirv_JointMatrixUSMadINTEL(
90  __spv::Scope::Flag Sc = __spv::Scope::Flag::Subgroup);
91 
92 template <typename T1, typename T2, typename T3, std::size_t M, std::size_t K,
93  std::size_t N, __spv::MatrixUse UA, __spv::MatrixUse UB,
98  __spv::Scope::Flag S = __spv::Scope::Flag::Subgroup>
101  __spirv_JointMatrixSUMadINTEL(
105  __spv::Scope::Flag Sc = __spv::Scope::Flag::Subgroup);
106 
107 template <typename T, typename Tp, std::size_t R, std::size_t C,
110  __spv::Scope::Flag S = __spv::Scope::Flag::Subgroup>
113  __spirv_CompositeConstruct(const T v);
114 
115 template <typename T, std::size_t R, std::size_t C, __spv::MatrixUse U,
117  __spv::Scope::Flag S = __spv::Scope::Flag::Subgroup>
118 extern __DPCPP_SYCL_EXTERNAL __ocl_vec_t<uint32_t, 2>
119 __spirv_JointMatrixGetElementCoordINTEL(
121 
122 template <typename T, std::size_t R, std::size_t C, __spv::MatrixUse U,
124  __spv::Scope::Flag S = __spv::Scope::Flag::Subgroup>
125 extern __DPCPP_SYCL_EXTERNAL size_t __spirv_JointMatrixWorkItemLengthINTEL(
127 
128 template <typename Ts, typename T, std::size_t R, std::size_t C,
131  __spv::Scope::Flag S = __spv::Scope::Flag::Subgroup>
132 extern __DPCPP_SYCL_EXTERNAL Ts __spirv_VectorExtractDynamic(
134 
135 template <typename Ts, typename T, std::size_t R, std::size_t C,
138  __spv::Scope::Flag S = __spv::Scope::Flag::Subgroup>
140 __spirv_VectorInsertDynamic(__spv::__spirv_JointMatrixINTEL<T, R, C, L, S, U> *,
141  Ts val, size_t i);
142 #else
143 template <typename T, typename Tp, std::size_t R, std::size_t C,
145  __spv::Scope::Flag S = __spv::Scope::Flag::Subgroup>
147 __spirv_JointMatrixLoadINTEL(T *Ptr, std::size_t Stride,
148  __spv::MatrixLayout Layout = L,
149  __spv::Scope::Flag Sc = S, int MemOperand = 0);
150 
151 template <typename T, typename Tp, std::size_t R, std::size_t C,
153  __spv::Scope::Flag S = __spv::Scope::Flag::Subgroup>
154 extern __DPCPP_SYCL_EXTERNAL void __spirv_JointMatrixStoreINTEL(
156  std::size_t Stride, __spv::MatrixLayout Layout = L,
157  __spv::Scope::Flag Sc = S, int MemOperand = 0);
158 
159 template <typename T1, typename T2, std::size_t M, std::size_t K, std::size_t N,
163  __spv::Scope::Flag S = __spv::Scope::Flag::Subgroup>
165 __spirv_JointMatrixMadINTEL(
169  __spv::Scope::Flag Sc = __spv::Scope::Flag::Subgroup);
170 
171 template <typename T1, typename T2, typename T3, std::size_t M, std::size_t K,
175  __spv::Scope::Flag S = __spv::Scope::Flag::Subgroup>
177 __spirv_JointMatrixUUMadINTEL(
181  __spv::Scope::Flag Sc = __spv::Scope::Flag::Subgroup);
182 
183 template <typename T1, typename T2, typename T3, std::size_t M, std::size_t K,
187  __spv::Scope::Flag S = __spv::Scope::Flag::Subgroup>
189 __spirv_JointMatrixUSMadINTEL(
193  __spv::Scope::Flag Sc = __spv::Scope::Flag::Subgroup);
194 
195 template <typename T1, typename T2, typename T3, std::size_t M, std::size_t K,
199  __spv::Scope::Flag S = __spv::Scope::Flag::Subgroup>
201 __spirv_JointMatrixSUMadINTEL(
205  __spv::Scope::Flag Sc = __spv::Scope::Flag::Subgroup);
206 
207 template <typename T, std::size_t R, std::size_t C,
209  __spv::Scope::Flag S = __spv::Scope::Flag::Subgroup>
211 __spirv_CompositeConstruct(const T v);
212 
213 template <typename T, std::size_t R, std::size_t C,
215  __spv::Scope::Flag S = __spv::Scope::Flag::Subgroup>
216 extern __DPCPP_SYCL_EXTERNAL size_t __spirv_JointMatrixWorkItemLengthINTEL(
218 
219 template <typename T, std::size_t R, std::size_t C,
221  __spv::Scope::Flag S = __spv::Scope::Flag::Subgroup>
222 extern __DPCPP_SYCL_EXTERNAL T __spirv_VectorExtractDynamic(
224 
225 template <typename T, std::size_t R, std::size_t C,
227  __spv::Scope::Flag S = __spv::Scope::Flag::Subgroup>
229 __spirv_VectorInsertDynamic(__spv::__spirv_JointMatrixINTEL<T, R, C, L, S> *,
230  T val, size_t i);
231 #endif // SYCL_EXT_ONEAPI_MATRIX_VERSION
232 
233 #ifndef __SPIRV_BUILTIN_DECLARATIONS__
234 #error \
235  "SPIR-V built-ins are not available. Please set -fdeclare-spirv-builtins flag."
236 #endif
237 
238 template <typename RetT, typename ImageT>
239 extern __DPCPP_SYCL_EXTERNAL RetT __spirv_ImageQueryFormat(ImageT);
240 
241 template <typename RetT, typename ImageT>
242 extern __DPCPP_SYCL_EXTERNAL RetT __spirv_ImageQueryOrder(ImageT);
243 
244 template <typename RetT, typename ImageT>
245 extern __DPCPP_SYCL_EXTERNAL RetT __spirv_ImageQuerySize(ImageT);
246 
247 template <typename ImageT, typename CoordT, typename ValT>
248 extern __DPCPP_SYCL_EXTERNAL void __spirv_ImageWrite(ImageT, CoordT, ValT);
249 
250 template <class RetT, typename ImageT, typename TempArgT>
251 extern __DPCPP_SYCL_EXTERNAL RetT __spirv_ImageRead(ImageT, TempArgT);
252 
253 template <typename ImageT, typename SampledType>
254 extern __DPCPP_SYCL_EXTERNAL SampledType __spirv_SampledImage(ImageT,
256 
257 template <typename SampledType, typename TempRetT, typename TempArgT>
258 extern __DPCPP_SYCL_EXTERNAL TempRetT
259 __spirv_ImageSampleExplicitLod(SampledType, TempArgT, int, float);
260 
261 template <typename SampledType, typename TempRetT, typename TempArgT>
262 extern __DPCPP_SYCL_EXTERNAL TempRetT
263 __spirv_ImageSampleExplicitLod(SampledType, TempArgT, int, TempArgT, TempArgT);
264 
265 #define __SYCL_OpGroupAsyncCopyGlobalToLocal __spirv_GroupAsyncCopy
266 #define __SYCL_OpGroupAsyncCopyLocalToGlobal __spirv_GroupAsyncCopy
267 
268 // Atomic SPIR-V builtins
269 #define __SPIRV_ATOMIC_LOAD(AS, Type) \
270  extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicLoad( \
271  AS const Type *P, __spv::Scope::Flag S, \
272  __spv::MemorySemanticsMask::Flag O);
273 #define __SPIRV_ATOMIC_STORE(AS, Type) \
274  extern __DPCPP_SYCL_EXTERNAL void __spirv_AtomicStore( \
275  AS Type *P, __spv::Scope::Flag S, __spv::MemorySemanticsMask::Flag O, \
276  Type V);
277 #define __SPIRV_ATOMIC_EXCHANGE(AS, Type) \
278  extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicExchange( \
279  AS Type *P, __spv::Scope::Flag S, __spv::MemorySemanticsMask::Flag O, \
280  Type V);
281 #define __SPIRV_ATOMIC_CMP_EXCHANGE(AS, Type) \
282  extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicCompareExchange( \
283  AS Type *P, __spv::Scope::Flag S, __spv::MemorySemanticsMask::Flag E, \
284  __spv::MemorySemanticsMask::Flag U, Type V, Type C);
285 #define __SPIRV_ATOMIC_IADD(AS, Type) \
286  extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicIAdd( \
287  AS Type *P, __spv::Scope::Flag S, __spv::MemorySemanticsMask::Flag O, \
288  Type V);
289 #define __SPIRV_ATOMIC_ISUB(AS, Type) \
290  extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicISub( \
291  AS Type *P, __spv::Scope::Flag S, __spv::MemorySemanticsMask::Flag O, \
292  Type V);
293 #define __SPIRV_ATOMIC_FADD(AS, Type) \
294  extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicFAddEXT( \
295  AS Type *P, __spv::Scope::Flag S, __spv::MemorySemanticsMask::Flag O, \
296  Type V);
297 #define __SPIRV_ATOMIC_SMIN(AS, Type) \
298  extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicSMin( \
299  AS Type *P, __spv::Scope::Flag S, __spv::MemorySemanticsMask::Flag O, \
300  Type V);
301 #define __SPIRV_ATOMIC_UMIN(AS, Type) \
302  extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicUMin( \
303  AS Type *P, __spv::Scope::Flag S, __spv::MemorySemanticsMask::Flag O, \
304  Type V);
305 #define __SPIRV_ATOMIC_FMIN(AS, Type) \
306  extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicFMinEXT( \
307  AS Type *P, __spv::Scope::Flag S, __spv::MemorySemanticsMask::Flag O, \
308  Type V);
309 #define __SPIRV_ATOMIC_SMAX(AS, Type) \
310  extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicSMax( \
311  AS Type *P, __spv::Scope::Flag S, __spv::MemorySemanticsMask::Flag O, \
312  Type V);
313 #define __SPIRV_ATOMIC_UMAX(AS, Type) \
314  extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicUMax( \
315  AS Type *P, __spv::Scope::Flag S, __spv::MemorySemanticsMask::Flag O, \
316  Type V);
317 #define __SPIRV_ATOMIC_FMAX(AS, Type) \
318  extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicFMaxEXT( \
319  AS Type *P, __spv::Scope::Flag S, __spv::MemorySemanticsMask::Flag O, \
320  Type V);
321 #define __SPIRV_ATOMIC_AND(AS, Type) \
322  extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicAnd( \
323  AS Type *P, __spv::Scope::Flag S, __spv::MemorySemanticsMask::Flag O, \
324  Type V);
325 #define __SPIRV_ATOMIC_OR(AS, Type) \
326  extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicOr( \
327  AS Type *P, __spv::Scope::Flag S, __spv::MemorySemanticsMask::Flag O, \
328  Type V);
329 #define __SPIRV_ATOMIC_XOR(AS, Type) \
330  extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicXor( \
331  AS Type *P, __spv::Scope::Flag S, __spv::MemorySemanticsMask::Flag O, \
332  Type V);
333 
334 #define __SPIRV_ATOMIC_FLOAT(AS, Type) \
335  __SPIRV_ATOMIC_FADD(AS, Type) \
336  __SPIRV_ATOMIC_FMIN(AS, Type) \
337  __SPIRV_ATOMIC_FMAX(AS, Type) \
338  __SPIRV_ATOMIC_LOAD(AS, Type) \
339  __SPIRV_ATOMIC_STORE(AS, Type) \
340  __SPIRV_ATOMIC_EXCHANGE(AS, Type)
341 
342 #define __SPIRV_ATOMIC_BASE(AS, Type) \
343  __SPIRV_ATOMIC_FLOAT(AS, Type) \
344  __SPIRV_ATOMIC_CMP_EXCHANGE(AS, Type) \
345  __SPIRV_ATOMIC_IADD(AS, Type) \
346  __SPIRV_ATOMIC_ISUB(AS, Type) \
347  __SPIRV_ATOMIC_AND(AS, Type) \
348  __SPIRV_ATOMIC_OR(AS, Type) \
349  __SPIRV_ATOMIC_XOR(AS, Type)
350 
351 #define __SPIRV_ATOMIC_SIGNED(AS, Type) \
352  __SPIRV_ATOMIC_BASE(AS, Type) \
353  __SPIRV_ATOMIC_SMIN(AS, Type) \
354  __SPIRV_ATOMIC_SMAX(AS, Type)
355 
356 #define __SPIRV_ATOMIC_UNSIGNED(AS, Type) \
357  __SPIRV_ATOMIC_BASE(AS, Type) \
358  __SPIRV_ATOMIC_UMIN(AS, Type) \
359  __SPIRV_ATOMIC_UMAX(AS, Type)
360 
361 // Helper atomic operations which select correct signed/unsigned version
362 // of atomic min/max based on the type
363 #define __SPIRV_ATOMIC_MINMAX(AS, Op) \
364  template <typename T> \
365  typename std::enable_if_t< \
366  std::is_integral<T>::value && std::is_signed<T>::value, T> \
367  __spirv_Atomic##Op(AS T *Ptr, __spv::Scope::Flag Memory, \
368  __spv::MemorySemanticsMask::Flag Semantics, \
369  T Value) { \
370  return __spirv_AtomicS##Op(Ptr, Memory, Semantics, Value); \
371  } \
372  template <typename T> \
373  typename std::enable_if_t< \
374  std::is_integral<T>::value && !std::is_signed<T>::value, T> \
375  __spirv_Atomic##Op(AS T *Ptr, __spv::Scope::Flag Memory, \
376  __spv::MemorySemanticsMask::Flag Semantics, \
377  T Value) { \
378  return __spirv_AtomicU##Op(Ptr, Memory, Semantics, Value); \
379  } \
380  template <typename T> \
381  typename std::enable_if_t<std::is_floating_point<T>::value, T> \
382  __spirv_Atomic##Op(AS T *Ptr, __spv::Scope::Flag Memory, \
383  __spv::MemorySemanticsMask::Flag Semantics, \
384  T Value) { \
385  return __spirv_AtomicF##Op##EXT(Ptr, Memory, Semantics, Value); \
386  }
387 
388 #define __SPIRV_ATOMICS(macro, Arg) \
389  macro(__attribute__((opencl_global)), Arg) \
390  macro(__attribute__((opencl_local)), Arg) macro(, Arg)
391 
392 __SPIRV_ATOMICS(__SPIRV_ATOMIC_FLOAT, float)
393 __SPIRV_ATOMICS(__SPIRV_ATOMIC_FLOAT, double)
394 __SPIRV_ATOMICS(__SPIRV_ATOMIC_SIGNED, int)
395 __SPIRV_ATOMICS(__SPIRV_ATOMIC_SIGNED, long)
396 __SPIRV_ATOMICS(__SPIRV_ATOMIC_SIGNED, long long)
397 __SPIRV_ATOMICS(__SPIRV_ATOMIC_UNSIGNED, unsigned int)
398 __SPIRV_ATOMICS(__SPIRV_ATOMIC_UNSIGNED, unsigned long)
399 __SPIRV_ATOMICS(__SPIRV_ATOMIC_UNSIGNED, unsigned long long)
400 __SPIRV_ATOMICS(__SPIRV_ATOMIC_MINMAX, Min)
401 __SPIRV_ATOMICS(__SPIRV_ATOMIC_MINMAX, Max)
402 
403 #undef __SPIRV_ATOMICS
404 #undef __SPIRV_ATOMIC_AND
405 #undef __SPIRV_ATOMIC_BASE
406 #undef __SPIRV_ATOMIC_CMP_EXCHANGE
407 #undef __SPIRV_ATOMIC_EXCHANGE
408 #undef __SPIRV_ATOMIC_FADD
409 #undef __SPIRV_ATOMIC_FLOAT
410 #undef __SPIRV_ATOMIC_FMAX
411 #undef __SPIRV_ATOMIC_FMIN
412 #undef __SPIRV_ATOMIC_IADD
413 #undef __SPIRV_ATOMIC_ISUB
414 #undef __SPIRV_ATOMIC_LOAD
415 #undef __SPIRV_ATOMIC_MINMAX
416 #undef __SPIRV_ATOMIC_OR
417 #undef __SPIRV_ATOMIC_SIGNED
418 #undef __SPIRV_ATOMIC_SMAX
419 #undef __SPIRV_ATOMIC_SMIN
420 #undef __SPIRV_ATOMIC_STORE
421 #undef __SPIRV_ATOMIC_UMAX
422 #undef __SPIRV_ATOMIC_UMIN
423 #undef __SPIRV_ATOMIC_UNSIGNED
424 #undef __SPIRV_ATOMIC_XOR
425 
426 template <typename dataT>
427 extern __attribute__((opencl_global)) dataT *
428 __SYCL_GenericCastToPtrExplicit_ToGlobal(void *Ptr) noexcept {
429  return (__attribute__((opencl_global)) dataT *)
430  __spirv_GenericCastToPtrExplicit_ToGlobal(
432 }
433 
434 template <typename dataT>
435 extern const __attribute__((opencl_global)) dataT *
436 __SYCL_GenericCastToPtrExplicit_ToGlobal(const void *Ptr) noexcept {
437  return (const __attribute__((opencl_global)) dataT *)
438  __spirv_GenericCastToPtrExplicit_ToGlobal(
440 }
441 
442 template <typename dataT>
443 extern volatile __attribute__((opencl_global)) dataT *
444 __SYCL_GenericCastToPtrExplicit_ToGlobal(volatile void *Ptr) noexcept {
445  return (volatile __attribute__((opencl_global)) dataT *)
446  __spirv_GenericCastToPtrExplicit_ToGlobal(
448 }
449 
450 template <typename dataT>
451 extern const volatile __attribute__((opencl_global)) dataT *
452 __SYCL_GenericCastToPtrExplicit_ToGlobal(const volatile void *Ptr) noexcept {
453  return (const volatile __attribute__((opencl_global)) dataT *)
454  __spirv_GenericCastToPtrExplicit_ToGlobal(
456 }
457 
458 template <typename dataT>
459 extern __attribute__((opencl_local)) dataT *
460 __SYCL_GenericCastToPtrExplicit_ToLocal(void *Ptr) noexcept {
461  return (__attribute__((opencl_local)) dataT *)
462  __spirv_GenericCastToPtrExplicit_ToLocal(Ptr,
464 }
465 
466 template <typename dataT>
467 extern const __attribute__((opencl_local)) dataT *
468 __SYCL_GenericCastToPtrExplicit_ToLocal(const void *Ptr) noexcept {
469  return (const __attribute__((opencl_local)) dataT *)
470  __spirv_GenericCastToPtrExplicit_ToLocal(Ptr,
472 }
473 
474 template <typename dataT>
475 extern volatile __attribute__((opencl_local)) dataT *
476 __SYCL_GenericCastToPtrExplicit_ToLocal(volatile void *Ptr) noexcept {
477  return (volatile __attribute__((opencl_local)) dataT *)
478  __spirv_GenericCastToPtrExplicit_ToLocal(Ptr,
480 }
481 
482 template <typename dataT>
483 extern const volatile __attribute__((opencl_local)) dataT *
484 __SYCL_GenericCastToPtrExplicit_ToLocal(const volatile void *Ptr) noexcept {
485  return (const volatile __attribute__((opencl_local)) dataT *)
486  __spirv_GenericCastToPtrExplicit_ToLocal(Ptr,
488 }
489 
490 template <typename dataT>
491 extern __attribute__((opencl_private)) dataT *
492 __SYCL_GenericCastToPtrExplicit_ToPrivate(void *Ptr) noexcept {
493  return (__attribute__((opencl_private)) dataT *)
494  __spirv_GenericCastToPtrExplicit_ToPrivate(Ptr,
496 }
497 
498 template <typename dataT>
499 extern const __attribute__((opencl_private)) dataT *
500 __SYCL_GenericCastToPtrExplicit_ToPrivate(const void *Ptr) noexcept {
501  return (const __attribute__((opencl_private)) dataT *)
502  __spirv_GenericCastToPtrExplicit_ToPrivate(Ptr,
504 }
505 
506 template <typename dataT>
507 extern volatile __attribute__((opencl_private)) dataT *
508 __SYCL_GenericCastToPtrExplicit_ToPrivate(volatile void *Ptr) noexcept {
509  return (volatile __attribute__((opencl_private)) dataT *)
510  __spirv_GenericCastToPtrExplicit_ToPrivate(Ptr,
512 }
513 
514 template <typename dataT>
515 extern const volatile __attribute__((opencl_private)) dataT *
516 __SYCL_GenericCastToPtrExplicit_ToPrivate(const volatile void *Ptr) noexcept {
517  return (const volatile __attribute__((opencl_private)) dataT *)
518  __spirv_GenericCastToPtrExplicit_ToPrivate(Ptr,
520 }
521 
522 template <typename dataT>
524 __spirv_SubgroupShuffleINTEL(dataT Data, uint32_t InvocationId) noexcept;
525 template <typename dataT>
527 __spirv_SubgroupShuffleDownINTEL(dataT Current, dataT Next,
528  uint32_t Delta) noexcept;
529 template <typename dataT>
531 __spirv_SubgroupShuffleUpINTEL(dataT Previous, dataT Current,
532  uint32_t Delta) noexcept;
533 template <typename dataT>
535 __spirv_SubgroupShuffleXorINTEL(dataT Data, uint32_t Value) noexcept;
536 
537 template <typename dataT>
539 __spirv_SubgroupBlockReadINTEL(const __attribute__((opencl_global))
540  uint8_t *Ptr) noexcept;
541 
542 template <typename dataT>
544 __spirv_SubgroupBlockWriteINTEL(__attribute__((opencl_global)) uint8_t *Ptr,
545  dataT Data) noexcept;
546 
547 template <typename dataT>
549 __spirv_SubgroupBlockReadINTEL(const __attribute__((opencl_global))
550  uint16_t *Ptr) noexcept;
551 
552 template <typename dataT>
554 __spirv_SubgroupBlockWriteINTEL(__attribute__((opencl_global)) uint16_t *Ptr,
555  dataT Data) noexcept;
556 
557 template <typename dataT>
559 __spirv_SubgroupBlockReadINTEL(const __attribute__((opencl_global))
560  uint32_t *Ptr) noexcept;
561 
562 template <typename dataT>
564 __spirv_SubgroupBlockWriteINTEL(__attribute__((opencl_global)) uint32_t *Ptr,
565  dataT Data) noexcept;
566 
567 template <typename dataT>
569 __spirv_SubgroupBlockReadINTEL(const __attribute__((opencl_global))
570  uint64_t *Ptr) noexcept;
571 
572 template <typename dataT>
574 __spirv_SubgroupBlockWriteINTEL(__attribute__((opencl_global)) uint64_t *Ptr,
575  dataT Data) noexcept;
576 template <int W, int rW>
577 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<rW>
578 __spirv_FixedSqrtINTEL(sycl::detail::ap_int<W> a, bool S, int32_t I, int32_t rI,
579  int32_t Quantization = 0, int32_t Overflow = 0) noexcept;
580 template <int W, int rW>
581 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<rW>
582 __spirv_FixedRecipINTEL(sycl::detail::ap_int<W> a, bool S, int32_t I,
583  int32_t rI, int32_t Quantization = 0,
584  int32_t Overflow = 0) noexcept;
585 template <int W, int rW>
586 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<rW>
587 __spirv_FixedRsqrtINTEL(sycl::detail::ap_int<W> a, bool S, int32_t I,
588  int32_t rI, int32_t Quantization = 0,
589  int32_t Overflow = 0) noexcept;
590 template <int W, int rW>
591 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<rW>
592 __spirv_FixedSinINTEL(sycl::detail::ap_int<W> a, bool S, int32_t I, int32_t rI,
593  int32_t Quantization = 0, int32_t Overflow = 0) noexcept;
594 template <int W, int rW>
595 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<rW>
596 __spirv_FixedCosINTEL(sycl::detail::ap_int<W> a, bool S, int32_t I, int32_t rI,
597  int32_t Quantization = 0, int32_t Overflow = 0) noexcept;
598 template <int W, int rW>
599 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<2 * rW>
600 __spirv_FixedSinCosINTEL(sycl::detail::ap_int<W> a, bool S, int32_t I,
601  int32_t rI, int32_t Quantization = 0,
602  int32_t Overflow = 0) noexcept;
603 template <int W, int rW>
604 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<rW>
605 __spirv_FixedSinPiINTEL(sycl::detail::ap_int<W> a, bool S, int32_t I,
606  int32_t rI, int32_t Quantization = 0,
607  int32_t Overflow = 0) noexcept;
608 template <int W, int rW>
609 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<rW>
610 __spirv_FixedCosPiINTEL(sycl::detail::ap_int<W> a, bool S, int32_t I,
611  int32_t rI, int32_t Quantization = 0,
612  int32_t Overflow = 0) noexcept;
613 template <int W, int rW>
614 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<2 * rW>
615 __spirv_FixedSinCosPiINTEL(sycl::detail::ap_int<W> a, bool S, int32_t I,
616  int32_t rI, int32_t Quantization = 0,
617  int32_t Overflow = 0) noexcept;
618 template <int W, int rW>
619 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<rW>
620 __spirv_FixedLogINTEL(sycl::detail::ap_int<W> a, bool S, int32_t I, int32_t rI,
621  int32_t Quantization = 0, int32_t Overflow = 0) noexcept;
622 template <int W, int rW>
623 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<rW>
624 __spirv_FixedExpINTEL(sycl::detail::ap_int<W> a, bool S, int32_t I, int32_t rI,
625  int32_t Quantization = 0, int32_t Overflow = 0) noexcept;
626 
627 // In the following built-ins width of arbitrary precision integer type for
628 // a floating point variable should be equal to sum of corresponding
629 // exponent width E, mantissa width M and 1 for sign bit. I.e. WA = EA + MA + 1.
630 template <int WA, int Wout>
631 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<Wout>
632 __spirv_ArbitraryFloatCastINTEL(sycl::detail::ap_int<WA> A, int32_t MA,
633  int32_t Mout, int32_t EnableSubnormals = 0,
634  int32_t RoundingMode = 0,
635  int32_t RoundingAccuracy = 0) noexcept;
636 
637 template <int WA, int Wout>
638 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<Wout>
639 __spirv_ArbitraryFloatCastFromIntINTEL(sycl::detail::ap_int<WA> A, int32_t Mout,
640  bool FromSign = false,
641  int32_t EnableSubnormals = 0,
642  int32_t RoundingMode = 0,
643  int32_t RoundingAccuracy = 0) noexcept;
644 
645 template <int WA, int Wout>
646 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<Wout>
647 __spirv_ArbitraryFloatCastToIntINTEL(sycl::detail::ap_int<WA> A, int32_t MA,
648  bool ToSign = false,
649  int32_t EnableSubnormals = 0,
650  int32_t RoundingMode = 0,
651  int32_t RoundingAccuracy = 0) noexcept;
652 
653 template <int WA, int WB, int Wout>
654 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<Wout>
655 __spirv_ArbitraryFloatAddINTEL(sycl::detail::ap_int<WA> A, int32_t MA,
656  sycl::detail::ap_int<WB> B, int32_t MB,
657  int32_t Mout, int32_t EnableSubnormals = 0,
658  int32_t RoundingMode = 0,
659  int32_t RoundingAccuracy = 0) noexcept;
660 
661 template <int WA, int WB, int Wout>
662 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<Wout>
663 __spirv_ArbitraryFloatSubINTEL(sycl::detail::ap_int<WA> A, int32_t MA,
664  sycl::detail::ap_int<WB> B, int32_t MB,
665  int32_t Mout, int32_t EnableSubnormals = 0,
666  int32_t RoundingMode = 0,
667  int32_t RoundingAccuracy = 0) noexcept;
668 
669 template <int WA, int WB, int Wout>
670 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<Wout>
671 __spirv_ArbitraryFloatMulINTEL(sycl::detail::ap_int<WA> A, int32_t MA,
672  sycl::detail::ap_int<WB> B, int32_t MB,
673  int32_t Mout, int32_t EnableSubnormals = 0,
674  int32_t RoundingMode = 0,
675  int32_t RoundingAccuracy = 0) noexcept;
676 
677 template <int WA, int WB, int Wout>
678 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<Wout>
679 __spirv_ArbitraryFloatDivINTEL(sycl::detail::ap_int<WA> A, int32_t MA,
680  sycl::detail::ap_int<WB> B, int32_t MB,
681  int32_t Mout, int32_t EnableSubnormals = 0,
682  int32_t RoundingMode = 0,
683  int32_t RoundingAccuracy = 0) noexcept;
684 
685 // Comparison built-ins don't use Subnormal Support, Rounding Mode and
686 // Rounding Accuracy.
687 template <int WA, int WB>
688 extern __DPCPP_SYCL_EXTERNAL bool
689 __spirv_ArbitraryFloatGTINTEL(sycl::detail::ap_int<WA> A, int32_t MA,
690  sycl::detail::ap_int<WB> B, int32_t MB) noexcept;
691 
692 template <int WA, int WB>
693 extern __DPCPP_SYCL_EXTERNAL bool
694 __spirv_ArbitraryFloatGEINTEL(sycl::detail::ap_int<WA> A, int32_t MA,
695  sycl::detail::ap_int<WB> B, int32_t MB) noexcept;
696 
697 template <int WA, int WB>
698 extern __DPCPP_SYCL_EXTERNAL bool
699 __spirv_ArbitraryFloatLTINTEL(sycl::detail::ap_int<WA> A, int32_t MA,
700  sycl::detail::ap_int<WB> B, int32_t MB) noexcept;
701 
702 template <int WA, int WB>
703 extern __DPCPP_SYCL_EXTERNAL bool
704 __spirv_ArbitraryFloatLEINTEL(sycl::detail::ap_int<WA> A, int32_t MA,
705  sycl::detail::ap_int<WB> B, int32_t MB) noexcept;
706 
707 template <int WA, int WB>
708 extern __DPCPP_SYCL_EXTERNAL bool
709 __spirv_ArbitraryFloatEQINTEL(sycl::detail::ap_int<WA> A, int32_t MA,
710  sycl::detail::ap_int<WB> B, int32_t MB) noexcept;
711 
712 template <int WA, int Wout>
713 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<Wout>
714 __spirv_ArbitraryFloatRecipINTEL(sycl::detail::ap_int<WA> A, int32_t MA,
715  int32_t Mout, int32_t EnableSubnormals = 0,
716  int32_t RoundingMode = 0,
717  int32_t RoundingAccuracy = 0) noexcept;
718 
719 template <int WA, int Wout>
720 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<Wout>
721 __spirv_ArbitraryFloatRSqrtINTEL(sycl::detail::ap_int<WA> A, int32_t MA,
722  int32_t Mout, int32_t EnableSubnormals = 0,
723  int32_t RoundingMode = 0,
724  int32_t RoundingAccuracy = 0) noexcept;
725 
726 template <int WA, int Wout>
727 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<Wout>
728 __spirv_ArbitraryFloatCbrtINTEL(sycl::detail::ap_int<WA> A, int32_t MA,
729  int32_t Mout, int32_t EnableSubnormals = 0,
730  int32_t RoundingMode = 0,
731  int32_t RoundingAccuracy = 0) noexcept;
732 
733 template <int WA, int WB, int Wout>
734 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<Wout>
735 __spirv_ArbitraryFloatHypotINTEL(sycl::detail::ap_int<WA> A, int32_t MA,
736  sycl::detail::ap_int<WB> B, int32_t MB,
737  int32_t Mout, int32_t EnableSubnormals = 0,
738  int32_t RoundingMode = 0,
739  int32_t RoundingAccuracy = 0) noexcept;
740 
741 template <int WA, int Wout>
742 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<Wout>
743 __spirv_ArbitraryFloatSqrtINTEL(sycl::detail::ap_int<WA> A, int32_t MA,
744  int32_t Mout, int32_t EnableSubnormals = 0,
745  int32_t RoundingMode = 0,
746  int32_t RoundingAccuracy = 0) noexcept;
747 
748 template <int WA, int Wout>
749 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<Wout>
750 __spirv_ArbitraryFloatLogINTEL(sycl::detail::ap_int<WA> A, int32_t MA,
751  int32_t Mout, int32_t EnableSubnormals = 0,
752  int32_t RoundingMode = 0,
753  int32_t RoundingAccuracy = 0) noexcept;
754 
755 template <int WA, int Wout>
756 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<Wout>
757 __spirv_ArbitraryFloatLog2INTEL(sycl::detail::ap_int<WA> A, int32_t MA,
758  int32_t Mout, int32_t EnableSubnormals = 0,
759  int32_t RoundingMode = 0,
760  int32_t RoundingAccuracy = 0) noexcept;
761 
762 template <int WA, int Wout>
763 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<Wout>
764 __spirv_ArbitraryFloatLog10INTEL(sycl::detail::ap_int<WA> A, int32_t MA,
765  int32_t Mout, int32_t EnableSubnormals = 0,
766  int32_t RoundingMode = 0,
767  int32_t RoundingAccuracy = 0) noexcept;
768 
769 template <int WA, int Wout>
770 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<Wout>
771 __spirv_ArbitraryFloatLog1pINTEL(sycl::detail::ap_int<WA> A, int32_t MA,
772  int32_t Mout, int32_t EnableSubnormals = 0,
773  int32_t RoundingMode = 0,
774  int32_t RoundingAccuracy = 0) noexcept;
775 
776 template <int WA, int Wout>
777 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<Wout>
778 __spirv_ArbitraryFloatExpINTEL(sycl::detail::ap_int<WA> A, int32_t MA,
779  int32_t Mout, int32_t EnableSubnormals = 0,
780  int32_t RoundingMode = 0,
781  int32_t RoundingAccuracy = 0) noexcept;
782 
783 template <int WA, int Wout>
784 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<Wout>
785 __spirv_ArbitraryFloatExp2INTEL(sycl::detail::ap_int<WA> A, int32_t MA,
786  int32_t Mout, int32_t EnableSubnormals = 0,
787  int32_t RoundingMode = 0,
788  int32_t RoundingAccuracy = 0) noexcept;
789 
790 template <int WA, int Wout>
791 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<Wout>
792 __spirv_ArbitraryFloatExp10INTEL(sycl::detail::ap_int<WA> A, int32_t MA,
793  int32_t Mout, int32_t EnableSubnormals = 0,
794  int32_t RoundingMode = 0,
795  int32_t RoundingAccuracy = 0) noexcept;
796 
797 template <int WA, int Wout>
798 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<Wout>
799 __spirv_ArbitraryFloatExpm1INTEL(sycl::detail::ap_int<WA> A, int32_t MA,
800  int32_t Mout, int32_t EnableSubnormals = 0,
801  int32_t RoundingMode = 0,
802  int32_t RoundingAccuracy = 0) noexcept;
803 
804 template <int WA, int Wout>
805 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<Wout>
806 __spirv_ArbitraryFloatSinINTEL(sycl::detail::ap_int<WA> A, int32_t MA,
807  int32_t Mout, int32_t EnableSubnormals = 0,
808  int32_t RoundingMode = 0,
809  int32_t RoundingAccuracy = 0) noexcept;
810 
811 template <int WA, int Wout>
812 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<Wout>
813 __spirv_ArbitraryFloatCosINTEL(sycl::detail::ap_int<WA> A, int32_t MA,
814  int32_t Mout, int32_t EnableSubnormals = 0,
815  int32_t RoundingMode = 0,
816  int32_t RoundingAccuracy = 0) noexcept;
817 
818 // Result value contains both values of sine and cosine and so has the size of
819 // 2 * Wout where Wout is equal to (1 + Eout + Mout).
820 template <int WA, int Wout>
821 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<2 * Wout>
822 __spirv_ArbitraryFloatSinCosINTEL(sycl::detail::ap_int<WA> A, int32_t MA,
823  int32_t Mout, int32_t EnableSubnormals = 0,
824  int32_t RoundingMode = 0,
825  int32_t RoundingAccuracy = 0) noexcept;
826 
827 template <int WA, int Wout>
828 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<Wout>
829 __spirv_ArbitraryFloatSinPiINTEL(sycl::detail::ap_int<WA> A, int32_t MA,
830  int32_t Mout, int32_t EnableSubnormals = 0,
831  int32_t RoundingMode = 0,
832  int32_t RoundingAccuracy = 0) noexcept;
833 
834 template <int WA, int Wout>
835 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<Wout>
836 __spirv_ArbitraryFloatCosPiINTEL(sycl::detail::ap_int<WA> A, int32_t MA,
837  int32_t Mout, int32_t EnableSubnormals = 0,
838  int32_t RoundingMode = 0,
839  int32_t RoundingAccuracy = 0) noexcept;
840 
841 // Result value contains both values of sine(A*pi) and cosine(A*pi) and so has
842 // the size of 2 * Wout where Wout is equal to (1 + Eout + Mout).
843 template <int WA, int Wout>
844 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<2 * Wout>
845 __spirv_ArbitraryFloatSinCosPiINTEL(sycl::detail::ap_int<WA> A, int32_t MA,
846  int32_t Mout, int32_t EnableSubnormals = 0,
847  int32_t RoundingMode = 0,
848  int32_t RoundingAccuracy = 0) noexcept;
849 
850 template <int WA, int Wout>
851 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<Wout>
852 __spirv_ArbitraryFloatASinINTEL(sycl::detail::ap_int<WA> A, int32_t MA,
853  int32_t Mout, int32_t EnableSubnormals = 0,
854  int32_t RoundingMode = 0,
855  int32_t RoundingAccuracy = 0) noexcept;
856 
857 template <int WA, int Wout>
858 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<Wout>
859 __spirv_ArbitraryFloatASinPiINTEL(sycl::detail::ap_int<WA> A, int32_t MA,
860  int32_t Mout, int32_t EnableSubnormals = 0,
861  int32_t RoundingMode = 0,
862  int32_t RoundingAccuracy = 0) noexcept;
863 
864 template <int WA, int Wout>
865 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<Wout>
866 __spirv_ArbitraryFloatACosINTEL(sycl::detail::ap_int<WA> A, int32_t MA,
867  int32_t Mout, int32_t EnableSubnormals = 0,
868  int32_t RoundingMode = 0,
869  int32_t RoundingAccuracy = 0) noexcept;
870 
871 template <int WA, int Wout>
872 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<Wout>
873 __spirv_ArbitraryFloatACosPiINTEL(sycl::detail::ap_int<WA> A, int32_t MA,
874  int32_t Mout, int32_t EnableSubnormals = 0,
875  int32_t RoundingMode = 0,
876  int32_t RoundingAccuracy = 0) noexcept;
877 
878 template <int WA, int Wout>
879 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<Wout>
880 __spirv_ArbitraryFloatATanINTEL(sycl::detail::ap_int<WA> A, int32_t MA,
881  int32_t Mout, int32_t EnableSubnormals = 0,
882  int32_t RoundingMode = 0,
883  int32_t RoundingAccuracy = 0) noexcept;
884 
885 template <int WA, int Wout>
886 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<Wout>
887 __spirv_ArbitraryFloatATanPiINTEL(sycl::detail::ap_int<WA> A, int32_t MA,
888  int32_t Mout, int32_t EnableSubnormals = 0,
889  int32_t RoundingMode = 0,
890  int32_t RoundingAccuracy = 0) noexcept;
891 
892 template <int WA, int WB, int Wout>
893 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<Wout>
894 __spirv_ArbitraryFloatATan2INTEL(sycl::detail::ap_int<WA> A, int32_t MA,
895  sycl::detail::ap_int<WB> B, int32_t MB,
896  int32_t Mout, int32_t EnableSubnormals = 0,
897  int32_t RoundingMode = 0,
898  int32_t RoundingAccuracy = 0) noexcept;
899 
900 template <int WA, int WB, int Wout>
901 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<Wout>
902 __spirv_ArbitraryFloatPowINTEL(sycl::detail::ap_int<WA> A, int32_t MA,
903  sycl::detail::ap_int<WB> B, int32_t MB,
904  int32_t Mout, int32_t EnableSubnormals = 0,
905  int32_t RoundingMode = 0,
906  int32_t RoundingAccuracy = 0) noexcept;
907 
908 template <int WA, int WB, int Wout>
909 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<Wout>
910 __spirv_ArbitraryFloatPowRINTEL(sycl::detail::ap_int<WA> A, int32_t MA,
911  sycl::detail::ap_int<WB> B, int32_t MB,
912  int32_t Mout, int32_t EnableSubnormals = 0,
913  int32_t RoundingMode = 0,
914  int32_t RoundingAccuracy = 0) noexcept;
915 
916 // PowN built-in calculates `A^B` where `A` is arbitrary precision floating
917 // point number and `B` is signed or unsigned arbitrary precision integer,
918 // i.e. its width doesn't depend on sum of exponent and mantissa.
919 template <int WA, int WB, int Wout>
920 extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<Wout>
921 __spirv_ArbitraryFloatPowNINTEL(sycl::detail::ap_int<WA> A, int32_t MA,
922  sycl::detail::ap_int<WB> B, bool SignOfB,
923  int32_t Mout, int32_t EnableSubnormals = 0,
924  int32_t RoundingMode = 0,
925  int32_t RoundingAccuracy = 0) noexcept;
926 
927 template <typename dataT>
928 extern __DPCPP_SYCL_EXTERNAL int32_t
929 __spirv_ReadPipe(__ocl_RPipeTy<dataT> Pipe, dataT *Data, int32_t Size,
930  int32_t Alignment) noexcept;
931 template <typename dataT>
932 extern __DPCPP_SYCL_EXTERNAL int32_t
933 __spirv_WritePipe(__ocl_WPipeTy<dataT> Pipe, const dataT *Data, int32_t Size,
934  int32_t Alignment) noexcept;
935 template <typename dataT>
936 extern __DPCPP_SYCL_EXTERNAL void
937 __spirv_ReadPipeBlockingINTEL(__ocl_RPipeTy<dataT> Pipe, dataT *Data,
938  int32_t Size, int32_t Alignment) noexcept;
939 template <typename dataT>
940 extern __DPCPP_SYCL_EXTERNAL void
941 __spirv_WritePipeBlockingINTEL(__ocl_WPipeTy<dataT> Pipe, const dataT *Data,
942  int32_t Size, int32_t Alignment) noexcept;
943 template <typename dataT>
944 extern __DPCPP_SYCL_EXTERNAL __ocl_RPipeTy<dataT>
945 __spirv_CreatePipeFromPipeStorage_read(
946  const ConstantPipeStorage *Storage) noexcept;
947 template <typename dataT>
948 extern __DPCPP_SYCL_EXTERNAL __ocl_WPipeTy<dataT>
949 __spirv_CreatePipeFromPipeStorage_write(
950  const ConstantPipeStorage *Storage) noexcept;
951 
952 extern __DPCPP_SYCL_EXTERNAL void
953 __spirv_ocl_prefetch(const __attribute__((opencl_global)) char *Ptr,
954  size_t NumBytes) noexcept;
955 
956 extern __DPCPP_SYCL_EXTERNAL uint16_t
957 __spirv_ConvertFToBF16INTEL(float) noexcept;
958 extern __DPCPP_SYCL_EXTERNAL float
959  __spirv_ConvertBF16ToFINTEL(uint16_t) noexcept;
960 
962  __SYCL_EXPORT __ocl_vec_t<uint32_t, 4>
963  __spirv_GroupNonUniformBallot(uint32_t Execution, bool Predicate) noexcept;
964 
965 // TODO: I'm not 100% sure that these NonUniform instructions should be
966 // convergent Following precedent set for GroupNonUniformBallot above
967 __SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT uint32_t
968 __spirv_GroupNonUniformBallotBitCount(__spv::Scope::Flag, int,
969  __ocl_vec_t<uint32_t, 4>) noexcept;
970 
971 __SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT int
972  __spirv_GroupNonUniformBallotFindLSB(__spv::Scope::Flag,
973  __ocl_vec_t<uint32_t, 4>) noexcept;
974 
975 template <typename ValueT, typename IdT>
976 __SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT ValueT
977  __spirv_GroupNonUniformBroadcast(__spv::Scope::Flag, ValueT, IdT);
978 
979 template <typename ValueT, typename IdT>
980 __SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT ValueT
981  __spirv_GroupNonUniformShuffle(__spv::Scope::Flag, ValueT, IdT) noexcept;
982 
983 __SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT bool
984 __spirv_GroupNonUniformAll(__spv::Scope::Flag, bool);
985 
986 __SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT bool
987 __spirv_GroupNonUniformAny(__spv::Scope::Flag, bool);
988 
989 template <typename ValueT>
990 __SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT ValueT
991 __spirv_GroupNonUniformSMin(__spv::Scope::Flag, unsigned int, ValueT);
992 
993 template <typename ValueT>
994 __SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT ValueT
995 __spirv_GroupNonUniformUMin(__spv::Scope::Flag, unsigned int, ValueT);
996 
997 template <typename ValueT>
998 __SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT ValueT
999 __spirv_GroupNonUniformFMin(__spv::Scope::Flag, unsigned int, ValueT);
1000 
1001 template <typename ValueT>
1002 __SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT ValueT
1003 __spirv_GroupNonUniformSMax(__spv::Scope::Flag, unsigned int, ValueT);
1004 
1005 template <typename ValueT>
1006 __SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT ValueT
1007 __spirv_GroupNonUniformUMax(__spv::Scope::Flag, unsigned int, ValueT);
1008 
1009 template <typename ValueT>
1010 __SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT ValueT
1011 __spirv_GroupNonUniformFMax(__spv::Scope::Flag, unsigned int, ValueT);
1012 
1013 template <typename ValueT>
1014 __SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT ValueT
1015 __spirv_GroupNonUniformIAdd(__spv::Scope::Flag, unsigned int, ValueT);
1016 
1017 template <typename ValueT>
1018 __SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT ValueT
1019 __spirv_GroupNonUniformFAdd(__spv::Scope::Flag, unsigned int, ValueT);
1020 
1021 template <typename ValueT>
1022 __SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT ValueT
1023 __spirv_GroupNonUniformIMul(__spv::Scope::Flag, unsigned int, ValueT);
1024 
1025 template <typename ValueT>
1026 __SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT ValueT
1027 __spirv_GroupNonUniformFMul(__spv::Scope::Flag, unsigned int, ValueT);
1028 
1029 template <typename ValueT>
1030 __SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT ValueT
1031 __spirv_GroupNonUniformBitwiseOr(__spv::Scope::Flag, unsigned int, ValueT);
1032 
1033 template <typename ValueT>
1034 __SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT ValueT
1035 __spirv_GroupNonUniformBitwiseXor(__spv::Scope::Flag, unsigned int, ValueT);
1036 
1037 template <typename ValueT>
1038 __SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT ValueT
1039 __spirv_GroupNonUniformBitwiseAnd(__spv::Scope::Flag, unsigned int, ValueT);
1040 
1041 template <typename ValueT>
1042 __SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT ValueT
1043 __spirv_GroupNonUniformSMin(__spv::Scope::Flag, unsigned int, ValueT,
1044  unsigned int);
1045 
1046 template <typename ValueT>
1047 __SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT ValueT
1048 __spirv_GroupNonUniformUMin(__spv::Scope::Flag, unsigned int, ValueT,
1049  unsigned int);
1050 
1051 template <typename ValueT>
1052 __SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT ValueT
1053 __spirv_GroupNonUniformFMin(__spv::Scope::Flag, unsigned int, ValueT,
1054  unsigned int);
1055 
1056 template <typename ValueT>
1057 __SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT ValueT
1058 __spirv_GroupNonUniformSMax(__spv::Scope::Flag, unsigned int, ValueT,
1059  unsigned int);
1060 
1061 template <typename ValueT>
1062 __SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT ValueT
1063 __spirv_GroupNonUniformUMax(__spv::Scope::Flag, unsigned int, ValueT,
1064  unsigned int);
1065 
1066 template <typename ValueT>
1067 __SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT ValueT
1068 __spirv_GroupNonUniformFMax(__spv::Scope::Flag, unsigned int, ValueT,
1069  unsigned int);
1070 
1071 template <typename ValueT>
1072 __SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT ValueT
1073 __spirv_GroupNonUniformIAdd(__spv::Scope::Flag, unsigned int, ValueT,
1074  unsigned int);
1075 
1076 template <typename ValueT>
1077 __SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT ValueT
1078 __spirv_GroupNonUniformFAdd(__spv::Scope::Flag, unsigned int, ValueT,
1079  unsigned int);
1080 
1081 template <typename ValueT>
1082 __SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT ValueT
1083 __spirv_GroupNonUniformIMul(__spv::Scope::Flag, unsigned int, ValueT,
1084  unsigned int);
1085 
1086 template <typename ValueT>
1087 __SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT ValueT
1088 __spirv_GroupNonUniformFMul(__spv::Scope::Flag, unsigned int, ValueT,
1089  unsigned int);
1090 
1091 template <typename ValueT>
1092 __SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT ValueT
1093 __spirv_GroupNonUniformBitwiseOr(__spv::Scope::Flag, unsigned int, ValueT,
1094  unsigned int);
1095 
1096 template <typename ValueT>
1097 __SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT ValueT
1098 __spirv_GroupNonUniformBitwiseXor(__spv::Scope::Flag, unsigned int, ValueT,
1099  unsigned int);
1100 
1101 template <typename ValueT>
1102 __SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT ValueT
1103 __spirv_GroupNonUniformBitwiseAnd(__spv::Scope::Flag, unsigned int, ValueT,
1104  unsigned int);
1105 
1106 extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT void
1107 __clc_BarrierInitialize(int64_t *state, int32_t expected_count) noexcept;
1108 
1109 extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT void
1110 __clc_BarrierInvalidate(int64_t *state) noexcept;
1111 
1112 extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT int64_t
1113 __clc_BarrierArrive(int64_t *state) noexcept;
1114 
1115 extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT int64_t
1116 __clc_BarrierArriveAndDrop(int64_t *state) noexcept;
1117 
1118 extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT int64_t
1119 __clc_BarrierArriveNoComplete(int64_t *state, int32_t count) noexcept;
1120 
1121 extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT int64_t
1122 __clc_BarrierArriveAndDropNoComplete(int64_t *state, int32_t count) noexcept;
1123 
1124 extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT void
1125 __clc_BarrierCopyAsyncArrive(int64_t *state) noexcept;
1126 
1127 extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT void
1128 __clc_BarrierCopyAsyncArriveNoInc(int64_t *state) noexcept;
1129 
1130 __SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT void
1131 __clc_BarrierWait(int64_t *state, int64_t arrival) noexcept;
1132 
1133 extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT bool
1134 __clc_BarrierTestWait(int64_t *state, int64_t arrival) noexcept;
1135 
1136 __SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT void
1137 __clc_BarrierArriveAndWait(int64_t *state) noexcept;
1138 
1139 #ifdef __SYCL_USE_NON_VARIADIC_SPIRV_OCL_PRINTF__
1140 template <typename... Args>
1141 extern __DPCPP_SYCL_EXTERNAL int
1142 __spirv_ocl_printf(const __attribute__((opencl_constant)) char *Format,
1143  Args... args);
1144 template <typename... Args>
1145 extern __DPCPP_SYCL_EXTERNAL int __spirv_ocl_printf(const char *Format,
1146  Args... args);
1147 #else
1148 extern __DPCPP_SYCL_EXTERNAL int
1149 __spirv_ocl_printf(const __attribute__((opencl_constant)) char *Format, ...);
1150 extern __DPCPP_SYCL_EXTERNAL int __spirv_ocl_printf(const char *Format, ...);
1151 #endif
1152 
1153 // Native builtin extension
1154 
1155 extern __DPCPP_SYCL_EXTERNAL float __clc_native_tanh(float);
1156 extern __DPCPP_SYCL_EXTERNAL __ocl_vec_t<float, 2>
1157  __clc_native_tanh(__ocl_vec_t<float, 2>);
1158 extern __DPCPP_SYCL_EXTERNAL __ocl_vec_t<float, 3>
1159  __clc_native_tanh(__ocl_vec_t<float, 3>);
1160 extern __DPCPP_SYCL_EXTERNAL __ocl_vec_t<float, 4>
1161  __clc_native_tanh(__ocl_vec_t<float, 4>);
1162 extern __DPCPP_SYCL_EXTERNAL __ocl_vec_t<float, 8>
1163  __clc_native_tanh(__ocl_vec_t<float, 8>);
1164 extern __DPCPP_SYCL_EXTERNAL __ocl_vec_t<float, 16>
1165  __clc_native_tanh(__ocl_vec_t<float, 16>);
1166 
1167 extern __DPCPP_SYCL_EXTERNAL _Float16 __clc_native_tanh(_Float16);
1168 extern __DPCPP_SYCL_EXTERNAL __ocl_vec_t<_Float16, 2>
1169  __clc_native_tanh(__ocl_vec_t<_Float16, 2>);
1170 extern __DPCPP_SYCL_EXTERNAL __ocl_vec_t<_Float16, 3>
1171  __clc_native_tanh(__ocl_vec_t<_Float16, 3>);
1172 extern __DPCPP_SYCL_EXTERNAL __ocl_vec_t<_Float16, 4>
1173  __clc_native_tanh(__ocl_vec_t<_Float16, 4>);
1174 extern __DPCPP_SYCL_EXTERNAL __ocl_vec_t<_Float16, 8>
1175  __clc_native_tanh(__ocl_vec_t<_Float16, 8>);
1176 extern __DPCPP_SYCL_EXTERNAL __ocl_vec_t<_Float16, 16>
1177  __clc_native_tanh(__ocl_vec_t<_Float16, 16>);
1178 
1179 extern __DPCPP_SYCL_EXTERNAL _Float16 __clc_native_exp2(_Float16);
1180 extern __DPCPP_SYCL_EXTERNAL __ocl_vec_t<_Float16, 2>
1181  __clc_native_exp2(__ocl_vec_t<_Float16, 2>);
1182 extern __DPCPP_SYCL_EXTERNAL __ocl_vec_t<_Float16, 3>
1183  __clc_native_exp2(__ocl_vec_t<_Float16, 3>);
1184 extern __DPCPP_SYCL_EXTERNAL __ocl_vec_t<_Float16, 4>
1185  __clc_native_exp2(__ocl_vec_t<_Float16, 4>);
1186 extern __DPCPP_SYCL_EXTERNAL __ocl_vec_t<_Float16, 8>
1187  __clc_native_exp2(__ocl_vec_t<_Float16, 8>);
1188 extern __DPCPP_SYCL_EXTERNAL __ocl_vec_t<_Float16, 16>
1189  __clc_native_exp2(__ocl_vec_t<_Float16, 16>);
1190 
1191 #define __CLC_BF16(...) \
1192  extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT __VA_ARGS__ __clc_fabs( \
1193  __VA_ARGS__) noexcept; \
1194  extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT __VA_ARGS__ __clc_fmin( \
1195  __VA_ARGS__, __VA_ARGS__) noexcept; \
1196  extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT __VA_ARGS__ __clc_fmax( \
1197  __VA_ARGS__, __VA_ARGS__) noexcept; \
1198  extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT __VA_ARGS__ __clc_fma( \
1199  __VA_ARGS__, __VA_ARGS__, __VA_ARGS__) noexcept;
1200 
1201 #define __CLC_BF16_SCAL_VEC(TYPE) \
1202  __CLC_BF16(TYPE) \
1203  __CLC_BF16(__ocl_vec_t<TYPE, 2>) \
1204  __CLC_BF16(__ocl_vec_t<TYPE, 3>) \
1205  __CLC_BF16(__ocl_vec_t<TYPE, 4>) \
1206  __CLC_BF16(__ocl_vec_t<TYPE, 8>) \
1207  __CLC_BF16(__ocl_vec_t<TYPE, 16>)
1208 
1209 __CLC_BF16_SCAL_VEC(uint16_t)
1210 __CLC_BF16_SCAL_VEC(uint32_t)
1211 
1212 #undef __CLC_BF16_SCAL_VEC
1213 #undef __CLC_BF16
1214 
1215 extern __DPCPP_SYCL_EXTERNAL int32_t __spirv_BuiltInGlobalHWThreadIDINTEL();
1216 extern __DPCPP_SYCL_EXTERNAL int32_t __spirv_BuiltInSubDeviceIDINTEL();
1217 
1218 template <typename from, typename to>
1219 extern __DPCPP_SYCL_EXTERNAL
1220  std::enable_if_t<std::is_integral_v<to> && std::is_unsigned_v<to>, to>
1221  __spirv_ConvertPtrToU(from val) noexcept;
1222 
1223 #else // if !__SYCL_DEVICE_ONLY__
1224 
1225 template <typename dataT>
1228  const dataT *Src, size_t NumElements,
1229  size_t Stride, __ocl_event_t) noexcept {
1230  for (size_t i = 0; i < NumElements; i++) {
1231  Dest[i] = Src[i * Stride];
1232  }
1233  // A real instance of the class is not needed, return dummy pointer.
1234  return nullptr;
1235 }
1236 
1237 template <typename dataT>
1240  const dataT *Src, size_t NumElements,
1241  size_t Stride, __ocl_event_t) noexcept {
1242  for (size_t i = 0; i < NumElements; i++) {
1243  Dest[i * Stride] = Src[i];
1244  }
1245  // A real instance of the class is not needed, return dummy pointer.
1246  return nullptr;
1247 }
1248 
1249 extern __SYCL_EXPORT void __spirv_ocl_prefetch(const char *Ptr,
1250  size_t NumBytes) noexcept;
1251 
1252 __SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT void
1254  uint32_t Semantics) noexcept;
1255 
1256 __SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT void
1257 __spirv_MemoryBarrier(__spv::Scope Memory, uint32_t Semantics) noexcept;
1258 
1259 __SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT void
1260 __spirv_GroupWaitEvents(__spv::Scope Execution, uint32_t NumEvents,
1261  __ocl_event_t *WaitEvents) noexcept;
1262 #endif // !__SYCL_DEVICE_ONLY__
__spv::StorageClass::Function
@ Function
Definition: spirv_types.hpp:52
__spv::MatrixLayout::RowMajor
@ RowMajor
__spirv_ControlBarrier
__SYCL_CONVERGENT__ __DPCPP_SYCL_EXTERNAL void __spirv_ControlBarrier(__spv::Scope Execution, __spv::Scope Memory, uint32_t Semantics) noexcept
Definition: spirv_ops.cpp:26
__spv
Definition: spirv_types.hpp:24
sycl::_V1::detail::int64_t
conditional< sizeof(long)==8, long, long long >::type int64_t
Definition: kernel_desc.hpp:35
__SYCL_OpGroupAsyncCopyGlobalToLocal
__SYCL_CONVERGENT__ __ocl_event_t __SYCL_OpGroupAsyncCopyGlobalToLocal(__spv::Scope::Flag, dataT *Dest, const dataT *Src, size_t NumElements, size_t Stride, __ocl_event_t) noexcept
Definition: spirv_ops.hpp:1227
detail
---— Error handling, matching OpenCL plugin semantics.
Definition: common.hpp:44
__spirv_ocl_prefetch
void __spirv_ocl_prefetch(const char *Ptr, size_t NumBytes) noexcept
Definition: spirv_ops.cpp:47
__spv::__spirv_JointMatrixINTEL
Definition: spirv_types.hpp:136
sycl
Definition: access.hpp:18
__spirv_MemoryBarrier
__SYCL_CONVERGENT__ __DPCPP_SYCL_EXTERNAL void __spirv_MemoryBarrier(__spv::Scope Memory, uint32_t Semantics) noexcept
Definition: spirv_ops.cpp:36
__ocl_event_t
void * __ocl_event_t
Definition: spirv_types.hpp:179
__spv::MatrixUse
MatrixUse
Definition: spirv_types.hpp:130
__SYCL_OpGroupAsyncCopyLocalToGlobal
__SYCL_CONVERGENT__ __ocl_event_t __SYCL_OpGroupAsyncCopyLocalToGlobal(__spv::Scope::Flag, dataT *Dest, const dataT *Src, size_t NumElements, size_t Stride, __ocl_event_t) noexcept
Definition: spirv_ops.hpp:1239
__spirv_GroupWaitEvents
__SYCL_CONVERGENT__ __DPCPP_SYCL_EXTERNAL void __spirv_GroupWaitEvents(__spv::Scope Execution, uint32_t NumEvents, __ocl_event_t *WaitEvents) noexcept
Definition: spirv_ops.cpp:18
__spv::Scope::Flag
Flag
Definition: spirv_types.hpp:28
export.hpp
defines_elementary.hpp
__SYCL_CONVERGENT__
#define __SYCL_CONVERGENT__
Definition: spirv_ops.hpp:22
sycl::_V1::ext::oneapi::experimental::detail::Alignment
@ Alignment
Definition: property.hpp:193
__spv::StorageClass::CrossWorkgroup
@ CrossWorkgroup
Definition: spirv_types.hpp:50
__spv::StorageClass::Workgroup
@ Workgroup
Definition: spirv_types.hpp:49
__spv::MatrixLayout
MatrixLayout
Definition: spirv_types.hpp:114
__spv::Scope
Definition: spirv_types.hpp:26
sycl::_V1::ext::oneapi::experimental::__attribute__
__attribute__((always_inline)) auto invoke_simd(sycl
The invoke_simd free function invokes a SIMD function using all work-items in a sub_group.
Definition: invoke_simd.hpp:460
__DPCPP_SYCL_EXTERNAL
#define __DPCPP_SYCL_EXTERNAL
Definition: defines_elementary.hpp:33
__ocl_sampler_t
void * __ocl_sampler_t
Definition: spirv_types.hpp:180
spirv_types.hpp