19 #ifdef __SYCL_DEVICE_ONLY__
20 #define __SYCL_CONVERGENT__ __attribute__((convergent))
22 #define __SYCL_CONVERGENT__
25 #ifdef __SYCL_DEVICE_ONLY__
27 #if (SYCL_EXT_ONEAPI_MATRIX_VERSION > 1)
29 template <
typename T,
typename Tp, std::size_t R, std::size_t C,
35 __spirv_JointMatrixLoadINTEL(T *Ptr, std::size_t Stride,
39 template <
typename T,
typename Tp, std::size_t R, std::size_t C,
48 template <
typename T1,
typename T2, std::size_t M, std::size_t K, std::size_t N,
56 __spirv_JointMatrixMadINTEL(
62 template <
typename T1,
typename T2,
typename T3, std::size_t M, std::size_t K,
71 __spirv_JointMatrixUUMadINTEL(
77 template <
typename T1,
typename T2,
typename T3, std::size_t M, std::size_t K,
86 __spirv_JointMatrixUSMadINTEL(
92 template <
typename T1,
typename T2,
typename T3, std::size_t M, std::size_t K,
101 __spirv_JointMatrixSUMadINTEL(
107 template <
typename T,
typename Tp, std::size_t R, std::size_t C,
113 __spirv_CompositeConstruct(
const T v);
119 __spirv_JointMatrixGetElementCoordINTEL(
128 template <
typename Ts,
typename T, std::size_t R, std::size_t C,
135 template <
typename Ts,
typename T, std::size_t R, std::size_t C,
143 template <
typename T,
typename Tp, std::size_t R, std::size_t C,
147 __spirv_JointMatrixLoadINTEL(T *Ptr, std::size_t Stride,
151 template <
typename T,
typename Tp, std::size_t R, std::size_t C,
159 template <
typename T1,
typename T2, std::size_t M, std::size_t K, std::size_t N,
165 __spirv_JointMatrixMadINTEL(
171 template <
typename T1,
typename T2,
typename T3, std::size_t M, std::size_t K,
177 __spirv_JointMatrixUUMadINTEL(
183 template <
typename T1,
typename T2,
typename T3, std::size_t M, std::size_t K,
189 __spirv_JointMatrixUSMadINTEL(
195 template <
typename T1,
typename T2,
typename T3, std::size_t M, std::size_t K,
201 __spirv_JointMatrixSUMadINTEL(
207 template <
typename T, std::size_t R, std::size_t C,
211 __spirv_CompositeConstruct(
const T v);
213 template <
typename T, std::size_t R, std::size_t C,
219 template <
typename T, std::size_t R, std::size_t C,
225 template <
typename T, std::size_t R, std::size_t C,
231 #endif // SYCL_EXT_ONEAPI_MATRIX_VERSION
233 #ifndef __SPIRV_BUILTIN_DECLARATIONS__
235 "SPIR-V built-ins are not available. Please set -fdeclare-spirv-builtins flag."
238 template <
typename RetT,
typename ImageT>
241 template <
typename RetT,
typename ImageT>
244 template <
typename RetT,
typename ImageT>
247 template <
typename ImageT,
typename CoordT,
typename ValT>
250 template <
class RetT,
typename ImageT,
typename TempArgT>
253 template <
typename ImageT,
typename SampledType>
257 template <
typename SampledType,
typename TempRetT,
typename TempArgT>
259 __spirv_ImageSampleExplicitLod(SampledType, TempArgT,
int,
float);
261 template <
typename SampledType,
typename TempRetT,
typename TempArgT>
263 __spirv_ImageSampleExplicitLod(SampledType, TempArgT,
int, TempArgT, TempArgT);
265 #define __SYCL_OpGroupAsyncCopyGlobalToLocal __spirv_GroupAsyncCopy
266 #define __SYCL_OpGroupAsyncCopyLocalToGlobal __spirv_GroupAsyncCopy
269 #define __SPIRV_ATOMIC_LOAD(AS, Type) \
270 extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicLoad( \
271 AS const Type *P, __spv::Scope::Flag S, \
272 __spv::MemorySemanticsMask::Flag O);
273 #define __SPIRV_ATOMIC_STORE(AS, Type) \
274 extern __DPCPP_SYCL_EXTERNAL void __spirv_AtomicStore( \
275 AS Type *P, __spv::Scope::Flag S, __spv::MemorySemanticsMask::Flag O, \
277 #define __SPIRV_ATOMIC_EXCHANGE(AS, Type) \
278 extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicExchange( \
279 AS Type *P, __spv::Scope::Flag S, __spv::MemorySemanticsMask::Flag O, \
281 #define __SPIRV_ATOMIC_CMP_EXCHANGE(AS, Type) \
282 extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicCompareExchange( \
283 AS Type *P, __spv::Scope::Flag S, __spv::MemorySemanticsMask::Flag E, \
284 __spv::MemorySemanticsMask::Flag U, Type V, Type C);
285 #define __SPIRV_ATOMIC_IADD(AS, Type) \
286 extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicIAdd( \
287 AS Type *P, __spv::Scope::Flag S, __spv::MemorySemanticsMask::Flag O, \
289 #define __SPIRV_ATOMIC_ISUB(AS, Type) \
290 extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicISub( \
291 AS Type *P, __spv::Scope::Flag S, __spv::MemorySemanticsMask::Flag O, \
293 #define __SPIRV_ATOMIC_FADD(AS, Type) \
294 extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicFAddEXT( \
295 AS Type *P, __spv::Scope::Flag S, __spv::MemorySemanticsMask::Flag O, \
297 #define __SPIRV_ATOMIC_SMIN(AS, Type) \
298 extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicSMin( \
299 AS Type *P, __spv::Scope::Flag S, __spv::MemorySemanticsMask::Flag O, \
301 #define __SPIRV_ATOMIC_UMIN(AS, Type) \
302 extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicUMin( \
303 AS Type *P, __spv::Scope::Flag S, __spv::MemorySemanticsMask::Flag O, \
305 #define __SPIRV_ATOMIC_FMIN(AS, Type) \
306 extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicFMinEXT( \
307 AS Type *P, __spv::Scope::Flag S, __spv::MemorySemanticsMask::Flag O, \
309 #define __SPIRV_ATOMIC_SMAX(AS, Type) \
310 extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicSMax( \
311 AS Type *P, __spv::Scope::Flag S, __spv::MemorySemanticsMask::Flag O, \
313 #define __SPIRV_ATOMIC_UMAX(AS, Type) \
314 extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicUMax( \
315 AS Type *P, __spv::Scope::Flag S, __spv::MemorySemanticsMask::Flag O, \
317 #define __SPIRV_ATOMIC_FMAX(AS, Type) \
318 extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicFMaxEXT( \
319 AS Type *P, __spv::Scope::Flag S, __spv::MemorySemanticsMask::Flag O, \
321 #define __SPIRV_ATOMIC_AND(AS, Type) \
322 extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicAnd( \
323 AS Type *P, __spv::Scope::Flag S, __spv::MemorySemanticsMask::Flag O, \
325 #define __SPIRV_ATOMIC_OR(AS, Type) \
326 extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicOr( \
327 AS Type *P, __spv::Scope::Flag S, __spv::MemorySemanticsMask::Flag O, \
329 #define __SPIRV_ATOMIC_XOR(AS, Type) \
330 extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicXor( \
331 AS Type *P, __spv::Scope::Flag S, __spv::MemorySemanticsMask::Flag O, \
334 #define __SPIRV_ATOMIC_FLOAT(AS, Type) \
335 __SPIRV_ATOMIC_FADD(AS, Type) \
336 __SPIRV_ATOMIC_FMIN(AS, Type) \
337 __SPIRV_ATOMIC_FMAX(AS, Type) \
338 __SPIRV_ATOMIC_LOAD(AS, Type) \
339 __SPIRV_ATOMIC_STORE(AS, Type) \
340 __SPIRV_ATOMIC_EXCHANGE(AS, Type)
342 #define __SPIRV_ATOMIC_BASE(AS, Type) \
343 __SPIRV_ATOMIC_FLOAT(AS, Type) \
344 __SPIRV_ATOMIC_CMP_EXCHANGE(AS, Type) \
345 __SPIRV_ATOMIC_IADD(AS, Type) \
346 __SPIRV_ATOMIC_ISUB(AS, Type) \
347 __SPIRV_ATOMIC_AND(AS, Type) \
348 __SPIRV_ATOMIC_OR(AS, Type) \
349 __SPIRV_ATOMIC_XOR(AS, Type)
351 #define __SPIRV_ATOMIC_SIGNED(AS, Type) \
352 __SPIRV_ATOMIC_BASE(AS, Type) \
353 __SPIRV_ATOMIC_SMIN(AS, Type) \
354 __SPIRV_ATOMIC_SMAX(AS, Type)
356 #define __SPIRV_ATOMIC_UNSIGNED(AS, Type) \
357 __SPIRV_ATOMIC_BASE(AS, Type) \
358 __SPIRV_ATOMIC_UMIN(AS, Type) \
359 __SPIRV_ATOMIC_UMAX(AS, Type)
363 #define __SPIRV_ATOMIC_MINMAX(AS, Op) \
364 template <typename T> \
365 typename std::enable_if_t< \
366 std::is_integral<T>::value && std::is_signed<T>::value, T> \
367 __spirv_Atomic##Op(AS T *Ptr, __spv::Scope::Flag Memory, \
368 __spv::MemorySemanticsMask::Flag Semantics, \
370 return __spirv_AtomicS##Op(Ptr, Memory, Semantics, Value); \
372 template <typename T> \
373 typename std::enable_if_t< \
374 std::is_integral<T>::value && !std::is_signed<T>::value, T> \
375 __spirv_Atomic##Op(AS T *Ptr, __spv::Scope::Flag Memory, \
376 __spv::MemorySemanticsMask::Flag Semantics, \
378 return __spirv_AtomicU##Op(Ptr, Memory, Semantics, Value); \
380 template <typename T> \
381 typename std::enable_if_t<std::is_floating_point<T>::value, T> \
382 __spirv_Atomic##Op(AS T *Ptr, __spv::Scope::Flag Memory, \
383 __spv::MemorySemanticsMask::Flag Semantics, \
385 return __spirv_AtomicF##Op##EXT(Ptr, Memory, Semantics, Value); \
388 #define __SPIRV_ATOMICS(macro, Arg) \
389 macro(__attribute__((opencl_global)), Arg) \
390 macro(__attribute__((opencl_local)), Arg) macro(, Arg)
392 __SPIRV_ATOMICS(__SPIRV_ATOMIC_FLOAT,
float)
393 __SPIRV_ATOMICS(__SPIRV_ATOMIC_FLOAT,
double)
394 __SPIRV_ATOMICS(__SPIRV_ATOMIC_SIGNED,
int)
395 __SPIRV_ATOMICS(__SPIRV_ATOMIC_SIGNED,
long)
396 __SPIRV_ATOMICS(__SPIRV_ATOMIC_SIGNED,
long long)
397 __SPIRV_ATOMICS(__SPIRV_ATOMIC_UNSIGNED,
unsigned int)
398 __SPIRV_ATOMICS(__SPIRV_ATOMIC_UNSIGNED,
unsigned long)
399 __SPIRV_ATOMICS(__SPIRV_ATOMIC_UNSIGNED,
unsigned long long)
400 __SPIRV_ATOMICS(__SPIRV_ATOMIC_MINMAX, Min)
401 __SPIRV_ATOMICS(__SPIRV_ATOMIC_MINMAX, Max)
403 #undef __SPIRV_ATOMICS
404 #undef __SPIRV_ATOMIC_AND
405 #undef __SPIRV_ATOMIC_BASE
406 #undef __SPIRV_ATOMIC_CMP_EXCHANGE
407 #undef __SPIRV_ATOMIC_EXCHANGE
408 #undef __SPIRV_ATOMIC_FADD
409 #undef __SPIRV_ATOMIC_FLOAT
410 #undef __SPIRV_ATOMIC_FMAX
411 #undef __SPIRV_ATOMIC_FMIN
412 #undef __SPIRV_ATOMIC_IADD
413 #undef __SPIRV_ATOMIC_ISUB
414 #undef __SPIRV_ATOMIC_LOAD
415 #undef __SPIRV_ATOMIC_MINMAX
416 #undef __SPIRV_ATOMIC_OR
417 #undef __SPIRV_ATOMIC_SIGNED
418 #undef __SPIRV_ATOMIC_SMAX
419 #undef __SPIRV_ATOMIC_SMIN
420 #undef __SPIRV_ATOMIC_STORE
421 #undef __SPIRV_ATOMIC_UMAX
422 #undef __SPIRV_ATOMIC_UMIN
423 #undef __SPIRV_ATOMIC_UNSIGNED
424 #undef __SPIRV_ATOMIC_XOR
426 template <
typename dataT>
428 __SYCL_GenericCastToPtrExplicit_ToGlobal(
void *Ptr) noexcept {
430 __spirv_GenericCastToPtrExplicit_ToGlobal(
434 template <
typename dataT>
436 __SYCL_GenericCastToPtrExplicit_ToGlobal(
const void *Ptr) noexcept {
438 __spirv_GenericCastToPtrExplicit_ToGlobal(
442 template <
typename dataT>
444 __SYCL_GenericCastToPtrExplicit_ToGlobal(
volatile void *Ptr) noexcept {
446 __spirv_GenericCastToPtrExplicit_ToGlobal(
450 template <
typename dataT>
452 __SYCL_GenericCastToPtrExplicit_ToGlobal(
const volatile void *Ptr) noexcept {
453 return (
const volatile __attribute__((opencl_global)) dataT *)
454 __spirv_GenericCastToPtrExplicit_ToGlobal(
458 template <
typename dataT>
460 __SYCL_GenericCastToPtrExplicit_ToLocal(
void *Ptr) noexcept {
462 __spirv_GenericCastToPtrExplicit_ToLocal(Ptr,
466 template <
typename dataT>
468 __SYCL_GenericCastToPtrExplicit_ToLocal(
const void *Ptr) noexcept {
470 __spirv_GenericCastToPtrExplicit_ToLocal(Ptr,
474 template <
typename dataT>
476 __SYCL_GenericCastToPtrExplicit_ToLocal(
volatile void *Ptr) noexcept {
478 __spirv_GenericCastToPtrExplicit_ToLocal(Ptr,
482 template <
typename dataT>
484 __SYCL_GenericCastToPtrExplicit_ToLocal(
const volatile void *Ptr) noexcept {
485 return (
const volatile __attribute__((opencl_local)) dataT *)
486 __spirv_GenericCastToPtrExplicit_ToLocal(Ptr,
490 template <
typename dataT>
492 __SYCL_GenericCastToPtrExplicit_ToPrivate(
void *Ptr) noexcept {
494 __spirv_GenericCastToPtrExplicit_ToPrivate(Ptr,
498 template <
typename dataT>
500 __SYCL_GenericCastToPtrExplicit_ToPrivate(
const void *Ptr) noexcept {
502 __spirv_GenericCastToPtrExplicit_ToPrivate(Ptr,
506 template <
typename dataT>
508 __SYCL_GenericCastToPtrExplicit_ToPrivate(
volatile void *Ptr) noexcept {
510 __spirv_GenericCastToPtrExplicit_ToPrivate(Ptr,
514 template <
typename dataT>
515 extern const volatile __attribute__((opencl_private)) dataT *
516 __SYCL_GenericCastToPtrExplicit_ToPrivate(
const volatile void *Ptr) noexcept {
517 return (
const volatile __attribute__((opencl_private)) dataT *)
518 __spirv_GenericCastToPtrExplicit_ToPrivate(Ptr,
522 template <
typename dataT>
524 __spirv_SubgroupShuffleINTEL(dataT Data, uint32_t InvocationId) noexcept;
525 template <
typename dataT>
527 __spirv_SubgroupShuffleDownINTEL(dataT Current, dataT Next,
528 uint32_t Delta) noexcept;
529 template <
typename dataT>
531 __spirv_SubgroupShuffleUpINTEL(dataT Previous, dataT Current,
532 uint32_t Delta) noexcept;
533 template <
typename dataT>
535 __spirv_SubgroupShuffleXorINTEL(dataT Data, uint32_t Value) noexcept;
537 template <
typename dataT>
539 __spirv_SubgroupBlockReadINTEL(
const __attribute__((opencl_global))
540 uint8_t *Ptr) noexcept;
542 template <
typename dataT>
544 __spirv_SubgroupBlockWriteINTEL(
__attribute__((opencl_global)) uint8_t *Ptr,
545 dataT Data) noexcept;
547 template <
typename dataT>
549 __spirv_SubgroupBlockReadINTEL(
const __attribute__((opencl_global))
550 uint16_t *Ptr) noexcept;
552 template <
typename dataT>
554 __spirv_SubgroupBlockWriteINTEL(
__attribute__((opencl_global)) uint16_t *Ptr,
555 dataT Data) noexcept;
557 template <
typename dataT>
559 __spirv_SubgroupBlockReadINTEL(
const __attribute__((opencl_global))
560 uint32_t *Ptr) noexcept;
562 template <
typename dataT>
564 __spirv_SubgroupBlockWriteINTEL(
__attribute__((opencl_global)) uint32_t *Ptr,
565 dataT Data) noexcept;
567 template <
typename dataT>
569 __spirv_SubgroupBlockReadINTEL(
const __attribute__((opencl_global))
570 uint64_t *Ptr) noexcept;
572 template <
typename dataT>
574 __spirv_SubgroupBlockWriteINTEL(
__attribute__((opencl_global)) uint64_t *Ptr,
575 dataT Data) noexcept;
576 template <
int W,
int rW>
578 __spirv_FixedSqrtINTEL(sycl::detail::ap_int<W> a,
bool S, int32_t I, int32_t rI,
579 int32_t Quantization = 0, int32_t Overflow = 0) noexcept;
580 template <
int W,
int rW>
582 __spirv_FixedRecipINTEL(
sycl::
detail::ap_int<W> a,
bool S, int32_t I,
583 int32_t rI, int32_t Quantization = 0,
584 int32_t Overflow = 0) noexcept;
585 template <
int W,
int rW>
587 __spirv_FixedRsqrtINTEL(
sycl::
detail::ap_int<W> a,
bool S, int32_t I,
588 int32_t rI, int32_t Quantization = 0,
589 int32_t Overflow = 0) noexcept;
590 template <
int W,
int rW>
592 __spirv_FixedSinINTEL(
sycl::
detail::ap_int<W> a,
bool S, int32_t I, int32_t rI,
593 int32_t Quantization = 0, int32_t Overflow = 0) noexcept;
594 template <
int W,
int rW>
596 __spirv_FixedCosINTEL(
sycl::
detail::ap_int<W> a,
bool S, int32_t I, int32_t rI,
597 int32_t Quantization = 0, int32_t Overflow = 0) noexcept;
598 template <
int W,
int rW>
600 __spirv_FixedSinCosINTEL(
sycl::
detail::ap_int<W> a,
bool S, int32_t I,
601 int32_t rI, int32_t Quantization = 0,
602 int32_t Overflow = 0) noexcept;
603 template <
int W,
int rW>
605 __spirv_FixedSinPiINTEL(
sycl::
detail::ap_int<W> a,
bool S, int32_t I,
606 int32_t rI, int32_t Quantization = 0,
607 int32_t Overflow = 0) noexcept;
608 template <
int W,
int rW>
610 __spirv_FixedCosPiINTEL(
sycl::
detail::ap_int<W> a,
bool S, int32_t I,
611 int32_t rI, int32_t Quantization = 0,
612 int32_t Overflow = 0) noexcept;
613 template <
int W,
int rW>
615 __spirv_FixedSinCosPiINTEL(
sycl::
detail::ap_int<W> a,
bool S, int32_t I,
616 int32_t rI, int32_t Quantization = 0,
617 int32_t Overflow = 0) noexcept;
618 template <
int W,
int rW>
620 __spirv_FixedLogINTEL(
sycl::
detail::ap_int<W> a,
bool S, int32_t I, int32_t rI,
621 int32_t Quantization = 0, int32_t Overflow = 0) noexcept;
622 template <
int W,
int rW>
624 __spirv_FixedExpINTEL(
sycl::
detail::ap_int<W> a,
bool S, int32_t I, int32_t rI,
625 int32_t Quantization = 0, int32_t Overflow = 0) noexcept;
630 template <
int WA,
int Wout>
632 __spirv_ArbitraryFloatCastINTEL(
sycl::
detail::ap_int<WA> A, int32_t MA,
633 int32_t Mout, int32_t EnableSubnormals = 0,
634 int32_t RoundingMode = 0,
635 int32_t RoundingAccuracy = 0) noexcept;
637 template <
int WA,
int Wout>
639 __spirv_ArbitraryFloatCastFromIntINTEL(
sycl::
detail::ap_int<WA> A, int32_t Mout,
640 bool FromSign = false,
641 int32_t EnableSubnormals = 0,
642 int32_t RoundingMode = 0,
643 int32_t RoundingAccuracy = 0) noexcept;
645 template <
int WA,
int Wout>
647 __spirv_ArbitraryFloatCastToIntINTEL(
sycl::
detail::ap_int<WA> A, int32_t MA,
649 int32_t EnableSubnormals = 0,
650 int32_t RoundingMode = 0,
651 int32_t RoundingAccuracy = 0) noexcept;
653 template <
int WA,
int WB,
int Wout>
655 __spirv_ArbitraryFloatAddINTEL(
sycl::
detail::ap_int<WA> A, int32_t MA,
657 int32_t Mout, int32_t EnableSubnormals = 0,
658 int32_t RoundingMode = 0,
659 int32_t RoundingAccuracy = 0) noexcept;
661 template <
int WA,
int WB,
int Wout>
663 __spirv_ArbitraryFloatSubINTEL(
sycl::
detail::ap_int<WA> A, int32_t MA,
665 int32_t Mout, int32_t EnableSubnormals = 0,
666 int32_t RoundingMode = 0,
667 int32_t RoundingAccuracy = 0) noexcept;
669 template <
int WA,
int WB,
int Wout>
671 __spirv_ArbitraryFloatMulINTEL(
sycl::
detail::ap_int<WA> A, int32_t MA,
673 int32_t Mout, int32_t EnableSubnormals = 0,
674 int32_t RoundingMode = 0,
675 int32_t RoundingAccuracy = 0) noexcept;
677 template <
int WA,
int WB,
int Wout>
679 __spirv_ArbitraryFloatDivINTEL(
sycl::
detail::ap_int<WA> A, int32_t MA,
681 int32_t Mout, int32_t EnableSubnormals = 0,
682 int32_t RoundingMode = 0,
683 int32_t RoundingAccuracy = 0) noexcept;
687 template <
int WA,
int WB>
689 __spirv_ArbitraryFloatGTINTEL(
sycl::
detail::ap_int<WA> A, int32_t MA,
690 sycl::
detail::ap_int<WB> B, int32_t MB) noexcept;
692 template <
int WA,
int WB>
694 __spirv_ArbitraryFloatGEINTEL(
sycl::
detail::ap_int<WA> A, int32_t MA,
695 sycl::
detail::ap_int<WB> B, int32_t MB) noexcept;
697 template <
int WA,
int WB>
699 __spirv_ArbitraryFloatLTINTEL(
sycl::
detail::ap_int<WA> A, int32_t MA,
700 sycl::
detail::ap_int<WB> B, int32_t MB) noexcept;
702 template <
int WA,
int WB>
704 __spirv_ArbitraryFloatLEINTEL(
sycl::
detail::ap_int<WA> A, int32_t MA,
705 sycl::
detail::ap_int<WB> B, int32_t MB) noexcept;
707 template <
int WA,
int WB>
709 __spirv_ArbitraryFloatEQINTEL(
sycl::
detail::ap_int<WA> A, int32_t MA,
710 sycl::
detail::ap_int<WB> B, int32_t MB) noexcept;
712 template <
int WA,
int Wout>
714 __spirv_ArbitraryFloatRecipINTEL(
sycl::
detail::ap_int<WA> A, int32_t MA,
715 int32_t Mout, int32_t EnableSubnormals = 0,
716 int32_t RoundingMode = 0,
717 int32_t RoundingAccuracy = 0) noexcept;
719 template <
int WA,
int Wout>
721 __spirv_ArbitraryFloatRSqrtINTEL(
sycl::
detail::ap_int<WA> A, int32_t MA,
722 int32_t Mout, int32_t EnableSubnormals = 0,
723 int32_t RoundingMode = 0,
724 int32_t RoundingAccuracy = 0) noexcept;
726 template <
int WA,
int Wout>
728 __spirv_ArbitraryFloatCbrtINTEL(
sycl::
detail::ap_int<WA> A, int32_t MA,
729 int32_t Mout, int32_t EnableSubnormals = 0,
730 int32_t RoundingMode = 0,
731 int32_t RoundingAccuracy = 0) noexcept;
733 template <
int WA,
int WB,
int Wout>
735 __spirv_ArbitraryFloatHypotINTEL(
sycl::
detail::ap_int<WA> A, int32_t MA,
737 int32_t Mout, int32_t EnableSubnormals = 0,
738 int32_t RoundingMode = 0,
739 int32_t RoundingAccuracy = 0) noexcept;
741 template <
int WA,
int Wout>
743 __spirv_ArbitraryFloatSqrtINTEL(
sycl::
detail::ap_int<WA> A, int32_t MA,
744 int32_t Mout, int32_t EnableSubnormals = 0,
745 int32_t RoundingMode = 0,
746 int32_t RoundingAccuracy = 0) noexcept;
748 template <
int WA,
int Wout>
750 __spirv_ArbitraryFloatLogINTEL(
sycl::
detail::ap_int<WA> A, int32_t MA,
751 int32_t Mout, int32_t EnableSubnormals = 0,
752 int32_t RoundingMode = 0,
753 int32_t RoundingAccuracy = 0) noexcept;
755 template <
int WA,
int Wout>
757 __spirv_ArbitraryFloatLog2INTEL(
sycl::
detail::ap_int<WA> A, int32_t MA,
758 int32_t Mout, int32_t EnableSubnormals = 0,
759 int32_t RoundingMode = 0,
760 int32_t RoundingAccuracy = 0) noexcept;
762 template <
int WA,
int Wout>
764 __spirv_ArbitraryFloatLog10INTEL(
sycl::
detail::ap_int<WA> A, int32_t MA,
765 int32_t Mout, int32_t EnableSubnormals = 0,
766 int32_t RoundingMode = 0,
767 int32_t RoundingAccuracy = 0) noexcept;
769 template <
int WA,
int Wout>
771 __spirv_ArbitraryFloatLog1pINTEL(
sycl::
detail::ap_int<WA> A, int32_t MA,
772 int32_t Mout, int32_t EnableSubnormals = 0,
773 int32_t RoundingMode = 0,
774 int32_t RoundingAccuracy = 0) noexcept;
776 template <
int WA,
int Wout>
778 __spirv_ArbitraryFloatExpINTEL(
sycl::
detail::ap_int<WA> A, int32_t MA,
779 int32_t Mout, int32_t EnableSubnormals = 0,
780 int32_t RoundingMode = 0,
781 int32_t RoundingAccuracy = 0) noexcept;
783 template <
int WA,
int Wout>
785 __spirv_ArbitraryFloatExp2INTEL(
sycl::
detail::ap_int<WA> A, int32_t MA,
786 int32_t Mout, int32_t EnableSubnormals = 0,
787 int32_t RoundingMode = 0,
788 int32_t RoundingAccuracy = 0) noexcept;
790 template <
int WA,
int Wout>
792 __spirv_ArbitraryFloatExp10INTEL(
sycl::
detail::ap_int<WA> A, int32_t MA,
793 int32_t Mout, int32_t EnableSubnormals = 0,
794 int32_t RoundingMode = 0,
795 int32_t RoundingAccuracy = 0) noexcept;
797 template <
int WA,
int Wout>
799 __spirv_ArbitraryFloatExpm1INTEL(
sycl::
detail::ap_int<WA> A, int32_t MA,
800 int32_t Mout, int32_t EnableSubnormals = 0,
801 int32_t RoundingMode = 0,
802 int32_t RoundingAccuracy = 0) noexcept;
804 template <
int WA,
int Wout>
806 __spirv_ArbitraryFloatSinINTEL(
sycl::
detail::ap_int<WA> A, int32_t MA,
807 int32_t Mout, int32_t EnableSubnormals = 0,
808 int32_t RoundingMode = 0,
809 int32_t RoundingAccuracy = 0) noexcept;
811 template <
int WA,
int Wout>
813 __spirv_ArbitraryFloatCosINTEL(
sycl::
detail::ap_int<WA> A, int32_t MA,
814 int32_t Mout, int32_t EnableSubnormals = 0,
815 int32_t RoundingMode = 0,
816 int32_t RoundingAccuracy = 0) noexcept;
820 template <
int WA,
int Wout>
822 __spirv_ArbitraryFloatSinCosINTEL(
sycl::
detail::ap_int<WA> A, int32_t MA,
823 int32_t Mout, int32_t EnableSubnormals = 0,
824 int32_t RoundingMode = 0,
825 int32_t RoundingAccuracy = 0) noexcept;
827 template <
int WA,
int Wout>
829 __spirv_ArbitraryFloatSinPiINTEL(
sycl::
detail::ap_int<WA> A, int32_t MA,
830 int32_t Mout, int32_t EnableSubnormals = 0,
831 int32_t RoundingMode = 0,
832 int32_t RoundingAccuracy = 0) noexcept;
834 template <
int WA,
int Wout>
836 __spirv_ArbitraryFloatCosPiINTEL(
sycl::
detail::ap_int<WA> A, int32_t MA,
837 int32_t Mout, int32_t EnableSubnormals = 0,
838 int32_t RoundingMode = 0,
839 int32_t RoundingAccuracy = 0) noexcept;
843 template <
int WA,
int Wout>
845 __spirv_ArbitraryFloatSinCosPiINTEL(
sycl::
detail::ap_int<WA> A, int32_t MA,
846 int32_t Mout, int32_t EnableSubnormals = 0,
847 int32_t RoundingMode = 0,
848 int32_t RoundingAccuracy = 0) noexcept;
850 template <
int WA,
int Wout>
852 __spirv_ArbitraryFloatASinINTEL(
sycl::
detail::ap_int<WA> A, int32_t MA,
853 int32_t Mout, int32_t EnableSubnormals = 0,
854 int32_t RoundingMode = 0,
855 int32_t RoundingAccuracy = 0) noexcept;
857 template <
int WA,
int Wout>
859 __spirv_ArbitraryFloatASinPiINTEL(
sycl::
detail::ap_int<WA> A, int32_t MA,
860 int32_t Mout, int32_t EnableSubnormals = 0,
861 int32_t RoundingMode = 0,
862 int32_t RoundingAccuracy = 0) noexcept;
864 template <
int WA,
int Wout>
866 __spirv_ArbitraryFloatACosINTEL(
sycl::
detail::ap_int<WA> A, int32_t MA,
867 int32_t Mout, int32_t EnableSubnormals = 0,
868 int32_t RoundingMode = 0,
869 int32_t RoundingAccuracy = 0) noexcept;
871 template <
int WA,
int Wout>
873 __spirv_ArbitraryFloatACosPiINTEL(
sycl::
detail::ap_int<WA> A, int32_t MA,
874 int32_t Mout, int32_t EnableSubnormals = 0,
875 int32_t RoundingMode = 0,
876 int32_t RoundingAccuracy = 0) noexcept;
878 template <
int WA,
int Wout>
880 __spirv_ArbitraryFloatATanINTEL(
sycl::
detail::ap_int<WA> A, int32_t MA,
881 int32_t Mout, int32_t EnableSubnormals = 0,
882 int32_t RoundingMode = 0,
883 int32_t RoundingAccuracy = 0) noexcept;
885 template <
int WA,
int Wout>
887 __spirv_ArbitraryFloatATanPiINTEL(
sycl::
detail::ap_int<WA> A, int32_t MA,
888 int32_t Mout, int32_t EnableSubnormals = 0,
889 int32_t RoundingMode = 0,
890 int32_t RoundingAccuracy = 0) noexcept;
892 template <
int WA,
int WB,
int Wout>
894 __spirv_ArbitraryFloatATan2INTEL(
sycl::
detail::ap_int<WA> A, int32_t MA,
896 int32_t Mout, int32_t EnableSubnormals = 0,
897 int32_t RoundingMode = 0,
898 int32_t RoundingAccuracy = 0) noexcept;
900 template <
int WA,
int WB,
int Wout>
902 __spirv_ArbitraryFloatPowINTEL(
sycl::
detail::ap_int<WA> A, int32_t MA,
904 int32_t Mout, int32_t EnableSubnormals = 0,
905 int32_t RoundingMode = 0,
906 int32_t RoundingAccuracy = 0) noexcept;
908 template <
int WA,
int WB,
int Wout>
910 __spirv_ArbitraryFloatPowRINTEL(
sycl::
detail::ap_int<WA> A, int32_t MA,
912 int32_t Mout, int32_t EnableSubnormals = 0,
913 int32_t RoundingMode = 0,
914 int32_t RoundingAccuracy = 0) noexcept;
919 template <
int WA,
int WB,
int Wout>
921 __spirv_ArbitraryFloatPowNINTEL(
sycl::
detail::ap_int<WA> A, int32_t MA,
923 int32_t Mout, int32_t EnableSubnormals = 0,
924 int32_t RoundingMode = 0,
925 int32_t RoundingAccuracy = 0) noexcept;
927 template <typename dataT>
929 __spirv_ReadPipe(__ocl_RPipeTy<dataT> Pipe, dataT *Data, int32_t Size,
931 template <typename dataT>
933 __spirv_WritePipe(__ocl_WPipeTy<dataT> Pipe, const dataT *Data, int32_t Size,
935 template <typename dataT>
937 __spirv_ReadPipeBlockingINTEL(__ocl_RPipeTy<dataT> Pipe, dataT *Data,
938 int32_t Size, int32_t
Alignment) noexcept;
939 template <typename dataT>
941 __spirv_WritePipeBlockingINTEL(__ocl_WPipeTy<dataT> Pipe, const dataT *Data,
942 int32_t Size, int32_t
Alignment) noexcept;
943 template <typename dataT>
945 __spirv_CreatePipeFromPipeStorage_read(
946 const ConstantPipeStorage *Storage) noexcept;
947 template <typename dataT>
949 __spirv_CreatePipeFromPipeStorage_write(
950 const ConstantPipeStorage *Storage) noexcept;
954 size_t NumBytes) noexcept;
957 __spirv_ConvertFToBF16INTEL(
float) noexcept;
959 __spirv_ConvertBF16ToFINTEL(uint16_t) noexcept;
962 __SYCL_EXPORT __ocl_vec_t<uint32_t, 4>
963 __spirv_GroupNonUniformBallot(uint32_t Execution,
bool Predicate) noexcept;
968 __spirv_GroupNonUniformBallotBitCount(
__spv::Scope::Flag,
int,
969 __ocl_vec_t<uint32_t, 4>) noexcept;
972 __spirv_GroupNonUniformBallotFindLSB(
__spv::Scope::Flag,
973 __ocl_vec_t<uint32_t, 4>) noexcept;
975 template <typename ValueT, typename IdT>
977 __spirv_GroupNonUniformBroadcast(
__spv::Scope::Flag, ValueT, IdT);
979 template <typename ValueT, typename IdT>
981 __spirv_GroupNonUniformShuffle(
__spv::Scope::Flag, ValueT, IdT) noexcept;
984 __spirv_GroupNonUniformAll(
__spv::Scope::Flag,
bool);
987 __spirv_GroupNonUniformAny(
__spv::Scope::Flag,
bool);
989 template <typename ValueT>
991 __spirv_GroupNonUniformSMin(
__spv::Scope::Flag,
unsigned int, ValueT);
993 template <typename ValueT>
995 __spirv_GroupNonUniformUMin(
__spv::Scope::Flag,
unsigned int, ValueT);
997 template <typename ValueT>
999 __spirv_GroupNonUniformFMin(
__spv::Scope::Flag,
unsigned int, ValueT);
1001 template <typename ValueT>
1003 __spirv_GroupNonUniformSMax(
__spv::Scope::Flag,
unsigned int, ValueT);
1005 template <typename ValueT>
1007 __spirv_GroupNonUniformUMax(
__spv::Scope::Flag,
unsigned int, ValueT);
1009 template <typename ValueT>
1011 __spirv_GroupNonUniformFMax(
__spv::Scope::Flag,
unsigned int, ValueT);
1013 template <typename ValueT>
1015 __spirv_GroupNonUniformIAdd(
__spv::Scope::Flag,
unsigned int, ValueT);
1017 template <typename ValueT>
1019 __spirv_GroupNonUniformFAdd(
__spv::Scope::Flag,
unsigned int, ValueT);
1021 template <typename ValueT>
1023 __spirv_GroupNonUniformIMul(
__spv::Scope::Flag,
unsigned int, ValueT);
1025 template <typename ValueT>
1027 __spirv_GroupNonUniformFMul(
__spv::Scope::Flag,
unsigned int, ValueT);
1029 template <typename ValueT>
1031 __spirv_GroupNonUniformBitwiseOr(
__spv::Scope::Flag,
unsigned int, ValueT);
1033 template <typename ValueT>
1035 __spirv_GroupNonUniformBitwiseXor(
__spv::Scope::Flag,
unsigned int, ValueT);
1037 template <typename ValueT>
1039 __spirv_GroupNonUniformBitwiseAnd(
__spv::Scope::Flag,
unsigned int, ValueT);
1041 template <typename ValueT>
1043 __spirv_GroupNonUniformSMin(
__spv::Scope::Flag,
unsigned int, ValueT,
1046 template <typename ValueT>
1048 __spirv_GroupNonUniformUMin(
__spv::Scope::Flag,
unsigned int, ValueT,
1051 template <typename ValueT>
1053 __spirv_GroupNonUniformFMin(
__spv::Scope::Flag,
unsigned int, ValueT,
1056 template <typename ValueT>
1058 __spirv_GroupNonUniformSMax(
__spv::Scope::Flag,
unsigned int, ValueT,
1061 template <typename ValueT>
1063 __spirv_GroupNonUniformUMax(
__spv::Scope::Flag,
unsigned int, ValueT,
1066 template <typename ValueT>
1068 __spirv_GroupNonUniformFMax(
__spv::Scope::Flag,
unsigned int, ValueT,
1071 template <typename ValueT>
1073 __spirv_GroupNonUniformIAdd(
__spv::Scope::Flag,
unsigned int, ValueT,
1076 template <typename ValueT>
1078 __spirv_GroupNonUniformFAdd(
__spv::Scope::Flag,
unsigned int, ValueT,
1081 template <typename ValueT>
1083 __spirv_GroupNonUniformIMul(
__spv::Scope::Flag,
unsigned int, ValueT,
1086 template <typename ValueT>
1088 __spirv_GroupNonUniformFMul(
__spv::Scope::Flag,
unsigned int, ValueT,
1091 template <typename ValueT>
1093 __spirv_GroupNonUniformBitwiseOr(
__spv::Scope::Flag,
unsigned int, ValueT,
1096 template <typename ValueT>
1098 __spirv_GroupNonUniformBitwiseXor(
__spv::Scope::Flag,
unsigned int, ValueT,
1101 template <typename ValueT>
1103 __spirv_GroupNonUniformBitwiseAnd(
__spv::Scope::Flag,
unsigned int, ValueT,
1107 __clc_BarrierInitialize(
int64_t *state, int32_t expected_count) noexcept;
1110 __clc_BarrierInvalidate(
int64_t *state) noexcept;
1113 __clc_BarrierArrive(
int64_t *state) noexcept;
1116 __clc_BarrierArriveAndDrop(
int64_t *state) noexcept;
1119 __clc_BarrierArriveNoComplete(
int64_t *state, int32_t count) noexcept;
1122 __clc_BarrierArriveAndDropNoComplete(
int64_t *state, int32_t count) noexcept;
1125 __clc_BarrierCopyAsyncArrive(
int64_t *state) noexcept;
1128 __clc_BarrierCopyAsyncArriveNoInc(
int64_t *state) noexcept;
1137 __clc_BarrierArriveAndWait(
int64_t *state) noexcept;
1139 #ifdef __SYCL_USE_NON_VARIADIC_SPIRV_OCL_PRINTF__
1140 template <
typename... Args>
1142 __spirv_ocl_printf(
const __attribute__((opencl_constant))
char *Format,
1144 template <
typename... Args>
1149 __spirv_ocl_printf(
const __attribute__((opencl_constant))
char *Format, ...);
1157 __clc_native_tanh(__ocl_vec_t<float, 2>);
1159 __clc_native_tanh(__ocl_vec_t<float, 3>);
1161 __clc_native_tanh(__ocl_vec_t<float, 4>);
1163 __clc_native_tanh(__ocl_vec_t<float, 8>);
1165 __clc_native_tanh(__ocl_vec_t<float, 16>);
1169 __clc_native_tanh(__ocl_vec_t<_Float16, 2>);
1171 __clc_native_tanh(__ocl_vec_t<_Float16, 3>);
1173 __clc_native_tanh(__ocl_vec_t<_Float16, 4>);
1175 __clc_native_tanh(__ocl_vec_t<_Float16, 8>);
1177 __clc_native_tanh(__ocl_vec_t<_Float16, 16>);
1181 __clc_native_exp2(__ocl_vec_t<_Float16, 2>);
1183 __clc_native_exp2(__ocl_vec_t<_Float16, 3>);
1185 __clc_native_exp2(__ocl_vec_t<_Float16, 4>);
1187 __clc_native_exp2(__ocl_vec_t<_Float16, 8>);
1189 __clc_native_exp2(__ocl_vec_t<_Float16, 16>);
1191 #define __CLC_BF16(...) \
1192 extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT __VA_ARGS__ __clc_fabs( \
1193 __VA_ARGS__) noexcept; \
1194 extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT __VA_ARGS__ __clc_fmin( \
1195 __VA_ARGS__, __VA_ARGS__) noexcept; \
1196 extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT __VA_ARGS__ __clc_fmax( \
1197 __VA_ARGS__, __VA_ARGS__) noexcept; \
1198 extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT __VA_ARGS__ __clc_fma( \
1199 __VA_ARGS__, __VA_ARGS__, __VA_ARGS__) noexcept;
1201 #define __CLC_BF16_SCAL_VEC(TYPE) \
1203 __CLC_BF16(__ocl_vec_t<TYPE, 2>) \
1204 __CLC_BF16(__ocl_vec_t<TYPE, 3>) \
1205 __CLC_BF16(__ocl_vec_t<TYPE, 4>) \
1206 __CLC_BF16(__ocl_vec_t<TYPE, 8>) \
1207 __CLC_BF16(__ocl_vec_t<TYPE, 16>)
1209 __CLC_BF16_SCAL_VEC(uint16_t)
1210 __CLC_BF16_SCAL_VEC(uint32_t)
1212 #undef __CLC_BF16_SCAL_VEC
1218 template <
typename from,
typename to>
1220 std::enable_if_t<std::is_integral_v<to> && std::is_unsigned_v<to>, to>
1221 __spirv_ConvertPtrToU(from val) noexcept;
1223 #else // if !__SYCL_DEVICE_ONLY__
1225 template <
typename dataT>
1228 const dataT *Src,
size_t NumElements,
1230 for (
size_t i = 0; i < NumElements; i++) {
1231 Dest[i] = Src[i * Stride];
1237 template <
typename dataT>
1240 const dataT *Src,
size_t NumElements,
1242 for (
size_t i = 0; i < NumElements; i++) {
1243 Dest[i * Stride] = Src[i];
1250 size_t NumBytes) noexcept;
1254 uint32_t Semantics) noexcept;
1262 #endif // !__SYCL_DEVICE_ONLY__