17 #include <type_traits>
20 #ifdef __SYCL_DEVICE_ONLY__
21 #define __SYCL_CONVERGENT__ __attribute__((convergent))
23 #define __SYCL_CONVERGENT__
26 #ifdef __SYCL_DEVICE_ONLY__
30 template <
typename T,
typename Tp, std::size_t R, std::size_t C,
36 __spirv_JointMatrixLoadINTEL(T *Ptr, std::size_t Stride,
40 template <
typename T,
typename Tp, std::size_t R, std::size_t C,
49 template <
typename T,
typename Tp, std::size_t R, std::size_t C,
55 __spirv_CooperativeMatrixConstructCheckedINTEL(int32_t CoordX,
61 template <
typename T,
typename Tp, std::size_t R, std::size_t C,
67 __spirv_CooperativeMatrixLoadCheckedINTEL(
69 uint32_t Height = 0, uint32_t Width = 0, std::size_t Stride = 0,
72 template <
typename T,
typename Tp, std::size_t R, std::size_t C,
77 T *Ptr, int32_t CoordX, int32_t CoordY,
80 std::size_t Stride = 0,
int MemOperand = 0);
82 template <
typename TA,
typename TB,
typename TC, std::size_t M, std::size_t K,
91 __spirv_JointMatrixMadINTEL(
97 template <
typename T1,
typename T2,
typename T3, std::size_t M, std::size_t K,
106 __spirv_JointMatrixUUMadINTEL(
112 template <
typename T1,
typename T2,
typename T3, std::size_t M, std::size_t K,
121 __spirv_JointMatrixUSMadINTEL(
127 template <
typename T1,
typename T2,
typename T3, std::size_t M, std::size_t K,
136 __spirv_JointMatrixSUMadINTEL(
142 template <
typename T,
typename Tp, std::size_t R, std::size_t C,
148 __spirv_CompositeConstruct(
const T v);
154 __spirv_JointMatrixGetElementCoordINTEL(
163 template <
typename Ts,
typename T, std::size_t R, std::size_t C,
170 template <
typename Ts,
typename T, std::size_t R, std::size_t C,
178 template <
typename T>
180 T *Ptr, uint32_t NumRows, uint32_t NumCols,
unsigned int CacheLevel,
183 #ifndef __SPIRV_BUILTIN_DECLARATIONS__
185 "SPIR-V built-ins are not available. Please set -fdeclare-spirv-builtins flag."
188 template <
typename RetT,
typename ImageT>
191 template <
typename RetT,
typename ImageT>
194 template <
typename RetT,
typename ImageT>
197 template <
typename ImageT,
typename CoordT,
typename ValT>
200 template <
class RetT,
typename ImageT,
typename TempArgT>
203 template <
class RetT,
typename ImageT,
typename TempArgT>
206 template <
class RetT,
typename ImageT,
typename TempArgT>
209 template <
class RetT,
typename ImageT,
typename TempArgT>
213 template <
class RetT,
typename ImageT,
typename TempArgT>
217 template <
class RetT,
typename ImageT,
typename TempArgT>
220 template <
typename ImageT,
typename CoordT,
typename ValT>
224 template <
typename ImageT,
typename SampledType>
228 template <
typename SampledType,
typename TempRetT,
typename TempArgT>
230 __spirv_ImageSampleExplicitLod(SampledType, TempArgT,
int,
float);
232 template <
typename SampledType,
typename TempRetT,
typename TempArgT>
234 __spirv_ImageSampleExplicitLod(SampledType, TempArgT,
int, TempArgT, TempArgT);
236 template <
typename SampledType,
typename TempRetT,
typename TempArgT>
240 template <
typename RetT,
class HandleT>
243 template <
typename RetT,
class HandleT>
246 template <
typename RetT,
class HandleT>
248 RetT __spirv_ConvertHandleToSampledImageINTEL(HandleT);
250 #define __SYCL_OpGroupAsyncCopyGlobalToLocal __spirv_GroupAsyncCopy
251 #define __SYCL_OpGroupAsyncCopyLocalToGlobal __spirv_GroupAsyncCopy
254 #define __SPIRV_ATOMIC_LOAD(AS, Type) \
255 extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicLoad( \
256 AS const Type *P, __spv::Scope::Flag S, \
257 __spv::MemorySemanticsMask::Flag O);
258 #define __SPIRV_ATOMIC_STORE(AS, Type) \
259 extern __DPCPP_SYCL_EXTERNAL void __spirv_AtomicStore( \
260 AS Type *P, __spv::Scope::Flag S, __spv::MemorySemanticsMask::Flag O, \
262 #define __SPIRV_ATOMIC_EXCHANGE(AS, Type) \
263 extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicExchange( \
264 AS Type *P, __spv::Scope::Flag S, __spv::MemorySemanticsMask::Flag O, \
266 #define __SPIRV_ATOMIC_CMP_EXCHANGE(AS, Type) \
267 extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicCompareExchange( \
268 AS Type *P, __spv::Scope::Flag S, __spv::MemorySemanticsMask::Flag E, \
269 __spv::MemorySemanticsMask::Flag U, Type V, Type C);
270 #define __SPIRV_ATOMIC_IADD(AS, Type) \
271 extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicIAdd( \
272 AS Type *P, __spv::Scope::Flag S, __spv::MemorySemanticsMask::Flag O, \
274 #define __SPIRV_ATOMIC_ISUB(AS, Type) \
275 extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicISub( \
276 AS Type *P, __spv::Scope::Flag S, __spv::MemorySemanticsMask::Flag O, \
278 #define __SPIRV_ATOMIC_FADD(AS, Type) \
279 extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicFAddEXT( \
280 AS Type *P, __spv::Scope::Flag S, __spv::MemorySemanticsMask::Flag O, \
282 #define __SPIRV_ATOMIC_SMIN(AS, Type) \
283 extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicSMin( \
284 AS Type *P, __spv::Scope::Flag S, __spv::MemorySemanticsMask::Flag O, \
286 #define __SPIRV_ATOMIC_UMIN(AS, Type) \
287 extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicUMin( \
288 AS Type *P, __spv::Scope::Flag S, __spv::MemorySemanticsMask::Flag O, \
290 #define __SPIRV_ATOMIC_FMIN(AS, Type) \
291 extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicFMinEXT( \
292 AS Type *P, __spv::Scope::Flag S, __spv::MemorySemanticsMask::Flag O, \
294 #define __SPIRV_ATOMIC_SMAX(AS, Type) \
295 extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicSMax( \
296 AS Type *P, __spv::Scope::Flag S, __spv::MemorySemanticsMask::Flag O, \
298 #define __SPIRV_ATOMIC_UMAX(AS, Type) \
299 extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicUMax( \
300 AS Type *P, __spv::Scope::Flag S, __spv::MemorySemanticsMask::Flag O, \
302 #define __SPIRV_ATOMIC_FMAX(AS, Type) \
303 extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicFMaxEXT( \
304 AS Type *P, __spv::Scope::Flag S, __spv::MemorySemanticsMask::Flag O, \
306 #define __SPIRV_ATOMIC_AND(AS, Type) \
307 extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicAnd( \
308 AS Type *P, __spv::Scope::Flag S, __spv::MemorySemanticsMask::Flag O, \
310 #define __SPIRV_ATOMIC_OR(AS, Type) \
311 extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicOr( \
312 AS Type *P, __spv::Scope::Flag S, __spv::MemorySemanticsMask::Flag O, \
314 #define __SPIRV_ATOMIC_XOR(AS, Type) \
315 extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicXor( \
316 AS Type *P, __spv::Scope::Flag S, __spv::MemorySemanticsMask::Flag O, \
319 #define __SPIRV_ATOMIC_FLOAT(AS, Type) \
320 __SPIRV_ATOMIC_FADD(AS, Type) \
321 __SPIRV_ATOMIC_FMIN(AS, Type) \
322 __SPIRV_ATOMIC_FMAX(AS, Type) \
323 __SPIRV_ATOMIC_LOAD(AS, Type) \
324 __SPIRV_ATOMIC_STORE(AS, Type) \
325 __SPIRV_ATOMIC_EXCHANGE(AS, Type)
327 #define __SPIRV_ATOMIC_BASE(AS, Type) \
328 __SPIRV_ATOMIC_FLOAT(AS, Type) \
329 __SPIRV_ATOMIC_CMP_EXCHANGE(AS, Type) \
330 __SPIRV_ATOMIC_IADD(AS, Type) \
331 __SPIRV_ATOMIC_ISUB(AS, Type) \
332 __SPIRV_ATOMIC_AND(AS, Type) \
333 __SPIRV_ATOMIC_OR(AS, Type) \
334 __SPIRV_ATOMIC_XOR(AS, Type)
336 #define __SPIRV_ATOMIC_SIGNED(AS, Type) \
337 __SPIRV_ATOMIC_BASE(AS, Type) \
338 __SPIRV_ATOMIC_SMIN(AS, Type) \
339 __SPIRV_ATOMIC_SMAX(AS, Type)
341 #define __SPIRV_ATOMIC_UNSIGNED(AS, Type) \
342 __SPIRV_ATOMIC_BASE(AS, Type) \
343 __SPIRV_ATOMIC_UMIN(AS, Type) \
344 __SPIRV_ATOMIC_UMAX(AS, Type)
348 #define __SPIRV_ATOMIC_MINMAX(AS, Op) \
349 template <typename T> \
350 typename std::enable_if_t< \
351 std::is_integral<T>::value && std::is_signed<T>::value, T> \
352 __spirv_Atomic##Op(AS T *Ptr, __spv::Scope::Flag Memory, \
353 __spv::MemorySemanticsMask::Flag Semantics, \
355 return __spirv_AtomicS##Op(Ptr, Memory, Semantics, Value); \
357 template <typename T> \
358 typename std::enable_if_t< \
359 std::is_integral<T>::value && !std::is_signed<T>::value, T> \
360 __spirv_Atomic##Op(AS T *Ptr, __spv::Scope::Flag Memory, \
361 __spv::MemorySemanticsMask::Flag Semantics, \
363 return __spirv_AtomicU##Op(Ptr, Memory, Semantics, Value); \
365 template <typename T> \
366 typename std::enable_if_t<std::is_floating_point<T>::value, T> \
367 __spirv_Atomic##Op(AS T *Ptr, __spv::Scope::Flag Memory, \
368 __spv::MemorySemanticsMask::Flag Semantics, \
370 return __spirv_AtomicF##Op##EXT(Ptr, Memory, Semantics, Value); \
373 #define __SPIRV_ATOMICS(macro, Arg) \
374 macro(__attribute__((opencl_global)), Arg) \
375 macro(__attribute__((opencl_local)), Arg) macro(, Arg)
377 __SPIRV_ATOMICS(__SPIRV_ATOMIC_FLOAT,
float)
378 __SPIRV_ATOMICS(__SPIRV_ATOMIC_FLOAT,
double)
379 __SPIRV_ATOMICS(__SPIRV_ATOMIC_SIGNED,
int)
380 __SPIRV_ATOMICS(__SPIRV_ATOMIC_SIGNED,
long)
381 __SPIRV_ATOMICS(__SPIRV_ATOMIC_SIGNED,
long long)
382 __SPIRV_ATOMICS(__SPIRV_ATOMIC_UNSIGNED,
unsigned int)
383 __SPIRV_ATOMICS(__SPIRV_ATOMIC_UNSIGNED,
unsigned long)
384 __SPIRV_ATOMICS(__SPIRV_ATOMIC_UNSIGNED,
unsigned long long)
385 __SPIRV_ATOMICS(__SPIRV_ATOMIC_MINMAX, Min)
386 __SPIRV_ATOMICS(__SPIRV_ATOMIC_MINMAX, Max)
388 #undef __SPIRV_ATOMICS
389 #undef __SPIRV_ATOMIC_AND
390 #undef __SPIRV_ATOMIC_BASE
391 #undef __SPIRV_ATOMIC_CMP_EXCHANGE
392 #undef __SPIRV_ATOMIC_EXCHANGE
393 #undef __SPIRV_ATOMIC_FADD
394 #undef __SPIRV_ATOMIC_FLOAT
395 #undef __SPIRV_ATOMIC_FMAX
396 #undef __SPIRV_ATOMIC_FMIN
397 #undef __SPIRV_ATOMIC_IADD
398 #undef __SPIRV_ATOMIC_ISUB
399 #undef __SPIRV_ATOMIC_LOAD
400 #undef __SPIRV_ATOMIC_MINMAX
401 #undef __SPIRV_ATOMIC_OR
402 #undef __SPIRV_ATOMIC_SIGNED
403 #undef __SPIRV_ATOMIC_SMAX
404 #undef __SPIRV_ATOMIC_SMIN
405 #undef __SPIRV_ATOMIC_STORE
406 #undef __SPIRV_ATOMIC_UMAX
407 #undef __SPIRV_ATOMIC_UMIN
408 #undef __SPIRV_ATOMIC_UNSIGNED
409 #undef __SPIRV_ATOMIC_XOR
411 template <
typename dataT>
413 __SYCL_GenericCastToPtrExplicit_ToGlobal(
void *Ptr)
noexcept {
415 __spirv_GenericCastToPtrExplicit_ToGlobal(
419 template <
typename dataT>
421 __SYCL_GenericCastToPtrExplicit_ToGlobal(
const void *Ptr)
noexcept {
423 __spirv_GenericCastToPtrExplicit_ToGlobal(
427 template <
typename dataT>
429 __SYCL_GenericCastToPtrExplicit_ToGlobal(
volatile void *Ptr)
noexcept {
431 __spirv_GenericCastToPtrExplicit_ToGlobal(
435 template <
typename dataT>
437 __SYCL_GenericCastToPtrExplicit_ToGlobal(
const volatile void *Ptr)
noexcept {
438 return (
const volatile __attribute__((opencl_global)) dataT *)
439 __spirv_GenericCastToPtrExplicit_ToGlobal(
443 template <
typename dataT>
445 __SYCL_GenericCastToPtrExplicit_ToLocal(
void *Ptr)
noexcept {
447 __spirv_GenericCastToPtrExplicit_ToLocal(Ptr,
451 template <
typename dataT>
453 __SYCL_GenericCastToPtrExplicit_ToLocal(
const void *Ptr)
noexcept {
455 __spirv_GenericCastToPtrExplicit_ToLocal(Ptr,
459 template <
typename dataT>
461 __SYCL_GenericCastToPtrExplicit_ToLocal(
volatile void *Ptr)
noexcept {
463 __spirv_GenericCastToPtrExplicit_ToLocal(Ptr,
467 template <
typename dataT>
469 __SYCL_GenericCastToPtrExplicit_ToLocal(
const volatile void *Ptr)
noexcept {
470 return (
const volatile __attribute__((opencl_local)) dataT *)
471 __spirv_GenericCastToPtrExplicit_ToLocal(Ptr,
475 template <
typename dataT>
477 __SYCL_GenericCastToPtrExplicit_ToPrivate(
void *Ptr)
noexcept {
479 __spirv_GenericCastToPtrExplicit_ToPrivate(Ptr,
483 template <
typename dataT>
485 __SYCL_GenericCastToPtrExplicit_ToPrivate(
const void *Ptr)
noexcept {
487 __spirv_GenericCastToPtrExplicit_ToPrivate(Ptr,
491 template <
typename dataT>
493 __SYCL_GenericCastToPtrExplicit_ToPrivate(
volatile void *Ptr)
noexcept {
495 __spirv_GenericCastToPtrExplicit_ToPrivate(Ptr,
499 template <
typename dataT>
500 extern const volatile __attribute__((opencl_private)) dataT *
501 __SYCL_GenericCastToPtrExplicit_ToPrivate(
const volatile void *Ptr)
noexcept {
502 return (
const volatile __attribute__((opencl_private)) dataT *)
503 __spirv_GenericCastToPtrExplicit_ToPrivate(Ptr,
507 template <
typename dataT>
509 __SYCL_GenericCastToPtr_ToGlobal(
void *Ptr)
noexcept {
511 __spirv_GenericCastToPtr_ToGlobal(Ptr,
515 template <
typename dataT>
517 __SYCL_GenericCastToPtr_ToGlobal(
const void *Ptr)
noexcept {
519 __spirv_GenericCastToPtr_ToGlobal(Ptr,
523 template <
typename dataT>
525 __SYCL_GenericCastToPtr_ToGlobal(
volatile void *Ptr)
noexcept {
527 __spirv_GenericCastToPtr_ToGlobal(Ptr,
531 template <
typename dataT>
533 __SYCL_GenericCastToPtr_ToGlobal(
const volatile void *Ptr)
noexcept {
534 return (
const volatile __attribute__((opencl_global)) dataT *)
535 __spirv_GenericCastToPtr_ToGlobal(Ptr,
539 template <
typename dataT>
541 __SYCL_GenericCastToPtr_ToLocal(
void *Ptr)
noexcept {
546 template <
typename dataT>
548 __SYCL_GenericCastToPtr_ToLocal(
const void *Ptr)
noexcept {
553 template <
typename dataT>
555 __SYCL_GenericCastToPtr_ToLocal(
volatile void *Ptr)
noexcept {
560 template <
typename dataT>
562 __SYCL_GenericCastToPtr_ToLocal(
const volatile void *Ptr)
noexcept {
563 return (
const volatile __attribute__((opencl_local)) dataT *)
567 template <
typename dataT>
569 __SYCL_GenericCastToPtr_ToPrivate(
void *Ptr)
noexcept {
574 template <
typename dataT>
576 __SYCL_GenericCastToPtr_ToPrivate(
const void *Ptr)
noexcept {
581 template <
typename dataT>
583 __SYCL_GenericCastToPtr_ToPrivate(
volatile void *Ptr)
noexcept {
588 template <
typename dataT>
589 extern const volatile __attribute__((opencl_private)) dataT *
590 __SYCL_GenericCastToPtr_ToPrivate(
const volatile void *Ptr)
noexcept {
591 return (
const volatile __attribute__((opencl_private)) dataT *)
595 template <
typename dataT>
597 __spirv_SubgroupShuffleINTEL(dataT Data, uint32_t InvocationId)
noexcept;
598 template <
typename dataT>
600 __spirv_SubgroupShuffleDownINTEL(dataT Current, dataT Next,
602 template <
typename dataT>
604 __spirv_SubgroupShuffleUpINTEL(dataT Previous, dataT Current,
606 template <
typename dataT>
608 __spirv_SubgroupShuffleXorINTEL(dataT Data, uint32_t Value)
noexcept;
610 template <
typename dataT>
612 __spirv_SubgroupBlockReadINTEL(
const __attribute__((opencl_global))
615 template <
typename dataT>
617 __spirv_SubgroupBlockWriteINTEL(
__attribute__((opencl_global)) uint8_t *Ptr,
620 template <
typename dataT>
622 __spirv_SubgroupBlockReadINTEL(
const __attribute__((opencl_global))
625 template <
typename dataT>
627 __spirv_SubgroupBlockWriteINTEL(
__attribute__((opencl_global)) uint16_t *Ptr,
630 template <
typename dataT>
632 __spirv_SubgroupBlockReadINTEL(
const __attribute__((opencl_global))
635 template <
typename dataT>
637 __spirv_SubgroupBlockWriteINTEL(
__attribute__((opencl_global)) uint32_t *Ptr,
640 template <
typename dataT>
642 __spirv_SubgroupBlockReadINTEL(
const __attribute__((opencl_global))
645 template <
typename dataT>
647 __spirv_SubgroupBlockWriteINTEL(
__attribute__((opencl_global)) uint64_t *Ptr,
649 template <
int W,
int rW>
651 __spirv_FixedSqrtINTEL(sycl::detail::ap_int<W>
a,
bool S, int32_t I, int32_t rI,
652 int32_t Quantization = 0, int32_t Overflow = 0)
noexcept;
653 template <
int W,
int rW>
655 __spirv_FixedRecipINTEL(
sycl::detail::ap_int<W>
a,
bool S, int32_t I,
656 int32_t rI, int32_t Quantization = 0,
658 template <
int W,
int rW>
660 __spirv_FixedRsqrtINTEL(
sycl::detail::ap_int<W>
a,
bool S, int32_t I,
661 int32_t rI, int32_t Quantization = 0,
663 template <
int W,
int rW>
665 __spirv_FixedSinINTEL(
sycl::detail::ap_int<W>
a,
bool S, int32_t I, int32_t rI,
666 int32_t Quantization = 0, int32_t Overflow = 0)
noexcept;
667 template <
int W,
int rW>
669 __spirv_FixedCosINTEL(
sycl::detail::ap_int<W>
a,
bool S, int32_t I, int32_t rI,
670 int32_t Quantization = 0, int32_t Overflow = 0)
noexcept;
671 template <
int W,
int rW>
673 __spirv_FixedSinCosINTEL(
sycl::detail::ap_int<W>
a,
bool S, int32_t I,
674 int32_t rI, int32_t Quantization = 0,
676 template <
int W,
int rW>
678 __spirv_FixedSinPiINTEL(
sycl::detail::ap_int<W>
a,
bool S, int32_t I,
679 int32_t rI, int32_t Quantization = 0,
681 template <
int W,
int rW>
683 __spirv_FixedCosPiINTEL(
sycl::detail::ap_int<W>
a,
bool S, int32_t I,
684 int32_t rI, int32_t Quantization = 0,
686 template <
int W,
int rW>
688 __spirv_FixedSinCosPiINTEL(
sycl::detail::ap_int<W>
a,
bool S, int32_t I,
689 int32_t rI, int32_t Quantization = 0,
691 template <
int W,
int rW>
693 __spirv_FixedLogINTEL(
sycl::detail::ap_int<W>
a,
bool S, int32_t I, int32_t rI,
694 int32_t Quantization = 0, int32_t Overflow = 0)
noexcept;
695 template <
int W,
int rW>
697 __spirv_FixedExpINTEL(
sycl::detail::ap_int<W>
a,
bool S, int32_t I, int32_t rI,
698 int32_t Quantization = 0, int32_t Overflow = 0)
noexcept;
703 template <
int WA,
int Wout>
705 __spirv_ArbitraryFloatCastINTEL(
sycl::detail::ap_int<WA> A, int32_t MA,
706 int32_t Mout, int32_t EnableSubnormals = 0,
707 int32_t RoundingMode = 0,
708 int32_t RoundingAccuracy = 0)
noexcept;
710 template <
int WA,
int Wout>
712 __spirv_ArbitraryFloatCastFromIntINTEL(
sycl::detail::ap_int<WA> A, int32_t Mout,
713 bool FromSign = false,
714 int32_t EnableSubnormals = 0,
715 int32_t RoundingMode = 0,
716 int32_t RoundingAccuracy = 0)
noexcept;
718 template <
int WA,
int Wout>
720 __spirv_ArbitraryFloatCastToIntINTEL(
sycl::detail::ap_int<WA> A, int32_t MA,
722 int32_t EnableSubnormals = 0,
723 int32_t RoundingMode = 0,
724 int32_t RoundingAccuracy = 0)
noexcept;
726 template <
int WA,
int WB,
int Wout>
728 __spirv_ArbitraryFloatAddINTEL(
sycl::detail::ap_int<WA> A, int32_t MA,
729 sycl::detail::ap_int<WB> B, int32_t MB,
730 int32_t Mout, int32_t EnableSubnormals = 0,
731 int32_t RoundingMode = 0,
732 int32_t RoundingAccuracy = 0)
noexcept;
734 template <
int WA,
int WB,
int Wout>
736 __spirv_ArbitraryFloatSubINTEL(
sycl::detail::ap_int<WA> A, int32_t MA,
737 sycl::detail::ap_int<WB> B, int32_t MB,
738 int32_t Mout, int32_t EnableSubnormals = 0,
739 int32_t RoundingMode = 0,
740 int32_t RoundingAccuracy = 0)
noexcept;
742 template <
int WA,
int WB,
int Wout>
744 __spirv_ArbitraryFloatMulINTEL(
sycl::detail::ap_int<WA> A, int32_t MA,
745 sycl::detail::ap_int<WB> B, int32_t MB,
746 int32_t Mout, int32_t EnableSubnormals = 0,
747 int32_t RoundingMode = 0,
748 int32_t RoundingAccuracy = 0)
noexcept;
750 template <
int WA,
int WB,
int Wout>
752 __spirv_ArbitraryFloatDivINTEL(
sycl::detail::ap_int<WA> A, int32_t MA,
753 sycl::detail::ap_int<WB> B, int32_t MB,
754 int32_t Mout, int32_t EnableSubnormals = 0,
755 int32_t RoundingMode = 0,
756 int32_t RoundingAccuracy = 0)
noexcept;
760 template <
int WA,
int WB>
762 __spirv_ArbitraryFloatGTINTEL(
sycl::detail::ap_int<WA> A, int32_t MA,
765 template <
int WA,
int WB>
767 __spirv_ArbitraryFloatGEINTEL(
sycl::detail::ap_int<WA> A, int32_t MA,
770 template <
int WA,
int WB>
772 __spirv_ArbitraryFloatLTINTEL(
sycl::detail::ap_int<WA> A, int32_t MA,
775 template <
int WA,
int WB>
777 __spirv_ArbitraryFloatLEINTEL(
sycl::detail::ap_int<WA> A, int32_t MA,
780 template <
int WA,
int WB>
782 __spirv_ArbitraryFloatEQINTEL(
sycl::detail::ap_int<WA> A, int32_t MA,
785 template <
int WA,
int Wout>
787 __spirv_ArbitraryFloatRecipINTEL(
sycl::detail::ap_int<WA> A, int32_t MA,
788 int32_t Mout, int32_t EnableSubnormals = 0,
789 int32_t RoundingMode = 0,
790 int32_t RoundingAccuracy = 0)
noexcept;
792 template <
int WA,
int Wout>
794 __spirv_ArbitraryFloatRSqrtINTEL(
sycl::detail::ap_int<WA> A, int32_t MA,
795 int32_t Mout, int32_t EnableSubnormals = 0,
796 int32_t RoundingMode = 0,
797 int32_t RoundingAccuracy = 0)
noexcept;
799 template <
int WA,
int Wout>
801 __spirv_ArbitraryFloatCbrtINTEL(
sycl::detail::ap_int<WA> A, int32_t MA,
802 int32_t Mout, int32_t EnableSubnormals = 0,
803 int32_t RoundingMode = 0,
804 int32_t RoundingAccuracy = 0)
noexcept;
806 template <
int WA,
int WB,
int Wout>
808 __spirv_ArbitraryFloatHypotINTEL(
sycl::detail::ap_int<WA> A, int32_t MA,
809 sycl::detail::ap_int<WB> B, int32_t MB,
810 int32_t Mout, int32_t EnableSubnormals = 0,
811 int32_t RoundingMode = 0,
812 int32_t RoundingAccuracy = 0)
noexcept;
814 template <
int WA,
int Wout>
816 __spirv_ArbitraryFloatSqrtINTEL(
sycl::detail::ap_int<WA> A, int32_t MA,
817 int32_t Mout, int32_t EnableSubnormals = 0,
818 int32_t RoundingMode = 0,
819 int32_t RoundingAccuracy = 0)
noexcept;
821 template <
int WA,
int Wout>
823 __spirv_ArbitraryFloatLogINTEL(
sycl::detail::ap_int<WA> A, int32_t MA,
824 int32_t Mout, int32_t EnableSubnormals = 0,
825 int32_t RoundingMode = 0,
826 int32_t RoundingAccuracy = 0)
noexcept;
828 template <
int WA,
int Wout>
830 __spirv_ArbitraryFloatLog2INTEL(
sycl::detail::ap_int<WA> A, int32_t MA,
831 int32_t Mout, int32_t EnableSubnormals = 0,
832 int32_t RoundingMode = 0,
833 int32_t RoundingAccuracy = 0)
noexcept;
835 template <
int WA,
int Wout>
837 __spirv_ArbitraryFloatLog10INTEL(
sycl::detail::ap_int<WA> A, int32_t MA,
838 int32_t Mout, int32_t EnableSubnormals = 0,
839 int32_t RoundingMode = 0,
840 int32_t RoundingAccuracy = 0)
noexcept;
842 template <
int WA,
int Wout>
844 __spirv_ArbitraryFloatLog1pINTEL(
sycl::detail::ap_int<WA> A, int32_t MA,
845 int32_t Mout, int32_t EnableSubnormals = 0,
846 int32_t RoundingMode = 0,
847 int32_t RoundingAccuracy = 0)
noexcept;
849 template <
int WA,
int Wout>
851 __spirv_ArbitraryFloatExpINTEL(
sycl::detail::ap_int<WA> A, int32_t MA,
852 int32_t Mout, int32_t EnableSubnormals = 0,
853 int32_t RoundingMode = 0,
854 int32_t RoundingAccuracy = 0)
noexcept;
856 template <
int WA,
int Wout>
858 __spirv_ArbitraryFloatExp2INTEL(
sycl::detail::ap_int<WA> A, int32_t MA,
859 int32_t Mout, int32_t EnableSubnormals = 0,
860 int32_t RoundingMode = 0,
861 int32_t RoundingAccuracy = 0)
noexcept;
863 template <
int WA,
int Wout>
865 __spirv_ArbitraryFloatExp10INTEL(
sycl::detail::ap_int<WA> A, int32_t MA,
866 int32_t Mout, int32_t EnableSubnormals = 0,
867 int32_t RoundingMode = 0,
868 int32_t RoundingAccuracy = 0)
noexcept;
870 template <
int WA,
int Wout>
872 __spirv_ArbitraryFloatExpm1INTEL(
sycl::detail::ap_int<WA> A, int32_t MA,
873 int32_t Mout, int32_t EnableSubnormals = 0,
874 int32_t RoundingMode = 0,
875 int32_t RoundingAccuracy = 0)
noexcept;
877 template <
int WA,
int Wout>
879 __spirv_ArbitraryFloatSinINTEL(
sycl::detail::ap_int<WA> A, int32_t MA,
880 int32_t Mout, int32_t EnableSubnormals = 0,
881 int32_t RoundingMode = 0,
882 int32_t RoundingAccuracy = 0)
noexcept;
884 template <
int WA,
int Wout>
886 __spirv_ArbitraryFloatCosINTEL(
sycl::detail::ap_int<WA> A, int32_t MA,
887 int32_t Mout, int32_t EnableSubnormals = 0,
888 int32_t RoundingMode = 0,
889 int32_t RoundingAccuracy = 0)
noexcept;
893 template <
int WA,
int Wout>
895 __spirv_ArbitraryFloatSinCosINTEL(
sycl::detail::ap_int<WA> A, int32_t MA,
896 int32_t Mout, int32_t EnableSubnormals = 0,
897 int32_t RoundingMode = 0,
898 int32_t RoundingAccuracy = 0)
noexcept;
900 template <
int WA,
int Wout>
902 __spirv_ArbitraryFloatSinPiINTEL(
sycl::detail::ap_int<WA> A, int32_t MA,
903 int32_t Mout, int32_t EnableSubnormals = 0,
904 int32_t RoundingMode = 0,
905 int32_t RoundingAccuracy = 0)
noexcept;
907 template <
int WA,
int Wout>
909 __spirv_ArbitraryFloatCosPiINTEL(
sycl::detail::ap_int<WA> A, int32_t MA,
910 int32_t Mout, int32_t EnableSubnormals = 0,
911 int32_t RoundingMode = 0,
912 int32_t RoundingAccuracy = 0)
noexcept;
916 template <
int WA,
int Wout>
918 __spirv_ArbitraryFloatSinCosPiINTEL(
sycl::detail::ap_int<WA> A, int32_t MA,
919 int32_t Mout, int32_t EnableSubnormals = 0,
920 int32_t RoundingMode = 0,
921 int32_t RoundingAccuracy = 0)
noexcept;
923 template <
int WA,
int Wout>
925 __spirv_ArbitraryFloatASinINTEL(
sycl::detail::ap_int<WA> A, int32_t MA,
926 int32_t Mout, int32_t EnableSubnormals = 0,
927 int32_t RoundingMode = 0,
928 int32_t RoundingAccuracy = 0)
noexcept;
930 template <
int WA,
int Wout>
932 __spirv_ArbitraryFloatASinPiINTEL(
sycl::detail::ap_int<WA> A, int32_t MA,
933 int32_t Mout, int32_t EnableSubnormals = 0,
934 int32_t RoundingMode = 0,
935 int32_t RoundingAccuracy = 0)
noexcept;
937 template <
int WA,
int Wout>
939 __spirv_ArbitraryFloatACosINTEL(
sycl::detail::ap_int<WA> A, int32_t MA,
940 int32_t Mout, int32_t EnableSubnormals = 0,
941 int32_t RoundingMode = 0,
942 int32_t RoundingAccuracy = 0)
noexcept;
944 template <
int WA,
int Wout>
946 __spirv_ArbitraryFloatACosPiINTEL(
sycl::detail::ap_int<WA> A, int32_t MA,
947 int32_t Mout, int32_t EnableSubnormals = 0,
948 int32_t RoundingMode = 0,
949 int32_t RoundingAccuracy = 0)
noexcept;
951 template <
int WA,
int Wout>
953 __spirv_ArbitraryFloatATanINTEL(
sycl::detail::ap_int<WA> A, int32_t MA,
954 int32_t Mout, int32_t EnableSubnormals = 0,
955 int32_t RoundingMode = 0,
956 int32_t RoundingAccuracy = 0)
noexcept;
958 template <
int WA,
int Wout>
960 __spirv_ArbitraryFloatATanPiINTEL(
sycl::detail::ap_int<WA> A, int32_t MA,
961 int32_t Mout, int32_t EnableSubnormals = 0,
962 int32_t RoundingMode = 0,
963 int32_t RoundingAccuracy = 0)
noexcept;
965 template <
int WA,
int WB,
int Wout>
967 __spirv_ArbitraryFloatATan2INTEL(
sycl::detail::ap_int<WA> A, int32_t MA,
968 sycl::detail::ap_int<WB> B, int32_t MB,
969 int32_t Mout, int32_t EnableSubnormals = 0,
970 int32_t RoundingMode = 0,
971 int32_t RoundingAccuracy = 0)
noexcept;
973 template <
int WA,
int WB,
int Wout>
975 __spirv_ArbitraryFloatPowINTEL(
sycl::detail::ap_int<WA> A, int32_t MA,
976 sycl::detail::ap_int<WB> B, int32_t MB,
977 int32_t Mout, int32_t EnableSubnormals = 0,
978 int32_t RoundingMode = 0,
979 int32_t RoundingAccuracy = 0)
noexcept;
981 template <
int WA,
int WB,
int Wout>
983 __spirv_ArbitraryFloatPowRINTEL(
sycl::detail::ap_int<WA> A, int32_t MA,
984 sycl::detail::ap_int<WB> B, int32_t MB,
985 int32_t Mout, int32_t EnableSubnormals = 0,
986 int32_t RoundingMode = 0,
987 int32_t RoundingAccuracy = 0)
noexcept;
992 template <
int WA,
int WB,
int Wout>
994 __spirv_ArbitraryFloatPowNINTEL(
sycl::detail::ap_int<WA> A, int32_t MA,
995 sycl::detail::ap_int<WB> B,
bool SignOfB,
996 int32_t Mout, int32_t EnableSubnormals = 0,
997 int32_t RoundingMode = 0,
998 int32_t RoundingAccuracy = 0)
noexcept;
1000 template <typename dataT>
1002 __spirv_ReadPipe(__ocl_RPipeTy<dataT> Pipe, dataT *Data, int32_t Size,
1004 template <typename dataT>
1006 __spirv_WritePipe(__ocl_WPipeTy<dataT> Pipe, const dataT *Data, int32_t Size,
1008 template <typename dataT>
1010 __spirv_ReadPipeBlockingINTEL(__ocl_RPipeTy<dataT> Pipe, dataT *Data,
1012 template <typename dataT>
1014 __spirv_WritePipeBlockingINTEL(__ocl_WPipeTy<dataT> Pipe, const dataT *Data,
1016 template <typename dataT>
1018 __spirv_CreatePipeFromPipeStorage_read(
1019 const ConstantPipeStorage *Storage)
noexcept;
1020 template <typename dataT>
1022 __spirv_CreatePipeFromPipeStorage_write(
1023 const ConstantPipeStorage *Storage)
noexcept;
1030 __spirv_ConvertBF16ToFINTEL(uint16_t)
noexcept;
1032 __spirv_ConvertFToBF16INTEL(
float)
noexcept;
1035 __spirv_ConvertBF16ToFINTEL(__ocl_vec_t<uint16_t, N>)
noexcept;
1038 __spirv_ConvertFToBF16INTEL(__ocl_vec_t<
float, N>)
noexcept;
1041 __SYCL_EXPORT __ocl_vec_t<uint32_t, 4>
1042 __spirv_GroupNonUniformBallot(uint32_t Execution,
bool Predicate)
noexcept;
1047 __spirv_GroupNonUniformBallotBitCount(
__spv::Scope::Flag,
int,
1048 __ocl_vec_t<uint32_t, 4>)
noexcept;
1051 __spirv_GroupNonUniformBallotFindLSB(
__spv::Scope::Flag,
1052 __ocl_vec_t<uint32_t, 4>)
noexcept;
1054 template <typename ValueT, typename IdT>
1056 __spirv_GroupNonUniformBroadcast(
__spv::Scope::Flag, ValueT, IdT);
1058 template <typename ValueT, typename IdT>
1060 __spirv_GroupNonUniformShuffle(
__spv::Scope::Flag, ValueT, IdT)
noexcept;
1063 __spirv_GroupNonUniformAll(
__spv::Scope::Flag,
bool);
1066 __spirv_GroupNonUniformAny(
__spv::Scope::Flag,
bool);
1068 template <typename ValueT>
1070 __spirv_GroupNonUniformSMin(
__spv::Scope::Flag,
unsigned int, ValueT);
1072 template <typename ValueT>
1074 __spirv_GroupNonUniformUMin(
__spv::Scope::Flag,
unsigned int, ValueT);
1076 template <typename ValueT>
1078 __spirv_GroupNonUniformFMin(
__spv::Scope::Flag,
unsigned int, ValueT);
1080 template <typename ValueT>
1082 __spirv_GroupNonUniformSMax(
__spv::Scope::Flag,
unsigned int, ValueT);
1084 template <typename ValueT>
1086 __spirv_GroupNonUniformUMax(
__spv::Scope::Flag,
unsigned int, ValueT);
1088 template <typename ValueT>
1090 __spirv_GroupNonUniformFMax(
__spv::Scope::Flag,
unsigned int, ValueT);
1092 template <typename ValueT>
1094 __spirv_GroupNonUniformIAdd(
__spv::Scope::Flag,
unsigned int, ValueT);
1096 template <typename ValueT>
1098 __spirv_GroupNonUniformFAdd(
__spv::Scope::Flag,
unsigned int, ValueT);
1100 template <typename ValueT>
1102 __spirv_GroupNonUniformIMul(
__spv::Scope::Flag,
unsigned int, ValueT);
1104 template <typename ValueT>
1106 __spirv_GroupNonUniformFMul(
__spv::Scope::Flag,
unsigned int, ValueT);
1108 template <typename ValueT>
1110 __spirv_GroupNonUniformBitwiseOr(
__spv::Scope::Flag,
unsigned int, ValueT);
1112 template <typename ValueT>
1114 __spirv_GroupNonUniformBitwiseXor(
__spv::Scope::Flag,
unsigned int, ValueT);
1116 template <typename ValueT>
1118 __spirv_GroupNonUniformBitwiseAnd(
__spv::Scope::Flag,
unsigned int, ValueT);
1120 template <typename ValueT>
1122 __spirv_GroupNonUniformLogicalOr(
__spv::Scope::Flag,
unsigned int, ValueT);
1124 template <typename ValueT>
1126 __spirv_GroupNonUniformLogicalAnd(
__spv::Scope::Flag,
unsigned int, ValueT);
1128 template <typename ValueT>
1130 __spirv_GroupNonUniformSMin(
__spv::Scope::Flag,
unsigned int, ValueT,
1133 template <typename ValueT>
1135 __spirv_GroupNonUniformUMin(
__spv::Scope::Flag,
unsigned int, ValueT,
1138 template <typename ValueT>
1140 __spirv_GroupNonUniformFMin(
__spv::Scope::Flag,
unsigned int, ValueT,
1143 template <typename ValueT>
1145 __spirv_GroupNonUniformSMax(
__spv::Scope::Flag,
unsigned int, ValueT,
1148 template <typename ValueT>
1150 __spirv_GroupNonUniformUMax(
__spv::Scope::Flag,
unsigned int, ValueT,
1153 template <typename ValueT>
1155 __spirv_GroupNonUniformFMax(
__spv::Scope::Flag,
unsigned int, ValueT,
1158 template <typename ValueT>
1160 __spirv_GroupNonUniformIAdd(
__spv::Scope::Flag,
unsigned int, ValueT,
1163 template <typename ValueT>
1165 __spirv_GroupNonUniformFAdd(
__spv::Scope::Flag,
unsigned int, ValueT,
1168 template <typename ValueT>
1170 __spirv_GroupNonUniformIMul(
__spv::Scope::Flag,
unsigned int, ValueT,
1173 template <typename ValueT>
1175 __spirv_GroupNonUniformFMul(
__spv::Scope::Flag,
unsigned int, ValueT,
1178 template <typename ValueT>
1180 __spirv_GroupNonUniformBitwiseOr(
__spv::Scope::Flag,
unsigned int, ValueT,
1183 template <typename ValueT>
1185 __spirv_GroupNonUniformBitwiseXor(
__spv::Scope::Flag,
unsigned int, ValueT,
1188 template <typename ValueT>
1190 __spirv_GroupNonUniformBitwiseAnd(
__spv::Scope::Flag,
unsigned int, ValueT,
1193 template <typename ValueT>
1195 __spirv_GroupNonUniformLogicalOr(
__spv::Scope::Flag,
unsigned int, ValueT,
1198 template <typename ValueT>
1200 __spirv_GroupNonUniformLogicalAnd(
__spv::Scope::Flag,
unsigned int, ValueT,
1204 __clc_BarrierInitialize(
int64_t *state, int32_t expected_count)
noexcept;
1216 __clc_BarrierArriveNoComplete(
int64_t *state, int32_t count)
noexcept;
1219 __clc_BarrierArriveAndDropNoComplete(
int64_t *state, int32_t count)
noexcept;
1236 #ifdef __SYCL_USE_VARIADIC_SPIRV_OCL_PRINTF__
1238 __spirv_ocl_printf(
const __attribute__((opencl_constant))
char *Format, ...);
1241 template <
typename... Args>
1243 __spirv_ocl_printf(
const __attribute__((opencl_constant))
char *Format,
1245 template <
typename... Args>
1254 __clc_native_tanh(__ocl_vec_t<float, 2>);
1256 __clc_native_tanh(__ocl_vec_t<float, 3>);
1258 __clc_native_tanh(__ocl_vec_t<float, 4>);
1260 __clc_native_tanh(__ocl_vec_t<float, 8>);
1262 __clc_native_tanh(__ocl_vec_t<float, 16>);
1266 __clc_native_tanh(__ocl_vec_t<_Float16, 2>);
1268 __clc_native_tanh(__ocl_vec_t<_Float16, 3>);
1270 __clc_native_tanh(__ocl_vec_t<_Float16, 4>);
1272 __clc_native_tanh(__ocl_vec_t<_Float16, 8>);
1274 __clc_native_tanh(__ocl_vec_t<_Float16, 16>);
1278 __clc_native_exp2(__ocl_vec_t<_Float16, 2>);
1280 __clc_native_exp2(__ocl_vec_t<_Float16, 3>);
1282 __clc_native_exp2(__ocl_vec_t<_Float16, 4>);
1284 __clc_native_exp2(__ocl_vec_t<_Float16, 8>);
1286 __clc_native_exp2(__ocl_vec_t<_Float16, 16>);
1288 #define __CLC_BF16(...) \
1289 extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT __VA_ARGS__ __clc_fabs( \
1290 __VA_ARGS__) noexcept; \
1291 extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT __VA_ARGS__ __clc_fmin( \
1292 __VA_ARGS__, __VA_ARGS__) noexcept; \
1293 extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT __VA_ARGS__ __clc_fmax( \
1294 __VA_ARGS__, __VA_ARGS__) noexcept; \
1295 extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT __VA_ARGS__ __clc_fma( \
1296 __VA_ARGS__, __VA_ARGS__, __VA_ARGS__) noexcept;
1298 #define __CLC_BF16_SCAL_VEC(TYPE) \
1300 __CLC_BF16(__ocl_vec_t<TYPE, 2>) \
1301 __CLC_BF16(__ocl_vec_t<TYPE, 3>) \
1302 __CLC_BF16(__ocl_vec_t<TYPE, 4>) \
1303 __CLC_BF16(__ocl_vec_t<TYPE, 8>) \
1304 __CLC_BF16(__ocl_vec_t<TYPE, 16>)
1306 __CLC_BF16_SCAL_VEC(uint16_t)
1307 __CLC_BF16_SCAL_VEC(uint32_t)
1309 #undef __CLC_BF16_SCAL_VEC
1316 template <
typename from,
typename to>
1318 std::enable_if_t<std::is_integral_v<to> && std::is_unsigned_v<to>, to>
1319 __spirv_ConvertPtrToU(from val)
noexcept;
1321 template <
typename RetT,
typename... ArgsT>
1323 __spirv_TaskSequenceCreateINTEL(RetT (*f)(ArgsT...),
int Pipelined = -1,
1324 int ClusterMode = -1,
1328 template <typename... ArgsT>
1330 __spirv_TaskSequenceAsyncINTEL(
__spv::__spirv_TaskSequenceINTEL *TaskSequence,
1333 template <typename RetT>
1342 template <
typename dataT>
1345 const dataT *Src,
size_t NumElements,
1347 for (
size_t i = 0; i < NumElements; i++) {
1348 Dest[i] = Src[i * Stride];
1354 template <
typename dataT>
1357 const dataT *Src,
size_t NumElements,
1359 for (
size_t i = 0; i < NumElements; i++) {
1360 Dest[i * Stride] = Src[i];
#define __DPCPP_SYCL_EXTERNAL
conditional< sizeof(long)==8, long, long long >::type int64_t
__attribute__((always_inline)) auto invoke_simd(sycl
The invoke_simd free function invokes a SIMD function using all work-items in a sub_group.
__SYCL_CONVERGENT__ __ocl_event_t __SYCL_OpGroupAsyncCopyGlobalToLocal(__spv::Scope::Flag, dataT *Dest, const dataT *Src, size_t NumElements, size_t Stride, __ocl_event_t) noexcept
__SYCL_CONVERGENT__ __DPCPP_SYCL_EXTERNAL void __spirv_ControlBarrier(__spv::Scope Execution, __spv::Scope Memory, uint32_t Semantics) noexcept
__SYCL_CONVERGENT__ __DPCPP_SYCL_EXTERNAL void __spirv_MemoryBarrier(__spv::Scope Memory, uint32_t Semantics) noexcept
__SYCL_CONVERGENT__ __DPCPP_SYCL_EXTERNAL void __spirv_GroupWaitEvents(__spv::Scope Execution, uint32_t NumEvents, __ocl_event_t *WaitEvents) noexcept
#define __SYCL_CONVERGENT__
void __spirv_ocl_prefetch(const char *Ptr, size_t NumBytes) noexcept
__SYCL_CONVERGENT__ __ocl_event_t __SYCL_OpGroupAsyncCopyLocalToGlobal(__spv::Scope::Flag, dataT *Dest, const dataT *Src, size_t NumElements, size_t Stride, __ocl_event_t) noexcept
_Abi const simd< _Tp, _Abi > & noexcept