DPC++ Runtime
Runtime libraries for oneAPI DPC++
spirv_ops.hpp
Go to the documentation of this file.
1 //==----------- spirv_ops.hpp --- SPIRV operations -------------------------==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #pragma once
14 #include <cstddef>
15 #include <cstdint>
16 
17 // Convergent attribute
18 #ifdef __SYCL_DEVICE_ONLY__
19 #define __SYCL_CONVERGENT__ __attribute__((convergent))
20 #else
21 #define __SYCL_CONVERGENT__
22 #endif
23 
24 #ifdef __SYCL_DEVICE_ONLY__
25 template <typename T, std::size_t R, std::size_t C,
27  __spv::Scope::Flag S = __spv::Scope::Flag::Subgroup>
29 __spirv_JointMatrixLoadINTEL(T *Ptr, std::size_t Stride,
30  __spv::MatrixLayout Layout = L,
31  __spv::Scope::Flag Sc = S, int MemOperand = 0);
32 
33 template <typename T, std::size_t R, std::size_t C,
35  __spv::Scope::Flag S = __spv::Scope::Flag::Subgroup>
36 extern SYCL_EXTERNAL void __spirv_JointMatrixStoreINTEL(
38  std::size_t Stride, __spv::MatrixLayout Layout = L,
39  __spv::Scope::Flag Sc = S, int MemOperand = 0);
40 
41 template <typename T1, typename T2, std::size_t M, std::size_t K, std::size_t N,
45  __spv::Scope::Flag S = __spv::Scope::Flag::Subgroup>
47 __spirv_JointMatrixMadINTEL(
51  __spv::Scope::Flag Sc = __spv::Scope::Flag::Subgroup);
52 
53 template <typename T1, typename T2, typename T3, std::size_t M, std::size_t K,
57  __spv::Scope::Flag S = __spv::Scope::Flag::Subgroup>
59 __spirv_JointMatrixUUMadINTEL(
63  __spv::Scope::Flag Sc = __spv::Scope::Flag::Subgroup);
64 
65 template <typename T1, typename T2, typename T3, std::size_t M, std::size_t K,
69  __spv::Scope::Flag S = __spv::Scope::Flag::Subgroup>
71 __spirv_JointMatrixUSMadINTEL(
75  __spv::Scope::Flag Sc = __spv::Scope::Flag::Subgroup);
76 
77 template <typename T1, typename T2, typename T3, std::size_t M, std::size_t K,
81  __spv::Scope::Flag S = __spv::Scope::Flag::Subgroup>
83 __spirv_JointMatrixSUMadINTEL(
87  __spv::Scope::Flag Sc = __spv::Scope::Flag::Subgroup);
88 
89 template <typename T, std::size_t R, std::size_t C,
91  __spv::Scope::Flag S = __spv::Scope::Flag::Subgroup>
93 __spirv_CompositeConstruct(const T v);
94 
95 template <typename T, std::size_t R, std::size_t C, __spv::MatrixLayout U,
96  __spv::Scope::Flag S = __spv::Scope::Flag::Subgroup>
97 extern SYCL_EXTERNAL size_t __spirv_JointMatrixWorkItemLengthINTEL(
99 
100 template <typename T, std::size_t R, std::size_t C, __spv::MatrixLayout U,
101  __spv::Scope::Flag S = __spv::Scope::Flag::Subgroup>
102 extern SYCL_EXTERNAL T __spirv_VectorExtractDynamic(
104 
105 template <typename T, std::size_t R, std::size_t C, __spv::MatrixLayout U,
106  __spv::Scope::Flag S = __spv::Scope::Flag::Subgroup>
108 __spirv_VectorInsertDynamic(__spv::__spirv_JointMatrixINTEL<T, R, C, U, S> *,
109  T val, size_t i);
110 
111 #ifndef __SPIRV_BUILTIN_DECLARATIONS__
112 #error \
113  "SPIR-V built-ins are not available. Please set -fdeclare-spirv-builtins flag."
114 #endif
115 
116 template <typename RetT, typename ImageT>
117 extern SYCL_EXTERNAL RetT __spirv_ImageQueryFormat(ImageT);
118 
119 template <typename RetT, typename ImageT>
120 extern SYCL_EXTERNAL RetT __spirv_ImageQueryOrder(ImageT);
121 
122 template <typename RetT, typename ImageT>
123 extern SYCL_EXTERNAL RetT __spirv_ImageQuerySize(ImageT);
124 
125 template <typename ImageT, typename CoordT, typename ValT>
126 extern SYCL_EXTERNAL void __spirv_ImageWrite(ImageT, CoordT, ValT);
127 
128 template <class RetT, typename ImageT, typename TempArgT>
129 extern SYCL_EXTERNAL RetT __spirv_ImageRead(ImageT, TempArgT);
130 
131 template <typename ImageT, typename SampledType>
132 extern SYCL_EXTERNAL SampledType __spirv_SampledImage(ImageT, __ocl_sampler_t);
133 
134 template <typename SampledType, typename TempRetT, typename TempArgT>
135 extern SYCL_EXTERNAL TempRetT __spirv_ImageSampleExplicitLod(SampledType,
136  TempArgT, int,
137  float);
138 
139 #define __SYCL_OpGroupAsyncCopyGlobalToLocal __spirv_GroupAsyncCopy
140 #define __SYCL_OpGroupAsyncCopyLocalToGlobal __spirv_GroupAsyncCopy
141 
142 // Atomic SPIR-V builtins
143 #define __SPIRV_ATOMIC_LOAD(AS, Type) \
144  extern SYCL_EXTERNAL Type __spirv_AtomicLoad( \
145  AS const Type *P, __spv::Scope::Flag S, \
146  __spv::MemorySemanticsMask::Flag O);
147 #define __SPIRV_ATOMIC_STORE(AS, Type) \
148  extern SYCL_EXTERNAL void __spirv_AtomicStore( \
149  AS Type *P, __spv::Scope::Flag S, __spv::MemorySemanticsMask::Flag O, \
150  Type V);
151 #define __SPIRV_ATOMIC_EXCHANGE(AS, Type) \
152  extern SYCL_EXTERNAL Type __spirv_AtomicExchange( \
153  AS Type *P, __spv::Scope::Flag S, __spv::MemorySemanticsMask::Flag O, \
154  Type V);
155 #define __SPIRV_ATOMIC_CMP_EXCHANGE(AS, Type) \
156  extern SYCL_EXTERNAL Type __spirv_AtomicCompareExchange( \
157  AS Type *P, __spv::Scope::Flag S, __spv::MemorySemanticsMask::Flag E, \
158  __spv::MemorySemanticsMask::Flag U, Type V, Type C);
159 #define __SPIRV_ATOMIC_IADD(AS, Type) \
160  extern SYCL_EXTERNAL Type __spirv_AtomicIAdd( \
161  AS Type *P, __spv::Scope::Flag S, __spv::MemorySemanticsMask::Flag O, \
162  Type V);
163 #define __SPIRV_ATOMIC_ISUB(AS, Type) \
164  extern SYCL_EXTERNAL Type __spirv_AtomicISub( \
165  AS Type *P, __spv::Scope::Flag S, __spv::MemorySemanticsMask::Flag O, \
166  Type V);
167 #define __SPIRV_ATOMIC_FADD(AS, Type) \
168  extern SYCL_EXTERNAL Type __spirv_AtomicFAddEXT( \
169  AS Type *P, __spv::Scope::Flag S, __spv::MemorySemanticsMask::Flag O, \
170  Type V);
171 #define __SPIRV_ATOMIC_SMIN(AS, Type) \
172  extern SYCL_EXTERNAL Type __spirv_AtomicSMin( \
173  AS Type *P, __spv::Scope::Flag S, __spv::MemorySemanticsMask::Flag O, \
174  Type V);
175 #define __SPIRV_ATOMIC_UMIN(AS, Type) \
176  extern SYCL_EXTERNAL Type __spirv_AtomicUMin( \
177  AS Type *P, __spv::Scope::Flag S, __spv::MemorySemanticsMask::Flag O, \
178  Type V);
179 #define __SPIRV_ATOMIC_FMIN(AS, Type) \
180  extern SYCL_EXTERNAL Type __spirv_AtomicFMinEXT( \
181  AS Type *P, __spv::Scope::Flag S, __spv::MemorySemanticsMask::Flag O, \
182  Type V);
183 #define __SPIRV_ATOMIC_SMAX(AS, Type) \
184  extern SYCL_EXTERNAL Type __spirv_AtomicSMax( \
185  AS Type *P, __spv::Scope::Flag S, __spv::MemorySemanticsMask::Flag O, \
186  Type V);
187 #define __SPIRV_ATOMIC_UMAX(AS, Type) \
188  extern SYCL_EXTERNAL Type __spirv_AtomicUMax( \
189  AS Type *P, __spv::Scope::Flag S, __spv::MemorySemanticsMask::Flag O, \
190  Type V);
191 #define __SPIRV_ATOMIC_FMAX(AS, Type) \
192  extern SYCL_EXTERNAL Type __spirv_AtomicFMaxEXT( \
193  AS Type *P, __spv::Scope::Flag S, __spv::MemorySemanticsMask::Flag O, \
194  Type V);
195 #define __SPIRV_ATOMIC_AND(AS, Type) \
196  extern SYCL_EXTERNAL Type __spirv_AtomicAnd( \
197  AS Type *P, __spv::Scope::Flag S, __spv::MemorySemanticsMask::Flag O, \
198  Type V);
199 #define __SPIRV_ATOMIC_OR(AS, Type) \
200  extern SYCL_EXTERNAL Type __spirv_AtomicOr( \
201  AS Type *P, __spv::Scope::Flag S, __spv::MemorySemanticsMask::Flag O, \
202  Type V);
203 #define __SPIRV_ATOMIC_XOR(AS, Type) \
204  extern SYCL_EXTERNAL Type __spirv_AtomicXor( \
205  AS Type *P, __spv::Scope::Flag S, __spv::MemorySemanticsMask::Flag O, \
206  Type V);
207 
208 #define __SPIRV_ATOMIC_FLOAT(AS, Type) \
209  __SPIRV_ATOMIC_FADD(AS, Type) \
210  __SPIRV_ATOMIC_FMIN(AS, Type) \
211  __SPIRV_ATOMIC_FMAX(AS, Type) \
212  __SPIRV_ATOMIC_LOAD(AS, Type) \
213  __SPIRV_ATOMIC_STORE(AS, Type) \
214  __SPIRV_ATOMIC_EXCHANGE(AS, Type)
215 
216 #define __SPIRV_ATOMIC_BASE(AS, Type) \
217  __SPIRV_ATOMIC_FLOAT(AS, Type) \
218  __SPIRV_ATOMIC_CMP_EXCHANGE(AS, Type) \
219  __SPIRV_ATOMIC_IADD(AS, Type) \
220  __SPIRV_ATOMIC_ISUB(AS, Type) \
221  __SPIRV_ATOMIC_AND(AS, Type) \
222  __SPIRV_ATOMIC_OR(AS, Type) \
223  __SPIRV_ATOMIC_XOR(AS, Type)
224 
225 #define __SPIRV_ATOMIC_SIGNED(AS, Type) \
226  __SPIRV_ATOMIC_BASE(AS, Type) \
227  __SPIRV_ATOMIC_SMIN(AS, Type) \
228  __SPIRV_ATOMIC_SMAX(AS, Type)
229 
230 #define __SPIRV_ATOMIC_UNSIGNED(AS, Type) \
231  __SPIRV_ATOMIC_BASE(AS, Type) \
232  __SPIRV_ATOMIC_UMIN(AS, Type) \
233  __SPIRV_ATOMIC_UMAX(AS, Type)
234 
235 // Helper atomic operations which select correct signed/unsigned version
236 // of atomic min/max based on the type
237 #define __SPIRV_ATOMIC_MINMAX(AS, Op) \
238  template <typename T> \
239  typename cl::sycl::detail::enable_if_t< \
240  std::is_integral<T>::value && std::is_signed<T>::value, T> \
241  __spirv_Atomic##Op(AS T *Ptr, __spv::Scope::Flag Memory, \
242  __spv::MemorySemanticsMask::Flag Semantics, \
243  T Value) { \
244  return __spirv_AtomicS##Op(Ptr, Memory, Semantics, Value); \
245  } \
246  template <typename T> \
247  typename cl::sycl::detail::enable_if_t< \
248  std::is_integral<T>::value && !std::is_signed<T>::value, T> \
249  __spirv_Atomic##Op(AS T *Ptr, __spv::Scope::Flag Memory, \
250  __spv::MemorySemanticsMask::Flag Semantics, \
251  T Value) { \
252  return __spirv_AtomicU##Op(Ptr, Memory, Semantics, Value); \
253  } \
254  template <typename T> \
255  typename cl::sycl::detail::enable_if_t<std::is_floating_point<T>::value, T> \
256  __spirv_Atomic##Op(AS T *Ptr, __spv::Scope::Flag Memory, \
257  __spv::MemorySemanticsMask::Flag Semantics, \
258  T Value) { \
259  return __spirv_AtomicF##Op##EXT(Ptr, Memory, Semantics, Value); \
260  }
261 
262 #define __SPIRV_ATOMICS(macro, Arg) \
263  macro(__attribute__((opencl_global)), Arg) \
264  macro(__attribute__((opencl_local)), Arg) macro(, Arg)
265 
266 __SPIRV_ATOMICS(__SPIRV_ATOMIC_FLOAT, float)
267 __SPIRV_ATOMICS(__SPIRV_ATOMIC_FLOAT, double)
268 __SPIRV_ATOMICS(__SPIRV_ATOMIC_SIGNED, int)
269 __SPIRV_ATOMICS(__SPIRV_ATOMIC_SIGNED, long)
270 __SPIRV_ATOMICS(__SPIRV_ATOMIC_SIGNED, long long)
271 __SPIRV_ATOMICS(__SPIRV_ATOMIC_UNSIGNED, unsigned int)
272 __SPIRV_ATOMICS(__SPIRV_ATOMIC_UNSIGNED, unsigned long)
273 __SPIRV_ATOMICS(__SPIRV_ATOMIC_UNSIGNED, unsigned long long)
274 __SPIRV_ATOMICS(__SPIRV_ATOMIC_MINMAX, Min)
275 __SPIRV_ATOMICS(__SPIRV_ATOMIC_MINMAX, Max)
276 
277 #undef __SPIRV_ATOMICS
278 #undef __SPIRV_ATOMIC_AND
279 #undef __SPIRV_ATOMIC_BASE
280 #undef __SPIRV_ATOMIC_CMP_EXCHANGE
281 #undef __SPIRV_ATOMIC_EXCHANGE
282 #undef __SPIRV_ATOMIC_FADD
283 #undef __SPIRV_ATOMIC_FLOAT
284 #undef __SPIRV_ATOMIC_FMAX
285 #undef __SPIRV_ATOMIC_FMIN
286 #undef __SPIRV_ATOMIC_IADD
287 #undef __SPIRV_ATOMIC_ISUB
288 #undef __SPIRV_ATOMIC_LOAD
289 #undef __SPIRV_ATOMIC_MINMAX
290 #undef __SPIRV_ATOMIC_OR
291 #undef __SPIRV_ATOMIC_SIGNED
292 #undef __SPIRV_ATOMIC_SMAX
293 #undef __SPIRV_ATOMIC_SMIN
294 #undef __SPIRV_ATOMIC_STORE
295 #undef __SPIRV_ATOMIC_UMAX
296 #undef __SPIRV_ATOMIC_UMIN
297 #undef __SPIRV_ATOMIC_UNSIGNED
298 #undef __SPIRV_ATOMIC_XOR
299 
300 extern SYCL_EXTERNAL __attribute__((opencl_global)) void *
301 __spirv_GenericCastToPtrExplicit_ToGlobal(const void *Ptr,
302  __spv::StorageClass::Flag S) noexcept;
303 
304 extern SYCL_EXTERNAL __attribute__((opencl_local)) void *
305 __spirv_GenericCastToPtrExplicit_ToLocal(const void *Ptr,
306  __spv::StorageClass::Flag S) noexcept;
307 
308 template <typename dataT>
309 extern __attribute__((opencl_global)) dataT *
310 __SYCL_GenericCastToPtrExplicit_ToGlobal(const void *Ptr) noexcept {
311  return (__attribute__((opencl_global)) dataT *)
312  __spirv_GenericCastToPtrExplicit_ToGlobal(
314 }
315 
316 template <typename dataT>
317 extern __attribute__((opencl_local)) dataT *
318 __SYCL_GenericCastToPtrExplicit_ToLocal(const void *Ptr) noexcept {
319  return (__attribute__((opencl_local)) dataT *)
320  __spirv_GenericCastToPtrExplicit_ToLocal(Ptr,
322 }
323 
324 template <typename dataT>
326 __spirv_SubgroupShuffleINTEL(dataT Data, uint32_t InvocationId) noexcept;
327 template <typename dataT>
328 __SYCL_CONVERGENT__ extern SYCL_EXTERNAL dataT __spirv_SubgroupShuffleDownINTEL(
329  dataT Current, dataT Next, uint32_t Delta) noexcept;
330 template <typename dataT>
331 __SYCL_CONVERGENT__ extern SYCL_EXTERNAL dataT __spirv_SubgroupShuffleUpINTEL(
332  dataT Previous, dataT Current, uint32_t Delta) noexcept;
333 template <typename dataT>
335 __spirv_SubgroupShuffleXorINTEL(dataT Data, uint32_t Value) noexcept;
336 
337 template <typename dataT>
338 __SYCL_CONVERGENT__ extern SYCL_EXTERNAL dataT __spirv_SubgroupBlockReadINTEL(
339  const __attribute__((opencl_global)) uint8_t *Ptr) noexcept;
340 
341 template <typename dataT>
343 __spirv_SubgroupBlockWriteINTEL(__attribute__((opencl_global)) uint8_t *Ptr,
344  dataT Data) noexcept;
345 
346 template <typename dataT>
347 __SYCL_CONVERGENT__ extern SYCL_EXTERNAL dataT __spirv_SubgroupBlockReadINTEL(
348  const __attribute__((opencl_global)) uint16_t *Ptr) noexcept;
349 
350 template <typename dataT>
352 __spirv_SubgroupBlockWriteINTEL(__attribute__((opencl_global)) uint16_t *Ptr,
353  dataT Data) noexcept;
354 
355 template <typename dataT>
356 __SYCL_CONVERGENT__ extern SYCL_EXTERNAL dataT __spirv_SubgroupBlockReadINTEL(
357  const __attribute__((opencl_global)) uint32_t *Ptr) noexcept;
358 
359 template <typename dataT>
361 __spirv_SubgroupBlockWriteINTEL(__attribute__((opencl_global)) uint32_t *Ptr,
362  dataT Data) noexcept;
363 
364 template <typename dataT>
365 __SYCL_CONVERGENT__ extern SYCL_EXTERNAL dataT __spirv_SubgroupBlockReadINTEL(
366  const __attribute__((opencl_global)) uint64_t *Ptr) noexcept;
367 
368 template <typename dataT>
370 __spirv_SubgroupBlockWriteINTEL(__attribute__((opencl_global)) uint64_t *Ptr,
371  dataT Data) noexcept;
372 template <int W, int rW>
373 extern SYCL_EXTERNAL cl::sycl::detail::ap_int<rW>
374 __spirv_FixedSqrtINTEL(cl::sycl::detail::ap_int<W> a, bool S, int32_t I,
375  int32_t rI, int32_t Quantization = 0,
376  int32_t Overflow = 0) noexcept;
377 template <int W, int rW>
378 extern SYCL_EXTERNAL cl::sycl::detail::ap_int<rW>
379 __spirv_FixedRecipINTEL(cl::sycl::detail::ap_int<W> a, bool S, int32_t I,
380  int32_t rI, int32_t Quantization = 0,
381  int32_t Overflow = 0) noexcept;
382 template <int W, int rW>
383 extern SYCL_EXTERNAL cl::sycl::detail::ap_int<rW>
384 __spirv_FixedRsqrtINTEL(cl::sycl::detail::ap_int<W> a, bool S, int32_t I,
385  int32_t rI, int32_t Quantization = 0,
386  int32_t Overflow = 0) noexcept;
387 template <int W, int rW>
388 extern SYCL_EXTERNAL cl::sycl::detail::ap_int<rW>
389 __spirv_FixedSinINTEL(cl::sycl::detail::ap_int<W> a, bool S, int32_t I,
390  int32_t rI, int32_t Quantization = 0,
391  int32_t Overflow = 0) noexcept;
392 template <int W, int rW>
393 extern SYCL_EXTERNAL cl::sycl::detail::ap_int<rW>
394 __spirv_FixedCosINTEL(cl::sycl::detail::ap_int<W> a, bool S, int32_t I,
395  int32_t rI, int32_t Quantization = 0,
396  int32_t Overflow = 0) noexcept;
397 template <int W, int rW>
398 extern SYCL_EXTERNAL cl::sycl::detail::ap_int<2 * rW>
399 __spirv_FixedSinCosINTEL(cl::sycl::detail::ap_int<W> a, bool S, int32_t I,
400  int32_t rI, int32_t Quantization = 0,
401  int32_t Overflow = 0) noexcept;
402 template <int W, int rW>
403 extern SYCL_EXTERNAL cl::sycl::detail::ap_int<rW>
404 __spirv_FixedSinPiINTEL(cl::sycl::detail::ap_int<W> a, bool S, int32_t I,
405  int32_t rI, int32_t Quantization = 0,
406  int32_t Overflow = 0) noexcept;
407 template <int W, int rW>
408 extern SYCL_EXTERNAL cl::sycl::detail::ap_int<rW>
409 __spirv_FixedCosPiINTEL(cl::sycl::detail::ap_int<W> a, bool S, int32_t I,
410  int32_t rI, int32_t Quantization = 0,
411  int32_t Overflow = 0) noexcept;
412 template <int W, int rW>
413 extern SYCL_EXTERNAL cl::sycl::detail::ap_int<2 * rW>
414 __spirv_FixedSinCosPiINTEL(cl::sycl::detail::ap_int<W> a, bool S, int32_t I,
415  int32_t rI, int32_t Quantization = 0,
416  int32_t Overflow = 0) noexcept;
417 template <int W, int rW>
418 extern SYCL_EXTERNAL cl::sycl::detail::ap_int<rW>
419 __spirv_FixedLogINTEL(cl::sycl::detail::ap_int<W> a, bool S, int32_t I,
420  int32_t rI, int32_t Quantization = 0,
421  int32_t Overflow = 0) noexcept;
422 template <int W, int rW>
423 extern SYCL_EXTERNAL cl::sycl::detail::ap_int<rW>
424 __spirv_FixedExpINTEL(cl::sycl::detail::ap_int<W> a, bool S, int32_t I,
425  int32_t rI, int32_t Quantization = 0,
426  int32_t Overflow = 0) noexcept;
427 
428 // In the following built-ins width of arbitrary precision integer type for
429 // a floating point variable should be equal to sum of corresponding
430 // exponent width E, mantissa width M and 1 for sign bit. I.e. WA = EA + MA + 1.
431 template <int WA, int Wout>
432 extern SYCL_EXTERNAL cl::sycl::detail::ap_int<Wout>
433 __spirv_ArbitraryFloatCastINTEL(cl::sycl::detail::ap_int<WA> A, int32_t MA,
434  int32_t Mout, int32_t EnableSubnormals = 0,
435  int32_t RoundingMode = 0,
436  int32_t RoundingAccuracy = 0) noexcept;
437 
438 template <int WA, int Wout>
439 extern SYCL_EXTERNAL cl::sycl::detail::ap_int<Wout>
440 __spirv_ArbitraryFloatCastFromIntINTEL(cl::sycl::detail::ap_int<WA> A,
441  int32_t Mout, bool FromSign = false,
442  int32_t EnableSubnormals = 0,
443  int32_t RoundingMode = 0,
444  int32_t RoundingAccuracy = 0) noexcept;
445 
446 template <int WA, int Wout>
447 extern SYCL_EXTERNAL cl::sycl::detail::ap_int<Wout>
448 __spirv_ArbitraryFloatCastToIntINTEL(cl::sycl::detail::ap_int<WA> A, int32_t MA,
449  bool ToSign = false,
450  int32_t EnableSubnormals = 0,
451  int32_t RoundingMode = 0,
452  int32_t RoundingAccuracy = 0) noexcept;
453 
454 template <int WA, int WB, int Wout>
455 extern SYCL_EXTERNAL cl::sycl::detail::ap_int<Wout>
456 __spirv_ArbitraryFloatAddINTEL(cl::sycl::detail::ap_int<WA> A, int32_t MA,
457  cl::sycl::detail::ap_int<WB> B, int32_t MB,
458  int32_t Mout, int32_t EnableSubnormals = 0,
459  int32_t RoundingMode = 0,
460  int32_t RoundingAccuracy = 0) noexcept;
461 
462 template <int WA, int WB, int Wout>
463 extern SYCL_EXTERNAL cl::sycl::detail::ap_int<Wout>
464 __spirv_ArbitraryFloatSubINTEL(cl::sycl::detail::ap_int<WA> A, int32_t MA,
465  cl::sycl::detail::ap_int<WB> B, int32_t MB,
466  int32_t Mout, int32_t EnableSubnormals = 0,
467  int32_t RoundingMode = 0,
468  int32_t RoundingAccuracy = 0) noexcept;
469 
470 template <int WA, int WB, int Wout>
471 extern SYCL_EXTERNAL cl::sycl::detail::ap_int<Wout>
472 __spirv_ArbitraryFloatMulINTEL(cl::sycl::detail::ap_int<WA> A, int32_t MA,
473  cl::sycl::detail::ap_int<WB> B, int32_t MB,
474  int32_t Mout, int32_t EnableSubnormals = 0,
475  int32_t RoundingMode = 0,
476  int32_t RoundingAccuracy = 0) noexcept;
477 
478 template <int WA, int WB, int Wout>
479 extern SYCL_EXTERNAL cl::sycl::detail::ap_int<Wout>
480 __spirv_ArbitraryFloatDivINTEL(cl::sycl::detail::ap_int<WA> A, int32_t MA,
481  cl::sycl::detail::ap_int<WB> B, int32_t MB,
482  int32_t Mout, int32_t EnableSubnormals = 0,
483  int32_t RoundingMode = 0,
484  int32_t RoundingAccuracy = 0) noexcept;
485 
486 // Comparison built-ins don't use Subnormal Support, Rounding Mode and
487 // Rounding Accuracy.
488 template <int WA, int WB>
489 extern SYCL_EXTERNAL bool
490 __spirv_ArbitraryFloatGTINTEL(cl::sycl::detail::ap_int<WA> A, int32_t MA,
491  cl::sycl::detail::ap_int<WB> B,
492  int32_t MB) noexcept;
493 
494 template <int WA, int WB>
495 extern SYCL_EXTERNAL bool
496 __spirv_ArbitraryFloatGEINTEL(cl::sycl::detail::ap_int<WA> A, int32_t MA,
497  cl::sycl::detail::ap_int<WB> B,
498  int32_t MB) noexcept;
499 
500 template <int WA, int WB>
501 extern SYCL_EXTERNAL bool
502 __spirv_ArbitraryFloatLTINTEL(cl::sycl::detail::ap_int<WA> A, int32_t MA,
503  cl::sycl::detail::ap_int<WB> B,
504  int32_t MB) noexcept;
505 
506 template <int WA, int WB>
507 extern SYCL_EXTERNAL bool
508 __spirv_ArbitraryFloatLEINTEL(cl::sycl::detail::ap_int<WA> A, int32_t MA,
509  cl::sycl::detail::ap_int<WB> B,
510  int32_t MB) noexcept;
511 
512 template <int WA, int WB>
513 extern SYCL_EXTERNAL bool
514 __spirv_ArbitraryFloatEQINTEL(cl::sycl::detail::ap_int<WA> A, int32_t MA,
515  cl::sycl::detail::ap_int<WB> B,
516  int32_t MB) noexcept;
517 
518 template <int WA, int Wout>
519 extern SYCL_EXTERNAL cl::sycl::detail::ap_int<Wout>
520 __spirv_ArbitraryFloatRecipINTEL(cl::sycl::detail::ap_int<WA> A, int32_t MA,
521  int32_t Mout, int32_t EnableSubnormals = 0,
522  int32_t RoundingMode = 0,
523  int32_t RoundingAccuracy = 0) noexcept;
524 
525 template <int WA, int Wout>
526 extern SYCL_EXTERNAL cl::sycl::detail::ap_int<Wout>
527 __spirv_ArbitraryFloatRSqrtINTEL(cl::sycl::detail::ap_int<WA> A, int32_t MA,
528  int32_t Mout, int32_t EnableSubnormals = 0,
529  int32_t RoundingMode = 0,
530  int32_t RoundingAccuracy = 0) noexcept;
531 
532 template <int WA, int Wout>
533 extern SYCL_EXTERNAL cl::sycl::detail::ap_int<Wout>
534 __spirv_ArbitraryFloatCbrtINTEL(cl::sycl::detail::ap_int<WA> A, int32_t MA,
535  int32_t Mout, int32_t EnableSubnormals = 0,
536  int32_t RoundingMode = 0,
537  int32_t RoundingAccuracy = 0) noexcept;
538 
539 template <int WA, int WB, int Wout>
540 extern SYCL_EXTERNAL cl::sycl::detail::ap_int<Wout>
541 __spirv_ArbitraryFloatHypotINTEL(cl::sycl::detail::ap_int<WA> A, int32_t MA,
542  cl::sycl::detail::ap_int<WB> B, int32_t MB,
543  int32_t Mout, int32_t EnableSubnormals = 0,
544  int32_t RoundingMode = 0,
545  int32_t RoundingAccuracy = 0) noexcept;
546 
547 template <int WA, int Wout>
548 extern SYCL_EXTERNAL cl::sycl::detail::ap_int<Wout>
549 __spirv_ArbitraryFloatSqrtINTEL(cl::sycl::detail::ap_int<WA> A, int32_t MA,
550  int32_t Mout, int32_t EnableSubnormals = 0,
551  int32_t RoundingMode = 0,
552  int32_t RoundingAccuracy = 0) noexcept;
553 
554 template <int WA, int Wout>
555 extern SYCL_EXTERNAL cl::sycl::detail::ap_int<Wout>
556 __spirv_ArbitraryFloatLogINTEL(cl::sycl::detail::ap_int<WA> A, int32_t MA,
557  int32_t Mout, int32_t EnableSubnormals = 0,
558  int32_t RoundingMode = 0,
559  int32_t RoundingAccuracy = 0) noexcept;
560 
561 template <int WA, int Wout>
562 extern SYCL_EXTERNAL cl::sycl::detail::ap_int<Wout>
563 __spirv_ArbitraryFloatLog2INTEL(cl::sycl::detail::ap_int<WA> A, int32_t MA,
564  int32_t Mout, int32_t EnableSubnormals = 0,
565  int32_t RoundingMode = 0,
566  int32_t RoundingAccuracy = 0) noexcept;
567 
568 template <int WA, int Wout>
569 extern SYCL_EXTERNAL cl::sycl::detail::ap_int<Wout>
570 __spirv_ArbitraryFloatLog10INTEL(cl::sycl::detail::ap_int<WA> A, int32_t MA,
571  int32_t Mout, int32_t EnableSubnormals = 0,
572  int32_t RoundingMode = 0,
573  int32_t RoundingAccuracy = 0) noexcept;
574 
575 template <int WA, int Wout>
576 extern SYCL_EXTERNAL cl::sycl::detail::ap_int<Wout>
577 __spirv_ArbitraryFloatLog1pINTEL(cl::sycl::detail::ap_int<WA> A, int32_t MA,
578  int32_t Mout, int32_t EnableSubnormals = 0,
579  int32_t RoundingMode = 0,
580  int32_t RoundingAccuracy = 0) noexcept;
581 
582 template <int WA, int Wout>
583 extern SYCL_EXTERNAL cl::sycl::detail::ap_int<Wout>
584 __spirv_ArbitraryFloatExpINTEL(cl::sycl::detail::ap_int<WA> A, int32_t MA,
585  int32_t Mout, int32_t EnableSubnormals = 0,
586  int32_t RoundingMode = 0,
587  int32_t RoundingAccuracy = 0) noexcept;
588 
589 template <int WA, int Wout>
590 extern SYCL_EXTERNAL cl::sycl::detail::ap_int<Wout>
591 __spirv_ArbitraryFloatExp2INTEL(cl::sycl::detail::ap_int<WA> A, int32_t MA,
592  int32_t Mout, int32_t EnableSubnormals = 0,
593  int32_t RoundingMode = 0,
594  int32_t RoundingAccuracy = 0) noexcept;
595 
596 template <int WA, int Wout>
597 extern SYCL_EXTERNAL cl::sycl::detail::ap_int<Wout>
598 __spirv_ArbitraryFloatExp10INTEL(cl::sycl::detail::ap_int<WA> A, int32_t MA,
599  int32_t Mout, int32_t EnableSubnormals = 0,
600  int32_t RoundingMode = 0,
601  int32_t RoundingAccuracy = 0) noexcept;
602 
603 template <int WA, int Wout>
604 extern SYCL_EXTERNAL cl::sycl::detail::ap_int<Wout>
605 __spirv_ArbitraryFloatExpm1INTEL(cl::sycl::detail::ap_int<WA> A, int32_t MA,
606  int32_t Mout, int32_t EnableSubnormals = 0,
607  int32_t RoundingMode = 0,
608  int32_t RoundingAccuracy = 0) noexcept;
609 
610 template <int WA, int Wout>
611 extern SYCL_EXTERNAL cl::sycl::detail::ap_int<Wout>
612 __spirv_ArbitraryFloatSinINTEL(cl::sycl::detail::ap_int<WA> A, int32_t MA,
613  int32_t Mout, int32_t EnableSubnormals = 0,
614  int32_t RoundingMode = 0,
615  int32_t RoundingAccuracy = 0) noexcept;
616 
617 template <int WA, int Wout>
618 extern SYCL_EXTERNAL cl::sycl::detail::ap_int<Wout>
619 __spirv_ArbitraryFloatCosINTEL(cl::sycl::detail::ap_int<WA> A, int32_t MA,
620  int32_t Mout, int32_t EnableSubnormals = 0,
621  int32_t RoundingMode = 0,
622  int32_t RoundingAccuracy = 0) noexcept;
623 
624 // Result value contains both values of sine and cosine and so has the size of
625 // 2 * Wout where Wout is equal to (1 + Eout + Mout).
626 template <int WA, int Wout>
627 extern SYCL_EXTERNAL cl::sycl::detail::ap_int<2 * Wout>
628 __spirv_ArbitraryFloatSinCosINTEL(cl::sycl::detail::ap_int<WA> A, int32_t MA,
629  int32_t Mout, int32_t EnableSubnormals = 0,
630  int32_t RoundingMode = 0,
631  int32_t RoundingAccuracy = 0) noexcept;
632 
633 template <int WA, int Wout>
634 extern SYCL_EXTERNAL cl::sycl::detail::ap_int<Wout>
635 __spirv_ArbitraryFloatSinPiINTEL(cl::sycl::detail::ap_int<WA> A, int32_t MA,
636  int32_t Mout, int32_t EnableSubnormals = 0,
637  int32_t RoundingMode = 0,
638  int32_t RoundingAccuracy = 0) noexcept;
639 
640 template <int WA, int Wout>
641 extern SYCL_EXTERNAL cl::sycl::detail::ap_int<Wout>
642 __spirv_ArbitraryFloatCosPiINTEL(cl::sycl::detail::ap_int<WA> A, int32_t MA,
643  int32_t Mout, int32_t EnableSubnormals = 0,
644  int32_t RoundingMode = 0,
645  int32_t RoundingAccuracy = 0) noexcept;
646 
647 // Result value contains both values of sine(A*pi) and cosine(A*pi) and so has
648 // the size of 2 * Wout where Wout is equal to (1 + Eout + Mout).
649 template <int WA, int Wout>
650 extern SYCL_EXTERNAL cl::sycl::detail::ap_int<2 * Wout>
651 __spirv_ArbitraryFloatSinCosPiINTEL(cl::sycl::detail::ap_int<WA> A, int32_t MA,
652  int32_t Mout, int32_t EnableSubnormals = 0,
653  int32_t RoundingMode = 0,
654  int32_t RoundingAccuracy = 0) noexcept;
655 
656 template <int WA, int Wout>
657 extern SYCL_EXTERNAL cl::sycl::detail::ap_int<Wout>
658 __spirv_ArbitraryFloatASinINTEL(cl::sycl::detail::ap_int<WA> A, int32_t MA,
659  int32_t Mout, int32_t EnableSubnormals = 0,
660  int32_t RoundingMode = 0,
661  int32_t RoundingAccuracy = 0) noexcept;
662 
663 template <int WA, int Wout>
664 extern SYCL_EXTERNAL cl::sycl::detail::ap_int<Wout>
665 __spirv_ArbitraryFloatASinPiINTEL(cl::sycl::detail::ap_int<WA> A, int32_t MA,
666  int32_t Mout, int32_t EnableSubnormals = 0,
667  int32_t RoundingMode = 0,
668  int32_t RoundingAccuracy = 0) noexcept;
669 
670 template <int WA, int Wout>
671 extern SYCL_EXTERNAL cl::sycl::detail::ap_int<Wout>
672 __spirv_ArbitraryFloatACosINTEL(cl::sycl::detail::ap_int<WA> A, int32_t MA,
673  int32_t Mout, int32_t EnableSubnormals = 0,
674  int32_t RoundingMode = 0,
675  int32_t RoundingAccuracy = 0) noexcept;
676 
677 template <int WA, int Wout>
678 extern SYCL_EXTERNAL cl::sycl::detail::ap_int<Wout>
679 __spirv_ArbitraryFloatACosPiINTEL(cl::sycl::detail::ap_int<WA> A, int32_t MA,
680  int32_t Mout, int32_t EnableSubnormals = 0,
681  int32_t RoundingMode = 0,
682  int32_t RoundingAccuracy = 0) noexcept;
683 
684 template <int WA, int Wout>
685 extern SYCL_EXTERNAL cl::sycl::detail::ap_int<Wout>
686 __spirv_ArbitraryFloatATanINTEL(cl::sycl::detail::ap_int<WA> A, int32_t MA,
687  int32_t Mout, int32_t EnableSubnormals = 0,
688  int32_t RoundingMode = 0,
689  int32_t RoundingAccuracy = 0) noexcept;
690 
691 template <int WA, int Wout>
692 extern SYCL_EXTERNAL cl::sycl::detail::ap_int<Wout>
693 __spirv_ArbitraryFloatATanPiINTEL(cl::sycl::detail::ap_int<WA> A, int32_t MA,
694  int32_t Mout, int32_t EnableSubnormals = 0,
695  int32_t RoundingMode = 0,
696  int32_t RoundingAccuracy = 0) noexcept;
697 
698 template <int WA, int WB, int Wout>
699 extern SYCL_EXTERNAL cl::sycl::detail::ap_int<Wout>
700 __spirv_ArbitraryFloatATan2INTEL(cl::sycl::detail::ap_int<WA> A, int32_t MA,
701  cl::sycl::detail::ap_int<WB> B, int32_t MB,
702  int32_t Mout, int32_t EnableSubnormals = 0,
703  int32_t RoundingMode = 0,
704  int32_t RoundingAccuracy = 0) noexcept;
705 
706 template <int WA, int WB, int Wout>
707 extern SYCL_EXTERNAL cl::sycl::detail::ap_int<Wout>
708 __spirv_ArbitraryFloatPowINTEL(cl::sycl::detail::ap_int<WA> A, int32_t MA,
709  cl::sycl::detail::ap_int<WB> B, int32_t MB,
710  int32_t Mout, int32_t EnableSubnormals = 0,
711  int32_t RoundingMode = 0,
712  int32_t RoundingAccuracy = 0) noexcept;
713 
714 template <int WA, int WB, int Wout>
715 extern SYCL_EXTERNAL cl::sycl::detail::ap_int<Wout>
716 __spirv_ArbitraryFloatPowRINTEL(cl::sycl::detail::ap_int<WA> A, int32_t MA,
717  cl::sycl::detail::ap_int<WB> B, int32_t MB,
718  int32_t Mout, int32_t EnableSubnormals = 0,
719  int32_t RoundingMode = 0,
720  int32_t RoundingAccuracy = 0) noexcept;
721 
722 // PowN built-in calculates `A^B` where `A` is arbitrary precision floating
723 // point number and `B` is signed or unsigned arbitrary precision integer,
724 // i.e. its width doesn't depend on sum of exponent and mantissa.
725 template <int WA, int WB, int Wout>
726 extern SYCL_EXTERNAL cl::sycl::detail::ap_int<Wout>
727 __spirv_ArbitraryFloatPowNINTEL(cl::sycl::detail::ap_int<WA> A, int32_t MA,
728  cl::sycl::detail::ap_int<WB> B, bool SignOfB,
729  int32_t Mout, int32_t EnableSubnormals = 0,
730  int32_t RoundingMode = 0,
731  int32_t RoundingAccuracy = 0) noexcept;
732 
733 template <typename dataT>
734 extern SYCL_EXTERNAL int32_t __spirv_ReadPipe(__ocl_RPipeTy<dataT> Pipe,
735  dataT *Data, int32_t Size,
736  int32_t Alignment) noexcept;
737 template <typename dataT>
738 extern SYCL_EXTERNAL int32_t __spirv_WritePipe(__ocl_WPipeTy<dataT> Pipe,
739  const dataT *Data, int32_t Size,
740  int32_t Alignment) noexcept;
741 template <typename dataT>
742 extern SYCL_EXTERNAL void
743 __spirv_ReadPipeBlockingINTEL(__ocl_RPipeTy<dataT> Pipe, dataT *Data,
744  int32_t Size, int32_t Alignment) noexcept;
745 template <typename dataT>
746 extern SYCL_EXTERNAL void
747 __spirv_WritePipeBlockingINTEL(__ocl_WPipeTy<dataT> Pipe, const dataT *Data,
748  int32_t Size, int32_t Alignment) noexcept;
749 template <typename dataT>
750 extern SYCL_EXTERNAL __ocl_RPipeTy<dataT>
751 __spirv_CreatePipeFromPipeStorage_read(
752  const ConstantPipeStorage *Storage) noexcept;
753 template <typename dataT>
754 extern SYCL_EXTERNAL __ocl_WPipeTy<dataT>
755 __spirv_CreatePipeFromPipeStorage_write(
756  const ConstantPipeStorage *Storage) noexcept;
757 
758 extern SYCL_EXTERNAL void
759 __spirv_ocl_prefetch(const __attribute__((opencl_global)) char *Ptr,
760  size_t NumBytes) noexcept;
761 
762 extern SYCL_EXTERNAL uint16_t __spirv_ConvertFToBF16INTEL(float) noexcept;
763 extern SYCL_EXTERNAL float __spirv_ConvertBF16ToFINTEL(uint16_t) noexcept;
764 
765 __SYCL_CONVERGENT__ extern SYCL_EXTERNAL __SYCL_EXPORT __ocl_vec_t<uint32_t, 4>
766 __spirv_GroupNonUniformBallot(uint32_t Execution, bool Predicate) noexcept;
767 
768 extern SYCL_EXTERNAL __SYCL_EXPORT void
769 __clc_BarrierInitialize(int64_t *state, int32_t expected_count) noexcept;
770 
771 extern SYCL_EXTERNAL __SYCL_EXPORT void
772 __clc_BarrierInvalidate(int64_t *state) noexcept;
773 
774 extern SYCL_EXTERNAL __SYCL_EXPORT int64_t
775 __clc_BarrierArrive(int64_t *state) noexcept;
776 
777 extern SYCL_EXTERNAL __SYCL_EXPORT int64_t
778 __clc_BarrierArriveAndDrop(int64_t *state) noexcept;
779 
780 extern SYCL_EXTERNAL __SYCL_EXPORT int64_t
781 __clc_BarrierArriveNoComplete(int64_t *state, int32_t count) noexcept;
782 
783 extern SYCL_EXTERNAL __SYCL_EXPORT int64_t
784 __clc_BarrierArriveAndDropNoComplete(int64_t *state, int32_t count) noexcept;
785 
786 extern SYCL_EXTERNAL __SYCL_EXPORT void
787 __clc_BarrierCopyAsyncArrive(int64_t *state) noexcept;
788 
789 extern SYCL_EXTERNAL __SYCL_EXPORT void
790 __clc_BarrierCopyAsyncArriveNoInc(int64_t *state) noexcept;
791 
792 __SYCL_CONVERGENT__ extern SYCL_EXTERNAL __SYCL_EXPORT void
793 __clc_BarrierWait(int64_t *state, int64_t arrival) noexcept;
794 
795 extern SYCL_EXTERNAL __SYCL_EXPORT bool
796 __clc_BarrierTestWait(int64_t *state, int64_t arrival) noexcept;
797 
798 __SYCL_CONVERGENT__ extern SYCL_EXTERNAL __SYCL_EXPORT void
799 __clc_BarrierArriveAndWait(int64_t *state) noexcept;
800 
801 #ifdef __SYCL_USE_NON_VARIADIC_SPIRV_OCL_PRINTF__
802 template <typename... Args>
803 extern SYCL_EXTERNAL int
804 __spirv_ocl_printf(const __attribute__((opencl_constant)) char *Format,
805  Args... args);
806 template <typename... Args>
807 extern SYCL_EXTERNAL int __spirv_ocl_printf(const char *Format, Args... args);
808 #else
809 extern SYCL_EXTERNAL int
810 __spirv_ocl_printf(const __attribute__((opencl_constant)) char *Format, ...);
811 extern SYCL_EXTERNAL int __spirv_ocl_printf(const char *Format, ...);
812 #endif
813 
814 // Native builtin extension
815 
816 extern SYCL_EXTERNAL float __clc_native_tanh(float);
817 extern SYCL_EXTERNAL __ocl_vec_t<float, 2>
818  __clc_native_tanh(__ocl_vec_t<float, 2>);
819 extern SYCL_EXTERNAL __ocl_vec_t<float, 3>
820  __clc_native_tanh(__ocl_vec_t<float, 3>);
821 extern SYCL_EXTERNAL __ocl_vec_t<float, 4>
822  __clc_native_tanh(__ocl_vec_t<float, 4>);
823 extern SYCL_EXTERNAL __ocl_vec_t<float, 8>
824  __clc_native_tanh(__ocl_vec_t<float, 8>);
825 extern SYCL_EXTERNAL __ocl_vec_t<float, 16>
826  __clc_native_tanh(__ocl_vec_t<float, 16>);
827 
828 extern SYCL_EXTERNAL _Float16 __clc_native_tanh(_Float16);
829 extern SYCL_EXTERNAL __ocl_vec_t<_Float16, 2>
830  __clc_native_tanh(__ocl_vec_t<_Float16, 2>);
831 extern SYCL_EXTERNAL __ocl_vec_t<_Float16, 3>
832  __clc_native_tanh(__ocl_vec_t<_Float16, 3>);
833 extern SYCL_EXTERNAL __ocl_vec_t<_Float16, 4>
834  __clc_native_tanh(__ocl_vec_t<_Float16, 4>);
835 extern SYCL_EXTERNAL __ocl_vec_t<_Float16, 8>
836  __clc_native_tanh(__ocl_vec_t<_Float16, 8>);
837 extern SYCL_EXTERNAL __ocl_vec_t<_Float16, 16>
838  __clc_native_tanh(__ocl_vec_t<_Float16, 16>);
839 
840 extern SYCL_EXTERNAL _Float16 __clc_native_exp2(_Float16);
841 extern SYCL_EXTERNAL __ocl_vec_t<_Float16, 2>
842  __clc_native_exp2(__ocl_vec_t<_Float16, 2>);
843 extern SYCL_EXTERNAL __ocl_vec_t<_Float16, 3>
844  __clc_native_exp2(__ocl_vec_t<_Float16, 3>);
845 extern SYCL_EXTERNAL __ocl_vec_t<_Float16, 4>
846  __clc_native_exp2(__ocl_vec_t<_Float16, 4>);
847 extern SYCL_EXTERNAL __ocl_vec_t<_Float16, 8>
848  __clc_native_exp2(__ocl_vec_t<_Float16, 8>);
849 extern SYCL_EXTERNAL __ocl_vec_t<_Float16, 16>
850  __clc_native_exp2(__ocl_vec_t<_Float16, 16>);
851 
852 #define __CLC_BF16(...) \
853  extern SYCL_EXTERNAL __SYCL_EXPORT __VA_ARGS__ __clc_fabs( \
854  __VA_ARGS__) noexcept; \
855  extern SYCL_EXTERNAL __SYCL_EXPORT __VA_ARGS__ __clc_fmin( \
856  __VA_ARGS__, __VA_ARGS__) noexcept; \
857  extern SYCL_EXTERNAL __SYCL_EXPORT __VA_ARGS__ __clc_fmax( \
858  __VA_ARGS__, __VA_ARGS__) noexcept; \
859  extern SYCL_EXTERNAL __SYCL_EXPORT __VA_ARGS__ __clc_fma( \
860  __VA_ARGS__, __VA_ARGS__, __VA_ARGS__) noexcept;
861 
862 #define __CLC_BF16_SCAL_VEC(TYPE) \
863  __CLC_BF16(TYPE) \
864  __CLC_BF16(__ocl_vec_t<TYPE, 2>) \
865  __CLC_BF16(__ocl_vec_t<TYPE, 3>) \
866  __CLC_BF16(__ocl_vec_t<TYPE, 4>) \
867  __CLC_BF16(__ocl_vec_t<TYPE, 8>) \
868  __CLC_BF16(__ocl_vec_t<TYPE, 16>)
869 
870 __CLC_BF16_SCAL_VEC(uint16_t)
871 __CLC_BF16_SCAL_VEC(uint32_t)
872 
873 #undef __CLC_BF16_SCAL_VEC
874 #undef __CLC_BF16
875 
876 #else // if !__SYCL_DEVICE_ONLY__
877 
878 template <typename dataT>
881  dataT *Src, size_t NumElements,
882  size_t Stride, __ocl_event_t) noexcept {
883  for (size_t i = 0; i < NumElements; i++) {
884  Dest[i] = Src[i * Stride];
885  }
886  // A real instance of the class is not needed, return dummy pointer.
887  return nullptr;
888 }
889 
890 template <typename dataT>
893  dataT *Src, size_t NumElements,
894  size_t Stride, __ocl_event_t) noexcept {
895  for (size_t i = 0; i < NumElements; i++) {
896  Dest[i * Stride] = Src[i];
897  }
898  // A real instance of the class is not needed, return dummy pointer.
899  return nullptr;
900 }
901 
902 extern __SYCL_EXPORT void __spirv_ocl_prefetch(const char *Ptr,
903  size_t NumBytes) noexcept;
904 
905 __SYCL_CONVERGENT__ extern SYCL_EXTERNAL __SYCL_EXPORT void
907  uint32_t Semantics) noexcept;
908 
909 __SYCL_CONVERGENT__ extern SYCL_EXTERNAL __SYCL_EXPORT void
910 __spirv_MemoryBarrier(__spv::Scope Memory, uint32_t Semantics) noexcept;
911 
912 __SYCL_CONVERGENT__ extern SYCL_EXTERNAL __SYCL_EXPORT void
913 __spirv_GroupWaitEvents(__spv::Scope Execution, uint32_t NumEvents,
914  __ocl_event_t *WaitEvents) noexcept;
915 
916 #endif // !__SYCL_DEVICE_ONLY__
__spirv_GroupWaitEvents
__SYCL_CONVERGENT__ SYCL_EXTERNAL void __spirv_GroupWaitEvents(__spv::Scope Execution, uint32_t NumEvents, __ocl_event_t *WaitEvents) noexcept
Definition: spirv_ops.cpp:18
__spirv_ControlBarrier
__SYCL_CONVERGENT__ SYCL_EXTERNAL void __spirv_ControlBarrier(__spv::Scope Execution, __spv::Scope Memory, uint32_t Semantics) noexcept
Definition: spirv_ops.cpp:26
T
__spv::MatrixLayout::RowMajor
@ RowMajor
SYCL_EXTERNAL
#define SYCL_EXTERNAL
Definition: defines_elementary.hpp:34
__SYCL_OpGroupAsyncCopyGlobalToLocal
__SYCL_CONVERGENT__ __ocl_event_t __SYCL_OpGroupAsyncCopyGlobalToLocal(__spv::Scope::Flag, dataT *Dest, dataT *Src, size_t NumElements, size_t Stride, __ocl_event_t) noexcept
Definition: spirv_ops.hpp:880
__spirv_ocl_prefetch
void __spirv_ocl_prefetch(const char *Ptr, size_t NumBytes) noexcept
Definition: spirv_ops.cpp:47
__spv::__spirv_JointMatrixINTEL
Definition: spirv_types.hpp:134
sycl
Definition: invoke_simd.hpp:68
__ocl_event_t
void * __ocl_event_t
Definition: spirv_types.hpp:174
stl_type_traits.hpp
__spv::Scope::Flag
Flag
Definition: spirv_types.hpp:27
__spv::StorageClass::Flag
Flag
Definition: spirv_types.hpp:43
export.hpp
defines.hpp
cl
We provide new interfaces for matrix muliply in this patch:
Definition: access.hpp:13
__SYCL_CONVERGENT__
#define __SYCL_CONVERGENT__
Definition: spirv_ops.hpp:21
__spirv_MemoryBarrier
__SYCL_CONVERGENT__ SYCL_EXTERNAL void __spirv_MemoryBarrier(__spv::Scope Memory, uint32_t Semantics) noexcept
Definition: spirv_ops.cpp:36
__SYCL_OpGroupAsyncCopyLocalToGlobal
__SYCL_CONVERGENT__ __ocl_event_t __SYCL_OpGroupAsyncCopyLocalToGlobal(__spv::Scope::Flag, dataT *Dest, dataT *Src, size_t NumElements, size_t Stride, __ocl_event_t) noexcept
Definition: spirv_ops.hpp:892
__spv::StorageClass::CrossWorkgroup
@ CrossWorkgroup
Definition: spirv_types.hpp:49
__spv::StorageClass::Workgroup
@ Workgroup
Definition: spirv_types.hpp:48
sycl::ext::oneapi::experimental::__attribute__
__attribute__((always_inline)) auto invoke_simd(sycl
The invoke_simd free function invokes a SIMD function using all work-items in a sub_group.
Definition: invoke_simd.hpp:293
__spv::MatrixLayout
MatrixLayout
Definition: spirv_types.hpp:111
__spv::Scope
Definition: spirv_types.hpp:25
__ocl_sampler_t
void * __ocl_sampler_t
Definition: spirv_types.hpp:175
spirv_types.hpp