DPC++ Runtime
Runtime libraries for oneAPI DPC++
builtins.hpp
Go to the documentation of this file.
1 //==------ builtins.hpp - Non-standard SYCL built-in functions -------------==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #pragma once
10 
11 #include <sycl/builtins.hpp>
12 #include <sycl/detail/builtins.hpp>
16 
17 #include <CL/__spirv/spirv_ops.hpp>
19 
20 // TODO Decide whether to mark functions with this attribute.
21 #define __NOEXC /*noexcept*/
22 
23 #ifdef __SYCL_DEVICE_ONLY__
24 #define __SYCL_CONSTANT_AS __attribute__((opencl_constant))
25 #else
26 #define __SYCL_CONSTANT_AS
27 #endif
28 
29 namespace sycl {
31 namespace ext::oneapi::experimental {
32 
33 // Provides functionality to print data from kernels in a C way:
34 // - On non-host devices this function is directly mapped to printf from
35 // OpenCL C
36 // - On host device, this function should be equivalent to standard printf
37 // function from C/C++.
38 //
39 // Please refer to corresponding section in OpenCL C specification to find
40 // information about format string and its differences from standard C rules.
41 //
42 // This function is placed under 'experimental' namespace on purpose, because it
43 // has too much caveats you need to be aware of before using it. Please find
44 // them below and read carefully before using it:
45 //
46 // - According to the OpenCL spec, the format string must be
47 // resolvable at compile time i.e. cannot be dynamically created by the
48 // executing program.
49 //
50 // - According to the OpenCL spec, the format string must reside in constant
51 // address space. The constant address space declarations might get "tricky",
52 // see test/built-ins/printf.cpp for examples.
53 // In simple cases (compile-time known string contents, direct declaration of
54 // the format literal inside the printf call, etc.), the compiler should handle
55 // the automatic address space conversion.
56 // FIXME: Once the extension to generic address space is fully supported, the
57 // constant AS version may need to be deprecated.
58 //
59 // - The format string is interpreted according to the OpenCL C spec, where all
60 // data types has fixed size, opposed to C++ types which doesn't guarantee
61 // the exact width of particular data types (except, may be, char). This might
62 // lead to unexpected result, for example: %ld in OpenCL C means that printed
63 // argument has 'long' type which is 64-bit wide by the OpenCL C spec. However,
64 // by C++ spec long is just at least 32-bit wide, so, you need to ensure (by
65 // performing a cast, for example) that if you use %ld specifier, you pass
66 // 64-bit argument to the sycl::experimental::printf
67 //
68 // - OpenCL spec defines several additional features, like, for example, 'v'
69 // modifier which allows to print OpenCL vectors: note that these features are
70 // not available on host device and therefore their usage should be either
71 // guarded using __SYCL_DEVICE_ONLY__ preprocessor macro or avoided in favor
72 // of more portable solutions if needed
73 //
74 template <typename FormatT, typename... Args>
75 int printf(const FormatT *__format, Args... args) {
76 #if defined(__SYCL_DEVICE_ONLY__) && defined(__SPIR__)
77  return __spirv_ocl_printf(__format, args...);
78 #else
79  return ::printf(__format, args...);
80 #endif // defined(__SYCL_DEVICE_ONLY__) && defined(__SPIR__)
81 }
82 
83 namespace native {
84 
85 // genfloatfh tanh (genfloatfh x)
86 // sycl::native::tanh is only implemented on nvptx backend so far. For other
87 // backends we revert to the sycl::tanh impl.
88 template <typename T>
90  sycl::detail::enable_if_t<sycl::detail::is_svgenfloatf<T>::value ||
91  sycl::detail::is_svgenfloath<T>::value,
92  T>
93  tanh(T x) __NOEXC {
94 #if defined(__NVPTX__)
95  using _ocl_T = sycl::detail::ConvertToOpenCLType_t<T>;
96  _ocl_T arg1 = sycl::detail::convertDataToType<T, _ocl_T>(x);
97  return sycl::detail::convertDataToType<_ocl_T, T>(__clc_native_tanh(arg1));
98 #else
99  return __sycl_std::__invoke_tanh<T>(x);
100 #endif
101 }
102 
103 // The marray math function implementations use vectorizations of
104 // size two as a simple general optimization. A more complex implementation
105 // using larger vectorizations for large marray sizes is possible; however more
106 // testing is required in order to ascertain the performance implications for
107 // all backends.
108 // sycl::native::tanh is only implemented on nvptx backend so far. For other
109 // backends we revert to the sycl::tanh impl.
110 template <typename T, size_t N>
112  std::enable_if_t<std::is_same_v<T, half> || std::is_same_v<T, float>,
113  sycl::marray<T, N>>
114  tanh(sycl::marray<T, N> x) __NOEXC {
115  sycl::marray<T, N> res;
116 
117  for (size_t i = 0; i < N / 2; i++) {
118 #if defined(__SYCL_DEVICE_ONLY__) && defined(__NVPTX__)
119  auto partial_res = native::tanh(sycl::detail::to_vec2(x, i * 2));
120 #else
121  auto partial_res = __sycl_std::__invoke_tanh<sycl::vec<T, 2>>(
122  sycl::detail::to_vec2(x, i * 2));
123 #endif
124  std::memcpy(&res[i * 2], &partial_res, sizeof(vec<T, 2>));
125  }
126  if (N % 2) {
127 #if defined(__SYCL_DEVICE_ONLY__) && defined(__NVPTX__)
128  res[N - 1] = native::tanh(x[N - 1]);
129 #else
130  res[N - 1] = __sycl_std::__invoke_tanh<T>(x[N - 1]);
131 #endif
132  }
133 
134  return res;
135 }
136 
137 // genfloath exp2 (genfloath x)
138 // sycl::native::exp2 (using half) is only implemented on nvptx backend so far.
139 // For other backends we revert to the sycl::exp2 impl.
140 template <typename T>
142  sycl::detail::enable_if_t<sycl::detail::is_svgenfloath<T>::value, T>
143  exp2(T x) __NOEXC {
144 #if defined(__NVPTX__)
145  using _ocl_T = sycl::detail::ConvertToOpenCLType_t<T>;
146  _ocl_T arg1 = sycl::detail::convertDataToType<T, _ocl_T>(x);
147  return sycl::detail::convertDataToType<_ocl_T, T>(__clc_native_exp2(arg1));
148 #else
149  return __sycl_std::__invoke_exp2<T>(x);
150 #endif
151 }
152 
153 // sycl::native::exp2 (using half) is only implemented on nvptx backend so far.
154 // For other backends we revert to the sycl::exp2 impl.
155 template <size_t N>
156 inline __SYCL_ALWAYS_INLINE sycl::marray<half, N>
157 exp2(sycl::marray<half, N> x) __NOEXC {
158  sycl::marray<half, N> res;
159 
160  for (size_t i = 0; i < N / 2; i++) {
161 #if defined(__SYCL_DEVICE_ONLY__) && defined(__NVPTX__)
162  auto partial_res = native::exp2(sycl::detail::to_vec2(x, i * 2));
163 #else
164  auto partial_res = __sycl_std::__invoke_exp2<sycl::vec<half, 2>>(
165  sycl::detail::to_vec2(x, i * 2));
166 #endif
167  std::memcpy(&res[i * 2], &partial_res, sizeof(vec<half, 2>));
168  }
169  if (N % 2) {
170 #if defined(__SYCL_DEVICE_ONLY__) && defined(__NVPTX__)
171  res[N - 1] = native::exp2(x[N - 1]);
172 #else
173  res[N - 1] = __sycl_std::__invoke_exp2<half>(x[N - 1]);
174 #endif
175  }
176  return res;
177 }
178 
179 } // namespace native
180 
181 } // namespace ext::oneapi::experimental
182 } // __SYCL_INLINE_VER_NAMESPACE(_V1)
183 } // namespace sycl
184 
185 #undef __SYCL_CONSTANT_AS
Provides a cross-patform vector class template that works efficiently on SYCL devices as well as in h...
Definition: types.hpp:558
#define __SYCL_INLINE_VER_NAMESPACE(X)
#define __SYCL_ALWAYS_INLINE
#define __NOEXC
Definition: builtins.hpp:21
vec< T, 2 > to_vec2(marray< T, N > x, size_t start)
Definition: builtins.hpp:25
void memcpy(void *Dst, const void *Src, std::size_t Size)
__SYCL_ALWAYS_INLINE std::enable_if_t< std::is_same_v< T, half >||std::is_same_v< T, float >, sycl::marray< T, N > > tanh(sycl::marray< T, N > x) __NOEXC
Definition: builtins.hpp:114
__SYCL_ALWAYS_INLINE sycl::marray< half, N > exp2(sycl::marray< half, N > x) __NOEXC
Definition: builtins.hpp:157
int printf(const FormatT *__format, Args... args)
Definition: builtins.hpp:75
---— Error handling, matching OpenCL plugin semantics.
Definition: access.hpp:14