DPC++ Runtime
Runtime libraries for oneAPI DPC++
builtins.hpp
Go to the documentation of this file.
1 //==------ builtins.hpp - Non-standard SYCL built-in functions -------------==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #pragma once
10 
11 #include <sycl/aliases.hpp> // for half
12 #include <sycl/builtins.hpp> // for to_vec2
13 #include <sycl/builtins_utils_vec.hpp> // for to_vec, to_marray...
14 #include <sycl/detail/defines_elementary.hpp> // for __SYCL_ALWAYS_INLINE
15 #include <sycl/detail/generic_type_traits.hpp> // for is_svgenfloath, is_sv...
16 #include <sycl/detail/memcpy.hpp> // detail::memcpy
17 #include <sycl/marray.hpp> // for marray
18 #include <sycl/types.hpp> // for vec
19 
20 #include <cstring> // for size_t
21 #include <stdio.h> // for printf
22 #include <type_traits> // for enable_if_t
23 
// TODO Decide whether to mark functions with this attribute.
// Placeholder exception specifier: it currently expands to nothing, the
// noexcept keyword is only kept inside a comment.
#define __NOEXC /*noexcept*/

// Address-space qualifier macro: empty unless this is a device-only
// compilation, in which case it expands to the opencl_constant attribute.
#if !defined(__SYCL_DEVICE_ONLY__)
#define __SYCL_CONSTANT_AS
#else
#define __SYCL_CONSTANT_AS __attribute__((opencl_constant))
#endif
32 
33 namespace sycl {
34 inline namespace _V1 {
35 namespace ext::oneapi::experimental {
36 
37 // Provides functionality to print data from kernels in a C way:
38 // - On non-host devices this function is directly mapped to printf from
39 // OpenCL C
40 // - On host device, this function should be equivalent to standard printf
41 // function from C/C++.
42 //
43 // Please refer to corresponding section in OpenCL C specification to find
44 // information about format string and its differences from standard C rules.
45 //
46 // This function is placed under 'experimental' namespace on purpose, because it
47 // has too many caveats you need to be aware of before using it. Please find
48 // them below and read carefully before using it:
49 //
50 // - According to the OpenCL spec, the format string must be
51 // resolvable at compile time i.e. cannot be dynamically created by the
52 // executing program.
53 //
54 // - According to the OpenCL spec, the format string must reside in constant
55 // address space. The constant address space declarations might get "tricky",
56 // see test/built-ins/printf.cpp for examples.
57 // In simple cases (compile-time known string contents, direct declaration of
58 // the format literal inside the printf call, etc.), the compiler should handle
59 // the automatic address space conversion.
60 // FIXME: Once the extension to generic address space is fully supported, the
61 // constant AS version may need to be deprecated.
62 //
63 // - The format string is interpreted according to the OpenCL C spec, where all
64 // data types have a fixed size, as opposed to C++ types, which don't guarantee
65 // the exact width of particular data types (except, maybe, char). This might
66 // lead to unexpected results. For example, %ld in OpenCL C means that the
67 // printed argument has 'long' type, which is 64 bits wide by the OpenCL C spec.
68 // However, by the C++ spec long is only at least 32 bits wide, so you need to
69 // ensure (by performing a cast, for example) that if you use the %ld specifier,
70 // you pass a 64-bit argument to sycl::ext::oneapi::experimental::printf.
71 //
72 // - The OpenCL spec defines several additional features, for example the 'v'
73 // modifier, which allows printing OpenCL vectors. Note that these features are
74 // not available on host device and therefore their usage should be either
75 // guarded using __SYCL_DEVICE_ONLY__ preprocessor macro or avoided in favor
76 // of more portable solutions if needed
77 //
template <typename FormatT, typename... Args>
int printf(const FormatT *__format, Args... args) {
#if !defined(__SYCL_DEVICE_ONLY__) ||                                          \
    (!defined(__SPIR__) && !defined(__SPIRV__))
  // Anything that is not a SPIR/SPIR-V device-only compilation: hand the
  // format string and arguments to the global-namespace printf.
  return ::printf(__format, args...);
#else
  // SPIR/SPIR-V device-only compilation: forward to __spirv_ocl_printf.
  return __spirv_ocl_printf(__format, args...);
#endif // !defined(__SYCL_DEVICE_ONLY__) ||
       // (!defined(__SPIR__) && !defined(__SPIRV__))
}
87 
88 namespace native {
89 
90 // genfloatfh tanh (genfloatfh x)
91 // sycl::native::tanh is only implemented on nvptx backend so far. For other
92 // backends we revert to the sycl::tanh impl.
93 template <typename T>
94 inline __SYCL_ALWAYS_INLINE std::enable_if_t<
95  sycl::detail::is_svgenfloatf_v<T> || sycl::detail::is_svgenfloath_v<T>, T>
96 tanh(T x) __NOEXC {
97 #if defined(__NVPTX__)
98  return sycl::detail::convertFromOpenCLTypeFor<T>(
99  __clc_native_tanh(sycl::detail::convertToOpenCLType(x)));
100 #else
101  return sycl::tanh(x);
102 #endif
103 }
104 
105 // The marray math function implementations use vectorizations of
106 // size two as a simple general optimization. A more complex implementation
107 // using larger vectorizations for large marray sizes is possible; however more
108 // testing is required in order to ascertain the performance implications for
109 // all backends.
110 // sycl::native::tanh is only implemented on nvptx backend so far. For other
111 // backends we revert to the sycl::tanh impl.
112 template <typename T, size_t N>
114  std::enable_if_t<std::is_same_v<T, half> || std::is_same_v<T, float>,
117  sycl::marray<T, N> res;
118 
119  for (size_t i = 0; i < N / 2; i++) {
120 #if defined(__SYCL_DEVICE_ONLY__) && defined(__NVPTX__)
121  auto partial_res = native::tanh(sycl::detail::to_vec2(x, i * 2));
122 #else
123  auto partial_res = sycl::tanh(sycl::detail::to_vec2(x, i * 2));
124 #endif
125  sycl::detail::memcpy(&res[i * 2], &partial_res, sizeof(vec<T, 2>));
126  }
127  if (N % 2) {
128 #if defined(__SYCL_DEVICE_ONLY__) && defined(__NVPTX__)
129  res[N - 1] = native::tanh(x[N - 1]);
130 #else
131  res[N - 1] = sycl::tanh(x[N - 1]);
132 #endif
133  }
134 
135  return res;
136 }
137 
138 // genfloath exp2 (genfloath x)
139 // sycl::native::exp2 (using half) is only implemented on nvptx backend so far.
140 // For other backends we revert to the sycl::exp2 impl.
141 template <typename T>
143  std::enable_if_t<sycl::detail::is_svgenfloath_v<T>, T>
144  exp2(T x) __NOEXC {
145 #if defined(__NVPTX__)
146  return sycl::detail::convertFromOpenCLTypeFor<T>(
147  __clc_native_exp2(sycl::detail::convertToOpenCLType(x)));
148 #else
149  return sycl::exp2(x);
150 #endif
151 }
152 
153 // sycl::native::exp2 (using half) is only implemented on nvptx backend so far.
154 // For other backends we revert to the sycl::exp2 impl.
155 template <size_t N>
159 
160  for (size_t i = 0; i < N / 2; i++) {
161 #if defined(__SYCL_DEVICE_ONLY__) && defined(__NVPTX__)
162  auto partial_res = native::exp2(sycl::detail::to_vec2(x, i * 2));
163 #else
164  auto partial_res = sycl::exp2(sycl::detail::to_vec2(x, i * 2));
165 #endif
166  sycl::detail::memcpy(&res[i * 2], &partial_res, sizeof(vec<half, 2>));
167  }
168  if (N % 2) {
169 #if defined(__SYCL_DEVICE_ONLY__) && defined(__NVPTX__)
170  res[N - 1] = native::exp2(x[N - 1]);
171 #else
172  res[N - 1] = sycl::exp2(x[N - 1]);
173 #endif
174  }
175  return res;
176 }
177 
178 } // namespace native
179 
180 } // namespace ext::oneapi::experimental
181 } // namespace _V1
182 } // namespace sycl
183 
184 #undef __SYCL_CONSTANT_AS
Provides a cross-platform math array class template that works on SYCL devices as well as in host C++...
Definition: marray.hpp:49
class sycl::vec ///////////////////////// Provides a cross-platform vector class template that works e...
Definition: vector.hpp:361
#define __SYCL_ALWAYS_INLINE
#define __NOEXC
Definition: builtins.hpp:25
vec< T, 2 > to_vec2(marray< T, N > X, size_t Start)
__SYCL_ALWAYS_INLINE std::enable_if_t< std::is_same_v< T, half >||std::is_same_v< T, float >, sycl::marray< T, N > > tanh(sycl::marray< T, N > x) __NOEXC
Definition: builtins.hpp:116
__SYCL_ALWAYS_INLINE std::enable_if_t< sycl::detail::is_svgenfloatf_v< T >||sycl::detail::is_svgenfloath_v< T >, T > tanh(T x) __NOEXC
Definition: builtins.hpp:96
__SYCL_ALWAYS_INLINE std::enable_if_t< sycl::detail::is_svgenfloath_v< T >, T > exp2(T x) __NOEXC
Definition: builtins.hpp:144
__SYCL_ALWAYS_INLINE sycl::marray< half, N > exp2(sycl::marray< half, N > x) __NOEXC
Definition: builtins.hpp:157
int printf(const FormatT *__format, Args... args)
Definition: builtins.hpp:79
autodecltype(x) x
Definition: access.hpp:18