DPC++ Runtime
Runtime libraries for oneAPI DPC++
builtins.hpp
Go to the documentation of this file.
1 //==------------------- builtins.hpp ---------------------------------------==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 // Implement SYCL builtin functions. This implementation is mainly driven by the
10 // requirement of not including <cmath> anywhere in the SYCL headers (i.e. from
11 // within <sycl/sycl.hpp>), because it pollutes global namespace. Note that we
12 // can't avoid that using MSVC's STL as the pollution happens even from
13 // <vector>/<string> and other headers that have to be included per the SYCL
14 // specification. As such, an alternative approach might be to use math
15 // intrinsics with GCC/clang-based compilers and use <cmath> when using MSVC as
16 // a host compiler. That hasn't been tried/investigated.
17 //
18 // Current implementation splits builtins into several files following the SYCL
19 // 2020 (revision 8) split into common/math/geometric/relational/etc. functions.
20 // For each set, the implementation is split into a user-visible
21 // include/sycl/detail/builtins/*_functions.hpp providing full device-side
22 // implementation as well as defining user-visible APIs and defining ABI
23 // implemented under source/builtins/*_functions.cpp for the host side. We
24 // provide both scalar/vector overloads through symbols in the SYCL runtime
25 // library due to the <cmath> limitation above (for scalars) and due to
26 // performance reasons for vector overloads (to be able to benefit from
27 // vectorization).
28 //
29 // Providing declaration for the host side symbols contained in the library
30 // comes with its own challenges. One is compilation time - blindly providing
31 // all those declarations takes significant time (about 10% slowdown for
32 // "clang++ -fsycl" when compiling just "#include <sycl/sycl.hpp>"). Another
33 // issue is that return type for templates is part of the mangling (and as such
34 // SFINAE requirements too). To overcome that we structure host side
35 // implementation roughly like this (in most cases):
36 //
37 // math_functions.cpp exports:
38 // float sycl::__sin_impl(float);
39 // float1 sycl::__sin_impl(float1);
40 // float2 sycl::__sin_impl(float2);
41 // ...
42 // /* same for other types */
43 //
44 // math_functions.hpp provides an implementation based on the following idea (in
45 // ::sycl namespace):
46 // float sin(float x) {
47 // extern float __sin_impl(float);
48 // return __sin_impl(x);
49 // }
50 // template <typename T>
51 // enable_if_valid_type<T> sin(T x) {
52 // if constexpr (marray_or_swizzle) {
53 // ...
54 // call sycl::sin(vector_or_scalar)
55 // } else {
56 // extern T __sin_impl(T);
57 // return __sin_impl(x);
58 // }
59 // }
60 // That way we avoid having the full set of explicit declarations for the symbols
61 // in the library and instead only pay with compile time when those template
62 // instantiations actually happen.
63 
64 #pragma once
65 
67 
68 namespace sycl {
69 inline namespace _V1 {
70 namespace detail {
// True when all of Ts fall into one category (every type satisfies
// is_scalar_arithmetic_v, or every type satisfies is_marray_v, or every type
// satisfies is_vec_or_swizzle_v) AND every type's num_elements<>::value equals
// that of the first type in the pack.
71 template <typename... Ts>
72 inline constexpr bool builtin_same_shape_v =
73  ((... && is_scalar_arithmetic_v<Ts>) || (... && is_marray_v<Ts>) ||
74  (... && is_vec_or_swizzle_v<Ts>)) &&
75  (... && (num_elements<Ts>::value ==
76  num_elements<typename first_type<Ts...>::type>::value));
77 
// True when Ts satisfy builtin_same_shape_v and, once simplify_if_swizzle_t
// has been applied to each of them, all resulting types are identical.
78 template <typename... Ts>
79 inline constexpr bool builtin_same_or_swizzle_v =
80  // Use builtin_same_shape_v to filter out types unrelated to builtins.
81  builtin_same_shape_v<Ts...> && all_same_v<simplify_if_swizzle_t<Ts>...>;
82 
83 namespace builtins {
84 #ifdef __SYCL_DEVICE_ONLY__
// Converts a builtin argument before it is handed to the device-side
// implementation (this definition is only compiled under
// __SYCL_DEVICE_ONLY__). Dispatch is on the cv/ref-stripped type of x:
//  * vec: the element type is converted by a recursive convert_arg; the result
//    type is that converted element type when size() == 1, otherwise an
//    ext_vector_type(N) of it. The value is produced by bit_cast from the
//    vec's vector_t.
//  * swizzle: materialized as simplify_if_swizzle_t<...>{x} and converted
//    recursively.
//  * anything else: must be a scalar arithmetic type, a multi_ptr, a raw
//    pointer or half (enforced by static_assert); forwarded to
//    convertToOpenCLType.
85 template <typename T> auto convert_arg(T &&x) {
86  using no_cv_ref = std::remove_cv_t<std::remove_reference_t<T>>;
87  if constexpr (is_vec_v<no_cv_ref>) {
88  using elem_type = get_elem_type_t<no_cv_ref>;
89  using converted_elem_type =
90  decltype(convert_arg(std::declval<elem_type>()));
91 
92  constexpr auto N = no_cv_ref::size();
93  using result_type = std::conditional_t<N == 1, converted_elem_type,
94  converted_elem_type
95  __attribute__((ext_vector_type(N)))>;
96  // TODO: We should have this bit_cast impl inside vec::convert.
97  return bit_cast<result_type>(static_cast<typename no_cv_ref::vector_t>(x));
98  } else if constexpr (is_swizzle_v<no_cv_ref>) {
99  return convert_arg(simplify_if_swizzle_t<no_cv_ref>{x});
100  } else {
101  static_assert(is_scalar_arithmetic_v<no_cv_ref> ||
102  is_multi_ptr_v<no_cv_ref> || std::is_pointer_v<no_cv_ref> ||
103  std::is_same_v<no_cv_ref, half>);
104  return convertToOpenCLType(std::forward<T>(x));
105  }
106 }
107 
// Converts a value produced by the device-side implementation for a builtin
// whose declared return type is RetTy: when RetTy is a vec, x is bit_cast to
// RetTy::vector_t; otherwise x is forwarded as is.
108 template <typename RetTy, typename T> auto convert_result(T &&x) {
109  if constexpr (is_vec_v<RetTy>) {
110  return bit_cast<typename RetTy::vector_t>(x);
111  } else {
112  return std::forward<T>(x);
113  }
114 }
115 #endif
116 } // namespace builtins
117 
// Applies F element-wise to marray arguments, processing two elements per
// call where possible.
//
// The result is an marray whose element type is that of F(x[0]...) and whose
// size is taken from the first type in Ts. For every full pair of elements,
// each argument is turned into a 2-element vec with to_vec2 (for integral
// element types that vec is additionally reinterpreted via
// .as<vec<get_fixed_sized_int_t<elem_ty>, 2>>()), F is called on those vecs and
// the returned object is memcpy'd into Res starting at index I * 2. When the
// size is odd, the last element is computed with a scalar call to F.
118 template <typename FuncTy, typename... Ts>
119 auto builtin_marray_impl(FuncTy F, const Ts &...x) {
120  using ret_elem_type = decltype(F(x[0]...));
121  using T = typename first_type<Ts...>::type;
122  marray<ret_elem_type, T::size()> Res;
123  constexpr auto N = T::size();
124  for (size_t I = 0; I < N / 2; ++I) {
125  auto PartialRes = [&]() {
126  using elem_ty = get_elem_type_t<T>;
127  if constexpr (std::is_integral_v<elem_ty>)
128  return F(to_vec2(x, I * 2)
129  .template as<vec<get_fixed_sized_int_t<elem_ty>, 2>>()...);
130  else
131  return F(to_vec2(x, I * 2)...);
132  }();
133  std::memcpy(&Res[I * 2], &PartialRes, sizeof(decltype(PartialRes)));
134  }
135  if (N % 2)
136  Res[N - 1] = F(x[N - 1]...);
137  return Res;
138 }
139 
// Default host-side dispatch for a builtin: if any argument type is an marray
// the call goes through builtin_marray_impl; otherwise F is invoked directly
// with every argument converted to simplify_if_swizzle_t<Ts>.
140 template <typename FuncTy, typename... Ts>
141 auto builtin_default_host_impl(FuncTy F, const Ts &...x) {
142  // We implement support for marray/swizzle in the headers and export symbols
143  // for scalars/vector from the library binary. The reason is that scalar
144  // implementations mostly depend on <cmath> which pollutes global namespace,
145  // so we can't unconditionally include it from the SYCL headers. Vector
146  // overloads have to be implemented in the library next to scalar overloads in
147  // order to be vectorizable.
148  if constexpr ((... || is_marray_v<Ts>)) {
149  return builtin_marray_impl(F, x...);
150  } else {
151  return F(simplify_if_swizzle_t<Ts>{x}...);
152  }
153 }
154 
// Implements a vec/swizzle/marray builtin by calling the scalar callable F once
// per element.
//
// The first type in Ts must be a vec, a swizzle or an marray (static_assert).
// The result has the element type of F(x[0]...) and the same number of
// elements as that first type; it is an marray when the first type is an
// marray and a vec otherwise. marray inputs are walked with a plain loop, the
// other inputs with the compile-time loop<Size> helper.
155 template <typename FuncTy, typename... Ts>
156 auto builtin_delegate_to_scalar(FuncTy F, const Ts &...x) {
157  using T = typename first_type<Ts...>::type;
158  static_assert(is_vec_or_swizzle_v<T> || is_marray_v<T>);
159 
160  constexpr auto Size = T::size();
161  using ret_elem_type = decltype(F(x[0]...));
162  std::conditional_t<is_marray_v<T>, marray<ret_elem_type, Size>,
163  vec<ret_elem_type, Size>>
164  r{};
165 
166  if constexpr (is_marray_v<T>) {
167  for (size_t i = 0; i < Size; ++i)
168  r[i] = F(x[i]...);
169  } else {
170  loop<Size>([&](auto idx) { r[idx] = F(x[idx]...); });
171  }
172 
173  return r;
174 }
175 
// Element-type checker: value is true when get_elem_type_t<T> is one of
// float, double or half.
176 template <typename T>
177 struct fp_elem_type
178  : std::bool_constant<
179  check_type_in_v<get_elem_type_t<T>, float, double, half>> {};
// Element-type checker: value is true only when get_elem_type_t<T> is float.
180 template <typename T>
181 struct float_elem_type
182  : std::bool_constant<check_type_in_v<get_elem_type_t<T>, float>> {};
183 
// Type-trait wrapper around builtin_same_shape_v<Ts...>, usable where a
// template taking a type pack is required.
184 template <typename... Ts>
185 struct same_basic_shape : std::bool_constant<builtin_same_shape_v<Ts...>> {};
186 
// True when Ts satisfy same_basic_shape and get_elem_type_t yields the same
// type for every one of them.
187 template <typename... Ts>
188 struct same_elem_type : std::bool_constant<same_basic_shape<Ts...>::value &&
189  all_same_v<get_elem_type_t<Ts>...>> {
190 };
191 
// Shape checker that accepts every type (always true).
192 template <typename> struct any_shape : std::true_type {};
193 
// Shape checker that accepts only types satisfying is_scalar_arithmetic_v.
194 template <typename T>
195 struct scalar_only : std::bool_constant<is_scalar_arithmetic_v<T>> {};
196 
// Shape checker that accepts only types NOT satisfying is_scalar_arithmetic_v.
197 template <typename T>
198 struct non_scalar_only : std::bool_constant<!is_scalar_arithmetic_v<T>> {};
199 
// Return-type trait that yields T itself.
200 template <typename T> struct default_ret_type {
201  using type = T;
202 };
203 
// Return-type trait that yields the element type of T (get_elem_type_t<T>)
// rather than T itself.
204 template <typename T> struct scalar_ret_type {
205  using type = get_elem_type_t<T>;
206 };
207 
// SFINAE helper used to constrain builtin overloads. It derives from
// std::enable_if, so the nested `type` exists only when all of the following
// hold:
//  * ElemTypeChecker<First>::value
//  * ShapeChecker<First>::value
//  * ExtraConditions<Ts...>::value
// where First is the first type in Ts. When present, `type` is
// RetTypeTrait<simplify_if_swizzle_t<First>>::type.
208 template <template <typename> typename RetTypeTrait,
209  template <typename> typename ElemTypeChecker,
210  template <typename> typename ShapeChecker,
211  template <typename...> typename ExtraConditions, typename... Ts>
212 struct builtin_enable
213  : std::enable_if<
214  ElemTypeChecker<typename first_type<Ts...>::type>::value &&
215  ShapeChecker<typename first_type<Ts...>::type>::value &&
216  ExtraConditions<Ts...>::value,
217  typename RetTypeTrait<
218  simplify_if_swizzle_t<typename first_type<Ts...>::type>>::type> {
219 };
// Declares, inside a `detail` namespace opened by the macro itself, an alias
// template NAME_t<Ts...> equal to
// builtin_enable<RET_TYPE_TRAIT, ELEM_TYPE_CHECKER, SHAPE_CHECKER,
//                EXTRA_CONDITIONS, Ts...>::type.
// Because the alias names the nested ::type directly, using NAME_t in a
// function signature removes that overload from consideration when the
// builtin_enable conditions are not met.
220 #define BUILTIN_CREATE_ENABLER(NAME, RET_TYPE_TRAIT, ELEM_TYPE_CHECKER, \
221  SHAPE_CHECKER, EXTRA_CONDITIONS) \
222  namespace detail { \
223  template <typename... Ts> \
224  using NAME##_t = \
225  typename builtin_enable<RET_TYPE_TRAIT, ELEM_TYPE_CHECKER, \
226  SHAPE_CHECKER, EXTRA_CONDITIONS, Ts...>::type; \
227  }
228 } // namespace detail
229 } // namespace _V1
230 } // namespace sycl
231 
232 // The headers below are specifically implemented without including all the
233 // necessary headers to allow preprocessing them on their own and providing
234 // human-friendly result. One can use a command like this to achieve that:
235 // clang++ -[DU]__SYCL_DEVICE_ONLY__ -x c++ math_functions.inc \
236 // -I <..>/llvm/sycl/include -E -o - \
237 // | grep -v '^#' | clang-format > math_functions.{host|device}.ii
238 
Provides a cross-platform math array class template that works on SYCL devices as well as in host C++...
Definition: marray.hpp:49
class sycl::vec ///////////////////////// Provides a cross-platform vector class template that works e...
typename get_fixed_sized_int< T >::type get_fixed_sized_int_t
constexpr bool builtin_same_shape_v
Definition: builtins.hpp:72
auto builtin_delegate_to_scalar(FuncTy F, const Ts &...x)
Definition: builtins.hpp:156
auto builtin_marray_impl(FuncTy F, const Ts &...x)
Definition: builtins.hpp:119
typename get_elem_type< T >::type get_elem_type_t
auto builtin_default_host_impl(FuncTy F, const Ts &...x)
Definition: builtins.hpp:141
vec< T, 2 > to_vec2(marray< T, N > X, size_t Start)
__attribute__((destructor(110))) static void syclUnload()
typename simplify_if_swizzle< T >::type simplify_if_swizzle_t
constexpr bool builtin_same_or_swizzle_v
Definition: builtins.hpp:79
autodecltype(x) x
Definition: access.hpp:18