DPC++ Runtime
Runtime libraries for oneAPI DPC++
builtins_helper.hpp
Go to the documentation of this file.
1 //==----------- builtins_helper.hpp - SYCL built-in helper ----------------==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include <sycl/detail/export.hpp>
11 #include <sycl/exception.hpp>
12 #include <sycl/pointers.hpp>
13 #include <sycl/types.hpp>
14 
// Exception-specification placeholder used by the function-generating macros
// in this file. It currently expands to nothing, because `noexcept` is
// commented out.
// TODO: decide whether the generated functions should be marked noexcept.
#define __NOEXC /*noexcept*/
17 
// Generates the sycl::vec<Arg1, N> overload of `Fun`.
// Every element of `x` is passed to the scalar function __host_std::Call and
// the results are collected into a sycl::vec<Ret, N>.
// `detail::helper` is named without full qualification, so the macro has to
// be expanded in a scope where that name resolves.
// __NOEXC is placed after the parameter list (where an exception
// specification belongs) so the definition stays well-formed should __NOEXC
// ever expand to `noexcept`.
#define __MAKE_1V(Fun, Call, N, Ret, Arg1) \
  __SYCL_EXPORT sycl::vec<Ret, N> Fun(sycl::vec<Arg1, N> x) __NOEXC { \
    sycl::vec<Ret, N> r; \
    detail::helper<N - 1>().run_1v( \
        r, [](Arg1 x) { return __host_std::Call(x); }, x); \
    return r; \
  }
25 
// Generates the two-vector overload of `Fun` for width N.
// Corresponding elements of `x` and `y` are passed pairwise to the scalar
// function __host_std::Call; the results form a sycl::vec<Ret, N>.
// `detail::helper` must be visible at the point of expansion.
// __NOEXC follows the parameter list so the definition remains valid if
// __NOEXC is ever defined as `noexcept`.
#define __MAKE_1V_2V(Fun, Call, N, Ret, Arg1, Arg2) \
  __SYCL_EXPORT sycl::vec<Ret, N> Fun(sycl::vec<Arg1, N> x, \
                                      sycl::vec<Arg2, N> y) __NOEXC { \
    sycl::vec<Ret, N> r; \
    detail::helper<N - 1>().run_1v_2v( \
        r, [](Arg1 x, Arg2 y) { return __host_std::Call(x, y); }, x, y); \
    return r; \
  }
34 
// Generates a two-vector overload of `Fun` that returns a single scalar.
// The result starts as a value-initialized `Ret` and is handed by reference,
// together with each pair of corresponding elements of `x` and `y`, to
// __host_std::Call, which is expected to update it in place (the value the
// callable returns is not used by the helper).
// `detail::helper` must be visible at the point of expansion.
// __NOEXC follows the parameter list so the definition remains valid if
// __NOEXC is ever defined as `noexcept`.
#define __MAKE_1V_2V_RS(Fun, Call, N, Ret, Arg1, Arg2) \
  __SYCL_EXPORT Ret Fun(sycl::vec<Arg1, N> x, sycl::vec<Arg2, N> y) __NOEXC { \
    Ret r = Ret(); \
    detail::helper<N - 1>().run_1v_2v_rs( \
        r, [](Ret &r, Arg1 x, Arg2 y) { return __host_std::Call(r, x, y); }, \
        x, y); \
    return r; \
  }
43 
// Generates a one-vector overload of `Fun` that returns a single scalar.
// The result starts as a value-initialized `Ret` and is handed by reference,
// together with each element of `x`, to __host_std::Call, which is expected
// to update it in place (the callable's return value is not used by the
// helper).
// `detail::helper` must be visible at the point of expansion.
// __NOEXC follows the parameter list so the definition remains valid if
// __NOEXC is ever defined as `noexcept`.
#define __MAKE_1V_RS(Fun, Call, N, Ret, Arg1) \
  __SYCL_EXPORT Ret Fun(sycl::vec<Arg1, N> x) __NOEXC { \
    Ret r = Ret(); \
    detail::helper<N - 1>().run_1v_rs( \
        r, [](Ret &r, Arg1 x) { return __host_std::Call(r, x); }, x); \
    return r; \
  }
51 
// Generates the three-vector overload of `Fun` for width N.
// Corresponding elements of `x`, `y` and `z` are passed to the scalar function
// __host_std::Call; the results form a sycl::vec<Ret, N>.
// `detail::helper` must be visible at the point of expansion.
// __NOEXC follows the parameter list so the definition remains valid if
// __NOEXC is ever defined as `noexcept`.
#define __MAKE_1V_2V_3V(Fun, Call, N, Ret, Arg1, Arg2, Arg3) \
  __SYCL_EXPORT sycl::vec<Ret, N> Fun(sycl::vec<Arg1, N> x, \
                                      sycl::vec<Arg2, N> y, \
                                      sycl::vec<Arg3, N> z) __NOEXC { \
    sycl::vec<Ret, N> r; \
    detail::helper<N - 1>().run_1v_2v_3v( \
        r, [](Arg1 x, Arg2 y, Arg3 z) { return __host_std::Call(x, y, z); }, \
        x, y, z); \
    return r; \
  }
61 
// Generates the (vector, scalar, scalar) overload of `Fun` for width N.
// Each element of `x` is combined with the same scalars `y` and `z` through
// the scalar function __host_std::Fun; the results form a sycl::vec<Ret, N>.
// `detail::helper` must be visible at the point of expansion.
// __NOEXC follows the parameter list so the definition remains valid if
// __NOEXC is ever defined as `noexcept`.
#define __MAKE_1V_2S_3S(Fun, N, Ret, Arg1, Arg2, Arg3) \
  __SYCL_EXPORT sycl::vec<Ret, N> Fun(sycl::vec<Arg1, N> x, Arg2 y, \
                                      Arg3 z) __NOEXC { \
    sycl::vec<Ret, N> r; \
    detail::helper<N - 1>().run_1v_2s_3s( \
        r, [](Arg1 x, Arg2 y, Arg3 z) { return __host_std::Fun(x, y, z); }, x, \
        y, z); \
    return r; \
  }
71 
// Generates the (vector, scalar) overload of `Fun` for width N.
// Each element of `x` is combined with the same scalar `y` through the scalar
// function __host_std::Fun; the results form a sycl::vec<Ret, N>.
// `detail::helper` must be visible at the point of expansion.
// __NOEXC follows the parameter list so the definition remains valid if
// __NOEXC is ever defined as `noexcept`.
#define __MAKE_1V_2S(Fun, N, Ret, Arg1, Arg2) \
  __SYCL_EXPORT sycl::vec<Ret, N> Fun(sycl::vec<Arg1, N> x, Arg2 y) __NOEXC { \
    sycl::vec<Ret, N> r; \
    detail::helper<N - 1>().run_1v_2s( \
        r, [](Arg1 x, Arg2 y) { return __host_std::Fun(x, y); }, x, y); \
    return r; \
  }
79 
// Generates a one-vector overload of `Fun` that reduces to a single scalar
// with logical AND: __host_std::Call is evaluated on every element of `x` and
// the outcomes are combined with `&&`.
// `r` is value-initialized, matching the other scalar-result macros; the
// helper overwrites it with the outcome for element 0 before combining.
// `detail::helper` must be visible at the point of expansion.
// __NOEXC follows the parameter list so the definition remains valid if
// __NOEXC is ever defined as `noexcept`.
#define __MAKE_SR_1V_AND(Fun, Call, N, Ret, Arg1) \
  __SYCL_EXPORT Ret Fun(sycl::vec<Arg1, N> x) __NOEXC { \
    Ret r = Ret(); \
    detail::helper<N - 1>().run_1v_sr_and( \
        r, [](Arg1 x) { return __host_std::Call(x); }, x); \
    return r; \
  }
87 
// Generates a one-vector overload of `Fun` that reduces to a single scalar
// with logical OR: __host_std::Call is evaluated on every element of `x` and
// the outcomes are combined with `||`.
// `r` is value-initialized, matching the other scalar-result macros; the
// helper overwrites it with the outcome for element 0 before combining.
// `detail::helper` must be visible at the point of expansion.
// __NOEXC follows the parameter list so the definition remains valid if
// __NOEXC is ever defined as `noexcept`.
#define __MAKE_SR_1V_OR(Fun, Call, N, Ret, Arg1) \
  __SYCL_EXPORT Ret Fun(sycl::vec<Arg1, N> x) __NOEXC { \
    Ret r = Ret(); \
    detail::helper<N - 1>().run_1v_sr_or( \
        r, [](Arg1 x) { return __host_std::Call(x); }, x); \
    return r; \
  }
95 
// Generates the (vector, pointer-to-vector) overload of `Fun` for width N.
// For each element of `x` the scalar function __host_std::Fun is called with
// that element and a pointer to a scalar; its return value goes into the
// result vector and the value written through the pointer is stored in the
// matching element of `*y`.
// `detail::helper` must be visible at the point of expansion.
// __NOEXC follows the parameter list so the definition remains valid if
// __NOEXC is ever defined as `noexcept`.
#define __MAKE_1V_2P(Fun, N, Ret, Arg1, Arg2) \
  __SYCL_EXPORT sycl::vec<Ret, N> Fun(sycl::vec<Arg1, N> x, \
                                      sycl::vec<Arg2, N> *y) __NOEXC { \
    sycl::vec<Ret, N> r; \
    detail::helper<N - 1>().run_1v_2p( \
        r, [](Arg1 x, Arg2 *y) { return __host_std::Fun(x, y); }, x, y); \
    return r; \
  }
104 
// Generates the (vector, vector, pointer-to-vector) overload of `Fun` for
// width N. For each pair of corresponding elements of `x` and `y` the scalar
// function __host_std::Fun is called with those elements and a pointer to a
// scalar; its return value goes into the result vector and the value written
// through the pointer is stored in the matching element of `*z`.
// `detail::helper` must be visible at the point of expansion.
// __NOEXC follows the parameter list so the definition remains valid if
// __NOEXC is ever defined as `noexcept`.
#define __MAKE_1V_2V_3P(Fun, N, Ret, Arg1, Arg2, Arg3) \
  __SYCL_EXPORT sycl::vec<Ret, N> Fun(sycl::vec<Arg1, N> x, \
                                      sycl::vec<Arg2, N> y, \
                                      sycl::vec<Arg3, N> *z) __NOEXC { \
    sycl::vec<Ret, N> r; \
    detail::helper<N - 1>().run_1v_2v_3p( \
        r, [](Arg1 x, Arg2 y, Arg3 *z) { return __host_std::Fun(x, y, z); }, \
        x, y, z); \
    return r; \
  }
114 
// Shorthand for MAKE_1V_FUNC when the generated vector function and the
// scalar function it calls share the same name `Fun`.
#define MAKE_1V(Fun, Ret, Arg1) \
  MAKE_1V_FUNC(Fun, Fun, Ret, Arg1)
116 
// Expands __MAKE_1V once per vector width: 1, 2, 3, 4, 8 and 16.
// `Fun` names the generated overloads, `Call` the scalar function applied to
// each element.
#define MAKE_1V_FUNC(Fun, Call, Ret, Arg1) \
  __MAKE_1V(Fun, Call,  1, Ret, Arg1) \
  __MAKE_1V(Fun, Call,  2, Ret, Arg1) \
  __MAKE_1V(Fun, Call,  3, Ret, Arg1) \
  __MAKE_1V(Fun, Call,  4, Ret, Arg1) \
  __MAKE_1V(Fun, Call,  8, Ret, Arg1) \
  __MAKE_1V(Fun, Call, 16, Ret, Arg1)
124 
// Shorthand for MAKE_1V_2V_FUNC when the generated vector function and the
// scalar function it calls share the same name `Fun`.
#define MAKE_1V_2V(Fun, Ret, Arg1, Arg2) \
  MAKE_1V_2V_FUNC(Fun, Fun, Ret, Arg1, Arg2)
127 
// Expands __MAKE_1V_2V once per vector width: 1, 2, 3, 4, 8 and 16.
// `Fun` names the generated overloads, `Call` the scalar function applied to
// each pair of elements.
#define MAKE_1V_2V_FUNC(Fun, Call, Ret, Arg1, Arg2) \
  __MAKE_1V_2V(Fun, Call,  1, Ret, Arg1, Arg2) \
  __MAKE_1V_2V(Fun, Call,  2, Ret, Arg1, Arg2) \
  __MAKE_1V_2V(Fun, Call,  3, Ret, Arg1, Arg2) \
  __MAKE_1V_2V(Fun, Call,  4, Ret, Arg1, Arg2) \
  __MAKE_1V_2V(Fun, Call,  8, Ret, Arg1, Arg2) \
  __MAKE_1V_2V(Fun, Call, 16, Ret, Arg1, Arg2)
135 
// Shorthand for MAKE_1V_2V_3V_FUNC when the generated vector function and the
// scalar function it calls share the same name `Fun`.
#define MAKE_1V_2V_3V(Fun, Ret, Arg1, Arg2, Arg3) \
  MAKE_1V_2V_3V_FUNC(Fun, Fun, Ret, Arg1, Arg2, Arg3)
138 
// Expands __MAKE_1V_2V_3V once per vector width: 1, 2, 3, 4, 8 and 16.
// `Fun` names the generated overloads, `Call` the scalar function applied to
// each triple of elements.
#define MAKE_1V_2V_3V_FUNC(Fun, Call, Ret, Arg1, Arg2, Arg3) \
  __MAKE_1V_2V_3V(Fun, Call,  1, Ret, Arg1, Arg2, Arg3) \
  __MAKE_1V_2V_3V(Fun, Call,  2, Ret, Arg1, Arg2, Arg3) \
  __MAKE_1V_2V_3V(Fun, Call,  3, Ret, Arg1, Arg2, Arg3) \
  __MAKE_1V_2V_3V(Fun, Call,  4, Ret, Arg1, Arg2, Arg3) \
  __MAKE_1V_2V_3V(Fun, Call,  8, Ret, Arg1, Arg2, Arg3) \
  __MAKE_1V_2V_3V(Fun, Call, 16, Ret, Arg1, Arg2, Arg3)
146 
// Generates both the scalar three-argument function `Fun` (via MAKE_SC_3ARG)
// and its vector overloads for every supported width. The vector overloads
// call __host_std::Fun per element; MAKE_1V_2V_3V expands to
// MAKE_1V_2V_3V_FUNC(Fun, Fun, ...).
#define MAKE_SC_1V_2V_3V(Fun, Ret, Arg1, Arg2, Arg3) \
  MAKE_SC_3ARG(Fun, Ret, Arg1, Arg2, Arg3) \
  MAKE_1V_2V_3V(Fun, Ret, Arg1, Arg2, Arg3)
150 
// Like MAKE_SC_1V_2V_3V, but with a separate per-element callee:
//  * the scalar function `FunSc` is generated by MAKE_SC_3ARG;
//  * the vector overloads are also named `FunSc`, and apply __host_std::FunV
//    to each triple of elements.
#define MAKE_SC_FSC_1V_2V_3V_FV(FunSc, FunV, Ret, Arg1, Arg2, Arg3) \
  MAKE_SC_3ARG(FunSc, Ret, Arg1, Arg2, Arg3) \
  MAKE_1V_2V_3V_FUNC(FunSc, FunV, Ret, Arg1, Arg2, Arg3)
154 
// Generates the scalar three-argument function `Fun`, which forwards its
// arguments to `__<Fun>` (the name is formed by pasting `__` in front of
// `Fun`) and converts the result to `Ret`.
// The C-style cast is kept as-is because the return type of `__<Fun>` is not
// visible here.
// __NOEXC follows the parameter list so the definition remains valid if
// __NOEXC is ever defined as `noexcept`.
#define MAKE_SC_3ARG(Fun, Ret, Arg1, Arg2, Arg3) \
  __SYCL_EXPORT Ret Fun(Arg1 x, Arg2 y, Arg3 z) __NOEXC { \
    return (Ret)__##Fun(x, y, z); \
  }
159 
// Expands __MAKE_1V_2S (vector, scalar) once per vector width:
// 1, 2, 3, 4, 8 and 16.
#define MAKE_1V_2S(Fun, Ret, Arg1, Arg2) \
  __MAKE_1V_2S(Fun,  1, Ret, Arg1, Arg2) \
  __MAKE_1V_2S(Fun,  2, Ret, Arg1, Arg2) \
  __MAKE_1V_2S(Fun,  3, Ret, Arg1, Arg2) \
  __MAKE_1V_2S(Fun,  4, Ret, Arg1, Arg2) \
  __MAKE_1V_2S(Fun,  8, Ret, Arg1, Arg2) \
  __MAKE_1V_2S(Fun, 16, Ret, Arg1, Arg2)
167 
// Expands __MAKE_1V_2S_3S (vector, scalar, scalar) once per vector width:
// 1, 2, 3, 4, 8 and 16.
#define MAKE_1V_2S_3S(Fun, Ret, Arg1, Arg2, Arg3) \
  __MAKE_1V_2S_3S(Fun,  1, Ret, Arg1, Arg2, Arg3) \
  __MAKE_1V_2S_3S(Fun,  2, Ret, Arg1, Arg2, Arg3) \
  __MAKE_1V_2S_3S(Fun,  3, Ret, Arg1, Arg2, Arg3) \
  __MAKE_1V_2S_3S(Fun,  4, Ret, Arg1, Arg2, Arg3) \
  __MAKE_1V_2S_3S(Fun,  8, Ret, Arg1, Arg2, Arg3) \
  __MAKE_1V_2S_3S(Fun, 16, Ret, Arg1, Arg2, Arg3)
175 
// Expands __MAKE_SR_1V_AND (vector reduced to a scalar with logical AND) once
// per vector width: 1, 2, 3, 4, 8 and 16.
#define MAKE_SR_1V_AND(Fun, Call, Ret, Arg1) \
  __MAKE_SR_1V_AND(Fun, Call,  1, Ret, Arg1) \
  __MAKE_SR_1V_AND(Fun, Call,  2, Ret, Arg1) \
  __MAKE_SR_1V_AND(Fun, Call,  3, Ret, Arg1) \
  __MAKE_SR_1V_AND(Fun, Call,  4, Ret, Arg1) \
  __MAKE_SR_1V_AND(Fun, Call,  8, Ret, Arg1) \
  __MAKE_SR_1V_AND(Fun, Call, 16, Ret, Arg1)
183 
// Expands __MAKE_SR_1V_OR (vector reduced to a scalar with logical OR) once
// per vector width: 1, 2, 3, 4, 8 and 16.
#define MAKE_SR_1V_OR(Fun, Call, Ret, Arg1) \
  __MAKE_SR_1V_OR(Fun, Call,  1, Ret, Arg1) \
  __MAKE_SR_1V_OR(Fun, Call,  2, Ret, Arg1) \
  __MAKE_SR_1V_OR(Fun, Call,  3, Ret, Arg1) \
  __MAKE_SR_1V_OR(Fun, Call,  4, Ret, Arg1) \
  __MAKE_SR_1V_OR(Fun, Call,  8, Ret, Arg1) \
  __MAKE_SR_1V_OR(Fun, Call, 16, Ret, Arg1)
191 
// Expands __MAKE_1V_2P (vector, pointer-to-vector output) once per vector
// width: 1, 2, 3, 4, 8 and 16.
#define MAKE_1V_2P(Fun, Ret, Arg1, Arg2) \
  __MAKE_1V_2P(Fun,  1, Ret, Arg1, Arg2) \
  __MAKE_1V_2P(Fun,  2, Ret, Arg1, Arg2) \
  __MAKE_1V_2P(Fun,  3, Ret, Arg1, Arg2) \
  __MAKE_1V_2P(Fun,  4, Ret, Arg1, Arg2) \
  __MAKE_1V_2P(Fun,  8, Ret, Arg1, Arg2) \
  __MAKE_1V_2P(Fun, 16, Ret, Arg1, Arg2)
199 
// Expands __MAKE_1V_2V_RS (two vectors folded into one scalar result) once
// per vector width: 1, 2, 3, 4, 8 and 16.
#define MAKE_GEO_1V_2V_RS(Fun, Call, Ret, Arg1, Arg2) \
  __MAKE_1V_2V_RS(Fun, Call,  1, Ret, Arg1, Arg2) \
  __MAKE_1V_2V_RS(Fun, Call,  2, Ret, Arg1, Arg2) \
  __MAKE_1V_2V_RS(Fun, Call,  3, Ret, Arg1, Arg2) \
  __MAKE_1V_2V_RS(Fun, Call,  4, Ret, Arg1, Arg2) \
  __MAKE_1V_2V_RS(Fun, Call,  8, Ret, Arg1, Arg2) \
  __MAKE_1V_2V_RS(Fun, Call, 16, Ret, Arg1, Arg2)
207 
// Expands __MAKE_1V_2V_3P (vector, vector, pointer-to-vector output) once per
// vector width: 1, 2, 3, 4, 8 and 16.
#define MAKE_1V_2V_3P(Fun, Ret, Arg1, Arg2, Arg3) \
  __MAKE_1V_2V_3P(Fun,  1, Ret, Arg1, Arg2, Arg3) \
  __MAKE_1V_2V_3P(Fun,  2, Ret, Arg1, Arg2, Arg3) \
  __MAKE_1V_2V_3P(Fun,  3, Ret, Arg1, Arg2, Arg3) \
  __MAKE_1V_2V_3P(Fun,  4, Ret, Arg1, Arg2, Arg3) \
  __MAKE_1V_2V_3P(Fun,  8, Ret, Arg1, Arg2, Arg3) \
  __MAKE_1V_2V_3P(Fun, 16, Ret, Arg1, Arg2, Arg3)
215 
namespace __host_std {
namespace detail {

// Compile-time recursion that applies a scalar callable to the elements of
// vector-like operands, one index at a time.
//
// Every member of helper<N> first delegates to the same member of
// helper<N - 1> and then handles element N, so helper<K>() visits elements
// 0..K in ascending order. The helper<0> specialization ends the recursion.
// Elements are reached through `template swizzle<I>()`, which each vector
// operand (and, for the *p variants, the pointee) has to provide.
template <int N> struct helper {
  // out[N] = fn(a[N])
  template <typename Out, typename Fn, typename V1>
  void run_1v(Out &out, Fn fn, V1 a) {
    helper<N - 1>{}.run_1v(out, fn, a);
    out.template swizzle<N>() = fn(a.template swizzle<N>());
  }

  // out[N] = fn(a[N], b[N])
  template <typename Out, typename Fn, typename V1, typename V2>
  void run_1v_2v(Out &out, Fn fn, V1 a, V2 b) {
    helper<N - 1>{}.run_1v_2v(out, fn, a, b);
    out.template swizzle<N>() =
        fn(a.template swizzle<N>(), b.template swizzle<N>());
  }

  // out[N] = fn(a[N], s); the second operand is passed through unchanged.
  template <typename Out, typename Fn, typename V1, typename S2>
  void run_1v_2s(Out &out, Fn fn, V1 a, S2 s) {
    helper<N - 1>{}.run_1v_2s(out, fn, a, s);
    out.template swizzle<N>() = fn(a.template swizzle<N>(), s);
  }

  // out[N] = fn(a[N], s, t); the last two operands are passed through
  // unchanged.
  template <typename Out, typename Fn, typename V1, typename S2, typename S3>
  void run_1v_2s_3s(Out &out, Fn fn, V1 a, S2 s, S3 t) {
    helper<N - 1>{}.run_1v_2s_3s(out, fn, a, s, t);
    out.template swizzle<N>() = fn(a.template swizzle<N>(), s, t);
  }

  // fn(acc, a[N], b[N]); `acc` is handed to the callable by reference and
  // whatever the callable returns is discarded.
  template <typename Acc, typename Fn, typename V1, typename V2>
  void run_1v_2v_rs(Acc &acc, Fn fn, V1 a, V2 b) {
    helper<N - 1>{}.run_1v_2v_rs(acc, fn, a, b);
    fn(acc, a.template swizzle<N>(), b.template swizzle<N>());
  }

  // fn(acc, a[N]); `acc` is handed to the callable by reference and whatever
  // the callable returns is discarded.
  template <typename Acc, typename Fn, typename V1>
  void run_1v_rs(Acc &acc, Fn fn, V1 a) {
    helper<N - 1>{}.run_1v_rs(acc, fn, a);
    fn(acc, a.template swizzle<N>());
  }

  // out[N] = fn(a[N], &scratch); (*dst)[N] = scratch.
  // `dst` must be a pointer whose pointee exposes `element_type`.
  template <typename Out, typename Fn, typename V1, typename P2>
  void run_1v_2p(Out &out, Fn fn, V1 a, P2 dst) {
    using Elem = typename std::remove_pointer<P2>::type::element_type;
    helper<N - 1>{}.run_1v_2p(out, fn, a, dst);
    // TODO avoid creating a temporary variable
    Elem scratch;
    out.template swizzle<N>() = fn(a.template swizzle<N>(), &scratch);
    dst->template swizzle<N>() = scratch;
  }

  // out[N] = fn(a[N], b[N], &scratch); (*dst)[N] = scratch.
  // `dst` must be a pointer whose pointee exposes `element_type`.
  template <typename Out, typename Fn, typename V1, typename V2, typename P3>
  void run_1v_2v_3p(Out &out, Fn fn, V1 a, V2 b, P3 dst) {
    using Elem = typename std::remove_pointer<P3>::type::element_type;
    helper<N - 1>{}.run_1v_2v_3p(out, fn, a, b, dst);
    // TODO avoid creating a temporary variable
    Elem scratch;
    out.template swizzle<N>() =
        fn(a.template swizzle<N>(), b.template swizzle<N>(), &scratch);
    dst->template swizzle<N>() = scratch;
  }

  // out[N] = fn(a[N], b[N], c[N])
  template <typename Out, typename Fn, typename V1, typename V2, typename V3>
  void run_1v_2v_3v(Out &out, Fn fn, V1 a, V2 b, V3 c) {
    helper<N - 1>{}.run_1v_2v_3v(out, fn, a, b, c);
    out.template swizzle<N>() =
        fn(a.template swizzle<N>(), b.template swizzle<N>(),
           c.template swizzle<N>());
  }

  // acc = fn(a[N]) || acc; the callable is the left operand, so it is
  // evaluated for every element.
  template <typename Acc, typename Fn, typename V1>
  void run_1v_sr_or(Acc &acc, Fn fn, V1 a) {
    helper<N - 1>{}.run_1v_sr_or(acc, fn, a);
    acc = (fn(a.template swizzle<N>()) || acc);
  }

  // acc = fn(a[N]) && acc; the callable is the left operand, so it is
  // evaluated for every element.
  template <typename Acc, typename Fn, typename V1>
  void run_1v_sr_and(Acc &acc, Fn fn, V1 a) {
    helper<N - 1>{}.run_1v_sr_and(acc, fn, a);
    acc = (fn(a.template swizzle<N>()) && acc);
  }
};

// Recursion terminator: the same operations as the primary template, applied
// to element 0 only and without any further delegation.
template <> struct helper<0> {
  // out[0] = fn(a[0])
  template <typename Out, typename Fn, typename V1>
  void run_1v(Out &out, Fn fn, V1 a) {
    out.template swizzle<0>() = fn(a.template swizzle<0>());
  }

  // out[0] = fn(a[0], b[0])
  template <typename Out, typename Fn, typename V1, typename V2>
  void run_1v_2v(Out &out, Fn fn, V1 a, V2 b) {
    out.template swizzle<0>() =
        fn(a.template swizzle<0>(), b.template swizzle<0>());
  }

  // out[0] = fn(a[0], s)
  template <typename Out, typename Fn, typename V1, typename S2>
  void run_1v_2s(Out &out, Fn fn, V1 a, S2 s) {
    out.template swizzle<0>() = fn(a.template swizzle<0>(), s);
  }

  // out[0] = fn(a[0], s, t)
  template <typename Out, typename Fn, typename V1, typename S2, typename S3>
  void run_1v_2s_3s(Out &out, Fn fn, V1 a, S2 s, S3 t) {
    out.template swizzle<0>() = fn(a.template swizzle<0>(), s, t);
  }

  // fn(acc, a[0], b[0]); the callable's return value is discarded.
  template <typename Acc, typename Fn, typename V1, typename V2>
  void run_1v_2v_rs(Acc &acc, Fn fn, V1 a, V2 b) {
    fn(acc, a.template swizzle<0>(), b.template swizzle<0>());
  }

  // fn(acc, a[0]); the callable's return value is discarded.
  template <typename Acc, typename Fn, typename V1>
  void run_1v_rs(Acc &acc, Fn fn, V1 a) {
    fn(acc, a.template swizzle<0>());
  }

  // out[0] = fn(a[0], &scratch); (*dst)[0] = scratch.
  template <typename Out, typename Fn, typename V1, typename P2>
  void run_1v_2p(Out &out, Fn fn, V1 a, P2 dst) {
    using Elem = typename std::remove_pointer<P2>::type::element_type;
    // TODO avoid creating a temporary variable
    Elem scratch;
    out.template swizzle<0>() = fn(a.template swizzle<0>(), &scratch);
    dst->template swizzle<0>() = scratch;
  }

  // out[0] = fn(a[0], b[0], &scratch); (*dst)[0] = scratch.
  template <typename Out, typename Fn, typename V1, typename V2, typename P3>
  void run_1v_2v_3p(Out &out, Fn fn, V1 a, V2 b, P3 dst) {
    using Elem = typename std::remove_pointer<P3>::type::element_type;
    // TODO avoid creating a temporary variable
    Elem scratch;
    out.template swizzle<0>() =
        fn(a.template swizzle<0>(), b.template swizzle<0>(), &scratch);
    dst->template swizzle<0>() = scratch;
  }

  // out[0] = fn(a[0], b[0], c[0])
  template <typename Out, typename Fn, typename V1, typename V2, typename V3>
  void run_1v_2v_3v(Out &out, Fn fn, V1 a, V2 b, V3 c) {
    out.template swizzle<0>() =
        fn(a.template swizzle<0>(), b.template swizzle<0>(),
           c.template swizzle<0>());
  }

  // Seeds the OR reduction: acc = fn(a[0]). Any previous value of `acc` is
  // overwritten.
  template <typename Acc, typename Fn, typename V1>
  void run_1v_sr_or(Acc &acc, Fn fn, V1 a) {
    acc = fn(a.template swizzle<0>());
  }

  // Seeds the AND reduction: acc = fn(a[0]). Any previous value of `acc` is
  // overwritten.
  template <typename Acc, typename Fn, typename V1>
  void run_1v_sr_and(Acc &acc, Fn fn, V1 a) {
    acc = fn(a.template swizzle<0>());
  }
};

} // namespace detail
} // namespace __host_std
void run_1v_2s(Res &r, Op op, T1 x, T2 y)
void run_1v_2v_rs(Res &r, Op op, T1 x, T2 y)
void run_1v_2v_3v(Res &r, Op op, T1 x, T2 y, T3 z)
void run_1v_rs(Res &r, Op op, T1 x)
void run_1v_2v(Res &r, Op op, T1 x, T2 y)
void run_1v_2p(Res &r, Op op, T1 x, T2 y)
void run_1v(Res &r, Op op, T1 x)
void run_1v_2v_3p(Res &r, Op op, T1 x, T2 y, T3 z)
void run_1v_2s_3s(Res &r, Op op, T1 x, T2 y, T3 z)
void run_1v_sr_and(Res &r, Op op, T1 x)
void run_1v_sr_or(Res &r, Op op, T1 x)
void run_1v_rs(Res &r, Op op, T1 x)
void run_1v_2s(Res &r, Op op, T1 x, T2 y)
void run_1v_sr_or(Res &r, Op op, T1 x)
void run_1v_sr_and(Res &r, Op op, T1 x)
void run_1v_2p(Res &r, Op op, T1 x, T2 y)
void run_1v_2v(Res &r, Op op, T1 x, T2 y)
void run_1v_2s_3s(Res &r, Op op, T1 x, T2 y, T3 z)
void run_1v_2v_3v(Res &r, Op op, T1 x, T2 y, T3 z)
void run_1v(Res &r, Op op, T1 x)
void run_1v_2v_3p(Res &r, Op op, T1 x, T2 y, T3 z)
void run_1v_2v_rs(Res &r, Op op, T1 x, T2 y)