DPC++ Runtime
Runtime libraries for oneAPI DPC++
helper_macros.hpp
Go to the documentation of this file.
1
//==-- helper_macros.hpp -- Utility macros to implement sycl builtins ------==//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
9
#pragma once
10
11
// Usage:
12
// #define HANDLE_TYPE(INVARIANT_ARG1, INVARIANT_ARG2, TYPE) ...
13
// FOR_EACH2(HANDLE_TYPE, A1, A2, TYPE1, TYPE2, ...)
14
// it will expand into
15
// HANDLE_TYPE(A1, A2, TYPE1)
16
// HANDLE_TYPE(A1, A2, TYPE2)
17
// ...
18
// Number of "invariant" arguments determines the numeric suffix for the
19
// FOR_EACHN. Only 0-4 are currently supported, and up to 15 types at most.
20
#define GET_MACRO(_1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, \
21
_15, NAME, ...) \
22
NAME
23
#define FOR_EACH4_A1(BASE_CASE, FIXED1, FIXED2, FIXED3, FIXED4, ARG1) \
24
BASE_CASE(FIXED1, FIXED2, FIXED3, FIXED4, ARG1)
25
#define FOR_EACH4_A2(BASE_CASE, FIXED1, FIXED2, FIXED3, FIXED4, ARG1, ARG2) \
26
BASE_CASE(FIXED1, FIXED2, FIXED3, FIXED4, ARG1) \
27
BASE_CASE(FIXED1, FIXED2, FIXED3, FIXED4, ARG2)
28
#define FOR_EACH4_A3(BASE_CASE, FIXED1, FIXED2, FIXED3, FIXED4, ARG1, ARG2, \
29
ARG3) \
30
FOR_EACH4_A2(BASE_CASE, FIXED1, FIXED2, FIXED3, FIXED4, ARG1, ARG2) \
31
BASE_CASE(FIXED1, FIXED2, FIXED3, FIXED4, ARG3)
32
#define FOR_EACH4_A4(BASE_CASE, FIXED1, FIXED2, FIXED3, FIXED4, ARG1, ARG2, \
33
ARG3, ARG4) \
34
FOR_EACH4_A3(BASE_CASE, FIXED1, FIXED2, FIXED3, FIXED4, ARG1, ARG2, ARG3) \
35
BASE_CASE(FIXED1, FIXED2, FIXED3, FIXED4, ARG4)
36
#define FOR_EACH4_A5(BASE_CASE, FIXED1, FIXED2, FIXED3, FIXED4, ARG1, ARG2, \
37
ARG3, ARG4, ARG5) \
38
FOR_EACH4_A4(BASE_CASE, FIXED1, FIXED2, FIXED3, FIXED4, ARG1, ARG2, ARG3, \
39
ARG4) \
40
BASE_CASE(FIXED1, FIXED2, FIXED3, FIXED4, ARG5)
41
#define FOR_EACH4_A6(BASE_CASE, FIXED1, FIXED2, FIXED3, FIXED4, ARG1, ARG2, \
42
ARG3, ARG4, ARG5, ARG6) \
43
FOR_EACH4_A5(BASE_CASE, FIXED1, FIXED2, FIXED3, FIXED4, ARG1, ARG2, ARG3, \
44
ARG4, ARG5) \
45
BASE_CASE(FIXED1, FIXED2, FIXED3, FIXED4, ARG6)
46
#define FOR_EACH4_A7(BASE_CASE, FIXED1, FIXED2, FIXED3, FIXED4, ARG1, ARG2, \
47
ARG3, ARG4, ARG5, ARG6, ARG7) \
48
FOR_EACH4_A6(BASE_CASE, FIXED1, FIXED2, FIXED3, FIXED4, ARG1, ARG2, ARG3, \
49
ARG4, ARG5, ARG6) \
50
BASE_CASE(FIXED1, FIXED2, FIXED3, FIXED4, ARG7)
51
#define FOR_EACH4_A8(BASE_CASE, FIXED1, FIXED2, FIXED3, FIXED4, ARG1, ARG2, \
52
ARG3, ARG4, ARG5, ARG6, ARG7, ARG8) \
53
FOR_EACH4_A7(BASE_CASE, FIXED1, FIXED2, FIXED3, FIXED4, ARG1, ARG2, ARG3, \
54
ARG4, ARG5, ARG6, ARG7) \
55
BASE_CASE(FIXED1, FIXED2, FIXED3, FIXED4, ARG8)
56
#define FOR_EACH4_A11(BASE_CASE, FIXED1, FIXED2, FIXED3, FIXED4, ARG1, ARG2, \
57
ARG3, ARG4, ARG5, ARG6, ARG7, ARG8, ARG9, ARG10, ARG11) \
58
FOR_EACH4_A7(BASE_CASE, FIXED1, FIXED2, FIXED3, FIXED4, ARG1, ARG2, ARG3, \
59
ARG4, ARG5, ARG6, ARG7) \
60
FOR_EACH4_A4(BASE_CASE, FIXED1, FIXED2, FIXED3, FIXED4, ARG8, ARG9, ARG10, \
61
ARG11)
62
#define FOR_EACH4_A14(BASE_CASE, FIXED1, FIXED2, FIXED3, FIXED4, ARG1, ARG2, \
63
ARG3, ARG4, ARG5, ARG6, ARG7, ARG8, ARG9, ARG10, ARG11, \
64
ARG12, ARG13, ARG14) \
65
FOR_EACH4_A11(BASE_CASE, FIXED1, FIXED2, FIXED3, FIXED4, ARG1, ARG2, ARG3, \
66
ARG4, ARG5, ARG6, ARG7, ARG8, ARG9, ARG10, ARG11) \
67
FOR_EACH4_A3(BASE_CASE, FIXED1, FIXED2, FIXED3, FIXED4, ARG12, ARG13, ARG14)
68
69
// https://stackoverflow.com/a/5134656
70
// Workaround for MSVC's non-standard preprocessor behavior. Alternatively,
71
// /Zc:preprocessor could be used to to fix that. Only necessary for custom host
72
// compiler scenario.
73
#define EXPAND(x) x
74
75
#define FOR_EACH4(BASE_CASE, FIXED1, FIXED2, FIXED3, FIXED4, ...) \
76
EXPAND(GET_MACRO( \
77
__VA_ARGS__, FOR_EACH4##_A15, FOR_EACH4##_A14, FOR_EACH4##_A13, \
78
FOR_EACH4##_A12, FOR_EACH4##_A11, FOR_EACH4##_A10, FOR_EACH4##_A9, \
79
FOR_EACH4##_A8, FOR_EACH4##_A7, FOR_EACH4##_A6, FOR_EACH4##_A5, \
80
FOR_EACH4##_A4, FOR_EACH4##_A3, FOR_EACH4##_A2, FOR_EACH4##_A1, \
81
_0, )(BASE_CASE, FIXED1, FIXED2, FIXED3, FIXED4, __VA_ARGS__))
82
83
#define FOR_EACH3_BASE(BASE_CASE, FIXED1, FIXED2, FIXED3, ARG1) \
84
BASE_CASE(FIXED1, FIXED2, FIXED3, ARG1)
85
#define FOR_EACH3(BASE_CASE, FIXED1, FIXED2, FIXED3, ...) \
86
FOR_EACH4(FOR_EACH3_BASE, BASE_CASE, FIXED1, FIXED2, FIXED3, __VA_ARGS__)
87
88
#define FOR_EACH2_BASE(BASE_CASE, FIXED1, FIXED2, ARG1) \
89
BASE_CASE(FIXED1, FIXED2, ARG1)
90
#define FOR_EACH2(BASE_CASE, FIXED1, FIXED2, ...) \
91
FOR_EACH3(FOR_EACH2_BASE, BASE_CASE, FIXED1, FIXED2, __VA_ARGS__)
92
93
#define FOR_EACH1_BASE(BASE_CASE, FIXED1, ARG1) BASE_CASE(FIXED1, ARG1)
94
#define FOR_EACH1(BASE_CASE, FIXED1, ...) \
95
FOR_EACH2(FOR_EACH1_BASE, BASE_CASE, FIXED1, __VA_ARGS__)
96
97
#define FOR_EACH0_BASE(BASE_CASE, ARG1) BASE_CASE(ARG1)
98
#define FOR_EACH0(BASE_CASE, ...) \
99
FOR_EACH1(FOR_EACH0_BASE, BASE_CASE, __VA_ARGS__)
100
101
// Some helpers to unify implementation between different numbers of template
102
// types.
103
104
#define ONE_ARG_TYPENAME_TYPE typename T0
105
#define TWO_ARGS_TYPENAME_TYPE typename T0, typename T1
106
#define THREE_ARGS_TYPENAME_TYPE typename T0, typename T1, typename T2
107
108
#define ONE_ARG_TEMPLATE_TYPE T0
109
#define TWO_ARGS_TEMPLATE_TYPE T0, T1
110
#define THREE_ARGS_TEMPLATE_TYPE T0, T1, T2
111
112
#define ONE_ARG_TEMPLATE_TYPE_ARG T0 x
113
#define TWO_ARGS_TEMPLATE_TYPE_ARG T0 x, T1 y
114
#define THREE_ARGS_TEMPLATE_TYPE_ARG T0 x, T1 y, T2 z
115
116
#define ONE_ARG_TEMPLATE_TYPE_ARG_REF T0 &x
117
#define TWO_ARGS_TEMPLATE_TYPE_ARG_REF T0 &x, T1 &y
118
#define THREE_ARGS_TEMPLATE_TYPE_ARG_REF T0 &x, T1 &y, T2 &z
119
120
#define ONE_ARG_ARG x
121
#define TWO_ARGS_ARG x, y
122
#define THREE_ARGS_ARG x, y, z
123
124
#define ONE_ARG_SIMPLIFIED_ARG \
125
simplify_if_swizzle_t<T0> { x }
126
#define TWO_ARGS_SIMPLIFIED_ARG \
127
simplify_if_swizzle_t<T0>{x}, simplify_if_swizzle_t<T1> { y }
128
#define THREE_ARGS_SIMPLIFIED_ARG \
129
simplify_if_swizzle_t<T0>{x}, simplify_if_swizzle_t<T1>{y}, \
130
simplify_if_swizzle_t<T2> { \
131
z \
132
}
133
134
#define TWO_ARGS_ARG_ROTATED y, x
135
#define THREE_ARGS_ARG_ROTATED z, x, y
136
137
#define ONE_ARG_CONVERTED_ARG detail::builtins::convert_arg(x)
138
#define TWO_ARGS_CONVERTED_ARG \
139
detail::builtins::convert_arg(x), detail::builtins::convert_arg(y)
140
#define THREE_ARGS_CONVERTED_ARG \
141
detail::builtins::convert_arg(x), detail::builtins::convert_arg(y), \
142
detail::builtins::convert_arg(z)
143
144
#define ONE_ARG_AUTO_ARG auto x
145
#define TWO_ARGS_AUTO_ARG auto x, auto y
146
#define THREE_ARGS_AUTO_ARG auto x, auto y, auto z
147
148
#define ONE_ARG_TYPE_ARG(TYPE) TYPE x
149
#define TWO_ARGS_TYPE_ARG(TYPE) TYPE x, TYPE y
150
#define THREE_ARGS_TYPE_ARG(TYPE) TYPE x, TYPE y, TYPE z
151
152
#define ONE_ARG_TYPE(TYPE) TYPE
153
#define TWO_ARGS_TYPE(TYPE) TYPE, TYPE
154
#define THREE_ARGS_TYPE(TYPE) TYPE, TYPE, TYPE
155
156
#define ONE_ARG_VEC_TYPE(TYPE, VL) vec<TYPE, VL>
157
#define TWO_ARGS_VEC_TYPE(TYPE, VL) vec<TYPE, VL>, vec<TYPE, VL>
158
#define THREE_ARGS_VEC_TYPE(TYPE, VL) \
159
vec<TYPE, VL>, vec<TYPE, VL>, vec<TYPE, VL>
160
161
#define ONE_ARG_VEC_TYPE_ARG(TYPE, VL) vec<TYPE, VL> x
162
#define TWO_ARGS_VEC_TYPE_ARG(TYPE, VL) vec<TYPE, VL> x, vec<TYPE, VL> y
163
#define THREE_ARGS_VEC_TYPE_ARG(TYPE, VL) \
164
vec<TYPE, VL> x, vec<TYPE, VL> y, vec<TYPE, VL> z
165
166
#define TWO_ARGS_LESS_ONE ONE_ARG
167
#define THREE_ARGS_LESS_ONE TWO_ARGS
168
169
#define SYCL_CONCAT_IMPL(A, B) A##B
170
#define SYCL_CONCAT(A, B) SYCL_CONCAT_IMPL(A, B)
171
172
#define LESS_ONE(NUM_ARGS) SYCL_CONCAT(NUM_ARGS, _LESS_ONE)
173
174
// 3 types.
175
#define FP_TYPES float, double, half
176
// 6 types.
177
#define SIGNED_TYPES char, signed char, short, int, long, long long
178
// 5 types
179
#define UNSIGNED_TYPES \
180
unsigned char, unsigned short, unsigned int, unsigned long, unsigned long long
181
// 11 types
182
#define INTEGER_TYPES SIGNED_TYPES, UNSIGNED_TYPES
183
// 8 types
184
#define FIXED_WIDTH_INTEGER_TYPES \
185
int8_t, int16_t, int32_t, int64_t, uint8_t, uint16_t, uint32_t, uint64_t
186
187
// Use (NAME)/(NS::NAME) to deal win min/max macros in windows.h throughout this
188
// file.
189
190
#define DEVICE_IMPL_TEMPLATE_CUSTOM_DELEGATE( \
191
NUM_ARGS, NAME, ENABLER, DELEGATOR, NS,
/*SCALAR_VEC_IMPL*/
...) \
192
template <NUM_ARGS##_TYPENAME_TYPE> \
193
detail::ENABLER<NUM_ARGS##_TEMPLATE_TYPE>(NAME)( \
194
NUM_ARGS##_TEMPLATE_TYPE_ARG) { \
195
if constexpr (detail::is_marray_v<T0>) { \
196
return detail::DELEGATOR( \
197
[](NUM_ARGS##_AUTO_ARG) { return (NS::NAME)(NUM_ARGS##_ARG); }, \
198
NUM_ARGS##_ARG); \
199
} else { \
200
return __VA_ARGS__(NUM_ARGS##_CONVERTED_ARG); \
201
} \
202
}
203
204
#define DEVICE_IMPL_TEMPLATE(NUM_ARGS, NAME, ENABLER,
/*SCALAR_VEC_IMPL*/
...) \
205
DEVICE_IMPL_TEMPLATE_CUSTOM_DELEGATE(NUM_ARGS, NAME, ENABLER, \
206
builtin_marray_impl, sycl, __VA_ARGS__)
207
208
#ifdef __SYCL_BUILD_SYCL_DLL
209
#define SYCL_BUILTIN_EXPORT __SYCL_EXPORT
210
#else
211
#define SYCL_BUILTIN_EXPORT
212
#endif
213
214
// Use extern function declaration in function scope to save compile time.
215
// Otherwise the FE has to parse multiple types/VLs/functions costing us around
216
// 0.3s in compile-time. It also allows us to skip providing all the explicit
217
// declarations through even more macro magic.
218
#define HOST_IMPL_TEMPLATE_CUSTOM_DELEGATOR( \
219
NUM_ARGS, NAME, ENABLER, FUNC_CLASS, RET_TYPE_TRAITS, DELEGATOR) \
220
template <typename... Ts> auto __##FUNC_CLASS##_##NAME##_lambda(Ts... xs) { \
221
/* Can't inline into the real lambda due to \
222
* https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112867. Can't emulate a \
223
* lambda through a local struct because templates are not allowed in \
224
* local structs. Have to specify FUNC_CLASS to avoid \
225
* ambiguity between, e.g. sycl::__cos_lambda/sycl::native::__cos_lambda \
226
* or between max in common functions and max in integer functions. \
227
*/
\
228
using ret_ty = typename detail::RET_TYPE_TRAITS< \
229
typename detail::first_type<Ts...>::type>::type; \
230
extern SYCL_BUILTIN_EXPORT ret_ty __##NAME##_impl(Ts...); \
231
return __##NAME##_impl(xs...); \
232
} \
233
template <NUM_ARGS##_TYPENAME_TYPE> \
234
detail::ENABLER<NUM_ARGS##_TEMPLATE_TYPE>(NAME)( \
235
NUM_ARGS##_TEMPLATE_TYPE_ARG) { \
236
return detail::DELEGATOR( \
237
[](auto... xs) { return __##FUNC_CLASS##_##NAME##_lambda(xs...); }, \
238
NUM_ARGS##_ARG); \
239
}
240
241
#define HOST_IMPL_TEMPLATE(NUM_ARGS, NAME, ENABLER, FUNC_CLASS, \
242
RET_TYPE_TRAITS) \
243
HOST_IMPL_TEMPLATE_CUSTOM_DELEGATOR(NUM_ARGS, NAME, ENABLER, FUNC_CLASS, \
244
RET_TYPE_TRAITS, \
245
builtin_default_host_impl)
246
247
#define HOST_IMPL_SCALAR_RET_TYPE(NUM_ARGS, NAME, RET_TYPE, TYPE) \
248
inline RET_TYPE(NAME)(NUM_ARGS##_TYPE_ARG(TYPE)) { \
249
extern SYCL_BUILTIN_EXPORT RET_TYPE __##NAME##_impl( \
250
NUM_ARGS##_TYPE(TYPE)); \
251
return __##NAME##_impl(NUM_ARGS##_ARG); \
252
}
253
254
#define HOST_IMPL_SCALAR(NUM_ARGS, NAME, TYPE) \
255
HOST_IMPL_SCALAR_RET_TYPE(NUM_ARGS, NAME, TYPE, TYPE)
include
sycl
detail
builtins
helper_macros.hpp
Generated by
1.9.1