DPC++ Runtime
Runtime libraries for oneAPI DPC++
helper_macros.hpp
Go to the documentation of this file.
1 //==-- helper_macros.hpp -- Utility macros to implement sycl builtins ------==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #pragma once
10 
11 // Usage:
12 // #define HANDLE_TYPE(INVARIANT_ARG1, INVARIANT_ARG2, TYPE) ...
13 // FOR_EACH2(HANDLE_TYPE, A1, A2, TYPE1, TYPE2, ...)
14 // it will expand into
15 // HANDLE_TYPE(A1, A2, TYPE1)
16 // HANDLE_TYPE(A1, A2, TYPE2)
17 // ...
18 // Number of "invariant" arguments determines the numeric suffix for the
19 // FOR_EACHN. Only 0-4 are currently supported, and up to 15 types at most.
20 #define GET_MACRO(_1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, \
21  _15, NAME, ...) \
22  NAME
23 #define FOR_EACH4_A1(BASE_CASE, FIXED1, FIXED2, FIXED3, FIXED4, ARG1) \
24  BASE_CASE(FIXED1, FIXED2, FIXED3, FIXED4, ARG1)
25 #define FOR_EACH4_A2(BASE_CASE, FIXED1, FIXED2, FIXED3, FIXED4, ARG1, ARG2) \
26  BASE_CASE(FIXED1, FIXED2, FIXED3, FIXED4, ARG1) \
27  BASE_CASE(FIXED1, FIXED2, FIXED3, FIXED4, ARG2)
28 #define FOR_EACH4_A3(BASE_CASE, FIXED1, FIXED2, FIXED3, FIXED4, ARG1, ARG2, \
29  ARG3) \
30  FOR_EACH4_A2(BASE_CASE, FIXED1, FIXED2, FIXED3, FIXED4, ARG1, ARG2) \
31  BASE_CASE(FIXED1, FIXED2, FIXED3, FIXED4, ARG3)
32 #define FOR_EACH4_A4(BASE_CASE, FIXED1, FIXED2, FIXED3, FIXED4, ARG1, ARG2, \
33  ARG3, ARG4) \
34  FOR_EACH4_A3(BASE_CASE, FIXED1, FIXED2, FIXED3, FIXED4, ARG1, ARG2, ARG3) \
35  BASE_CASE(FIXED1, FIXED2, FIXED3, FIXED4, ARG4)
36 #define FOR_EACH4_A5(BASE_CASE, FIXED1, FIXED2, FIXED3, FIXED4, ARG1, ARG2, \
37  ARG3, ARG4, ARG5) \
38  FOR_EACH4_A4(BASE_CASE, FIXED1, FIXED2, FIXED3, FIXED4, ARG1, ARG2, ARG3, \
39  ARG4) \
40  BASE_CASE(FIXED1, FIXED2, FIXED3, FIXED4, ARG5)
41 #define FOR_EACH4_A6(BASE_CASE, FIXED1, FIXED2, FIXED3, FIXED4, ARG1, ARG2, \
42  ARG3, ARG4, ARG5, ARG6) \
43  FOR_EACH4_A5(BASE_CASE, FIXED1, FIXED2, FIXED3, FIXED4, ARG1, ARG2, ARG3, \
44  ARG4, ARG5) \
45  BASE_CASE(FIXED1, FIXED2, FIXED3, FIXED4, ARG6)
46 #define FOR_EACH4_A7(BASE_CASE, FIXED1, FIXED2, FIXED3, FIXED4, ARG1, ARG2, \
47  ARG3, ARG4, ARG5, ARG6, ARG7) \
48  FOR_EACH4_A6(BASE_CASE, FIXED1, FIXED2, FIXED3, FIXED4, ARG1, ARG2, ARG3, \
49  ARG4, ARG5, ARG6) \
50  BASE_CASE(FIXED1, FIXED2, FIXED3, FIXED4, ARG7)
51 #define FOR_EACH4_A8(BASE_CASE, FIXED1, FIXED2, FIXED3, FIXED4, ARG1, ARG2, \
52  ARG3, ARG4, ARG5, ARG6, ARG7, ARG8) \
53  FOR_EACH4_A7(BASE_CASE, FIXED1, FIXED2, FIXED3, FIXED4, ARG1, ARG2, ARG3, \
54  ARG4, ARG5, ARG6, ARG7) \
55  BASE_CASE(FIXED1, FIXED2, FIXED3, FIXED4, ARG8)
56 #define FOR_EACH4_A11(BASE_CASE, FIXED1, FIXED2, FIXED3, FIXED4, ARG1, ARG2, \
57  ARG3, ARG4, ARG5, ARG6, ARG7, ARG8, ARG9, ARG10, ARG11) \
58  FOR_EACH4_A7(BASE_CASE, FIXED1, FIXED2, FIXED3, FIXED4, ARG1, ARG2, ARG3, \
59  ARG4, ARG5, ARG6, ARG7) \
60  FOR_EACH4_A4(BASE_CASE, FIXED1, FIXED2, FIXED3, FIXED4, ARG8, ARG9, ARG10, \
61  ARG11)
62 #define FOR_EACH4_A14(BASE_CASE, FIXED1, FIXED2, FIXED3, FIXED4, ARG1, ARG2, \
63  ARG3, ARG4, ARG5, ARG6, ARG7, ARG8, ARG9, ARG10, ARG11, \
64  ARG12, ARG13, ARG14) \
65  FOR_EACH4_A11(BASE_CASE, FIXED1, FIXED2, FIXED3, FIXED4, ARG1, ARG2, ARG3, \
66  ARG4, ARG5, ARG6, ARG7, ARG8, ARG9, ARG10, ARG11) \
67  FOR_EACH4_A3(BASE_CASE, FIXED1, FIXED2, FIXED3, FIXED4, ARG12, ARG13, ARG14)
68 
69 // https://stackoverflow.com/a/5134656
70 // Workaround for MSVC's non-standard preprocessor behavior. Alternatively,
71 // /Zc:preprocessor could be used to to fix that. Only necessary for custom host
72 // compiler scenario.
73 #define EXPAND(x) x
74 
75 #define FOR_EACH4(BASE_CASE, FIXED1, FIXED2, FIXED3, FIXED4, ...) \
76  EXPAND(GET_MACRO( \
77  __VA_ARGS__, FOR_EACH4##_A15, FOR_EACH4##_A14, FOR_EACH4##_A13, \
78  FOR_EACH4##_A12, FOR_EACH4##_A11, FOR_EACH4##_A10, FOR_EACH4##_A9, \
79  FOR_EACH4##_A8, FOR_EACH4##_A7, FOR_EACH4##_A6, FOR_EACH4##_A5, \
80  FOR_EACH4##_A4, FOR_EACH4##_A3, FOR_EACH4##_A2, FOR_EACH4##_A1, \
81  _0, )(BASE_CASE, FIXED1, FIXED2, FIXED3, FIXED4, __VA_ARGS__))
82 
83 #define FOR_EACH3_BASE(BASE_CASE, FIXED1, FIXED2, FIXED3, ARG1) \
84  BASE_CASE(FIXED1, FIXED2, FIXED3, ARG1)
85 #define FOR_EACH3(BASE_CASE, FIXED1, FIXED2, FIXED3, ...) \
86  FOR_EACH4(FOR_EACH3_BASE, BASE_CASE, FIXED1, FIXED2, FIXED3, __VA_ARGS__)
87 
88 #define FOR_EACH2_BASE(BASE_CASE, FIXED1, FIXED2, ARG1) \
89  BASE_CASE(FIXED1, FIXED2, ARG1)
90 #define FOR_EACH2(BASE_CASE, FIXED1, FIXED2, ...) \
91  FOR_EACH3(FOR_EACH2_BASE, BASE_CASE, FIXED1, FIXED2, __VA_ARGS__)
92 
93 #define FOR_EACH1_BASE(BASE_CASE, FIXED1, ARG1) BASE_CASE(FIXED1, ARG1)
94 #define FOR_EACH1(BASE_CASE, FIXED1, ...) \
95  FOR_EACH2(FOR_EACH1_BASE, BASE_CASE, FIXED1, __VA_ARGS__)
96 
97 #define FOR_EACH0_BASE(BASE_CASE, ARG1) BASE_CASE(ARG1)
98 #define FOR_EACH0(BASE_CASE, ...) \
99  FOR_EACH1(FOR_EACH0_BASE, BASE_CASE, __VA_ARGS__)
100 
101 // Some helpers to unify implementation between different numbers of template
102 // types.
103 
104 #define ONE_ARG_TYPENAME_TYPE typename T0
105 #define TWO_ARGS_TYPENAME_TYPE typename T0, typename T1
106 #define THREE_ARGS_TYPENAME_TYPE typename T0, typename T1, typename T2
107 
108 #define ONE_ARG_TEMPLATE_TYPE T0
109 #define TWO_ARGS_TEMPLATE_TYPE T0, T1
110 #define THREE_ARGS_TEMPLATE_TYPE T0, T1, T2
111 
112 #define ONE_ARG_TEMPLATE_TYPE_ARG T0 x
113 #define TWO_ARGS_TEMPLATE_TYPE_ARG T0 x, T1 y
114 #define THREE_ARGS_TEMPLATE_TYPE_ARG T0 x, T1 y, T2 z
115 
116 #define ONE_ARG_TEMPLATE_TYPE_ARG_REF T0 &x
117 #define TWO_ARGS_TEMPLATE_TYPE_ARG_REF T0 &x, T1 &y
118 #define THREE_ARGS_TEMPLATE_TYPE_ARG_REF T0 &x, T1 &y, T2 &z
119 
120 #define ONE_ARG_ARG x
121 #define TWO_ARGS_ARG x, y
122 #define THREE_ARGS_ARG x, y, z
123 
124 #define ONE_ARG_SIMPLIFIED_ARG \
125  simplify_if_swizzle_t<T0> { x }
126 #define TWO_ARGS_SIMPLIFIED_ARG \
127  simplify_if_swizzle_t<T0>{x}, simplify_if_swizzle_t<T1> { y }
128 #define THREE_ARGS_SIMPLIFIED_ARG \
129  simplify_if_swizzle_t<T0>{x}, simplify_if_swizzle_t<T1>{y}, \
130  simplify_if_swizzle_t<T2> { \
131  z \
132  }
133 
134 #define TWO_ARGS_ARG_ROTATED y, x
135 #define THREE_ARGS_ARG_ROTATED z, x, y
136 
137 #define ONE_ARG_CONVERTED_ARG detail::builtins::convert_arg(x)
138 #define TWO_ARGS_CONVERTED_ARG \
139  detail::builtins::convert_arg(x), detail::builtins::convert_arg(y)
140 #define THREE_ARGS_CONVERTED_ARG \
141  detail::builtins::convert_arg(x), detail::builtins::convert_arg(y), \
142  detail::builtins::convert_arg(z)
143 
144 #define ONE_ARG_AUTO_ARG auto x
145 #define TWO_ARGS_AUTO_ARG auto x, auto y
146 #define THREE_ARGS_AUTO_ARG auto x, auto y, auto z
147 
148 #define ONE_ARG_TYPE_ARG(TYPE) TYPE x
149 #define TWO_ARGS_TYPE_ARG(TYPE) TYPE x, TYPE y
150 #define THREE_ARGS_TYPE_ARG(TYPE) TYPE x, TYPE y, TYPE z
151 
152 #define ONE_ARG_TYPE(TYPE) TYPE
153 #define TWO_ARGS_TYPE(TYPE) TYPE, TYPE
154 #define THREE_ARGS_TYPE(TYPE) TYPE, TYPE, TYPE
155 
156 #define ONE_ARG_VEC_TYPE(TYPE, VL) vec<TYPE, VL>
157 #define TWO_ARGS_VEC_TYPE(TYPE, VL) vec<TYPE, VL>, vec<TYPE, VL>
158 #define THREE_ARGS_VEC_TYPE(TYPE, VL) \
159  vec<TYPE, VL>, vec<TYPE, VL>, vec<TYPE, VL>
160 
161 #define ONE_ARG_VEC_TYPE_ARG(TYPE, VL) vec<TYPE, VL> x
162 #define TWO_ARGS_VEC_TYPE_ARG(TYPE, VL) vec<TYPE, VL> x, vec<TYPE, VL> y
163 #define THREE_ARGS_VEC_TYPE_ARG(TYPE, VL) \
164  vec<TYPE, VL> x, vec<TYPE, VL> y, vec<TYPE, VL> z
165 
166 #define TWO_ARGS_LESS_ONE ONE_ARG
167 #define THREE_ARGS_LESS_ONE TWO_ARGS
168 
169 #define SYCL_CONCAT_IMPL(A, B) A##B
170 #define SYCL_CONCAT(A, B) SYCL_CONCAT_IMPL(A, B)
171 
172 #define LESS_ONE(NUM_ARGS) SYCL_CONCAT(NUM_ARGS, _LESS_ONE)
173 
174 // 3 types.
175 #define FP_TYPES float, double, half
176 // 6 types.
177 #define SIGNED_TYPES char, signed char, short, int, long, long long
178 // 5 types
179 #define UNSIGNED_TYPES \
180  unsigned char, unsigned short, unsigned int, unsigned long, unsigned long long
181 // 11 types
182 #define INTEGER_TYPES SIGNED_TYPES, UNSIGNED_TYPES
183 // 8 types
184 #define FIXED_WIDTH_INTEGER_TYPES \
185  int8_t, int16_t, int32_t, int64_t, uint8_t, uint16_t, uint32_t, uint64_t
186 
187 // Use (NAME)/(NS::NAME) to deal win min/max macros in windows.h throughout this
188 // file.
189 
190 #define DEVICE_IMPL_TEMPLATE_CUSTOM_DELEGATE( \
191  NUM_ARGS, NAME, ENABLER, DELEGATOR, NS, /*SCALAR_VEC_IMPL*/...) \
192  template <NUM_ARGS##_TYPENAME_TYPE> \
193  detail::ENABLER<NUM_ARGS##_TEMPLATE_TYPE>(NAME)( \
194  NUM_ARGS##_TEMPLATE_TYPE_ARG) { \
195  if constexpr (detail::is_marray_v<T0>) { \
196  return detail::DELEGATOR( \
197  [](NUM_ARGS##_AUTO_ARG) { return (NS::NAME)(NUM_ARGS##_ARG); }, \
198  NUM_ARGS##_ARG); \
199  } else { \
200  return __VA_ARGS__(NUM_ARGS##_CONVERTED_ARG); \
201  } \
202  }
203 
204 #define DEVICE_IMPL_TEMPLATE(NUM_ARGS, NAME, ENABLER, /*SCALAR_VEC_IMPL*/...) \
205  DEVICE_IMPL_TEMPLATE_CUSTOM_DELEGATE(NUM_ARGS, NAME, ENABLER, \
206  builtin_marray_impl, sycl, __VA_ARGS__)
207 
208 #ifdef __SYCL_BUILD_SYCL_DLL
209 #define SYCL_BUILTIN_EXPORT __SYCL_EXPORT
210 #else
211 #define SYCL_BUILTIN_EXPORT
212 #endif
213 
214 // Use extern function declaration in function scope to save compile time.
215 // Otherwise the FE has to parse multiple types/VLs/functions costing us around
216 // 0.3s in compile-time. It also allows us to skip providing all the explicit
217 // declarations through even more macro magic.
218 #define HOST_IMPL_TEMPLATE_CUSTOM_DELEGATOR( \
219  NUM_ARGS, NAME, ENABLER, FUNC_CLASS, RET_TYPE_TRAITS, DELEGATOR) \
220  template <typename... Ts> auto __##FUNC_CLASS##_##NAME##_lambda(Ts... xs) { \
221  /* Can't inline into the real lambda due to \
222  * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112867. Can't emulate a \
223  * lambda through a local struct because templates are not allowed in \
224  * local structs. Have to specify FUNC_CLASS to avoid \
225  * ambiguity between, e.g. sycl::__cos_lambda/sycl::native::__cos_lambda \
226  * or between max in common functions and max in integer functions. \
227  */ \
228  using ret_ty = typename detail::RET_TYPE_TRAITS< \
229  typename detail::first_type<Ts...>::type>::type; \
230  extern SYCL_BUILTIN_EXPORT ret_ty __##NAME##_impl(Ts...); \
231  return __##NAME##_impl(xs...); \
232  } \
233  template <NUM_ARGS##_TYPENAME_TYPE> \
234  detail::ENABLER<NUM_ARGS##_TEMPLATE_TYPE>(NAME)( \
235  NUM_ARGS##_TEMPLATE_TYPE_ARG) { \
236  return detail::DELEGATOR( \
237  [](auto... xs) { return __##FUNC_CLASS##_##NAME##_lambda(xs...); }, \
238  NUM_ARGS##_ARG); \
239  }
240 
241 #define HOST_IMPL_TEMPLATE(NUM_ARGS, NAME, ENABLER, FUNC_CLASS, \
242  RET_TYPE_TRAITS) \
243  HOST_IMPL_TEMPLATE_CUSTOM_DELEGATOR(NUM_ARGS, NAME, ENABLER, FUNC_CLASS, \
244  RET_TYPE_TRAITS, \
245  builtin_default_host_impl)
246 
247 #define HOST_IMPL_SCALAR_RET_TYPE(NUM_ARGS, NAME, RET_TYPE, TYPE) \
248  inline RET_TYPE(NAME)(NUM_ARGS##_TYPE_ARG(TYPE)) { \
249  extern SYCL_BUILTIN_EXPORT RET_TYPE __##NAME##_impl( \
250  NUM_ARGS##_TYPE(TYPE)); \
251  return __##NAME##_impl(NUM_ARGS##_ARG); \
252  }
253 
254 #define HOST_IMPL_SCALAR(NUM_ARGS, NAME, TYPE) \
255  HOST_IMPL_SCALAR_RET_TYPE(NUM_ARGS, NAME, TYPE, TYPE)