DPC++ Runtime
Runtime libraries for oneAPI DPC++
enqueue_functions.hpp
Go to the documentation of this file.
1 //==------ enqueue_functions.hpp ------- SYCL enqueue free functions -------==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #pragma once
10 
11 #include <utility> // for std::forward
12 
13 #include <sycl/event.hpp>
15 #include <sycl/handler.hpp>
16 #include <sycl/nd_range.hpp>
17 #include <sycl/queue.hpp>
18 #include <sycl/range.hpp>
19 
20 namespace sycl {
21 inline namespace _V1 {
22 namespace ext::oneapi::experimental {
23 
24 namespace detail {
25 // Trait for identifying sycl::range and sycl::nd_range.
26 template <typename RangeT> struct is_range_or_nd_range : std::false_type {};
27 template <int Dimensions>
28 struct is_range_or_nd_range<range<Dimensions>> : std::true_type {};
29 template <int Dimensions>
30 struct is_range_or_nd_range<nd_range<Dimensions>> : std::true_type {};
31 
32 template <typename RangeT>
34 
35 template <typename LCRangeT, typename LCPropertiesT> struct LaunchConfigAccess;
36 } // namespace detail
37 
38 // Available only when Range is range or nd_range
39 template <
40  typename RangeT, typename PropertiesT = empty_properties_t,
41  typename = std::enable_if_t<
42  ext::oneapi::experimental::detail::is_range_or_nd_range_v<RangeT>>>
44 public:
45  launch_config(RangeT Range, PropertiesT Properties = {})
46  : MRange{Range}, MProperties{Properties} {}
47 
48 private:
49  RangeT MRange;
50  PropertiesT MProperties;
51 
52  const RangeT &getRange() const noexcept { return MRange; }
53 
54  const PropertiesT &getProperties() const noexcept { return MProperties; }
55 
56  template <typename LCRangeT, typename LCPropertiesT>
58 };
59 
60 namespace detail {
61 // Helper for accessing the members of launch_config.
62 template <typename LCRangeT, typename LCPropertiesT> struct LaunchConfigAccess {
64  : MLaunchConfig{LaunchConfig} {}
65 
67 
68  const LCRangeT &getRange() const noexcept { return MLaunchConfig.getRange(); }
69 
70  const LCPropertiesT &getProperties() const noexcept {
71  return MLaunchConfig.getProperties();
72  }
73 };
74 } // namespace detail
75 
76 template <typename CommandGroupFunc>
77 void submit(queue Q, CommandGroupFunc &&CGF) {
78  // TODO: Use new submit without Events.
79  Q.submit(std::forward<CommandGroupFunc>(CGF));
80 }
81 
82 template <typename CommandGroupFunc>
83 event submit_with_event(queue Q, CommandGroupFunc &&CGF) {
84  return Q.submit(std::forward<CommandGroupFunc>(CGF));
85 }
86 
87 template <typename KernelName = sycl::detail::auto_name, typename KernelType>
88 void single_task(handler &CGH, const KernelType &KernelObj) {
89  CGH.single_task<KernelName>(KernelObj);
90 }
91 
92 template <typename KernelName = sycl::detail::auto_name, typename KernelType>
93 void single_task(queue Q, const KernelType &KernelObj) {
94  submit(Q, [&](handler &CGH) { single_task<KernelName>(CGH, KernelObj); });
95 }
96 
97 template <typename... ArgsT>
98 void single_task(handler &CGH, const kernel &KernelObj, ArgsT &&...Args) {
99  CGH.set_args<ArgsT...>(std::forward<ArgsT>(Args)...);
100  CGH.single_task(KernelObj);
101 }
102 
103 template <typename... ArgsT>
104 void single_task(queue Q, const kernel &KernelObj, ArgsT &&...Args) {
105  submit(Q, [&](handler &CGH) {
106  single_task(CGH, KernelObj, std::forward<ArgsT>(Args)...);
107  });
108 }
109 
110 // TODO: Make overloads for scalar arguments for range.
111 template <typename KernelName = sycl::detail::auto_name, int Dimensions,
112  typename KernelType, typename... ReductionsT>
114  const KernelType &KernelObj, ReductionsT &&...Reductions) {
115  CGH.parallel_for<KernelName>(Range, std::forward<ReductionsT>(Reductions)...,
116  KernelObj);
117 }
118 
119 template <typename KernelName = sycl::detail::auto_name, int Dimensions,
120  typename KernelType, typename... ReductionsT>
121 void parallel_for(queue Q, range<Dimensions> Range, const KernelType &KernelObj,
122  ReductionsT &&...Reductions) {
123  submit(Q, [&](handler &CGH) {
124  parallel_for<KernelName>(CGH, Range, KernelObj,
125  std::forward<ReductionsT>(Reductions)...);
126  });
127 }
128 
129 template <typename KernelName = sycl::detail::auto_name, int Dimensions,
130  typename Properties, typename KernelType, typename... ReductionsT>
132  launch_config<range<Dimensions>, Properties> Config,
133  const KernelType &KernelObj, ReductionsT &&...Reductions) {
135  Properties>
136  ConfigAccess(Config);
137  CGH.parallel_for<KernelName>(ConfigAccess.getRange(),
138  std::forward<ReductionsT>(Reductions)...,
139  KernelObj);
140 }
141 
142 template <typename KernelName = sycl::detail::auto_name, int Dimensions,
143  typename Properties, typename KernelType, typename... ReductionsT>
144 void parallel_for(queue Q, launch_config<range<Dimensions>, Properties> Config,
145  const KernelType &KernelObj, ReductionsT &&...Reductions) {
146  submit(Q, [&](handler &CGH) {
147  parallel_for<KernelName>(CGH, Config, KernelObj,
148  std::forward<ReductionsT>(Reductions)...);
149  });
150 }
151 
152 template <int Dimensions, typename... ArgsT>
154  const kernel &KernelObj, ArgsT &&...Args) {
155  CGH.set_args<ArgsT...>(std::forward<ArgsT>(Args)...);
156  CGH.parallel_for(Range, KernelObj);
157 }
158 
159 template <int Dimensions, typename... ArgsT>
160 void parallel_for(queue Q, range<Dimensions> Range, const kernel &KernelObj,
161  ArgsT &&...Args) {
162  submit(Q, [&](handler &CGH) {
163  parallel_for(CGH, Range, KernelObj, std::forward<ArgsT>(Args)...);
164  });
165 }
166 
167 template <int Dimensions, typename Properties, typename... ArgsT>
169  launch_config<range<Dimensions>, Properties> Config,
170  const kernel &KernelObj, ArgsT &&...Args) {
172  Properties>
173  ConfigAccess(Config);
174  CGH.set_args<ArgsT...>(std::forward<ArgsT>(Args)...);
175  CGH.parallel_for(ConfigAccess.getRange(), KernelObj);
176 }
177 
178 template <int Dimensions, typename Properties, typename... ArgsT>
179 void parallel_for(queue Q, launch_config<range<Dimensions>, Properties> Config,
180  const kernel &KernelObj, ArgsT &&...Args) {
181  submit(Q, [&](handler &CGH) {
182  parallel_for(CGH, Config, KernelObj, std::forward<ArgsT>(Args)...);
183  });
184 }
185 
186 template <typename KernelName = sycl::detail::auto_name, int Dimensions,
187  typename KernelType, typename... ReductionsT>
189  const KernelType &KernelObj, ReductionsT &&...Reductions) {
190  CGH.parallel_for<KernelName>(Range, std::forward<ReductionsT>(Reductions)...,
191  KernelObj);
192 }
193 
194 template <typename KernelName = sycl::detail::auto_name, int Dimensions,
195  typename KernelType, typename... ReductionsT>
196 void nd_launch(queue Q, nd_range<Dimensions> Range, const KernelType &KernelObj,
197  ReductionsT &&...Reductions) {
198  submit(Q, [&](handler &CGH) {
199  nd_launch(CGH, Range, KernelObj, std::forward<ReductionsT>(Reductions)...);
200  });
201 }
202 
203 template <typename KernelName = sycl::detail::auto_name, int Dimensions,
204  typename Properties, typename KernelType, typename... ReductionsT>
205 void nd_launch(handler &CGH,
206  launch_config<nd_range<Dimensions>, Properties> Config,
207  const KernelType &KernelObj, ReductionsT &&...Reductions) {
208 
210  Properties>
211  ConfigAccess(Config);
212  CGH.parallel_for<KernelName>(ConfigAccess.getRange(),
213  std::forward<ReductionsT>(Reductions)...,
214  KernelObj);
215 }
216 
217 template <typename KernelName = sycl::detail::auto_name, int Dimensions,
218  typename Properties, typename KernelType, typename... ReductionsT>
219 void nd_launch(queue Q, launch_config<nd_range<Dimensions>, Properties> Config,
220  const KernelType &KernelObj, ReductionsT &&...Reductions) {
221  submit(Q, [&](handler &CGH) {
222  nd_launch(CGH, Config, KernelObj, std::forward<ReductionsT>(Reductions)...);
223  });
224 }
225 
226 template <int Dimensions, typename... ArgsT>
228  const kernel &KernelObj, ArgsT &&...Args) {
229  CGH.set_args<ArgsT...>(std::forward<ArgsT>(Args)...);
230  CGH.parallel_for(Range, KernelObj);
231 }
232 
233 template <int Dimensions, typename... ArgsT>
234 void nd_launch(queue Q, nd_range<Dimensions> Range, const kernel &KernelObj,
235  ArgsT &&...Args) {
236  submit(Q, [&](handler &CGH) {
237  nd_launch(CGH, Range, KernelObj, std::forward<ArgsT>(Args)...);
238  });
239 }
240 
241 template <int Dimensions, typename Properties, typename... ArgsT>
242 void nd_launch(handler &CGH,
243  launch_config<nd_range<Dimensions>, Properties> Config,
244  const kernel &KernelObj, ArgsT &&...Args) {
246  Properties>
247  ConfigAccess(Config);
248  CGH.set_args<ArgsT...>(std::forward<ArgsT>(Args)...);
249  CGH.parallel_for(ConfigAccess.getRange(), KernelObj);
250 }
251 
252 template <int Dimensions, typename Properties, typename... ArgsT>
253 void nd_launch(queue Q, launch_config<nd_range<Dimensions>, Properties> Config,
254  const kernel &KernelObj, ArgsT &&...Args) {
255  submit(Q, [&](handler &CGH) {
256  nd_launch(CGH, Config, KernelObj, std::forward<ArgsT>(Args)...);
257  });
258 }
259 
260 inline void memcpy(handler &CGH, void *Dest, const void *Src, size_t NumBytes) {
261  CGH.memcpy(Dest, Src, NumBytes);
262 }
263 
264 inline void memcpy(queue Q, void *Dest, const void *Src, size_t NumBytes) {
265  submit(Q, [&](handler &CGH) { memcpy(CGH, Dest, Src, NumBytes); });
266 }
267 
268 template <typename T>
269 void copy(handler &CGH, const T *Src, T *Dest, size_t Count) {
270  CGH.copy<T>(Src, Dest, Count);
271 }
272 
273 template <typename T> void copy(queue Q, const T *Src, T *Dest, size_t Count) {
274  submit(Q, [&](handler &CGH) { copy<T>(CGH, Src, Dest, Count); });
275 }
276 
277 inline void memset(handler &CGH, void *Ptr, int Value, size_t NumBytes) {
278  CGH.memset(Ptr, Value, NumBytes);
279 }
280 
281 inline void memset(queue Q, void *Ptr, int Value, size_t NumBytes) {
282  submit(Q, [&](handler &CGH) { memset(CGH, Ptr, Value, NumBytes); });
283 }
284 
285 template <typename T>
286 void fill(sycl::handler &CGH, T *Ptr, const T &Pattern, size_t Count) {
287  CGH.fill(Ptr, Pattern, Count);
288 }
289 
290 template <typename T>
291 void fill(sycl::queue Q, T *Ptr, const T &Pattern, size_t Count) {
292  submit(Q, [&](handler &CGH) { fill<T>(CGH, Ptr, Pattern, Count); });
293 }
294 
295 inline void prefetch(handler &CGH, void *Ptr, size_t NumBytes) {
296  CGH.prefetch(Ptr, NumBytes);
297 }
298 
299 inline void prefetch(queue Q, void *Ptr, size_t NumBytes) {
300  submit(Q, [&](handler &CGH) { prefetch(CGH, Ptr, NumBytes); });
301 }
302 
303 inline void mem_advise(handler &CGH, void *Ptr, size_t NumBytes, int Advice) {
304  CGH.mem_advise(Ptr, NumBytes, Advice);
305 }
306 
307 inline void mem_advise(queue Q, void *Ptr, size_t NumBytes, int Advice) {
308  submit(Q, [&](handler &CGH) { mem_advise(CGH, Ptr, NumBytes, Advice); });
309 }
310 
311 inline void barrier(handler &CGH) { CGH.ext_oneapi_barrier(); }
312 
313 inline void barrier(queue Q) {
314  submit(Q, [&](handler &CGH) { barrier(CGH); });
315 }
316 
317 inline void partial_barrier(handler &CGH, const std::vector<event> &Events) {
318  CGH.ext_oneapi_barrier(Events);
319 }
320 
321 inline void partial_barrier(queue Q, const std::vector<event> &Events) {
322  submit(Q, [&](handler &CGH) { partial_barrier(CGH, Events); });
323 }
324 
325 } // namespace ext::oneapi::experimental
326 } // namespace _V1
327 } // namespace sycl
launch_config(RangeT Range, PropertiesT Properties={})
Command group handler class.
Definition: handler.hpp:461
void single_task(_KERNELFUNCPARAM(KernelFunc))
Defines and invokes a SYCL kernel function as a function object type.
Definition: handler.hpp:2011
void parallel_for(range< 1 > NumWorkItems, _KERNELFUNCPARAM(KernelFunc))
Definition: handler.hpp:2017
void copy(accessor< T_Src, Dims, AccessMode, AccessTarget, IsPlaceholder > Src, std::shared_ptr< T_Dst > Dst)
Copies the content of memory object accessed by Src into the memory pointed by Dst.
Definition: handler.hpp:2597
void memcpy(void *Dest, const void *Src, size_t Count)
Copies data from one memory region to another, each is either a host pointer or a pointer within USM ...
Definition: handler.cpp:947
void mem_advise(const void *Ptr, size_t Length, int Advice)
Provides additional information to the underlying runtime about how different allocations are used.
Definition: handler.cpp:971
void fill(accessor< T, Dims, AccessMode, AccessTarget, IsPlaceholder, PropertyListT > Dst, const T &Pattern)
Fills memory pointed by accessor with the pattern given.
Definition: handler.hpp:2826
void prefetch(const void *Ptr, size_t Count)
Provides hints to the runtime library that data should be made available on a device earlier than Uni...
Definition: handler.cpp:964
void memset(void *Dest, int Value, size_t Count)
Fills the memory pointed by a USM pointer with the value specified.
Definition: handler.cpp:955
void ext_oneapi_barrier()
Prevents any commands submitted afterward to this queue from executing until all commands previously ...
Definition: handler.hpp:2881
void set_args(Ts &&...Args)
Sets arguments for OpenCL interoperability kernels.
Definition: handler.hpp:1999
Provides an abstraction of a SYCL kernel.
Definition: kernel.hpp:77
Defines the iteration domain of both the work-groups and the overall dispatch.
Definition: nd_range.hpp:22
Encapsulates a single SYCL queue which schedules kernels on a SYCL device.
Definition: queue.hpp:105
std::enable_if_t< std::is_invocable_r_v< void, T, handler & >, event > submit(T CGF, const detail::code_location &CodeLoc=detail::code_location::current())
Submits a command group function object to the queue, in order to be scheduled for execution on the d...
Definition: queue.hpp:340
Defines the iteration domain of either a single work-group in a parallel dispatch,...
Definition: range.hpp:26
void mem_advise(handler &CGH, void *Ptr, size_t NumBytes, int Advice)
void copy(handler &CGH, const T *Src, T *Dest, size_t Count)
void prefetch(handler &CGH, void *Ptr, size_t NumBytes)
event submit_with_event(queue Q, CommandGroupFunc &&CGF)
void single_task(handler &CGH, const KernelType &KernelObj)
void memcpy(handler &CGH, void *Dest, const void *Src, size_t NumBytes)
void partial_barrier(handler &CGH, const std::vector< event > &Events)
void submit(queue Q, CommandGroupFunc &&CGF)
void nd_launch(handler &CGH, nd_range< Dimensions > Range, const KernelType &KernelObj, ReductionsT &&...Reductions)
void fill(sycl::handler &CGH, T *Ptr, const T &Pattern, size_t Count)
decltype(properties{}) empty_properties_t
Definition: properties.hpp:190
void memset(handler &CGH, void *Ptr, int Value, size_t NumBytes)
void parallel_for(handler &CGH, range< Dimensions > Range, const KernelType &KernelObj, ReductionsT &&...Reductions)
class __SYCL_EBO __SYCL_SPECIAL_CLASS Dimensions
Definition: access.hpp:18
_Abi const simd< _Tp, _Abi > & noexcept
Definition: simd.hpp:1324
LaunchConfigAccess(const launch_config< LCRangeT, LCPropertiesT > &LaunchConfig)
const launch_config< LCRangeT, LCPropertiesT > & MLaunchConfig