DPC++ Runtime
Runtime libraries for oneAPI DPC++
enqueue_functions.hpp
Go to the documentation of this file.
1 //==------ enqueue_functions.hpp ------- SYCL enqueue free functions -------==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #pragma once
10 
11 #include <utility>
12 
13 #include <sycl/detail/common.hpp>
14 #include <sycl/event.hpp>
16 #include <sycl/handler.hpp>
17 #include <sycl/nd_range.hpp>
18 #include <sycl/queue.hpp>
19 #include <sycl/range.hpp>
20 
21 namespace sycl {
22 inline namespace _V1 {
23 namespace ext::oneapi::experimental {
24 
25 namespace detail {
26 // Trait for identifying sycl::range and sycl::nd_range.
27 template <typename RangeT> struct is_range_or_nd_range : std::false_type {};
28 template <int Dimensions>
29 struct is_range_or_nd_range<range<Dimensions>> : std::true_type {};
30 template <int Dimensions>
31 struct is_range_or_nd_range<nd_range<Dimensions>> : std::true_type {};
32 
33 template <typename RangeT>
35 
36 template <typename LCRangeT, typename LCPropertiesT> struct LaunchConfigAccess;
37 } // namespace detail
38 
39 // Available only when Range is range or nd_range
40 template <
41  typename RangeT, typename PropertiesT = empty_properties_t,
42  typename = std::enable_if_t<
43  ext::oneapi::experimental::detail::is_range_or_nd_range_v<RangeT>>>
45 public:
46  launch_config(RangeT Range, PropertiesT Properties = {})
47  : MRange{Range}, MProperties{Properties} {}
48 
49 private:
50  RangeT MRange;
51  PropertiesT MProperties;
52 
53  const RangeT &getRange() const noexcept { return MRange; }
54 
55  const PropertiesT &getProperties() const noexcept { return MProperties; }
56 
57  template <typename LCRangeT, typename LCPropertiesT>
59 };
60 
61 namespace detail {
62 // Helper for accessing the members of launch_config.
63 template <typename LCRangeT, typename LCPropertiesT> struct LaunchConfigAccess {
65  : MLaunchConfig{LaunchConfig} {}
66 
68 
69  const LCRangeT &getRange() const noexcept { return MLaunchConfig.getRange(); }
70 
71  const LCPropertiesT &getProperties() const noexcept {
72  return MLaunchConfig.getProperties();
73  }
74 };
75 
76 template <typename CommandGroupFunc>
77 void submit_impl(queue &Q, CommandGroupFunc &&CGF,
78  const sycl::detail::code_location &CodeLoc) {
79  Q.submit_without_event(std::forward<CommandGroupFunc>(CGF), CodeLoc);
80 }
81 } // namespace detail
82 
83 template <typename CommandGroupFunc>
84 void submit(queue Q, CommandGroupFunc &&CGF,
85  const sycl::detail::code_location &CodeLoc =
86  sycl::detail::code_location::current()) {
88  Q, std::forward<CommandGroupFunc>(CGF), CodeLoc);
89 }
90 
91 template <typename CommandGroupFunc>
92 event submit_with_event(queue Q, CommandGroupFunc &&CGF,
93  const sycl::detail::code_location &CodeLoc =
94  sycl::detail::code_location::current()) {
95  return Q.submit(std::forward<CommandGroupFunc>(CGF), CodeLoc);
96 }
97 
98 template <typename KernelName = sycl::detail::auto_name, typename KernelType>
99 void single_task(handler &CGH, const KernelType &KernelObj) {
100  CGH.single_task<KernelName>(KernelObj);
101 }
102 
103 template <typename KernelName = sycl::detail::auto_name, typename KernelType>
104 void single_task(queue Q, const KernelType &KernelObj,
105  const sycl::detail::code_location &CodeLoc =
106  sycl::detail::code_location::current()) {
107  submit(
108  Q, [&](handler &CGH) { single_task<KernelName>(CGH, KernelObj); },
109  CodeLoc);
110 }
111 
112 template <typename... ArgsT>
113 void single_task(handler &CGH, const kernel &KernelObj, ArgsT &&...Args) {
114  CGH.set_args<ArgsT...>(std::forward<ArgsT>(Args)...);
115  CGH.single_task(KernelObj);
116 }
117 
118 template <typename... ArgsT>
119 void single_task(queue Q, const kernel &KernelObj, ArgsT &&...Args) {
120  submit(Q, [&](handler &CGH) {
121  single_task(CGH, KernelObj, std::forward<ArgsT>(Args)...);
122  });
123 }
124 
125 // TODO: Make overloads for scalar arguments for range.
126 template <typename KernelName = sycl::detail::auto_name, int Dimensions,
127  typename KernelType, typename... ReductionsT>
129  const KernelType &KernelObj, ReductionsT &&...Reductions) {
130  CGH.parallel_for<KernelName>(Range, std::forward<ReductionsT>(Reductions)...,
131  KernelObj);
132 }
133 
134 template <typename KernelName = sycl::detail::auto_name, int Dimensions,
135  typename KernelType, typename... ReductionsT>
136 void parallel_for(queue Q, range<Dimensions> Range, const KernelType &KernelObj,
137  ReductionsT &&...Reductions) {
138  submit(Q, [&](handler &CGH) {
139  parallel_for<KernelName>(CGH, Range, KernelObj,
140  std::forward<ReductionsT>(Reductions)...);
141  });
142 }
143 
144 template <typename KernelName = sycl::detail::auto_name, int Dimensions,
145  typename Properties, typename KernelType, typename... ReductionsT>
147  launch_config<range<Dimensions>, Properties> Config,
148  const KernelType &KernelObj, ReductionsT &&...Reductions) {
150  Properties>
151  ConfigAccess(Config);
152  CGH.parallel_for<KernelName>(
153  ConfigAccess.getRange(), ConfigAccess.getProperties(),
154  std::forward<ReductionsT>(Reductions)..., KernelObj);
155 }
156 
157 template <typename KernelName = sycl::detail::auto_name, int Dimensions,
158  typename Properties, typename KernelType, typename... ReductionsT>
159 void parallel_for(queue Q, launch_config<range<Dimensions>, Properties> Config,
160  const KernelType &KernelObj, ReductionsT &&...Reductions) {
161  submit(Q, [&](handler &CGH) {
162  parallel_for<KernelName>(CGH, Config, KernelObj,
163  std::forward<ReductionsT>(Reductions)...);
164  });
165 }
166 
167 template <int Dimensions, typename... ArgsT>
169  const kernel &KernelObj, ArgsT &&...Args) {
170  CGH.set_args<ArgsT...>(std::forward<ArgsT>(Args)...);
171  CGH.parallel_for(Range, KernelObj);
172 }
173 
174 template <int Dimensions, typename... ArgsT>
175 void parallel_for(queue Q, range<Dimensions> Range, const kernel &KernelObj,
176  ArgsT &&...Args) {
177  submit(Q, [&](handler &CGH) {
178  parallel_for(CGH, Range, KernelObj, std::forward<ArgsT>(Args)...);
179  });
180 }
181 
182 template <int Dimensions, typename Properties, typename... ArgsT>
184  launch_config<range<Dimensions>, Properties> Config,
185  const kernel &KernelObj, ArgsT &&...Args) {
187  Properties>
188  ConfigAccess(Config);
189  CGH.set_args<ArgsT...>(std::forward<ArgsT>(Args)...);
190  CGH.parallel_for(ConfigAccess.getRange(), KernelObj);
191 }
192 
193 template <int Dimensions, typename Properties, typename... ArgsT>
194 void parallel_for(queue Q, launch_config<range<Dimensions>, Properties> Config,
195  const kernel &KernelObj, ArgsT &&...Args) {
196  submit(Q, [&](handler &CGH) {
197  parallel_for(CGH, Config, KernelObj, std::forward<ArgsT>(Args)...);
198  });
199 }
200 
201 template <typename KernelName = sycl::detail::auto_name, int Dimensions,
202  typename KernelType, typename... ReductionsT>
204  const KernelType &KernelObj, ReductionsT &&...Reductions) {
205  CGH.parallel_for<KernelName>(Range, std::forward<ReductionsT>(Reductions)...,
206  KernelObj);
207 }
208 
209 template <typename KernelName = sycl::detail::auto_name, int Dimensions,
210  typename KernelType, typename... ReductionsT>
211 void nd_launch(queue Q, nd_range<Dimensions> Range, const KernelType &KernelObj,
212  ReductionsT &&...Reductions) {
213  submit(Q, [&](handler &CGH) {
214  nd_launch<KernelName>(CGH, Range, KernelObj,
215  std::forward<ReductionsT>(Reductions)...);
216  });
217 }
218 
219 template <typename KernelName = sycl::detail::auto_name, int Dimensions,
220  typename Properties, typename KernelType, typename... ReductionsT>
221 void nd_launch(handler &CGH,
222  launch_config<nd_range<Dimensions>, Properties> Config,
223  const KernelType &KernelObj, ReductionsT &&...Reductions) {
224 
226  Properties>
227  ConfigAccess(Config);
228  CGH.parallel_for<KernelName>(
229  ConfigAccess.getRange(), ConfigAccess.getProperties(),
230  std::forward<ReductionsT>(Reductions)..., KernelObj);
231 }
232 
233 template <typename KernelName = sycl::detail::auto_name, int Dimensions,
234  typename Properties, typename KernelType, typename... ReductionsT>
235 void nd_launch(queue Q, launch_config<nd_range<Dimensions>, Properties> Config,
236  const KernelType &KernelObj, ReductionsT &&...Reductions) {
237  submit(Q, [&](handler &CGH) {
238  nd_launch<KernelName>(CGH, Config, KernelObj,
239  std::forward<ReductionsT>(Reductions)...);
240  });
241 }
242 
243 template <int Dimensions, typename... ArgsT>
245  const kernel &KernelObj, ArgsT &&...Args) {
246  CGH.set_args<ArgsT...>(std::forward<ArgsT>(Args)...);
247  CGH.parallel_for(Range, KernelObj);
248 }
249 
250 template <int Dimensions, typename... ArgsT>
251 void nd_launch(queue Q, nd_range<Dimensions> Range, const kernel &KernelObj,
252  ArgsT &&...Args) {
253  submit(Q, [&](handler &CGH) {
254  nd_launch(CGH, Range, KernelObj, std::forward<ArgsT>(Args)...);
255  });
256 }
257 
258 template <int Dimensions, typename Properties, typename... ArgsT>
259 void nd_launch(handler &CGH,
260  launch_config<nd_range<Dimensions>, Properties> Config,
261  const kernel &KernelObj, ArgsT &&...Args) {
263  Properties>
264  ConfigAccess(Config);
265  CGH.set_args<ArgsT...>(std::forward<ArgsT>(Args)...);
266  CGH.parallel_for(ConfigAccess.getRange(), KernelObj);
267 }
268 
269 template <int Dimensions, typename Properties, typename... ArgsT>
270 void nd_launch(queue Q, launch_config<nd_range<Dimensions>, Properties> Config,
271  const kernel &KernelObj, ArgsT &&...Args) {
272  submit(Q, [&](handler &CGH) {
273  nd_launch(CGH, Config, KernelObj, std::forward<ArgsT>(Args)...);
274  });
275 }
276 
277 inline void memcpy(handler &CGH, void *Dest, const void *Src, size_t NumBytes) {
278  CGH.memcpy(Dest, Src, NumBytes);
279 }
280 
281 __SYCL_EXPORT void memcpy(queue Q, void *Dest, const void *Src, size_t NumBytes,
282  const sycl::detail::code_location &CodeLoc =
283  sycl::detail::code_location::current());
284 
285 template <typename T>
286 void copy(handler &CGH, const T *Src, T *Dest, size_t Count) {
287  CGH.copy<T>(Src, Dest, Count);
288 }
289 
290 template <typename T>
291 void copy(queue Q, const T *Src, T *Dest, size_t Count,
292  const sycl::detail::code_location &CodeLoc =
293  sycl::detail::code_location::current()) {
294  submit(Q, [&](handler &CGH) { copy<T>(CGH, Src, Dest, Count); }, CodeLoc);
295 }
296 
297 inline void memset(handler &CGH, void *Ptr, int Value, size_t NumBytes) {
298  CGH.memset(Ptr, Value, NumBytes);
299 }
300 
301 __SYCL_EXPORT void memset(queue Q, void *Ptr, int Value, size_t NumBytes,
302  const sycl::detail::code_location &CodeLoc =
303  sycl::detail::code_location::current());
304 
305 template <typename T>
306 void fill(sycl::handler &CGH, T *Ptr, const T &Pattern, size_t Count) {
307  CGH.fill(Ptr, Pattern, Count);
308 }
309 
310 template <typename T>
311 void fill(sycl::queue Q, T *Ptr, const T &Pattern, size_t Count,
312  const sycl::detail::code_location &CodeLoc =
313  sycl::detail::code_location::current()) {
314  submit(Q, [&](handler &CGH) { fill<T>(CGH, Ptr, Pattern, Count); }, CodeLoc);
315 }
316 
317 inline void prefetch(handler &CGH, void *Ptr, size_t NumBytes) {
318  CGH.prefetch(Ptr, NumBytes);
319 }
320 
321 inline void prefetch(queue Q, void *Ptr, size_t NumBytes,
322  const sycl::detail::code_location &CodeLoc =
323  sycl::detail::code_location::current()) {
324  submit(Q, [&](handler &CGH) { prefetch(CGH, Ptr, NumBytes); }, CodeLoc);
325 }
326 
327 inline void mem_advise(handler &CGH, void *Ptr, size_t NumBytes, int Advice) {
328  CGH.mem_advise(Ptr, NumBytes, Advice);
329 }
330 
331 __SYCL_EXPORT void mem_advise(queue Q, void *Ptr, size_t NumBytes, int Advice,
332  const sycl::detail::code_location &CodeLoc =
333  sycl::detail::code_location::current());
334 
335 inline void barrier(handler &CGH) { CGH.ext_oneapi_barrier(); }
336 
337 inline void barrier(queue Q, const sycl::detail::code_location &CodeLoc =
338  sycl::detail::code_location::current()) {
339  submit(Q, [&](handler &CGH) { barrier(CGH); }, CodeLoc);
340 }
341 
342 inline void partial_barrier(handler &CGH, const std::vector<event> &Events) {
343  CGH.ext_oneapi_barrier(Events);
344 }
345 
346 inline void partial_barrier(queue Q, const std::vector<event> &Events,
347  const sycl::detail::code_location &CodeLoc =
348  sycl::detail::code_location::current()) {
349  submit(Q, [&](handler &CGH) { partial_barrier(CGH, Events); }, CodeLoc);
350 }
351 
352 } // namespace ext::oneapi::experimental
353 } // namespace _V1
354 } // namespace sycl
launch_config(RangeT Range, PropertiesT Properties={})
Command group handler class.
Definition: handler.hpp:468
void single_task(_KERNELFUNCPARAM(KernelFunc))
Defines and invokes a SYCL kernel function as a function object type.
Definition: handler.hpp:2005
void parallel_for(range< 1 > NumWorkItems, _KERNELFUNCPARAM(KernelFunc))
Definition: handler.hpp:2011
void copy(accessor< T_Src, Dims, AccessMode, AccessTarget, IsPlaceholder > Src, std::shared_ptr< T_Dst > Dst)
Copies the content of memory object accessed by Src into the memory pointed by Dst.
Definition: handler.hpp:2591
void memcpy(void *Dest, const void *Src, size_t Count)
Copies data from one memory region to another, each is either a host pointer or a pointer within USM ...
Definition: handler.cpp:926
void mem_advise(const void *Ptr, size_t Length, int Advice)
Provides additional information to the underlying runtime about how different allocations are used.
Definition: handler.cpp:950
void fill(accessor< T, Dims, AccessMode, AccessTarget, IsPlaceholder, PropertyListT > Dst, const T &Pattern)
Fills memory pointed by accessor with the pattern given.
Definition: handler.hpp:2800
void prefetch(const void *Ptr, size_t Count)
Provides hints to the runtime library that data should be made available on a device earlier than Uni...
Definition: handler.cpp:943
void memset(void *Dest, int Value, size_t Count)
Fills the memory pointed by a USM pointer with the value specified.
Definition: handler.cpp:934
void ext_oneapi_barrier()
Prevents any commands submitted afterward to this queue from executing until all commands previously ...
Definition: handler.hpp:2862
void set_args(Ts &&...Args)
Sets arguments for OpenCL interoperability kernels.
Definition: handler.hpp:1993
Provides an abstraction of a SYCL kernel.
Definition: kernel.hpp:71
Defines the iteration domain of both the work-groups and the overall dispatch.
Definition: nd_range.hpp:22
Encapsulates a single SYCL queue which schedules kernels on a SYCL device.
Definition: queue.hpp:110
std::enable_if_t< std::is_invocable_r_v< void, T, handler & >, event > submit(T CGF, const detail::code_location &CodeLoc=detail::code_location::current())
Submits a command group function object to the queue, in order to be scheduled for execution on the d...
Definition: queue.hpp:340
Defines the iteration domain of either a single work-group in a parallel dispatch,...
Definition: range.hpp:26
void submit_impl(queue &Q, CommandGroupFunc &&CGF, const sycl::detail::code_location &CodeLoc)
void mem_advise(handler &CGH, void *Ptr, size_t NumBytes, int Advice)
void submit(queue Q, CommandGroupFunc &&CGF, const sycl::detail::code_location &CodeLoc=sycl::detail::code_location::current())
void copy(handler &CGH, const T *Src, T *Dest, size_t Count)
event submit_with_event(queue Q, CommandGroupFunc &&CGF, const sycl::detail::code_location &CodeLoc=sycl::detail::code_location::current())
void prefetch(handler &CGH, void *Ptr, size_t NumBytes)
void single_task(handler &CGH, const KernelType &KernelObj)
void memcpy(handler &CGH, void *Dest, const void *Src, size_t NumBytes)
void partial_barrier(handler &CGH, const std::vector< event > &Events)
properties< std::tuple<> > empty_properties_t
Definition: properties.hpp:234
void nd_launch(handler &CGH, nd_range< Dimensions > Range, const KernelType &KernelObj, ReductionsT &&...Reductions)
void fill(sycl::handler &CGH, T *Ptr, const T &Pattern, size_t Count)
void memset(handler &CGH, void *Ptr, int Value, size_t NumBytes)
void parallel_for(handler &CGH, range< Dimensions > Range, const KernelType &KernelObj, ReductionsT &&...Reductions)
class __SYCL_EBO __SYCL_SPECIAL_CLASS Dimensions
Definition: access.hpp:18
_Abi const simd< _Tp, _Abi > & noexcept
Definition: simd.hpp:1324
LaunchConfigAccess(const launch_config< LCRangeT, LCPropertiesT > &LaunchConfig)
const launch_config< LCRangeT, LCPropertiesT > & MLaunchConfig