DPC++ Runtime
Runtime libraries for oneAPI DPC++
queue.cpp
Go to the documentation of this file.
1 //==-------------- queue.cpp -----------------------------------------------==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
10 #include <detail/event_impl.hpp>
11 #include <detail/queue_impl.hpp>
12 #include <sycl/detail/common.hpp>
13 #include <sycl/event.hpp>
14 #include <sycl/exception_list.hpp>
16 #include <sycl/handler.hpp>
17 #include <sycl/queue.hpp>
18 
19 #include <algorithm>
20 
21 namespace sycl {
22 inline namespace _V1 {
23 
24 queue::queue(const context &SyclContext, const device_selector &DeviceSelector,
25  const async_handler &AsyncHandler, const property_list &PropList) {
26  const std::vector<device> Devs = SyclContext.get_devices();
27 
28  auto Comp = [&DeviceSelector](const device &d1, const device &d2) {
29  return DeviceSelector(d1) < DeviceSelector(d2);
30  };
31 
32  const device &SyclDevice = *std::max_element(Devs.begin(), Devs.end(), Comp);
33 
34  impl = std::make_shared<detail::queue_impl>(
35  detail::getSyclObjImpl(SyclDevice), detail::getSyclObjImpl(SyclContext),
37 }
38 
39 queue::queue(const context &SyclContext, const device &SyclDevice,
40  const async_handler &AsyncHandler, const property_list &PropList) {
41  impl = std::make_shared<detail::queue_impl>(
42  detail::getSyclObjImpl(SyclDevice), detail::getSyclObjImpl(SyclContext),
44 }
45 
46 queue::queue(const device &SyclDevice, const async_handler &AsyncHandler,
47  const property_list &PropList) {
48  impl = std::make_shared<detail::queue_impl>(
50 }
51 
52 queue::queue(const context &SyclContext, const device_selector &deviceSelector,
53  const property_list &PropList)
54  : queue(SyclContext, deviceSelector,
55  detail::getSyclObjImpl(SyclContext)->get_async_handler(),
56  PropList) {}
57 
58 queue::queue(const context &SyclContext, const device &SyclDevice,
59  const property_list &PropList)
60  : queue(SyclContext, SyclDevice,
61  detail::getSyclObjImpl(SyclContext)->get_async_handler(),
62  PropList) {}
63 
64 queue::queue(cl_command_queue clQueue, const context &SyclContext,
65  const async_handler &AsyncHandler) {
66  const property_list PropList{};
67  impl = std::make_shared<detail::queue_impl>(
68  reinterpret_cast<sycl::detail::pi::PiQueue>(clQueue),
70 }
71 
72 cl_command_queue queue::get() const { return impl->get(); }
73 
74 context queue::get_context() const { return impl->get_context(); }
75 
76 device queue::get_device() const { return impl->get_device(); }
77 
79  return impl->getCommandGraph()
80  ? ext::oneapi::experimental::queue_state::recording
81  : ext::oneapi::experimental::queue_state::executing;
82 }
83 
87  auto Graph = impl->getCommandGraph();
88  if (!Graph)
89  throw sycl::exception(
91  "ext_oneapi_get_graph() can only be called on recording queues.");
92 
96 }
97 
98 void queue::throw_asynchronous() { impl->throw_asynchronous(); }
99 
100 event queue::memset(void *Ptr, int Value, size_t Count,
101  const detail::code_location &CodeLoc) {
102  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
103  return impl->memset(impl, Ptr, Value, Count, {}, /*CallerNeedsEvent=*/true);
104 }
105 
106 event queue::memset(void *Ptr, int Value, size_t Count, event DepEvent,
107  const detail::code_location &CodeLoc) {
108  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
109  return impl->memset(impl, Ptr, Value, Count, {DepEvent},
110  /*CallerNeedsEvent=*/true);
111 }
112 
113 event queue::memset(void *Ptr, int Value, size_t Count,
114  const std::vector<event> &DepEvents,
115  const detail::code_location &CodeLoc) {
116  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
117  return impl->memset(impl, Ptr, Value, Count, DepEvents,
118  /*CallerNeedsEvent=*/true);
119 }
120 
121 event queue::memcpy(void *Dest, const void *Src, size_t Count,
122  const detail::code_location &CodeLoc) {
123  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
124  return impl->memcpy(impl, Dest, Src, Count, {}, /*CallerNeedsEvent=*/true,
125  CodeLoc);
126 }
127 
128 event queue::memcpy(void *Dest, const void *Src, size_t Count, event DepEvent,
129  const detail::code_location &CodeLoc) {
130  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
131  return impl->memcpy(impl, Dest, Src, Count, {DepEvent},
132  /*CallerNeedsEvent=*/true, CodeLoc);
133 }
134 
135 event queue::memcpy(void *Dest, const void *Src, size_t Count,
136  const std::vector<event> &DepEvents,
137  const detail::code_location &CodeLoc) {
138  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
139  return impl->memcpy(impl, Dest, Src, Count, DepEvents,
140  /*CallerNeedsEvent=*/true, CodeLoc);
141 }
142 
143 event queue::mem_advise(const void *Ptr, size_t Length, int Advice,
144  const detail::code_location &CodeLoc) {
145  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
146  return impl->mem_advise(impl, Ptr, Length, pi_mem_advice(Advice), {},
147  /*CallerNeedsEvent=*/true);
148 }
149 
150 event queue::mem_advise(const void *Ptr, size_t Length, int Advice,
151  event DepEvent, const detail::code_location &CodeLoc) {
152  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
153  return impl->mem_advise(impl, Ptr, Length, pi_mem_advice(Advice), {DepEvent},
154  /*CallerNeedsEvent=*/true);
155 }
156 
157 event queue::mem_advise(const void *Ptr, size_t Length, int Advice,
158  const std::vector<event> &DepEvents,
159  const detail::code_location &CodeLoc) {
160  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
161  return impl->mem_advise(impl, Ptr, Length, pi_mem_advice(Advice), DepEvents,
162  /*CallerNeedsEvent=*/true);
163 }
164 
165 event queue::submit_impl(std::function<void(handler &)> CGH,
166  const detail::code_location &CodeLoc) {
167  return impl->submit(CGH, impl, CodeLoc);
168 }
169 
170 event queue::submit_impl(std::function<void(handler &)> CGH, queue SecondQueue,
171  const detail::code_location &CodeLoc) {
172  return impl->submit(CGH, impl, SecondQueue.impl, CodeLoc);
173 }
174 
175 void queue::submit_without_event_impl(std::function<void(handler &)> CGH,
176  const detail::code_location &CodeLoc) {
177  return impl->submit_without_event(CGH, impl, CodeLoc);
178 }
179 
180 event queue::submit_impl_and_postprocess(
181  std::function<void(handler &)> CGH, const detail::code_location &CodeLoc,
182  const SubmitPostProcessF &PostProcess) {
183  return impl->submit(CGH, impl, CodeLoc, &PostProcess);
184 }
185 
186 event queue::submit_impl_and_postprocess(
187  std::function<void(handler &)> CGH, queue SecondQueue,
188  const detail::code_location &CodeLoc,
189  const SubmitPostProcessF &PostProcess) {
190  return impl->submit(CGH, impl, SecondQueue.impl, CodeLoc, &PostProcess);
191 }
192 
194  impl->wait(CodeLoc);
195 }
196 
198  impl->wait_and_throw(CodeLoc);
199 }
200 
201 static event
203  // This function should not be called when a queue is recording to a graph,
204  // as a graph can record from multiple queues and we cannot guarantee the
205  // last node added by an in-order queue will be the last node added to the
206  // graph.
207  assert(!QueueImpl->getCommandGraph() &&
208  "Should not be called in on graph recording.");
209 
210  return QueueImpl->getLastEvent();
211 }
212 
221  if (is_in_order() && !impl->getCommandGraph() && !impl->MDiscardEvents &&
222  !impl->MIsProfilingEnabled) {
223  event InOrderLastEvent = getBarrierEventForInorderQueueHelper(impl);
224  // If the last event was discarded, fall back to enqueuing a barrier.
225  if (!detail::getSyclObjImpl(InOrderLastEvent)->isDiscarded())
226  return InOrderLastEvent;
227  }
228 
229  return submit([=](handler &CGH) { CGH.ext_oneapi_barrier(); }, CodeLoc);
230 }
231 
241 event queue::ext_oneapi_submit_barrier(const std::vector<event> &WaitList,
242  const detail::code_location &CodeLoc) {
243  bool AllEventsEmptyOrNop = std::all_of(
244  begin(WaitList), end(WaitList), [&](const event &Event) -> bool {
245  auto EventImpl = detail::getSyclObjImpl(Event);
246  return EventImpl->isDefaultConstructed() || EventImpl->isNOP();
247  });
248  if (is_in_order() && !impl->getCommandGraph() && !impl->MDiscardEvents &&
249  !impl->MIsProfilingEnabled && AllEventsEmptyOrNop) {
250  event InOrderLastEvent = getBarrierEventForInorderQueueHelper(impl);
251  // If the last event was discarded, fall back to enqueuing a barrier.
252  if (!detail::getSyclObjImpl(InOrderLastEvent)->isDiscarded())
253  return InOrderLastEvent;
254  }
255 
256  return submit([=](handler &CGH) { CGH.ext_oneapi_barrier(WaitList); },
257  CodeLoc);
258 }
259 
260 template <typename Param>
263  return impl->get_info<Param>();
264 }
265 
266 #define __SYCL_PARAM_TRAITS_SPEC(DescType, Desc, ReturnT, Picode) \
267  template __SYCL_EXPORT ReturnT queue::get_info<info::queue::Desc>() const;
268 
269 #include <sycl/info/queue_traits.def>
270 
271 #undef __SYCL_PARAM_TRAITS_SPEC
272 
273 template <typename Param>
276  return impl->get_backend_info<Param>();
277 }
278 
279 #define __SYCL_PARAM_TRAITS_SPEC(DescType, Desc, ReturnT, Picode) \
280  template __SYCL_EXPORT ReturnT \
281  queue::get_backend_info<info::DescType::Desc>() const;
282 
283 #include <sycl/info/sycl_backend_traits.def>
284 
285 #undef __SYCL_PARAM_TRAITS_SPEC
286 
287 bool queue::is_in_order() const {
288  return has_property<property::queue::in_order>();
289 }
290 
292 
293 bool queue::ext_oneapi_empty() const { return impl->ext_oneapi_empty(); }
294 
295 void queue::ext_oneapi_prod() { impl->flush(); }
296 
297 pi_native_handle queue::getNative(int32_t &NativeHandleDesc) const {
298  return impl->getNative(NativeHandleDesc);
299 }
300 
301 event queue::memcpyToDeviceGlobal(void *DeviceGlobalPtr, const void *Src,
302  bool IsDeviceImageScope, size_t NumBytes,
303  size_t Offset,
304  const std::vector<event> &DepEvents) {
305  return impl->memcpyToDeviceGlobal(impl, DeviceGlobalPtr, Src,
306  IsDeviceImageScope, NumBytes, Offset,
307  DepEvents, /*CallerNeedsEvent=*/true);
308 }
309 
310 event queue::memcpyFromDeviceGlobal(void *Dest, const void *DeviceGlobalPtr,
311  bool IsDeviceImageScope, size_t NumBytes,
312  size_t Offset,
313  const std::vector<event> &DepEvents) {
314  return impl->memcpyFromDeviceGlobal(impl, Dest, DeviceGlobalPtr,
315  IsDeviceImageScope, NumBytes, Offset,
316  DepEvents, /*CallerNeedsEvent=*/true);
317 }
318 
319 bool queue::device_has(aspect Aspect) const {
320  // avoid creating sycl object from impl
321  return impl->getDeviceImplPtr()->has(Aspect);
322 }
323 
325  return has_property<
327 }
328 
330  if (!is_in_order())
331  throw sycl::exception(
333  "ext_oneapi_get_last_event() can only be called on in-order queues.");
334  if (impl->MDiscardEvents)
335  throw sycl::exception(
337  "ext_oneapi_get_last_event() cannot be called on queues with the "
338  "ext::oneapi::property::queue::discard_events property.");
339  return impl->getLastEvent();
340 }
341 
342 void queue::ext_oneapi_set_external_event(const event &external_event) {
343  if (!is_in_order())
345  "ext_oneapi_set_external_event() can only be called "
346  "on in-order queues.");
347  if (impl->MDiscardEvents)
348  throw sycl::exception(
350  "ext_oneapi_set_external_event() cannot be called on queues with the "
351  "ext::oneapi::property::queue::discard_events property.");
352  return impl->setExternalEvent(external_event);
353 }
354 
355 const property_list &queue::getPropList() const { return impl->getPropList(); }
356 
357 } // namespace _V1
358 } // namespace sycl
359 
361  // Compared to using the impl pointer, the unique ID helps avoid hash
362  // collisions with previously destroyed queues.
363  return std::hash<unsigned long long>()(
364  sycl::detail::getSyclObjImpl(Q)->getQueueID());
365 }
The context class represents a SYCL context on which kernel functions may be executed.
Definition: context.hpp:50
Data type that manages the code_location information in TLS.
Definition: common.hpp:129
The SYCL device class encapsulates a single SYCL device on which kernels may be executed.
Definition: device.hpp:64
An event object can be used to synchronize memory transfers, enqueues of kernels and signaling barriers.
Definition: event.hpp:44
Command group handler class.
Definition: handler.hpp:468
void ext_oneapi_barrier()
Prevents any commands submitted afterward to this queue from executing until all commands previously ...
Definition: handler.hpp:2887
Objects of the property_list class are containers for the SYCL properties.
Encapsulates a single SYCL queue which schedules kernels on a SYCL device.
Definition: queue.hpp:110
bool ext_oneapi_empty() const
Allows checking the status of the queue (completed vs. not completed).
Definition: queue.cpp:293
const property_list & PropList
Definition: queue.hpp:194
event memcpy(void *Dest, const void *Src, size_t Count, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from one memory region to another, each is either a host pointer or a pointer within USM ...
Definition: queue.cpp:121
queue(const property_list &PropList={})
Constructs a SYCL queue instance using the device returned by an instance of default_selector.
Definition: queue.hpp:116
bool is_in_order() const
Returns whether the queue is in-order or out-of-order (OoO).
Definition: queue.cpp:287
event memset(void *Ptr, int Value, size_t Count, const detail::code_location &CodeLoc=detail::code_location::current())
Fills the memory pointed to by a USM pointer with the specified value.
Definition: queue.cpp:100
pi_native_handle getNative(int32_t &NativeHandleDesc) const
Definition: queue.cpp:297
const device_selector const async_handler & AsyncHandler
Definition: queue.hpp:248
event mem_advise(const void *Ptr, size_t Length, int Advice, const detail::code_location &CodeLoc=detail::code_location::current())
Provides additional information to the underlying runtime about how different allocations are used.
Definition: queue.cpp:143
device get_device() const
Definition: queue.cpp:76
backend get_backend() const noexcept
Returns the backend associated with this queue.
Definition: queue.cpp:291
void ext_oneapi_set_external_event(const event &external_event)
Definition: queue.cpp:342
bool ext_codeplay_supports_fusion() const
Returns true if the queue was created with the ext::codeplay::experimental::property::queue::enable_f...
Definition: queue.cpp:324
void wait_and_throw_proxy(const detail::code_location &CodeLoc)
Proxy method for wait_and_throw to forward the code location information to the implementation.
Definition: queue.cpp:197
event ext_oneapi_get_last_event() const
Definition: queue.cpp:329
event ext_oneapi_submit_barrier(const detail::code_location &CodeLoc=detail::code_location::current())
Prevents any commands submitted afterward to this queue from executing until all commands previously ...
Definition: queue.cpp:220
ext::oneapi::experimental::queue_state ext_oneapi_get_state() const
Definition: queue.cpp:78
context get_context() const
Definition: queue.cpp:74
bool has_property() const noexcept
Definition: queue.hpp:465
detail::is_queue_info_desc< Param >::return_type get_info() const
Queries SYCL queue for information.
Definition: queue.cpp:262
detail::is_backend_info_desc< Param >::return_type get_backend_info() const
Queries SYCL queue for SYCL backend-specific information.
Definition: queue.cpp:275
const device_selector & DeviceSelector
Definition: queue.hpp:234
ext::oneapi::experimental::command_graph< ext::oneapi::experimental::graph_state::modifiable > ext_oneapi_get_graph() const
Definition: queue.cpp:86
void ext_oneapi_prod()
Provides a hint to the runtime that previously issued commands to this queue should begin executing o...
Definition: queue.cpp:295
std::enable_if_t< std::is_invocable_r_v< void, T, handler & >, event > submit(T CGF, const detail::code_location &CodeLoc=detail::code_location::current())
Submits a command group function object to the queue, in order to be scheduled for execution on the d...
Definition: queue.hpp:340
void wait_proxy(const detail::code_location &CodeLoc)
Proxy method for wait to forward the code location information to the implementation.
Definition: queue.cpp:193
void throw_asynchronous()
Checks if any asynchronous errors have been produced by the queue and if so reports them to the async...
Definition: queue.cpp:98
constexpr tuple_element< I, tuple< Types... > >::type & get(sycl::detail::tuple< Types... > &Arg) noexcept
Definition: tuple.hpp:198
decltype(Obj::impl) const & getSyclObjImpl(const Obj &SyclObject)
Definition: impl_utils.hpp:31
backend getImplBackend(const T &Impl)
T createSyclObjFromImpl(decltype(T::impl) ImplObj)
Definition: impl_utils.hpp:40
std::shared_ptr< sycl::detail::queue_impl > QueueImplPtr
Definition: event_impl.hpp:34
@ modifiable
In modifiable state, commands can be added to graph.
static event getBarrierEventForInorderQueueHelper(const detail::QueueImplPtr QueueImpl)
Definition: queue.cpp:202
std::function< void(sycl::exception_list)> async_handler
std::error_code make_error_code(sycl::errc E) noexcept
Constructs an error code using E and sycl_category().
Definition: exception.cpp:64
Definition: access.hpp:18
uintptr_t pi_native_handle
Definition: pi.h:258
_pi_mem_advice pi_mem_advice
Definition: pi.h:914
bool all_of(const simd_mask< _Tp, _Abi > &) noexcept
_Abi const simd< _Tp, _Abi > & noexcept
Definition: simd.hpp:1324
size_t operator()(const sycl::queue &Q) const
Definition: queue.cpp:360