DPC++ Runtime
Runtime libraries for oneAPI DPC++
queue.cpp
Go to the documentation of this file.
1 //==-------------- queue.cpp -----------------------------------------------==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
10 #include <detail/event_impl.hpp>
11 #include <detail/queue_impl.hpp>
12 #include <sycl/detail/common.hpp>
13 #include <sycl/event.hpp>
14 #include <sycl/exception_list.hpp>
16 #include <sycl/handler.hpp>
17 #include <sycl/queue.hpp>
18 
19 #include <algorithm>
20 
21 namespace sycl {
22 inline namespace _V1 {
23 
24 queue::queue(const context &SyclContext, const device_selector &DeviceSelector,
25  const async_handler &AsyncHandler, const property_list &PropList) {
26  const std::vector<device> Devs = SyclContext.get_devices();
27 
28  auto Comp = [&DeviceSelector](const device &d1, const device &d2) {
29  return DeviceSelector(d1) < DeviceSelector(d2);
30  };
31 
32  const device &SyclDevice = *std::max_element(Devs.begin(), Devs.end(), Comp);
33 
34  impl = std::make_shared<detail::queue_impl>(
35  detail::getSyclObjImpl(SyclDevice), detail::getSyclObjImpl(SyclContext),
37 }
38 
39 queue::queue(const context &SyclContext, const device &SyclDevice,
40  const async_handler &AsyncHandler, const property_list &PropList) {
41  impl = std::make_shared<detail::queue_impl>(
42  detail::getSyclObjImpl(SyclDevice), detail::getSyclObjImpl(SyclContext),
44 }
45 
46 queue::queue(const device &SyclDevice, const async_handler &AsyncHandler,
47  const property_list &PropList) {
48  impl = std::make_shared<detail::queue_impl>(
50 }
51 
52 queue::queue(const context &SyclContext, const device_selector &deviceSelector,
53  const property_list &PropList)
54  : queue(SyclContext, deviceSelector,
55  detail::getSyclObjImpl(SyclContext)->get_async_handler(),
56  PropList) {}
57 
58 queue::queue(const context &SyclContext, const device &SyclDevice,
59  const property_list &PropList)
60  : queue(SyclContext, SyclDevice,
61  detail::getSyclObjImpl(SyclContext)->get_async_handler(),
62  PropList) {}
63 
64 queue::queue(cl_command_queue clQueue, const context &SyclContext,
65  const async_handler &AsyncHandler) {
66  const property_list PropList{};
67  impl = std::make_shared<detail::queue_impl>(
68  // TODO(pi2ur): Don't cast straight from cl_command_queue
69  reinterpret_cast<ur_queue_handle_t>(clQueue),
71 }
72 
73 cl_command_queue queue::get() const { return impl->get(); }
74 
75 context queue::get_context() const { return impl->get_context(); }
76 
77 device queue::get_device() const { return impl->get_device(); }
78 
80  return impl->getCommandGraph()
81  ? ext::oneapi::experimental::queue_state::recording
82  : ext::oneapi::experimental::queue_state::executing;
83 }
84 
88  auto Graph = impl->getCommandGraph();
89  if (!Graph)
90  throw sycl::exception(
92  "ext_oneapi_get_graph() can only be called on recording queues.");
93 
97 }
98 
99 void queue::throw_asynchronous() { impl->throw_asynchronous(); }
100 
101 event queue::memset(void *Ptr, int Value, size_t Count,
102  const detail::code_location &CodeLoc) {
103  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
104  return impl->memset(impl, Ptr, Value, Count, {}, /*CallerNeedsEvent=*/true);
105 }
106 
107 event queue::memset(void *Ptr, int Value, size_t Count, event DepEvent,
108  const detail::code_location &CodeLoc) {
109  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
110  return impl->memset(impl, Ptr, Value, Count, {DepEvent},
111  /*CallerNeedsEvent=*/true);
112 }
113 
114 event queue::memset(void *Ptr, int Value, size_t Count,
115  const std::vector<event> &DepEvents,
116  const detail::code_location &CodeLoc) {
117  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
118  return impl->memset(impl, Ptr, Value, Count, DepEvents,
119  /*CallerNeedsEvent=*/true);
120 }
121 
122 event queue::memcpy(void *Dest, const void *Src, size_t Count,
123  const detail::code_location &CodeLoc) {
124  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
125  return impl->memcpy(impl, Dest, Src, Count, {}, /*CallerNeedsEvent=*/true,
126  CodeLoc);
127 }
128 
129 event queue::memcpy(void *Dest, const void *Src, size_t Count, event DepEvent,
130  const detail::code_location &CodeLoc) {
131  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
132  return impl->memcpy(impl, Dest, Src, Count, {DepEvent},
133  /*CallerNeedsEvent=*/true, CodeLoc);
134 }
135 
136 event queue::memcpy(void *Dest, const void *Src, size_t Count,
137  const std::vector<event> &DepEvents,
138  const detail::code_location &CodeLoc) {
139  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
140  return impl->memcpy(impl, Dest, Src, Count, DepEvents,
141  /*CallerNeedsEvent=*/true, CodeLoc);
142 }
143 
144 event queue::mem_advise(const void *Ptr, size_t Length, int Advice,
145  const detail::code_location &CodeLoc) {
146  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
147  return impl->mem_advise(impl, Ptr, Length, ur_usm_advice_flags_t(Advice), {},
148  /*CallerNeedsEvent=*/true);
149 }
150 
151 event queue::mem_advise(const void *Ptr, size_t Length, int Advice,
152  event DepEvent, const detail::code_location &CodeLoc) {
153  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
154  return impl->mem_advise(impl, Ptr, Length, ur_usm_advice_flags_t(Advice),
155  {DepEvent},
156  /*CallerNeedsEvent=*/true);
157 }
158 
159 event queue::mem_advise(const void *Ptr, size_t Length, int Advice,
160  const std::vector<event> &DepEvents,
161  const detail::code_location &CodeLoc) {
162  detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
163  return impl->mem_advise(impl, Ptr, Length, ur_usm_advice_flags_t(Advice),
164  DepEvents,
165  /*CallerNeedsEvent=*/true);
166 }
167 
168 event queue::submit_impl(std::function<void(handler &)> CGH,
169  const detail::code_location &CodeLoc) {
170  return impl->submit(CGH, impl, CodeLoc);
171 }
172 
173 event queue::submit_impl(std::function<void(handler &)> CGH, queue SecondQueue,
174  const detail::code_location &CodeLoc) {
175  return impl->submit(CGH, impl, SecondQueue.impl, CodeLoc);
176 }
177 
178 void queue::submit_without_event_impl(std::function<void(handler &)> CGH,
179  const detail::code_location &CodeLoc) {
180  return impl->submit_without_event(CGH, impl, CodeLoc);
181 }
182 
183 event queue::submit_impl_and_postprocess(
184  std::function<void(handler &)> CGH, const detail::code_location &CodeLoc,
185  const SubmitPostProcessF &PostProcess) {
186  return impl->submit(CGH, impl, CodeLoc, &PostProcess);
187 }
188 
189 event queue::submit_impl_and_postprocess(
190  std::function<void(handler &)> CGH, queue SecondQueue,
191  const detail::code_location &CodeLoc,
192  const SubmitPostProcessF &PostProcess) {
193  return impl->submit(CGH, impl, SecondQueue.impl, CodeLoc, &PostProcess);
194 }
195 
197  impl->wait(CodeLoc);
198 }
199 
201  impl->wait_and_throw(CodeLoc);
202 }
203 
204 static event
206  // This function should not be called when a queue is recording to a graph,
207  // as a graph can record from multiple queues and we cannot guarantee the
208  // last node added by an in-order queue will be the last node added to the
209  // graph.
210  assert(!QueueImpl->getCommandGraph() &&
211  "Should not be called in on graph recording.");
212 
213  return QueueImpl->getLastEvent();
214 }
215 
224  if (is_in_order() && !impl->getCommandGraph() && !impl->MDiscardEvents &&
225  !impl->MIsProfilingEnabled) {
226  event InOrderLastEvent = getBarrierEventForInorderQueueHelper(impl);
227  // If the last event was discarded, fall back to enqueuing a barrier.
228  if (!detail::getSyclObjImpl(InOrderLastEvent)->isDiscarded())
229  return InOrderLastEvent;
230  }
231 
232  return submit([=](handler &CGH) { CGH.ext_oneapi_barrier(); }, CodeLoc);
233 }
234 
244 event queue::ext_oneapi_submit_barrier(const std::vector<event> &WaitList,
245  const detail::code_location &CodeLoc) {
246  bool AllEventsEmptyOrNop = std::all_of(
247  begin(WaitList), end(WaitList), [&](const event &Event) -> bool {
248  auto EventImpl = detail::getSyclObjImpl(Event);
249  return EventImpl->isDefaultConstructed() || EventImpl->isNOP();
250  });
251  if (is_in_order() && !impl->getCommandGraph() && !impl->MDiscardEvents &&
252  !impl->MIsProfilingEnabled && AllEventsEmptyOrNop) {
253  event InOrderLastEvent = getBarrierEventForInorderQueueHelper(impl);
254  // If the last event was discarded, fall back to enqueuing a barrier.
255  if (!detail::getSyclObjImpl(InOrderLastEvent)->isDiscarded())
256  return InOrderLastEvent;
257  }
258 
259  return submit([=](handler &CGH) { CGH.ext_oneapi_barrier(WaitList); },
260  CodeLoc);
261 }
262 
263 template <typename Param>
266  return impl->get_info<Param>();
267 }
268 
269 #define __SYCL_PARAM_TRAITS_SPEC(DescType, Desc, ReturnT, Picode) \
270  template __SYCL_EXPORT ReturnT queue::get_info<info::queue::Desc>() const;
271 
272 #include <sycl/info/queue_traits.def>
273 
274 #undef __SYCL_PARAM_TRAITS_SPEC
275 
276 template <typename Param>
279  return impl->get_backend_info<Param>();
280 }
281 
282 #define __SYCL_PARAM_TRAITS_SPEC(DescType, Desc, ReturnT, Picode) \
283  template __SYCL_EXPORT ReturnT \
284  queue::get_backend_info<info::DescType::Desc>() const;
285 
286 #include <sycl/info/sycl_backend_traits.def>
287 
288 #undef __SYCL_PARAM_TRAITS_SPEC
289 
290 bool queue::is_in_order() const {
291  return has_property<property::queue::in_order>();
292 }
293 
295 
296 bool queue::ext_oneapi_empty() const { return impl->ext_oneapi_empty(); }
297 
298 void queue::ext_oneapi_prod() { impl->flush(); }
299 
300 ur_native_handle_t queue::getNative(int32_t &NativeHandleDesc) const {
301  return impl->getNative(NativeHandleDesc);
302 }
303 
304 event queue::memcpyToDeviceGlobal(void *DeviceGlobalPtr, const void *Src,
305  bool IsDeviceImageScope, size_t NumBytes,
306  size_t Offset,
307  const std::vector<event> &DepEvents) {
308  return impl->memcpyToDeviceGlobal(impl, DeviceGlobalPtr, Src,
309  IsDeviceImageScope, NumBytes, Offset,
310  DepEvents, /*CallerNeedsEvent=*/true);
311 }
312 
313 event queue::memcpyFromDeviceGlobal(void *Dest, const void *DeviceGlobalPtr,
314  bool IsDeviceImageScope, size_t NumBytes,
315  size_t Offset,
316  const std::vector<event> &DepEvents) {
317  return impl->memcpyFromDeviceGlobal(impl, Dest, DeviceGlobalPtr,
318  IsDeviceImageScope, NumBytes, Offset,
319  DepEvents, /*CallerNeedsEvent=*/true);
320 }
321 
322 bool queue::device_has(aspect Aspect) const {
323  // avoid creating sycl object from impl
324  return impl->getDeviceImplPtr()->has(Aspect);
325 }
326 
328  return has_property<
330 }
331 
333  if (!is_in_order())
334  throw sycl::exception(
336  "ext_oneapi_get_last_event() can only be called on in-order queues.");
337  if (impl->MDiscardEvents)
338  throw sycl::exception(
340  "ext_oneapi_get_last_event() cannot be called on queues with the "
341  "ext::oneapi::property::queue::discard_events property.");
342  return impl->getLastEvent();
343 }
344 
345 void queue::ext_oneapi_set_external_event(const event &external_event) {
346  if (!is_in_order())
348  "ext_oneapi_set_external_event() can only be called "
349  "on in-order queues.");
350  if (impl->MDiscardEvents)
351  throw sycl::exception(
353  "ext_oneapi_set_external_event() cannot be called on queues with the "
354  "ext::oneapi::property::queue::discard_events property.");
355  return impl->setExternalEvent(external_event);
356 }
357 
358 const property_list &queue::getPropList() const { return impl->getPropList(); }
359 
360 } // namespace _V1
361 } // namespace sycl
362 
364  // Compared to using the impl pointer, the unique ID helps avoid hash
365  // collisions with previously destroyed queues.
366  return std::hash<unsigned long long>()(
367  sycl::detail::getSyclObjImpl(Q)->getQueueID());
368 }
The context class represents a SYCL context on which kernel functions may be executed.
Definition: context.hpp:50
Data type that manages the code_location information in TLS.
Definition: common.hpp:131
The SYCL device class encapsulates a single SYCL device on which kernels may be executed.
Definition: device.hpp:64
An event object can be used to synchronize memory transfers, enqueues of kernels and signaling barriers.
Definition: event.hpp:44
Command group handler class.
Definition: handler.hpp:467
void ext_oneapi_barrier()
Prevents any commands submitted afterward to this queue from executing until all commands previously submitted to this queue have entered the complete state.
Definition: handler.hpp:2918
Objects of the property_list class are containers for the SYCL properties.
Encapsulates a single SYCL queue which schedules kernels on a SYCL device.
Definition: queue.hpp:110
bool ext_oneapi_empty() const
Allows checking the status of the queue (completed vs. not completed).
Definition: queue.cpp:296
const property_list & PropList
Definition: queue.hpp:194
event memcpy(void *Dest, const void *Src, size_t Count, const detail::code_location &CodeLoc=detail::code_location::current())
Copies data from one memory region to another, each of which is either a host pointer or a pointer within a USM allocation accessible on the device associated with this queue.
Definition: queue.cpp:122
queue(const property_list &PropList={})
Constructs a SYCL queue instance using the device returned by an instance of default_selector.
Definition: queue.hpp:116
bool is_in_order() const
Returns whether the queue is in order or OoO.
Definition: queue.cpp:290
event memset(void *Ptr, int Value, size_t Count, const detail::code_location &CodeLoc=detail::code_location::current())
Fills the memory pointed to by a USM pointer with the specified value.
Definition: queue.cpp:101
const device_selector const async_handler & AsyncHandler
Definition: queue.hpp:248
event mem_advise(const void *Ptr, size_t Length, int Advice, const detail::code_location &CodeLoc=detail::code_location::current())
Provides additional information to the underlying runtime about how different allocations are used.
Definition: queue.cpp:144
device get_device() const
Definition: queue.cpp:77
backend get_backend() const noexcept
Returns the backend associated with this queue.
Definition: queue.cpp:294
void ext_oneapi_set_external_event(const event &external_event)
Definition: queue.cpp:345
bool ext_codeplay_supports_fusion() const
Returns true if the queue was created with the ext::codeplay::experimental::property::queue::enable_fusion property.
Definition: queue.cpp:327
void wait_and_throw_proxy(const detail::code_location &CodeLoc)
Proxy method for wait_and_throw to forward the code location information to the implementation.
Definition: queue.cpp:200
event ext_oneapi_get_last_event() const
Definition: queue.cpp:332
ur_native_handle_t getNative(int32_t &NativeHandleDesc) const
Definition: queue.cpp:300
event ext_oneapi_submit_barrier(const detail::code_location &CodeLoc=detail::code_location::current())
Prevents any commands submitted afterward to this queue from executing until all commands previously submitted to this queue have entered the complete state.
Definition: queue.cpp:223
ext::oneapi::experimental::queue_state ext_oneapi_get_state() const
Definition: queue.cpp:79
context get_context() const
Definition: queue.cpp:75
bool has_property() const noexcept
Definition: queue.hpp:465
detail::is_queue_info_desc< Param >::return_type get_info() const
Queries SYCL queue for information.
Definition: queue.cpp:265
detail::is_backend_info_desc< Param >::return_type get_backend_info() const
Queries SYCL queue for SYCL backend-specific information.
Definition: queue.cpp:278
const device_selector & DeviceSelector
Definition: queue.hpp:234
ext::oneapi::experimental::command_graph< ext::oneapi::experimental::graph_state::modifiable > ext_oneapi_get_graph() const
Definition: queue.cpp:87
void ext_oneapi_prod()
Provides a hint to the runtime that previously issued commands to this queue should begin executing once their prerequisites have been satisfied.
Definition: queue.cpp:298
std::enable_if_t< std::is_invocable_r_v< void, T, handler & >, event > submit(T CGF, const detail::code_location &CodeLoc=detail::code_location::current())
Submits a command group function object to the queue, in order to be scheduled for execution on the device.
Definition: queue.hpp:340
void wait_proxy(const detail::code_location &CodeLoc)
Proxy method for wait to forward the code location information to the implementation.
Definition: queue.cpp:196
void throw_asynchronous()
Checks if any asynchronous errors have been produced by the queue and if so reports them to the async handler.
Definition: queue.cpp:99
constexpr tuple_element< I, tuple< Types... > >::type & get(sycl::detail::tuple< Types... > &Arg) noexcept
Definition: tuple.hpp:198
decltype(Obj::impl) const & getSyclObjImpl(const Obj &SyclObject)
Definition: impl_utils.hpp:31
backend getImplBackend(const T &Impl)
T createSyclObjFromImpl(decltype(T::impl) ImplObj)
Definition: impl_utils.hpp:40
std::shared_ptr< sycl::detail::queue_impl > QueueImplPtr
Definition: helpers.hpp:45
@ modifiable
In modifiable state, commands can be added to graph.
static event getBarrierEventForInorderQueueHelper(const detail::QueueImplPtr QueueImpl)
Definition: queue.cpp:205
std::function< void(sycl::exception_list)> async_handler
std::error_code make_error_code(sycl::errc E) noexcept
Constructs an error code using E and sycl_category().
Definition: exception.cpp:65
Definition: access.hpp:18
bool all_of(const simd_mask< _Tp, _Abi > &) noexcept
_Abi const simd< _Tp, _Abi > & noexcept
Definition: simd.hpp:1324
size_t operator()(const sycl::queue &Q) const
Definition: queue.cpp:363