DPC++ Runtime
Runtime libraries for oneAPI DPC++
queue_impl.cpp
Go to the documentation of this file.
1 //==------------------ queue_impl.cpp - SYCL queue -------------------------==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include <detail/event_impl.hpp>
11 #include <detail/queue_impl.hpp>
12 #include <sycl/context.hpp>
13 #include <sycl/detail/common.hpp>
14 #include <sycl/detail/pi.hpp>
15 #include <sycl/device.hpp>
16 
17 #include <cstring>
18 #include <utility>
19 
20 #ifdef XPTI_ENABLE_INSTRUMENTATION
21 #include "xpti/xpti_trace_framework.hpp"
22 #include <detail/xpti_registry.hpp>
23 #include <sstream>
24 #endif
25 
26 namespace sycl {
27 inline namespace _V1 {
28 namespace detail {
29 std::atomic<unsigned long long> queue_impl::MNextAvailableQueueID = 0;
30 
31 static std::vector<sycl::detail::pi::PiEvent>
32 getPIEvents(const std::vector<sycl::event> &DepEvents) {
33  std::vector<sycl::detail::pi::PiEvent> RetPiEvents;
34  for (const sycl::event &Event : DepEvents) {
35  const EventImplPtr &EventImpl = detail::getSyclObjImpl(Event);
36  if (EventImpl->getHandleRef() != nullptr)
37  RetPiEvents.push_back(EventImpl->getHandleRef());
38  }
39  return RetPiEvents;
40 }
41 
42 template <>
43 uint32_t queue_impl::get_info<info::queue::reference_count>() const {
44  sycl::detail::pi::PiResult result = PI_SUCCESS;
45  if (!is_host())
47  MQueues[0], PI_QUEUE_INFO_REFERENCE_COUNT, sizeof(result), &result,
48  nullptr);
49  return result;
50 }
51 
52 template <> context queue_impl::get_info<info::queue::context>() const {
53  return get_context();
54 }
55 
56 template <> device queue_impl::get_info<info::queue::device>() const {
57  return get_device();
58 }
59 
60 template <>
61 typename info::platform::version::return_type
62 queue_impl::get_backend_info<info::platform::version>() const {
63  if (getContextImplPtr()->getBackend() != backend::opencl) {
65  "the info::platform::version info descriptor can "
66  "only be queried with an OpenCL backend");
67  }
68  return get_device().get_platform().get_info<info::platform::version>();
69 }
70 
71 template <>
72 typename info::device::version::return_type
73 queue_impl::get_backend_info<info::device::version>() const {
74  if (getContextImplPtr()->getBackend() != backend::opencl) {
76  "the info::device::version info descriptor can only "
77  "be queried with an OpenCL backend");
78  }
79  return get_device().get_info<info::device::version>();
80 }
81 
82 template <>
83 typename info::device::backend_version::return_type
84 queue_impl::get_backend_info<info::device::backend_version>() const {
85  if (getContextImplPtr()->getBackend() != backend::ext_oneapi_level_zero) {
87  "the info::device::backend_version info descriptor "
88  "can only be queried with a Level Zero backend");
89  }
90  return "";
91  // Currently The Level Zero backend does not define the value of this
92  // information descriptor and implementations are encouraged to return the
93  // empty string as per specification.
94 }
95 
97  const std::shared_ptr<detail::queue_impl> &QueueImpl) {
98  auto EventImpl = std::make_shared<detail::event_impl>(QueueImpl);
99  EventImpl->setContextImpl(detail::getSyclObjImpl(QueueImpl->get_context()));
100  EventImpl->setStateIncomplete();
101  return detail::createSyclObjFromImpl<event>(EventImpl);
102 }
103 
104 static event createDiscardedEvent() {
105  EventImplPtr EventImpl =
106  std::make_shared<event_impl>(event_impl::HES_Discarded);
107  return createSyclObjFromImpl<event>(EventImpl);
108 }
109 
110 const std::vector<event> &
111 queue_impl::getExtendDependencyList(const std::vector<event> &DepEvents,
112  std::vector<event> &MutableVec,
113  std::unique_lock<std::mutex> &QueueLock) {
114  if (!isInOrder())
115  return DepEvents;
116 
117  QueueLock.lock();
118  EventImplPtr ExtraEvent =
120  std::optional<event> ExternalEvent = popExternalEvent();
121 
122  if (!ExternalEvent && !ExtraEvent)
123  return DepEvents;
124 
125  MutableVec = DepEvents;
126  if (ExternalEvent)
127  MutableVec.push_back(*ExternalEvent);
128  if (ExtraEvent)
129  MutableVec.push_back(detail::createSyclObjFromImpl<event>(ExtraEvent));
130  return MutableVec;
131 }
132 
133 event queue_impl::memset(const std::shared_ptr<detail::queue_impl> &Self,
134  void *Ptr, int Value, size_t Count,
135  const std::vector<event> &DepEvents) {
136 #if XPTI_ENABLE_INSTRUMENTATION
137  // We need a code pointer value and we use the object ptr; if code location
138  // information is available, we will have function name and source file
139  // information
140  XPTIScope PrepareNotify((void *)this,
141  (uint16_t)xpti::trace_point_type_t::node_create,
142  SYCL_STREAM_NAME, "memory_transfer_node");
143  PrepareNotify.addMetadata([&](auto TEvent) {
144  xpti::addMetadata(TEvent, "sycl_device",
145  reinterpret_cast<size_t>(
146  MDevice->is_host() ? 0 : MDevice->getHandleRef()));
147  xpti::addMetadata(TEvent, "memory_ptr", reinterpret_cast<size_t>(Ptr));
148  xpti::addMetadata(TEvent, "value_set", Value);
149  xpti::addMetadata(TEvent, "memory_size", Count);
150  xpti::addMetadata(TEvent, "queue_id", MQueueID);
151  });
152  // Before we notifiy the subscribers, we broadcast the 'queue_id', which was a
153  // metadata entry to TLS for use by callback handlers
154  xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, MQueueID);
155  // Notify XPTI about the memset submission
156  PrepareNotify.notify();
157  // Emit a begin/end scope for this call
158  PrepareNotify.scopedNotify((uint16_t)xpti::trace_point_type_t::task_begin);
159 #endif
160 
161  return submitMemOpHelper(
162  Self, DepEvents, [&](handler &CGH) { CGH.memset(Ptr, Value, Count); },
163  [](const auto &...Args) { MemoryManager::fill_usm(Args...); }, Ptr, Self,
164  Count, Value);
165 }
166 
167 void report(const code_location &CodeLoc) {
168  std::cout << "Exception caught at ";
169  if (CodeLoc.fileName())
170  std::cout << "File: " << CodeLoc.fileName();
171  if (CodeLoc.functionName())
172  std::cout << " | Function: " << CodeLoc.functionName();
173  if (CodeLoc.lineNumber())
174  std::cout << " | Line: " << CodeLoc.lineNumber();
175  if (CodeLoc.columnNumber())
176  std::cout << " | Column: " << CodeLoc.columnNumber();
177  std::cout << '\n';
178 }
179 
180 event queue_impl::memcpy(const std::shared_ptr<detail::queue_impl> &Self,
181  void *Dest, const void *Src, size_t Count,
182  const std::vector<event> &DepEvents,
183  const code_location &CodeLoc) {
184 #if XPTI_ENABLE_INSTRUMENTATION
185  // We need a code pointer value and we duse the object ptr; If code location
186  // is available, we use the source file information along with the object
187  // pointer.
188  XPTIScope PrepareNotify((void *)this,
189  (uint16_t)xpti::trace_point_type_t::node_create,
190  SYCL_STREAM_NAME, "memory_transfer_node");
191  PrepareNotify.addMetadata([&](auto TEvent) {
192  xpti::addMetadata(TEvent, "sycl_device",
193  reinterpret_cast<size_t>(
194  MDevice->is_host() ? 0 : MDevice->getHandleRef()));
195  xpti::addMetadata(TEvent, "src_memory_ptr", reinterpret_cast<size_t>(Src));
196  xpti::addMetadata(TEvent, "dest_memory_ptr",
197  reinterpret_cast<size_t>(Dest));
198  xpti::addMetadata(TEvent, "memory_size", Count);
199  xpti::addMetadata(TEvent, "queue_id", MQueueID);
200  });
201  xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, MQueueID);
202  // Notify XPTI about the memset submission
203  PrepareNotify.notify();
204  // Emit a begin/end scope for this call
205  PrepareNotify.scopedNotify((uint16_t)xpti::trace_point_type_t::task_begin);
206 #endif
207 
208  if ((!Src || !Dest) && Count != 0) {
209  report(CodeLoc);
210  throw runtime_error("NULL pointer argument in memory copy operation.",
211  PI_ERROR_INVALID_VALUE);
212  }
213  return submitMemOpHelper(
214  Self, DepEvents, [&](handler &CGH) { CGH.memcpy(Dest, Src, Count); },
215  [](const auto &...Args) { MemoryManager::copy_usm(Args...); }, Src, Self,
216  Count, Dest);
217 }
218 
219 event queue_impl::mem_advise(const std::shared_ptr<detail::queue_impl> &Self,
220  const void *Ptr, size_t Length,
221  pi_mem_advice Advice,
222  const std::vector<event> &DepEvents) {
223  return submitMemOpHelper(
224  Self, DepEvents,
225  [&](handler &CGH) { CGH.mem_advise(Ptr, Length, Advice); },
226  [](const auto &...Args) { MemoryManager::advise_usm(Args...); }, Ptr,
227  Self, Length, Advice);
228 }
229 
231  const std::shared_ptr<detail::queue_impl> &Self, void *DeviceGlobalPtr,
232  const void *Src, bool IsDeviceImageScope, size_t NumBytes, size_t Offset,
233  const std::vector<event> &DepEvents) {
234  return submitMemOpHelper(
235  Self, DepEvents,
236  [&](handler &CGH) {
237  CGH.memcpyToDeviceGlobal(DeviceGlobalPtr, Src, IsDeviceImageScope,
238  NumBytes, Offset);
239  },
240  [](const auto &...Args) {
242  },
243  DeviceGlobalPtr, IsDeviceImageScope, Self, NumBytes, Offset, Src);
244 }
245 
247  const std::shared_ptr<detail::queue_impl> &Self, void *Dest,
248  const void *DeviceGlobalPtr, bool IsDeviceImageScope, size_t NumBytes,
249  size_t Offset, const std::vector<event> &DepEvents) {
250  return submitMemOpHelper(
251  Self, DepEvents,
252  [&](handler &CGH) {
253  CGH.memcpyFromDeviceGlobal(Dest, DeviceGlobalPtr, IsDeviceImageScope,
254  NumBytes, Offset);
255  },
256  [](const auto &...Args) {
258  },
259  DeviceGlobalPtr, IsDeviceImageScope, Self, NumBytes, Offset, Dest);
260 }
261 
263  std::lock_guard<std::mutex> Lock{MMutex};
264  if (MDiscardEvents)
265  return createDiscardedEvent();
266  if (!MGraph.expired() && MGraphLastEventPtr)
267  return detail::createSyclObjFromImpl<event>(MGraphLastEventPtr);
268  if (!MLastEventPtr)
269  MLastEventPtr = std::make_shared<event_impl>(std::nullopt);
270  return detail::createSyclObjFromImpl<event>(MLastEventPtr);
271 }
272 
273 void queue_impl::addEvent(const event &Event) {
274  EventImplPtr EImpl = getSyclObjImpl(Event);
275  assert(EImpl && "Event implementation is missing");
276  auto *Cmd = static_cast<Command *>(EImpl->getCommand());
277  if (!Cmd) {
278  // if there is no command on the event, we cannot track it with MEventsWeak
279  // as that will leave it with no owner. Track in MEventsShared only if we're
280  // unable to call piQueueFinish during wait.
281  if (is_host() || MEmulateOOO)
282  addSharedEvent(Event);
283  }
284  // As long as the queue supports piQueueFinish we only need to store events
285  // for unenqueued commands and host tasks.
286  else if (is_host() || MEmulateOOO || EImpl->getHandleRef() == nullptr) {
287  std::weak_ptr<event_impl> EventWeakPtr{EImpl};
288  std::lock_guard<std::mutex> Lock{MMutex};
289  MEventsWeak.push_back(std::move(EventWeakPtr));
290  }
291 }
292 
296 void queue_impl::addSharedEvent(const event &Event) {
297  assert(is_host() || MEmulateOOO);
298  std::lock_guard<std::mutex> Lock(MMutex);
299  // Events stored in MEventsShared are not released anywhere else aside from
300  // calls to queue::wait/wait_and_throw, which a user application might not
301  // make, and ~queue_impl(). If the number of events grows large enough,
302  // there's a good chance that most of them are already completed and ownership
303  // of them can be released.
304  const size_t EventThreshold = 128;
305  if (MEventsShared.size() >= EventThreshold) {
306  // Generally, the vector is ordered so that the oldest events are in the
307  // front and the newer events are in the end. So, search to find the first
308  // event that isn't yet complete. All the events prior to that can be
309  // erased. This could leave some few events further on that have completed
310  // not yet erased, but that is OK. This cleanup doesn't have to be perfect.
311  // This also keeps the algorithm linear rather than quadratic because it
312  // doesn't continually recheck things towards the back of the list that
313  // really haven't had time to complete.
314  MEventsShared.erase(
315  MEventsShared.begin(),
316  std::find_if(
317  MEventsShared.begin(), MEventsShared.end(), [](const event &E) {
318  return E.get_info<info::event::command_execution_status>() !=
319  info::event_command_status::complete;
320  }));
321  }
322  MEventsShared.push_back(Event);
323 }
324 
325 template <typename HandlerFuncT>
326 event queue_impl::submitWithHandler(const std::shared_ptr<queue_impl> &Self,
327  const std::vector<event> &DepEvents,
328  HandlerFuncT HandlerFunc) {
329  return submit(
330  [&](handler &CGH) {
331  CGH.depends_on(DepEvents);
332  HandlerFunc(CGH);
333  },
334  Self, {});
335 }
336 
337 template <typename HandlerFuncT, typename MemOpFuncT, typename... MemOpArgTs>
338 event queue_impl::submitMemOpHelper(const std::shared_ptr<queue_impl> &Self,
339  const std::vector<event> &DepEvents,
340  HandlerFuncT HandlerFunc,
341  MemOpFuncT MemOpFunc,
342  MemOpArgTs... MemOpArgs) {
343  // We need to submit command and update the last event under same lock if we
344  // have in-order queue.
345  {
346  std::unique_lock<std::mutex> Lock(MMutex, std::defer_lock);
347 
348  std::vector<event> MutableDepEvents;
349  const std::vector<event> &ExpandedDepEvents =
350  getExtendDependencyList(DepEvents, MutableDepEvents, Lock);
351 
352  // If we have a command graph set we need to capture the op through the
353  // handler rather than by-passing the scheduler.
355  ExpandedDepEvents, MContext)) {
357  MemOpFunc(MemOpArgs..., getPIEvents(ExpandedDepEvents),
358  /*PiEvent*/ nullptr, /*EventImplPtr*/ nullptr);
359  return createDiscardedEvent();
360  }
361 
362  event ResEvent = prepareSYCLEventAssociatedWithQueue(Self);
363  auto EventImpl = detail::getSyclObjImpl(ResEvent);
364  MemOpFunc(MemOpArgs..., getPIEvents(ExpandedDepEvents),
365  &EventImpl->getHandleRef(), EventImpl);
366 
367  if (MContext->is_host())
369 
370  if (isInOrder()) {
371  auto &EventToStoreIn =
373  EventToStoreIn = EventImpl;
374  }
375  // Track only if we won't be able to handle it with piQueueFinish.
376  if (MEmulateOOO)
377  addSharedEvent(ResEvent);
378  return discard_or_return(ResEvent);
379  }
380  }
381  return submitWithHandler(Self, DepEvents, HandlerFunc);
382 }
383 
385  std::string &Name, int32_t StreamID,
386  uint64_t &IId) {
387  void *TraceEvent = nullptr;
388  (void)CodeLoc;
389  (void)Name;
390  (void)StreamID;
391  (void)IId;
392 #ifdef XPTI_ENABLE_INSTRUMENTATION
393  constexpr uint16_t NotificationTraceType = xpti::trace_wait_begin;
394  if (!xptiCheckTraceEnabled(StreamID, NotificationTraceType))
395  return TraceEvent;
396 
397  xpti::payload_t Payload;
398  bool HasSourceInfo = false;
399  // We try to create a unique string for the wait() call by combining it with
400  // the queue address
401  xpti::utils::StringHelper NG;
402  Name = NG.nameWithAddress<queue_impl *>("queue.wait", this);
403 
404  if (CodeLoc.fileName()) {
405  // We have source code location information
406  Payload =
407  xpti::payload_t(Name.c_str(), CodeLoc.fileName(), CodeLoc.lineNumber(),
408  CodeLoc.columnNumber(), (void *)this);
409  HasSourceInfo = true;
410  } else {
411  // We have no location information, so we'll use the address of the queue
412  Payload = xpti::payload_t(Name.c_str(), (void *)this);
413  }
414  // wait() calls could be at different user-code locations; We create a new
415  // event based on the code location info and if this has been seen before, a
416  // previously created event will be returned.
417  uint64_t QWaitInstanceNo = 0;
418  xpti::trace_event_data_t *WaitEvent =
419  xptiMakeEvent(Name.c_str(), &Payload, xpti::trace_graph_event,
420  xpti_at::active, &QWaitInstanceNo);
421  IId = QWaitInstanceNo;
422  if (WaitEvent) {
423  device D = get_device();
424  std::string DevStr;
425  if (getSyclObjImpl(D)->is_host())
426  DevStr = "HOST";
427  else if (D.is_cpu())
428  DevStr = "CPU";
429  else if (D.is_gpu())
430  DevStr = "GPU";
431  else if (D.is_accelerator())
432  DevStr = "ACCELERATOR";
433  else
434  DevStr = "UNKNOWN";
435  xpti::addMetadata(WaitEvent, "sycl_device_type", DevStr);
436  if (HasSourceInfo) {
437  xpti::addMetadata(WaitEvent, "sym_function_name", CodeLoc.functionName());
438  xpti::addMetadata(WaitEvent, "sym_source_file_name", CodeLoc.fileName());
439  xpti::addMetadata(WaitEvent, "sym_line_no",
440  static_cast<int32_t>((CodeLoc.lineNumber())));
441  xpti::addMetadata(WaitEvent, "sym_column_no",
442  static_cast<int32_t>((CodeLoc.columnNumber())));
443  }
444  xptiNotifySubscribers(StreamID, xpti::trace_wait_begin, nullptr, WaitEvent,
445  QWaitInstanceNo,
446  static_cast<const void *>(Name.c_str()));
447  TraceEvent = (void *)WaitEvent;
448  }
449 #endif
450  return TraceEvent;
451 }
452 
453 void queue_impl::instrumentationEpilog(void *TelemetryEvent, std::string &Name,
454  int32_t StreamID, uint64_t IId) {
455  (void)TelemetryEvent;
456  (void)Name;
457  (void)StreamID;
458  (void)IId;
459 #ifdef XPTI_ENABLE_INSTRUMENTATION
460  constexpr uint16_t NotificationTraceType = xpti::trace_wait_end;
461  if (!(xptiCheckTraceEnabled(StreamID, NotificationTraceType) &&
462  TelemetryEvent))
463  return;
464  // Close the wait() scope
465  xpti::trace_event_data_t *TraceEvent =
466  (xpti::trace_event_data_t *)TelemetryEvent;
467  xptiNotifySubscribers(StreamID, NotificationTraceType, nullptr, TraceEvent,
468  IId, static_cast<const void *>(Name.c_str()));
469 #endif
470 }
471 
473  (void)CodeLoc;
474 #ifdef XPTI_ENABLE_INSTRUMENTATION
475  void *TelemetryEvent = nullptr;
476  uint64_t IId;
477  std::string Name;
478  int32_t StreamID = xptiRegisterStream(SYCL_STREAM_NAME);
479  TelemetryEvent = instrumentationProlog(CodeLoc, Name, StreamID, IId);
480 #endif
481 
482  if (MGraph.lock()) {
484  "wait cannot be called for a queue which is "
485  "recording to a command graph.");
486  }
487 
488  std::vector<std::weak_ptr<event_impl>> WeakEvents;
489  std::vector<event> SharedEvents;
490  {
491  std::lock_guard<std::mutex> Lock(MMutex);
492  WeakEvents.swap(MEventsWeak);
493  SharedEvents.swap(MEventsShared);
494  }
495  // If the queue is either a host one or does not support OOO (and we use
496  // multiple in-order queues as a result of that), wait for each event
497  // directly. Otherwise, only wait for unenqueued or host task events, starting
498  // from the latest submitted task in order to minimize total amount of calls,
499  // then handle the rest with piQueueFinish.
500  const bool SupportsPiFinish = !is_host() && !MEmulateOOO;
501  for (auto EventImplWeakPtrIt = WeakEvents.rbegin();
502  EventImplWeakPtrIt != WeakEvents.rend(); ++EventImplWeakPtrIt) {
503  if (std::shared_ptr<event_impl> EventImplSharedPtr =
504  EventImplWeakPtrIt->lock()) {
505  // A nullptr PI event indicates that piQueueFinish will not cover it,
506  // either because it's a host task event or an unenqueued one.
507  if (!SupportsPiFinish || nullptr == EventImplSharedPtr->getHandleRef()) {
508  EventImplSharedPtr->wait(EventImplSharedPtr);
509  }
510  }
511  }
512  if (SupportsPiFinish) {
513  const PluginPtr &Plugin = getPlugin();
515  assert(SharedEvents.empty() && "Queues that support calling piQueueFinish "
516  "shouldn't have shared events");
517  } else {
518  for (event &Event : SharedEvents)
519  Event.wait();
520  }
521 
522  std::vector<EventImplPtr> StreamsServiceEvents;
523  {
524  std::lock_guard<std::mutex> Lock(MStreamsServiceEventsMutex);
525  StreamsServiceEvents.swap(MStreamsServiceEvents);
526  }
527  for (const EventImplPtr &Event : StreamsServiceEvents)
528  Event->wait(Event);
529 
530  // If there is an external event set, we need to wait on it.
531  std::optional<event> ExternalEvent = popExternalEvent();
532  if (ExternalEvent)
533  ExternalEvent->wait();
534 
535 #ifdef XPTI_ENABLE_INSTRUMENTATION
536  instrumentationEpilog(TelemetryEvent, Name, StreamID, IId);
537 #endif
538 }
539 
540 pi_native_handle queue_impl::getNative(int32_t &NativeHandleDesc) const {
541  const PluginPtr &Plugin = getPlugin();
542  if (getContextImplPtr()->getBackend() == backend::opencl)
543  Plugin->call<PiApiKind::piQueueRetain>(MQueues[0]);
544  pi_native_handle Handle{};
545  Plugin->call<PiApiKind::piextQueueGetNativeHandle>(MQueues[0], &Handle,
546  &NativeHandleDesc);
547  return Handle;
548 }
549 
551  // Clean up only if a scheduler instance exits.
554 }
555 
557  // If we have in-order queue where events are not discarded then just check
558  // the status of the last event.
559  if (isInOrder() && !MDiscardEvents) {
560  std::lock_guard<std::mutex> Lock(MMutex);
561  return !MLastEventPtr ||
562  MLastEventPtr->get_info<info::event::command_execution_status>() ==
564  }
565 
566  // Check the status of the backend queue if this is not a host queue.
567  if (!is_host()) {
568  pi_bool IsReady = false;
570  MQueues[0], PI_EXT_ONEAPI_QUEUE_INFO_EMPTY, sizeof(pi_bool), &IsReady,
571  nullptr);
572  if (!IsReady)
573  return false;
574  }
575 
576  // We may have events like host tasks which are not submitted to the backend
577  // queue so we need to get their status separately.
578  std::lock_guard<std::mutex> Lock(MMutex);
579  for (event Event : MEventsShared)
580  if (Event.get_info<info::event::command_execution_status>() !=
582  return false;
583 
584  for (auto EventImplWeakPtrIt = MEventsWeak.begin();
585  EventImplWeakPtrIt != MEventsWeak.end(); ++EventImplWeakPtrIt)
586  if (std::shared_ptr<event_impl> EventImplSharedPtr =
587  EventImplWeakPtrIt->lock())
588  if (EventImplSharedPtr->is_host() &&
589  EventImplSharedPtr
590  ->get_info<info::event::command_execution_status>() !=
592  return false;
593 
594  // If we didn't exit early above then it means that all events in the queue
595  // are completed.
596  return true;
597 }
598 
600  if (!(MDiscardEvents))
601  return Event;
602  return createDiscardedEvent();
603 }
604 
605 } // namespace detail
606 } // namespace _V1
607 } // namespace sycl
The context class represents a SYCL context on which kernel functions may be executed.
Definition: context.hpp:51
The Command class represents some action that needs to be performed on one or more memory objects.
Definition: commands.hpp:107
static void advise_usm(const void *Ptr, QueueImplPtr Queue, size_t Len, pi_mem_advice Advice, std::vector< sycl::detail::pi::PiEvent > DepEvents, sycl::detail::pi::PiEvent *OutEvent, const detail::EventImplPtr &OutEventImpl)
static void copy_usm(const void *SrcMem, QueueImplPtr Queue, size_t Len, void *DstMem, std::vector< sycl::detail::pi::PiEvent > DepEvents, sycl::detail::pi::PiEvent *OutEvent, const detail::EventImplPtr &OutEventImpl)
static void copy_to_device_global(const void *DeviceGlobalPtr, bool IsDeviceImageScoped, QueueImplPtr Queue, size_t NumBytes, size_t Offset, const void *SrcMem, const std::vector< sycl::detail::pi::PiEvent > &DepEvents, sycl::detail::pi::PiEvent *OutEvent, const detail::EventImplPtr &OutEventImpl)
static void copy_from_device_global(const void *DeviceGlobalPtr, bool IsDeviceImageScoped, QueueImplPtr Queue, size_t NumBytes, size_t Offset, void *DstMem, const std::vector< sycl::detail::pi::PiEvent > &DepEvents, sycl::detail::pi::PiEvent *OutEvent, const detail::EventImplPtr &OutEventImpl)
static void fill_usm(void *DstMem, QueueImplPtr Queue, size_t Len, int Pattern, std::vector< sycl::detail::pi::PiEvent > DepEvents, sycl::detail::pi::PiEvent *OutEvent, const detail::EventImplPtr &OutEventImpl)
static Scheduler & getInstance()
Definition: scheduler.cpp:261
void cleanUpCmdFusion(sycl::detail::queue_impl *Queue)
Definition: scheduler.cpp:614
static bool areEventsSafeForSchedulerBypass(const std::vector< sycl::event > &DepEvents, ContextImplPtr Context)
Definition: scheduler.cpp:742
event discard_or_return(const event &Event)
Definition: queue_impl.cpp:599
event submit(const std::function< void(handler &)> &CGF, const std::shared_ptr< queue_impl > &Self, const std::shared_ptr< queue_impl > &SecondQueue, const detail::code_location &Loc, const SubmitPostProcessF *PostProcess=nullptr)
Submits a command group function object to the queue, in order to be scheduled for execution on the d...
Definition: queue_impl.hpp:406
std::vector< EventImplPtr > MStreamsServiceEvents
Definition: queue_impl.hpp:937
std::optional< event > popExternalEvent()
Definition: queue_impl.hpp:736
static std::atomic< unsigned long long > MNextAvailableQueueID
Definition: queue_impl.hpp:977
void addEvent(const event &Event)
Stores an event that should be associated with the queue.
Definition: queue_impl.cpp:273
std::vector< sycl::detail::pi::PiQueue > MQueues
List of queues created for FPGA device from a single SYCL queue.
Definition: queue_impl.hpp:918
pi_native_handle getNative(int32_t &NativeHandleDesc) const
Gets the native handle of the SYCL queue.
Definition: queue_impl.cpp:540
unsigned long long MQueueID
Definition: queue_impl.hpp:976
std::vector< std::weak_ptr< event_impl > > MEventsWeak
These events are tracked, but not owned, by the queue.
Definition: queue_impl.hpp:907
event submitMemOpHelper(const std::shared_ptr< queue_impl > &Self, const std::vector< event > &DepEvents, HandlerFuncT HandlerFunc, MemMngrFuncT MemMngrFunc, MemMngrArgTs... MemOpArgs)
Performs submission of a memory operation directly if scheduler can be bypassed, or with a handler ot...
std::vector< event > MEventsShared
Events without data dependencies (such as USM) need an owner, additionally, USM operations are not ad...
Definition: queue_impl.hpp:912
std::mutex MMutex
Protects all the fields that can be changed by class' methods.
Definition: queue_impl.hpp:901
event submitWithHandler(const std::shared_ptr< queue_impl > &Self, const std::vector< event > &DepEvents, HandlerFuncT HandlerFunc)
Helper function for submitting a memory operation with a handler.
Definition: queue_impl.cpp:326
void addSharedEvent(const event &Event)
queue_impl.addEvent tracks events with weak pointers but some events have no other owners.
Definition: queue_impl.cpp:296
const ContextImplPtr MContext
Definition: queue_impl.hpp:904
sycl::detail::pi::PiQueue & getHandleRef()
Definition: queue_impl.hpp:612
event memset(const std::shared_ptr< queue_impl > &Self, void *Ptr, int Value, size_t Count, const std::vector< event > &DepEvents)
Fills the memory pointed by a USM pointer with the value specified.
Definition: queue_impl.cpp:133
const PluginPtr & getPlugin() const
Definition: queue_impl.hpp:361
event mem_advise(const std::shared_ptr< queue_impl > &Self, const void *Ptr, size_t Length, pi_mem_advice Advice, const std::vector< event > &DepEvents)
Provides additional information to the underlying runtime about how different allocations are used.
Definition: queue_impl.cpp:219
event memcpyToDeviceGlobal(const std::shared_ptr< queue_impl > &Self, void *DeviceGlobalPtr, const void *Src, bool IsDeviceImageScope, size_t NumBytes, size_t Offset, const std::vector< event > &DepEvents)
Definition: queue_impl.cpp:230
event memcpyFromDeviceGlobal(const std::shared_ptr< queue_impl > &Self, void *Dest, const void *DeviceGlobalPtr, bool IsDeviceImageScope, size_t NumBytes, size_t Offset, const std::vector< event > &DepEvents)
Definition: queue_impl.cpp:246
event memcpy(const std::shared_ptr< queue_impl > &Self, void *Dest, const void *Src, size_t Count, const std::vector< event > &DepEvents, const code_location &CodeLoc)
Copies data from one memory region to another, both pointed by USM pointers.
Definition: queue_impl.cpp:180
void wait(const detail::code_location &Loc={})
Performs a blocking wait for the completion of all enqueued tasks in the queue.
Definition: queue_impl.cpp:472
bool MEmulateOOO
Indicates that a native out-of-order queue could not be created and we need to emulate it with multip...
Definition: queue_impl.hpp:925
void instrumentationEpilog(void *TelementryEvent, std::string &Name, int32_t StreamID, uint64_t IId)
Definition: queue_impl.cpp:453
const ContextImplPtr & getContextImplPtr() const
Definition: queue_impl.hpp:363
void * instrumentationProlog(const detail::code_location &CodeLoc, std::string &Name, int32_t StreamID, uint64_t &iid)
Definition: queue_impl.cpp:384
std::weak_ptr< ext::oneapi::experimental::detail::graph_impl > MGraph
Definition: queue_impl.hpp:974
const std::vector< event > & getExtendDependencyList(const std::vector< event > &DepEvents, std::vector< event > &MutableVec, std::unique_lock< std::mutex > &QueueLock)
Definition: queue_impl.cpp:111
The SYCL device class encapsulates a single SYCL device on which kernels may be executed.
Definition: device.hpp:64
bool is_accelerator() const
Check if device is an accelerator device.
Definition: device.cpp:83
bool is_gpu() const
Check if device is a GPU device.
Definition: device.cpp:81
bool is_cpu() const
Check if device is a CPU device.
Definition: device.cpp:79
An event object can be used to synchronize memory transfers, enqueues of kernels and signaling barrie...
Definition: event.hpp:44
Command group handler class.
Definition: handler.hpp:458
void depends_on(event Event)
Registers event dependencies on this command group.
Definition: handler.cpp:1376
void memcpy(void *Dest, const void *Src, size_t Count)
Copies data from one memory region to another, each is either a host pointer or a pointer within USM ...
Definition: handler.cpp:946
void mem_advise(const void *Ptr, size_t Length, int Advice)
Provides additional information to the underlying runtime about how different allocations are used.
Definition: handler.cpp:970
void memset(void *Dest, int Value, size_t Count)
Fills the memory pointed by a USM pointer with the value specified.
Definition: handler.cpp:954
__SYCL_EXTERN_STREAM_ATTRS ostream cout
Linked to standard output.
constexpr const char * SYCL_STREAM_NAME
static event createDiscardedEvent()
Definition: queue_impl.cpp:104
static std::vector< sycl::detail::pi::PiEvent > getPIEvents(const std::vector< sycl::event > &DepEvents)
Definition: queue_impl.cpp:32
static const PluginPtr & getPlugin(backend Backend)
Definition: backend.cpp:32
decltype(Obj::impl) getSyclObjImpl(const Obj &SyclObject)
Definition: impl_utils.hpp:30
std::shared_ptr< event_impl > EventImplPtr
Definition: cg.hpp:43
std::shared_ptr< plugin > PluginPtr
Definition: pi.hpp:48
void report(const code_location &CodeLoc)
Definition: queue_impl.cpp:167
static event prepareSYCLEventAssociatedWithQueue(const std::shared_ptr< detail::queue_impl > &QueueImpl)
Definition: queue_impl.cpp:96
std::error_code make_error_code(sycl::errc E) noexcept
Constructs an error code using e and sycl_category()
Definition: exception.cpp:87
Definition: access.hpp:18
static device_ext & get_device(unsigned int id)
Util function to get a device by id.
Definition: device.hpp:777
pi_result piQueueFinish(pi_queue command_queue)
Definition: pi_cuda.cpp:186
uintptr_t pi_native_handle
Definition: pi.h:217
_pi_result
Definition: pi.h:224
pi_uint32 pi_bool
Definition: pi.h:215
@ PI_QUEUE_INFO_REFERENCE_COUNT
Definition: pi.h:501
@ PI_EXT_ONEAPI_QUEUE_INFO_EMPTY
Definition: pi.h:505
_pi_mem_advice
Definition: pi.h:599
pi_result piextQueueGetNativeHandle(pi_queue queue, pi_native_handle *nativeHandle, int32_t *nativeHandleDesc)
Gets the native handle of a PI queue object.
Definition: pi_cuda.cpp:190
pi_result piQueueGetInfo(pi_queue command_queue, pi_queue_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
Definition: pi_cuda.cpp:172
pi_result piQueueRetain(pi_queue command_queue)
Definition: pi_cuda.cpp:180
C++ wrapper of extern "C" PI interfaces.
constexpr unsigned long columnNumber() const noexcept
Definition: common.hpp:88
constexpr const char * fileName() const noexcept
Definition: common.hpp:89
constexpr const char * functionName() const noexcept
Definition: common.hpp:90
constexpr unsigned long lineNumber() const noexcept
Definition: common.hpp:87