DPC++ Runtime
Runtime libraries for oneAPI DPC++
queue_impl.cpp
Go to the documentation of this file.
1 //==------------------ queue_impl.cpp - SYCL queue -------------------------==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include <detail/event_impl.hpp>
11 #include <detail/queue_impl.hpp>
12 #include <sycl/context.hpp>
13 #include <sycl/detail/common.hpp>
14 #include <sycl/detail/pi.hpp>
15 #include <sycl/device.hpp>
16 
17 #include <cstring>
18 #include <utility>
19 
20 #ifdef XPTI_ENABLE_INSTRUMENTATION
21 #include "xpti/xpti_trace_framework.hpp"
22 #include <detail/xpti_registry.hpp>
23 #include <sstream>
24 #endif
25 
26 namespace sycl {
27 inline namespace _V1 {
28 namespace detail {
// Monotonic counter from which each queue_impl draws its unique MQueueID
// (emitted as the "queue_id" XPTI metadata below). Presumably incremented
// in the queue_impl constructor — not visible in this chunk, confirm there.
std::atomic<unsigned long long> queue_impl::MNextAvailableQueueID = 0;
30 
31 static std::vector<sycl::detail::pi::PiEvent>
32 getPIEvents(const std::vector<sycl::event> &DepEvents) {
33  std::vector<sycl::detail::pi::PiEvent> RetPiEvents;
34  for (const sycl::event &Event : DepEvents) {
35  const EventImplPtr &EventImpl = detail::getSyclObjImpl(Event);
36  if (EventImpl->getHandleRef() != nullptr)
37  RetPiEvents.push_back(EventImpl->getHandleRef());
38  }
39  return RetPiEvents;
40 }
41 
42 template <>
43 uint32_t queue_impl::get_info<info::queue::reference_count>() const {
44  sycl::detail::pi::PiResult result = PI_SUCCESS;
45  if (!is_host())
47  MQueues[0], PI_QUEUE_INFO_REFERENCE_COUNT, sizeof(result), &result,
48  nullptr);
49  return result;
50 }
51 
52 template <> context queue_impl::get_info<info::queue::context>() const {
53  return get_context();
54 }
55 
56 template <> device queue_impl::get_info<info::queue::device>() const {
57  return get_device();
58 }
59 
60 template <>
61 typename info::platform::version::return_type
62 queue_impl::get_backend_info<info::platform::version>() const {
63  if (getContextImplPtr()->getBackend() != backend::opencl) {
65  "the info::platform::version info descriptor can "
66  "only be queried with an OpenCL backend");
67  }
68  return get_device().get_platform().get_info<info::platform::version>();
69 }
70 
71 template <>
72 typename info::device::version::return_type
73 queue_impl::get_backend_info<info::device::version>() const {
74  if (getContextImplPtr()->getBackend() != backend::opencl) {
76  "the info::device::version info descriptor can only "
77  "be queried with an OpenCL backend");
78  }
79  return get_device().get_info<info::device::version>();
80 }
81 
82 template <>
83 typename info::device::backend_version::return_type
84 queue_impl::get_backend_info<info::device::backend_version>() const {
85  if (getContextImplPtr()->getBackend() != backend::ext_oneapi_level_zero) {
87  "the info::device::backend_version info descriptor "
88  "can only be queried with a Level Zero backend");
89  }
90  return "";
91  // Currently The Level Zero backend does not define the value of this
92  // information descriptor and implementations are encouraged to return the
93  // empty string as per specification.
94 }
95 
97  const std::shared_ptr<detail::queue_impl> &QueueImpl) {
98  auto EventImpl = std::make_shared<detail::event_impl>(QueueImpl);
99  EventImpl->setContextImpl(detail::getSyclObjImpl(QueueImpl->get_context()));
100  EventImpl->setStateIncomplete();
101  return detail::createSyclObjFromImpl<event>(EventImpl);
102 }
103 
104 static event createDiscardedEvent() {
105  EventImplPtr EventImpl =
106  std::make_shared<event_impl>(event_impl::HES_Discarded);
107  return createSyclObjFromImpl<event>(EventImpl);
108 }
109 
110 const std::vector<event> &
111 queue_impl::getExtendDependencyList(const std::vector<event> &DepEvents,
112  std::vector<event> &MutableVec,
113  std::unique_lock<std::mutex> &QueueLock) {
114  if (!isInOrder())
115  return DepEvents;
116 
117  QueueLock.lock();
118  EventImplPtr ExtraEvent =
120  std::optional<event> ExternalEvent = popExternalEvent();
121 
122  if (!ExternalEvent && !ExtraEvent)
123  return DepEvents;
124 
125  MutableVec = DepEvents;
126  if (ExternalEvent)
127  MutableVec.push_back(*ExternalEvent);
128  if (ExtraEvent)
129  MutableVec.push_back(detail::createSyclObjFromImpl<event>(ExtraEvent));
130  return MutableVec;
131 }
132 
133 event queue_impl::memset(const std::shared_ptr<detail::queue_impl> &Self,
134  void *Ptr, int Value, size_t Count,
135  const std::vector<event> &DepEvents) {
136 #if XPTI_ENABLE_INSTRUMENTATION
137  // We need a code pointer value and we use the object ptr; if code location
138  // information is available, we will have function name and source file
139  // information
140  XPTIScope PrepareNotify((void *)this,
141  (uint16_t)xpti::trace_point_type_t::node_create,
142  SYCL_STREAM_NAME, "memory_transfer_node");
143  PrepareNotify.addMetadata([&](auto TEvent) {
144  xpti::addMetadata(TEvent, "sycl_device",
145  reinterpret_cast<size_t>(
146  MDevice->is_host() ? 0 : MDevice->getHandleRef()));
147  xpti::addMetadata(TEvent, "memory_ptr", reinterpret_cast<size_t>(Ptr));
148  xpti::addMetadata(TEvent, "value_set", Value);
149  xpti::addMetadata(TEvent, "memory_size", Count);
150  xpti::addMetadata(TEvent, "queue_id", MQueueID);
151  });
152  // Before we notifiy the subscribers, we broadcast the 'queue_id', which was a
153  // metadata entry to TLS for use by callback handlers
154  xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, MQueueID);
155  // Notify XPTI about the memset submission
156  PrepareNotify.notify();
157  // Emit a begin/end scope for this call
158  PrepareNotify.scopedNotify((uint16_t)xpti::trace_point_type_t::task_begin);
159 #endif
160 
161  return submitMemOpHelper(
162  Self, DepEvents, [&](handler &CGH) { CGH.memset(Ptr, Value, Count); },
163  [](const auto &...Args) { MemoryManager::fill_usm(Args...); }, Ptr, Self,
164  Count, Value);
165 }
166 
167 void report(const code_location &CodeLoc) {
168  std::cout << "Exception caught at ";
169  if (CodeLoc.fileName())
170  std::cout << "File: " << CodeLoc.fileName();
171  if (CodeLoc.functionName())
172  std::cout << " | Function: " << CodeLoc.functionName();
173  if (CodeLoc.lineNumber())
174  std::cout << " | Line: " << CodeLoc.lineNumber();
175  if (CodeLoc.columnNumber())
176  std::cout << " | Column: " << CodeLoc.columnNumber();
177  std::cout << '\n';
178 }
179 
180 event queue_impl::memcpy(const std::shared_ptr<detail::queue_impl> &Self,
181  void *Dest, const void *Src, size_t Count,
182  const std::vector<event> &DepEvents,
183  const code_location &CodeLoc) {
184 #if XPTI_ENABLE_INSTRUMENTATION
185  // We need a code pointer value and we duse the object ptr; If code location
186  // is available, we use the source file information along with the object
187  // pointer.
188  XPTIScope PrepareNotify((void *)this,
189  (uint16_t)xpti::trace_point_type_t::node_create,
190  SYCL_STREAM_NAME, "memory_transfer_node");
191  PrepareNotify.addMetadata([&](auto TEvent) {
192  xpti::addMetadata(TEvent, "sycl_device",
193  reinterpret_cast<size_t>(
194  MDevice->is_host() ? 0 : MDevice->getHandleRef()));
195  xpti::addMetadata(TEvent, "src_memory_ptr", reinterpret_cast<size_t>(Src));
196  xpti::addMetadata(TEvent, "dest_memory_ptr",
197  reinterpret_cast<size_t>(Dest));
198  xpti::addMetadata(TEvent, "memory_size", Count);
199  xpti::addMetadata(TEvent, "queue_id", MQueueID);
200  });
201  xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, MQueueID);
202  // Notify XPTI about the memset submission
203  PrepareNotify.notify();
204  // Emit a begin/end scope for this call
205  PrepareNotify.scopedNotify((uint16_t)xpti::trace_point_type_t::task_begin);
206 #endif
207 
208  if ((!Src || !Dest) && Count != 0) {
209  report(CodeLoc);
210  throw runtime_error("NULL pointer argument in memory copy operation.",
211  PI_ERROR_INVALID_VALUE);
212  }
213  return submitMemOpHelper(
214  Self, DepEvents, [&](handler &CGH) { CGH.memcpy(Dest, Src, Count); },
215  [](const auto &...Args) { MemoryManager::copy_usm(Args...); }, Src, Self,
216  Count, Dest);
217 }
218 
219 event queue_impl::mem_advise(const std::shared_ptr<detail::queue_impl> &Self,
220  const void *Ptr, size_t Length,
221  pi_mem_advice Advice,
222  const std::vector<event> &DepEvents) {
223  return submitMemOpHelper(
224  Self, DepEvents,
225  [&](handler &CGH) { CGH.mem_advise(Ptr, Length, Advice); },
226  [](const auto &...Args) { MemoryManager::advise_usm(Args...); }, Ptr,
227  Self, Length, Advice);
228 }
229 
231  const std::shared_ptr<detail::queue_impl> &Self, void *DeviceGlobalPtr,
232  const void *Src, bool IsDeviceImageScope, size_t NumBytes, size_t Offset,
233  const std::vector<event> &DepEvents) {
234  return submitMemOpHelper(
235  Self, DepEvents,
236  [&](handler &CGH) {
237  CGH.memcpyToDeviceGlobal(DeviceGlobalPtr, Src, IsDeviceImageScope,
238  NumBytes, Offset);
239  },
240  [](const auto &...Args) {
242  },
243  DeviceGlobalPtr, IsDeviceImageScope, Self, NumBytes, Offset, Src);
244 }
245 
247  const std::shared_ptr<detail::queue_impl> &Self, void *Dest,
248  const void *DeviceGlobalPtr, bool IsDeviceImageScope, size_t NumBytes,
249  size_t Offset, const std::vector<event> &DepEvents) {
250  return submitMemOpHelper(
251  Self, DepEvents,
252  [&](handler &CGH) {
253  CGH.memcpyFromDeviceGlobal(Dest, DeviceGlobalPtr, IsDeviceImageScope,
254  NumBytes, Offset);
255  },
256  [](const auto &...Args) {
258  },
259  DeviceGlobalPtr, IsDeviceImageScope, Self, NumBytes, Offset, Dest);
260 }
261 
263  std::lock_guard<std::mutex> Lock{MMutex};
264  if (MDiscardEvents)
265  return createDiscardedEvent();
266  if (!MGraph.expired() && MGraphLastEventPtr)
267  return detail::createSyclObjFromImpl<event>(MGraphLastEventPtr);
268  if (!MLastEventPtr)
269  MLastEventPtr = std::make_shared<event_impl>(std::nullopt);
270  return detail::createSyclObjFromImpl<event>(MLastEventPtr);
271 }
272 
273 void queue_impl::addEvent(const event &Event) {
274  EventImplPtr EImpl = getSyclObjImpl(Event);
275  assert(EImpl && "Event implementation is missing");
276  auto *Cmd = static_cast<Command *>(EImpl->getCommand());
277  if (!Cmd) {
278  // if there is no command on the event, we cannot track it with MEventsWeak
279  // as that will leave it with no owner. Track in MEventsShared only if we're
280  // unable to call piQueueFinish during wait.
281  if (is_host() || MEmulateOOO)
282  addSharedEvent(Event);
283  }
284  // As long as the queue supports piQueueFinish we only need to store events
285  // for unenqueued commands and host tasks.
286  else if (is_host() || MEmulateOOO || EImpl->getHandleRef() == nullptr) {
287  std::weak_ptr<event_impl> EventWeakPtr{EImpl};
288  std::lock_guard<std::mutex> Lock{MMutex};
289  MEventsWeak.push_back(std::move(EventWeakPtr));
290  }
291 }
292 
296 void queue_impl::addSharedEvent(const event &Event) {
297  assert(is_host() || MEmulateOOO);
298  std::lock_guard<std::mutex> Lock(MMutex);
299  // Events stored in MEventsShared are not released anywhere else aside from
300  // calls to queue::wait/wait_and_throw, which a user application might not
301  // make, and ~queue_impl(). If the number of events grows large enough,
302  // there's a good chance that most of them are already completed and ownership
303  // of them can be released.
304  const size_t EventThreshold = 128;
305  if (MEventsShared.size() >= EventThreshold) {
306  // Generally, the vector is ordered so that the oldest events are in the
307  // front and the newer events are in the end. So, search to find the first
308  // event that isn't yet complete. All the events prior to that can be
309  // erased. This could leave some few events further on that have completed
310  // not yet erased, but that is OK. This cleanup doesn't have to be perfect.
311  // This also keeps the algorithm linear rather than quadratic because it
312  // doesn't continually recheck things towards the back of the list that
313  // really haven't had time to complete.
314  MEventsShared.erase(
315  MEventsShared.begin(),
316  std::find_if(
317  MEventsShared.begin(), MEventsShared.end(), [](const event &E) {
318  return E.get_info<info::event::command_execution_status>() !=
319  info::event_command_status::complete;
320  }));
321  }
322  MEventsShared.push_back(Event);
323 }
324 
325 static bool
326 areEventsSafeForSchedulerBypass(const std::vector<sycl::event> &DepEvents,
327  ContextImplPtr Context) {
328  auto CheckEvent = [&Context](const sycl::event &Event) {
329  const EventImplPtr &SyclEventImplPtr = detail::getSyclObjImpl(Event);
330  // Events that don't have an initialized context are throwaway events that
331  // don't represent actual dependencies. Calling getContextImpl() would set
332  // their context, which we wish to avoid as it is expensive.
333  // NOP events also don't represent actual dependencies.
334  if ((!SyclEventImplPtr->isContextInitialized() &&
335  !SyclEventImplPtr->is_host()) ||
336  SyclEventImplPtr->isNOP()) {
337  return true;
338  }
339  if (SyclEventImplPtr->is_host()) {
340  return SyclEventImplPtr->isCompleted();
341  }
342  // Cross-context dependencies can't be passed to the backend directly.
343  if (SyclEventImplPtr->getContextImpl() != Context)
344  return false;
345 
346  // A nullptr here means that the commmand does not produce a PI event or it
347  // hasn't been enqueued yet.
348  return SyclEventImplPtr->getHandleRef() != nullptr;
349  };
350 
351  return std::all_of(
352  DepEvents.begin(), DepEvents.end(),
353  [&CheckEvent](const sycl::event &Event) { return CheckEvent(Event); });
354 }
355 
356 template <typename HandlerFuncT>
357 event queue_impl::submitWithHandler(const std::shared_ptr<queue_impl> &Self,
358  const std::vector<event> &DepEvents,
359  HandlerFuncT HandlerFunc) {
360  return submit(
361  [&](handler &CGH) {
362  CGH.depends_on(DepEvents);
363  HandlerFunc(CGH);
364  },
365  Self, {});
366 }
367 
368 template <typename HandlerFuncT, typename MemOpFuncT, typename... MemOpArgTs>
369 event queue_impl::submitMemOpHelper(const std::shared_ptr<queue_impl> &Self,
370  const std::vector<event> &DepEvents,
371  HandlerFuncT HandlerFunc,
372  MemOpFuncT MemOpFunc,
373  MemOpArgTs... MemOpArgs) {
374  // We need to submit command and update the last event under same lock if we
375  // have in-order queue.
376  {
377  std::unique_lock<std::mutex> Lock(MMutex, std::defer_lock);
378 
379  std::vector<event> MutableDepEvents;
380  const std::vector<event> &ExpandedDepEvents =
381  getExtendDependencyList(DepEvents, MutableDepEvents, Lock);
382 
383  // If we have a command graph set we need to capture the op through the
384  // handler rather than by-passing the scheduler.
385  if (MGraph.expired() &&
386  areEventsSafeForSchedulerBypass(ExpandedDepEvents, MContext)) {
388  MemOpFunc(MemOpArgs..., getPIEvents(ExpandedDepEvents),
389  /*PiEvent*/ nullptr, /*EventImplPtr*/ nullptr);
390  return createDiscardedEvent();
391  }
392 
393  event ResEvent = prepareSYCLEventAssociatedWithQueue(Self);
394  auto EventImpl = detail::getSyclObjImpl(ResEvent);
395  MemOpFunc(MemOpArgs..., getPIEvents(ExpandedDepEvents),
396  &EventImpl->getHandleRef(), EventImpl);
397 
398  if (MContext->is_host())
400 
401  if (isInOrder()) {
402  auto &EventToStoreIn =
404  EventToStoreIn = EventImpl;
405  }
406  // Track only if we won't be able to handle it with piQueueFinish.
407  if (MEmulateOOO)
408  addSharedEvent(ResEvent);
409  return discard_or_return(ResEvent);
410  }
411  }
412  return submitWithHandler(Self, DepEvents, HandlerFunc);
413 }
414 
416  std::string &Name, int32_t StreamID,
417  uint64_t &IId) {
418  void *TraceEvent = nullptr;
419  (void)CodeLoc;
420  (void)Name;
421  (void)StreamID;
422  (void)IId;
423 #ifdef XPTI_ENABLE_INSTRUMENTATION
424  constexpr uint16_t NotificationTraceType = xpti::trace_wait_begin;
425  if (!xptiCheckTraceEnabled(StreamID, NotificationTraceType))
426  return TraceEvent;
427 
428  xpti::payload_t Payload;
429  bool HasSourceInfo = false;
430  // We try to create a unique string for the wait() call by combining it with
431  // the queue address
432  xpti::utils::StringHelper NG;
433  Name = NG.nameWithAddress<queue_impl *>("queue.wait", this);
434 
435  if (CodeLoc.fileName()) {
436  // We have source code location information
437  Payload =
438  xpti::payload_t(Name.c_str(), CodeLoc.fileName(), CodeLoc.lineNumber(),
439  CodeLoc.columnNumber(), (void *)this);
440  HasSourceInfo = true;
441  } else {
442  // We have no location information, so we'll use the address of the queue
443  Payload = xpti::payload_t(Name.c_str(), (void *)this);
444  }
445  // wait() calls could be at different user-code locations; We create a new
446  // event based on the code location info and if this has been seen before, a
447  // previously created event will be returned.
448  uint64_t QWaitInstanceNo = 0;
449  xpti::trace_event_data_t *WaitEvent =
450  xptiMakeEvent(Name.c_str(), &Payload, xpti::trace_graph_event,
451  xpti_at::active, &QWaitInstanceNo);
452  IId = QWaitInstanceNo;
453  if (WaitEvent) {
454  device D = get_device();
455  std::string DevStr;
456  if (getSyclObjImpl(D)->is_host())
457  DevStr = "HOST";
458  else if (D.is_cpu())
459  DevStr = "CPU";
460  else if (D.is_gpu())
461  DevStr = "GPU";
462  else if (D.is_accelerator())
463  DevStr = "ACCELERATOR";
464  else
465  DevStr = "UNKNOWN";
466  xpti::addMetadata(WaitEvent, "sycl_device_type", DevStr);
467  if (HasSourceInfo) {
468  xpti::addMetadata(WaitEvent, "sym_function_name", CodeLoc.functionName());
469  xpti::addMetadata(WaitEvent, "sym_source_file_name", CodeLoc.fileName());
470  xpti::addMetadata(WaitEvent, "sym_line_no",
471  static_cast<int32_t>((CodeLoc.lineNumber())));
472  xpti::addMetadata(WaitEvent, "sym_column_no",
473  static_cast<int32_t>((CodeLoc.columnNumber())));
474  }
475  xptiNotifySubscribers(StreamID, xpti::trace_wait_begin, nullptr, WaitEvent,
476  QWaitInstanceNo,
477  static_cast<const void *>(Name.c_str()));
478  TraceEvent = (void *)WaitEvent;
479  }
480 #endif
481  return TraceEvent;
482 }
483 
484 void queue_impl::instrumentationEpilog(void *TelemetryEvent, std::string &Name,
485  int32_t StreamID, uint64_t IId) {
486  (void)TelemetryEvent;
487  (void)Name;
488  (void)StreamID;
489  (void)IId;
490 #ifdef XPTI_ENABLE_INSTRUMENTATION
491  constexpr uint16_t NotificationTraceType = xpti::trace_wait_end;
492  if (!(xptiCheckTraceEnabled(StreamID, NotificationTraceType) &&
493  TelemetryEvent))
494  return;
495  // Close the wait() scope
496  xpti::trace_event_data_t *TraceEvent =
497  (xpti::trace_event_data_t *)TelemetryEvent;
498  xptiNotifySubscribers(StreamID, NotificationTraceType, nullptr, TraceEvent,
499  IId, static_cast<const void *>(Name.c_str()));
500 #endif
501 }
502 
504  (void)CodeLoc;
505 #ifdef XPTI_ENABLE_INSTRUMENTATION
506  void *TelemetryEvent = nullptr;
507  uint64_t IId;
508  std::string Name;
509  int32_t StreamID = xptiRegisterStream(SYCL_STREAM_NAME);
510  TelemetryEvent = instrumentationProlog(CodeLoc, Name, StreamID, IId);
511 #endif
512 
513  if (MGraph.lock()) {
515  "wait cannot be called for a queue which is "
516  "recording to a command graph.");
517  }
518 
519  std::vector<std::weak_ptr<event_impl>> WeakEvents;
520  std::vector<event> SharedEvents;
521  {
522  std::lock_guard<std::mutex> Lock(MMutex);
523  WeakEvents.swap(MEventsWeak);
524  SharedEvents.swap(MEventsShared);
525  }
526  // If the queue is either a host one or does not support OOO (and we use
527  // multiple in-order queues as a result of that), wait for each event
528  // directly. Otherwise, only wait for unenqueued or host task events, starting
529  // from the latest submitted task in order to minimize total amount of calls,
530  // then handle the rest with piQueueFinish.
531  const bool SupportsPiFinish = !is_host() && !MEmulateOOO;
532  for (auto EventImplWeakPtrIt = WeakEvents.rbegin();
533  EventImplWeakPtrIt != WeakEvents.rend(); ++EventImplWeakPtrIt) {
534  if (std::shared_ptr<event_impl> EventImplSharedPtr =
535  EventImplWeakPtrIt->lock()) {
536  // A nullptr PI event indicates that piQueueFinish will not cover it,
537  // either because it's a host task event or an unenqueued one.
538  if (!SupportsPiFinish || nullptr == EventImplSharedPtr->getHandleRef()) {
539  EventImplSharedPtr->wait(EventImplSharedPtr);
540  }
541  }
542  }
543  if (SupportsPiFinish) {
544  const PluginPtr &Plugin = getPlugin();
546  assert(SharedEvents.empty() && "Queues that support calling piQueueFinish "
547  "shouldn't have shared events");
548  } else {
549  for (event &Event : SharedEvents)
550  Event.wait();
551  }
552 
553  std::vector<EventImplPtr> StreamsServiceEvents;
554  {
555  std::lock_guard<std::mutex> Lock(MStreamsServiceEventsMutex);
556  StreamsServiceEvents.swap(MStreamsServiceEvents);
557  }
558  for (const EventImplPtr &Event : StreamsServiceEvents)
559  Event->wait(Event);
560 
561  // If there is an external event set, we need to wait on it.
562  std::optional<event> ExternalEvent = popExternalEvent();
563  if (ExternalEvent)
564  ExternalEvent->wait();
565 
566 #ifdef XPTI_ENABLE_INSTRUMENTATION
567  instrumentationEpilog(TelemetryEvent, Name, StreamID, IId);
568 #endif
569 }
570 
571 pi_native_handle queue_impl::getNative(int32_t &NativeHandleDesc) const {
572  const PluginPtr &Plugin = getPlugin();
573  if (getContextImplPtr()->getBackend() == backend::opencl)
574  Plugin->call<PiApiKind::piQueueRetain>(MQueues[0]);
575  pi_native_handle Handle{};
576  Plugin->call<PiApiKind::piextQueueGetNativeHandle>(MQueues[0], &Handle,
577  &NativeHandleDesc);
578  return Handle;
579 }
580 
582  // Clean up only if a scheduler instance exits.
585 }
586 
588  // If we have in-order queue where events are not discarded then just check
589  // the status of the last event.
590  if (isInOrder() && !MDiscardEvents) {
591  std::lock_guard<std::mutex> Lock(MMutex);
592  return !MLastEventPtr ||
593  MLastEventPtr->get_info<info::event::command_execution_status>() ==
595  }
596 
597  // Check the status of the backend queue if this is not a host queue.
598  if (!is_host()) {
599  pi_bool IsReady = false;
601  MQueues[0], PI_EXT_ONEAPI_QUEUE_INFO_EMPTY, sizeof(pi_bool), &IsReady,
602  nullptr);
603  if (!IsReady)
604  return false;
605  }
606 
607  // We may have events like host tasks which are not submitted to the backend
608  // queue so we need to get their status separately.
609  std::lock_guard<std::mutex> Lock(MMutex);
610  for (event Event : MEventsShared)
611  if (Event.get_info<info::event::command_execution_status>() !=
613  return false;
614 
615  for (auto EventImplWeakPtrIt = MEventsWeak.begin();
616  EventImplWeakPtrIt != MEventsWeak.end(); ++EventImplWeakPtrIt)
617  if (std::shared_ptr<event_impl> EventImplSharedPtr =
618  EventImplWeakPtrIt->lock())
619  if (EventImplSharedPtr->is_host() &&
620  EventImplSharedPtr
621  ->get_info<info::event::command_execution_status>() !=
623  return false;
624 
625  // If we didn't exit early above then it means that all events in the queue
626  // are completed.
627  return true;
628 }
629 
631  if (!(MDiscardEvents))
632  return Event;
633  return createDiscardedEvent();
634 }
635 
636 } // namespace detail
637 } // namespace _V1
638 } // namespace sycl
The context class represents a SYCL context on which kernel functions may be executed.
Definition: context.hpp:51
The Command class represents some action that needs to be performed on one or more memory objects.
Definition: commands.hpp:107
static void advise_usm(const void *Ptr, QueueImplPtr Queue, size_t Len, pi_mem_advice Advice, std::vector< sycl::detail::pi::PiEvent > DepEvents, sycl::detail::pi::PiEvent *OutEvent, const detail::EventImplPtr &OutEventImpl)
static void copy_usm(const void *SrcMem, QueueImplPtr Queue, size_t Len, void *DstMem, std::vector< sycl::detail::pi::PiEvent > DepEvents, sycl::detail::pi::PiEvent *OutEvent, const detail::EventImplPtr &OutEventImpl)
static void copy_to_device_global(const void *DeviceGlobalPtr, bool IsDeviceImageScoped, QueueImplPtr Queue, size_t NumBytes, size_t Offset, const void *SrcMem, const std::vector< sycl::detail::pi::PiEvent > &DepEvents, sycl::detail::pi::PiEvent *OutEvent, const detail::EventImplPtr &OutEventImpl)
static void copy_from_device_global(const void *DeviceGlobalPtr, bool IsDeviceImageScoped, QueueImplPtr Queue, size_t NumBytes, size_t Offset, void *DstMem, const std::vector< sycl::detail::pi::PiEvent > &DepEvents, sycl::detail::pi::PiEvent *OutEvent, const detail::EventImplPtr &OutEventImpl)
static void fill_usm(void *DstMem, QueueImplPtr Queue, size_t Len, int Pattern, std::vector< sycl::detail::pi::PiEvent > DepEvents, sycl::detail::pi::PiEvent *OutEvent, const detail::EventImplPtr &OutEventImpl)
static Scheduler & getInstance()
Definition: scheduler.cpp:261
void cleanUpCmdFusion(sycl::detail::queue_impl *Queue)
Definition: scheduler.cpp:614
event discard_or_return(const event &Event)
Definition: queue_impl.cpp:630
event submit(const std::function< void(handler &)> &CGF, const std::shared_ptr< queue_impl > &Self, const std::shared_ptr< queue_impl > &SecondQueue, const detail::code_location &Loc, const SubmitPostProcessF *PostProcess=nullptr)
Submits a command group function object to the queue, in order to be scheduled for execution on the d...
Definition: queue_impl.hpp:406
std::vector< EventImplPtr > MStreamsServiceEvents
Definition: queue_impl.hpp:938
std::optional< event > popExternalEvent()
Definition: queue_impl.hpp:736
static std::atomic< unsigned long long > MNextAvailableQueueID
Definition: queue_impl.hpp:978
void addEvent(const event &Event)
Stores an event that should be associated with the queue.
Definition: queue_impl.cpp:273
std::vector< sycl::detail::pi::PiQueue > MQueues
List of queues created for FPGA device from a single SYCL queue.
Definition: queue_impl.hpp:919
pi_native_handle getNative(int32_t &NativeHandleDesc) const
Gets the native handle of the SYCL queue.
Definition: queue_impl.cpp:571
unsigned long long MQueueID
Definition: queue_impl.hpp:977
std::vector< std::weak_ptr< event_impl > > MEventsWeak
These events are tracked, but not owned, by the queue.
Definition: queue_impl.hpp:908
event submitMemOpHelper(const std::shared_ptr< queue_impl > &Self, const std::vector< event > &DepEvents, HandlerFuncT HandlerFunc, MemMngrFuncT MemMngrFunc, MemMngrArgTs... MemOpArgs)
Performs submission of a memory operation directly if scheduler can be bypassed, or with a handler ot...
std::vector< event > MEventsShared
Events without data dependencies (such as USM) need an owner, additionally, USM operations are not ad...
Definition: queue_impl.hpp:913
std::mutex MMutex
Protects all the fields that can be changed by class' methods.
Definition: queue_impl.hpp:902
event submitWithHandler(const std::shared_ptr< queue_impl > &Self, const std::vector< event > &DepEvents, HandlerFuncT HandlerFunc)
Helper function for submitting a memory operation with a handler.
Definition: queue_impl.cpp:357
void addSharedEvent(const event &Event)
queue_impl.addEvent tracks events with weak pointers but some events have no other owners.
Definition: queue_impl.cpp:296
const ContextImplPtr MContext
Definition: queue_impl.hpp:905
sycl::detail::pi::PiQueue & getHandleRef()
Definition: queue_impl.hpp:612
event memset(const std::shared_ptr< queue_impl > &Self, void *Ptr, int Value, size_t Count, const std::vector< event > &DepEvents)
Fills the memory pointed by a USM pointer with the value specified.
Definition: queue_impl.cpp:133
const PluginPtr & getPlugin() const
Definition: queue_impl.hpp:361
event mem_advise(const std::shared_ptr< queue_impl > &Self, const void *Ptr, size_t Length, pi_mem_advice Advice, const std::vector< event > &DepEvents)
Provides additional information to the underlying runtime about how different allocations are used.
Definition: queue_impl.cpp:219
event memcpyToDeviceGlobal(const std::shared_ptr< queue_impl > &Self, void *DeviceGlobalPtr, const void *Src, bool IsDeviceImageScope, size_t NumBytes, size_t Offset, const std::vector< event > &DepEvents)
Definition: queue_impl.cpp:230
event memcpyFromDeviceGlobal(const std::shared_ptr< queue_impl > &Self, void *Dest, const void *DeviceGlobalPtr, bool IsDeviceImageScope, size_t NumBytes, size_t Offset, const std::vector< event > &DepEvents)
Definition: queue_impl.cpp:246
event memcpy(const std::shared_ptr< queue_impl > &Self, void *Dest, const void *Src, size_t Count, const std::vector< event > &DepEvents, const code_location &CodeLoc)
Copies data from one memory region to another, both pointed by USM pointers.
Definition: queue_impl.cpp:180
void wait(const detail::code_location &Loc={})
Performs a blocking wait for the completion of all enqueued tasks in the queue.
Definition: queue_impl.cpp:503
bool MEmulateOOO
Indicates that a native out-of-order queue could not be created and we need to emulate it with multip...
Definition: queue_impl.hpp:926
void instrumentationEpilog(void *TelemetryEvent, std::string &Name, int32_t StreamID, uint64_t IId)
Definition: queue_impl.cpp:484
const ContextImplPtr & getContextImplPtr() const
Definition: queue_impl.hpp:363
void * instrumentationProlog(const detail::code_location &CodeLoc, std::string &Name, int32_t StreamID, uint64_t &iid)
Definition: queue_impl.cpp:415
std::weak_ptr< ext::oneapi::experimental::detail::graph_impl > MGraph
Definition: queue_impl.hpp:975
const std::vector< event > & getExtendDependencyList(const std::vector< event > &DepEvents, std::vector< event > &MutableVec, std::unique_lock< std::mutex > &QueueLock)
Definition: queue_impl.cpp:111
The SYCL device class encapsulates a single SYCL device on which kernels may be executed.
Definition: device.hpp:64
bool is_accelerator() const
Check if device is an accelerator device.
Definition: device.cpp:83
bool is_gpu() const
Check if device is a GPU device.
Definition: device.cpp:81
bool is_cpu() const
Check if device is a CPU device.
Definition: device.cpp:79
An event object can be used to synchronize memory transfers, enqueues of kernels and signaling barrie...
Definition: event.hpp:44
Command group handler class.
Definition: handler.hpp:458
void depends_on(event Event)
Registers event dependencies on this command group.
Definition: handler.cpp:1374
void memcpy(void *Dest, const void *Src, size_t Count)
Copies data from one memory region to another, each is either a host pointer or a pointer within USM ...
Definition: handler.cpp:944
void mem_advise(const void *Ptr, size_t Length, int Advice)
Provides additional information to the underlying runtime about how different allocations are used.
Definition: handler.cpp:968
void memset(void *Dest, int Value, size_t Count)
Fills the memory pointed by a USM pointer with the value specified.
Definition: handler.cpp:952
__SYCL_EXTERN_STREAM_ATTRS ostream cout
Linked to standard output.
constexpr const char * SYCL_STREAM_NAME
static event createDiscardedEvent()
Definition: queue_impl.cpp:104
static std::vector< sycl::detail::pi::PiEvent > getPIEvents(const std::vector< sycl::event > &DepEvents)
Definition: queue_impl.cpp:32
std::shared_ptr< sycl::detail::context_impl > ContextImplPtr
Definition: event_impl.hpp:32
static const PluginPtr & getPlugin(backend Backend)
Definition: backend.cpp:32
decltype(Obj::impl) getSyclObjImpl(const Obj &SyclObject)
Definition: impl_utils.hpp:30
std::shared_ptr< event_impl > EventImplPtr
Definition: cg.hpp:43
std::shared_ptr< plugin > PluginPtr
Definition: pi.hpp:48
void report(const code_location &CodeLoc)
Definition: queue_impl.cpp:167
static bool areEventsSafeForSchedulerBypass(const std::vector< sycl::event > &DepEvents, ContextImplPtr Context)
Definition: queue_impl.cpp:326
static event prepareSYCLEventAssociatedWithQueue(const std::shared_ptr< detail::queue_impl > &QueueImpl)
Definition: queue_impl.cpp:96
std::error_code make_error_code(sycl::errc E) noexcept
Constructs an error code using e and sycl_category()
Definition: exception.cpp:87
Definition: access.hpp:18
static device_ext & get_device(unsigned int id)
Util function to get a device by id.
Definition: device.hpp:777
pi_result piQueueFinish(pi_queue command_queue)
Definition: pi_cuda.cpp:186
uintptr_t pi_native_handle
Definition: pi.h:217
_pi_result
Definition: pi.h:224
pi_uint32 pi_bool
Definition: pi.h:215
@ PI_QUEUE_INFO_REFERENCE_COUNT
Definition: pi.h:501
@ PI_EXT_ONEAPI_QUEUE_INFO_EMPTY
Definition: pi.h:505
_pi_mem_advice
Definition: pi.h:599
pi_result piextQueueGetNativeHandle(pi_queue queue, pi_native_handle *nativeHandle, int32_t *nativeHandleDesc)
Gets the native handle of a PI queue object.
Definition: pi_cuda.cpp:190
pi_result piQueueGetInfo(pi_queue command_queue, pi_queue_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
Definition: pi_cuda.cpp:172
pi_result piQueueRetain(pi_queue command_queue)
Definition: pi_cuda.cpp:180
C++ wrapper of extern "C" PI interfaces.
bool all_of(const simd_mask< _Tp, _Abi > &) noexcept
constexpr unsigned long columnNumber() const noexcept
Definition: common.hpp:88
constexpr const char * fileName() const noexcept
Definition: common.hpp:89
constexpr const char * functionName() const noexcept
Definition: common.hpp:90
constexpr unsigned long lineNumber() const noexcept
Definition: common.hpp:87