DPC++ Runtime
Runtime libraries for oneAPI DPC++
queue_impl.cpp
Go to the documentation of this file.
1 //==------------------ queue_impl.cpp - SYCL queue -------------------------==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
#include <detail/event_impl.hpp>
#include <detail/memory_manager.hpp>
#include <detail/queue_impl.hpp>
#include <sycl/context.hpp>
#include <sycl/detail/common.hpp>
#include <sycl/detail/pi.hpp>
#include <sycl/device.hpp>

#include <cstring>
#include <utility>
19 
20 #ifdef XPTI_ENABLE_INSTRUMENTATION
21 #include "xpti/xpti_trace_framework.hpp"
22 #include <detail/xpti_registry.hpp>
23 #include <sstream>
24 #endif
25 
26 namespace sycl {
28 namespace detail {
29 template <>
30 uint32_t queue_impl::get_info<info::queue::reference_count>() const {
31  RT::PiResult result = PI_SUCCESS;
32  if (!is_host())
34  MQueues[0], PI_QUEUE_INFO_REFERENCE_COUNT, sizeof(result), &result,
35  nullptr);
36  return result;
37 }
38 
39 template <> context queue_impl::get_info<info::queue::context>() const {
40  return get_context();
41 }
42 
43 template <> device queue_impl::get_info<info::queue::device>() const {
44  return get_device();
45 }
46 
47 static event
48 prepareUSMEvent(const std::shared_ptr<detail::queue_impl> &QueueImpl,
49  RT::PiEvent NativeEvent) {
50  auto EventImpl = std::make_shared<detail::event_impl>(QueueImpl);
51  EventImpl->getHandleRef() = NativeEvent;
52  EventImpl->setContextImpl(detail::getSyclObjImpl(QueueImpl->get_context()));
53  EventImpl->setStateIncomplete();
54  return detail::createSyclObjFromImpl<event>(EventImpl);
55 }
56 
57 static event createDiscardedEvent() {
58  EventImplPtr EventImpl =
59  std::make_shared<event_impl>(event_impl::HES_Discarded);
60  return createSyclObjFromImpl<event>(EventImpl);
61 }
62 
63 event queue_impl::memset(const std::shared_ptr<detail::queue_impl> &Self,
64  void *Ptr, int Value, size_t Count,
65  const std::vector<event> &DepEvents) {
66 #if XPTI_ENABLE_INSTRUMENTATION
67  // We need a code pointer value and we use the object ptr; if code location
68  // information is available, we will have function name and source file
69  // information
70  XPTIScope PrepareNotify((void *)this,
71  (uint16_t)xpti::trace_point_type_t::node_create,
72  SYCL_MEM_ALLOC_STREAM_NAME, "queue.memset()");
73  PrepareNotify.addMetadata([&](auto TEvent) {
74  xpti::addMetadata(TEvent, "sycl_device",
75  reinterpret_cast<size_t>(
76  MDevice->is_host() ? 0 : MDevice->getHandleRef()));
77  xpti::addMetadata(TEvent, "memory_ptr", reinterpret_cast<size_t>(Ptr));
78  xpti::addMetadata(TEvent, "value_set", Value);
79  xpti::addMetadata(TEvent, "memory_size", Count);
80  });
81  // Notify XPTI about the memset submission
82  PrepareNotify.notify();
83  // Emit a begin/end scope for this call
84  PrepareNotify.scopedNotify((uint16_t)xpti::trace_point_type_t::task_begin);
85 #endif
86  if (MHasDiscardEventsSupport) {
87  MemoryManager::fill_usm(Ptr, Self, Count, Value,
88  getOrWaitEvents(DepEvents, MContext), nullptr);
89  return createDiscardedEvent();
90  }
91  event ResEvent;
92  {
93  // We need to submit command and update the last event under same lock if we
94  // have in-order queue.
95  auto ScopeLock = isInOrder() ? std::unique_lock<std::mutex>(MLastEventMtx)
96  : std::unique_lock<std::mutex>();
97  // If the last submitted command in the in-order queue is host_task then
98  // wait for it before submitting usm command.
99  if (isInOrder() && (MLastCGType == CG::CGTYPE::CodeplayHostTask ||
100  MLastCGType == CG::CGTYPE::CodeplayInteropTask))
101  MLastEvent.wait();
102 
103  RT::PiEvent NativeEvent{};
104  MemoryManager::fill_usm(Ptr, Self, Count, Value,
105  getOrWaitEvents(DepEvents, MContext), &NativeEvent);
106 
107  if (MContext->is_host())
108  return MDiscardEvents ? createDiscardedEvent() : event();
109 
110  ResEvent = prepareUSMEvent(Self, NativeEvent);
111  if (isInOrder()) {
112  MLastEvent = ResEvent;
113  // We don't create a command group for usm commands, so set it to None.
114  // This variable is used to perform explicit dependency management when
115  // required.
116  MLastCGType = CG::CGTYPE::None;
117  }
118  }
119  // Track only if we won't be able to handle it with piQueueFinish.
120  if (MEmulateOOO)
121  addSharedEvent(ResEvent);
122  return MDiscardEvents ? createDiscardedEvent() : ResEvent;
123 }
124 
125 event queue_impl::memcpy(const std::shared_ptr<detail::queue_impl> &Self,
126  void *Dest, const void *Src, size_t Count,
127  const std::vector<event> &DepEvents) {
128 #if XPTI_ENABLE_INSTRUMENTATION
129  // We need a code pointer value and we duse the object ptr; If code location
130  // is available, we use the source file information along with the object
131  // pointer.
132  XPTIScope PrepareNotify((void *)this,
133  (uint16_t)xpti::trace_point_type_t::node_create,
134  SYCL_MEM_ALLOC_STREAM_NAME, "queue.memcpy()");
135  PrepareNotify.addMetadata([&](auto TEvent) {
136  xpti::addMetadata(TEvent, "sycl_device",
137  reinterpret_cast<size_t>(
138  MDevice->is_host() ? 0 : MDevice->getHandleRef()));
139  xpti::addMetadata(TEvent, "src_memory_ptr", reinterpret_cast<size_t>(Src));
140  xpti::addMetadata(TEvent, "dest_memory_ptr",
141  reinterpret_cast<size_t>(Dest));
142  xpti::addMetadata(TEvent, "memory_size", Count);
143  });
144  // Notify XPTI about the memset submission
145  PrepareNotify.notify();
146  // Emit a begin/end scope for this call
147  PrepareNotify.scopedNotify((uint16_t)xpti::trace_point_type_t::task_begin);
148 #endif
149  if (MHasDiscardEventsSupport) {
150  MemoryManager::copy_usm(Src, Self, Count, Dest,
151  getOrWaitEvents(DepEvents, MContext), nullptr);
152  return createDiscardedEvent();
153  }
154  event ResEvent;
155  {
156  // We need to submit command and update the last event under same lock if we
157  // have in-order queue.
158  auto ScopeLock = isInOrder() ? std::unique_lock<std::mutex>(MLastEventMtx)
159  : std::unique_lock<std::mutex>();
160  // If the last submitted command in the in-order queue is host_task then
161  // wait for it before submitting usm command.
162  if (isInOrder() && (MLastCGType == CG::CGTYPE::CodeplayHostTask ||
163  MLastCGType == CG::CGTYPE::CodeplayInteropTask))
164  MLastEvent.wait();
165 
166  RT::PiEvent NativeEvent{};
167  MemoryManager::copy_usm(Src, Self, Count, Dest,
168  getOrWaitEvents(DepEvents, MContext), &NativeEvent);
169 
170  if (MContext->is_host())
171  return MDiscardEvents ? createDiscardedEvent() : event();
172 
173  ResEvent = prepareUSMEvent(Self, NativeEvent);
174  if (isInOrder()) {
175  MLastEvent = ResEvent;
176  // We don't create a command group for usm commands, so set it to None.
177  // This variable is used to perform explicit dependency management when
178  // required.
179  MLastCGType = CG::CGTYPE::None;
180  }
181  }
182  // Track only if we won't be able to handle it with piQueueFinish.
183  if (MEmulateOOO)
184  addSharedEvent(ResEvent);
185  return MDiscardEvents ? createDiscardedEvent() : ResEvent;
186 }
187 
188 event queue_impl::mem_advise(const std::shared_ptr<detail::queue_impl> &Self,
189  const void *Ptr, size_t Length,
190  pi_mem_advice Advice,
191  const std::vector<event> &DepEvents) {
192  if (MHasDiscardEventsSupport) {
193  MemoryManager::advise_usm(Ptr, Self, Length, Advice,
194  getOrWaitEvents(DepEvents, MContext), nullptr);
195  return createDiscardedEvent();
196  }
197  event ResEvent;
198  {
199  // We need to submit command and update the last event under same lock if we
200  // have in-order queue.
201  auto ScopeLock = isInOrder() ? std::unique_lock<std::mutex>(MLastEventMtx)
202  : std::unique_lock<std::mutex>();
203  // If the last submitted command in the in-order queue is host_task then
204  // wait for it before submitting usm command.
205  if (isInOrder() && (MLastCGType == CG::CGTYPE::CodeplayHostTask ||
206  MLastCGType == CG::CGTYPE::CodeplayInteropTask))
207  MLastEvent.wait();
208 
209  RT::PiEvent NativeEvent{};
210  MemoryManager::advise_usm(Ptr, Self, Length, Advice,
211  getOrWaitEvents(DepEvents, MContext),
212  &NativeEvent);
213 
214  if (MContext->is_host())
215  return MDiscardEvents ? createDiscardedEvent() : event();
216 
217  ResEvent = prepareUSMEvent(Self, NativeEvent);
218  if (isInOrder()) {
219  MLastEvent = ResEvent;
220  // We don't create a command group for usm commands, so set it to None.
221  // This variable is used to perform explicit dependency management when
222  // required.
223  MLastCGType = CG::CGTYPE::None;
224  }
225  }
226  // Track only if we won't be able to handle it with piQueueFinish.
227  if (MEmulateOOO)
228  addSharedEvent(ResEvent);
229  return MDiscardEvents ? createDiscardedEvent() : ResEvent;
230 }
231 
/// Copies NumBytes from Src into the device_global identified by
/// DeviceGlobalPtr, starting at byte Offset.
///
/// \param Self shared-pointer handle to this queue_impl.
/// \param DeviceGlobalPtr address identifying the target device_global.
/// \param Src host/USM source pointer.
/// \param IsDeviceImageScope whether the device_global has device_image_scope
///        (forwarded to the memory manager).
/// \param NumBytes number of bytes to copy.
/// \param Offset byte offset into the device_global.
/// \param DepEvents events that must complete before the copy runs.
/// \return an event for the operation, or a discarded event when event
///         discarding is enabled on this queue.
event queue_impl::memcpyToDeviceGlobal(
    const std::shared_ptr<detail::queue_impl> &Self, void *DeviceGlobalPtr,
    const void *Src, bool IsDeviceImageScope, size_t NumBytes, size_t Offset,
    const std::vector<event> &DepEvents) {
  if (MHasDiscardEventsSupport) {
    // Events are discarded wholesale; submit without a native event.
    MemoryManager::copy_to_device_global(
        DeviceGlobalPtr, IsDeviceImageScope, Self, NumBytes, Offset, Src,
        OSUtil::ExeModuleHandle, getOrWaitEvents(DepEvents, MContext), nullptr);
    return createDiscardedEvent();
  }
  event ResEvent;
  {
    // We need to submit command and update the last event under same lock if we
    // have in-order queue.
    auto ScopeLock = isInOrder() ? std::unique_lock<std::mutex>(MLastEventMtx)
                                 : std::unique_lock<std::mutex>();
    // If the last submitted command in the in-order queue is host_task then
    // wait for it before submitting usm command.
    if (isInOrder() && (MLastCGType == CG::CGTYPE::CodeplayHostTask ||
                        MLastCGType == CG::CGTYPE::CodeplayInteropTask))
      MLastEvent.wait();

    RT::PiEvent NativeEvent{};
    MemoryManager::copy_to_device_global(
        DeviceGlobalPtr, IsDeviceImageScope, Self, NumBytes, Offset, Src,
        OSUtil::ExeModuleHandle, getOrWaitEvents(DepEvents, MContext),
        &NativeEvent);

    // Host contexts produce no native event to wrap.
    if (MContext->is_host())
      return MDiscardEvents ? createDiscardedEvent() : event();

    ResEvent = prepareUSMEvent(Self, NativeEvent);

    if (isInOrder()) {
      MLastEvent = ResEvent;
      // We don't create a command group for usm commands, so set it to None.
      // This variable is used to perform explicit dependency management when
      // required.
      MLastCGType = CG::CGTYPE::None;
    }
  }
  // Track only if we won't be able to handle it with piQueueFinish.
  if (MEmulateOOO)
    addSharedEvent(ResEvent);
  return MDiscardEvents ? createDiscardedEvent() : ResEvent;
}
278 
/// Copies NumBytes out of the device_global identified by DeviceGlobalPtr,
/// starting at byte Offset, into Dest.
///
/// \param Self shared-pointer handle to this queue_impl.
/// \param Dest host/USM destination pointer.
/// \param DeviceGlobalPtr address identifying the source device_global.
/// \param IsDeviceImageScope whether the device_global has device_image_scope
///        (forwarded to the memory manager).
/// \param NumBytes number of bytes to copy.
/// \param Offset byte offset into the device_global.
/// \param DepEvents events that must complete before the copy runs.
/// \return an event for the operation, or a discarded event when event
///         discarding is enabled on this queue.
event queue_impl::memcpyFromDeviceGlobal(
    const std::shared_ptr<detail::queue_impl> &Self, void *Dest,
    const void *DeviceGlobalPtr, bool IsDeviceImageScope, size_t NumBytes,
    size_t Offset, const std::vector<event> &DepEvents) {
  if (MHasDiscardEventsSupport) {
    // Events are discarded wholesale; submit without a native event.
    MemoryManager::copy_from_device_global(
        DeviceGlobalPtr, IsDeviceImageScope, Self, NumBytes, Offset, Dest,
        OSUtil::ExeModuleHandle, getOrWaitEvents(DepEvents, MContext), nullptr);
    return createDiscardedEvent();
  }
  event ResEvent;
  {
    // We need to submit command and update the last event under same lock if we
    // have in-order queue.
    auto ScopeLock = isInOrder() ? std::unique_lock<std::mutex>(MLastEventMtx)
                                 : std::unique_lock<std::mutex>();
    // If the last submitted command in the in-order queue is host_task then
    // wait for it before submitting usm command.
    if (isInOrder() && (MLastCGType == CG::CGTYPE::CodeplayHostTask ||
                        MLastCGType == CG::CGTYPE::CodeplayInteropTask))
      MLastEvent.wait();

    RT::PiEvent NativeEvent{};
    MemoryManager::copy_from_device_global(
        DeviceGlobalPtr, IsDeviceImageScope, Self, NumBytes, Offset, Dest,
        OSUtil::ExeModuleHandle, getOrWaitEvents(DepEvents, MContext),
        &NativeEvent);

    // Host contexts produce no native event to wrap.
    if (MContext->is_host())
      return MDiscardEvents ? createDiscardedEvent() : event();

    ResEvent = prepareUSMEvent(Self, NativeEvent);

    if (isInOrder()) {
      MLastEvent = ResEvent;
      // We don't create a command group for usm commands, so set it to None.
      // This variable is used to perform explicit dependency management when
      // required.
      MLastCGType = CG::CGTYPE::None;
    }
  }
  // Track only if we won't be able to handle it with piQueueFinish.
  if (MEmulateOOO)
    addSharedEvent(ResEvent);
  return MDiscardEvents ? createDiscardedEvent() : ResEvent;
}
325 
326 void queue_impl::addEvent(const event &Event) {
327  EventImplPtr EImpl = getSyclObjImpl(Event);
328  assert(EImpl && "Event implementation is missing");
329  auto *Cmd = static_cast<Command *>(EImpl->getCommand());
330  if (!Cmd) {
331  // if there is no command on the event, we cannot track it with MEventsWeak
332  // as that will leave it with no owner. Track in MEventsShared only if we're
333  // unable to call piQueueFinish during wait.
334  if (is_host() || MEmulateOOO)
335  addSharedEvent(Event);
336  }
337  // As long as the queue supports piQueueFinish we only need to store events
338  // for unenqueued commands and host tasks.
339  else if (is_host() || MEmulateOOO || EImpl->getHandleRef() == nullptr) {
340  std::weak_ptr<event_impl> EventWeakPtr{EImpl};
341  std::lock_guard<std::mutex> Lock{MMutex};
342  MEventsWeak.push_back(std::move(EventWeakPtr));
343  }
344 }
345 
349 void queue_impl::addSharedEvent(const event &Event) {
350  assert(is_host() || MEmulateOOO);
351  std::lock_guard<std::mutex> Lock(MMutex);
352  // Events stored in MEventsShared are not released anywhere else aside from
353  // calls to queue::wait/wait_and_throw, which a user application might not
354  // make, and ~queue_impl(). If the number of events grows large enough,
355  // there's a good chance that most of them are already completed and ownership
356  // of them can be released.
357  const size_t EventThreshold = 128;
358  if (MEventsShared.size() >= EventThreshold) {
359  // Generally, the vector is ordered so that the oldest events are in the
360  // front and the newer events are in the end. So, search to find the first
361  // event that isn't yet complete. All the events prior to that can be
362  // erased. This could leave some few events further on that have completed
363  // not yet erased, but that is OK. This cleanup doesn't have to be perfect.
364  // This also keeps the algorithm linear rather than quadratic because it
365  // doesn't continually recheck things towards the back of the list that
366  // really haven't had time to complete.
367  MEventsShared.erase(
368  MEventsShared.begin(),
369  std::find_if(
370  MEventsShared.begin(), MEventsShared.end(), [](const event &E) {
371  return E.get_info<info::event::command_execution_status>() !=
372  info::event_command_status::complete;
373  }));
374  }
375  MEventsShared.push_back(Event);
376 }
377 
/// Opens an XPTI instrumentation scope for a queue::wait() call.
///
/// \param CodeLoc user-code source location of the wait() call (may be empty).
/// \param Name out-parameter: receives a "queue.wait" name unique-ified with
///        this queue's address.
/// \param StreamID XPTI stream the begin notification is sent on.
/// \param IId out-parameter: receives the instance id of the created event.
/// \return an opaque xpti::trace_event_data_t* to hand back to
///         instrumentationEpilog, or nullptr if tracing is disabled or
///         instrumentation is compiled out.
void *queue_impl::instrumentationProlog(const detail::code_location &CodeLoc,
                                        std::string &Name, int32_t StreamID,
                                        uint64_t &IId) {
  void *TraceEvent = nullptr;
  // Silence unused-parameter warnings when XPTI is compiled out.
  (void)CodeLoc;
  (void)Name;
  (void)StreamID;
  (void)IId;
#ifdef XPTI_ENABLE_INSTRUMENTATION
  xpti::trace_event_data_t *WaitEvent = nullptr;
  if (!xptiTraceEnabled())
    return TraceEvent;

  xpti::payload_t Payload;
  bool HasSourceInfo = false;
  // We try to create a unique string for the wait() call by combining it with
  // the queue address
  xpti::utils::StringHelper NG;
  Name = NG.nameWithAddress<queue_impl *>("queue.wait", this);

  if (CodeLoc.fileName()) {
    // We have source code location information
    Payload =
        xpti::payload_t(Name.c_str(), CodeLoc.fileName(), CodeLoc.lineNumber(),
                        CodeLoc.columnNumber(), (void *)this);
    HasSourceInfo = true;
  } else {
    // We have no location information, so we'll use the address of the queue
    Payload = xpti::payload_t(Name.c_str(), (void *)this);
  }
  // wait() calls could be at different user-code locations; We create a new
  // event based on the code location info and if this has been seen before, a
  // previously created event will be returned.
  uint64_t QWaitInstanceNo = 0;
  WaitEvent = xptiMakeEvent(Name.c_str(), &Payload, xpti::trace_graph_event,
                            xpti_at::active, &QWaitInstanceNo);
  IId = QWaitInstanceNo;
  if (WaitEvent) {
    // Attach a human-readable device kind, plus symbolic source info when
    // available, then notify subscribers that the wait scope begins.
    device D = get_device();
    std::string DevStr;
    if (getSyclObjImpl(D)->is_host())
      DevStr = "HOST";
    else if (D.is_cpu())
      DevStr = "CPU";
    else if (D.is_gpu())
      DevStr = "GPU";
    else if (D.is_accelerator())
      DevStr = "ACCELERATOR";
    else
      DevStr = "UNKNOWN";
    xpti::addMetadata(WaitEvent, "sycl_device", DevStr);
    if (HasSourceInfo) {
      xpti::addMetadata(WaitEvent, "sym_function_name", CodeLoc.functionName());
      xpti::addMetadata(WaitEvent, "sym_source_file_name", CodeLoc.fileName());
      xpti::addMetadata(WaitEvent, "sym_line_no",
                        static_cast<int32_t>((CodeLoc.lineNumber())));
      xpti::addMetadata(WaitEvent, "sym_column_no",
                        static_cast<int32_t>((CodeLoc.columnNumber())));
    }
    xptiNotifySubscribers(StreamID, xpti::trace_wait_begin, nullptr, WaitEvent,
                          QWaitInstanceNo,
                          static_cast<const void *>(Name.c_str()));
    TraceEvent = (void *)WaitEvent;
  }
#endif
  return TraceEvent;
}
445 
446 void queue_impl::instrumentationEpilog(void *TelemetryEvent, std::string &Name,
447  int32_t StreamID, uint64_t IId) {
448  (void)TelemetryEvent;
449  (void)Name;
450  (void)StreamID;
451  (void)IId;
452 #ifdef XPTI_ENABLE_INSTRUMENTATION
453  if (!(xptiTraceEnabled() && TelemetryEvent))
454  return;
455  // Close the wait() scope
456  xpti::trace_event_data_t *TraceEvent =
457  (xpti::trace_event_data_t *)TelemetryEvent;
458  xptiNotifySubscribers(StreamID, xpti::trace_wait_end, nullptr, TraceEvent,
459  IId, static_cast<const void *>(Name.c_str()));
460 #endif
461 }
462 
464  (void)CodeLoc;
465 #ifdef XPTI_ENABLE_INSTRUMENTATION
466  void *TelemetryEvent = nullptr;
467  uint64_t IId;
468  std::string Name;
469  int32_t StreamID = xptiRegisterStream(SYCL_STREAM_NAME);
470  TelemetryEvent = instrumentationProlog(CodeLoc, Name, StreamID, IId);
471 #endif
472 
473  std::vector<std::weak_ptr<event_impl>> WeakEvents;
474  std::vector<event> SharedEvents;
475  {
476  std::lock_guard<std::mutex> Lock(MMutex);
477  WeakEvents.swap(MEventsWeak);
478  SharedEvents.swap(MEventsShared);
479  }
480  // If the queue is either a host one or does not support OOO (and we use
481  // multiple in-order queues as a result of that), wait for each event
482  // directly. Otherwise, only wait for unenqueued or host task events, starting
483  // from the latest submitted task in order to minimize total amount of calls,
484  // then handle the rest with piQueueFinish.
485  const bool SupportsPiFinish = !is_host() && !MEmulateOOO;
486  for (auto EventImplWeakPtrIt = WeakEvents.rbegin();
487  EventImplWeakPtrIt != WeakEvents.rend(); ++EventImplWeakPtrIt) {
488  if (std::shared_ptr<event_impl> EventImplSharedPtr =
489  EventImplWeakPtrIt->lock()) {
490  // A nullptr PI event indicates that piQueueFinish will not cover it,
491  // either because it's a host task event or an unenqueued one.
492  if (!SupportsPiFinish || nullptr == EventImplSharedPtr->getHandleRef()) {
493  EventImplSharedPtr->wait(EventImplSharedPtr);
494  }
495  }
496  }
497  if (SupportsPiFinish) {
498  const detail::plugin &Plugin = getPlugin();
499  Plugin.call<detail::PiApiKind::piQueueFinish>(getHandleRef());
500  assert(SharedEvents.empty() && "Queues that support calling piQueueFinish "
501  "shouldn't have shared events");
502  } else {
503  for (event &Event : SharedEvents)
504  Event.wait();
505  }
506 
507  std::vector<EventImplPtr> StreamsServiceEvents;
508  {
509  std::lock_guard<std::mutex> Lock(MMutex);
510  StreamsServiceEvents.swap(MStreamsServiceEvents);
511  }
512  for (const EventImplPtr &Event : StreamsServiceEvents)
513  Event->wait(Event);
514 
515 #ifdef XPTI_ENABLE_INSTRUMENTATION
516  instrumentationEpilog(TelemetryEvent, Name, StreamID, IId);
517 #endif
518 }
519 
520 pi_native_handle queue_impl::getNative() const {
521  const detail::plugin &Plugin = getPlugin();
522  if (Plugin.getBackend() == backend::opencl)
523  Plugin.call<PiApiKind::piQueueRetain>(MQueues[0]);
524  pi_native_handle Handle{};
525  Plugin.call<PiApiKind::piextQueueGetNativeHandle>(MQueues[0], &Handle);
526  return Handle;
527 }
528 
529 bool queue_impl::ext_oneapi_empty() const {
530  // If we have in-order queue where events are not discarded then just check
531  // the status of the last event.
532  if (isInOrder() && !MDiscardEvents) {
533  std::lock_guard<std::mutex> Lock(MLastEventMtx);
534  return MLastEvent.get_info<info::event::command_execution_status>() ==
535  info::event_command_status::complete;
536  }
537 
538  // Check the status of the backend queue if this is not a host queue.
539  if (!is_host()) {
540  pi_bool IsReady = false;
542  MQueues[0], PI_EXT_ONEAPI_QUEUE_INFO_EMPTY, sizeof(pi_bool), &IsReady,
543  nullptr);
544  if (!IsReady)
545  return false;
546  }
547 
548  // We may have events like host tasks which are not submitted to the backend
549  // queue so we need to get their status separately.
550  std::lock_guard<std::mutex> Lock(MMutex);
551  for (event Event : MEventsShared)
552  if (Event.get_info<info::event::command_execution_status>() !=
553  info::event_command_status::complete)
554  return false;
555 
556  for (auto EventImplWeakPtrIt = MEventsWeak.begin();
557  EventImplWeakPtrIt != MEventsWeak.end(); ++EventImplWeakPtrIt)
558  if (std::shared_ptr<event_impl> EventImplSharedPtr =
559  EventImplWeakPtrIt->lock())
560  if (EventImplSharedPtr->is_host() &&
561  EventImplSharedPtr
562  ->get_info<info::event::command_execution_status>() !=
563  info::event_command_status::complete)
564  return false;
565 
566  // If we didn't exit early above then it means that all events in the queue
567  // are completed.
568  return true;
569 }
570 
571 } // namespace detail
572 } // __SYCL_INLINE_VER_NAMESPACE(_V1)
573 } // namespace sycl
sycl::_V1::detail::Command
The Command class represents some action that needs to be performed on one or more memory objects.
Definition: commands.hpp:99
event_impl.hpp
sycl::_V1::detail::SYCL_STREAM_NAME
constexpr const char * SYCL_STREAM_NAME
Definition: xpti_registry.hpp:29
pi_bool
pi_uint32 pi_bool
Definition: pi.h:131
sycl::_V1::detail::SYCL_MEM_ALLOC_STREAM_NAME
constexpr auto SYCL_MEM_ALLOC_STREAM_NAME
Definition: xpti_registry.hpp:35
device.hpp
__SYCL_INLINE_VER_NAMESPACE
#define __SYCL_INLINE_VER_NAMESPACE(X)
Definition: defines_elementary.hpp:11
xpti_registry.hpp
_pi_mem_advice
_pi_mem_advice
Definition: pi.h:465
sycl::_V1::detail::memcpy
void memcpy(void *Dst, const void *Src, size_t Size)
Definition: memcpy.hpp:16
piextQueueGetNativeHandle
pi_result piextQueueGetNativeHandle(pi_queue queue, pi_native_handle *nativeHandle)
Gets the native handle of a PI queue object.
Definition: pi_esimd_emulator.cpp:1014
context.hpp
sycl
---— Error handling, matching OpenCL plugin semantics.
Definition: access.hpp:14
sycl::_V1::detail::plugin::getBackend
backend getBackend(void) const
Definition: plugin.hpp:229
sycl::_V1::event
An event object can be used to synchronize memory transfers, enqueues of kernels and signaling barrie...
Definition: event.hpp:40
queue_impl.hpp
pi.hpp
sycl::_V1::detail::pi::getPlugin
const plugin & getPlugin()
Definition: pi.cpp:506
sycl::_V1::detail::plugin
The plugin class provides a unified interface to the underlying low-level runtimes for the device-agn...
Definition: plugin.hpp:90
sycl::_V1::ext::intel::experimental::esimd::wait
__ESIMD_API std::enable_if_t<(sizeof(T) *N >=2)> wait(sycl::ext::intel::esimd::simd< T, N > value)
Create explicit scoreboard dependency to avoid device code motion across this call and preserve the v...
Definition: memory.hpp:307
sycl::_V1::detail::pi::PiResult
::pi_result PiResult
Definition: pi.hpp:122
sycl::_V1::device::is_gpu
bool is_gpu() const
Check if device is a GPU device.
Definition: device.cpp:87
sycl::_V1::detail::getOrWaitEvents
std::vector< RT::PiEvent > getOrWaitEvents(std::vector< sycl::event > DepEvents, std::shared_ptr< sycl::detail::context_impl > Context)
sycl::_V1::detail::code_location::functionName
constexpr const char * functionName() const noexcept
Definition: common.hpp:90
sycl::_V1::device::is_accelerator
bool is_accelerator() const
Check if device is an accelerator device.
Definition: device.cpp:89
piQueueRetain
pi_result piQueueRetain(pi_queue command_queue)
Definition: pi_esimd_emulator.cpp:978
common.hpp
sycl::_V1::detail::code_location::lineNumber
constexpr unsigned long lineNumber() const noexcept
Definition: common.hpp:87
sycl::_V1::device
The SYCL device class encapsulates a single SYCL device on which kernels may be executed.
Definition: device.hpp:49
PI_EXT_ONEAPI_QUEUE_INFO_EMPTY
@ PI_EXT_ONEAPI_QUEUE_INFO_EMPTY
Definition: pi.h:373
sycl::_V1::detail::EventImplPtr
std::shared_ptr< event_impl > EventImplPtr
Definition: cg.hpp:42
pi_native_handle
uintptr_t pi_native_handle
Definition: pi.h:133
piQueueFinish
pi_result piQueueFinish(pi_queue command_queue)
Definition: pi_esimd_emulator.cpp:1000
sycl::_V1::detail::queue_impl
Definition: queue_impl.hpp:59
sycl::_V1::detail::code_location::columnNumber
constexpr unsigned long columnNumber() const noexcept
Definition: common.hpp:88
sycl::_V1::detail::code_location
Definition: common.hpp:66
sycl::_V1::detail::plugin::call
void call(ArgsT... Args) const
Calls the API, traces the call, checks the result.
Definition: plugin.hpp:217
piQueueGetInfo
pi_result piQueueGetInfo(pi_queue command_queue, pi_queue_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
Definition: pi_esimd_emulator.cpp:974
sycl::_V1::device::is_cpu
bool is_cpu() const
Check if device is a CPU device.
Definition: device.cpp:85
sycl::_V1::detail::code_location::fileName
constexpr const char * fileName() const noexcept
Definition: common.hpp:89
sycl::_V1::detail::createDiscardedEvent
static event createDiscardedEvent()
Definition: queue_impl.cpp:57
PI_QUEUE_INFO_REFERENCE_COUNT
@ PI_QUEUE_INFO_REFERENCE_COUNT
Definition: pi.h:369
sycl::_V1::detail::prepareUSMEvent
static event prepareUSMEvent(const std::shared_ptr< detail::queue_impl > &QueueImpl, RT::PiEvent NativeEvent)
Definition: queue_impl.cpp:48
sycl::_V1::detail::pi::PiEvent
::pi_event PiEvent
Definition: pi.hpp:136
sycl::_V1::detail::getSyclObjImpl
decltype(Obj::impl) getSyclObjImpl(const Obj &SyclObject)
Definition: common.hpp:300
sycl::_V1::context
The context class represents a SYCL context on which kernel functions may be executed.
Definition: context.hpp:41
memory_manager.hpp