DPC++ Runtime
Runtime libraries for oneAPI DPC++
queue_impl.hpp
Go to the documentation of this file.
1 //==------------------ queue_impl.hpp - SYCL queue -------------------------==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #pragma once
10 
11 #include <detail/config.hpp>
12 #include <detail/context_impl.hpp>
13 #include <detail/device_impl.hpp>
14 #include <detail/event_impl.hpp>
16 #include <detail/kernel_impl.hpp>
17 #include <detail/plugin.hpp>
19 #include <detail/thread_pool.hpp>
20 #include <sycl/context.hpp>
23 #include <sycl/device.hpp>
24 #include <sycl/event.hpp>
25 #include <sycl/exception.hpp>
26 #include <sycl/exception_list.hpp>
27 #include <sycl/handler.hpp>
30 #include <sycl/property_list.hpp>
31 #include <sycl/stl.hpp>
32 
33 #include <utility>
34 
35 #ifdef XPTI_ENABLE_INSTRUMENTATION
36 #include "xpti/xpti_trace_framework.hpp"
37 #include <detail/xpti_registry.hpp>
38 #endif
39 
40 namespace sycl {
42 namespace detail {
43 
44 using ContextImplPtr = std::shared_ptr<detail::context_impl>;
45 using DeviceImplPtr = std::shared_ptr<detail::device_impl>;
46 
// Upper bound on the number of PI queues a single queue_impl keeps in MQueues.
// Once that many exist, getExclusiveQueueHandleRef() cycles through the
// existing entries (via MNextQueueIdx) instead of appending new ones.
48 static constexpr size_t MaxNumQueues = 256;
49 
// Possible CUDA context types supported by the PI CUDA backend.
52 enum class CUDAContextT : char { primary, custom };
53 
// Default context type created for the CUDA backend.
55 constexpr CUDAContextT DefaultContextType = CUDAContextT::custom;
56 
58 
59 class queue_impl {
60 public:
61  // \return a default context for the platform if it includes the device
62  // passed and default contexts are enabled, a new context otherwise.
66  context{createSyclObjFromImpl<device>(Device), {}, {}});
67 
68  ContextImplPtr DefaultContext = detail::getSyclObjImpl(
69  Device->get_platform().ext_oneapi_get_default_context());
70  if (DefaultContext->isDeviceValid(Device))
71  return DefaultContext;
73  context{createSyclObjFromImpl<device>(Device), {}, {}});
74  }
// Constructs a queue for Device without an explicit context: delegates to the
// (Device, Context, AsyncHandler, PropList) constructor, using the context
// returned by getDefaultOrNew(Device).
82  queue_impl(const DeviceImplPtr &Device, const async_handler &AsyncHandler,
83  const property_list &PropList)
84  : queue_impl(Device, getDefaultOrNew(Device), AsyncHandler, PropList){};
85 
95  queue_impl(const DeviceImplPtr &Device, const ContextImplPtr &Context,
96  const async_handler &AsyncHandler, const property_list &PropList)
97  : MDevice(Device), MContext(Context), MAsyncHandler(AsyncHandler),
98  MPropList(PropList), MHostQueue(MDevice->is_host()),
99  MAssertHappenedBuffer(range<1>{1}),
100  MIsInorder(has_property<property::queue::in_order>()),
101  MDiscardEvents(
102  has_property<ext::oneapi::property::queue::discard_events>()),
103  MIsProfilingEnabled(has_property<property::queue::enable_profiling>()),
104  MHasDiscardEventsSupport(MDiscardEvents &&
105  (MHostQueue ? true : MIsInorder)) {
106  // We enable XPTI tracing events using the TLS mechanism; if the code
107  // location data is available, then the tracing data will be rich.
108 #if XPTI_ENABLE_INSTRUMENTATION
109  XPTIScope PrepareNotify((void *)this,
112  (uint16_t)xpti::trace_point_type_t::queue_create,
113  SYCL_STREAM_NAME, "queue_create");
114  // Cache the trace event, stream id and instance IDs for the destructor
115  if (xptiTraceEnabled()) {
116  MTraceEvent = (void *)PrepareNotify.traceEvent();
117  MStreamID = PrepareNotify.streamID();
118  MInstanceID = PrepareNotify.instanceID();
119  }
120  // Add the function to capture meta data for the XPTI trace event
121  PrepareNotify.addMetadata([&](auto TEvent) {
122  xpti::addMetadata(TEvent, "sycl_context",
123  reinterpret_cast<size_t>(MContext->getHandleRef()));
124  if (MDevice) {
125  xpti::addMetadata(TEvent, "sycl_device_name", MDevice->getDeviceName());
126  xpti::addMetadata(
127  TEvent, "sycl_device",
128  reinterpret_cast<size_t>(
129  MDevice->is_host() ? 0 : MDevice->getHandleRef()));
130  }
131  xpti::addMetadata(TEvent, "is_inorder", MIsInorder);
132  });
133  PrepareNotify.notify();
134 #endif
135  if (has_property<property::queue::enable_profiling>()) {
136  if (has_property<ext::oneapi::property::queue::discard_events>())
137  throw sycl::exception(make_error_code(errc::invalid),
138  "Queue cannot be constructed with both of "
139  "discard_events and enable_profiling.");
140  if (!MDevice->has(aspect::queue_profiling))
141  throw sycl::exception(make_error_code(errc::feature_not_supported),
142  "Cannot enable profiling, the associated device "
143  "does not have the queue_profiling aspect");
144  }
145  if (has_property<ext::intel::property::queue::compute_index>()) {
146  int Idx = get_property<ext::intel::property::queue::compute_index>()
147  .get_index();
148  int NumIndices =
149  createSyclObjFromImpl<device>(Device)
150  .get_info<ext::intel::info::device::max_compute_queue_indices>();
151  if (Idx < 0 || Idx >= NumIndices)
152  throw sycl::exception(
153  make_error_code(errc::invalid),
154  "Queue compute index must be a non-negative number less than "
155  "device's number of available compute queue indices.");
156  }
157  if (!Context->isDeviceValid(Device)) {
158  if (!Context->is_host() &&
159  Context->getPlugin().getBackend() == backend::opencl)
160  throw sycl::invalid_object_error(
161  "Queue cannot be constructed with the given context and device "
162  "since the device is not a member of the context (descendants of "
163  "devices from the context are not supported on OpenCL yet).",
164  PI_ERROR_INVALID_DEVICE);
165  throw sycl::invalid_object_error(
166  "Queue cannot be constructed with the given context and device "
167  "since the device is neither a member of the context nor a "
168  "descendant of its member.",
169  PI_ERROR_INVALID_DEVICE);
170  }
171  if (!MHostQueue) {
172  const QueueOrder QOrder =
173  MIsInorder ? QueueOrder::Ordered : QueueOrder::OOO;
174  MQueues.push_back(createQueue(QOrder));
175  // This section is the second part of the instrumentation that uses the
176  // tracepoint information and notifies
177  }
178  }
179 
187  const async_handler &AsyncHandler)
188  : MContext(Context), MAsyncHandler(AsyncHandler), MPropList(),
189  MHostQueue(false), MAssertHappenedBuffer(range<1>{1}),
190  MIsInorder(has_property<property::queue::in_order>()),
191  MDiscardEvents(
192  has_property<ext::oneapi::property::queue::discard_events>()),
193  MIsProfilingEnabled(has_property<property::queue::enable_profiling>()),
194  MHasDiscardEventsSupport(MDiscardEvents &&
195  (MHostQueue ? true : MIsInorder)) {
196  // The following commented section provides a guideline on how to use the
197  // TLS enabled mechanism to create a tracepoint and notify using XPTI. This
198  // is the prolog section and the epilog section will initiate the
199  // notification.
200 #if XPTI_ENABLE_INSTRUMENTATION
201  XPTIScope PrepareNotify((void *)this,
204  (uint16_t)xpti::trace_point_type_t::queue_create,
205  SYCL_STREAM_NAME, "queue_create");
206  if (xptiTraceEnabled()) {
207  // Cache the trace event, stream id and instance IDs for the destructor
208  MTraceEvent = (void *)PrepareNotify.traceEvent();
209  MStreamID = PrepareNotify.streamID();
210  MInstanceID = PrepareNotify.instanceID();
211  }
212  // Add the function to capture meta data for the XPTI trace event
213  PrepareNotify.addMetadata([&](auto TEvent) {
214  xpti::addMetadata(TEvent, "sycl_context",
215  reinterpret_cast<size_t>(MContext->getHandleRef()));
216  if (MDevice) {
217  xpti::addMetadata(TEvent, "sycl_device_name", MDevice->getDeviceName());
218  xpti::addMetadata(
219  TEvent, "sycl_device",
220  reinterpret_cast<size_t>(
221  MDevice->is_host() ? 0 : MDevice->getHandleRef()));
222  }
223  xpti::addMetadata(TEvent, "is_inorder", MIsInorder);
224  });
225  PrepareNotify.notify();
226 #endif
227  if (has_property<ext::oneapi::property::queue::discard_events>() &&
228  has_property<property::queue::enable_profiling>()) {
229  throw sycl::exception(make_error_code(errc::invalid),
230  "Queue cannot be constructed with both of "
231  "discard_events and enable_profiling.");
232  }
233 
234  MQueues.push_back(pi::cast<RT::PiQueue>(PiQueue));
235 
236  RT::PiDevice DevicePI{};
237  const detail::plugin &Plugin = getPlugin();
238  // TODO catch an exception and put it to list of asynchronous exceptions
239  Plugin.call<PiApiKind::piQueueGetInfo>(
240  MQueues[0], PI_QUEUE_INFO_DEVICE, sizeof(DevicePI), &DevicePI, nullptr);
241  MDevice = MContext->findMatchingDeviceImpl(DevicePI);
242  if (MDevice == nullptr) {
243  throw sycl::exception(
244  make_error_code(errc::invalid),
245  "Device provided by native Queue not found in Context.");
246  }
247  }
248 
250  // The trace event created in the constructor should be active through the
251  // lifetime of the queue object as member variables when ABI breakage is
252  // allowed. This example shows MTraceEvent as a member variable.
253 #if XPTI_ENABLE_INSTRUMENTATION
254  if (xptiTraceEnabled()) {
255  // Use cached information in member variables
256  xptiNotifySubscribers(
257  MStreamID, (uint16_t)xpti::trace_point_type_t::queue_destroy, nullptr,
258  (xpti::trace_event_data_t *)MTraceEvent, MInstanceID,
259  static_cast<const void *>("queue_destroy"));
260  }
261 #endif
262  throw_asynchronous();
263  if (!MHostQueue) {
265  }
266  }
267 
// Returns the first PI queue (MQueues[0]) cast to cl_command_queue.
// piQueueRetain is called on that queue before it is returned, so the handle
// carries an extra retain (presumably the caller must release it - confirm).
// Throws invalid_object_error with PI_ERROR_INVALID_QUEUE for a host queue.
269  cl_command_queue get() {
270  if (MHostQueue) {
271  throw invalid_object_error(
272  "This instance of queue doesn't support OpenCL interoperability",
273  PI_ERROR_INVALID_QUEUE);
274  }
275  getPlugin().call<PiApiKind::piQueueRetain>(MQueues[0]);
276  return pi::cast<cl_command_queue>(MQueues[0]);
277  }
278 
281  return createSyclObjFromImpl<context>(MContext);
282  }
283 
// Returns the plugin of the context this queue is associated with (MContext).
284  const plugin &getPlugin() const { return MContext->getPlugin(); }
285 
// Returns a reference to the stored context implementation pointer (MContext).
286  const ContextImplPtr &getContextImplPtr() const { return MContext; }
287 
// Returns a reference to the stored device implementation pointer (MDevice).
288  const DeviceImplPtr &getDeviceImplPtr() const { return MDevice; }
289 
// Returns a sycl::device object created from the stored MDevice implementation.
291  device get_device() const { return createSyclObjFromImpl<device>(MDevice); }
292 
// Returns MHostQueue: set from MDevice->is_host() by the device/context
// constructor and to false by the interop constructor.
294  bool is_host() const { return MHostQueue; }
295 
// Returns MHasDiscardEventsSupport, which the constructors compute as
// MDiscardEvents && (MHostQueue || MIsInorder).
297  bool has_discard_events_support() const { return MHasDiscardEventsSupport; }
298 
// Returns MIsInorder, i.e. whether the queue was constructed with the
// property::queue::in_order property.
299  bool isInOrder() const { return MIsInorder; }
300 
304  template <typename Param> typename Param::return_type get_info() const;
305 
306  using SubmitPostProcessF = std::function<void(bool, bool, event &)>;
307 
// Submits the command group function CGF to this queue, falling back to
// SecondQueue on failure.
//
// The first attempt goes through submit_impl with Self as both the submitting
// and the primary queue. If that attempt throws anything, the same CGF is
// submitted through SecondQueue->submit_impl instead, with SecondQueue as the
// submitting queue. Exceptions from the second attempt propagate to the caller.
//
// Self is the shared pointer owning this queue; Loc is the code location
// saved on the handler; PostProcess, when non-null, is invoked by submit_impl
// after the handler has been finalized.
//
// NOTE(review): the catch-all discards the exception from the first attempt,
// and SecondQueue is dereferenced without a null check - confirm that callers
// always pass a non-null secondary queue.
321  event submit(const std::function<void(handler &)> &CGF,
322  const std::shared_ptr<queue_impl> &Self,
323  const std::shared_ptr<queue_impl> &SecondQueue,
324  const detail::code_location &Loc,
325  const SubmitPostProcessF *PostProcess = nullptr) {
326  try {
327  return submit_impl(CGF, Self, Self, SecondQueue, Loc, PostProcess);
328  } catch (...) {
329  return SecondQueue->submit_impl(CGF, SecondQueue, Self, SecondQueue, Loc,
330  PostProcess);
331  }
332  }
333 
// Submits the command group function CGF to this queue with no secondary
// queue: forwards to submit_impl with Self as both the submitting and the
// primary queue and nullptr as the secondary one. Exceptions thrown by
// submit_impl propagate to the caller.
342  event submit(const std::function<void(handler &)> &CGF,
343  const std::shared_ptr<queue_impl> &Self,
344  const detail::code_location &Loc,
345  const SubmitPostProcessF *PostProcess = nullptr) {
346  return submit_impl(CGF, Self, Self, nullptr, Loc, PostProcess);
347  }
348 
354  void wait(const detail::code_location &Loc = {});
355 
357  exception_list getExceptionList() const { return MExceptions; }
358 
// Calls wait(Loc) and then throw_asynchronous(), which hands any stored
// asynchronous exceptions to the queue's async handler.
360  void wait_and_throw(const detail::code_location &Loc = {}) {
361  wait(Loc);
362  throw_asynchronous();
363  }
364 
373  if (!MAsyncHandler)
374  return;
375 
376  exception_list Exceptions;
377  {
378  std::lock_guard<std::mutex> Lock(MMutex);
379  std::swap(Exceptions, MExceptions);
380  }
381  // Unlock the mutex before calling user-provided handler to avoid
382  // potential deadlock if the same queue is somehow referenced in the
383  // handler.
384  if (Exceptions.size())
385  MAsyncHandler(std::move(Exceptions));
386  }
387 
393  RT::PiQueueProperties CreationFlags = 0;
394 
395  if (Order == QueueOrder::OOO) {
397  }
398  if (MPropList.has_property<property::queue::enable_profiling>()) {
399  CreationFlags |= PI_QUEUE_FLAG_PROFILING_ENABLE;
400  }
401  if (MPropList.has_property<
402  ext::oneapi::cuda::property::queue::use_default_stream>()) {
403  CreationFlags |= __SYCL_PI_CUDA_USE_DEFAULT_STREAM;
404  }
405  if (MPropList
406  .has_property<ext::oneapi::property::queue::discard_events>()) {
407  // Pass this flag to the Level Zero plugin to be able to check it from
408  // queue property.
410  }
411  // Track that priority settings are not ambiguous.
412  bool PrioritySeen = false;
413  if (MPropList
414  .has_property<ext::oneapi::property::queue::priority_normal>()) {
415  // Normal is the default priority, don't pass anything.
416  PrioritySeen = true;
417  }
418  if (MPropList.has_property<ext::oneapi::property::queue::priority_low>()) {
419  if (PrioritySeen) {
420  throw sycl::exception(
421  make_error_code(errc::invalid),
422  "Queue cannot be constructed with different priorities.");
423  }
424  CreationFlags |= PI_EXT_ONEAPI_QUEUE_FLAG_PRIORITY_LOW;
425  PrioritySeen = true;
426  }
427  if (MPropList.has_property<ext::oneapi::property::queue::priority_high>()) {
428  if (PrioritySeen) {
429  throw sycl::exception(
430  make_error_code(errc::invalid),
431  "Queue cannot be constructed with different priorities.");
432  }
434  PrioritySeen = true;
435  }
436  RT::PiQueue Queue{};
437  RT::PiContext Context = MContext->getHandleRef();
438  RT::PiDevice Device = MDevice->getHandleRef();
439  const detail::plugin &Plugin = getPlugin();
440 
441  assert(Plugin.getBackend() == MDevice->getPlugin().getBackend());
442  RT::PiQueueProperties Properties[] = {PI_QUEUE_FLAGS, CreationFlags, 0, 0,
443  0};
444  if (has_property<ext::intel::property::queue::compute_index>()) {
445  int Idx = get_property<ext::intel::property::queue::compute_index>()
446  .get_index();
447  Properties[2] = PI_QUEUE_COMPUTE_INDEX;
448  Properties[3] = static_cast<RT::PiQueueProperties>(Idx);
449  }
451  Context, Device, Properties, &Queue);
452 
453  // If creating out-of-order queue failed and this property is not
454  // supported (for example, on FPGA), it will return
455  // PI_ERROR_INVALID_QUEUE_PROPERTIES and will try to create in-order queue.
456  if (!MEmulateOOO && Error == PI_ERROR_INVALID_QUEUE_PROPERTIES) {
457  MEmulateOOO = true;
458  Queue = createQueue(QueueOrder::Ordered);
459  } else {
460  Plugin.checkPiResult(Error);
461  }
462 
463  return Queue;
464  }
465 
469  RT::PiQueue *PIQ = nullptr;
470  bool ReuseQueue = false;
471  {
472  std::lock_guard<std::mutex> Lock(MMutex);
473 
474  // To achieve parallelism for FPGA with in order execution model with
475  // possibility of two kernels to share data with each other we shall
476  // create a queue for every kernel enqueued.
477  if (MQueues.size() < MaxNumQueues) {
478  MQueues.push_back({});
479  PIQ = &MQueues.back();
480  } else {
481  // If the limit of OpenCL queues is going to be exceeded - take the
482  // earliest used queue, wait until it finished and then reuse it.
483  PIQ = &MQueues[MNextQueueIdx];
484  MNextQueueIdx = (MNextQueueIdx + 1) % MaxNumQueues;
485  ReuseQueue = true;
486  }
487  }
488 
489  if (!ReuseQueue)
490  *PIQ = createQueue(QueueOrder::Ordered);
491  else
493 
494  return *PIQ;
495  }
496 
500  if (!MEmulateOOO)
501  return MQueues[0];
502 
503  return getExclusiveQueueHandleRef();
504  }
505 
// Returns whether the property list the queue was constructed with (MPropList)
// contains a property of type propertyT.
508  template <typename propertyT> bool has_property() const noexcept {
509  return MPropList.has_property<propertyT>();
510  }
511 
// Returns a copy of the propertyT property stored in MPropList. Behavior when
// the property is absent is whatever property_list::get_property does
// (presumably it throws - confirm against property_list).
515  template <typename propertyT> propertyT get_property() const {
516  return MPropList.get_property<propertyT>();
517  }
518 
528  event memset(const std::shared_ptr<queue_impl> &Self, void *Ptr, int Value,
529  size_t Count, const std::vector<event> &DepEvents);
540  event memcpy(const std::shared_ptr<queue_impl> &Self, void *Dest,
541  const void *Src, size_t Count,
542  const std::vector<event> &DepEvents);
553  event mem_advise(const std::shared_ptr<queue_impl> &Self, const void *Ptr,
554  size_t Length, pi_mem_advice Advice,
555  const std::vector<event> &DepEvents);
556 
// Appends ExceptionPtr to the list of asynchronous exceptions (MExceptions).
// MMutex is held for the duration of the append.
560  void reportAsyncException(const std::exception_ptr &ExceptionPtr) {
561  std::lock_guard<std::mutex> Lock(MMutex);
562  MExceptions.PushBack(ExceptionPtr);
563  }
564 
566  return GlobalHandler::instance().getHostTaskThreadPool();
567  }
568 
572  pi_native_handle getNative() const;
573 
575  return MAssertHappenedBuffer;
576  }
577 
579  std::lock_guard<std::mutex> Lock(MMutex);
580  MStreamsServiceEvents.push_back(Event);
581  }
582 
583  bool ext_oneapi_empty() const;
584 
589  return detail::Scheduler::getInstance().isInFusionMode(
590  std::hash<typename std::shared_ptr<queue_impl>::element_type *>()(
591  this));
592  }
593 
594  event memcpyToDeviceGlobal(const std::shared_ptr<queue_impl> &Self,
595  void *DeviceGlobalPtr, const void *Src,
596  bool IsDeviceImageScope, size_t NumBytes,
597  size_t Offset,
598  const std::vector<event> &DepEvents);
599  event memcpyFromDeviceGlobal(const std::shared_ptr<queue_impl> &Self,
600  void *Dest, const void *DeviceGlobalPtr,
601  bool IsDeviceImageScope, size_t NumBytes,
602  size_t Offset,
603  const std::vector<event> &DepEvents);
604 
605 protected:
606  // template is needed for proper unit testing
// Finalizes Handler and stores the resulting event in EventRet.
//
// For an out-of-order queue this is just Handler.finalize(). For an in-order
// queue (MIsInorder) the work is done under MLastEventMtx and additionally:
//  - if either the new command group type or the previously recorded one is
//    CodeplayHostTask or CodeplayInteropTask, the handler is first made to
//    depend on MLastEvent;
//  - after finalizing, EventRet and Type are recorded in MLastEvent and
//    MLastCGType for the next submission.
//
// Handler - the handler to finalize (templated so tests can substitute a type).
// Type - the command group type of the work held by Handler.
// EventRet - receives the event returned by Handler.finalize().
607  template <typename HandlerType = handler>
608  void finalizeHandler(HandlerType &Handler, const CG::CGTYPE &Type,
609  event &EventRet) {
610  if (MIsInorder) {
611 
      // True for command group types whose dependencies are checked explicitly
      // below (host tasks and interop tasks).
612  auto IsExpDepManaged = [](const CG::CGTYPE &Type) {
613  return (Type == CG::CGTYPE::CodeplayHostTask ||
614  Type == CG::CGTYPE::CodeplayInteropTask);
615  };
616 
617  // Accessing and changing of an event isn't atomic operation.
618  // Hence, here is the lock for thread-safety.
619  std::lock_guard<std::mutex> Lock{MLastEventMtx};
620 
      // First submission on this queue: treat the previous type as the current
      // one so that only Type decides the check below.
621  if (MLastCGType == CG::CGTYPE::None)
622  MLastCGType = Type;
623  // Also handles case when sync model changes. E.g. Last is host, new is
624  // kernel.
625  bool NeedSeparateDependencyMgmt =
626  IsExpDepManaged(Type) || IsExpDepManaged(MLastCGType);
627 
628  if (NeedSeparateDependencyMgmt)
629  Handler.depends_on(MLastEvent);
630 
631  EventRet = Handler.finalize();
632 
633  MLastEvent = EventRet;
634  MLastCGType = Type;
635  } else
636  EventRet = Handler.finalize();
637  }
638 
639 protected:
// Performs command group submission to the queue.
//
// Builds a handler from Self, PrimaryQueue, SecondaryQueue and MHostQueue,
// records Loc on it, runs the user's command group function CGF against it and
// finalizes it through finalizeHandler. The resulting event is registered with
// addEvent and returned.
//
// PostProcess - optional callback. When non-null it is invoked after
//   finalization as (*PostProcess)(IsKernel, KernelUsesAssert, Event), where
//   IsKernel is whether the command group type is CG::Kernel and
//   KernelUsesAssert is computed below.
//
// Anything thrown by CGF, by finalization or by PostProcess propagates to the
// caller; nothing is caught here.
650  event submit_impl(const std::function<void(handler &)> &CGF,
651  const std::shared_ptr<queue_impl> &Self,
652  const std::shared_ptr<queue_impl> &PrimaryQueue,
653  const std::shared_ptr<queue_impl> &SecondaryQueue,
654  const detail::code_location &Loc,
655  const SubmitPostProcessF *PostProcess) {
656  handler Handler(Self, PrimaryQueue, SecondaryQueue, MHostQueue);
657  Handler.saveCodeLoc(Loc);
658  CGF(Handler);
659 
660  // Scheduler will later omit events, that are not required to execute tasks.
661  // Host and interop tasks, however, are not submitted to low-level runtimes
662  // and require separate dependency management.
663  const CG::CGTYPE Type = Handler.getType();
     // Placeholder event; finalizeHandler overwrites it with the event returned
     // by Handler.finalize().
664  event Event = detail::createSyclObjFromImpl<event>(
665  std::make_shared<detail::event_impl>());
666 
667  if (PostProcess) {
668  bool IsKernel = Type == CG::Kernel;
669  bool KernelUsesAssert = false;
670 
      // The kernel information is read from the handler before it is
      // finalized.
671  if (IsKernel)
672  // Kernel only uses assert if it's non interop one
673  KernelUsesAssert = !(Handler.MKernel && Handler.MKernel->isInterop()) &&
674  ProgramManager::getInstance().kernelUsesAssert(
675  Handler.MOSModuleHandle, Handler.MKernelName);
676 
677  finalizeHandler(Handler, Type, Event);
678 
679  (*PostProcess)(IsKernel, KernelUsesAssert, Event);
680  } else
681  finalizeHandler(Handler, Type, Event);
682 
683  addEvent(Event);
684  return Event;
685  }
686 
687  // When instrumentation is enabled emits trace event for wait begin and
688  // returns the telemetry event generated for the wait
689  void *instrumentationProlog(const detail::code_location &CodeLoc,
690  std::string &Name, int32_t StreamID,
691  uint64_t &iid);
692  // Uses events generated by the Prolog and emits wait done event
693  void instrumentationEpilog(void *TelementryEvent, std::string &Name,
694  int32_t StreamID, uint64_t IId);
695 
701  void addSharedEvent(const event &Event);
702 
706  void addEvent(const event &Event);
707 
709  mutable std::mutex MMutex;
710 
713 
715  std::vector<std::weak_ptr<event_impl>> MEventsWeak;
716 
720  std::vector<event> MEventsShared;
724 
726  std::vector<RT::PiQueue> MQueues;
728  size_t MNextQueueIdx = 0;
729 
730  const bool MHostQueue = false;
733  bool MEmulateOOO = false;
734 
735  // Buffer to store assert failure descriptor
737 
738  // This event is employed for enhanced dependency tracking with in-order queue
739  // Access to the event should be guarded with MLastEventMtx
740  event MLastEvent;
741  mutable std::mutex MLastEventMtx;
742  // Used for in-order queues in pair with MLastEvent
743  // Host tasks are explicitly synchronized in RT, pi tasks - implicitly by
744  // backend. Using type to setup explicit sync between host and pi tasks.
745  CG::CGTYPE MLastCGType = CG::CGTYPE::None;
746 
747  const bool MIsInorder;
748 
749  std::vector<EventImplPtr> MStreamsServiceEvents;
750 
751  // All member variables defined here are needed for the SYCL instrumentation
752  // layer. Do not guard these variables below with XPTI_ENABLE_INSTRUMENTATION
753  // to ensure we have the same object layout when the macro in the library and
754  // SYCL app are not the same.
755  void *MTraceEvent = nullptr;
// The stream under which the traces are emitted from the queue object.
// NOTE(review): unlike MTraceEvent and MInstanceID, this member has no default
// initializer. The constructors assign it only when xptiTraceEnabled() is
// true - confirm it is never read when that assignment was skipped.
757  uint8_t MStreamID;
759  uint64_t MInstanceID = 0;
760 
761 public:
762  // Queue constructed with the discard_events property
763  const bool MDiscardEvents;
765 
766 protected:
767  // This flag says if we can discard events based on a queue "setup" which will
768  // be common for all operations submitted to the queue. This is a must
769  // condition for discarding, but even if it's true, in some cases, we won't be
770  // able to discard events, because the final decision is made right before the
771  // operation itself.
773 };
774 
775 } // namespace detail
776 } // __SYCL_INLINE_VER_NAMESPACE(_V1)
777 } // namespace sycl
sycl::_V1::property_list
Objects of the property_list class are containers for the SYCL properties.
Definition: property_list.hpp:24
sycl::_V1::detail::pi::PiQueueProperties
::pi_queue_properties PiQueueProperties
Definition: pi.hpp:133
property_list.hpp
sycl::_V1::detail::queue_impl::MStreamsServiceEvents
std::vector< EventImplPtr > MStreamsServiceEvents
Definition: queue_impl.hpp:749
PI_QUEUE_COMPUTE_INDEX
constexpr pi_queue_properties PI_QUEUE_COMPUTE_INDEX
Definition: pi.h:622
event_impl.hpp
sycl::_V1::detail::queue_impl::~queue_impl
~queue_impl()
Definition: queue_impl.hpp:249
sycl::_V1::detail::queue_impl::getPlugin
const plugin & getPlugin() const
Definition: queue_impl.hpp:284
sycl::_V1::detail::SYCL_STREAM_NAME
constexpr const char * SYCL_STREAM_NAME
Definition: xpti_registry.hpp:29
sycl::_V1::detail::QueueOrder
QueueOrder
Definition: queue_impl.hpp:57
context_impl.hpp
sycl::_V1::detail::queue_impl::MQueues
std::vector< RT::PiQueue > MQueues
List of queues created for FPGA device from a single SYCL queue.
Definition: queue_impl.hpp:726
sycl::_V1::detail::ContextImplPtr
std::shared_ptr< sycl::detail::context_impl > ContextImplPtr
Definition: event_impl.hpp:30
sycl::_V1::detail::queue_impl::getExceptionList
exception_list getExceptionList() const
Definition: queue_impl.hpp:357
sycl::_V1::make_error_code
std::error_code make_error_code(sycl::errc E) noexcept
Constructs an error code using e and sycl_category()
Definition: exception.cpp:92
sycl::_V1::detail::queue_impl::MExceptions
exception_list MExceptions
Definition: queue_impl.hpp:721
config.hpp
stl.hpp
sycl::_V1::detail::queue_impl::MMutex
std::mutex MMutex
Protects all the fields that can be changed by class' methods.
Definition: queue_impl.hpp:709
device.hpp
sycl::_V1::detail::plugin::checkPiResult
void checkPiResult(RT::PiResult pi_result) const
Checks return value from PI calls.
Definition: plugin.hpp:116
__SYCL_INLINE_VER_NAMESPACE
#define __SYCL_INLINE_VER_NAMESPACE(X)
Definition: defines_elementary.hpp:11
sycl::_V1::detail::queue_impl::MAssertHappenedBuffer
buffer< AssertHappened, 1 > MAssertHappenedBuffer
Definition: queue_impl.hpp:736
sycl::_V1::buffer
Defines a shared array that can be used by kernels in queues.
Definition: buffer.hpp:37
xpti_registry.hpp
_pi_mem_advice
_pi_mem_advice
Definition: pi.h:465
sycl::_V1::detail::DefaultContextType
constexpr CUDAContextT DefaultContextType
Default context type created for CUDA backend.
Definition: queue_impl.hpp:55
sycl::_V1::detail::queue_impl::is_host
bool is_host() const
Definition: queue_impl.hpp:294
sycl::_V1::detail::DeviceImplPtr
std::shared_ptr< device_impl > DeviceImplPtr
Definition: program_manager.hpp:59
sycl::_V1::detail::memcpy
void memcpy(void *Dst, const void *Src, size_t Size)
Definition: memcpy.hpp:16
sycl::_V1::detail::queue_impl::get_property
propertyT get_property() const
Definition: queue_impl.hpp:515
context_properties.hpp
PI_EXT_ONEAPI_QUEUE_FLAG_PRIORITY_HIGH
constexpr pi_queue_properties PI_EXT_ONEAPI_QUEUE_FLAG_PRIORITY_HIGH
Definition: pi.h:630
sycl::_V1::detail::pi::PiDevice
::pi_device PiDevice
Definition: pi.hpp:124
context.hpp
sycl::_V1::detail::SYCLConfig
Definition: config.hpp:110
sycl::_V1::detail::queue_impl::get
cl_command_queue get()
Definition: queue_impl.hpp:269
sycl::_V1::detail::queue_impl::MContext
const ContextImplPtr MContext
Definition: queue_impl.hpp:712
event.hpp
piQueueRelease
pi_result piQueueRelease(pi_queue command_queue)
Definition: pi_esimd_emulator.cpp:986
sycl::_V1::detail::queue_impl::submit
event submit(const std::function< void(handler &)> &CGF, const std::shared_ptr< queue_impl > &Self, const std::shared_ptr< queue_impl > &SecondQueue, const detail::code_location &Loc, const SubmitPostProcessF *PostProcess=nullptr)
Submits a command group function object to the queue, in order to be scheduled for execution on the d...
Definition: queue_impl.hpp:321
sycl::_V1::detail::queue_impl::throw_asynchronous
void throw_asynchronous()
Passes any stored asynchronous exceptions to the async_handler supplied at queue construction; does nothing if no handler was provided.
Definition: queue_impl.hpp:372
cuda_definitions.hpp
sycl
Error handling, matching OpenCL plugin semantics.
Definition: access.hpp:14
device_impl.hpp
plugin.hpp
sycl::_V1::detail::plugin::getBackend
backend getBackend(void) const
Definition: plugin.hpp:229
sycl::_V1::event
An event object can be used to synchronize memory transfers, enqueues of kernels and signaling barrie...
Definition: event.hpp:40
sycl::_V1::detail::queue_impl::getThreadPool
ThreadPool & getThreadPool()
Definition: queue_impl.hpp:565
sycl::_V1::detail::queue_impl::MLastEvent
event MLastEvent
Definition: queue_impl.hpp:740
sycl::_V1::detail::queue_impl::getExclusiveQueueHandleRef
RT::PiQueue & getExclusiveQueueHandleRef()
Definition: queue_impl.hpp:468
scheduler.hpp
sycl::_V1::detail::reduction::finalizeHandler
void finalizeHandler(handler &CGH)
Definition: reduction.hpp:1082
__SYCL_PI_CUDA_USE_DEFAULT_STREAM
#define __SYCL_PI_CUDA_USE_DEFAULT_STREAM
Definition: cuda_definitions.hpp:22
sycl::_V1::detail::pi::getPlugin
const plugin & getPlugin()
Definition: pi.cpp:506
sycl::_V1::exception_list
A list of asynchronous exceptions.
Definition: exception_list.hpp:31
sycl::_V1::range
Defines the iteration domain of either a single work-group in a parallel dispatch,...
Definition: buffer.hpp:28
sycl::_V1::detail::queue_impl::submit
event submit(const std::function< void(handler &)> &CGF, const std::shared_ptr< queue_impl > &Self, const detail::code_location &Loc, const SubmitPostProcessF *PostProcess=nullptr)
Submits a command group function object to the queue, in order to be scheduled for execution on the d...
Definition: queue_impl.hpp:342
sycl::_V1::detail::queue_impl::wait_and_throw
void wait_and_throw(const detail::code_location &Loc={})
Definition: queue_impl.hpp:360
sycl::_V1::detail::plugin
The plugin class provides a unified interface to the underlying low-level runtimes for the device-agn...
Definition: plugin.hpp:90
sycl::_V1::detail::queue_impl::submit_impl
event submit_impl(const std::function< void(handler &)> &CGF, const std::shared_ptr< queue_impl > &Self, const std::shared_ptr< queue_impl > &PrimaryQueue, const std::shared_ptr< queue_impl > &SecondaryQueue, const detail::code_location &Loc, const SubmitPostProcessF *PostProcess)
Performs command group submission to the queue.
Definition: queue_impl.hpp:650
sycl::_V1::detail::queue_impl::has_property
bool has_property() const noexcept
Definition: queue_impl.hpp:508
sycl::_V1::detail::queue_impl::MDiscardEvents
const bool MDiscardEvents
Definition: queue_impl.hpp:763
sycl::_V1::detail::pi::PiResult
::pi_result PiResult
Definition: pi.hpp:122
PI_EXT_ONEAPI_QUEUE_FLAG_DISCARD_EVENTS
constexpr pi_queue_properties PI_EXT_ONEAPI_QUEUE_FLAG_DISCARD_EVENTS
Definition: pi.h:628
sycl::_V1::detail::queue_impl::MStreamID
uint8_t MStreamID
The stream under which the traces are emitted from the queue object.
Definition: queue_impl.hpp:757
sycl::_V1::detail::queue_impl::getDefaultOrNew
static ContextImplPtr getDefaultOrNew(const DeviceImplPtr &Device)
Definition: queue_impl.hpp:63
sycl::_V1::detail::queue_impl::MIsProfilingEnabled
const bool MIsProfilingEnabled
Definition: queue_impl.hpp:764
sycl::_V1::detail::queue_impl::registerStreamServiceEvent
void registerStreamServiceEvent(const EventImplPtr &Event)
Definition: queue_impl.hpp:578
sycl::_V1::detail::queue_impl::MHasDiscardEventsSupport
const bool MHasDiscardEventsSupport
Definition: queue_impl.hpp:772
sycl::_V1::detail::queue_impl::getAssertHappenedBuffer
buffer< AssertHappened, 1 > & getAssertHappenedBuffer()
Definition: queue_impl.hpp:574
piQueueRetain
pi_result piQueueRetain(pi_queue command_queue)
Definition: pi_esimd_emulator.cpp:978
sycl::_V1::detail::pi::PiContext
::pi_context PiContext
Definition: pi.hpp:128
sycl::_V1::handler
Command group handler class.
Definition: handler.hpp:315
sycl::_V1::detail::queue_impl::MIsInorder
const bool MIsInorder
Definition: queue_impl.hpp:747
sycl::_V1::detail::OOO
@ OOO
Definition: queue_impl.hpp:57
sycl::_V1::detail::pi::PiQueue
::pi_queue PiQueue
Definition: pi.hpp:132
global_handler.hpp
sycl::_V1::detail::queue_impl::get_device
device get_device() const
Definition: queue_impl.hpp:291
sycl::_V1::device
The SYCL device class encapsulates a single SYCL device on which kernels may be executed.
Definition: device.hpp:49
sycl::_V1::detail::queue_impl::getDeviceImplPtr
const DeviceImplPtr & getDeviceImplPtr() const
Definition: queue_impl.hpp:288
PI_QUEUE_INFO_DEVICE
@ PI_QUEUE_INFO_DEVICE
Definition: pi.h:366
sycl::_V1::detail::queue_impl::reportAsyncException
void reportAsyncException(const std::exception_ptr &ExceptionPtr)
Puts an exception into the list of asynchronous exceptions.
Definition: queue_impl.hpp:560
sycl::_V1::detail::EventImplPtr
std::shared_ptr< event_impl > EventImplPtr
Definition: cg.hpp:42
queue_properties.hpp
pi_native_handle
uintptr_t pi_native_handle
Definition: pi.h:133
piQueueFinish
pi_result piQueueFinish(pi_queue command_queue)
Definition: pi_esimd_emulator.cpp:1000
sycl::_V1::detail::queue_impl
Definition: queue_impl.hpp:59
sycl::_V1::detail::queue_impl::MPropList
const property_list MPropList
Definition: queue_impl.hpp:723
sycl::_V1::detail::queue_impl::MAsyncHandler
const async_handler MAsyncHandler
Definition: queue_impl.hpp:722
sycl::_V1::detail::queue_impl::has_discard_events_support
bool has_discard_events_support() const
Definition: queue_impl.hpp:297
sycl::_V1::detail::CG::CGTYPE
CGTYPE
Type of the command group.
Definition: cg.hpp:55
sycl::_V1::detail::plugin::call_nocheck
RT::PiResult call_nocheck(ArgsT... Args) const
Calls the PiApi, traces the call, and returns the result.
Definition: plugin.hpp:170
PI_EXT_ONEAPI_QUEUE_FLAG_PRIORITY_LOW
constexpr pi_queue_properties PI_EXT_ONEAPI_QUEUE_FLAG_PRIORITY_LOW
Definition: pi.h:629
sycl::_V1::detail::queue_impl::getContextImplPtr
const ContextImplPtr & getContextImplPtr() const
Definition: queue_impl.hpp:286
sycl::_V1::detail::CUDAContextT
CUDAContextT
Possible CUDA context types supported by the PI CUDA backend. TODO: Implement this as a property once ther...
Definition: queue_impl.hpp:52
sycl::_V1::detail::code_location
Definition: common.hpp:66
sycl::_V1::detail::queue_impl::queue_impl
queue_impl(const DeviceImplPtr &Device, const ContextImplPtr &Context, const async_handler &AsyncHandler, const property_list &PropList)
Constructs a SYCL queue from a device and a context, using the provided async_handler and property_list.
Definition: queue_impl.hpp:95
sycl::_V1::detail::queue_impl::MLastEventMtx
std::mutex MLastEventMtx
Definition: queue_impl.hpp:741
handler.hpp
sycl::_V1::exception_list::size
size_type size() const
Definition: exception_list.cpp:17
exception.hpp
sycl::_V1::detail::plugin::call
void call(ArgsT... Args) const
Calls the API, traces the call, checks the result.
Definition: plugin.hpp:217
piQueueGetInfo
pi_result piQueueGetInfo(pi_queue command_queue, pi_queue_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
Definition: pi_esimd_emulator.cpp:974
sycl::_V1::detail::queue_impl::createQueue
RT::PiQueue createQueue(QueueOrder Order)
Creates PI queue.
Definition: queue_impl.hpp:392
sycl::_V1::detail::queue_impl::get_context
context get_context() const
Definition: queue_impl.hpp:280
sycl::_V1::detail::queue_impl::SubmitPostProcessF
std::function< void(bool, bool, event &)> SubmitPostProcessF
Definition: queue_impl.hpp:306
sycl::_V1::detail::queue_impl::finalizeHandler
void finalizeHandler(HandlerType &Handler, const CG::CGTYPE &Type, event &EventRet)
Definition: queue_impl.hpp:608
sycl::_V1::async_handler
std::function< void(sycl::exception_list)> async_handler
Definition: exception_list.hpp:54
sycl::_V1::detail::ThreadPool
Definition: thread_pool.hpp:25
exception_list.hpp
sycl::_V1::detail::queue_impl::is_in_fusion_mode
bool is_in_fusion_mode()
Check whether the queue is in fusion mode.
Definition: queue_impl.hpp:588
PI_QUEUE_FLAG_PROFILING_ENABLE
constexpr pi_queue_properties PI_QUEUE_FLAG_PROFILING_ENABLE
Definition: pi.h:625
PI_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE
constexpr pi_queue_properties PI_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE
Definition: pi.h:624
sycl::_V1::detail::CUDAContextT::primary
@ primary
kernel_impl.hpp
sycl::_V1::detail::MaxNumQueues
static constexpr size_t MaxNumQueues
Sets max number of queues supported by FPGA RT.
Definition: queue_impl.hpp:48
assert_happened.hpp
sycl::_V1::detail::queue_impl::getHandleRef
RT::PiQueue & getHandleRef()
Definition: queue_impl.hpp:499
sycl::_V1::detail::queue_impl::MEventsWeak
std::vector< std::weak_ptr< event_impl > > MEventsWeak
These events are tracked, but not owned, by the queue.
Definition: queue_impl.hpp:715
thread_pool.hpp
sycl::_V1::detail::queue_impl::MDevice
DeviceImplPtr MDevice
Definition: queue_impl.hpp:711
piextQueueCreate
pi_result piextQueueCreate(pi_context context, pi_device device, pi_queue_properties *properties, pi_queue *queue)
Definition: pi_esimd_emulator.cpp:932
sycl::_V1::detail::queue_impl::queue_impl
queue_impl(RT::PiQueue PiQueue, const ContextImplPtr &Context, const async_handler &AsyncHandler)
Constructs a SYCL queue from plugin interoperability handle.
Definition: queue_impl.hpp:186
sycl::_V1::detail::queue_impl::queue_impl
queue_impl(const DeviceImplPtr &Device, const async_handler &AsyncHandler, const property_list &PropList)
Constructs a SYCL queue from a device, using the provided async_handler and property_list.
Definition: queue_impl.hpp:82
sycl::_V1::detail::queue_impl::isInOrder
bool isInOrder() const
Definition: queue_impl.hpp:299
sycl::_V1::detail::Ordered
@ Ordered
Definition: queue_impl.hpp:57
sycl::_V1::detail::queue_impl::MEventsShared
std::vector< event > MEventsShared
Events without data dependencies (such as USM) need an owner, additionally, USM operations are not ad...
Definition: queue_impl.hpp:720
sycl::_V1::detail::getSyclObjImpl
decltype(Obj::impl) getSyclObjImpl(const Obj &SyclObject)
Definition: common.hpp:300
PI_QUEUE_FLAGS
constexpr pi_queue_properties PI_QUEUE_FLAGS
Definition: pi.h:621
sycl::_V1::context
The context class represents a SYCL context on which kernel functions may be executed.
Definition: context.hpp:41