DPC++ Runtime
Runtime libraries for oneAPI DPC++
queue_impl.hpp
Go to the documentation of this file.
1 //==------------------ queue_impl.hpp - SYCL queue -------------------------==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #pragma once
10 
11 #include <CL/sycl/context.hpp>
14 #include <CL/sycl/device.hpp>
15 #include <CL/sycl/event.hpp>
16 #include <CL/sycl/exception.hpp>
18 #include <CL/sycl/handler.hpp>
22 #include <CL/sycl/stl.hpp>
23 #include <detail/config.hpp>
24 #include <detail/context_impl.hpp>
25 #include <detail/device_impl.hpp>
26 #include <detail/event_impl.hpp>
28 #include <detail/kernel_impl.hpp>
29 #include <detail/plugin.hpp>
31 #include <detail/thread_pool.hpp>
32 
33 #include <utility>
34 
36 namespace sycl {
37 namespace detail {
38 
39 using ContextImplPtr = std::shared_ptr<detail::context_impl>;
40 using DeviceImplPtr = std::shared_ptr<detail::device_impl>;
41 
43 static constexpr size_t MaxNumQueues = 256;
44 
47 enum class CUDAContextT : char { primary, custom };
48 
50 constexpr CUDAContextT DefaultContextType = CUDAContextT::custom;
51 
53 
54 class queue_impl {
55 public:
56  // \return a default context for the platform if it includes the device
57  // passed and default contexts are enabled, a new context otherwise.
61  context{createSyclObjFromImpl<device>(Device), {}, {}});
62 
63  ContextImplPtr DefaultContext = detail::getSyclObjImpl(
64  Device->get_platform().ext_oneapi_get_default_context());
65 
66  if (DefaultContext->hasDevice(Device))
67  return DefaultContext;
68 
70  context{createSyclObjFromImpl<device>(Device), {}, {}});
71  }
79  queue_impl(const DeviceImplPtr &Device, const async_handler &AsyncHandler,
80  const property_list &PropList)
81  : queue_impl(Device, getDefaultOrNew(Device), AsyncHandler, PropList){};
82 
92  queue_impl(const DeviceImplPtr &Device, const ContextImplPtr &Context,
93  const async_handler &AsyncHandler, const property_list &PropList)
94  : MDevice(Device), MContext(Context), MAsyncHandler(AsyncHandler),
95  MPropList(PropList), MHostQueue(MDevice->is_host()),
96  MAssertHappenedBuffer(range<1>{1}),
97  MIsInorder(has_property<property::queue::in_order>()),
98  MDiscardEvents(
99  has_property<ext::oneapi::property::queue::discard_events>()),
100  MIsProfilingEnabled(has_property<property::queue::enable_profiling>()),
101  MHasDiscardEventsSupport(
102  MDiscardEvents &&
103  (MHostQueue ? true
104  : (MIsInorder && getPlugin().getBackend() !=
105  backend::ext_oneapi_level_zero))) {
106  if (has_property<ext::oneapi::property::queue::discard_events>() &&
107  has_property<property::queue::enable_profiling>()) {
108  throw sycl::exception(make_error_code(errc::invalid),
109  "Queue cannot be constructed with both of "
110  "discard_events and enable_profiling.");
111  }
112  if (!Context->hasDevice(Device))
113  throw cl::sycl::invalid_object_error(
114  "Queue cannot be constructed with the given context and device "
115  "as the context does not contain the given device.",
117  if (!MHostQueue) {
118  const QueueOrder QOrder =
119  MPropList.has_property<property::queue::in_order>()
121  : QueueOrder::OOO;
122  MQueues.push_back(createQueue(QOrder));
123  }
124  }
125 
133  const async_handler &AsyncHandler)
134  : MContext(Context), MAsyncHandler(AsyncHandler), MPropList(),
135  MHostQueue(false), MAssertHappenedBuffer(range<1>{1}),
136  MIsInorder(has_property<property::queue::in_order>()),
137  MDiscardEvents(
138  has_property<ext::oneapi::property::queue::discard_events>()),
139  MIsProfilingEnabled(has_property<property::queue::enable_profiling>()),
140  MHasDiscardEventsSupport(
141  MDiscardEvents &&
142  (MHostQueue ? true
143  : (MIsInorder && getPlugin().getBackend() !=
144  backend::ext_oneapi_level_zero))) {
145  if (has_property<ext::oneapi::property::queue::discard_events>() &&
146  has_property<property::queue::enable_profiling>()) {
147  throw sycl::exception(make_error_code(errc::invalid),
148  "Queue cannot be constructed with both of "
149  "discard_events and enable_profiling.");
150  }
151 
152  MQueues.push_back(pi::cast<RT::PiQueue>(PiQueue));
153 
154  RT::PiDevice Device{};
155  const detail::plugin &Plugin = getPlugin();
156  // TODO catch an exception and put it to list of asynchronous exceptions
157  Plugin.call<PiApiKind::piQueueGetInfo>(MQueues[0], PI_QUEUE_INFO_DEVICE,
158  sizeof(Device), &Device, nullptr);
159  MDevice =
160  DeviceImplPtr(new device_impl(Device, Context->getPlatformImpl()));
161  }
162 
164  throw_asynchronous();
165  if (!MHostQueue) {
167  }
168  }
169 
171  cl_command_queue get() {
172  if (MHostQueue) {
173  throw invalid_object_error(
174  "This instance of queue doesn't support OpenCL interoperability",
176  }
177  getPlugin().call<PiApiKind::piQueueRetain>(MQueues[0]);
178  return pi::cast<cl_command_queue>(MQueues[0]);
179  }
180 
183  return createSyclObjFromImpl<context>(MContext);
184  }
185 
186  const plugin &getPlugin() const { return MContext->getPlugin(); }
187 
188  const ContextImplPtr &getContextImplPtr() const { return MContext; }
189 
190  const DeviceImplPtr &getDeviceImplPtr() const { return MDevice; }
191 
193  device get_device() const { return createSyclObjFromImpl<device>(MDevice); }
194 
196  bool is_host() const { return MHostQueue; }
197 
199  bool has_discard_events_support() const { return MHasDiscardEventsSupport; }
200 
204  template <info::queue Param>
205  typename info::param_traits<info::queue, Param>::return_type get_info() const;
206 
207  using SubmitPostProcessF = std::function<void(bool, bool, event &)>;
208 
222  event submit(const std::function<void(handler &)> &CGF,
223  const std::shared_ptr<queue_impl> &Self,
224  const std::shared_ptr<queue_impl> &SecondQueue,
225  const detail::code_location &Loc,
226  const SubmitPostProcessF *PostProcess = nullptr) {
227  try {
228  return submit_impl(CGF, Self, Self, SecondQueue, Loc, PostProcess);
229  } catch (...) {
230  {
231  std::lock_guard<std::mutex> Lock(MMutex);
232  MExceptions.PushBack(std::current_exception());
233  }
234  return SecondQueue->submit_impl(CGF, SecondQueue, Self, SecondQueue, Loc,
235  PostProcess);
236  }
237  }
238 
247  event submit(const std::function<void(handler &)> &CGF,
248  const std::shared_ptr<queue_impl> &Self,
249  const detail::code_location &Loc,
250  const SubmitPostProcessF *PostProcess = nullptr) {
251  return submit_impl(CGF, Self, Self, nullptr, Loc, PostProcess);
252  }
253 
259  void wait(const detail::code_location &Loc = {});
260 
262  exception_list getExceptionList() const { return MExceptions; }
263 
265  void wait_and_throw(const detail::code_location &Loc = {}) {
266  wait(Loc);
267  throw_asynchronous();
268  }
269 
278  if (!MAsyncHandler)
279  return;
280 
281  exception_list Exceptions;
282  {
283  std::lock_guard<std::mutex> Lock(MMutex);
284  std::swap(Exceptions, MExceptions);
285  }
286  // Unlock the mutex before calling user-provided handler to avoid
287  // potential deadlock if the same queue is somehow referenced in the
288  // handler.
289  if (Exceptions.size())
290  MAsyncHandler(std::move(Exceptions));
291  }
292 
298  RT::PiQueueProperties CreationFlags = 0;
299 
300  if (Order == QueueOrder::OOO) {
302  }
303  if (MPropList.has_property<property::queue::enable_profiling>()) {
304  CreationFlags |= PI_QUEUE_PROFILING_ENABLE;
305  }
306  if (MPropList.has_property<
308  CreationFlags |= __SYCL_PI_CUDA_USE_DEFAULT_STREAM;
309  }
310  RT::PiQueue Queue{};
311  RT::PiContext Context = MContext->getHandleRef();
312  RT::PiDevice Device = MDevice->getHandleRef();
313  const detail::plugin &Plugin = getPlugin();
314 
315  assert(Plugin.getBackend() == MDevice->getPlugin().getBackend());
317  Context, Device, CreationFlags, &Queue);
318 
319  // If creating out-of-order queue failed and this property is not
320  // supported (for example, on FPGA), it will return
321  // PI_INVALID_QUEUE_PROPERTIES and will try to create in-order queue.
322  if (MSupportOOO && Error == PI_INVALID_QUEUE_PROPERTIES) {
323  MSupportOOO = false;
324  Queue = createQueue(QueueOrder::Ordered);
325  } else {
326  Plugin.checkPiResult(Error);
327  }
328 
329  return Queue;
330  }
331 
335  RT::PiQueue *PIQ = nullptr;
336  bool ReuseQueue = false;
337  {
338  std::lock_guard<std::mutex> Lock(MMutex);
339 
340  // To achieve parallelism for FPGA with in order execution model with
341  // possibility of two kernels to share data with each other we shall
342  // create a queue for every kernel enqueued.
343  if (MQueues.size() < MaxNumQueues) {
344  MQueues.push_back({});
345  PIQ = &MQueues.back();
346  } else {
347  // If the limit of OpenCL queues is going to be exceeded - take the
348  // earliest used queue, wait until it finished and then reuse it.
349  PIQ = &MQueues[MNextQueueIdx];
350  MNextQueueIdx = (MNextQueueIdx + 1) % MaxNumQueues;
351  ReuseQueue = true;
352  }
353  }
354 
355  if (!ReuseQueue)
356  *PIQ = createQueue(QueueOrder::Ordered);
357  else
359 
360  return *PIQ;
361  }
362 
366  if (MSupportOOO)
367  return MQueues[0];
368 
369  return getExclusiveQueueHandleRef();
370  }
371 
374  template <typename propertyT> bool has_property() const {
375  return MPropList.has_property<propertyT>();
376  }
377 
381  template <typename propertyT> propertyT get_property() const {
382  return MPropList.get_property<propertyT>();
383  }
384 
394  event memset(const std::shared_ptr<queue_impl> &Self, void *Ptr, int Value,
395  size_t Count, const std::vector<event> &DepEvents);
406  event memcpy(const std::shared_ptr<queue_impl> &Self, void *Dest,
407  const void *Src, size_t Count,
408  const std::vector<event> &DepEvents);
419  event mem_advise(const std::shared_ptr<queue_impl> &Self, const void *Ptr,
420  size_t Length, pi_mem_advice Advice,
421  const std::vector<event> &DepEvents);
422 
426  void reportAsyncException(const std::exception_ptr &ExceptionPtr) {
427  std::lock_guard<std::mutex> Lock(MMutex);
428  MExceptions.PushBack(ExceptionPtr);
429  }
430 
432  return GlobalHandler::instance().getHostTaskThreadPool();
433  }
434 
438  pi_native_handle getNative() const;
439 
441  return MAssertHappenedBuffer;
442  }
443 
444 protected:
445  // template is needed for proper unit testing
446  template <typename HandlerType = handler>
447  void finalizeHandler(HandlerType &Handler, const CG::CGTYPE &Type,
448  event &EventRet) {
449  if (MIsInorder) {
450 
451  auto IsExpDepManaged = [](const CG::CGTYPE &Type) {
452  return (Type == CG::CGTYPE::CodeplayHostTask ||
453  Type == CG::CGTYPE::CodeplayInteropTask);
454  };
455 
456  // Accessing and changing of an event isn't atomic operation.
457  // Hence, here is the lock for thread-safety.
458  std::lock_guard<std::mutex> Lock{MLastEventMtx};
459 
460  if (MLastCGType == CG::CGTYPE::None)
461  MLastCGType = Type;
462  // Also handles case when sync model changes. E.g. Last is host, new is
463  // kernel.
464  bool NeedSeparateDependencyMgmt =
465  IsExpDepManaged(Type) || IsExpDepManaged(MLastCGType);
466 
467  if (NeedSeparateDependencyMgmt)
468  Handler.depends_on(MLastEvent);
469 
470  EventRet = Handler.finalize();
471 
472  MLastEvent = EventRet;
473  MLastCGType = Type;
474  } else
475  EventRet = Handler.finalize();
476  }
477 
478 private:
489  event submit_impl(const std::function<void(handler &)> &CGF,
490  const std::shared_ptr<queue_impl> &Self,
491  const std::shared_ptr<queue_impl> &PrimaryQueue,
492  const std::shared_ptr<queue_impl> &SecondaryQueue,
493  const detail::code_location &Loc,
494  const SubmitPostProcessF *PostProcess) {
495  handler Handler(Self, PrimaryQueue, SecondaryQueue, MHostQueue);
496  Handler.saveCodeLoc(Loc);
497  CGF(Handler);
498 
499  // Scheduler will later omit events, that are not required to execute tasks.
500  // Host and interop tasks, however, are not submitted to low-level runtimes
501  // and require separate dependency management.
502  const CG::CGTYPE Type = Handler.getType();
503  event Event;
504 
505  if (PostProcess) {
506  bool IsKernel = Type == CG::Kernel;
507  bool KernelUsesAssert = false;
508 
509  if (IsKernel)
510  // Kernel only uses assert if it's non interop one
511  KernelUsesAssert = !(Handler.MKernel && Handler.MKernel->isInterop()) &&
512  ProgramManager::getInstance().kernelUsesAssert(
513  Handler.MOSModuleHandle, Handler.MKernelName);
514 
515  finalizeHandler(Handler, Type, Event);
516 
517  (*PostProcess)(IsKernel, KernelUsesAssert, Event);
518  } else
519  finalizeHandler(Handler, Type, Event);
520 
521  addEvent(Event);
522  return Event;
523  }
524 
525  // When instrumentation is enabled emits trace event for wait begin and
526  // returns the telemetry event generated for the wait
527  void *instrumentationProlog(const detail::code_location &CodeLoc,
528  std::string &Name, int32_t StreamID,
529  uint64_t &iid);
530  // Uses events generated by the Prolog and emits wait done event
531  void instrumentationEpilog(void *TelementryEvent, std::string &Name,
532  int32_t StreamID, uint64_t IId);
533 
539  void addSharedEvent(const event &Event);
540 
544  void addEvent(const event &Event);
545 
547  std::mutex MMutex;
548 
549  DeviceImplPtr MDevice;
550  const ContextImplPtr MContext;
551 
553  std::vector<std::weak_ptr<event_impl>> MEventsWeak;
554 
558  std::vector<event> MEventsShared;
559  exception_list MExceptions;
560  const async_handler MAsyncHandler;
561  const property_list MPropList;
562 
564  std::vector<RT::PiQueue> MQueues;
566  size_t MNextQueueIdx = 0;
567 
568  const bool MHostQueue = false;
569  // Assume OOO support by default.
570  bool MSupportOOO = true;
571 
572  // Buffer to store assert failure descriptor
573  buffer<AssertHappened, 1> MAssertHappenedBuffer;
574 
575  // This event is employed for enhanced dependency tracking with in-order queue
576  // Access to the event should be guarded with MLastEventMtx
577  event MLastEvent;
578  std::mutex MLastEventMtx;
579  // Used for in-order queues in pair with MLastEvent
580  // Host tasks are explicitly synchronized in RT, pi tasks - implicitly by
581  // backend. Using type to setup explicit sync between host and pi tasks.
582  CG::CGTYPE MLastCGType = CG::CGTYPE::None;
583 
584  const bool MIsInorder;
585 
586 public:
587  // Queue constructed with the discard_events property
588  const bool MDiscardEvents;
590 
591 private:
592  // This flag says if we can discard events based on a queue "setup" which will
593  // be common for all operations submitted to the queue. This is a must
594  // condition for discarding, but even if it's true, in some cases, we won't be
595  // able to discard events, because the final decision is made right before the
596  // operation itself.
597  const bool MHasDiscardEventsSupport;
598 };
599 
600 } // namespace detail
601 } // namespace sycl
602 } // __SYCL_INLINE_NAMESPACE(cl)
cl::sycl::detail::queue_impl::queue_impl
queue_impl(const DeviceImplPtr &Device, const async_handler &AsyncHandler, const property_list &PropList)
Constructs a SYCL queue from a device using an async_handler and property_list provided.
Definition: queue_impl.hpp:79
cl::sycl::detail::queue_impl::getDeviceImplPtr
const DeviceImplPtr & getDeviceImplPtr() const
Definition: queue_impl.hpp:190
property_list.hpp
cl::sycl::detail::queue_impl::MDiscardEvents
const bool MDiscardEvents
Definition: queue_impl.hpp:588
event_impl.hpp
cl::sycl::detail::pi::getPlugin
const plugin & getPlugin()
Definition: pi.cpp:511
cl::sycl::detail::ContextImplPtr
std::shared_ptr< detail::context_impl > ContextImplPtr
Definition: memory_manager.hpp:32
context_impl.hpp
cl::sycl::event
An event object can be used to synchronize memory transfers, enqueues of kernels and signaling barrie...
Definition: event.hpp:31
cl::sycl::info::param_traits
Definition: info_desc.hpp:310
cl::sycl::detail::queue_impl::queue_impl
queue_impl(const DeviceImplPtr &Device, const ContextImplPtr &Context, const async_handler &AsyncHandler, const property_list &PropList)
Constructs a SYCL queue with an async_handler and property_list provided from a device and a context.
Definition: queue_impl.hpp:92
cl::sycl::detail::queue_impl::getExclusiveQueueHandleRef
RT::PiQueue & getExclusiveQueueHandleRef()
Definition: queue_impl.hpp:334
config.hpp
cl::sycl::detail::pi::PiQueueProperties
::pi_queue_properties PiQueueProperties
Definition: pi.hpp:110
cl::sycl::detail::CUDAContextT
CUDAContextT
Possible CUDA context types supported by PI CUDA backend TODO: Implement this as a property once ther...
Definition: queue_impl.hpp:47
stl.hpp
device.hpp
cl::sycl::detail::queue_impl::reportAsyncException
void reportAsyncException(const std::exception_ptr &ExceptionPtr)
Puts an exception into the list of asynchronous exceptions.
Definition: queue_impl.hpp:426
cl::sycl::detail::queue_impl::has_discard_events_support
bool has_discard_events_support() const
Definition: queue_impl.hpp:199
_pi_mem_advice
_pi_mem_advice
Definition: pi.h:459
cl::sycl::detail::queue_impl::queue_impl
queue_impl(RT::PiQueue PiQueue, const ContextImplPtr &Context, const async_handler &AsyncHandler)
Constructs a SYCL queue from plugin interoperability handle.
Definition: queue_impl.hpp:132
cl::sycl::detail::SYCLConfig
Definition: config.hpp:105
_pi_result
_pi_result
Definition: pi.h:85
context_properties.hpp
context.hpp
event.hpp
piQueueRelease
pi_result piQueueRelease(pi_queue command_queue)
Definition: pi_esimd_emulator.cpp:970
cl::sycl::make_error_code
std::error_code make_error_code(sycl::errc E) noexcept
Constructs an error code using e and sycl_category()
Definition: exception.cpp:121
cl::sycl::detail::queue_impl::is_host
bool is_host() const
Definition: queue_impl.hpp:196
cuda_definitions.hpp
sycl
Definition: invoke_simd.hpp:68
cl::sycl::detail::queue_impl::wait_and_throw
void wait_and_throw(const detail::code_location &Loc={})
Definition: queue_impl.hpp:265
device_impl.hpp
cl::sycl::property_list
Objects of the property_list class are containers for the SYCL properties.
Definition: property_list.hpp:26
cl::sycl::detail::queue_impl::has_property
bool has_property() const
Definition: queue_impl.hpp:374
plugin.hpp
cl::sycl::property::queue::in_order
Definition: queue_properties.hpp:18
cl::sycl::detail::queue_impl::throw_asynchronous
void throw_asynchronous()
Performs a blocking wait for the completion of all enqueued tasks in the queue.
Definition: queue_impl.hpp:277
cl::sycl::ext::oneapi::cuda::property::queue::use_default_stream
Definition: queue_properties.hpp:37
cl::sycl::detail::QueueOrder
QueueOrder
Definition: queue_impl.hpp:52
cl::sycl::detail::queue_impl::getThreadPool
ThreadPool & getThreadPool()
Definition: queue_impl.hpp:431
cl::sycl::detail::code_location
Definition: common.hpp:54
cl::sycl::buffer
Defines a shared array that can be used by kernels in queues.
Definition: buffer.hpp:58
scheduler.hpp
cl::sycl::detail::plugin::call_nocheck
RT::PiResult call_nocheck(ArgsT... Args) const
Calls the PiApi, traces the call, and returns the result.
Definition: plugin.hpp:170
cl::sycl::range
Defines the iteration domain of either a single work-group in a parallel dispatch,...
Definition: buffer.hpp:24
PI_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE
constexpr pi_queue_properties PI_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE
Definition: pi.h:624
__SYCL_PI_CUDA_USE_DEFAULT_STREAM
#define __SYCL_PI_CUDA_USE_DEFAULT_STREAM
Definition: cuda_definitions.hpp:22
cl::sycl::detail::memcpy
void memcpy(void *Dst, const void *Src, std::size_t Size)
cl::sycl::detail::plugin::getBackend
backend getBackend(void) const
Definition: plugin.hpp:229
cl::sycl::detail::CUDAContextT::primary
@ primary
cl::sycl::detail::CG::CGTYPE
CGTYPE
Type of the command group.
Definition: cg.hpp:156
char
piQueueCreate
pi_result piQueueCreate(pi_context context, pi_device device, pi_queue_properties properties, pi_queue *queue)
Definition: pi_esimd_emulator.cpp:930
_pi_queue
PI queue mapping on to CUstream objects.
Definition: pi_cuda.hpp:378
cl::sycl::detail::pi::PiQueue
::pi_queue PiQueue
Definition: pi.hpp:109
cl::sycl::detail::MaxNumQueues
static constexpr size_t MaxNumQueues
Sets max number of queues supported by FPGA RT.
Definition: queue_impl.hpp:43
cl::sycl::device
The SYCL device class encapsulates a single SYCL device on which kernels may be executed.
Definition: device.hpp:35
cl::sycl::detail::Ordered
@ Ordered
Definition: queue_impl.hpp:52
cl::sycl::exception_list::size
size_type size() const
Definition: exception_list.cpp:17
cl::sycl::property::queue::enable_profiling
Definition: queue_properties.hpp:19
PI_INVALID_QUEUE_PROPERTIES
@ PI_INVALID_QUEUE_PROPERTIES
Definition: pi.h:90
cl::sycl::detail::queue_impl::SubmitPostProcessF
std::function< void(bool, bool, event &)> SubmitPostProcessF
Definition: queue_impl.hpp:207
cl::sycl::detail::plugin::call
void call(ArgsT... Args) const
Calls the API, traces the call, checks the result.
Definition: plugin.hpp:217
piQueueRetain
pi_result piQueueRetain(pi_queue command_queue)
Definition: pi_esimd_emulator.cpp:962
cl
We provide new interfaces for matrix multiply in this patch:
Definition: access.hpp:13
PI_INVALID_QUEUE
@ PI_INVALID_QUEUE
Definition: pi.h:96
cl::sycl::detail::OOO
@ OOO
Definition: queue_impl.hpp:52
global_handler.hpp
PI_QUEUE_INFO_DEVICE
@ PI_QUEUE_INFO_DEVICE
Definition: pi.h:352
cl::sycl::detail::ThreadPool
Definition: thread_pool.hpp:25
cl::sycl::detail::queue_impl::get_property
propertyT get_property() const
Definition: queue_impl.hpp:381
cl::sycl::detail::plugin
The plugin class provides a unified interface to the underlying low-level runtimes for the device-agn...
Definition: plugin.hpp:90
cl::sycl::aspect::custom
@ custom
cl::sycl::detail::queue_impl::getHandleRef
RT::PiQueue & getHandleRef()
Definition: queue_impl.hpp:365
queue_properties.hpp
cl::sycl::detail::queue_impl::getAssertHappenedBuffer
buffer< AssertHappened, 1 > & getAssertHappenedBuffer()
Definition: queue_impl.hpp:440
pi_native_handle
uintptr_t pi_native_handle
Definition: pi.h:76
piQueueFinish
pi_result piQueueFinish(pi_queue command_queue)
Definition: pi_esimd_emulator.cpp:984
cl::sycl::handler
Command group handler class.
Definition: handler.hpp:361
cl::sycl::detail::queue_impl
Definition: queue_impl.hpp:54
cl::sycl::detail::queue_impl::getExceptionList
exception_list getExceptionList() const
Definition: queue_impl.hpp:262
cl::sycl::detail::queue_impl::get
cl_command_queue get()
Definition: queue_impl.hpp:171
cl::sycl::detail::queue_impl::~queue_impl
~queue_impl()
Definition: queue_impl.hpp:163
cl::sycl::detail::queue_impl::getPlugin
const plugin & getPlugin() const
Definition: queue_impl.hpp:186
cl::sycl::detail::DefaultContextType
constexpr CUDAContextT DefaultContextType
Default context type created for CUDA backend.
Definition: queue_impl.hpp:50
cl::sycl::detail::getSyclObjImpl
decltype(Obj::impl) getSyclObjImpl(const Obj &SyclObject)
Definition: common.hpp:198
cl::sycl::detail::plugin::checkPiResult
void checkPiResult(RT::PiResult pi_result) const
Checks return value from PI calls.
Definition: plugin.hpp:116
handler.hpp
exception.hpp
piQueueGetInfo
pi_result piQueueGetInfo(pi_queue command_queue, pi_queue_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
Definition: pi_esimd_emulator.cpp:958
cl::sycl::detail::queue_impl::submit
event submit(const std::function< void(handler &)> &CGF, const std::shared_ptr< queue_impl > &Self, const std::shared_ptr< queue_impl > &SecondQueue, const detail::code_location &Loc, const SubmitPostProcessF *PostProcess=nullptr)
Submits a command group function object to the queue, in order to be scheduled for execution on the d...
Definition: queue_impl.hpp:222
cl::sycl::context
The context class represents a SYCL context on which kernel functions may be executed.
Definition: context.hpp:35
cl::sycl::detail::queue_impl::get_context
context get_context() const
Definition: queue_impl.hpp:182
exception_list.hpp
cl::sycl::detail::DeviceImplPtr
std::shared_ptr< device_impl > DeviceImplPtr
Definition: program_manager.hpp:55
cl::sycl::detail::queue_impl::submit
event submit(const std::function< void(handler &)> &CGF, const std::shared_ptr< queue_impl > &Self, const detail::code_location &Loc, const SubmitPostProcessF *PostProcess=nullptr)
Submits a command group function object to the queue, in order to be scheduled for execution on the d...
Definition: queue_impl.hpp:247
cl::sycl::detail::queue_impl::get_device
device get_device() const
Definition: queue_impl.hpp:193
cl::sycl::detail::queue_impl::getContextImplPtr
const ContextImplPtr & getContextImplPtr() const
Definition: queue_impl.hpp:188
cl::sycl::detail::queue_impl::createQueue
RT::PiQueue createQueue(QueueOrder Order)
Creates PI queue.
Definition: queue_impl.hpp:297
cl::sycl::detail::queue_impl::getDefaultOrNew
static ContextImplPtr getDefaultOrNew(const DeviceImplPtr &Device)
Definition: queue_impl.hpp:58
cl::sycl::exception
Definition: exception.hpp:63
kernel_impl.hpp
cl::sycl::exception_list
A list of asynchronous exceptions.
Definition: exception_list.hpp:30
assert_happened.hpp
thread_pool.hpp
cl::sycl::async_handler
std::function< void(cl::sycl::exception_list)> async_handler
Definition: exception_list.hpp:53
cl::sycl::detail::queue_impl::MIsProfilingEnabled
const bool MIsProfilingEnabled
Definition: queue_impl.hpp:589
PI_INVALID_DEVICE
@ PI_INVALID_DEVICE
Definition: pi.h:94
PI_QUEUE_PROFILING_ENABLE
constexpr pi_queue_properties PI_QUEUE_PROFILING_ENABLE
Definition: pi.h:626
_pi_context
PI context mapping to a CUDA context object.
Definition: pi_cuda.hpp:150
cl::sycl::detail::queue_impl::finalizeHandler
void finalizeHandler(HandlerType &Handler, const CG::CGTYPE &Type, event &EventRet)
Definition: queue_impl.hpp:447
_pi_device
PI device mapping to a CUdevice.
Definition: pi_cuda.hpp:73
__SYCL_INLINE_NAMESPACE
#define __SYCL_INLINE_NAMESPACE(X)
Definition: defines_elementary.hpp:12