35 #ifdef XPTI_ENABLE_INSTRUMENTATION
36 #include "xpti/xpti_trace_framework.hpp"
// Fragment: the statement heads are not visible. A context is constructed
// from Device alone, with two empty brace arguments.
66 context{createSyclObjFromImpl<device>(Device), {}, {}});
// The default context of the device's platform is requested here.
69 Device->get_platform().ext_oneapi_get_default_context());
// The default context is returned only when it reports Device as valid.
70 if (DefaultContext->isDeviceValid(Device))
71 return DefaultContext;
// Otherwise a context is again constructed for Device alone (presumably the
// fallback return value; the enclosing statement is not visible).
73 context{createSyclObjFromImpl<device>(Device), {}, {}});
// Delegating constructor (signature line not visible): forwards to the
// overload that takes an explicit context, passing getDefaultOrNew(Device)
// as that context.
84 :
queue_impl(Device, getDefaultOrNew(Device), AsyncHandler, PropList){};
// Member initializer list (fragment; the constructor's signature line is not
// visible). Device, Context, AsyncHandler and PropList are stored in the
// corresponding members.
97 : MDevice(Device), MContext(Context), MAsyncHandler(AsyncHandler),
// MHostQueue is computed through MDevice.
// NOTE(review): this relies on MDevice being declared before MHostQueue in
// the class; the declaration order is not visible here, confirm.
98 MPropList(PropList), MHostQueue(MDevice->is_host()),
// The assert-tracking buffer is created with a one-element range.
99 MAssertHappenedBuffer(
range<1>{1}),
// The flags below cache property lookups made at construction time.
100 MIsInorder(has_property<property::queue::in_order>()),
102 has_property<ext::oneapi::property::queue::discard_events>()),
103 MIsProfilingEnabled(has_property<property::queue::enable_profiling>()),
// Discard-events support needs MDiscardEvents and, for a non-host queue, an
// in-order queue; a host queue qualifies regardless of ordering.
104 MHasDiscardEventsSupport(MDiscardEvents &&
105 (MHostQueue ?
true : MIsInorder)) {
// Tracing section.
// NOTE(review): this guard uses `#if`, while the guard around the xpti
// include at the top of the file uses `#ifdef` for the same macro. The two
// behave differently when the macro is defined as 0 or defined empty.
108 #if XPTI_ENABLE_INSTRUMENTATION
// Scope object tied to this queue for the queue_create trace point (the
// remaining constructor arguments are not visible in this fragment).
109 XPTIScope PrepareNotify((
void *)
this,
112 (uint16_t)xpti::trace_point_type_t::queue_create,
115 if (xptiTraceEnabled()) {
// Keep the trace event, stream id and instance id in members; the
// queue_destroy notification elsewhere in this file passes the same members.
116 MTraceEvent = (
void *)PrepareNotify.traceEvent();
117 MStreamID = PrepareNotify.streamID();
118 MInstanceID = PrepareNotify.instanceID();
// Attach metadata describing the queue: context handle, device name, device
// handle (0 for a host device) and the in-order flag.
121 PrepareNotify.addMetadata([&](
auto TEvent) {
122 xpti::addMetadata(TEvent,
"sycl_context",
123 reinterpret_cast<size_t>(MContext->getHandleRef()));
125 xpti::addMetadata(TEvent,
"sycl_device_name", MDevice->getDeviceName());
127 TEvent,
"sycl_device",
128 reinterpret_cast<size_t>(
129 MDevice->is_host() ? 0 : MDevice->getHandleRef()));
131 xpti::addMetadata(TEvent,
"is_inorder", MIsInorder);
133 PrepareNotify.notify();
// Property validation. enable_profiling may not be combined with
// discard_events, and it needs the queue_profiling device aspect. (The throw
// statements that carry these messages are only partly visible.)
135 if (has_property<property::queue::enable_profiling>()) {
136 if (has_property<ext::oneapi::property::queue::discard_events>())
138 "Queue cannot be constructed with both of "
139 "discard_events and enable_profiling.");
140 if (!MDevice->has(aspect::queue_profiling))
142 "Cannot enable profiling, the associated device "
143 "does not have the queue_profiling aspect");
// compute_index must be non-negative and below NumIndices. NumIndices is
// presumably the max_compute_queue_indices value queried below; its
// declaration line is not visible.
145 if (has_property<ext::intel::property::queue::compute_index>()) {
146 int Idx = get_property<ext::intel::property::queue::compute_index>()
149 createSyclObjFromImpl<device>(Device)
150 .get_info<ext::intel::info::device::max_compute_queue_indices>();
151 if (Idx < 0 || Idx >= NumIndices)
152 throw sycl::exception(
154 "Queue compute index must be a non-negative number less than "
155 "device's number of available compute queue indices.");
// Reject a device that the context does not report as valid. A non-host
// OpenCL context gets a more specific message; both throws carry
// PI_ERROR_INVALID_DEVICE.
157 if (!Context->isDeviceValid(Device)) {
158 if (!Context->is_host() &&
159 Context->getPlugin().getBackend() == backend::opencl)
160 throw sycl::invalid_object_error(
161 "Queue cannot be constructed with the given context and device "
162 "since the device is not a member of the context (descendants of "
163 "devices from the context are not supported on OpenCL yet).",
164 PI_ERROR_INVALID_DEVICE);
165 throw sycl::invalid_object_error(
166 "Queue cannot be constructed with the given context and device "
167 "since the device is neither a member of the context nor a "
168 "descendant of its member.",
169 PI_ERROR_INVALID_DEVICE);
// Create a native queue and store it in MQueues. QOrder is defined on lines
// that are not visible in this fragment.
174 MQueues.push_back(createQueue(QOrder));
// Member initializer list of a constructor that adopts an existing native
// queue (PiQueue is stored into MQueues further down; the signature line is
// not visible). MPropList is default-constructed and MHostQueue is false.
// MDevice does not appear in the visible part of this list.
188 : MContext(Context), MAsyncHandler(AsyncHandler), MPropList(),
189 MHostQueue(false), MAssertHappenedBuffer(
range<1>{1}),
// NOTE(review): elsewhere in this file a property query forwards to
// MPropList, which is empty here, so these flags are presumably always
// false in this constructor. Confirm.
190 MIsInorder(has_property<property::queue::in_order>()),
192 has_property<ext::oneapi::property::queue::discard_events>()),
193 MIsProfilingEnabled(has_property<property::queue::enable_profiling>()),
194 MHasDiscardEventsSupport(MDiscardEvents &&
195 (MHostQueue ?
true : MIsInorder)) {
// Tracing section, guarded with `#if` (the xpti include at the top of the
// file is guarded with `#ifdef` for the same macro).
200 #if XPTI_ENABLE_INSTRUMENTATION
// Scope object tied to this queue for the queue_create trace point.
201 XPTIScope PrepareNotify((
void *)
this,
204 (uint16_t)xpti::trace_point_type_t::queue_create,
206 if (xptiTraceEnabled()) {
// Keep the trace event, stream id and instance id in members.
208 MTraceEvent = (
void *)PrepareNotify.traceEvent();
209 MStreamID = PrepareNotify.streamID();
210 MInstanceID = PrepareNotify.instanceID();
// Attach metadata describing the queue.
// NOTE(review): the lambda dereferences MDevice, but in this constructor
// MDevice is only assigned further down, from findMatchingDeviceImpl. If the
// lambda runs before that assignment, MDevice may still be null here. Verify
// and, if so, guard the device metadata or move the device lookup earlier.
213 PrepareNotify.addMetadata([&](
auto TEvent) {
214 xpti::addMetadata(TEvent,
"sycl_context",
215 reinterpret_cast<size_t>(MContext->getHandleRef()));
217 xpti::addMetadata(TEvent,
"sycl_device_name", MDevice->getDeviceName());
219 TEvent,
"sycl_device",
220 reinterpret_cast<size_t>(
221 MDevice->is_host() ? 0 : MDevice->getHandleRef()));
223 xpti::addMetadata(TEvent,
"is_inorder", MIsInorder);
225 PrepareNotify.notify();
// discard_events and enable_profiling may not be combined (the throw that
// carries this message is only partly visible).
227 if (has_property<ext::oneapi::property::queue::discard_events>() &&
228 has_property<property::queue::enable_profiling>()) {
230 "Queue cannot be constructed with both of "
231 "discard_events and enable_profiling.");
// Adopt the caller-provided native queue as an entry of MQueues.
234 MQueues.push_back(pi::cast<RT::PiQueue>(
PiQueue));
237 const detail::plugin &Plugin =
getPlugin();
// Resolve the device implementation from DevicePI via the context (how
// DevicePI is obtained is not visible); a null result is an error.
241 MDevice = MContext->findMatchingDeviceImpl(DevicePI);
242 if (MDevice ==
nullptr) {
243 throw sycl::exception(
245 "Device provided by native Queue not found in Context.");
// Teardown fragment (the enclosing function header is not visible;
// presumably the destructor). When tracing is enabled, a queue_destroy
// notification is sent using the stream id, trace event and instance id
// stored in members; the user-data argument is the literal "queue_destroy".
253 #if XPTI_ENABLE_INSTRUMENTATION
254 if (xptiTraceEnabled()) {
256 xptiNotifySubscribers(
257 MStreamID, (uint16_t)xpti::trace_point_type_t::queue_destroy,
nullptr,
258 (xpti::trace_event_data_t *)MTraceEvent, MInstanceID,
259 static_cast<const void *
>(
"queue_destroy"));
// NOTE(review): if this is the destructor, an exception escaping from
// throw_asynchronous() would be a concern. Confirm how it is handled.
262 throw_asynchronous();
// Fragment of a getter for the OpenCL queue handle. The error path throws
// invalid_object_error with PI_ERROR_INVALID_QUEUE; its guarding condition
// is not visible.
271 throw invalid_object_error(
272 "This instance of queue doesn't support OpenCL interoperability",
273 PI_ERROR_INVALID_QUEUE);
// The success path returns the first entry of MQueues cast to
// cl_command_queue.
276 return pi::cast<cl_command_queue>(MQueues[0]);
// Wraps the stored context implementation (MContext) in a context object.
281 return createSyclObjFromImpl<context>(MContext);
// Declaration only: a const member template that returns
// Param::return_type for the info descriptor Param. The definition is not
// in this fragment.
304 template <
typename Param>
typename Param::return_type get_info()
const;
// Fragment of a submit overload that takes this queue (Self) and a second
// queue, both as shared_ptr<queue_impl>.
322 const std::shared_ptr<queue_impl> &Self,
323 const std::shared_ptr<queue_impl> &SecondQueue,
// First path: submit through this queue, passing Self twice and then
// SecondQueue.
327 return submit_impl(CGF, Self, Self, SecondQueue, Loc, PostProcess);
// Second path: submit through SecondQueue instead. Presumably a fallback
// taken after the first path fails; the surrounding control flow is not
// visible.
329 return SecondQueue->submit_impl(CGF, SecondQueue, Self, SecondQueue, Loc,
// Fragment of a submit overload without a second queue: forwards to
// submit_impl with Self passed twice and nullptr in the following slot.
343 const std::shared_ptr<queue_impl> &Self,
346 return submit_impl(CGF, Self, Self,
nullptr, Loc, PostProcess);
// Fragment: a call to throw_asynchronous(); the enclosing function is not
// visible.
362 throw_asynchronous();
// Fragment: under MMutex, the contents of MExceptions are swapped with the
// local Exceptions list (its declaration is not visible).
378 std::lock_guard<std::mutex> Lock(MMutex);
379 std::swap(Exceptions, MExceptions);
// The async handler is invoked only when the swapped-out list is non-empty.
// NOTE(review): whether the lock is still held at this call depends on
// scoping braces that are not visible here.
384 if (Exceptions.
size())
385 MAsyncHandler(std::move(Exceptions));
// Fragment: a sequence of checks on MPropList. The bodies of these branches
// are mostly not visible; presumably each contributes to the native queue
// property value.
398 if (MPropList.has_property<property::queue::enable_profiling>()) {
401 if (MPropList.has_property<
402 ext::oneapi::cuda::property::queue::use_default_stream>()) {
406 .has_property<ext::oneapi::property::queue::discard_events>()) {
// PrioritySeen starts false. The priority checks below throw "different
// priorities" errors; the conditions that guard those throws are not
// visible (presumably they test PrioritySeen).
412 bool PrioritySeen =
false;
414 .has_property<ext::oneapi::property::queue::priority_normal>()) {
418 if (MPropList.has_property<ext::oneapi::property::queue::priority_low>()) {
420 throw sycl::exception(
422 "Queue cannot be constructed with different priorities.");
427 if (MPropList.has_property<ext::oneapi::property::queue::priority_high>()) {
429 throw sycl::exception(
431 "Queue cannot be constructed with different priorities.");
// Fragment of native queue creation. Sanity check: the plugin in use and
// the device's plugin must report the same backend.
441 assert(Plugin.
getBackend() == MDevice->getPlugin().getBackend());
// The compute_index property, when present, is read into Idx (how it is
// used afterwards is not visible).
444 if (has_property<ext::intel::property::queue::compute_index>()) {
445 int Idx = get_property<ext::intel::property::queue::compute_index>()
// Tail of the creation call; the result is written through &Queue.
451 Context, Device, Properties, &Queue);
// Special case: invalid-queue-properties error while MEmulateOOO is false.
// The handling itself is not visible; presumably it switches to emulating
// out-of-order execution.
456 if (!MEmulateOOO && Error == PI_ERROR_INVALID_QUEUE_PROPERTIES) {
// Fragment: selects a slot in MQueues while holding MMutex. ReuseQueue
// starts false; the condition that sets it is not visible.
470 bool ReuseQueue =
false;
472 std::lock_guard<std::mutex> Lock(MMutex);
// One path appends an empty slot and points PIQ at it.
478 MQueues.push_back({});
479 PIQ = &MQueues.back();
// The other path points PIQ at the existing slot indexed by MNextQueueIdx.
483 PIQ = &MQueues[MNextQueueIdx];
// Fragment: forwards to getExclusiveQueueHandleRef(); the enclosing function
// and any other branches are not visible.
503 return getExclusiveQueueHandleRef();
// Fragments of two property accessors templated on propertyT (signatures
// not visible): both forward directly to MPropList.
509 return MPropList.has_property<propertyT>();
516 return MPropList.get_property<propertyT>();
// Declarations only (definitions are not in this fragment). Each operation
// takes the queue itself as Self plus a list of events in DepEvents, and
// returns an event.

// memset: Ptr, Value and Count describe the region and fill value.
528 event memset(
const std::shared_ptr<queue_impl> &Self,
void *Ptr,
int Value,
529 size_t Count,
const std::vector<event> &DepEvents);
// memcpy: Count units from Src to Dest (presumably bytes; confirm).
540 event memcpy(
const std::shared_ptr<queue_impl> &Self,
void *Dest,
541 const void *Src,
size_t Count,
542 const std::vector<event> &DepEvents);
// mem_advise: the parameters between Ptr and DepEvents are not visible.
553 event mem_advise(
const std::shared_ptr<queue_impl> &Self,
const void *Ptr,
555 const std::vector<event> &DepEvents);
// Fragment: appends ExceptionPtr to MExceptions while holding MMutex, the
// same mutex that is held when MExceptions is swapped out elsewhere in this
// file.
561 std::lock_guard<std::mutex> Lock(MMutex);
562 MExceptions.PushBack(ExceptionPtr);
// Fragment: returns the host-task thread pool obtained from the
// GlobalHandler instance.
566 return GlobalHandler::instance().getHostTaskThreadPool();
// Fragment: returns the MAssertHappenedBuffer member (return type not
// visible).
575 return MAssertHappenedBuffer;
// Fragment: appends Event to MStreamsServiceEvents while holding MMutex.
579 std::lock_guard<std::mutex> Lock(MMutex);
580 MStreamsServiceEvents.push_back(Event);
// Declaration only: a const query that returns bool. Presumably reports
// whether the queue has no outstanding work; confirm against the
// definition, which is not in this fragment.
583 bool ext_oneapi_empty()
const;
// Fragment: asks the scheduler whether fusion mode is active, passing a hash
// of a raw queue_impl pointer (the hashed argument itself is not visible).
589 return detail::Scheduler::getInstance().isInFusionMode(
590 std::hash<
typename std::shared_ptr<queue_impl>::element_type *>()(
// Declarations only (definitions are not in this fragment). Two mirrored
// copy operations involving a device global; each returns an event and
// takes a list of events in DepEvents. One parameter between NumBytes and
// DepEvents is not visible in either declaration.

// Copies NumBytes from Src into the device global at DeviceGlobalPtr.
594 event memcpyToDeviceGlobal(
const std::shared_ptr<queue_impl> &Self,
595 void *DeviceGlobalPtr,
const void *Src,
596 bool IsDeviceImageScope,
size_t NumBytes,
598 const std::vector<event> &DepEvents);
// Copies NumBytes from the device global at DeviceGlobalPtr into Dest.
599 event memcpyFromDeviceGlobal(
const std::shared_ptr<queue_impl> &Self,
600 void *Dest,
const void *DeviceGlobalPtr,
601 bool IsDeviceImageScope,
size_t NumBytes,
603 const std::vector<event> &DepEvents);
// Fragment of a member template parameterized on the handler type
// (defaulting to handler); the function signature is not visible.
607 template <
typename HandlerType = handler>
// True for the two command-group types tested here: CodeplayHostTask and
// CodeplayInteropTask.
612 auto IsExpDepManaged = [](
const CG::CGTYPE &Type) {
613 return (Type == CG::CGTYPE::CodeplayHostTask ||
614 Type == CG::CGTYPE::CodeplayInteropTask);
// MLastEventMtx is held while MLastCGType and MLastEvent are read and
// updated below.
619 std::lock_guard<std::mutex> Lock{MLastEventMtx};
// The body of this check is not visible.
621 if (MLastCGType == CG::CGTYPE::None)
// If either the current or the previous command group is of those types, an
// explicit dependency on the last event is added before finalizing.
625 bool NeedSeparateDependencyMgmt =
626 IsExpDepManaged(Type) || IsExpDepManaged(MLastCGType);
628 if (NeedSeparateDependencyMgmt)
629 Handler.depends_on(MLastEvent);
// Finalize and remember the resulting event as the new last event.
631 EventRet = Handler.finalize();
633 MLastEvent = EventRet;
// A second finalize call; presumably the branch that needs no last-event
// tracking. Its guarding condition is not visible.
636 EventRet = Handler.finalize();
// Fragment of the submission routine. Visible parameters: the queue itself
// (Self) plus the primary and secondary queues, all as
// shared_ptr<queue_impl>.
651 const std::shared_ptr<queue_impl> &Self,
652 const std::shared_ptr<queue_impl> &PrimaryQueue,
653 const std::shared_ptr<queue_impl> &SecondaryQueue,
// Build the handler for this submission and record the code location.
656 handler Handler(Self, PrimaryQueue, SecondaryQueue, MHostQueue);
657 Handler.saveCodeLoc(Loc);
// Event starts out wrapping a fresh event_impl; presumably it is replaced
// by the real result on lines not visible here.
664 event Event = detail::createSyclObjFromImpl<event>(
665 std::make_shared<detail::event_impl>());
// Gather the inputs for the post-processing callback: whether this is a
// kernel submission and, for a non-interop kernel, whether the program
// manager reports that the kernel uses assert.
668 bool IsKernel = Type == CG::Kernel;
669 bool KernelUsesAssert =
false;
673 KernelUsesAssert = !(Handler.MKernel && Handler.MKernel->isInterop()) &&
674 ProgramManager::getInstance().kernelUsesAssert(
675 Handler.MOSModuleHandle, Handler.MKernelName);
// PostProcess is dereferenced here, so it must be non-null at this point;
// the guard, if any, is not visible.
679 (*PostProcess)(IsKernel, KernelUsesAssert, Event);
// Tail of a declaration whose head is not visible: takes a mutable string
// Name and an int32_t StreamID, among other parameters.
690 std::string &Name, int32_t StreamID,
// Declaration only. Takes an opaque event pointer (parameter name spelled
// "TelementryEvent" as in the original), a mutable Name string, a stream id
// and a 64-bit id IId. The definition is not in this fragment.
693 void instrumentationEpilog(
void *TelementryEvent, std::string &Name,
694 int32_t StreamID, uint64_t IId);
// Declarations only: two functions that each take an event by const
// reference. How a "shared" event differs from a plain one is not visible
// in this fragment.
701 void addSharedEvent(
const event &Event);
706 void addEvent(
const event &Event);
// Index into MQueues used when an existing queue slot is selected.
728 size_t MNextQueueIdx = 0;
// Default is false; the constructors in this file set it in their
// initializer lists.
730 const bool MHostQueue =
false;
// Checked during native queue creation when the backend reports
// PI_ERROR_INVALID_QUEUE_PROPERTIES. Presumably "emulate out-of-order";
// confirm.
733 bool MEmulateOOO =
false;
// Trace event captured at construction and passed to the queue_destroy
// notification.
755 void *MTraceEvent =
nullptr;
// Instance id captured at construction alongside MTraceEvent.
759 uint64_t MInstanceID = 0;