20 #ifdef XPTI_ENABLE_INSTRUMENTATION
21 #include "xpti/xpti_trace_framework.hpp"
// Explicit specializations of queue_impl::get_info for three queue info
// descriptors. Only the signatures are visible in this excerpt; the bodies
// are on lines that are not shown.
//
// info::queue::reference_count -> declared to return uint32_t.
30 uint32_t queue_impl::get_info<info::queue::reference_count>()
const {
// info::queue::context -> declared to return a context object.
39 template <>
context queue_impl::get_info<info::queue::context>()
const {
// info::queue::device -> declared to return a device object.
43 template <>
device queue_impl::get_info<info::queue::device>()
const {
// Fragment (enclosing definition not visible): creates an event_impl bound to
// QueueImpl, stores NativeEvent through getHandleRef(), calls
// setStateIncomplete() on it, and returns it wrapped as a SYCL `event`.
50 auto EventImpl = std::make_shared<detail::event_impl>(QueueImpl);
51 EventImpl->getHandleRef() = NativeEvent;
53 EventImpl->setStateIncomplete();
54 return detail::createSyclObjFromImpl<event>(EventImpl);
// Fragment (enclosing definition not visible): an event_impl constructed with
// event_impl::HES_Discarded is wrapped and returned as a SYCL `event`.
59 std::make_shared<event_impl>(event_impl::HES_Discarded);
60 return createSyclObjFromImpl<event>(EventImpl);
// USM memset entry point of the queue. Forwards to MemoryManager::fill_usm
// with (Ptr, Self, Count, Value, ...); the trailing arguments of those calls
// are on lines not visible in this excerpt.
//
// Self      - shared pointer to the queue, passed through to fill_usm.
// Ptr       - destination pointer (reported to XPTI as "memory_ptr").
// Value     - fill value (reported to XPTI as "value_set").
// Count     - size of the region (reported to XPTI as "memory_size").
// DepEvents - events the operation depends on; their use is not visible here.
// Returns an `event`; the return statements are not visible in this excerpt.
63 event queue_impl::memset(
const std::shared_ptr<detail::queue_impl> &Self,
64 void *Ptr,
int Value,
size_t Count,
65 const std::vector<event> &DepEvents) {
// NOTE(review): this guard uses `#if`, whereas other guards in this file use
// `#ifdef XPTI_ENABLE_INSTRUMENTATION`. The two behave differently when the
// macro is defined empty or defined as 0 - confirm which is intended.
66 #if XPTI_ENABLE_INSTRUMENTATION
// Tracing scope keyed on `this`, created with a node_create trace point.
70 XPTIScope PrepareNotify((
void *)
this,
71 (uint16_t)xpti::trace_point_type_t::node_create,
// Metadata attached to the trace event: device handle (0 for a host device),
// target pointer, fill value and size.
73 PrepareNotify.addMetadata([&](
auto TEvent) {
74 xpti::addMetadata(TEvent,
"sycl_device",
75 reinterpret_cast<size_t>(
76 MDevice->is_host() ? 0 : MDevice->getHandleRef()));
77 xpti::addMetadata(TEvent,
"memory_ptr",
reinterpret_cast<size_t>(Ptr));
78 xpti::addMetadata(TEvent,
"value_set", Value);
79 xpti::addMetadata(TEvent,
"memory_size", Count);
82 PrepareNotify.notify();
84 PrepareNotify.scopedNotify((uint16_t)xpti::trace_point_type_t::task_begin);
// Path taken when MHasDiscardEventsSupport is set; fill_usm is called inside
// it. How this branch finishes is not visible here.
86 if (MHasDiscardEventsSupport) {
87 MemoryManager::fill_usm(Ptr, Self, Count, Value,
// In-order queues take MLastEventMtx for the rest of the scope; otherwise
// ScopeLock is a default-constructed unique_lock that owns no mutex.
95 auto ScopeLock = isInOrder() ? std::unique_lock<std::mutex>(MLastEventMtx)
96 : std::unique_lock<std::mutex>();
// Special case: in-order queue whose last recorded command-group type is a
// host task or interop task. The guarded statement is not visible here.
99 if (isInOrder() && (MLastCGType == CG::CGTYPE::CodeplayHostTask ||
100 MLastCGType == CG::CGTYPE::CodeplayInteropTask))
104 MemoryManager::fill_usm(Ptr, Self, Count, Value,
// Host-context branch; its body is not visible in this excerpt.
107 if (MContext->is_host())
// Record the resulting event as the queue's last event and reset the last
// command-group type. Any enclosing condition is not visible here.
112 MLastEvent = ResEvent;
116 MLastCGType = CG::CGTYPE::None;
// Track ResEvent in the shared-event list (enclosing condition not visible).
121 addSharedEvent(ResEvent);
// USM copy routine: forwards to MemoryManager::copy_usm with
// (Src, Self, Count, Dest, ...). The first line of the signature is not
// visible in this excerpt - presumably queue_impl::memcpy; confirm.
//
// Dest      - destination pointer (reported to XPTI as "dest_memory_ptr").
// Src       - source pointer (reported to XPTI as "src_memory_ptr").
// Count     - size of the copy (reported to XPTI as "memory_size").
// DepEvents - events the operation depends on; their use is not visible here.
126 void *Dest,
const void *Src,
size_t Count,
127 const std::vector<event> &DepEvents) {
// NOTE(review): this guard uses `#if`, whereas other guards in this file use
// `#ifdef XPTI_ENABLE_INSTRUMENTATION`. The two behave differently when the
// macro is defined empty or defined as 0 - confirm which is intended.
128 #if XPTI_ENABLE_INSTRUMENTATION
// Tracing scope keyed on `this`, created with a node_create trace point.
132 XPTIScope PrepareNotify((
void *)
this,
133 (uint16_t)xpti::trace_point_type_t::node_create,
// Metadata attached to the trace event: device handle (0 for a host device),
// source pointer, destination pointer and size.
135 PrepareNotify.addMetadata([&](
auto TEvent) {
136 xpti::addMetadata(TEvent,
"sycl_device",
137 reinterpret_cast<size_t>(
138 MDevice->is_host() ? 0 : MDevice->getHandleRef()));
139 xpti::addMetadata(TEvent,
"src_memory_ptr",
reinterpret_cast<size_t>(Src));
140 xpti::addMetadata(TEvent,
"dest_memory_ptr",
141 reinterpret_cast<size_t>(Dest));
142 xpti::addMetadata(TEvent,
"memory_size", Count);
145 PrepareNotify.notify();
147 PrepareNotify.scopedNotify((uint16_t)xpti::trace_point_type_t::task_begin);
// Path taken when MHasDiscardEventsSupport is set; copy_usm is called inside
// it. How this branch finishes is not visible here.
149 if (MHasDiscardEventsSupport) {
150 MemoryManager::copy_usm(Src, Self, Count, Dest,
// In-order queues take MLastEventMtx for the rest of the scope; otherwise
// ScopeLock is a default-constructed unique_lock that owns no mutex.
158 auto ScopeLock = isInOrder() ? std::unique_lock<std::mutex>(MLastEventMtx)
159 : std::unique_lock<std::mutex>();
// Special case: in-order queue whose last recorded command-group type is a
// host task or interop task. The guarded statement is not visible here.
162 if (isInOrder() && (MLastCGType == CG::CGTYPE::CodeplayHostTask ||
163 MLastCGType == CG::CGTYPE::CodeplayInteropTask))
167 MemoryManager::copy_usm(Src, Self, Count, Dest,
// Host-context branch; its body is not visible in this excerpt.
170 if (MContext->is_host())
// Record the resulting event as the queue's last event and reset the last
// command-group type. Any enclosing condition is not visible here.
175 MLastEvent = ResEvent;
179 MLastCGType = CG::CGTYPE::None;
// Track ResEvent in the shared-event list (enclosing condition not visible).
184 addSharedEvent(ResEvent);
// USM memory-advice entry point: forwards to MemoryManager::advise_usm with
// (Ptr, Self, Length, Advice, ...).
//
// Self      - shared pointer to the queue, passed through to advise_usm.
// Ptr       - start of the region the advice applies to.
// Length    - size of that region.
// Advice    - passed to advise_usm; its parameter declaration is on a line
//             not visible in this excerpt.
// DepEvents - events the operation depends on; their use is not visible here.
// Returns an `event`; the return statements are not visible in this excerpt.
188 event queue_impl::mem_advise(
const std::shared_ptr<detail::queue_impl> &Self,
189 const void *Ptr,
size_t Length,
191 const std::vector<event> &DepEvents) {
// Path taken when MHasDiscardEventsSupport is set; how it finishes is not
// visible here.
192 if (MHasDiscardEventsSupport) {
193 MemoryManager::advise_usm(Ptr, Self, Length, Advice,
// In-order queues take MLastEventMtx for the rest of the scope; otherwise
// ScopeLock is a default-constructed unique_lock that owns no mutex.
201 auto ScopeLock = isInOrder() ? std::unique_lock<std::mutex>(MLastEventMtx)
202 : std::unique_lock<std::mutex>();
// Special case: in-order queue whose last recorded command-group type is a
// host task or interop task. The guarded statement is not visible here.
205 if (isInOrder() && (MLastCGType == CG::CGTYPE::CodeplayHostTask ||
206 MLastCGType == CG::CGTYPE::CodeplayInteropTask))
210 MemoryManager::advise_usm(Ptr, Self, Length, Advice,
// Host-context branch; its body is not visible in this excerpt.
214 if (MContext->is_host())
// Record the resulting event as the queue's last event and reset the last
// command-group type. Any enclosing condition is not visible here.
219 MLastEvent = ResEvent;
223 MLastCGType = CG::CGTYPE::None;
// Track ResEvent in the shared-event list (enclosing condition not visible).
228 addSharedEvent(ResEvent);
// Copies NumBytes from Src into a device global, starting at Offset, by
// forwarding to MemoryManager::copy_to_device_global.
//
// Self               - shared pointer to the queue, passed through.
// DeviceGlobalPtr    - identifies the destination device global.
// Src                - source pointer.
// IsDeviceImageScope - forwarded unchanged to copy_to_device_global.
// NumBytes, Offset   - size of the copy and offset, forwarded unchanged.
// DepEvents          - dependencies; resolved via getOrWaitEvents(DepEvents,
//                      MContext) on the discard-events path.
// Returns an `event`; the return statements are not visible in this excerpt.
232 event queue_impl::memcpyToDeviceGlobal(
233 const std::shared_ptr<detail::queue_impl> &Self,
void *DeviceGlobalPtr,
234 const void *Src,
bool IsDeviceImageScope,
size_t NumBytes,
size_t Offset,
235 const std::vector<event> &DepEvents) {
// Discard-events path: the last argument passed is nullptr. How this branch
// finishes is not visible here.
236 if (MHasDiscardEventsSupport) {
237 MemoryManager::copy_to_device_global(
238 DeviceGlobalPtr, IsDeviceImageScope, Self, NumBytes, Offset, Src,
239 OSUtil::ExeModuleHandle,
getOrWaitEvents(DepEvents, MContext),
nullptr);
// In-order queues take MLastEventMtx for the rest of the scope; otherwise
// ScopeLock is a default-constructed unique_lock that owns no mutex.
246 auto ScopeLock = isInOrder() ? std::unique_lock<std::mutex>(MLastEventMtx)
247 : std::unique_lock<std::mutex>();
// Special case: in-order queue whose last recorded command-group type is a
// host task or interop task. The guarded statement is not visible here.
250 if (isInOrder() && (MLastCGType == CG::CGTYPE::CodeplayHostTask ||
251 MLastCGType == CG::CGTYPE::CodeplayInteropTask))
255 MemoryManager::copy_to_device_global(
256 DeviceGlobalPtr, IsDeviceImageScope, Self, NumBytes, Offset, Src,
// Host-context branch; its body is not visible in this excerpt.
260 if (MContext->is_host())
// Record the resulting event as the queue's last event and reset the last
// command-group type. Any enclosing condition is not visible here.
266 MLastEvent = ResEvent;
270 MLastCGType = CG::CGTYPE::None;
// Track ResEvent in the shared-event list (enclosing condition not visible).
275 addSharedEvent(ResEvent);
// Copies NumBytes out of a device global, starting at Offset, into Dest by
// forwarding to MemoryManager::copy_from_device_global.
//
// Self               - shared pointer to the queue, passed through.
// Dest               - destination pointer.
// DeviceGlobalPtr    - identifies the source device global.
// IsDeviceImageScope - forwarded unchanged to copy_from_device_global.
// NumBytes, Offset   - size of the copy and offset, forwarded unchanged.
// DepEvents          - dependencies; resolved via getOrWaitEvents(DepEvents,
//                      MContext) on the discard-events path.
// Returns an `event`; the return statements are not visible in this excerpt.
279 event queue_impl::memcpyFromDeviceGlobal(
280 const std::shared_ptr<detail::queue_impl> &Self,
void *Dest,
281 const void *DeviceGlobalPtr,
bool IsDeviceImageScope,
size_t NumBytes,
282 size_t Offset,
const std::vector<event> &DepEvents) {
// Discard-events path: the last argument passed is nullptr. How this branch
// finishes is not visible here.
283 if (MHasDiscardEventsSupport) {
284 MemoryManager::copy_from_device_global(
285 DeviceGlobalPtr, IsDeviceImageScope, Self, NumBytes, Offset, Dest,
286 OSUtil::ExeModuleHandle,
getOrWaitEvents(DepEvents, MContext),
nullptr);
// In-order queues take MLastEventMtx for the rest of the scope; otherwise
// ScopeLock is a default-constructed unique_lock that owns no mutex.
293 auto ScopeLock = isInOrder() ? std::unique_lock<std::mutex>(MLastEventMtx)
294 : std::unique_lock<std::mutex>();
// Special case: in-order queue whose last recorded command-group type is a
// host task or interop task. The guarded statement is not visible here.
297 if (isInOrder() && (MLastCGType == CG::CGTYPE::CodeplayHostTask ||
298 MLastCGType == CG::CGTYPE::CodeplayInteropTask))
302 MemoryManager::copy_from_device_global(
303 DeviceGlobalPtr, IsDeviceImageScope, Self, NumBytes, Offset, Dest,
// Host-context branch; its body is not visible in this excerpt.
307 if (MContext->is_host())
// Record the resulting event as the queue's last event and reset the last
// command-group type. Any enclosing condition is not visible here.
313 MLastEvent = ResEvent;
317 MLastCGType = CG::CGTYPE::None;
// Track ResEvent in the shared-event list (enclosing condition not visible).
322 addSharedEvent(ResEvent);
// Registers Event with the queue's bookkeeping. Depending on conditions that
// are only partly visible in this excerpt, the event is either handed to
// addSharedEvent() or stored as a weak_ptr in MEventsWeak.
//
// Event - the event to track; its implementation object (EImpl) must be
//         non-null, which is asserted.
326 void queue_impl::addEvent(
const event &Event) {
328 assert(EImpl &&
"Event implementation is missing");
// Command associated with the event implementation, as returned by
// getCommand(). The check that uses Cmd is on a line not visible here.
329 auto *Cmd =
static_cast<Command *
>(EImpl->getCommand());
// Host queues, or queues with MEmulateOOO set, keep the event in the shared
// list. NOTE(review): the condition enclosing this `if` is not visible.
334 if (is_host() || MEmulateOOO)
335 addSharedEvent(Event);
// Otherwise, when the queue is host / MEmulateOOO or the event has a null
// handle, remember the event weakly so the queue does not own it.
339 else if (is_host() || MEmulateOOO || EImpl->getHandleRef() ==
nullptr) {
340 std::weak_ptr<event_impl> EventWeakPtr{EImpl};
// MMutex guards MEventsWeak.
341 std::lock_guard<std::mutex> Lock{MMutex};
342 MEventsWeak.push_back(std::move(EventWeakPtr));
// Appends Event to MEventsShared under MMutex. Only valid for host queues or
// queues with MEmulateOOO set, which is asserted.
//
// Event - the event to store.
349 void queue_impl::addSharedEvent(
const event &Event) {
350 assert(is_host() || MEmulateOOO);
351 std::lock_guard<std::mutex> Lock(MMutex);
// Once the list holds at least EventThreshold entries, a pruning step runs
// over MEventsShared using a predicate that is true for events whose
// execution status is not `complete`. The call that consumes the predicate is
// on lines not visible here - presumably it drops completed events; confirm.
357 const size_t EventThreshold = 128;
358 if (MEventsShared.size() >= EventThreshold) {
368 MEventsShared.begin(),
370 MEventsShared.begin(), MEventsShared.end(), [](
const event &E) {
371 return E.get_info<info::event::command_execution_status>() !=
372 info::event_command_status::complete;
375 MEventsShared.push_back(Event);
// Fragment of the tracing prolog for a queue wait. The start of the signature
// is not visible - presumably queue_impl::instrumentationProlog; confirm.
// It creates an XPTI event named after the queue, attaches device and
// source-location metadata, notifies subscribers with trace_wait_begin, and
// stores the event in TraceEvent.
//
// Name     - output: receives the generated event name.
// StreamID - stream passed to xptiNotifySubscribers.
// IId      - assigned the instance number produced by xptiMakeEvent; its
//            declaration is not visible here.
379 std::string &Name, int32_t StreamID,
// Stays nullptr unless instrumentation is compiled in and the event is made.
381 void *TraceEvent =
nullptr;
386 #ifdef XPTI_ENABLE_INSTRUMENTATION
387 xpti::trace_event_data_t *WaitEvent =
nullptr;
// Tracing-disabled check; the statement it guards is not visible here.
388 if (!xptiTraceEnabled())
391 xpti::payload_t Payload;
392 bool HasSourceInfo =
false;
// Build a name of the form "queue.wait" combined with this queue's address.
395 xpti::utils::StringHelper NG;
396 Name = NG.nameWithAddress<
queue_impl *>(
"queue.wait",
this);
// HasSourceInfo is set on a path whose condition is not visible here.
403 HasSourceInfo =
true;
// Payload built from the generated name and the queue address only.
406 Payload = xpti::payload_t(Name.c_str(), (
void *)
this);
// Create the trace event; QWaitInstanceNo receives its instance number, which
// is then handed back through IId.
411 uint64_t QWaitInstanceNo = 0;
412 WaitEvent = xptiMakeEvent(Name.c_str(), &Payload, xpti::trace_graph_event,
413 xpti_at::active, &QWaitInstanceNo);
414 IId = QWaitInstanceNo;
// One of several device-kind strings; the selection logic is not visible.
425 DevStr =
"ACCELERATOR";
428 xpti::addMetadata(WaitEvent,
"sycl_device", DevStr);
// Source-location metadata taken from CodeLoc (enclosing condition, if any,
// is not visible here).
430 xpti::addMetadata(WaitEvent,
"sym_function_name", CodeLoc.
functionName());
431 xpti::addMetadata(WaitEvent,
"sym_source_file_name", CodeLoc.
fileName());
432 xpti::addMetadata(WaitEvent,
"sym_line_no",
433 static_cast<int32_t
>((CodeLoc.
lineNumber())));
434 xpti::addMetadata(WaitEvent,
"sym_column_no",
// Announce the start of the wait to subscribers on StreamID.
437 xptiNotifySubscribers(StreamID, xpti::trace_wait_begin,
nullptr, WaitEvent,
439 static_cast<const void *
>(Name.c_str()));
440 TraceEvent = (
void *)WaitEvent;
// Tracing epilog for a queue wait: notifies XPTI subscribers with
// trace_wait_end for the event produced by the matching prolog.
//
// TelemetryEvent - opaque pointer to the xpti::trace_event_data_t to report.
// Name           - event name; its c_str() is passed as user data.
// StreamID       - stream passed to xptiNotifySubscribers.
// IId            - instance id passed to xptiNotifySubscribers.
446 void queue_impl::instrumentationEpilog(
void *TelemetryEvent, std::string &Name,
447 int32_t StreamID, uint64_t IId) {
// Explicitly marks the parameter as used (it is otherwise only referenced
// inside the XPTI_ENABLE_INSTRUMENTATION section below).
448 (void)TelemetryEvent;
452 #ifdef XPTI_ENABLE_INSTRUMENTATION
// Guard for "tracing disabled or no event"; the guarded statement is not
// visible here.
453 if (!(xptiTraceEnabled() && TelemetryEvent))
456 xpti::trace_event_data_t *TraceEvent =
457 (xpti::trace_event_data_t *)TelemetryEvent;
458 xptiNotifySubscribers(StreamID, xpti::trace_wait_end,
nullptr, TraceEvent,
459 IId,
static_cast<const void *
>(Name.c_str()));
// Fragment of the routine that drains the queue's tracked events. Its
// signature is not visible - presumably queue_impl::wait; confirm.
// When instrumentation is compiled in, the work is bracketed by
// instrumentationProlog / instrumentationEpilog.
465 #ifdef XPTI_ENABLE_INSTRUMENTATION
466 void *TelemetryEvent =
nullptr;
470 TelemetryEvent = instrumentationProlog(CodeLoc, Name, StreamID, IId);
// Take ownership of the currently tracked events: both lists are swapped into
// locals while MMutex is held, leaving the members with the locals' previous
// (empty) contents.
473 std::vector<std::weak_ptr<event_impl>> WeakEvents;
474 std::vector<event> SharedEvents;
476 std::lock_guard<std::mutex> Lock(MMutex);
477 WeakEvents.swap(MEventsWeak);
478 SharedEvents.swap(MEventsShared);
// True for non-host queues without MEmulateOOO. The name suggests such queues
// can be finished with a single backend call - the call itself is not visible.
485 const bool SupportsPiFinish = !is_host() && !MEmulateOOO;
// Walk the weakly tracked events from newest to oldest; events that have
// already been destroyed (lock() fails) are skipped.
486 for (
auto EventImplWeakPtrIt = WeakEvents.rbegin();
487 EventImplWeakPtrIt != WeakEvents.rend(); ++EventImplWeakPtrIt) {
488 if (std::shared_ptr<event_impl> EventImplSharedPtr =
489 EventImplWeakPtrIt->lock()) {
// Wait individually when the queue cannot be finished as a whole, or when the
// event has a null handle.
492 if (!SupportsPiFinish ||
nullptr == EventImplSharedPtr->getHandleRef()) {
493 EventImplSharedPtr->wait(EventImplSharedPtr);
497 if (SupportsPiFinish) {
// Such queues are expected never to populate the shared-event list.
500 assert(SharedEvents.empty() &&
"Queues that support calling piQueueFinish "
501 "shouldn't have shared events");
// Loop over the shared events; the loop body is not visible here.
503 for (
event &Event : SharedEvents)
// Take ownership of the stream service events under MMutex; what is done with
// them afterwards is not visible here.
507 std::vector<EventImplPtr> StreamsServiceEvents;
509 std::lock_guard<std::mutex> Lock(MMutex);
510 StreamsServiceEvents.swap(MStreamsServiceEvents);
515 #ifdef XPTI_ENABLE_INSTRUMENTATION
516 instrumentationEpilog(TelemetryEvent, Name, StreamID, IId);
// Reports whether the queue currently has no unfinished work, judged from the
// events the queue tracks. The function continues past the end of this
// excerpt, so the final return statements are not visible.
529 bool queue_impl::ext_oneapi_empty()
const {
// In-order queue that does not discard events: the answer is whether the last
// recorded event has reached `complete`, read under MLastEventMtx.
532 if (isInOrder() && !MDiscardEvents) {
533 std::lock_guard<std::mutex> Lock(MLastEventMtx);
534 return MLastEvent.get_info<info::event::command_execution_status>() ==
535 info::event_command_status::complete;
// Otherwise inspect the tracked event lists while holding MMutex.
550 std::lock_guard<std::mutex> Lock(MMutex);
// Look for a shared event that is not yet complete; the action taken on a
// match is not visible here.
// NOTE(review): `event Event` copies each element per iteration; a const
// reference would avoid the copy - consider changing.
551 for (
event Event : MEventsShared)
552 if (Event.get_info<info::event::command_execution_status>() !=
553 info::event_command_status::complete)
// Then check weakly tracked events that are still alive: only host events
// whose status is not `complete` match. The action on a match is not visible.
556 for (
auto EventImplWeakPtrIt = MEventsWeak.begin();
557 EventImplWeakPtrIt != MEventsWeak.end(); ++EventImplWeakPtrIt)
558 if (std::shared_ptr<event_impl> EventImplSharedPtr =
559 EventImplWeakPtrIt->lock())
560 if (EventImplSharedPtr->is_host() &&
562 ->get_info<info::event::command_execution_status>() !=
563 info::event_command_status::complete)