20 #ifdef XPTI_ENABLE_INSTRUMENTATION
21 #include "xpti/xpti_trace_framework.hpp"
27 inline namespace _V1 {
31 static std::vector<sycl::detail::pi::PiEvent>
33 std::vector<sycl::detail::pi::PiEvent> RetPiEvents;
36 if (EventImpl->getHandleRef() !=
nullptr)
37 RetPiEvents.push_back(EventImpl->getHandleRef());
43 uint32_t queue_impl::get_info<info::queue::reference_count>()
const {
52 template <>
context queue_impl::get_info<info::queue::context>()
const {
56 template <>
device queue_impl::get_info<info::queue::device>()
const {
61 typename info::platform::version::return_type
62 queue_impl::get_backend_info<info::platform::version>()
const {
65 "the info::platform::version info descriptor can "
66 "only be queried with an OpenCL backend");
68 return get_device().get_platform().get_info<info::platform::version>();
72 typename info::device::version::return_type
73 queue_impl::get_backend_info<info::device::version>()
const {
76 "the info::device::version info descriptor can only "
77 "be queried with an OpenCL backend");
79 return get_device().get_info<info::device::version>();
83 typename info::device::backend_version::return_type
84 queue_impl::get_backend_info<info::device::backend_version>()
const {
87 "the info::device::backend_version info descriptor "
88 "can only be queried with a Level Zero backend");
97 const std::shared_ptr<detail::queue_impl> &QueueImpl) {
98 auto EventImpl = std::make_shared<detail::event_impl>(QueueImpl);
100 EventImpl->setStateIncomplete();
101 return detail::createSyclObjFromImpl<event>(EventImpl);
107 return createSyclObjFromImpl<event>(EventImpl);
110 const std::vector<event> &
112 std::vector<event> &MutableVec,
113 std::unique_lock<std::mutex> &QueueLock) {
122 if (!ExternalEvent && !ExtraEvent)
125 MutableVec = DepEvents;
127 MutableVec.push_back(*ExternalEvent);
129 MutableVec.push_back(detail::createSyclObjFromImpl<event>(ExtraEvent));
134 void *Ptr,
int Value,
size_t Count,
135 const std::vector<event> &DepEvents) {
136 #if XPTI_ENABLE_INSTRUMENTATION
140 XPTIScope PrepareNotify((
void *)
this,
141 (uint16_t)xpti::trace_point_type_t::node_create,
143 PrepareNotify.addMetadata([&](
auto TEvent) {
144 xpti::addMetadata(TEvent,
"sycl_device",
145 reinterpret_cast<size_t>(
147 xpti::addMetadata(TEvent,
"memory_ptr",
reinterpret_cast<size_t>(Ptr));
148 xpti::addMetadata(TEvent,
"value_set", Value);
149 xpti::addMetadata(TEvent,
"memory_size", Count);
150 xpti::addMetadata(TEvent,
"queue_id",
MQueueID);
154 xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY,
MQueueID);
156 PrepareNotify.notify();
158 PrepareNotify.scopedNotify((uint16_t)xpti::trace_point_type_t::task_begin);
162 Self, DepEvents, [&](
handler &CGH) { CGH.
memset(Ptr, Value, Count); },
181 void *Dest,
const void *Src,
size_t Count,
182 const std::vector<event> &DepEvents,
184 #if XPTI_ENABLE_INSTRUMENTATION
188 XPTIScope PrepareNotify((
void *)
this,
189 (uint16_t)xpti::trace_point_type_t::node_create,
191 PrepareNotify.addMetadata([&](
auto TEvent) {
192 xpti::addMetadata(TEvent,
"sycl_device",
193 reinterpret_cast<size_t>(
195 xpti::addMetadata(TEvent,
"src_memory_ptr",
reinterpret_cast<size_t>(Src));
196 xpti::addMetadata(TEvent,
"dest_memory_ptr",
197 reinterpret_cast<size_t>(Dest));
198 xpti::addMetadata(TEvent,
"memory_size", Count);
199 xpti::addMetadata(TEvent,
"queue_id",
MQueueID);
201 xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY,
MQueueID);
203 PrepareNotify.notify();
205 PrepareNotify.scopedNotify((uint16_t)xpti::trace_point_type_t::task_begin);
208 if ((!Src || !Dest) && Count != 0) {
210 throw runtime_error(
"NULL pointer argument in memory copy operation.",
211 PI_ERROR_INVALID_VALUE);
214 Self, DepEvents, [&](
handler &CGH) { CGH.
memcpy(Dest, Src, Count); },
220 const void *Ptr,
size_t Length,
222 const std::vector<event> &DepEvents) {
227 Self, Length, Advice);
231 const std::shared_ptr<detail::queue_impl> &Self,
void *DeviceGlobalPtr,
232 const void *Src,
bool IsDeviceImageScope,
size_t NumBytes,
size_t Offset,
233 const std::vector<event> &DepEvents) {
237 CGH.memcpyToDeviceGlobal(DeviceGlobalPtr, Src, IsDeviceImageScope,
240 [](
const auto &...Args) {
243 DeviceGlobalPtr, IsDeviceImageScope, Self, NumBytes, Offset, Src);
247 const std::shared_ptr<detail::queue_impl> &Self,
void *Dest,
248 const void *DeviceGlobalPtr,
bool IsDeviceImageScope,
size_t NumBytes,
249 size_t Offset,
const std::vector<event> &DepEvents) {
253 CGH.memcpyFromDeviceGlobal(Dest, DeviceGlobalPtr, IsDeviceImageScope,
256 [](
const auto &...Args) {
259 DeviceGlobalPtr, IsDeviceImageScope, Self, NumBytes, Offset, Dest);
263 std::lock_guard<std::mutex> Lock{
MMutex};
275 assert(EImpl &&
"Event implementation is missing");
276 auto *Cmd =
static_cast<Command *
>(EImpl->getCommand());
287 std::weak_ptr<event_impl> EventWeakPtr{EImpl};
288 std::lock_guard<std::mutex> Lock{
MMutex};
298 std::lock_guard<std::mutex> Lock(
MMutex);
304 const size_t EventThreshold = 128;
318 return E.get_info<info::event::command_execution_status>() !=
319 info::event_command_status::complete;
325 template <
typename HandlerFuncT>
327 const std::vector<event> &DepEvents,
328 HandlerFuncT HandlerFunc) {
337 template <
typename HandlerFuncT,
typename MemOpFuncT,
typename... MemOpArgTs>
339 const std::vector<event> &DepEvents,
340 HandlerFuncT HandlerFunc,
341 MemOpFuncT MemOpFunc,
342 MemOpArgTs... MemOpArgs) {
346 std::unique_lock<std::mutex> Lock(
MMutex, std::defer_lock);
348 std::vector<event> MutableDepEvents;
349 const std::vector<event> &ExpandedDepEvents =
357 MemOpFunc(MemOpArgs...,
getPIEvents(ExpandedDepEvents),
364 MemOpFunc(MemOpArgs...,
getPIEvents(ExpandedDepEvents),
365 &EventImpl->getHandleRef(), EventImpl);
371 auto &EventToStoreIn =
373 EventToStoreIn = EventImpl;
385 std::string &Name, int32_t StreamID,
387 void *TraceEvent =
nullptr;
392 #ifdef XPTI_ENABLE_INSTRUMENTATION
393 constexpr uint16_t NotificationTraceType = xpti::trace_wait_begin;
394 if (!xptiCheckTraceEnabled(StreamID, NotificationTraceType))
397 xpti::payload_t Payload;
398 bool HasSourceInfo =
false;
401 xpti::utils::StringHelper NG;
402 Name = NG.nameWithAddress<
queue_impl *>(
"queue.wait",
this);
409 HasSourceInfo =
true;
412 Payload = xpti::payload_t(Name.c_str(), (
void *)
this);
417 uint64_t QWaitInstanceNo = 0;
418 xpti::trace_event_data_t *WaitEvent =
419 xptiMakeEvent(Name.c_str(), &Payload, xpti::trace_graph_event,
420 xpti_at::active, &QWaitInstanceNo);
421 IId = QWaitInstanceNo;
432 DevStr =
"ACCELERATOR";
435 xpti::addMetadata(WaitEvent,
"sycl_device_type", DevStr);
437 xpti::addMetadata(WaitEvent,
"sym_function_name", CodeLoc.
functionName());
438 xpti::addMetadata(WaitEvent,
"sym_source_file_name", CodeLoc.
fileName());
439 xpti::addMetadata(WaitEvent,
"sym_line_no",
440 static_cast<int32_t
>((CodeLoc.
lineNumber())));
441 xpti::addMetadata(WaitEvent,
"sym_column_no",
444 xptiNotifySubscribers(StreamID, xpti::trace_wait_begin,
nullptr, WaitEvent,
446 static_cast<const void *
>(Name.c_str()));
447 TraceEvent = (
void *)WaitEvent;
454 int32_t StreamID, uint64_t IId) {
455 (void)TelemetryEvent;
459 #ifdef XPTI_ENABLE_INSTRUMENTATION
460 constexpr uint16_t NotificationTraceType = xpti::trace_wait_end;
461 if (!(xptiCheckTraceEnabled(StreamID, NotificationTraceType) &&
465 xpti::trace_event_data_t *TraceEvent =
466 (xpti::trace_event_data_t *)TelemetryEvent;
467 xptiNotifySubscribers(StreamID, NotificationTraceType,
nullptr, TraceEvent,
468 IId,
static_cast<const void *
>(Name.c_str()));
474 #ifdef XPTI_ENABLE_INSTRUMENTATION
475 void *TelemetryEvent =
nullptr;
484 "wait cannot be called for a queue which is "
485 "recording to a command graph.");
488 std::vector<std::weak_ptr<event_impl>> WeakEvents;
489 std::vector<event> SharedEvents;
491 std::lock_guard<std::mutex> Lock(
MMutex);
501 for (
auto EventImplWeakPtrIt = WeakEvents.rbegin();
502 EventImplWeakPtrIt != WeakEvents.rend(); ++EventImplWeakPtrIt) {
503 if (std::shared_ptr<event_impl> EventImplSharedPtr =
504 EventImplWeakPtrIt->lock()) {
507 if (!SupportsPiFinish ||
nullptr == EventImplSharedPtr->getHandleRef()) {
508 EventImplSharedPtr->wait(EventImplSharedPtr);
512 if (SupportsPiFinish) {
515 assert(SharedEvents.empty() &&
"Queues that support calling piQueueFinish "
516 "shouldn't have shared events");
518 for (
event &Event : SharedEvents)
522 std::vector<EventImplPtr> StreamsServiceEvents;
533 ExternalEvent->wait();
535 #ifdef XPTI_ENABLE_INSTRUMENTATION
560 std::lock_guard<std::mutex> Lock(
MMutex);
562 MLastEventPtr->get_info<info::event::command_execution_status>() ==
578 std::lock_guard<std::mutex> Lock(
MMutex);
580 if (Event.get_info<info::event::command_execution_status>() !=
584 for (
auto EventImplWeakPtrIt =
MEventsWeak.begin();
585 EventImplWeakPtrIt !=
MEventsWeak.end(); ++EventImplWeakPtrIt)
586 if (std::shared_ptr<event_impl> EventImplSharedPtr =
587 EventImplWeakPtrIt->lock())
588 if (EventImplSharedPtr->is_host() &&
590 ->get_info<info::event::command_execution_status>() !=
The context class represents a SYCL context on which kernel functions may be executed.
The Command class represents some action that needs to be performed on one or more memory objects.
static void advise_usm(const void *Ptr, QueueImplPtr Queue, size_t Len, pi_mem_advice Advice, std::vector< sycl::detail::pi::PiEvent > DepEvents, sycl::detail::pi::PiEvent *OutEvent, const detail::EventImplPtr &OutEventImpl)
static void copy_usm(const void *SrcMem, QueueImplPtr Queue, size_t Len, void *DstMem, std::vector< sycl::detail::pi::PiEvent > DepEvents, sycl::detail::pi::PiEvent *OutEvent, const detail::EventImplPtr &OutEventImpl)
static void copy_to_device_global(const void *DeviceGlobalPtr, bool IsDeviceImageScoped, QueueImplPtr Queue, size_t NumBytes, size_t Offset, const void *SrcMem, const std::vector< sycl::detail::pi::PiEvent > &DepEvents, sycl::detail::pi::PiEvent *OutEvent, const detail::EventImplPtr &OutEventImpl)
static void copy_from_device_global(const void *DeviceGlobalPtr, bool IsDeviceImageScoped, QueueImplPtr Queue, size_t NumBytes, size_t Offset, void *DstMem, const std::vector< sycl::detail::pi::PiEvent > &DepEvents, sycl::detail::pi::PiEvent *OutEvent, const detail::EventImplPtr &OutEventImpl)
static void fill_usm(void *DstMem, QueueImplPtr Queue, size_t Len, int Pattern, std::vector< sycl::detail::pi::PiEvent > DepEvents, sycl::detail::pi::PiEvent *OutEvent, const detail::EventImplPtr &OutEventImpl)
static Scheduler & getInstance()
void cleanUpCmdFusion(sycl::detail::queue_impl *Queue)
static bool isInstanceAlive()
static bool areEventsSafeForSchedulerBypass(const std::vector< sycl::event > &DepEvents, ContextImplPtr Context)
event discard_or_return(const event &Event)
const bool MSupportsDiscardingPiEvents
event submit(const std::function< void(handler &)> &CGF, const std::shared_ptr< queue_impl > &Self, const std::shared_ptr< queue_impl > &SecondQueue, const detail::code_location &Loc, const SubmitPostProcessF *PostProcess=nullptr)
Submits a command group function object to the queue, in order to be scheduled for execution on the device.
std::vector< EventImplPtr > MStreamsServiceEvents
std::optional< event > popExternalEvent()
static std::atomic< unsigned long long > MNextAvailableQueueID
void addEvent(const event &Event)
Stores an event that should be associated with the queue.
std::vector< sycl::detail::pi::PiQueue > MQueues
List of queues created for FPGA device from a single SYCL queue.
pi_native_handle getNative(int32_t &NativeHandleDesc) const
Gets the native handle of the SYCL queue.
unsigned long long MQueueID
std::vector< std::weak_ptr< event_impl > > MEventsWeak
These events are tracked, but not owned, by the queue.
event submitMemOpHelper(const std::shared_ptr< queue_impl > &Self, const std::vector< event > &DepEvents, HandlerFuncT HandlerFunc, MemMngrFuncT MemMngrFunc, MemMngrArgTs... MemOpArgs)
Performs submission of a memory operation directly if the scheduler can be bypassed, or with a handler otherwise.
std::vector< event > MEventsShared
Events without data dependencies (such as USM) need an owner; additionally, USM operations are not added to the scheduler command graph, so the queue is their only owner on the runtime side.
std::mutex MMutex
Protects all the fields that can be changed by class' methods.
void cleanup_fusion_cmd()
device get_device() const
event submitWithHandler(const std::shared_ptr< queue_impl > &Self, const std::vector< event > &DepEvents, HandlerFuncT HandlerFunc)
Helper function for submitting a memory operation with a handler.
void addSharedEvent(const event &Event)
queue_impl.addEvent tracks events with weak pointers but some events have no other owners.
const ContextImplPtr MContext
sycl::detail::pi::PiQueue & getHandleRef()
event memset(const std::shared_ptr< queue_impl > &Self, void *Ptr, int Value, size_t Count, const std::vector< event > &DepEvents)
Fills the memory pointed to by a USM pointer with the value specified.
const PluginPtr & getPlugin() const
event mem_advise(const std::shared_ptr< queue_impl > &Self, const void *Ptr, size_t Length, pi_mem_advice Advice, const std::vector< event > &DepEvents)
Provides additional information to the underlying runtime about how different allocations are used.
event memcpyToDeviceGlobal(const std::shared_ptr< queue_impl > &Self, void *DeviceGlobalPtr, const void *Src, bool IsDeviceImageScope, size_t NumBytes, size_t Offset, const std::vector< event > &DepEvents)
bool ext_oneapi_empty() const
event memcpyFromDeviceGlobal(const std::shared_ptr< queue_impl > &Self, void *Dest, const void *DeviceGlobalPtr, bool IsDeviceImageScope, size_t NumBytes, size_t Offset, const std::vector< event > &DepEvents)
event memcpy(const std::shared_ptr< queue_impl > &Self, void *Dest, const void *Src, size_t Count, const std::vector< event > &DepEvents, const code_location &CodeLoc)
Copies data from one memory region to another, both pointed to by USM pointers.
const bool MDiscardEvents
void wait(const detail::code_location &Loc={})
Performs a blocking wait for the completion of all enqueued tasks in the queue.
bool MEmulateOOO
Indicates that a native out-of-order queue could not be created and we need to emulate it with multiple native in-order queues.
void instrumentationEpilog(void *TelementryEvent, std::string &Name, int32_t StreamID, uint64_t IId)
EventImplPtr MGraphLastEventPtr
const ContextImplPtr & getContextImplPtr() const
void * instrumentationProlog(const detail::code_location &CodeLoc, std::string &Name, int32_t StreamID, uint64_t &iid)
std::mutex MStreamsServiceEventsMutex
std::weak_ptr< ext::oneapi::experimental::detail::graph_impl > MGraph
const std::vector< event > & getExtendDependencyList(const std::vector< event > &DepEvents, std::vector< event > &MutableVec, std::unique_lock< std::mutex > &QueueLock)
EventImplPtr MLastEventPtr
The SYCL device class encapsulates a single SYCL device on which kernels may be executed.
bool is_accelerator() const
Check if device is an accelerator device.
bool is_gpu() const
Check if device is a GPU device.
bool is_cpu() const
Check if device is a CPU device.
An event object can be used to synchronize memory transfers, enqueues of kernels and signaling barriers.
Command group handler class.
void depends_on(event Event)
Registers event dependencies on this command group.
void memcpy(void *Dest, const void *Src, size_t Count)
Copies data from one memory region to another, each of which is either a host pointer or a pointer within a USM allocation accessible on this handler's device.
void mem_advise(const void *Ptr, size_t Length, int Advice)
Provides additional information to the underlying runtime about how different allocations are used.
void memset(void *Dest, int Value, size_t Count)
Fills the memory pointed to by a USM pointer with the value specified.
__SYCL_EXTERN_STREAM_ATTRS ostream cout
Linked to standard output.
constexpr const char * SYCL_STREAM_NAME
static event createDiscardedEvent()
static std::vector< sycl::detail::pi::PiEvent > getPIEvents(const std::vector< sycl::event > &DepEvents)
static const PluginPtr & getPlugin(backend Backend)
decltype(Obj::impl) getSyclObjImpl(const Obj &SyclObject)
std::shared_ptr< event_impl > EventImplPtr
std::shared_ptr< plugin > PluginPtr
void report(const code_location &CodeLoc)
static event prepareSYCLEventAssociatedWithQueue(const std::shared_ptr< detail::queue_impl > &QueueImpl)
std::error_code make_error_code(sycl::errc E) noexcept
Constructs an error code using E and sycl_category().
static device_ext & get_device(unsigned int id)
Util function to get a device by id.
pi_result piQueueFinish(pi_queue command_queue)
uintptr_t pi_native_handle
@ PI_QUEUE_INFO_REFERENCE_COUNT
@ PI_EXT_ONEAPI_QUEUE_INFO_EMPTY
pi_result piextQueueGetNativeHandle(pi_queue queue, pi_native_handle *nativeHandle, int32_t *nativeHandleDesc)
Gets the native handle of a PI queue object.
pi_result piQueueGetInfo(pi_queue command_queue, pi_queue_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
pi_result piQueueRetain(pi_queue command_queue)
C++ wrapper of extern "C" PI interfaces.
constexpr unsigned long columnNumber() const noexcept
constexpr const char * fileName() const noexcept
constexpr const char * functionName() const noexcept
constexpr unsigned long lineNumber() const noexcept