40 #ifdef XPTI_ENABLE_INSTRUMENTATION
41 #include "xpti/xpti_trace_framework.hpp"
46 inline namespace _V1 {
50 namespace ext::oneapi::experimental::detail {
78 context{createSyclObjFromImpl<device>(Device), {}, {}});
81 Device->get_platform().ext_oneapi_get_default_context());
82 if (DefaultContext->isDeviceValid(Device))
83 return DefaultContext;
85 context{createSyclObjFromImpl<device>(Device), {}, {}});
117 if (has_property<property::queue::enable_profiling>()) {
118 if (has_property<ext::oneapi::property::queue::discard_events>())
120 "Queue cannot be constructed with both of "
121 "discard_events and enable_profiling.");
123 if (
MDevice->has(aspect::queue_profiling)) {
130 "Cannot enable profiling, the associated device "
131 "does not have the queue_profiling aspect");
134 if (has_property<ext::intel::property::queue::compute_index>()) {
135 int Idx = get_property<ext::intel::property::queue::compute_index>()
138 createSyclObjFromImpl<device>(Device)
139 .get_info<ext::intel::info::device::max_compute_queue_indices>();
140 if (Idx < 0 || Idx >= NumIndices)
143 "Queue compute index must be a non-negative number less than "
144 "device's number of available compute queue indices.");
149 ext::codeplay::experimental::info::device::supports_fusion>()) {
152 "Cannot enable fusion if device does not support fusion");
154 if (!Context->isDeviceValid(Device)) {
158 "Queue cannot be constructed with the given context and device "
159 "since the device is not a member of the context (descendants of "
160 "devices from the context are not supported on OpenCL yet).");
163 "Queue cannot be constructed with the given context and device "
164 "since the device is neither a member of the context nor a "
165 "descendant of its member.");
176 #if XPTI_ENABLE_INSTRUMENTATION
177 constexpr uint16_t NotificationTraceType =
178 static_cast<uint16_t
>(xpti::trace_point_type_t::queue_create);
181 XPTIScope PrepareNotify((
void *)
this, NotificationTraceType,
184 if (xptiCheckTraceEnabled(PrepareNotify.streamID(),
185 NotificationTraceType)) {
190 PrepareNotify.addMetadata([&](
auto TEvent) {
191 xpti::addMetadata(TEvent,
"sycl_context",
192 reinterpret_cast<size_t>(
MContext->getHandleRef()));
194 xpti::addMetadata(TEvent,
"sycl_device_name",
196 xpti::addMetadata(TEvent,
"sycl_device",
197 reinterpret_cast<size_t>(
MDevice->getHandleRef()));
199 xpti::addMetadata(TEvent,
"is_inorder",
MIsInorder);
200 xpti::addMetadata(TEvent,
"queue_id",
MQueueID);
201 xpti::addMetadata(TEvent,
"queue_handle",
205 xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY,
MQueueID);
206 PrepareNotify.notify();
215 if (has_property<ext::oneapi::property::queue::discard_events>() &&
216 has_property<property::queue::enable_profiling>()) {
218 "Queue cannot be constructed with both of "
219 "discard_events and enable_profiling.");
233 "Device provided by native Queue not found in Context.");
239 #if XPTI_ENABLE_INSTRUMENTATION
240 constexpr uint16_t NotificationTraceType =
241 static_cast<uint16_t
>(xpti::trace_point_type_t::queue_create);
242 XPTIScope PrepareNotify((
void *)
this, NotificationTraceType,
244 if (xptiCheckTraceEnabled(PrepareNotify.streamID(),
245 NotificationTraceType)) {
252 PrepareNotify.addMetadata([&](
auto TEvent) {
253 xpti::addMetadata(TEvent,
"sycl_context",
254 reinterpret_cast<size_t>(
MContext->getHandleRef()));
256 xpti::addMetadata(TEvent,
"sycl_device_name",
258 xpti::addMetadata(TEvent,
"sycl_device",
259 reinterpret_cast<size_t>(
MDevice->getHandleRef()));
261 xpti::addMetadata(TEvent,
"is_inorder",
MIsInorder);
262 xpti::addMetadata(TEvent,
"queue_id",
MQueueID);
263 xpti::addMetadata(TEvent,
"queue_handle",
getHandleRef());
266 xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY,
MQueueID);
267 PrepareNotify.notify();
313 #if XPTI_ENABLE_INSTRUMENTATION
314 constexpr uint16_t NotificationTraceType =
315 static_cast<uint16_t
>(xpti::trace_point_type_t::queue_destroy);
316 if (xptiCheckTraceEnabled(
MStreamID, NotificationTraceType)) {
318 xptiNotifySubscribers(
MStreamID, NotificationTraceType,
nullptr,
321 static_cast<const void *
>(
"queue_destroy"));
322 xptiReleaseEvent((xpti::trace_event_data_t *)
MTraceEvent);
328 }
catch (std::exception &e) {
336 return pi::cast<cl_command_queue>(
MQueues[0]);
341 return createSyclObjFromImpl<context>(
MContext);
364 template <
typename Param>
typename Param::return_type
get_info()
const;
369 template <
typename Param>
378 "flush cannot be called for a queue which is "
379 "recording to a command graph.");
402 const std::shared_ptr<queue_impl> &Self,
403 const std::shared_ptr<queue_impl> &SecondQueue,
408 ResEvent =
submit_impl(CGF, Self, Self, SecondQueue,
409 true, Loc, PostProcess);
412 SecondQueue->submit_impl(CGF, SecondQueue, Self, SecondQueue,
413 true, Loc, PostProcess);
427 const std::shared_ptr<queue_impl> &Self,
430 auto ResEvent =
submit_impl(CGF, Self, Self,
nullptr,
431 true, Loc, PostProcess);
436 const std::shared_ptr<queue_impl> &Self,
472 std::lock_guard<std::mutex> Lock(
MMutex);
478 if (Exceptions.
size())
494 if (PropList.
has_property<property::queue::enable_profiling>()) {
498 ext::oneapi::cuda::property::queue::use_default_stream>()) {
501 if (PropList.
has_property<ext::oneapi::property::queue::discard_events>()) {
507 bool PrioritySeen =
false;
509 .has_property<ext::oneapi::property::queue::priority_normal>()) {
513 if (PropList.
has_property<ext::oneapi::property::queue::priority_low>()) {
517 "Queue cannot be constructed with different priorities.");
522 if (PropList.
has_property<ext::oneapi::property::queue::priority_high>()) {
526 "Queue cannot be constructed with different priorities.");
531 bool SubmissionSeen =
false;
533 ext::intel::property::queue::no_immediate_command_list>()) {
534 SubmissionSeen =
true;
538 ext::intel::property::queue::immediate_command_list>()) {
539 if (SubmissionSeen) {
542 "Queue cannot be constructed with different submission modes.");
544 SubmissionSeen =
true;
547 return CreationFlags;
562 if (has_property<ext::intel::property::queue::compute_index>()) {
563 int Idx = get_property<ext::intel::property::queue::compute_index>()
575 if (!
MEmulateOOO && Error == PI_ERROR_INVALID_QUEUE_PROPERTIES) {
579 Plugin->checkPiResult(Error);
589 bool ReuseQueue =
false;
591 std::lock_guard<std::mutex> Lock(
MMutex);
648 event memset(
const std::shared_ptr<queue_impl> &Self,
void *Ptr,
int Value,
649 size_t Count,
const std::vector<event> &DepEvents,
650 bool CallerNeedsEvent);
662 event memcpy(
const std::shared_ptr<queue_impl> &Self,
void *Dest,
663 const void *Src,
size_t Count,
664 const std::vector<event> &DepEvents,
bool CallerNeedsEvent,
677 event mem_advise(
const std::shared_ptr<queue_impl> &Self,
const void *Ptr,
679 const std::vector<event> &DepEvents,
bool CallerNeedsEvent);
685 std::lock_guard<std::mutex> Lock(
MMutex);
710 std::hash<
typename std::shared_ptr<queue_impl>::element_type *>()(
715 void *DeviceGlobalPtr,
const void *Src,
716 bool IsDeviceImageScope,
size_t NumBytes,
717 size_t Offset,
const std::vector<event> &DepEvents,
718 bool CallerNeedsEvent);
720 void *Dest,
const void *DeviceGlobalPtr,
721 bool IsDeviceImageScope,
size_t NumBytes,
723 const std::vector<event> &DepEvents,
724 bool CallerNeedsEvent);
729 std::shared_ptr<ext::oneapi::experimental::detail::graph_impl> Graph) {
730 std::lock_guard<std::mutex> Lock(
MMutex);
735 std::shared_ptr<ext::oneapi::experimental::detail::graph_impl>
749 std::optional<event> Result = std::nullopt;
754 const std::vector<event> &
756 std::vector<event> &MutableVec,
757 std::unique_lock<std::mutex> &QueueLock);
772 return Queue ? Queue->getContextImplPtr() :
nullptr;
777 const std::shared_ptr<ext::oneapi::experimental::detail::graph_impl>
787 template <
typename HandlerType = handler>
789 auto ResEvent = std::make_shared<detail::event_impl>(Handler.MQueue);
791 Handler.MQueue->getHandleRef(), 0,
nullptr, &ResEvent->getHandleRef());
796 template <
typename HandlerType = handler>
801 std::lock_guard<std::mutex> Lock{
MMutex};
813 if (EventToBuildDeps) {
819 if (EventToBuildDeps->isDiscarded() &&
823 if (!EventToBuildDeps->isDiscarded())
824 Handler.depends_on(EventToBuildDeps);
832 Handler.depends_on(*ExternalEvent);
834 EventRet = Handler.finalize();
838 std::lock_guard<std::mutex> Lock{
MMutex};
852 Handler.depends_on(Deps.UnenqueuedCmdEvents);
854 if (Deps.LastBarrier)
855 Handler.depends_on(Deps.LastBarrier);
856 EventRet = Handler.finalize();
859 Deps.UnenqueuedCmdEvents.push_back(EventRetImpl);
860 else if (!EventRetImpl->isEnqueued()) {
862 Deps.LastBarrier = EventRetImpl;
863 Deps.UnenqueuedCmdEvents.clear();
865 Deps.UnenqueuedCmdEvents.push_back(EventRetImpl);
883 const std::shared_ptr<queue_impl> &Self,
884 const std::shared_ptr<queue_impl> &PrimaryQueue,
885 const std::shared_ptr<queue_impl> &SecondaryQueue,
894 template <
typename HandlerFuncT>
896 const std::vector<event> &DepEvents,
897 HandlerFuncT HandlerFunc);
914 template <
typename HandlerFuncT,
typename MemMngrFuncT,
915 typename... MemMngrArgTs>
917 const std::vector<event> &DepEvents,
918 bool CallerNeedsEvent, HandlerFuncT HandlerFunc,
919 MemMngrFuncT MemMngrFunc, MemMngrArgTs... MemOpArgs);
924 std::string &Name, int32_t StreamID,
928 int32_t StreamID, uint64_t IId);
960 std::vector<sycl::detail::pi::PiQueue>
MQueues;
1018 std::weak_ptr<ext::oneapi::experimental::detail::graph_impl>
MGraph{};
1023 std::deque<std::shared_ptr<ext::oneapi::experimental::detail::graph_impl>>
The context class represents a SYCL context on which kernel functions may be executed.
ThreadPool & getHostTaskThreadPool()
static GlobalHandler & instance()
bool isInFusionMode(QueueIdT Queue)
static Scheduler & getInstance()
event discard_or_return(const event &Event)
std::deque< std::shared_ptr< ext::oneapi::experimental::detail::graph_impl > > MMissedCleanupRequests
const property_list MPropList
bool hasDiscardEventsProperty() const
static ContextImplPtr getContext(const QueueImplPtr &Queue)
bool is_in_fusion_mode()
Check whether the queue is in fusion mode.
event submit(const std::function< void(handler &)> &CGF, const std::shared_ptr< queue_impl > &Self, const std::shared_ptr< queue_impl > &SecondQueue, const detail::code_location &Loc, const SubmitPostProcessF *PostProcess=nullptr)
Submits a command group function object to the queue, in order to be scheduled for execution on the d...
void revisitUnenqueuedCommandsState(const EventImplPtr &CompletedHostTask)
uint64_t MInstanceID
The instance ID of the trace event for queue object.
std::vector< EventImplPtr > MStreamsServiceEvents
const property_list & getPropList() const
void wait_and_throw(const detail::code_location &Loc={})
event submitMemOpHelper(const std::shared_ptr< queue_impl > &Self, const std::vector< event > &DepEvents, bool CallerNeedsEvent, HandlerFuncT HandlerFunc, MemMngrFuncT MemMngrFunc, MemMngrArgTs... MemOpArgs)
Performs submission of a memory operation directly if scheduler can be bypassed, or with a handler ot...
Param::return_type get_info() const
Queries SYCL queue for information.
std::optional< event > MInOrderExternalEvent
std::optional< event > popExternalEvent()
sycl::detail::pi::PiQueue createQueue(QueueOrder Order)
Creates PI queue.
void registerStreamServiceEvent(const EventImplPtr &Event)
static std::atomic< unsigned long long > MNextAvailableQueueID
void addEvent(const event &Event)
Stores an event that should be associated with the queue.
std::vector< sycl::detail::pi::PiQueue > MQueues
List of queues created for FPGA device from a single SYCL queue.
pi_native_handle getNative(int32_t &NativeHandleDesc) const
Gets the native handle of the SYCL queue.
struct sycl::_V1::detail::queue_impl::DependencyTrackingItems MExtGraphDeps
sycl::detail::pi::PiQueue & getExclusiveQueueHandleRef()
void tryToResetEnqueuedBarrierDep(const EventImplPtr &EnqueuedBarrierEvent)
unsigned long long MQueueID
std::vector< std::weak_ptr< event_impl > > MEventsWeak
These events are tracked, but not owned, by the queue.
bool has_property() const noexcept
event submit(const std::function< void(handler &)> &CGF, const std::shared_ptr< queue_impl > &Self, const detail::code_location &Loc, const SubmitPostProcessF *PostProcess=nullptr)
Submits a command group function object to the queue, in order to be scheduled for execution on the d...
std::vector< event > MEventsShared
Events without data dependencies (such as USM) need an owner, additionally, USM operations are not ad...
std::mutex MMissedCleanupRequestsMtx
std::mutex MMutex
Protects all the fields that can be changed by class' methods.
void submit_without_event(const std::function< void(handler &)> &CGF, const std::shared_ptr< queue_impl > &Self, const detail::code_location &Loc, const SubmitPostProcessF *PostProcess=nullptr)
void setCommandGraph(std::shared_ptr< ext::oneapi::experimental::detail::graph_impl > Graph)
void cleanup_fusion_cmd()
event memcpyToDeviceGlobal(const std::shared_ptr< queue_impl > &Self, void *DeviceGlobalPtr, const void *Src, bool IsDeviceImageScope, size_t NumBytes, size_t Offset, const std::vector< event > &DepEvents, bool CallerNeedsEvent)
queue_impl(sycl::detail::pi::PiQueue PiQueue, const ContextImplPtr &Context, const async_handler &AsyncHandler)
Constructs a SYCL queue from plugin interoperability handle.
device get_device() const
bool supportsDiscardingPiEvents() const
static ThreadPool & getThreadPool()
event submitWithHandler(const std::shared_ptr< queue_impl > &Self, const std::vector< event > &DepEvents, HandlerFuncT HandlerFunc)
Helper function for submitting a memory operation with a handler.
void doUnenqueuedCommandCleanup(const std::shared_ptr< ext::oneapi::experimental::detail::graph_impl > &Graph)
event mem_advise(const std::shared_ptr< queue_impl > &Self, const void *Ptr, size_t Length, pi_mem_advice Advice, const std::vector< event > &DepEvents, bool CallerNeedsEvent)
Provides additional information to the underlying runtime about how different allocations are used.
event submit_impl(const std::function< void(handler &)> &CGF, const std::shared_ptr< queue_impl > &Self, const std::shared_ptr< queue_impl > &PrimaryQueue, const std::shared_ptr< queue_impl > &SecondaryQueue, bool CallerNeedsEvent, const detail::code_location &Loc, const SubmitPostProcessF *PostProcess)
Performs command group submission to the queue.
std::function< void(bool, bool, event &)> SubmitPostProcessF
event memcpyFromDeviceGlobal(const std::shared_ptr< queue_impl > &Self, void *Dest, const void *DeviceGlobalPtr, bool IsDeviceImageScope, size_t NumBytes, size_t Offset, const std::vector< event > &DepEvents, bool CallerNeedsEvent)
void addSharedEvent(const event &Event)
queue_impl.addEvent tracks events with weak pointers but some events have no other owners.
const ContextImplPtr MContext
void finalizeHandler(HandlerType &Handler, event &EventRet)
void setExternalEvent(const event &Event)
std::mutex MInOrderExternalEventMtx
sycl::detail::pi::PiQueue & getHandleRef()
void throw_asynchronous()
Performs a blocking wait for the completion of all enqueued tasks in the queue.
queue_impl(const DeviceImplPtr &Device, const ContextImplPtr &Context, const async_handler &AsyncHandler, const property_list &PropList)
Constructs a SYCL queue with an async_handler and property_list provided from a device and a context.
void flush()
Provides a hint to the backend to execute previously issued commands on this queue.
const PluginPtr & getPlugin() const
const DeviceImplPtr & getDeviceImplPtr() const
exception_list MExceptions
const async_handler MAsyncHandler
exception_list getExceptionList() const
bool ext_oneapi_empty() const
const bool MDiscardEvents
uint8_t MStreamID
The stream under which the traces are emitted from the queue object.
void reportAsyncException(const std::exception_ptr &ExceptionPtr)
Puts an exception into the list of asynchronous exceptions.
context get_context() const
void wait(const detail::code_location &Loc={})
Performs a blocking wait for the completion of all enqueued tasks in the queue.
std::shared_ptr< ext::oneapi::experimental::detail::graph_impl > getCommandGraph() const
bool isProfilingFallback()
struct sycl::_V1::detail::queue_impl::DependencyTrackingItems MDefaultGraphDeps
EventImplPtr insertHelperBarrier(const HandlerType &Handler)
bool MEmulateOOO
Indicates that a native out-of-order queue could not be created and we need to emulate it with multip...
void instrumentationEpilog(void *TelementryEvent, std::string &Name, int32_t StreamID, uint64_t IId)
static sycl::detail::pi::PiQueueProperties createPiQueueProperties(const property_list &PropList, QueueOrder Order)
Creates PI properties array.
const ContextImplPtr & getContextImplPtr() const
void * instrumentationProlog(const detail::code_location &CodeLoc, std::string &Name, int32_t StreamID, uint64_t &iid)
size_t MNextQueueIdx
Iterator through MQueues.
std::mutex MStreamsServiceEventsMutex
const bool MIsProfilingEnabled
std::weak_ptr< ext::oneapi::experimental::detail::graph_impl > MGraph
event memset(const std::shared_ptr< queue_impl > &Self, void *Ptr, int Value, size_t Count, const std::vector< event > &DepEvents, bool CallerNeedsEvent)
Fills the memory pointed by a USM pointer with the value specified.
event memcpy(const std::shared_ptr< queue_impl > &Self, void *Dest, const void *Src, size_t Count, const std::vector< event > &DepEvents, bool CallerNeedsEvent, const code_location &CodeLoc)
Copies data from one memory region to another, both pointed by USM pointers.
Param::return_type get_backend_info() const
Queries SYCL queue for SYCL backend-specific information.
const std::vector< event > & getExtendDependencyList(const std::vector< event > &DepEvents, std::vector< event > &MutableVec, std::unique_lock< std::mutex > &QueueLock)
queue_impl(const DeviceImplPtr &Device, const async_handler &AsyncHandler, const property_list &PropList)
Constructs a SYCL queue from a device using an async_handler and property_list provided.
static ContextImplPtr getDefaultOrNew(const DeviceImplPtr &Device)
unsigned long long getQueueID()
queue_impl(sycl::detail::pi::PiQueue PiQueue, const ContextImplPtr &Context, const async_handler &AsyncHandler, const property_list &PropList)
Constructs a SYCL queue from plugin interoperability handle.
propertyT get_property() const
The SYCL device class encapsulates a single SYCL device on which kernels may be executed.
An event object can be used to synchronize memory transfers, enqueues of kernels and signaling barrie...
A list of asynchronous exceptions.
Implementation of node class from SYCL_EXT_ONEAPI_GRAPH.
Command group handler class.
Objects of the property_list class are containers for the SYCL properties.
bool has_property() const noexcept
PropT get_property() const
Encapsulates a single SYCL queue which schedules kernels on a SYCL device.
#define __SYCL_PI_CUDA_USE_DEFAULT_STREAM
#define __SYCL_REPORT_EXCEPTION_TO_STREAM(str, e)
::pi_queue_properties PiQueueProperties
CUDAContextT
Possible CUDA context types supported by PI CUDA backend TODO: Implement this as a property once ther...
decltype(Obj::impl) const & getSyclObjImpl(const Obj &SyclObject)
constexpr const char * SYCL_STREAM_NAME
std::shared_ptr< sycl::detail::context_impl > ContextImplPtr
std::shared_ptr< event_impl > EventImplPtr
std::shared_ptr< plugin > PluginPtr
std::shared_ptr< device_impl > DeviceImplPtr
static constexpr size_t MaxNumQueues
Sets max number of queues supported by FPGA RT.
CGType
Type of the command group.
std::shared_ptr< sycl::detail::queue_impl > QueueImplPtr
constexpr CUDAContextT DefaultContextType
Default context type created for CUDA backend.
constexpr auto memory_order_relaxed
std::function< void(sycl::exception_list)> async_handler
std::error_code make_error_code(sycl::errc E) noexcept
Constructs an error code using e and sycl_category()
constexpr pi_queue_properties PI_QUEUE_COMPUTE_INDEX
pi_result piQueueFinish(pi_queue command_queue)
uintptr_t pi_native_handle
constexpr pi_queue_properties PI_EXT_QUEUE_FLAG_SUBMISSION_IMMEDIATE
pi_result piEnqueueEventsWaitWithBarrier(pi_queue command_queue, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
constexpr pi_queue_properties PI_EXT_QUEUE_FLAG_SUBMISSION_NO_IMMEDIATE
constexpr pi_queue_properties PI_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE
pi_result piextQueueCreate(pi_context context, pi_device device, pi_queue_properties *properties, pi_queue *queue)
pi_result piQueueRelease(pi_queue command_queue)
constexpr pi_queue_properties PI_EXT_ONEAPI_QUEUE_FLAG_PRIORITY_LOW
constexpr pi_queue_properties PI_EXT_ONEAPI_QUEUE_FLAG_DISCARD_EVENTS
constexpr pi_queue_properties PI_EXT_ONEAPI_QUEUE_FLAG_PRIORITY_HIGH
constexpr pi_queue_properties PI_QUEUE_FLAGS
constexpr pi_queue_properties PI_QUEUE_FLAG_PROFILING_ENABLE
pi_result piQueueFlush(pi_queue command_queue)
pi_result piQueueGetInfo(pi_queue command_queue, pi_queue_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
pi_result piQueueRetain(pi_queue command_queue)
_Abi const simd< _Tp, _Abi > & noexcept
std::vector< EventImplPtr > UnenqueuedCmdEvents
EventImplPtr LastEventPtr