39 #ifdef XPTI_ENABLE_INSTRUMENTATION
40 #include "xpti/xpti_trace_framework.hpp"
45 inline namespace _V1 {
49 namespace ext::oneapi::experimental::detail {
77 context{createSyclObjFromImpl<device>(Device), {}, {}});
80 Device->get_platform().ext_oneapi_get_default_context());
81 if (DefaultContext->isDeviceValid(Device))
82 return DefaultContext;
84 context{createSyclObjFromImpl<device>(Device), {}, {}});
118 if (has_property<property::queue::enable_profiling>()) {
119 if (has_property<ext::oneapi::property::queue::discard_events>())
121 "Queue cannot be constructed with both of "
122 "discard_events and enable_profiling.");
124 if (
MDevice->has(aspect::queue_profiling)) {
132 "Cannot enable profiling, the associated device "
133 "does not have the queue_profiling aspect");
136 if (has_property<ext::intel::property::queue::compute_index>()) {
137 int Idx = get_property<ext::intel::property::queue::compute_index>()
140 createSyclObjFromImpl<device>(Device)
141 .get_info<ext::intel::info::device::max_compute_queue_indices>();
142 if (Idx < 0 || Idx >= NumIndices)
145 "Queue compute index must be a non-negative number less than "
146 "device's number of available compute queue indices.");
151 ext::codeplay::experimental::info::device::supports_fusion>()) {
154 "Cannot enable fusion if device does not support fusion");
156 if (!Context->isDeviceValid(Device)) {
158 throw sycl::invalid_object_error(
159 "Queue cannot be constructed with the given context and device "
160 "since the device is not a member of the context (descendants of "
161 "devices from the context are not supported on OpenCL yet).",
162 PI_ERROR_INVALID_DEVICE);
163 throw sycl::invalid_object_error(
164 "Queue cannot be constructed with the given context and device "
165 "since the device is neither a member of the context nor a "
166 "descendant of its member.",
167 PI_ERROR_INVALID_DEVICE);
179 #if XPTI_ENABLE_INSTRUMENTATION
180 constexpr uint16_t NotificationTraceType =
181 static_cast<uint16_t
>(xpti::trace_point_type_t::queue_create);
184 XPTIScope PrepareNotify((
void *)
this, NotificationTraceType,
187 if (xptiCheckTraceEnabled(PrepareNotify.streamID(),
188 NotificationTraceType)) {
193 PrepareNotify.addMetadata([&](
auto TEvent) {
194 xpti::addMetadata(TEvent,
"sycl_context",
195 reinterpret_cast<size_t>(
MContext->getHandleRef()));
197 xpti::addMetadata(TEvent,
"sycl_device_name",
200 TEvent,
"sycl_device",
201 reinterpret_cast<size_t>(
204 xpti::addMetadata(TEvent,
"is_inorder",
MIsInorder);
205 xpti::addMetadata(TEvent,
"queue_id",
MQueueID);
207 xpti::addMetadata(TEvent,
"queue_handle",
211 xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY,
MQueueID);
212 PrepareNotify.notify();
221 if (has_property<ext::oneapi::property::queue::discard_events>() &&
222 has_property<property::queue::enable_profiling>()) {
224 "Queue cannot be constructed with both of "
225 "discard_events and enable_profiling.");
239 "Device provided by native Queue not found in Context.");
245 #if XPTI_ENABLE_INSTRUMENTATION
246 constexpr uint16_t NotificationTraceType =
247 static_cast<uint16_t
>(xpti::trace_point_type_t::queue_create);
248 XPTIScope PrepareNotify((
void *)
this, NotificationTraceType,
250 if (xptiCheckTraceEnabled(PrepareNotify.streamID(),
251 NotificationTraceType)) {
258 PrepareNotify.addMetadata([&](
auto TEvent) {
259 xpti::addMetadata(TEvent,
"sycl_context",
260 reinterpret_cast<size_t>(
MContext->getHandleRef()));
262 xpti::addMetadata(TEvent,
"sycl_device_name",
265 TEvent,
"sycl_device",
266 reinterpret_cast<size_t>(
269 xpti::addMetadata(TEvent,
"is_inorder",
MIsInorder);
270 xpti::addMetadata(TEvent,
"queue_id",
MQueueID);
272 xpti::addMetadata(TEvent,
"queue_handle",
getHandleRef());
275 xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY,
MQueueID);
276 PrepareNotify.notify();
326 #if XPTI_ENABLE_INSTRUMENTATION
327 constexpr uint16_t NotificationTraceType =
328 static_cast<uint16_t
>(xpti::trace_point_type_t::queue_destroy);
329 if (xptiCheckTraceEnabled(
MStreamID, NotificationTraceType)) {
331 xptiNotifySubscribers(
MStreamID, NotificationTraceType,
nullptr,
334 static_cast<const void *
>(
"queue_destroy"));
335 xptiReleaseEvent((xpti::trace_event_data_t *)
MTraceEvent);
348 throw invalid_object_error(
349 "This instance of queue doesn't support OpenCL interoperability",
350 PI_ERROR_INVALID_QUEUE);
353 return pi::cast<cl_command_queue>(
MQueues[0]);
358 return createSyclObjFromImpl<context>(
MContext);
383 template <
typename Param>
typename Param::return_type
get_info()
const;
388 template <
typename Param>
407 const std::shared_ptr<queue_impl> &Self,
408 const std::shared_ptr<queue_impl> &SecondQueue,
413 ResEvent =
submit_impl(CGF, Self, Self, SecondQueue, Loc, PostProcess);
415 ResEvent = SecondQueue->submit_impl(CGF, SecondQueue, Self, SecondQueue,
430 const std::shared_ptr<queue_impl> &Self,
433 auto ResEvent =
submit_impl(CGF, Self, Self,
nullptr, Loc, PostProcess);
466 std::lock_guard<std::mutex> Lock(
MMutex);
472 if (Exceptions.
size())
488 if (PropList.
has_property<property::queue::enable_profiling>()) {
492 ext::oneapi::cuda::property::queue::use_default_stream>()) {
495 if (PropList.
has_property<ext::oneapi::property::queue::discard_events>()) {
501 bool PrioritySeen =
false;
503 .has_property<ext::oneapi::property::queue::priority_normal>()) {
507 if (PropList.
has_property<ext::oneapi::property::queue::priority_low>()) {
511 "Queue cannot be constructed with different priorities.");
516 if (PropList.
has_property<ext::oneapi::property::queue::priority_high>()) {
520 "Queue cannot be constructed with different priorities.");
525 bool SubmissionSeen =
false;
527 ext::intel::property::queue::no_immediate_command_list>()) {
528 SubmissionSeen =
true;
532 ext::intel::property::queue::immediate_command_list>()) {
533 if (SubmissionSeen) {
536 "Queue cannot be constructed with different submission modes.");
538 SubmissionSeen =
true;
541 return CreationFlags;
556 if (has_property<ext::intel::property::queue::compute_index>()) {
557 int Idx = get_property<ext::intel::property::queue::compute_index>()
569 if (!
MEmulateOOO && Error == PI_ERROR_INVALID_QUEUE_PROPERTIES) {
573 Plugin->checkPiResult(Error);
583 bool ReuseQueue =
false;
585 std::lock_guard<std::mutex> Lock(
MMutex);
641 event memset(
const std::shared_ptr<queue_impl> &Self,
void *Ptr,
int Value,
642 size_t Count,
const std::vector<event> &DepEvents);
653 event memcpy(
const std::shared_ptr<queue_impl> &Self,
void *Dest,
654 const void *Src,
size_t Count,
655 const std::vector<event> &DepEvents,
667 event mem_advise(
const std::shared_ptr<queue_impl> &Self,
const void *Ptr,
669 const std::vector<event> &DepEvents);
675 std::lock_guard<std::mutex> Lock(
MMutex);
700 std::hash<
typename std::shared_ptr<queue_impl>::element_type *>()(
705 void *DeviceGlobalPtr,
const void *Src,
706 bool IsDeviceImageScope,
size_t NumBytes,
708 const std::vector<event> &DepEvents);
710 void *Dest,
const void *DeviceGlobalPtr,
711 bool IsDeviceImageScope,
size_t NumBytes,
713 const std::vector<event> &DepEvents);
718 std::shared_ptr<ext::oneapi::experimental::detail::graph_impl> Graph) {
719 std::lock_guard<std::mutex> Lock(
MMutex);
724 std::shared_ptr<ext::oneapi::experimental::detail::graph_impl>
738 std::optional<event> Result = std::nullopt;
743 const std::vector<event> &
745 std::vector<event> &MutableVec,
746 std::unique_lock<std::mutex> &QueueLock);
754 template <
typename HandlerType = handler>
759 std::lock_guard<std::mutex> Lock{
MMutex};
767 auto &EventToBuildDeps =
769 if (EventToBuildDeps)
771 createSyclObjFromImpl<sycl::event>(EventToBuildDeps));
778 Handler.depends_on(*ExternalEvent);
780 EventRet = Handler.finalize();
783 EventRet = Handler.finalize();
797 const std::shared_ptr<queue_impl> &Self,
798 const std::shared_ptr<queue_impl> &PrimaryQueue,
799 const std::shared_ptr<queue_impl> &SecondaryQueue,
803 thread_local
static bool PreventSubmit =
false;
808 "Calls to sycl::queue::submit cannot be nested. Command group "
809 "function objects should use the sycl::handler API instead.");
813 Handler.saveCodeLoc(Loc);
814 PreventSubmit =
true;
818 PreventSubmit =
false;
821 PreventSubmit =
false;
827 event Event = detail::createSyclObjFromImpl<event>(
828 std::make_shared<detail::event_impl>());
832 bool KernelUsesAssert =
false;
836 KernelUsesAssert = !(Handler.MKernel && Handler.MKernel->isInterop()) &&
838 Handler.MKernelName.
c_str());
841 (*PostProcess)(IsKernel, KernelUsesAssert, Event);
854 template <
typename HandlerFuncT>
856 const std::vector<event> &DepEvents,
857 HandlerFuncT HandlerFunc);
872 template <
typename HandlerFuncT,
typename MemMngrFuncT,
873 typename... MemMngrArgTs>
875 const std::vector<event> &DepEvents,
876 HandlerFuncT HandlerFunc, MemMngrFuncT MemMngrFunc,
877 MemMngrArgTs... MemOpArgs);
882 std::string &Name, int32_t StreamID,
886 int32_t StreamID, uint64_t IId);
918 std::vector<sycl::detail::pi::PiQueue>
MQueues;
974 std::weak_ptr<ext::oneapi::experimental::detail::graph_impl>
MGraph{};
The context class represents a SYCL context on which kernel functions may be executed.
CGTYPE
Type of the command group.
ThreadPool & getHostTaskThreadPool()
static GlobalHandler & instance()
static ProgramManager & getInstance()
bool kernelUsesAssert(const std::string &KernelName) const
bool isInFusionMode(QueueIdT Queue)
static Scheduler & getInstance()
event discard_or_return(const event &Event)
const bool MSupportsDiscardingPiEvents
const property_list MPropList
bool is_in_fusion_mode()
Check whether the queue is in fusion mode.
event submit(const std::function< void(handler &)> &CGF, const std::shared_ptr< queue_impl > &Self, const std::shared_ptr< queue_impl > &SecondQueue, const detail::code_location &Loc, const SubmitPostProcessF *PostProcess=nullptr)
Submits a command group function object to the queue, in order to be scheduled for execution on the d...
uint64_t MInstanceID
The instance ID of the trace event for queue object.
std::vector< EventImplPtr > MStreamsServiceEvents
void wait_and_throw(const detail::code_location &Loc={})
Param::return_type get_info() const
Queries SYCL queue for information.
std::optional< event > MInOrderExternalEvent
std::optional< event > popExternalEvent()
sycl::detail::pi::PiQueue createQueue(QueueOrder Order)
Creates PI queue.
void registerStreamServiceEvent(const EventImplPtr &Event)
static std::atomic< unsigned long long > MNextAvailableQueueID
void addEvent(const event &Event)
Stores an event that should be associated with the queue.
std::vector< sycl::detail::pi::PiQueue > MQueues
List of queues created for FPGA device from a single SYCL queue.
pi_native_handle getNative(int32_t &NativeHandleDesc) const
Gets the native handle of the SYCL queue.
sycl::detail::pi::PiQueue & getExclusiveQueueHandleRef()
unsigned long long MQueueID
std::vector< std::weak_ptr< event_impl > > MEventsWeak
These events are tracked, but not owned, by the queue.
event submitMemOpHelper(const std::shared_ptr< queue_impl > &Self, const std::vector< event > &DepEvents, HandlerFuncT HandlerFunc, MemMngrFuncT MemMngrFunc, MemMngrArgTs... MemOpArgs)
Performs submission of a memory operation directly if scheduler can be bypassed, or with a handler ot...
bool has_property() const noexcept
event submit(const std::function< void(handler &)> &CGF, const std::shared_ptr< queue_impl > &Self, const detail::code_location &Loc, const SubmitPostProcessF *PostProcess=nullptr)
Submits a command group function object to the queue, in order to be scheduled for execution on the d...
std::vector< event > MEventsShared
Events without data dependencies (such as USM) need an owner, additionally, USM operations are not ad...
std::mutex MMutex
Protects all the fields that can be changed by class' methods.
void setCommandGraph(std::shared_ptr< ext::oneapi::experimental::detail::graph_impl > Graph)
void cleanup_fusion_cmd()
queue_impl(sycl::detail::pi::PiQueue PiQueue, const ContextImplPtr &Context, const async_handler &AsyncHandler)
Constructs a SYCL queue from plugin interoperability handle.
device get_device() const
ThreadPool & getThreadPool()
bool supportsDiscardingPiEvents() const
event submitWithHandler(const std::shared_ptr< queue_impl > &Self, const std::vector< event > &DepEvents, HandlerFuncT HandlerFunc)
Helper function for submitting a memory operation with a handler.
std::function< void(bool, bool, event &)> SubmitPostProcessF
void addSharedEvent(const event &Event)
queue_impl.addEvent tracks events with weak pointers but some events have no other owners.
const ContextImplPtr MContext
void finalizeHandler(HandlerType &Handler, event &EventRet)
void setExternalEvent(const event &Event)
std::mutex MInOrderExternalEventMtx
sycl::detail::pi::PiQueue & getHandleRef()
void throw_asynchronous()
Performs a blocking wait for the completion of all enqueued tasks in the queue.
event memset(const std::shared_ptr< queue_impl > &Self, void *Ptr, int Value, size_t Count, const std::vector< event > &DepEvents)
Fills the memory pointed by a USM pointer with the value specified.
queue_impl(const DeviceImplPtr &Device, const ContextImplPtr &Context, const async_handler &AsyncHandler, const property_list &PropList)
Constructs a SYCL queue with an async_handler and property_list provided from a device and a context.
const PluginPtr & getPlugin() const
event submit_impl(const std::function< void(handler &)> &CGF, const std::shared_ptr< queue_impl > &Self, const std::shared_ptr< queue_impl > &PrimaryQueue, const std::shared_ptr< queue_impl > &SecondaryQueue, const detail::code_location &Loc, const SubmitPostProcessF *PostProcess)
Performs command group submission to the queue.
const DeviceImplPtr & getDeviceImplPtr() const
exception_list MExceptions
event mem_advise(const std::shared_ptr< queue_impl > &Self, const void *Ptr, size_t Length, pi_mem_advice Advice, const std::vector< event > &DepEvents)
Provides additional information to the underlying runtime about how different allocations are used.
const async_handler MAsyncHandler
event memcpyToDeviceGlobal(const std::shared_ptr< queue_impl > &Self, void *DeviceGlobalPtr, const void *Src, bool IsDeviceImageScope, size_t NumBytes, size_t Offset, const std::vector< event > &DepEvents)
exception_list getExceptionList() const
bool ext_oneapi_empty() const
event memcpyFromDeviceGlobal(const std::shared_ptr< queue_impl > &Self, void *Dest, const void *DeviceGlobalPtr, bool IsDeviceImageScope, size_t NumBytes, size_t Offset, const std::vector< event > &DepEvents)
event memcpy(const std::shared_ptr< queue_impl > &Self, void *Dest, const void *Src, size_t Count, const std::vector< event > &DepEvents, const code_location &CodeLoc)
Copies data from one memory region to another, both pointed by USM pointers.
const bool MDiscardEvents
uint8_t MStreamID
The stream under which the traces are emitted from the queue object.
void reportAsyncException(const std::exception_ptr &ExceptionPtr)
Puts an exception into the list of asynchronous exceptions.
context get_context() const
void wait(const detail::code_location &Loc={})
Performs a blocking wait for the completion of all enqueued tasks in the queue.
std::shared_ptr< ext::oneapi::experimental::detail::graph_impl > getCommandGraph() const
bool isProfilingFallback()
bool MEmulateOOO
Indicates that a native out-of-order queue could not be created and we need to emulate it with multip...
void instrumentationEpilog(void *TelementryEvent, std::string &Name, int32_t StreamID, uint64_t IId)
EventImplPtr MGraphLastEventPtr
static sycl::detail::pi::PiQueueProperties createPiQueueProperties(const property_list &PropList, QueueOrder Order)
Creates PI properties array.
const ContextImplPtr & getContextImplPtr() const
void * instrumentationProlog(const detail::code_location &CodeLoc, std::string &Name, int32_t StreamID, uint64_t &iid)
size_t MNextQueueIdx
Iterator through MQueues.
std::mutex MStreamsServiceEventsMutex
const bool MIsProfilingEnabled
std::weak_ptr< ext::oneapi::experimental::detail::graph_impl > MGraph
Param::return_type get_backend_info() const
Queries SYCL queue for SYCL backend-specific information.
const std::vector< event > & getExtendDependencyList(const std::vector< event > &DepEvents, std::vector< event > &MutableVec, std::unique_lock< std::mutex > &QueueLock)
queue_impl(const DeviceImplPtr &Device, const async_handler &AsyncHandler, const property_list &PropList)
Constructs a SYCL queue from a device using an async_handler and property_list provided.
static ContextImplPtr getDefaultOrNew(const DeviceImplPtr &Device)
EventImplPtr MLastEventPtr
unsigned long long getQueueID()
queue_impl(sycl::detail::pi::PiQueue PiQueue, const ContextImplPtr &Context, const async_handler &AsyncHandler, const property_list &PropList)
Constructs a SYCL queue from plugin interoperability handle.
propertyT get_property() const
const char * c_str() const noexcept
The SYCL device class encapsulates a single SYCL device on which kernels may be executed.
An event object can be used to synchronize memory transfers, enqueues of kernels and signaling barrie...
A list of asynchronous exceptions.
Implementation of node class from SYCL_EXT_ONEAPI_GRAPH.
Command group handler class.
Objects of the property_list class are containers for the SYCL properties.
bool has_property() const noexcept
PropT get_property() const
Encapsulates a single SYCL queue which schedules kernels on a SYCL device.
#define __SYCL_PI_CUDA_USE_DEFAULT_STREAM
::pi_queue_properties PiQueueProperties
CUDAContextT
Possible CUDA context types supported by PI CUDA backend TODO: Implement this as a property once ther...
constexpr const char * SYCL_STREAM_NAME
std::shared_ptr< sycl::detail::context_impl > ContextImplPtr
decltype(Obj::impl) getSyclObjImpl(const Obj &SyclObject)
std::shared_ptr< event_impl > EventImplPtr
std::shared_ptr< plugin > PluginPtr
std::shared_ptr< device_impl > DeviceImplPtr
static constexpr size_t MaxNumQueues
Sets max number of queues supported by FPGA RT.
constexpr CUDAContextT DefaultContextType
Default context type created for CUDA backend.
constexpr auto memory_order_relaxed
std::function< void(sycl::exception_list)> async_handler
std::error_code make_error_code(sycl::errc E) noexcept
Constructs an error code using e and sycl_category()
constexpr pi_queue_properties PI_QUEUE_COMPUTE_INDEX
pi_result piQueueFinish(pi_queue command_queue)
uintptr_t pi_native_handle
constexpr pi_queue_properties PI_EXT_QUEUE_FLAG_SUBMISSION_IMMEDIATE
constexpr pi_queue_properties PI_EXT_QUEUE_FLAG_SUBMISSION_NO_IMMEDIATE
constexpr pi_queue_properties PI_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE
pi_result piextQueueCreate(pi_context context, pi_device device, pi_queue_properties *properties, pi_queue *queue)
pi_result piQueueRelease(pi_queue command_queue)
constexpr pi_queue_properties PI_EXT_ONEAPI_QUEUE_FLAG_PRIORITY_LOW
constexpr pi_queue_properties PI_EXT_ONEAPI_QUEUE_FLAG_DISCARD_EVENTS
constexpr pi_queue_properties PI_EXT_ONEAPI_QUEUE_FLAG_PRIORITY_HIGH
constexpr pi_queue_properties PI_QUEUE_FLAGS
constexpr pi_queue_properties PI_QUEUE_FLAG_PROFILING_ENABLE
pi_result piQueueGetInfo(pi_queue command_queue, pi_queue_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
pi_result piQueueRetain(pi_queue command_queue)
_Abi const simd< _Tp, _Abi > & noexcept