PI queue mapping onto CUstream objects. More...
#include <cuda/pi_cuda.hpp>
Classes | |
struct | active_barriers |
struct | command_batch |
struct | pi_queue_group_t |
Public Types | |
using | native_type = CUstream |
using | native_type = hipStream_t |
using | queue_type = _pi_device::queue_group_info_t::type |
Static Public Attributes | |
static constexpr int | default_num_compute_streams = 128 |
static constexpr int | default_num_transfer_streams = 64 |
PI queue mapping onto CUstream objects.
PI queue mapping onto hipStream_t objects.
Definition at line 400 of file pi_cuda.hpp.
using _pi_queue::native_type = CUstream |
Definition at line 401 of file pi_cuda.hpp.
using _pi_queue::native_type = hipStream_t |
Definition at line 378 of file pi_hip.hpp.
Definition at line 565 of file pi_level_zero.hpp.
|
inline |
Definition at line 438 of file pi_cuda.hpp.
References context_, cuda_piContextRetain(), cuda_piDeviceRetain(), and device_.
Referenced by piextQueueCreate(), and piextQueueCreateWithNativeHandle().
|
inline |
Definition at line 456 of file pi_cuda.hpp.
References context_, cuda_piContextRelease(), cuda_piDeviceRelease(), and device_.
|
inline |
Definition at line 110 of file pi_esimd_emulator.hpp.
|
inline |
Definition at line 414 of file pi_hip.hpp.
References context_, device_, hip_piContextRetain(), and hip_piDeviceRetain().
|
inline |
Definition at line 432 of file pi_hip.hpp.
References context_, device_, hip_piContextRelease(), and hip_piDeviceRelease().
_pi_queue::_pi_queue | ( | std::vector< ze_command_queue_handle_t > & | ComputeQueues, |
std::vector< ze_command_queue_handle_t > & | CopyQueues, | ||
pi_context | Context, | ||
pi_device | Device, | ||
bool | OwnZeCommandQueue, | ||
pi_queue_properties | Properties = 0 , |
||
int | ForceComputeIndex = -1 |
||
) |
Definition at line 1101 of file pi_level_zero.cpp.
References CommandListMap, ComputeCommandBatch, ComputeQueueGroupsByTID, CopyCommandBatch, CopyQueueGroupsByTID, Device, sycl::_V1::detail::pi::die(), getRangeOfAllowedComputeEngines(), getRangeOfAllowedCopyEngines(), _pi_queue::command_batch::OpenCommandList, _pi_queue::command_batch::QueueBatchSize, _pi_device::QueueGroup, zeCommandListBatchConfig::startSize(), _pi_device::useImmediateCommandLists(), ZeCommandListBatchComputeConfig, ZeCommandListBatchCopyConfig, and _pi_queue::pi_queue_group_t::ZeQueues.
Definition at line 530 of file pi_level_zero.cpp.
References EventCaches, and _pi_event::isHostVisible().
Referenced by resetDiscardedEvent().
void _pi_queue::adjustBatchSizeForFullBatch | ( | bool | IsCopy | ) |
Definition at line 1533 of file pi_level_zero.cpp.
References ComputeCommandBatch, CopyCommandBatch, zeCommandListBatchConfig::dynamic(), zeCommandListBatchConfig::DynamicSizeMax, zeCommandListBatchConfig::DynamicSizeStep, zeCommandListBatchConfig::NumTimesClosedEarlyThreshold, zeCommandListBatchConfig::NumTimesClosedFullThreshold, ZeCommandListBatchComputeConfig, ZeCommandListBatchConfig(), ZeCommandListBatchCopyConfig, and zePrint().
Referenced by executeCommandList().
void _pi_queue::adjustBatchSizeForPartialBatch | ( | bool | IsCopy | ) |
Definition at line 1560 of file pi_level_zero.cpp.
References ComputeCommandBatch, CopyCommandBatch, zeCommandListBatchConfig::dynamic(), ZeCommandListBatchComputeConfig, ZeCommandListBatchConfig(), ZeCommandListBatchCopyConfig, and zePrint().
Referenced by executeOpenCommandList().
|
inline |
Definition at line 509 of file pi_cuda.hpp.
References all_of(), compute_stream_mutex_, compute_streams_, num_compute_streams_, num_transfer_streams_, transfer_stream_mutex_, and transfer_streams_.
Referenced by cuda_piQueueGetInfo(), and hip_piQueueGetInfo().
|
inline |
Definition at line 485 of file pi_hip.hpp.
References all_of(), compute_stream_mutex_, compute_streams_, num_compute_streams_, num_transfer_streams_, transfer_stream_mutex_, and transfer_streams_.
|
inline noexcept |
Definition at line 630 of file pi_cuda.hpp.
References has_ownership_.
Referenced by cuda_piQueueRelease().
|
inline |
Definition at line 487 of file pi_cuda.hpp.
References compute_stream_idx_, compute_streams_, has_been_synchronized(), and max().
Referenced by get_next_compute_stream().
|
inline |
Definition at line 463 of file pi_hip.hpp.
References compute_stream_idx_, compute_streams_, has_been_synchronized(), and max().
void _pi_queue::CaptureIndirectAccesses | ( | ) |
Definition at line 1586 of file pi_level_zero.cpp.
References _pi_platform::Contexts, Device, KernelsToBeSubmitted, and _pi_device::Platform.
Referenced by executeCommandList(), and piEnqueueKernelLaunch().
Definition at line 398 of file pi_cuda.cpp.
References barrier_event_, and compute_applied_barrier_.
Referenced by get_next_compute_stream().
void _pi_queue::compute_stream_wait_for_barrier_if_needed | ( | hipStream_t | stream, |
pi_uint32 | stream_i | ||
) |
Definition at line 433 of file pi_hip.cpp.
References barrier_event_, and compute_applied_barrier_.
pi_result _pi_queue::createCommandList | ( | bool | UseCopyEngine, |
pi_command_list_ptr_t & | CommandList, | ||
ze_command_queue_handle_t * | ForcedCmdQueue = nullptr |
||
) |
Definition at line 1502 of file pi_level_zero.cpp.
References CommandListMap, Context, Device, getQueueGroup(), insertActiveBarriers(), insertStartBarrierIfDiscardEventsMode(), PI_CALL, sycl::_V1::detail::tie(), ZE_CALL, _pi_context::ZeContext, and _pi_device::ZeDevice.
Referenced by _pi_context::getAvailableCommandList(), and piextQueueCreate().
|
inline noexcept |
Definition at line 624 of file pi_cuda.hpp.
References refCount_.
Referenced by cuda_piQueueRelease(), and hip_piQueueRelease().
|
inline noexcept |
Definition at line 600 of file pi_hip.hpp.
References refCount_.
bool _pi_queue::doReuseDiscardedEvents | ( | ) |
Definition at line 397 of file pi_level_zero.cpp.
References isDiscardEvents(), and isInOrderQueue().
Referenced by executeCommandList(), insertStartBarrierIfDiscardEventsMode(), and signalEventFromCmdListIfLastEventDiscarded().
pi_command_list_ptr_t _pi_queue::eventOpenCommandList | ( | pi_event | Event | ) |
Definition at line 1968 of file pi_level_zero.cpp.
References CommandListMap, ComputeCommandBatch, CopyCommandBatch, Device, hasOpenCommandList(), _pi_queue::command_batch::OpenCommandList, and _pi_device::useImmediateCommandLists().
Referenced by _pi_ze_event_list_t::createAndRetainPiZeEventList().
|
inline |
Definition at line 789 of file pi_level_zero.hpp.
Referenced by _pi_ze_event_list_t::createAndRetainPiZeEventList(), piQueueFinish(), and piQueueRelease().
pi_result _pi_queue::executeCommandList | ( | pi_command_list_ptr_t | CommandList, |
bool | IsBlocking = false , |
||
bool | OKToBatchCommand = false |
||
) |
Definition at line 1610 of file pi_level_zero.cpp.
References adjustBatchSizeForFullBatch(), CaptureIndirectAccesses(), _pi_event::CleanedUp, CommandListMap, ComputeCommandBatch, _pi_platform::ContextsMutex, CopyCommandBatch, createEventAndAssociateQueue(), Device, sycl::_V1::detail::pi::die(), doReuseDiscardedEvents(), zeCommandListBatchConfig::dynamic(), hasOpenCommandList(), Healthy, isBatchingAllowed(), _pi_event::IsDiscarded, LastCommandEvent, LastCommandInBatchHostVisible, LastUsedCommandList, PI_CALL, PI_COMMAND_TYPE_USER, piEventReleaseInternal(), _pi_device::Platform, PrintTrace, _pi_object::RefCount, resetDiscardedEvent(), signalEventFromCmdListIfLastEventDiscarded(), synchronize(), _pi_device::useImmediateCommandLists(), ZE_CALL, ZE_CALL_NOCHECK, ZeCommandListBatchComputeConfig, ZeCommandListBatchConfig(), ZeCommandListBatchCopyConfig, _pi_event::ZeEvent, ZeSerialize, and ZeSerializeBlock.
Referenced by enqueueMemCopyHelper(), enqueueMemCopyRectHelper(), enqueueMemFillHelper(), enqueueMemImageCommandHelper(), executeOpenCommandList(), piEnqueueEventsWait(), piEnqueueEventsWaitWithBarrier(), piEnqueueKernelLaunch(), piEnqueueMemBufferMap(), piEnqueueMemUnmap(), piextUSMEnqueueMemAdvise(), and piextUSMEnqueuePrefetch().
pi_result _pi_queue::executeOpenCommandList | ( | bool | IsCopy | ) |
Definition at line 2047 of file pi_level_zero.cpp.
References adjustBatchSizeForPartialBatch(), CommandListMap, ComputeCommandBatch, CopyCommandBatch, executeCommandList(), and hasOpenCommandList().
Referenced by _pi_ze_event_list_t::createAndRetainPiZeEventList(), and _pi_context::getAvailableCommandList().
|
inline |
Definition at line 531 of file pi_cuda.hpp.
References compute_stream_mutex_, compute_streams_, num_compute_streams_, num_transfer_streams_, transfer_stream_mutex_, and transfer_streams_.
Referenced by cuda_piQueueRelease(), enqueueEventWait(), and hip_piQueueRelease().
|
inline |
Definition at line 507 of file pi_hip.hpp.
References compute_stream_mutex_, compute_streams_, num_compute_streams_, num_transfer_streams_, transfer_stream_mutex_, and transfer_streams_.
|
inline |
Definition at line 477 of file pi_cuda.hpp.
References get_next_compute_stream().
|
inline |
Definition at line 453 of file pi_hip.hpp.
References get_next_compute_stream().
|
inline |
Definition at line 618 of file pi_cuda.hpp.
References context_.
Referenced by cuda_piEnqueueEventsWaitWithBarrier(), cuda_piEnqueueKernelLaunch(), cuda_piEnqueueMemBufferCopy(), cuda_piEnqueueMemBufferCopyRect(), cuda_piEnqueueMemBufferFill(), cuda_piEnqueueMemBufferMap(), cuda_piEnqueueMemBufferRead(), cuda_piEnqueueMemBufferReadRect(), cuda_piEnqueueMemBufferWrite(), cuda_piEnqueueMemBufferWriteRect(), cuda_piEnqueueMemImageCopy(), cuda_piEnqueueMemImageRead(), cuda_piEnqueueMemImageWrite(), cuda_piEnqueueMemUnmap(), cuda_piextQueueGetNativeHandle(), cuda_piextUSMEnqueueMemAdvise(), cuda_piextUSMEnqueueMemcpy(), cuda_piextUSMEnqueueMemcpy2D(), cuda_piextUSMEnqueueMemset(), cuda_piextUSMEnqueuePrefetch(), cuda_piQueueFinish(), cuda_piQueueRelease(), hip_piEnqueueEventsWaitWithBarrier(), hip_piEnqueueKernelLaunch(), hip_piEnqueueMemBufferCopy(), hip_piEnqueueMemBufferCopyRect(), hip_piEnqueueMemBufferFill(), hip_piEnqueueMemBufferMap(), hip_piEnqueueMemBufferRead(), hip_piEnqueueMemBufferReadRect(), hip_piEnqueueMemBufferWrite(), hip_piEnqueueMemBufferWriteRect(), hip_piEnqueueMemImageCopy(), hip_piEnqueueMemImageRead(), hip_piEnqueueMemImageWrite(), hip_piEnqueueMemUnmap(), hip_piextQueueGetNativeHandle(), hip_piextUSMEnqueueMemcpy(), hip_piextUSMEnqueueMemcpy2D(), hip_piextUSMEnqueueMemset(), hip_piextUSMEnqueuePrefetch(), hip_piQueueFinish(), hip_piQueueRelease(), and _pi_event::make_native().
|
inline |
Definition at line 594 of file pi_hip.hpp.
References context_.
|
inline |
Definition at line 620 of file pi_cuda.hpp.
References device_.
Referenced by cuda_piEnqueueKernelLaunch(), and hip_piEnqueueKernelLaunch().
|
inline |
Definition at line 596 of file pi_hip.hpp.
References device_.
hipStream_t _pi_queue::get_next_compute_stream | ( | pi_uint32 * | stream_token = nullptr | ) |
Definition at line 414 of file pi_cuda.cpp.
References compute_stream_idx_, compute_stream_mutex_, compute_stream_wait_for_barrier_if_needed(), compute_streams_, delay_compute_, flags_, and num_compute_streams_.
Referenced by cuda_piEnqueueEventsWaitWithBarrier(), cuda_piEnqueueKernelLaunch(), cuda_piextQueueGetNativeHandle(), cuda_piextUSMEnqueueMemset(), get(), get_next_transfer_stream(), hip_piEnqueueEventsWaitWithBarrier(), hip_piEnqueueKernelLaunch(), hip_piextQueueGetNativeHandle(), and hip_piextUSMEnqueueMemset().
native_type _pi_queue::get_next_compute_stream | ( | pi_uint32 * | stream_token = nullptr | ) |
hipStream_t _pi_queue::get_next_compute_stream | ( | pi_uint32 | num_events_in_wait_list, |
const pi_event * | event_wait_list, | ||
_pi_stream_guard & | guard, | ||
pi_uint32 * | stream_token = nullptr |
||
) |
Definition at line 447 of file pi_cuda.cpp.
References can_reuse_stream(), compute_stream_sync_mutex_, compute_stream_wait_for_barrier_if_needed(), delay_compute_, _pi_event::get_compute_stream_token(), get_next_compute_stream(), and _pi_event::get_stream().
Referenced by get_next_compute_stream().
native_type _pi_queue::get_next_compute_stream | ( | pi_uint32 | num_events_in_wait_list, |
const pi_event * | event_wait_list, | ||
_pi_stream_guard & | guard, | ||
pi_uint32 * | stream_token = nullptr |
||
) |
|
inline noexcept |
Definition at line 628 of file pi_cuda.hpp.
References eventCount_.
Referenced by _pi_event::record().
|
inline noexcept |
Definition at line 604 of file pi_hip.hpp.
References eventCount_.
hipStream_t _pi_queue::get_next_transfer_stream | ( | ) |
Definition at line 475 of file pi_cuda.cpp.
References flags_, get_next_compute_stream(), num_transfer_streams_, transfer_stream_idx_, transfer_stream_mutex_, transfer_stream_wait_for_barrier_if_needed(), and transfer_streams_.
Referenced by cuda_piEnqueueMemBufferCopy(), cuda_piEnqueueMemBufferCopyRect(), cuda_piEnqueueMemBufferFill(), cuda_piEnqueueMemBufferMap(), cuda_piEnqueueMemBufferRead(), cuda_piEnqueueMemBufferReadRect(), cuda_piEnqueueMemBufferWrite(), cuda_piEnqueueMemBufferWriteRect(), cuda_piEnqueueMemImageCopy(), cuda_piEnqueueMemImageRead(), cuda_piEnqueueMemImageWrite(), cuda_piEnqueueMemUnmap(), cuda_piextUSMEnqueueMemAdvise(), cuda_piextUSMEnqueueMemcpy(), cuda_piextUSMEnqueueMemcpy2D(), cuda_piextUSMEnqueuePrefetch(), hip_piEnqueueMemBufferCopy(), hip_piEnqueueMemBufferCopyRect(), hip_piEnqueueMemBufferFill(), hip_piEnqueueMemBufferMap(), hip_piEnqueueMemBufferRead(), hip_piEnqueueMemBufferReadRect(), hip_piEnqueueMemBufferWrite(), hip_piEnqueueMemBufferWriteRect(), hip_piEnqueueMemImageCopy(), hip_piEnqueueMemImageRead(), hip_piEnqueueMemImageWrite(), hip_piEnqueueMemUnmap(), hip_piextUSMEnqueueMemcpy(), hip_piextUSMEnqueueMemcpy2D(), and hip_piextUSMEnqueuePrefetch().
native_type _pi_queue::get_next_transfer_stream | ( | ) |
|
inline noexcept |
Definition at line 626 of file pi_cuda.hpp.
References refCount_.
Referenced by cuda_piQueueGetInfo(), cuda_piQueueRetain(), hip_piQueueGetInfo(), and hip_piQueueRetain().
|
inline noexcept |
Definition at line 602 of file pi_hip.hpp.
References refCount_.
pi_event _pi_queue::getEventFromQueueCache | ( | bool | HostVisible | ) |
Definition at line 513 of file pi_level_zero.cpp.
References EventCaches.
Referenced by createEventAndAssociateQueue().
_pi_queue::pi_queue_group_t & _pi_queue::getQueueGroup | ( | bool | UseCopyEngine | ) |
Definition at line 1475 of file pi_level_zero.cpp.
References CommandListMap, ComputeQueueGroupsByTID, CopyQueueGroupsByTID, Device, _pi_queue::pi_queue_group_t::ImmCmdLists, PerThreadPerQueue, and _pi_device::useImmediateCommandLists().
Referenced by _pi_ze_event_list_t::createAndRetainPiZeEventList(), createCommandList(), _pi_context::getAvailableCommandList(), and piextQueueCreate().
|
inline |
Definition at line 479 of file pi_cuda.hpp.
References last_sync_compute_streams_, and max().
Referenced by can_reuse_stream().
|
inline |
Definition at line 455 of file pi_hip.hpp.
References last_sync_compute_streams_, and max().
|
inline |
Definition at line 757 of file pi_level_zero.hpp.
References _pi_queue::command_batch::OpenCommandList.
Referenced by eventOpenCommandList(), executeCommandList(), executeOpenCommandList(), _pi_context::getAvailableCommandList(), and piQueueGetInfo().
|
inline noexcept |
Definition at line 622 of file pi_cuda.hpp.
References refCount_.
Referenced by cuda_piQueueRetain(), and hip_piQueueRetain().
|
inline noexcept |
Definition at line 598 of file pi_hip.hpp.
References refCount_.
pi_result _pi_queue::insertActiveBarriers | ( | pi_command_list_ptr_t & | CmdList, |
bool | UseCopyEngine | ||
) |
Definition at line 2006 of file pi_level_zero.cpp.
References ActiveBarriers, _pi_queue::active_barriers::add(), _pi_queue::active_barriers::clear(), _pi_ze_event_list_t::createAndRetainPiZeEventList(), createEventAndAssociateQueue(), _pi_queue::active_barriers::empty(), _pi_ze_event_list_t::Length, _pi_event::OwnZeEvent, PI_COMMAND_TYPE_USER, _pi_ze_event_list_t::PiEventList, _pi_queue::active_barriers::vector(), _pi_event::WaitList, ZE_CALL, and _pi_ze_event_list_t::ZeEventList.
Referenced by createCommandList(), and _pi_context::getAvailableCommandList().
pi_result _pi_queue::insertStartBarrierIfDiscardEventsMode | ( | pi_command_list_ptr_t & | CmdList | ) |
Definition at line 1993 of file pi_level_zero.cpp.
References doReuseDiscardedEvents(), LastCommandEvent, LastUsedCommandList, ZE_CALL, and _pi_event::ZeEvent.
Referenced by createCommandList(), and _pi_context::getAvailableCommandList().
bool _pi_queue::isBatchingAllowed | ( | bool | IsCopy | ) | const |
Definition at line 1813 of file pi_level_zero.cpp.
References ComputeCommandBatch, CopyCommandBatch, ZeSerialize, and ZeSerializeBlock.
Referenced by executeCommandList().
bool _pi_queue::isDiscardEvents | ( | ) | const |
Definition at line 897 of file pi_level_zero.cpp.
References PI_EXT_ONEAPI_QUEUE_FLAG_DISCARD_EVENTS, and Properties.
Referenced by _pi_ze_event_list_t::createAndRetainPiZeEventList(), doReuseDiscardedEvents(), and resetCommandList().
bool _pi_queue::isInOrderQueue | ( | ) | const |
Definition at line 891 of file pi_level_zero.cpp.
References PI_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE, and Properties.
Referenced by CleanupEventsInImmCmdLists(), _pi_ze_event_list_t::createAndRetainPiZeEventList(), doReuseDiscardedEvents(), piEnqueueEventsWaitWithBarrier(), piEnqueueMemBufferMap(), piEnqueueMemUnmap(), piQueueGetInfo(), resetCommandList(), and useCopyEngine().
bool _pi_queue::isPriorityHigh | ( | ) | const |
Definition at line 905 of file pi_level_zero.cpp.
References PI_EXT_ONEAPI_QUEUE_FLAG_PRIORITY_HIGH, and Properties.
bool _pi_queue::isPriorityLow | ( | ) | const |
Definition at line 901 of file pi_level_zero.cpp.
References PI_EXT_ONEAPI_QUEUE_FLAG_PRIORITY_LOW, and Properties.
pi_result _pi_queue::resetCommandList | ( | pi_command_list_ptr_t | CommandList, |
bool | MakeAvailable, | ||
std::vector< pi_event > & | EventListToCleanup, | ||
bool | CheckStatus = true |
||
) |
Resets the command list and associated fence in the map and removes events from the command list.
CommandList | The caller must verify that this command list and fence have been signalled. |
MakeAvailable | If the reset command list should be made available, then MakeAvailable needs to be set to true. |
EventListToCleanup | The EventListToCleanup contains a list of events from the command list which need to be cleaned up. |
CheckStatus | Hint indicating whether the status of the events must be checked before they are removed from the immediate command list. This is needed because immediate command lists are not associated with fences, so in general the status of each event needs to be checked. |
Definition at line 909 of file pi_level_zero.cpp.
References Context, Device, isDiscardEvents(), isInOrderQueue(), LastCommandEvent, ZE_CALL, ZE_CALL_NOCHECK, _pi_context::ZeCommandListCacheMutex, _pi_context::ZeComputeCommandListCache, _pi_context::ZeCopyCommandListCache, and _pi_device::ZeDevice.
Referenced by CleanupEventsInImmCmdLists(), _pi_context::getAvailableCommandList(), piextQueueCreate(), piQueueRelease(), and resetCommandLists().
pi_result _pi_queue::resetDiscardedEvent | ( | pi_command_list_ptr_t | CommandList | ) |
Definition at line 401 of file pi_level_zero.cpp.
References addEventToQueueCache(), Context, _pi_event::HostVisibleEvent, _pi_event::IsDiscarded, _pi_event::isHostVisible(), LastCommandEvent, PI_CALL, PI_COMMAND_TYPE_USER, ZE_CALL, _pi_event::ZeEvent, and _pi_event::ZeEventPool.
Referenced by executeCommandList().
pi_result _pi_queue::signalEventFromCmdListIfLastEventDiscarded | ( | pi_command_list_ptr_t | CommandList | ) |
Definition at line 494 of file pi_level_zero.cpp.
References createEventAndAssociateQueue(), doReuseDiscardedEvents(), _pi_event::IsDiscarded, LastCommandEvent, PI_CALL, PI_COMMAND_TYPE_USER, piEventReleaseInternal(), ZE_CALL, and _pi_event::ZeEvent.
Referenced by _pi_ze_event_list_t::createAndRetainPiZeEventList(), and executeCommandList().
|
inline |
Definition at line 552 of file pi_cuda.hpp.
References compute_stream_idx_, compute_stream_mutex_, compute_stream_sync_mutex_, compute_streams_, delay_compute_, last_sync_compute_streams_, last_sync_transfer_streams_, num_compute_streams_, num_transfer_streams_, transfer_stream_idx_, transfer_stream_mutex_, and transfer_streams_.
Referenced by cuda_piEnqueueEventsWaitWithBarrier(), cuda_piQueueFinish(), hip_piEnqueueEventsWaitWithBarrier(), and hip_piQueueFinish().
|
inline |
Definition at line 528 of file pi_hip.hpp.
References compute_stream_idx_, compute_stream_mutex_, compute_stream_sync_mutex_, compute_streams_, delay_compute_, last_sync_compute_streams_, last_sync_transfer_streams_, num_compute_streams_, num_transfer_streams_, transfer_stream_idx_, transfer_stream_mutex_, and transfer_streams_.
pi_result _pi_queue::synchronize | ( | ) |
Definition at line 6820 of file pi_level_zero.cpp.
References ActiveBarriers, CleanupEventListFromResetCmdList(), _pi_queue::active_barriers::clear(), CommandListMap, _pi_event::Completed, ComputeQueueGroupsByTID, CopyQueueGroupsByTID, createEventAndAssociateQueue(), Device, Healthy, LastCommandEvent, PI_CALL, PI_COMMAND_TYPE_USER, piEventRelease(), _pi_device::useImmediateCommandLists(), ZE_CALL, and _pi_event::ZeEvent.
Referenced by executeCommandList(), piEnqueueEventsWait(), piQueueFinish(), and piQueueRelease().
Definition at line 406 of file pi_cuda.cpp.
References barrier_event_, and transfer_applied_barrier_.
Referenced by get_next_transfer_stream().
void _pi_queue::transfer_stream_wait_for_barrier_if_needed | ( | hipStream_t | stream, |
pi_uint32 | stream_i | ||
) |
Definition at line 441 of file pi_hip.cpp.
References barrier_event_, and transfer_applied_barrier_.
bool _pi_queue::useCopyEngine | ( | bool | PreferCopyEngine = true | ) | const |
Definition at line 6805 of file pi_level_zero.cpp.
References CopyQueueGroupsByTID, isInOrderQueue(), and PreferCopyEngine.
Referenced by enqueueMemCopyHelper(), enqueueMemCopyRectHelper(), enqueueMemFillHelper(), enqueueMemImageCommandHelper(), and piextQueueCreate().
active_barriers _pi_queue::ActiveBarriers |
Definition at line 815 of file pi_level_zero.hpp.
Referenced by insertActiveBarriers(), piEnqueueEventsWaitWithBarrier(), and synchronize().
CUevent _pi_queue::barrier_event_ = nullptr |
Definition at line 418 of file pi_cuda.hpp.
Referenced by compute_stream_wait_for_barrier_if_needed(), cuda_piEnqueueEventsWaitWithBarrier(), hip_piEnqueueEventsWaitWithBarrier(), and transfer_stream_wait_for_barrier_if_needed().
hipEvent_t _pi_queue::barrier_event_ = nullptr |
Definition at line 395 of file pi_hip.hpp.
std::mutex _pi_queue::barrier_mutex_ |
Definition at line 435 of file pi_cuda.hpp.
Referenced by cuda_piEnqueueEventsWaitWithBarrier(), and hip_piEnqueueEventsWaitWithBarrier().
CUevent _pi_queue::barrier_tmp_event_ = nullptr |
Definition at line 419 of file pi_cuda.hpp.
Referenced by cuda_piEnqueueEventsWaitWithBarrier(), and hip_piEnqueueEventsWaitWithBarrier().
hipEvent_t _pi_queue::barrier_tmp_event_ = nullptr |
Definition at line 396 of file pi_hip.hpp.
cm_support::CmQueue* _pi_queue::CmQueuePtr = nullptr |
Definition at line 115 of file pi_esimd_emulator.hpp.
Referenced by piQueueRelease().
pi_command_list_map_t _pi_queue::CommandListMap |
Definition at line 669 of file pi_level_zero.hpp.
Referenced by _pi_queue(), CleanupEventsInImmCmdLists(), _pi_ze_event_list_t::createAndRetainPiZeEventList(), createCommandList(), createEventAndAssociateQueue(), eventOpenCommandList(), executeCommandList(), executeOpenCommandList(), _pi_context::getAvailableCommandList(), getQueueGroup(), piEnqueueEventsWait(), piEnqueueEventsWaitWithBarrier(), piEnqueueMemBufferMap(), piEnqueueMemUnmap(), piQueueGetInfo(), piQueueRelease(), resetCommandLists(), and synchronize().
std::vector< bool > _pi_queue::compute_applied_barrier_ |
Definition at line 413 of file pi_cuda.hpp.
Referenced by compute_stream_wait_for_barrier_if_needed(), cuda_piEnqueueEventsWaitWithBarrier(), and hip_piEnqueueEventsWaitWithBarrier().
std::atomic_uint32_t _pi_queue::compute_stream_idx_ |
Definition at line 422 of file pi_cuda.hpp.
Referenced by can_reuse_stream(), get_next_compute_stream(), and sync_streams().
std::mutex _pi_queue::compute_stream_mutex_ |
Definition at line 433 of file pi_cuda.hpp.
Referenced by all_of(), for_each_stream(), get_next_compute_stream(), and sync_streams().
std::mutex _pi_queue::compute_stream_sync_mutex_ |
Definition at line 432 of file pi_cuda.hpp.
Referenced by get_next_compute_stream(), and sync_streams().
std::vector< native_type > _pi_queue::compute_streams_ |
Definition at line 405 of file pi_cuda.hpp.
Referenced by all_of(), can_reuse_stream(), for_each_stream(), get_next_compute_stream(), and sync_streams().
command_batch _pi_queue::ComputeCommandBatch |
Definition at line 694 of file pi_level_zero.hpp.
Referenced by _pi_queue(), adjustBatchSizeForFullBatch(), adjustBatchSizeForPartialBatch(), eventOpenCommandList(), executeCommandList(), executeOpenCommandList(), _pi_context::getAvailableCommandList(), isBatchingAllowed(), and piQueueReleaseInternal().
std::unordered_map<std::thread::id, pi_queue_group_t> _pi_queue::ComputeQueueGroupsByTID |
Definition at line 617 of file pi_level_zero.hpp.
Referenced by _pi_queue(), getQueueGroup(), piEnqueueEventsWaitWithBarrier(), piextQueueCreate(), piextQueueGetNativeHandle(), piQueueFinish(), piQueueGetInfo(), piQueueReleaseInternal(), and synchronize().
pi_context _pi_queue::Context = nullptr |
Definition at line 114 of file pi_esimd_emulator.hpp.
Referenced by USMSharedMemoryAlloc::allocateImpl(), USMSharedReadOnlyMemoryAlloc::allocateImpl(), USMDeviceMemoryAlloc::allocateImpl(), USMHostMemoryAlloc::allocateImpl(), ContextReleaseHelper(), createCommandList(), createEventAndAssociateQueue(), USMMemoryAllocBase::deallocate(), enqueueMemCopyHelper(), enqueueMemCopyRectHelper(), enqueueMemFillHelper(), enqueueMemImageCommandHelper(), EventCreate(), _pi_buffer::free(), _pi_context::getAvailableCommandList(), _pi_buffer::getZeHandle(), IsDevicePointer(), piclProgramCreateWithSource(), piContextGetInfo(), piContextRelease(), piContextRetain(), piEnqueueEventsWait(), piEnqueueEventsWaitWithBarrier(), piEnqueueKernelLaunch(), piEnqueueMemBufferMap(), piEnqueueMemUnmap(), piEventCreate(), piEventReleaseInternal(), piextContextGetNativeHandle(), piextContextSetExtendedDeleter(), piextEnqueueDeviceGlobalVariableRead(), piextEnqueueDeviceGlobalVariableWrite(), piextEventCreateWithNativeHandle(), piextKernelCreateWithNativeHandle(), piextMemCreateWithNativeHandle(), piextProgramCreateWithNativeHandle(), piextQueueCreate(), piextQueueCreateWithNativeHandle(), piextUSMDeviceAlloc(), piextUSMEnqueueMemAdvise(), piextUSMEnqueueMemcpy(), piextUSMEnqueueMemcpy2D(), piextUSMEnqueuePrefetch(), piextUSMFree(), piextUSMGetMemAllocInfo(), piextUSMHostAlloc(), piextUSMSharedAlloc(), piMemBufferCreate(), piMemImageCreate(), piProgramCreate(), piProgramCreateWithBinary(), piProgramLink(), piQueueCreate(), piQueueGetInfo(), piSamplerCreate(), resetCommandList(), resetDiscardedEvent(), USMDeviceAllocImpl(), USMFreeHelper(), USMFreeImpl(), USMHostAllocImpl(), USMSharedAllocImpl(), ZeDeviceMemAllocHelper(), ZeHostMemAllocHelper(), and ZeMemFreeHelper().
const pi_context _pi_queue::Context |
Definition at line 640 of file pi_level_zero.hpp.
_pi_context * _pi_queue::context_ |
Definition at line 415 of file pi_cuda.hpp.
Referenced by _pi_queue(), cuda_piQueueGetInfo(), get_context(), hip_piQueueGetInfo(), and ~_pi_queue().
command_batch _pi_queue::CopyCommandBatch |
Definition at line 694 of file pi_level_zero.hpp.
Referenced by _pi_queue(), adjustBatchSizeForFullBatch(), adjustBatchSizeForPartialBatch(), eventOpenCommandList(), executeCommandList(), executeOpenCommandList(), _pi_context::getAvailableCommandList(), isBatchingAllowed(), and piQueueReleaseInternal().
std::unordered_map<std::thread::id, pi_queue_group_t> _pi_queue::CopyQueueGroupsByTID |
Definition at line 623 of file pi_level_zero.hpp.
Referenced by _pi_queue(), getQueueGroup(), piEnqueueEventsWaitWithBarrier(), piextQueueCreate(), piQueueFinish(), piQueueGetInfo(), piQueueReleaseInternal(), synchronize(), and useCopyEngine().
|
static constexpr |
Definition at line 402 of file pi_cuda.hpp.
Referenced by cuda_piQueueCreate(), and hip_piQueueCreate().
|
static constexpr |
Definition at line 403 of file pi_cuda.hpp.
Referenced by cuda_piQueueCreate(), and hip_piQueueCreate().
std::vector< bool > _pi_queue::delay_compute_ |
Definition at line 411 of file pi_cuda.hpp.
Referenced by get_next_compute_stream(), and sync_streams().
const pi_device _pi_queue::Device |
Definition at line 645 of file pi_level_zero.hpp.
Referenced by _pi_queue(), USMSharedMemoryAlloc::allocateImpl(), USMSharedReadOnlyMemoryAlloc::allocateImpl(), USMDeviceMemoryAlloc::allocateImpl(), CaptureIndirectAccesses(), CleanupEventsInImmCmdLists(), _pi_ze_event_list_t::createAndRetainPiZeEventList(), createCommandList(), enqueueMemFillHelper(), enqueueMemImageCommandHelper(), eventOpenCommandList(), executeCommandList(), _pi_context::getAvailableCommandList(), getQueueGroup(), _pi_queue::pi_queue_group_t::getQueueIndex(), _pi_buffer::getZeHandle(), _pi_buffer::getZeHandlePtr(), pi_command_list_info_t::isCopy(), piDeviceGetInfo(), piDevicePartition(), piDeviceRelease(), piDeviceRetain(), piEnqueueEventsWait(), piEnqueueEventsWaitWithBarrier(), piEnqueueKernelLaunch(), piEnqueueMemBufferCopy(), piEnqueueMemBufferCopyRect(), piEnqueueMemBufferFill(), piEnqueueMemBufferMap(), piEnqueueMemBufferRead(), piEnqueueMemBufferReadRect(), piEnqueueMemBufferWrite(), piEnqueueMemBufferWriteRect(), piEnqueueMemUnmap(), piEventGetProfilingInfo(), piextDeviceCreateWithNativeHandle(), piextDeviceGetNativeHandle(), piextDeviceSelectBinary(), piextGetDeviceFunctionPointer(), piextMemCreateWithNativeHandle(), piextQueueCreate(), piextQueueCreateWithNativeHandle(), piextUSMDeviceAlloc(), piextUSMEnqueueMemAdvise(), piextUSMGetMemAllocInfo(), piextUSMSharedAlloc(), piGetDeviceAndHostTimer(), piKernelGetGroupInfo(), piKernelGetSubGroupInfo(), piMemImageCreate(), piProgramGetBuildInfo(), piQueueCreate(), piQueueFinish(), piQueueGetInfo(), piSamplerCreate(), resetCommandList(), resetCommandLists(), synchronize(), USMDeviceAllocImpl(), USMFreeHelper(), USMSharedAllocImpl(), and ZeDeviceMemAllocHelper().
_pi_device * _pi_queue::device_ |
Definition at line 416 of file pi_cuda.hpp.
Referenced by _pi_queue(), cuda_piEnqueueKernelLaunch(), cuda_piQueueGetInfo(), get_device(), hip_piEnqueueKernelLaunch(), hip_piQueueGetInfo(), and ~_pi_queue().
std::vector<std::list<pi_event> > _pi_queue::EventCaches {2} |
Definition at line 883 of file pi_level_zero.hpp.
Referenced by addEventToQueueCache(), getEventFromQueueCache(), and piQueueReleaseInternal().
std::atomic_uint32_t _pi_queue::eventCount_ |
Definition at line 421 of file pi_cuda.hpp.
Referenced by get_next_event_id().
unsigned int _pi_queue::flags_ |
Definition at line 428 of file pi_cuda.hpp.
Referenced by get_next_compute_stream(), and get_next_transfer_stream().
bool _pi_queue::has_ownership_ |
Definition at line 436 of file pi_cuda.hpp.
Referenced by backend_has_ownership().
bool _pi_queue::Healthy {true} |
Definition at line 830 of file pi_level_zero.hpp.
Referenced by executeCommandList(), piQueueRelease(), and synchronize().
std::vector<pi_kernel> _pi_queue::KernelsToBeSubmitted |
Definition at line 659 of file pi_level_zero.hpp.
Referenced by CaptureIndirectAccesses(), and piEnqueueKernelLaunch().
unsigned int _pi_queue::last_sync_compute_streams_ |
Definition at line 426 of file pi_cuda.hpp.
Referenced by has_been_synchronized(), and sync_streams().
unsigned int _pi_queue::last_sync_transfer_streams_ |
Definition at line 427 of file pi_cuda.hpp.
Referenced by sync_streams().
pi_event _pi_queue::LastCommandEvent = nullptr |
Definition at line 651 of file pi_level_zero.hpp.
Referenced by CleanupCompletedEvent(), CleanupEventsInImmCmdLists(), _pi_ze_event_list_t::createAndRetainPiZeEventList(), executeCommandList(), insertStartBarrierIfDiscardEventsMode(), piEnqueueEventsWait(), piQueueFinish(), piQueueGetInfo(), resetCommandList(), resetDiscardedEvent(), signalEventFromCmdListIfLastEventDiscarded(), and synchronize().
pi_command_list_ptr_t _pi_queue::LastUsedCommandList = CommandListMap.end() |
Definition at line 876 of file pi_level_zero.hpp.
Referenced by _pi_ze_event_list_t::createAndRetainPiZeEventList(), executeCommandList(), and insertStartBarrierIfDiscardEventsMode().
unsigned int _pi_queue::num_compute_streams_ |
Definition at line 424 of file pi_cuda.hpp.
Referenced by all_of(), for_each_stream(), get_next_compute_stream(), and sync_streams().
unsigned int _pi_queue::num_transfer_streams_ |
Definition at line 425 of file pi_cuda.hpp.
Referenced by all_of(), for_each_stream(), get_next_transfer_stream(), and sync_streams().
bool _pi_queue::OwnZeCommandQueue |
Definition at line 666 of file pi_level_zero.hpp.
Referenced by piQueueReleaseInternal().
pi_queue_properties _pi_queue::Properties |
Definition at line 703 of file pi_level_zero.hpp.
Referenced by EventCreate(), _pi_kernel::initialize(), isDiscardEvents(), isInOrderQueue(), isPriorityHigh(), isPriorityLow(), piContextCreate(), piDevicePartition(), piEventGetProfilingInfo(), piextQueueCreate(), piextUSMDeviceAlloc(), piextUSMHostAlloc(), piextUSMSharedAlloc(), piQueueCreate(), USMDeviceAllocImpl(), and USMHostAllocImpl().
pi_queue_properties _pi_queue::properties_ |
Definition at line 417 of file pi_cuda.hpp.
Referenced by cuda_piEventGetProfilingInfo(), cuda_piQueueGetInfo(), hip_piEventGetProfilingInfo(), hip_piQueueGetInfo(), _pi_event::release(), and _pi_event::start().
std::atomic_uint32_t _pi_queue::refCount_ |
Definition at line 420 of file pi_cuda.hpp.
Referenced by decrement_reference_count(), get_reference_count(), and increment_reference_count().
pi_uint32 _pi_queue::RefCountExternal {1} |
Definition at line 827 of file pi_level_zero.hpp.
Referenced by piQueueRelease(), piQueueRetain(), and _pi_event::reset().
std::vector< bool > _pi_queue::transfer_applied_barrier_ |
Definition at line 414 of file pi_cuda.hpp.
Referenced by cuda_piEnqueueEventsWaitWithBarrier(), hip_piEnqueueEventsWaitWithBarrier(), and transfer_stream_wait_for_barrier_if_needed().
std::atomic_uint32_t _pi_queue::transfer_stream_idx_ |
Definition at line 423 of file pi_cuda.hpp.
Referenced by get_next_transfer_stream(), and sync_streams().
std::mutex _pi_queue::transfer_stream_mutex_ |
Definition at line 434 of file pi_cuda.hpp.
Referenced by all_of(), for_each_stream(), get_next_transfer_stream(), and sync_streams().
std::vector< native_type > _pi_queue::transfer_streams_ |
Definition at line 406 of file pi_cuda.hpp.
Referenced by all_of(), for_each_stream(), get_next_transfer_stream(), and sync_streams().