PI queue mapping onto CUstream objects. More...
#include <cuda/pi_cuda.hpp>
Public Types | |
using | native_type = CUstream |
using | native_type = hipStream_t |
Public Member Functions | |
_pi_queue (std::vector< CUstream > &&compute_streams, std::vector< CUstream > &&transfer_streams, _pi_context *context, _pi_device *device, pi_queue_properties properties, unsigned int flags, bool backend_owns=true) | |
~_pi_queue () | |
void | compute_stream_wait_for_barrier_if_needed (CUstream stream, pi_uint32 stream_i) |
void | transfer_stream_wait_for_barrier_if_needed (CUstream stream, pi_uint32 stream_i) |
native_type | get_next_compute_stream (pi_uint32 *stream_token=nullptr) |
native_type | get_next_compute_stream (pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, _pi_stream_guard &guard, pi_uint32 *stream_token=nullptr) |
native_type | get_next_transfer_stream () |
native_type | get () |
bool | has_been_synchronized (pi_uint32 stream_token) |
bool | can_reuse_stream (pi_uint32 stream_token) |
template<typename T > | |
bool | all_of (T &&f) |
template<typename T > | |
void | for_each_stream (T &&f) |
template<bool ResetUsed = false, typename T > | |
void | sync_streams (T &&f) |
_pi_context * | get_context () const |
_pi_device * | get_device () const |
pi_uint32 | increment_reference_count () noexcept |
pi_uint32 | decrement_reference_count () noexcept |
pi_uint32 | get_reference_count () const noexcept |
pi_uint32 | get_next_event_id () noexcept |
bool | backend_has_ownership () const noexcept |
_pi_queue (pi_context ContextArg, cm_support::CmQueue *CmQueueArg) | |
_pi_queue (std::vector< native_type > &&compute_streams, std::vector< native_type > &&transfer_streams, _pi_context *context, _pi_device *device, pi_queue_properties properties, unsigned int flags) | |
~_pi_queue () | |
void | compute_stream_wait_for_barrier_if_needed (hipStream_t stream, pi_uint32 stream_i) |
void | transfer_stream_wait_for_barrier_if_needed (hipStream_t stream, pi_uint32 stream_i) |
native_type | get_next_compute_stream (pi_uint32 *stream_token=nullptr) |
native_type | get_next_compute_stream (pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, _pi_stream_guard &guard, pi_uint32 *stream_token=nullptr) |
native_type | get_next_transfer_stream () |
native_type | get () |
bool | has_been_synchronized (pi_uint32 stream_token) |
bool | can_reuse_stream (pi_uint32 stream_token) |
template<typename T > | |
bool | all_of (T &&f) |
template<typename T > | |
void | for_each_stream (T &&f) |
template<bool ResetUsed = false, typename T > | |
void | sync_streams (T &&f) |
_pi_context * | get_context () const |
_pi_device * | get_device () const |
pi_uint32 | increment_reference_count () noexcept |
pi_uint32 | decrement_reference_count () noexcept |
pi_uint32 | get_reference_count () const noexcept |
pi_uint32 | get_next_event_id () noexcept |
Public Attributes | |
std::vector< native_type > | compute_streams_ |
std::vector< native_type > | transfer_streams_ |
std::vector< bool > | delay_compute_ |
std::vector< bool > | compute_applied_barrier_ |
std::vector< bool > | transfer_applied_barrier_ |
_pi_context * | context_ |
_pi_device * | device_ |
pi_queue_properties | properties_ |
CUevent | barrier_event_ = nullptr |
CUevent | barrier_tmp_event_ = nullptr |
std::atomic_uint32_t | refCount_ |
std::atomic_uint32_t | eventCount_ |
std::atomic_uint32_t | compute_stream_idx_ |
std::atomic_uint32_t | transfer_stream_idx_ |
unsigned int | num_compute_streams_ |
unsigned int | num_transfer_streams_ |
unsigned int | last_sync_compute_streams_ |
unsigned int | last_sync_transfer_streams_ |
unsigned int | flags_ |
std::mutex | compute_stream_sync_mutex_ |
std::mutex | compute_stream_mutex_ |
std::mutex | transfer_stream_mutex_ |
std::mutex | barrier_mutex_ |
bool | has_ownership_ |
pi_context | Context = nullptr |
cm_support::CmQueue * | CmQueuePtr = nullptr |
hipEvent_t | barrier_event_ = nullptr |
hipEvent_t | barrier_tmp_event_ = nullptr |
Static Public Attributes | |
static constexpr int | default_num_compute_streams = 128 |
static constexpr int | default_num_transfer_streams = 64 |
PI queue mapping onto CUstream objects.
PI queue mapping onto hipStream_t objects.
Definition at line 395 of file pi_cuda.hpp.
using _pi_queue::native_type = hipStream_t |
Definition at line 378 of file pi_hip.hpp.
using _pi_queue::native_type = CUstream |
Definition at line 396 of file pi_cuda.hpp.
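_pi_queue::_pi_queue (std::vector< CUstream > &&compute_streams, std::vector< CUstream > &&transfer_streams, _pi_context *context, _pi_device *device, pi_queue_properties properties, unsigned int flags, bool backend_owns=true) |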
inline |
Definition at line 433 of file pi_cuda.hpp.
_pi_queue::~_pi_queue () |
inline |
Definition at line 451 of file pi_cuda.hpp.
References context_, cuda_piContextRelease(), cuda_piDeviceRelease(), and device_.
_pi_queue::_pi_queue (pi_context ContextArg, cm_support::CmQueue *CmQueueArg) |
inline |
Definition at line 110 of file pi_esimd_emulator.hpp.
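_pi_queue::_pi_queue (std::vector< native_type > &&compute_streams, std::vector< native_type > &&transfer_streams, _pi_context *context, _pi_device *device, pi_queue_properties properties, unsigned int flags) |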
inline |
Definition at line 414 of file pi_hip.hpp.
_pi_queue::~_pi_queue () |
inline |
Definition at line 432 of file pi_hip.hpp.
References context_, device_, hip_piContextRelease(), and hip_piDeviceRelease().
template<typename T > bool _pi_queue::all_of (T &&f) |
inline |
Definition at line 485 of file pi_hip.hpp.
References all_of(), compute_stream_mutex_, compute_streams_, num_compute_streams_, num_transfer_streams_, transfer_stream_mutex_, and transfer_streams_.
template<typename T > bool _pi_queue::all_of (T &&f) |
inline |
Definition at line 504 of file pi_cuda.hpp.
References all_of(), compute_stream_mutex_, compute_streams_, num_compute_streams_, num_transfer_streams_, transfer_stream_mutex_, and transfer_streams_.
Referenced by cuda_piQueueGetInfo(), and hip_piQueueGetInfo().
bool _pi_queue::backend_has_ownership () const noexcept |
inline noexcept |
Definition at line 625 of file pi_cuda.hpp.
References has_ownership_.
Referenced by cuda_piQueueRelease().
bool _pi_queue::can_reuse_stream (pi_uint32 stream_token) |
inline |
Definition at line 463 of file pi_hip.hpp.
References compute_stream_idx_, compute_streams_, has_been_synchronized(), and max().
bool _pi_queue::can_reuse_stream (pi_uint32 stream_token) |
inline |
Definition at line 482 of file pi_cuda.hpp.
References compute_stream_idx_, compute_streams_, has_been_synchronized(), and max().
void _pi_queue::compute_stream_wait_for_barrier_if_needed (CUstream stream, pi_uint32 stream_i) |
Definition at line 502 of file pi_cuda.cpp.
void _pi_queue::compute_stream_wait_for_barrier_if_needed | ( | hipStream_t | stream, |
pi_uint32 | stream_i | ||
) |
Definition at line 452 of file pi_hip.cpp.
References barrier_event_, and compute_applied_barrier_.
pi_uint32 _pi_queue::decrement_reference_count () noexcept |
inline noexcept |
Definition at line 600 of file pi_hip.hpp.
References refCount_.
pi_uint32 _pi_queue::decrement_reference_count () noexcept |
inline noexcept |
Definition at line 619 of file pi_cuda.hpp.
References refCount_.
Referenced by cuda_piQueueRelease(), and hip_piQueueRelease().
template<typename T > void _pi_queue::for_each_stream (T &&f) |
inline |
Definition at line 507 of file pi_hip.hpp.
References compute_stream_mutex_, compute_streams_, num_compute_streams_, num_transfer_streams_, transfer_stream_mutex_, and transfer_streams_.
template<typename T > void _pi_queue::for_each_stream (T &&f) |
inline |
Definition at line 526 of file pi_cuda.hpp.
References compute_stream_mutex_, compute_streams_, num_compute_streams_, num_transfer_streams_, transfer_stream_mutex_, and transfer_streams_.
Referenced by cuda_piQueueRelease(), enqueueEventWait(), and hip_piQueueRelease().
native_type _pi_queue::get () |
inline |
Definition at line 453 of file pi_hip.hpp.
References get_next_compute_stream().
native_type _pi_queue::get () |
inline |
Definition at line 472 of file pi_cuda.hpp.
References get_next_compute_stream().
_pi_context * _pi_queue::get_context () const |
inline |
Definition at line 594 of file pi_hip.hpp.
References context_.
_pi_context * _pi_queue::get_context () const |
inline |
Definition at line 613 of file pi_cuda.hpp.
References context_.
Referenced by cuda_piEnqueueEventsWaitWithBarrier(), cuda_piEnqueueKernelLaunch(), cuda_piEnqueueMemBufferCopy(), cuda_piEnqueueMemBufferCopyRect(), cuda_piEnqueueMemBufferFill(), cuda_piEnqueueMemBufferMap(), cuda_piEnqueueMemBufferRead(), cuda_piEnqueueMemBufferReadRect(), cuda_piEnqueueMemBufferWrite(), cuda_piEnqueueMemBufferWriteRect(), cuda_piEnqueueMemImageCopy(), cuda_piEnqueueMemImageRead(), cuda_piEnqueueMemImageWrite(), cuda_piEnqueueMemUnmap(), cuda_piextQueueGetNativeHandle(), cuda_piextUSMEnqueueMemAdvise(), cuda_piextUSMEnqueueMemcpy(), cuda_piextUSMEnqueueMemcpy2D(), cuda_piextUSMEnqueueMemset(), cuda_piextUSMEnqueuePrefetch(), cuda_piQueueFinish(), cuda_piQueueRelease(), hip_piEnqueueEventsWaitWithBarrier(), hip_piEnqueueKernelLaunch(), hip_piEnqueueMemBufferCopy(), hip_piEnqueueMemBufferCopyRect(), hip_piEnqueueMemBufferFill(), hip_piEnqueueMemBufferMap(), hip_piEnqueueMemBufferRead(), hip_piEnqueueMemBufferReadRect(), hip_piEnqueueMemBufferWrite(), hip_piEnqueueMemBufferWriteRect(), hip_piEnqueueMemImageCopy(), hip_piEnqueueMemImageRead(), hip_piEnqueueMemImageWrite(), hip_piEnqueueMemUnmap(), hip_piextQueueGetNativeHandle(), hip_piextUSMEnqueueMemcpy(), hip_piextUSMEnqueueMemcpy2D(), hip_piextUSMEnqueueMemset(), hip_piextUSMEnqueuePrefetch(), hip_piQueueFinish(), hip_piQueueRelease(), and _pi_event::make_native().
_pi_device * _pi_queue::get_device () const |
inline |
Definition at line 596 of file pi_hip.hpp.
References device_.
_pi_device * _pi_queue::get_device () const |
inline |
Definition at line 615 of file pi_cuda.hpp.
References device_.
Referenced by cuda_piEnqueueKernelLaunch(), _pi_event::get_end_time(), _pi_event::get_queued_time(), _pi_event::get_start_time(), and hip_piEnqueueKernelLaunch().
native_type _pi_queue::get_next_compute_stream | ( | pi_uint32 * | stream_token = nullptr | ) |
hipStream_t _pi_queue::get_next_compute_stream | ( | pi_uint32 * | stream_token = nullptr | ) |
Definition at line 518 of file pi_cuda.cpp.
Referenced by cuda_piEnqueueEventsWaitWithBarrier(), cuda_piEnqueueKernelLaunch(), cuda_piextQueueGetNativeHandle(), cuda_piextUSMEnqueueMemset(), get(), hip_piEnqueueEventsWaitWithBarrier(), hip_piEnqueueKernelLaunch(), hip_piextQueueGetNativeHandle(), and hip_piextUSMEnqueueMemset().
native_type _pi_queue::get_next_compute_stream | ( | pi_uint32 | num_events_in_wait_list, |
const pi_event * | event_wait_list, | ||
_pi_stream_guard & | guard, | ||
pi_uint32 * | stream_token = nullptr | ||
) |
hipStream_t _pi_queue::get_next_compute_stream | ( | pi_uint32 | num_events_in_wait_list, |
const pi_event * | event_wait_list, | ||
_pi_stream_guard & | guard, | ||
pi_uint32 * | stream_token = nullptr | ||
) |
Definition at line 551 of file pi_cuda.cpp.
References _pi_event::get_compute_stream_token(), and _pi_event::get_stream().
pi_uint32 _pi_queue::get_next_event_id () noexcept |
inline noexcept |
Definition at line 604 of file pi_hip.hpp.
References eventCount_.
pi_uint32 _pi_queue::get_next_event_id () noexcept |
inline noexcept |
Definition at line 623 of file pi_cuda.hpp.
References eventCount_.
Referenced by _pi_event::record().
native_type _pi_queue::get_next_transfer_stream | ( | ) |
hipStream_t _pi_queue::get_next_transfer_stream | ( | ) |
Definition at line 579 of file pi_cuda.cpp.
Referenced by cuda_piEnqueueMemBufferCopy(), cuda_piEnqueueMemBufferCopyRect(), cuda_piEnqueueMemBufferFill(), cuda_piEnqueueMemBufferMap(), cuda_piEnqueueMemBufferRead(), cuda_piEnqueueMemBufferReadRect(), cuda_piEnqueueMemBufferWrite(), cuda_piEnqueueMemBufferWriteRect(), cuda_piEnqueueMemImageCopy(), cuda_piEnqueueMemImageRead(), cuda_piEnqueueMemImageWrite(), cuda_piEnqueueMemUnmap(), cuda_piextUSMEnqueueMemAdvise(), cuda_piextUSMEnqueueMemcpy(), cuda_piextUSMEnqueueMemcpy2D(), cuda_piextUSMEnqueuePrefetch(), hip_piEnqueueMemBufferCopy(), hip_piEnqueueMemBufferCopyRect(), hip_piEnqueueMemBufferFill(), hip_piEnqueueMemBufferMap(), hip_piEnqueueMemBufferRead(), hip_piEnqueueMemBufferReadRect(), hip_piEnqueueMemBufferWrite(), hip_piEnqueueMemBufferWriteRect(), hip_piEnqueueMemImageCopy(), hip_piEnqueueMemImageRead(), hip_piEnqueueMemImageWrite(), hip_piEnqueueMemUnmap(), hip_piextUSMEnqueueMemcpy(), hip_piextUSMEnqueueMemcpy2D(), and hip_piextUSMEnqueuePrefetch().
pi_uint32 _pi_queue::get_reference_count () const noexcept |
inline noexcept |
Definition at line 602 of file pi_hip.hpp.
References refCount_.
pi_uint32 _pi_queue::get_reference_count () const noexcept |
inline noexcept |
Definition at line 621 of file pi_cuda.hpp.
References refCount_.
Referenced by cuda_piQueueGetInfo(), cuda_piQueueRetain(), hip_piQueueGetInfo(), and hip_piQueueRetain().
bool _pi_queue::has_been_synchronized (pi_uint32 stream_token) |
inline |
Definition at line 455 of file pi_hip.hpp.
References last_sync_compute_streams_, and max().
bool _pi_queue::has_been_synchronized (pi_uint32 stream_token) |
inline |
Definition at line 474 of file pi_cuda.hpp.
References last_sync_compute_streams_, and max().
Referenced by can_reuse_stream().
pi_uint32 _pi_queue::increment_reference_count () noexcept |
inline noexcept |
Definition at line 598 of file pi_hip.hpp.
References refCount_.
pi_uint32 _pi_queue::increment_reference_count () noexcept |
inline noexcept |
Definition at line 617 of file pi_cuda.hpp.
References refCount_.
Referenced by cuda_piQueueRetain(), and hip_piQueueRetain().
template<bool ResetUsed = false, typename T > void _pi_queue::sync_streams (T &&f) |
inline |
Definition at line 528 of file pi_hip.hpp.
References compute_stream_idx_, compute_stream_mutex_, compute_stream_sync_mutex_, compute_streams_, delay_compute_, last_sync_compute_streams_, last_sync_transfer_streams_, num_compute_streams_, num_transfer_streams_, transfer_stream_idx_, transfer_stream_mutex_, and transfer_streams_.
template<bool ResetUsed = false, typename T > void _pi_queue::sync_streams (T &&f) |
inline |
Definition at line 547 of file pi_cuda.hpp.
References compute_stream_idx_, compute_stream_mutex_, compute_stream_sync_mutex_, compute_streams_, delay_compute_, last_sync_compute_streams_, last_sync_transfer_streams_, num_compute_streams_, num_transfer_streams_, transfer_stream_idx_, transfer_stream_mutex_, and transfer_streams_.
Referenced by cuda_piEnqueueEventsWaitWithBarrier(), cuda_piQueueFinish(), hip_piEnqueueEventsWaitWithBarrier(), and hip_piQueueFinish().
void _pi_queue::transfer_stream_wait_for_barrier_if_needed (CUstream stream, pi_uint32 stream_i) |
Definition at line 510 of file pi_cuda.cpp.
void _pi_queue::transfer_stream_wait_for_barrier_if_needed | ( | hipStream_t | stream, |
pi_uint32 | stream_i | ||
) |
Definition at line 460 of file pi_hip.cpp.
References barrier_event_, and transfer_applied_barrier_.
hipEvent_t _pi_queue::barrier_event_ = nullptr |
Definition at line 395 of file pi_hip.hpp.
CUevent _pi_queue::barrier_event_ = nullptr |
Definition at line 413 of file pi_cuda.hpp.
Referenced by compute_stream_wait_for_barrier_if_needed(), cuda_piEnqueueEventsWaitWithBarrier(), hip_piEnqueueEventsWaitWithBarrier(), and transfer_stream_wait_for_barrier_if_needed().
std::mutex _pi_queue::barrier_mutex_ |
Definition at line 430 of file pi_cuda.hpp.
Referenced by cuda_piEnqueueEventsWaitWithBarrier(), and hip_piEnqueueEventsWaitWithBarrier().
hipEvent_t _pi_queue::barrier_tmp_event_ = nullptr |
Definition at line 396 of file pi_hip.hpp.
CUevent _pi_queue::barrier_tmp_event_ = nullptr |
Definition at line 414 of file pi_cuda.hpp.
Referenced by cuda_piEnqueueEventsWaitWithBarrier(), and hip_piEnqueueEventsWaitWithBarrier().
cm_support::CmQueue* _pi_queue::CmQueuePtr = nullptr |
Definition at line 115 of file pi_esimd_emulator.hpp.
Referenced by piQueueRelease().
std::vector< bool > _pi_queue::compute_applied_barrier_ |
Definition at line 408 of file pi_cuda.hpp.
Referenced by compute_stream_wait_for_barrier_if_needed(), cuda_piEnqueueEventsWaitWithBarrier(), and hip_piEnqueueEventsWaitWithBarrier().
std::atomic_uint32_t _pi_queue::compute_stream_idx_ |
Definition at line 417 of file pi_cuda.hpp.
Referenced by can_reuse_stream(), and sync_streams().
std::mutex _pi_queue::compute_stream_mutex_ |
Definition at line 428 of file pi_cuda.hpp.
Referenced by all_of(), for_each_stream(), and sync_streams().
std::mutex _pi_queue::compute_stream_sync_mutex_ |
Definition at line 427 of file pi_cuda.hpp.
Referenced by sync_streams().
std::vector< native_type > _pi_queue::compute_streams_ |
Definition at line 400 of file pi_cuda.hpp.
Referenced by all_of(), can_reuse_stream(), for_each_stream(), and sync_streams().
pi_context _pi_queue::Context = nullptr |
Definition at line 114 of file pi_esimd_emulator.hpp.
_pi_context * _pi_queue::context_ |
Definition at line 410 of file pi_cuda.hpp.
Referenced by cuda_piQueueGetInfo(), get_context(), hip_piQueueGetInfo(), and ~_pi_queue().
constexpr int _pi_queue::default_num_compute_streams = 128 |
static constexpr |
Definition at line 397 of file pi_cuda.hpp.
Referenced by cuda_piQueueCreate(), and hip_piQueueCreate().
constexpr int _pi_queue::default_num_transfer_streams = 64 |
static constexpr |
Definition at line 398 of file pi_cuda.hpp.
Referenced by cuda_piQueueCreate(), and hip_piQueueCreate().
std::vector< bool > _pi_queue::delay_compute_ |
Definition at line 406 of file pi_cuda.hpp.
Referenced by sync_streams().
_pi_device * _pi_queue::device_ |
Definition at line 411 of file pi_cuda.hpp.
Referenced by cuda_piEnqueueKernelLaunch(), cuda_piQueueGetInfo(), get_device(), hip_piEnqueueKernelLaunch(), hip_piQueueGetInfo(), and ~_pi_queue().
std::atomic_uint32_t _pi_queue::eventCount_ |
Definition at line 416 of file pi_cuda.hpp.
Referenced by get_next_event_id().
unsigned int _pi_queue::flags_ |
Definition at line 423 of file pi_cuda.hpp.
bool _pi_queue::has_ownership_ |
Definition at line 431 of file pi_cuda.hpp.
Referenced by backend_has_ownership().
unsigned int _pi_queue::last_sync_compute_streams_ |
Definition at line 421 of file pi_cuda.hpp.
Referenced by has_been_synchronized(), and sync_streams().
unsigned int _pi_queue::last_sync_transfer_streams_ |
Definition at line 422 of file pi_cuda.hpp.
Referenced by sync_streams().
unsigned int _pi_queue::num_compute_streams_ |
Definition at line 419 of file pi_cuda.hpp.
Referenced by all_of(), for_each_stream(), and sync_streams().
unsigned int _pi_queue::num_transfer_streams_ |
Definition at line 420 of file pi_cuda.hpp.
Referenced by all_of(), for_each_stream(), and sync_streams().
pi_queue_properties _pi_queue::properties_ |
Definition at line 412 of file pi_cuda.hpp.
Referenced by cuda_piEventGetProfilingInfo(), cuda_piQueueGetInfo(), hip_piEventGetProfilingInfo(), hip_piQueueGetInfo(), _pi_event::release(), and _pi_event::start().
std::atomic_uint32_t _pi_queue::refCount_ |
Definition at line 415 of file pi_cuda.hpp.
Referenced by decrement_reference_count(), get_reference_count(), and increment_reference_count().
std::vector< bool > _pi_queue::transfer_applied_barrier_ |
Definition at line 409 of file pi_cuda.hpp.
Referenced by cuda_piEnqueueEventsWaitWithBarrier(), hip_piEnqueueEventsWaitWithBarrier(), and transfer_stream_wait_for_barrier_if_needed().
std::atomic_uint32_t _pi_queue::transfer_stream_idx_ |
Definition at line 418 of file pi_cuda.hpp.
Referenced by sync_streams().
std::mutex _pi_queue::transfer_stream_mutex_ |
Definition at line 429 of file pi_cuda.hpp.
Referenced by all_of(), for_each_stream(), and sync_streams().
std::vector< native_type > _pi_queue::transfer_streams_ |
Definition at line 401 of file pi_cuda.hpp.
Referenced by all_of(), for_each_stream(), and sync_streams().