DPC++ Runtime
Runtime libraries for oneAPI DPC++
_pi_queue Struct Reference

PI queue mapping on to CUstream objects. More...

#include <cuda/pi_cuda.hpp>

Collaboration diagram for _pi_queue:

Public Types

using native_type = CUstream
 
using native_type = hipStream_t
 

Public Member Functions

 _pi_queue (std::vector< CUstream > &&compute_streams, std::vector< CUstream > &&transfer_streams, _pi_context *context, _pi_device *device, pi_queue_properties properties, unsigned int flags, bool backend_owns=true)
 
 ~_pi_queue ()
 
void compute_stream_wait_for_barrier_if_needed (CUstream stream, pi_uint32 stream_i)
 
void transfer_stream_wait_for_barrier_if_needed (CUstream stream, pi_uint32 stream_i)
 
native_type get_next_compute_stream (pi_uint32 *stream_token=nullptr)
 
native_type get_next_compute_stream (pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, _pi_stream_guard &guard, pi_uint32 *stream_token=nullptr)
 
native_type get_next_transfer_stream ()
 
native_type get ()
 
bool has_been_synchronized (pi_uint32 stream_token)
 
bool can_reuse_stream (pi_uint32 stream_token)
 
template<typename T >
bool all_of (T &&f)
 
template<typename T >
void for_each_stream (T &&f)
 
template<bool ResetUsed = false, typename T >
void sync_streams (T &&f)
 
_pi_context * get_context () const
 
_pi_device * get_device () const
 
pi_uint32 increment_reference_count () noexcept
 
pi_uint32 decrement_reference_count () noexcept
 
pi_uint32 get_reference_count () const noexcept
 
pi_uint32 get_next_event_id () noexcept
 
bool backend_has_ownership () const noexcept
 
 _pi_queue (pi_context ContextArg, cm_support::CmQueue *CmQueueArg)
 
 _pi_queue (std::vector< native_type > &&compute_streams, std::vector< native_type > &&transfer_streams, _pi_context *context, _pi_device *device, pi_queue_properties properties, unsigned int flags)
 
 ~_pi_queue ()
 
void compute_stream_wait_for_barrier_if_needed (hipStream_t stream, pi_uint32 stream_i)
 
void transfer_stream_wait_for_barrier_if_needed (hipStream_t stream, pi_uint32 stream_i)
 
native_type get_next_compute_stream (pi_uint32 *stream_token=nullptr)
 
native_type get_next_compute_stream (pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, _pi_stream_guard &guard, pi_uint32 *stream_token=nullptr)
 
native_type get_next_transfer_stream ()
 
native_type get ()
 
bool has_been_synchronized (pi_uint32 stream_token)
 
bool can_reuse_stream (pi_uint32 stream_token)
 
template<typename T >
bool all_of (T &&f)
 
template<typename T >
void for_each_stream (T &&f)
 
template<bool ResetUsed = false, typename T >
void sync_streams (T &&f)
 
_pi_context * get_context () const
 
_pi_device * get_device () const
 
pi_uint32 increment_reference_count () noexcept
 
pi_uint32 decrement_reference_count () noexcept
 
pi_uint32 get_reference_count () const noexcept
 
pi_uint32 get_next_event_id () noexcept
 

Public Attributes

std::vector< native_type > compute_streams_
 
std::vector< native_type > transfer_streams_
 
std::vector< bool > delay_compute_
 
std::vector< bool > compute_applied_barrier_
 
std::vector< bool > transfer_applied_barrier_
 
_pi_context * context_
 
_pi_device * device_
 
pi_queue_properties properties_
 
CUevent barrier_event_ = nullptr
 
CUevent barrier_tmp_event_ = nullptr
 
std::atomic_uint32_t refCount_
 
std::atomic_uint32_t eventCount_
 
std::atomic_uint32_t compute_stream_idx_
 
std::atomic_uint32_t transfer_stream_idx_
 
unsigned int num_compute_streams_
 
unsigned int num_transfer_streams_
 
unsigned int last_sync_compute_streams_
 
unsigned int last_sync_transfer_streams_
 
unsigned int flags_
 
std::mutex compute_stream_sync_mutex_
 
std::mutex compute_stream_mutex_
 
std::mutex transfer_stream_mutex_
 
std::mutex barrier_mutex_
 
bool has_ownership_
 
pi_context Context = nullptr
 
cm_support::CmQueue * CmQueuePtr = nullptr
 
hipEvent_t barrier_event_ = nullptr
 
hipEvent_t barrier_tmp_event_ = nullptr
 

Static Public Attributes

static constexpr int default_num_compute_streams = 128
 
static constexpr int default_num_transfer_streams = 64
 

Detailed Description

PI queue mapping on to CUstream objects.

PI queue mapping on to hipStream_t objects.

Definition at line 395 of file pi_cuda.hpp.

Member Typedef Documentation

◆ native_type [1/2]

using _pi_queue::native_type = hipStream_t

Definition at line 378 of file pi_hip.hpp.

◆ native_type [2/2]

Definition at line 396 of file pi_cuda.hpp.

Constructor & Destructor Documentation

◆ _pi_queue() [1/3]

_pi_queue::_pi_queue ( std::vector< CUstream > &&  compute_streams,
std::vector< CUstream > &&  transfer_streams,
_pi_context *  context,
_pi_device *  device,
pi_queue_properties  properties,
unsigned int  flags,
bool  backend_owns = true 
)
inline

Definition at line 433 of file pi_cuda.hpp.

◆ ~_pi_queue() [1/2]

_pi_queue::~_pi_queue ( )
inline

Definition at line 451 of file pi_cuda.hpp.

References context_, cuda_piContextRelease(), cuda_piDeviceRelease(), and device_.

◆ _pi_queue() [2/3]

_pi_queue::_pi_queue ( pi_context  ContextArg,
cm_support::CmQueue *  CmQueueArg 
)
inline

Definition at line 110 of file pi_esimd_emulator.hpp.

◆ _pi_queue() [3/3]

_pi_queue::_pi_queue ( std::vector< native_type > &&  compute_streams,
std::vector< native_type > &&  transfer_streams,
_pi_context *  context,
_pi_device *  device,
pi_queue_properties  properties,
unsigned int  flags 
)
inline

Definition at line 414 of file pi_hip.hpp.

◆ ~_pi_queue() [2/2]

_pi_queue::~_pi_queue ( )
inline

Definition at line 432 of file pi_hip.hpp.

References context_, device_, hip_piContextRelease(), and hip_piDeviceRelease().

Member Function Documentation

◆ all_of() [1/2]

template<typename T >
bool _pi_queue::all_of ( T &&  f)
inline

◆ all_of() [2/2]

template<typename T >
bool _pi_queue::all_of ( T &&  f)
inline

◆ backend_has_ownership()

bool _pi_queue::backend_has_ownership ( ) const
inline noexcept

Definition at line 625 of file pi_cuda.hpp.

References has_ownership_.

Referenced by cuda_piQueueRelease().

◆ can_reuse_stream() [1/2]

bool _pi_queue::can_reuse_stream ( pi_uint32  stream_token)
inline

Definition at line 463 of file pi_hip.hpp.

References compute_stream_idx_, compute_streams_, has_been_synchronized(), and max().

◆ can_reuse_stream() [2/2]

bool _pi_queue::can_reuse_stream ( pi_uint32  stream_token)
inline

Definition at line 482 of file pi_cuda.hpp.

References compute_stream_idx_, compute_streams_, has_been_synchronized(), and max().

◆ compute_stream_wait_for_barrier_if_needed() [1/2]

void _pi_queue::compute_stream_wait_for_barrier_if_needed ( CUstream  stream,
pi_uint32  stream_i 
)

Definition at line 502 of file pi_cuda.cpp.

◆ compute_stream_wait_for_barrier_if_needed() [2/2]

void _pi_queue::compute_stream_wait_for_barrier_if_needed ( hipStream_t  stream,
pi_uint32  stream_i 
)

Definition at line 452 of file pi_hip.cpp.

References barrier_event_, and compute_applied_barrier_.

◆ decrement_reference_count() [1/2]

pi_uint32 _pi_queue::decrement_reference_count ( )
inline noexcept

Definition at line 600 of file pi_hip.hpp.

References refCount_.

◆ decrement_reference_count() [2/2]

pi_uint32 _pi_queue::decrement_reference_count ( )
inline noexcept

Definition at line 619 of file pi_cuda.hpp.

References refCount_.

Referenced by cuda_piQueueRelease(), and hip_piQueueRelease().

◆ for_each_stream() [1/2]

template<typename T >
void _pi_queue::for_each_stream ( T &&  f)
inline

◆ for_each_stream() [2/2]

template<typename T >
void _pi_queue::for_each_stream ( T &&  f)
inline

◆ get() [1/2]

native_type _pi_queue::get ( )
inline

Definition at line 453 of file pi_hip.hpp.

References get_next_compute_stream().

◆ get() [2/2]

native_type _pi_queue::get ( )
inline

Definition at line 472 of file pi_cuda.hpp.

References get_next_compute_stream().

◆ get_context() [1/2]

_pi_context* _pi_queue::get_context ( ) const
inline

Definition at line 594 of file pi_hip.hpp.

References context_.

◆ get_context() [2/2]

◆ get_device() [1/2]

_pi_device* _pi_queue::get_device ( ) const
inline

Definition at line 596 of file pi_hip.hpp.

References device_.

◆ get_device() [2/2]

_pi_device* _pi_queue::get_device ( ) const
inline

◆ get_next_compute_stream() [1/4]

native_type _pi_queue::get_next_compute_stream ( pi_uint32 * stream_token = nullptr)

◆ get_next_compute_stream() [2/4]

◆ get_next_compute_stream() [3/4]

native_type _pi_queue::get_next_compute_stream ( pi_uint32  num_events_in_wait_list,
const pi_event *  event_wait_list,
_pi_stream_guard &  guard,
pi_uint32 *  stream_token = nullptr 
)

◆ get_next_compute_stream() [4/4]

hipStream_t _pi_queue::get_next_compute_stream ( pi_uint32  num_events_in_wait_list,
const pi_event *  event_wait_list,
_pi_stream_guard &  guard,
pi_uint32 *  stream_token = nullptr 
)

Definition at line 551 of file pi_cuda.cpp.

References _pi_event::get_compute_stream_token(), and _pi_event::get_stream().

◆ get_next_event_id() [1/2]

pi_uint32 _pi_queue::get_next_event_id ( )
inline noexcept

Definition at line 604 of file pi_hip.hpp.

References eventCount_.

◆ get_next_event_id() [2/2]

pi_uint32 _pi_queue::get_next_event_id ( )
inline noexcept

Definition at line 623 of file pi_cuda.hpp.

References eventCount_.

Referenced by _pi_event::record().

◆ get_next_transfer_stream() [1/2]

native_type _pi_queue::get_next_transfer_stream ( )

◆ get_next_transfer_stream() [2/2]

◆ get_reference_count() [1/2]

pi_uint32 _pi_queue::get_reference_count ( ) const
inline noexcept

Definition at line 602 of file pi_hip.hpp.

References refCount_.

◆ get_reference_count() [2/2]

pi_uint32 _pi_queue::get_reference_count ( ) const
inline noexcept

Definition at line 621 of file pi_cuda.hpp.

References refCount_.

Referenced by cuda_piQueueGetInfo(), cuda_piQueueRetain(), hip_piQueueGetInfo(), and hip_piQueueRetain().

◆ has_been_synchronized() [1/2]

bool _pi_queue::has_been_synchronized ( pi_uint32  stream_token)
inline

Definition at line 455 of file pi_hip.hpp.

References last_sync_compute_streams_, and max().

◆ has_been_synchronized() [2/2]

bool _pi_queue::has_been_synchronized ( pi_uint32  stream_token)
inline

Definition at line 474 of file pi_cuda.hpp.

References last_sync_compute_streams_, and max().

Referenced by can_reuse_stream().

◆ increment_reference_count() [1/2]

pi_uint32 _pi_queue::increment_reference_count ( )
inline noexcept

Definition at line 598 of file pi_hip.hpp.

References refCount_.

◆ increment_reference_count() [2/2]

pi_uint32 _pi_queue::increment_reference_count ( )
inline noexcept

Definition at line 617 of file pi_cuda.hpp.

References refCount_.

Referenced by cuda_piQueueRetain(), and hip_piQueueRetain().

◆ sync_streams() [1/2]

◆ sync_streams() [2/2]

◆ transfer_stream_wait_for_barrier_if_needed() [1/2]

void _pi_queue::transfer_stream_wait_for_barrier_if_needed ( CUstream  stream,
pi_uint32  stream_i 
)

Definition at line 510 of file pi_cuda.cpp.

◆ transfer_stream_wait_for_barrier_if_needed() [2/2]

void _pi_queue::transfer_stream_wait_for_barrier_if_needed ( hipStream_t  stream,
pi_uint32  stream_i 
)

Definition at line 460 of file pi_hip.cpp.

References barrier_event_, and transfer_applied_barrier_.

Member Data Documentation

◆ barrier_event_ [1/2]

hipEvent_t _pi_queue::barrier_event_ = nullptr

Definition at line 395 of file pi_hip.hpp.

◆ barrier_event_ [2/2]

◆ barrier_mutex_

std::mutex _pi_queue::barrier_mutex_

◆ barrier_tmp_event_ [1/2]

hipEvent_t _pi_queue::barrier_tmp_event_ = nullptr

Definition at line 396 of file pi_hip.hpp.

◆ barrier_tmp_event_ [2/2]

CUevent _pi_queue::barrier_tmp_event_ = nullptr

◆ CmQueuePtr

cm_support::CmQueue* _pi_queue::CmQueuePtr = nullptr

Definition at line 115 of file pi_esimd_emulator.hpp.

Referenced by piQueueRelease().

◆ compute_applied_barrier_

std::vector< bool > _pi_queue::compute_applied_barrier_

◆ compute_stream_idx_

std::atomic_uint32_t _pi_queue::compute_stream_idx_

Definition at line 417 of file pi_cuda.hpp.

Referenced by can_reuse_stream(), and sync_streams().

◆ compute_stream_mutex_

std::mutex _pi_queue::compute_stream_mutex_

Definition at line 428 of file pi_cuda.hpp.

Referenced by all_of(), for_each_stream(), and sync_streams().

◆ compute_stream_sync_mutex_

std::mutex _pi_queue::compute_stream_sync_mutex_

Definition at line 427 of file pi_cuda.hpp.

Referenced by sync_streams().

◆ compute_streams_

std::vector< native_type > _pi_queue::compute_streams_

Definition at line 400 of file pi_cuda.hpp.

Referenced by all_of(), can_reuse_stream(), for_each_stream(), and sync_streams().

◆ Context

pi_context _pi_queue::Context = nullptr

Definition at line 114 of file pi_esimd_emulator.hpp.

◆ context_

_pi_context * _pi_queue::context_

Definition at line 410 of file pi_cuda.hpp.

Referenced by cuda_piQueueGetInfo(), get_context(), hip_piQueueGetInfo(), and ~_pi_queue().

◆ default_num_compute_streams

static constexpr int _pi_queue::default_num_compute_streams = 128
static constexpr

Definition at line 397 of file pi_cuda.hpp.

Referenced by cuda_piQueueCreate(), and hip_piQueueCreate().

◆ default_num_transfer_streams

static constexpr int _pi_queue::default_num_transfer_streams = 64
static constexpr

Definition at line 398 of file pi_cuda.hpp.

Referenced by cuda_piQueueCreate(), and hip_piQueueCreate().

◆ delay_compute_

std::vector< bool > _pi_queue::delay_compute_

Definition at line 406 of file pi_cuda.hpp.

Referenced by sync_streams().

◆ device_

◆ eventCount_

std::atomic_uint32_t _pi_queue::eventCount_

Definition at line 416 of file pi_cuda.hpp.

Referenced by get_next_event_id().

◆ flags_

unsigned int _pi_queue::flags_

Definition at line 423 of file pi_cuda.hpp.

◆ has_ownership_

bool _pi_queue::has_ownership_

Definition at line 431 of file pi_cuda.hpp.

Referenced by backend_has_ownership().

◆ last_sync_compute_streams_

unsigned int _pi_queue::last_sync_compute_streams_

Definition at line 421 of file pi_cuda.hpp.

Referenced by has_been_synchronized(), and sync_streams().

◆ last_sync_transfer_streams_

unsigned int _pi_queue::last_sync_transfer_streams_

Definition at line 422 of file pi_cuda.hpp.

Referenced by sync_streams().

◆ num_compute_streams_

unsigned int _pi_queue::num_compute_streams_

Definition at line 419 of file pi_cuda.hpp.

Referenced by all_of(), for_each_stream(), and sync_streams().

◆ num_transfer_streams_

unsigned int _pi_queue::num_transfer_streams_

Definition at line 420 of file pi_cuda.hpp.

Referenced by all_of(), for_each_stream(), and sync_streams().

◆ properties_

◆ refCount_

std::atomic_uint32_t _pi_queue::refCount_

◆ transfer_applied_barrier_

std::vector< bool > _pi_queue::transfer_applied_barrier_

◆ transfer_stream_idx_

std::atomic_uint32_t _pi_queue::transfer_stream_idx_

Definition at line 418 of file pi_cuda.hpp.

Referenced by sync_streams().

◆ transfer_stream_mutex_

std::mutex _pi_queue::transfer_stream_mutex_

Definition at line 429 of file pi_cuda.hpp.

Referenced by all_of(), for_each_stream(), and sync_streams().

◆ transfer_streams_

std::vector< native_type > _pi_queue::transfer_streams_

Definition at line 401 of file pi_cuda.hpp.

Referenced by all_of(), for_each_stream(), and sync_streams().


The documentation for this struct was generated from the following files: