DPC++ Runtime
Runtime libraries for oneAPI Data Parallel C++
cl::sycl::ext::oneapi::detail Namespace Reference

Classes

class  __sycl_reduction_aux_kernel
 
class  __sycl_reduction_main_kernel
 These are the forward declaration for the classes that help to create names for additional kernels. More...
 
struct  AreAllButLastReductions
 Predicate returning true if all template type parameters except the last one are reductions. More...
 
struct  AreAllButLastReductions< T >
 Helper specialization of AreAllButLastReductions for one element only. More...
 
class  atomic_ref_base
 
class  atomic_ref_impl
 
class  atomic_ref_impl< T *, DefaultOrder, DefaultScope, AddressSpace >
 
class  atomic_ref_impl< T, DefaultOrder, DefaultScope, AddressSpace, typename detail::enable_if_t< std::is_floating_point< T >::value > >
 
class  atomic_ref_impl< T, DefaultOrder, DefaultScope, AddressSpace, typename detail::enable_if_t< std::is_integral< T >::value > >
 
struct  bit_equal
 
struct  bit_equal< double >
 
struct  bit_equal< float >
 
struct  bit_equal< T, typename detail::enable_if_t< std::is_integral< T >::value > >
 
struct  EmptyReductionPredicate
 
class  filter_selector_impl
 
struct  FilterElement
 
struct  get_reduction_aux_kernel_name_t
 
struct  get_reduction_aux_kernel_name_t< sycl::detail::auto_name, Type, B1, B2, T3 >
 
struct  get_reduction_main_kernel_name_t
 Helper structs to get additional kernel name types based on given Name and additional template parameters helping to distinguish kernels. More...
 
struct  get_reduction_main_kernel_name_t< sycl::detail::auto_name, Type, B1, B2, T3 >
 
struct  IsNonUsmReductionPredicate
 
struct  memory_order_traits
 
struct  memory_order_traits< memory_order::acq_rel >
 
struct  memory_order_traits< memory_order::relaxed >
 
struct  memory_order_traits< memory_order::seq_cst >
 
class  reducer
 Class that is used to represent objects that are passed to user's lambda functions and representing users' reduction variable. More...
 
class  reducer< T, BinaryOperation, enable_if_t< IsKnownIdentityOp< T, BinaryOperation >::value > >
 Specialization of the generic class 'reducer'. More...
 
class  reduction_impl
 This class encapsulates the reduction variable/accessor, the reduction operator and an optional operator identity. More...
 
class  reduction_impl_base
 Base non-template class which is a base class for all reduction implementation classes. More...
 

Typedefs

using memory_order = cl::sycl::ext::oneapi::memory_order
 
using memory_scope = cl::sycl::ext::oneapi::memory_scope
 
template<typename T >
using IsValidAtomicType = bool_constant< std::is_arithmetic< T >::value||std::is_pointer< T >::value >
 
template<cl::sycl::access::address_space AS>
using IsValidAtomicAddressSpace = bool_constant< AS==access::address_space::global_space||AS==access::address_space::local_space||AS==access::address_space::global_device_space >
 
template<memory_order Order>
using IsValidDefaultOrder = bool_constant< Order==memory_order::relaxed||Order==memory_order::acq_rel||Order==memory_order::seq_cst >
 
template<typename T , class BinaryOperation >
using IsReduOptForFastAtomicFetch = bool_constant< sycl::detail::is_sgeninteger< T >::value &&sycl::detail::IsValidAtomicType< T >::value &&(sycl::detail::IsPlus< T, BinaryOperation >::value||sycl::detail::IsMinimum< T, BinaryOperation >::value||sycl::detail::IsMaximum< T, BinaryOperation >::value||sycl::detail::IsBitOR< T, BinaryOperation >::value||sycl::detail::IsBitXOR< T, BinaryOperation >::value||sycl::detail::IsBitAND< T, BinaryOperation >::value)>
 
template<typename T , class BinaryOperation >
using IsReduOptForAtomic64Add = bool_constant< sycl::detail::IsPlus< T, BinaryOperation >::value &&sycl::detail::is_sgenfloat< T >::value &&(sizeof(T)==4||sizeof(T)==8)>
 
template<typename T , class BinaryOperation >
using IsReduOptForFastReduce = bool_constant<((sycl::detail::is_sgeninteger< T >::value &&(sizeof(T)==4||sizeof(T)==8))||sycl::detail::is_sgenfloat< T >::value) &&(sycl::detail::IsPlus< T, BinaryOperation >::value||sycl::detail::IsMinimum< T, BinaryOperation >::value||sycl::detail::IsMaximum< T, BinaryOperation >::value)>
 
template<typename... Ts>
using ReduTupleT = sycl::detail::tuple< Ts... >
 
typedef struct sycl::detail::device_filter filter
 

Functions

template<typename KernelName , typename KernelType , int Dims, class Reduction >
void reduCGFunc (handler &CGH, KernelType KernelFunc, const range< Dims > &Range, size_t MaxWGSize, uint32_t NumConcurrentWorkGroups, Reduction &Redu)
 
template<typename KernelName , typename KernelType , int Dims, class Reduction >
enable_if_t< Reduction::has_atomic_add_float64 > reduCGFuncAtomic64 (handler &CGH, KernelType KernelFunc, const nd_range< Dims > &Range, Reduction &Redu)
 
template<typename KernelName , typename KernelType , int Dims, class Reduction >
enable_if_t< Reduction::has_fast_atomics > reduCGFunc (handler &CGH, KernelType KernelFunc, const nd_range< Dims > &Range, Reduction &Redu)
 
template<typename KernelName , typename KernelType , int Dims, class Reduction >
enable_if_t<!Reduction::has_fast_atomics > reduCGFunc (handler &CGH, KernelType KernelFunc, const nd_range< Dims > &Range, Reduction &Redu)
 
template<typename KernelName , typename KernelType , class Reduction >
enable_if_t<!Reduction::has_fast_atomics, size_t > reduAuxCGFunc (handler &CGH, size_t NWorkItems, size_t MaxWGSize, Reduction &Redu)
 Implements a command group function that enqueues a kernel that does one iteration of reduction of elements in each of work-groups. More...
 
template<typename KernelName , typename KernelType , int Dims, typename... Reductions, size_t... Is>
void reduCGFunc (handler &CGH, KernelType KernelFunc, const nd_range< Dims > &Range, std::tuple< Reductions... > &ReduTuple, std::index_sequence< Is... >)
 
template<typename KernelName , typename KernelType , typename... Reductions, size_t... Is>
size_t reduAuxCGFunc (handler &CGH, size_t NWorkItems, size_t MaxWGSize, std::tuple< Reductions... > &ReduTuple, std::index_sequence< Is... >)
 
template<typename KernelName , class Reduction >
std::enable_if_t<!Reduction::is_usm > reduSaveFinalResultToUserMem (handler &CGH, Reduction &Redu)
 Copies the final reduction result kept in read-write accessor to user's accessor. More...
 
template<typename KernelName , class Reduction >
std::enable_if_t< Reduction::is_usm > reduSaveFinalResultToUserMem (handler &CGH, Reduction &Redu)
 Copies the final reduction result kept in read-write accessor to user's USM memory. More...
 
template<typename... Reduction, size_t... Is>
std::shared_ptr< eventreduSaveFinalResultToUserMem (std::shared_ptr< detail::queue_impl > Queue, bool IsHost, std::tuple< Reduction... > &ReduTuple, std::index_sequence< Is... >)
 Creates additional kernels that copy the accumulated/final results from reductions accessors to either user's accessor or user's USM memory. More...
 
template<typename Reduction , typename... RestT>
std::enable_if_t<!Reduction::is_usm > reduSaveFinalResultToUserMemHelper (std::vector< event > &Events, std::shared_ptr< detail::queue_impl > Queue, bool IsHost, Reduction &Redu, RestT... Rest)
 
uint32_t reduGetMaxNumConcurrentWorkGroups (std::shared_ptr< queue_impl > Queue)
 
size_t reduGetMaxWGSize (std::shared_ptr< queue_impl > Queue, size_t LocalMemBytesPerWorkItem)
 
template<typename... ReductionT, size_t... Is>
size_t reduGetMemPerWorkItem (std::tuple< ReductionT... > &ReduTuple, std::index_sequence< Is... >)
 
template<typename TupleT , std::size_t... Is>
std::tuple< std::tuple_element_t< Is, TupleT >... > tuple_select_elements (TupleT Tuple, std::index_sequence< Is... >)
 Utility function: for the given tuple. More...
 
static constexpr std::memory_order getStdMemoryOrder (::cl::sycl::ext::oneapi::memory_order order)
 
constexpr memory_order getLoadOrder (memory_order order)
 
template<typename... Ts>
ReduTupleT< Ts... > makeReduTupleT (Ts... Elements)
 
size_t reduComputeWGSize (size_t NWorkItems, size_t MaxWGSize, size_t &NWorkGroups)
 
template<typename KernelFunc , int Dims, typename ReducerT >
void reductionLoop (const range< Dims > &Range, ReducerT &Reducer, const nd_item< 1 > &NdId, KernelFunc &F)
 Called in device code. More...
 
template<typename KernelName , typename KernelType , int Dims, class Reduction >
std::enable_if_t< Reduction::has_fast_atomics > reduCGFuncImpl (handler &CGH, KernelType KernelFunc, const range< Dims > &Range, const nd_range< 1 > &NDRange, Reduction &Redu)
 
template<typename KernelName , typename KernelType , int Dims, class Reduction >
std::enable_if_t<!Reduction::has_fast_atomics &&Reduction::has_fast_reduce > reduCGFuncImpl (handler &CGH, KernelType KernelFunc, const range< Dims > &Range, const nd_range< 1 > &NDRange, Reduction &Redu)
 
template<typename KernelName , typename KernelType , int Dims, class Reduction >
std::enable_if_t<!Reduction::has_fast_atomics &&!Reduction::has_fast_reduce > reduCGFuncImpl (handler &CGH, KernelType KernelFunc, const range< Dims > &Range, const nd_range< 1 > &NDRange, Reduction &Redu)
 
template<typename KernelName , typename KernelType , int Dims, class Reduction , bool IsPow2WG>
enable_if_t< Reduction::has_fast_reduce &&Reduction::has_fast_atomics > reduCGFuncImpl (handler &CGH, KernelType KernelFunc, const nd_range< Dims > &Range, Reduction &, typename Reduction::rw_accessor_type Out)
 Implements a command group function that enqueues a kernel that calls user's lambda function KernelFunc and also does one iteration of reduction of elements computed in user's lambda function. More...
 
template<typename KernelName , typename KernelType , int Dims, class Reduction , bool IsPow2WG>
enable_if_t<!Reduction::has_fast_reduce &&Reduction::has_fast_atomics > reduCGFuncImpl (handler &CGH, KernelType KernelFunc, const nd_range< Dims > &Range, Reduction &, typename Reduction::rw_accessor_type Out)
 Implements a command group function that enqueues a kernel that calls user's lambda function KernelFunc and also does one iteration of reduction of elements computed in user's lambda function. More...
 
template<typename KernelName , typename KernelType , int Dims, class Reduction , bool IsPow2WG>
enable_if_t< Reduction::has_fast_reduce &&!Reduction::has_fast_atomics > reduCGFuncImpl (handler &CGH, KernelType KernelFunc, const nd_range< Dims > &Range, Reduction &Redu, typename Reduction::rw_accessor_type Out)
 Implements a command group function that enqueues a kernel that calls user's lambda function and does one iteration of reduction of elements in each of work-groups. More...
 
template<typename KernelName , typename KernelType , int Dims, class Reduction , bool IsPow2WG>
enable_if_t<!Reduction::has_fast_reduce &&!Reduction::has_fast_atomics > reduCGFuncImpl (handler &CGH, KernelType KernelFunc, const nd_range< Dims > &Range, Reduction &Redu, typename Reduction::rw_accessor_type Out)
 Implements a command group function that enqueues a kernel that calls user's lambda function. More...
 
template<typename KernelName , typename KernelType , bool UniformWG, class Reduction , typename InputT , typename OutputT >
enable_if_t< Reduction::has_fast_reduce &&!Reduction::has_fast_atomics > reduAuxCGFuncImpl (handler &CGH, size_t NWorkItems, size_t NWorkGroups, size_t WGSize, Reduction &Redu, InputT In, OutputT Out)
 Implements a command group function that enqueues a kernel that does one iteration of reduction of elements in each of work-groups. More...
 
template<typename KernelName , typename KernelType , bool UniformPow2WG, class Reduction , typename InputT , typename OutputT >
enable_if_t<!Reduction::has_fast_reduce &&!Reduction::has_fast_atomics > reduAuxCGFuncImpl (handler &CGH, size_t NWorkItems, size_t NWorkGroups, size_t WGSize, Reduction &Redu, InputT In, OutputT Out)
 Implements a command group function that enqueues a kernel that does one iteration of reduction of elements in each of work-groups. More...
 
template<typename... Reductions, size_t... Is>
auto createReduLocalAccs (size_t Size, handler &CGH, std::index_sequence< Is... >)
 For the given 'Reductions' types pack and indices enumerating only the reductions for which a local accessors are needed, this function creates those local accessors and returns a tuple consisting of them. More...
 
template<bool IsOneWG, typename... Reductions, size_t... Is>
auto createReduOutAccs (size_t NWorkGroups, handler &CGH, std::tuple< Reductions... > &ReduTuple, std::index_sequence< Is... >)
 For the given 'Reductions' types pack and indices enumerating them this function either creates new temporary accessors for partial sums (if IsOneWG is false) or returns user's accessor/USM-pointer if (IsOneWG is true). More...
 
template<typename... Reductions, size_t... Is>
auto getReadAccsToPreviousPartialReds (handler &CGH, std::tuple< Reductions... > &ReduTuple, std::index_sequence< Is... >)
 For the given 'Reductions' types pack and indices enumerating them this function returns accessors to buffers holding partial sums generated in the previous kernel invocation. More...
 
template<typename... Reductions, size_t... Is>
ReduTupleT< typename Reductions::result_type... > getReduIdentities (std::tuple< Reductions... > &ReduTuple, std::index_sequence< Is... >)
 
template<typename... Reductions, size_t... Is>
ReduTupleT< typename Reductions::binary_operation... > getReduBOPs (std::tuple< Reductions... > &ReduTuple, std::index_sequence< Is... >)
 
template<typename... Reductions, size_t... Is>
std::array< bool, sizeof...(Reductions)> getInitToIdentityProperties (std::tuple< Reductions... > &ReduTuple, std::index_sequence< Is... >)
 
template<typename... Reductions, size_t... Is>
std::tuple< typename Reductions::reducer_type... > createReducers (ReduTupleT< typename Reductions::result_type... > Identities, ReduTupleT< typename Reductions::binary_operation... > BOPsTuple, std::index_sequence< Is... >)
 
template<typename KernelType , int Dims, typename... ReducerT, size_t... Is>
void callReduUserKernelFunc (KernelType KernelFunc, nd_item< Dims > NDIt, std::tuple< ReducerT... > &Reducers, std::index_sequence< Is... >)
 
template<bool Pow2WG, typename... LocalAccT, typename... ReducerT, typename... ResultT, size_t... Is>
void initReduLocalAccs (size_t LID, size_t WGSize, ReduTupleT< LocalAccT... > LocalAccs, const std::tuple< ReducerT... > &Reducers, ReduTupleT< ResultT... > Identities, std::index_sequence< Is... >)
 
template<bool UniformPow2WG, typename... LocalAccT, typename... InputAccT, typename... ResultT, size_t... Is>
void initReduLocalAccs (size_t LID, size_t GID, size_t NWorkItems, size_t WGSize, ReduTupleT< InputAccT... > LocalAccs, ReduTupleT< LocalAccT... > InputAccs, ReduTupleT< ResultT... > Identities, std::index_sequence< Is... >)
 
template<typename... LocalAccT, typename... BOPsT, size_t... Is>
void reduceReduLocalAccs (size_t IndexA, size_t IndexB, ReduTupleT< LocalAccT... > LocalAccs, ReduTupleT< BOPsT... > BOPs, std::index_sequence< Is... >)
 
template<bool Pow2WG, bool IsOneWG, typename... Reductions, typename... OutAccT, typename... LocalAccT, typename... BOPsT, typename... Ts, size_t... Is>
void writeReduSumsToOutAccs (size_t OutAccIndex, size_t WGSize, std::tuple< Reductions... > *, ReduTupleT< OutAccT... > OutAccs, ReduTupleT< LocalAccT... > LocalAccs, ReduTupleT< BOPsT... > BOPs, ReduTupleT< Ts... > IdentityVals, std::array< bool, sizeof...(Reductions)> IsInitializeToIdentity, std::index_sequence< Is... >)
 
constexpr std::index_sequence concat_sequences (std::index_sequence<>)
 
template<size_t I>
constexpr std::index_sequence< I > concat_sequences (std::index_sequence< I >)
 
template<size_t... Is, size_t... Js>
constexpr std::index_sequence< Is..., Js... > concat_sequences (std::index_sequence< Is... >, std::index_sequence< Js... >)
 
template<size_t... Is, size_t... Js, class... Rs>
constexpr auto concat_sequences (std::index_sequence< Is... >, std::index_sequence< Js... >, Rs...)
 
template<typename... T, typename FunctorT , size_t... Is, std::enable_if_t<(sizeof...(Is) > 0), int > Z = 0>
constexpr auto filterSequenceHelper (FunctorT, std::index_sequence< Is... >)
 For each index 'I' from the given indices pack 'Is' this function initially creates a number of short index_sequences, where each of such short index sequences is either empty (if the given Functor returns false for the type T[I]) or 1 element 'I' (otherwise). More...
 
template<typename... T, typename FunctorT , size_t... Is>
constexpr auto filterSequence (FunctorT F, std::index_sequence< Is... > Indices)
 For each index 'I' from the given indices pack 'Is' this function returns an index sequence consisting of only those 'I's for which the 'FunctorT' applied to 'T[I]' returns true. More...
 
template<typename KernelName , bool Pow2WG, bool IsOneWG, typename KernelType , int Dims, typename... Reductions, size_t... Is>
void reduCGFuncImpl (handler &CGH, KernelType KernelFunc, const nd_range< Dims > &Range, std::tuple< Reductions... > &ReduTuple, std::index_sequence< Is... > ReduIndices)
 
template<typename KernelName , typename KernelType , int Dims, class Reduction >
std::enable_if_t< Reduction::has_atomic_add_float64 > reduCGFuncImplAtomic64 (handler &CGH, KernelType KernelFunc, const nd_range< Dims > &Range, Reduction &, typename Reduction::rw_accessor_type Out)
 
void associateReduAccsWithHandlerHelper (handler &)
 
template<typename ReductionT >
void associateReduAccsWithHandlerHelper (handler &CGH, ReductionT &Redu)
 
template<typename ReductionT , typename... RestT, enable_if_t<(sizeof...(RestT) > 0), int > Z = 0>
void associateReduAccsWithHandlerHelper (handler &CGH, ReductionT &Redu, RestT &... Rest)
 
template<typename... Reductions, size_t... Is>
void associateReduAccsWithHandler (handler &CGH, std::tuple< Reductions... > &ReduTuple, std::index_sequence< Is... >)
 
template<typename KernelName , typename KernelType , bool UniformPow2WG, bool IsOneWG, typename... Reductions, size_t... Is>
void reduAuxCGFuncImpl (handler &CGH, size_t NWorkItems, size_t NWorkGroups, size_t WGSize, std::tuple< Reductions... > &ReduTuple, std::index_sequence< Is... > ReduIndices)
 
void reduSaveFinalResultToUserMemHelper (std::vector< event > &, std::shared_ptr< detail::queue_impl >, bool)
 
template<typename Reduction , typename... RestT>
std::enable_if_t< Reduction::is_usm > reduSaveFinalResultToUserMemHelper (std::vector< event > &Events, std::shared_ptr< detail::queue_impl > Queue, bool IsHost, Reduction &, RestT... Rest)
 
template<typename Reduction >
size_t reduGetMemPerWorkItemHelper (Reduction &)
 
template<typename Reduction , typename... RestT>
size_t reduGetMemPerWorkItemHelper (Reduction &, RestT... Rest)
 
std::vector< std::string > tokenize (const std::string &Filter, const std::string &Delim)
 
filter create_filter (const std::string &Input)
 
uint32_t reduGetMaxNumConcurrentWorkGroups (std::shared_ptr< sycl::detail::queue_impl > Queue)
 
size_t reduGetMaxWGSize (std::shared_ptr< sycl::detail::queue_impl > Queue, size_t LocalMemBytesPerWorkItem)
 

Typedef Documentation

◆ filter

◆ IsReduOptForAtomic64Add

template<typename T , class BinaryOperation >
using cl::sycl::ext::oneapi::detail::IsReduOptForAtomic64Add = typedef bool_constant<sycl::detail::IsPlus<T, BinaryOperation>::value && sycl::detail::is_sgenfloat<T>::value && (sizeof(T) == 4 || sizeof(T) == 8)>

Definition at line 76 of file reduction.hpp.

◆ IsReduOptForFastAtomicFetch

template<typename T , class BinaryOperation >
using cl::sycl::ext::oneapi::detail::IsReduOptForFastAtomicFetch = typedef bool_constant<sycl::detail::is_sgeninteger<T>::value && sycl::detail::IsValidAtomicType<T>::value && (sycl::detail::IsPlus<T, BinaryOperation>::value || sycl::detail::IsMinimum<T, BinaryOperation>::value || sycl::detail::IsMaximum<T, BinaryOperation>::value || sycl::detail::IsBitOR<T, BinaryOperation>::value || sycl::detail::IsBitXOR<T, BinaryOperation>::value || sycl::detail::IsBitAND<T, BinaryOperation>::value)>

Definition at line 54 of file reduction.hpp.

◆ IsReduOptForFastReduce

template<typename T , class BinaryOperation >
using cl::sycl::ext::oneapi::detail::IsReduOptForFastReduce = typedef bool_constant<((sycl::detail::is_sgeninteger<T>::value && (sizeof(T) == 4 || sizeof(T) == 8)) || sycl::detail::is_sgenfloat<T>::value) && (sycl::detail::IsPlus<T, BinaryOperation>::value || sycl::detail::IsMinimum<T, BinaryOperation>::value || sycl::detail::IsMaximum<T, BinaryOperation>::value)>

Definition at line 94 of file reduction.hpp.

◆ IsValidAtomicAddressSpace

template<cl::sycl::access::address_space AS>
using cl::sycl::ext::oneapi::detail::IsValidAtomicAddressSpace = typedef bool_constant<AS == access::address_space::global_space || AS == access::address_space::local_space || AS == access::address_space::global_device_space>

Definition at line 48 of file atomic_ref.hpp.

◆ IsValidAtomicType

template<typename T >
using cl::sycl::ext::oneapi::detail::IsValidAtomicType = typedef bool_constant<std::is_arithmetic<T>::value || std::is_pointer<T>::value>

Definition at line 42 of file atomic_ref.hpp.

◆ IsValidDefaultOrder

template<memory_order Order>
using cl::sycl::ext::oneapi::detail::IsValidDefaultOrder = typedef bool_constant<Order == memory_order::relaxed || Order == memory_order::acq_rel || Order == memory_order::seq_cst>

Definition at line 54 of file atomic_ref.hpp.

◆ memory_order

◆ memory_scope

◆ ReduTupleT

template<typename... Ts>
using cl::sycl::ext::oneapi::detail::ReduTupleT = typedef sycl::detail::tuple<Ts...>

Definition at line 101 of file reduction.hpp.

Function Documentation

◆ associateReduAccsWithHandler()

template<typename... Reductions, size_t... Is>
void cl::sycl::ext::oneapi::detail::associateReduAccsWithHandler ( handler CGH,
std::tuple< Reductions... > &  ReduTuple,
std::index_sequence< Is... >   
)

Definition at line 1828 of file reduction.hpp.

References associateReduAccsWithHandlerHelper().

Referenced by reduAuxCGFuncImpl().

◆ associateReduAccsWithHandlerHelper() [1/3]

void cl::sycl::ext::oneapi::detail::associateReduAccsWithHandlerHelper ( handler )
inline

Definition at line 1812 of file reduction.hpp.

◆ associateReduAccsWithHandlerHelper() [2/3]

template<typename ReductionT >
void cl::sycl::ext::oneapi::detail::associateReduAccsWithHandlerHelper ( handler CGH,
ReductionT &  Redu 
)

Definition at line 1815 of file reduction.hpp.

◆ associateReduAccsWithHandlerHelper() [3/3]

template<typename ReductionT , typename... RestT, enable_if_t<(sizeof...(RestT) > 0), int > Z = 0>
void cl::sycl::ext::oneapi::detail::associateReduAccsWithHandlerHelper ( handler CGH,
ReductionT &  Redu,
RestT &...  Rest 
)

Definition at line 1821 of file reduction.hpp.

Referenced by associateReduAccsWithHandler().

◆ callReduUserKernelFunc()

template<typename KernelType , int Dims, typename... ReducerT, size_t... Is>
void cl::sycl::ext::oneapi::detail::callReduUserKernelFunc ( KernelType  KernelFunc,
nd_item< Dims >  NDIt,
std::tuple< ReducerT... > &  Reducers,
std::index_sequence< Is... >   
)

Definition at line 1512 of file reduction.hpp.

Referenced by reduCGFuncImpl().

◆ concat_sequences() [1/4]

template<size_t I>
constexpr std::index_sequence<I> cl::sycl::ext::oneapi::detail::concat_sequences ( std::index_sequence< I >  )
constexpr

Definition at line 1616 of file reduction.hpp.

◆ concat_sequences() [2/4]

template<size_t... Is, size_t... Js>
constexpr std::index_sequence<Is..., Js...> cl::sycl::ext::oneapi::detail::concat_sequences ( std::index_sequence< Is... >  ,
std::index_sequence< Js... >   
)
constexpr

Definition at line 1623 of file reduction.hpp.

◆ concat_sequences() [3/4]

template<size_t... Is, size_t... Js, class... Rs>
constexpr auto cl::sycl::ext::oneapi::detail::concat_sequences ( std::index_sequence< Is... >  ,
std::index_sequence< Js... >  ,
Rs...   
)
constexpr

Definition at line 1629 of file reduction.hpp.

Referenced by filterSequenceHelper().

◆ concat_sequences() [4/4]

constexpr std::index_sequence cl::sycl::ext::oneapi::detail::concat_sequences ( std::index_sequence<>  )
constexpr

Definition at line 1610 of file reduction.hpp.

◆ create_filter()

◆ createReducers()

template<typename... Reductions, size_t... Is>
std::tuple<typename Reductions::reducer_type...> cl::sycl::ext::oneapi::detail::createReducers ( ReduTupleT< typename Reductions::result_type... >  Identities,
ReduTupleT< typename Reductions::binary_operation... >  BOPsTuple,
std::index_sequence< Is... >   
)

Definition at line 1504 of file reduction.hpp.

Referenced by reduCGFuncImpl().

◆ createReduLocalAccs()

template<typename... Reductions, size_t... Is>
auto cl::sycl::ext::oneapi::detail::createReduLocalAccs ( size_t  Size,
handler CGH,
std::index_sequence< Is... >   
)

For the given 'Reductions' types pack and indices enumerating only the reductions for which a local accessors are needed, this function creates those local accessors and returns a tuple consisting of them.

Definition at line 1452 of file reduction.hpp.

References makeReduTupleT().

Referenced by reduAuxCGFuncImpl(), and reduCGFuncImpl().

◆ createReduOutAccs()

template<bool IsOneWG, typename... Reductions, size_t... Is>
auto cl::sycl::ext::oneapi::detail::createReduOutAccs ( size_t  NWorkGroups,
handler CGH,
std::tuple< Reductions... > &  ReduTuple,
std::index_sequence< Is... >   
)

For the given 'Reductions' types pack and indices enumerating them this function either creates new temporary accessors for partial sums (if IsOneWG is false) or returns user's accessor/USM-pointer if (IsOneWG is true).

Definition at line 1463 of file reduction.hpp.

References makeReduTupleT().

◆ filterSequence()

template<typename... T, typename FunctorT , size_t... Is>
constexpr auto cl::sycl::ext::oneapi::detail::filterSequence ( FunctorT  F,
std::index_sequence< Is... >  Indices 
)
constexpr

For each index 'I' from the given indices pack 'Is' this function returns an index sequence consisting of only those 'I's for which the 'FunctorT' applied to 'T[I]' returns true.

Definition at line 1672 of file reduction.hpp.

References filterSequenceHelper().

Referenced by reduAuxCGFuncImpl().

◆ filterSequenceHelper()

template<typename... T, typename FunctorT , size_t... Is, std::enable_if_t<(sizeof...(Is) > 0), int > Z = 0>
constexpr auto cl::sycl::ext::oneapi::detail::filterSequenceHelper ( FunctorT  ,
std::index_sequence< Is... >   
)
constexpr

For each index 'I' from the given indices pack 'Is' this function initially creates a number of short index_sequences, where each of such short index sequences is either empty (if the given Functor returns false for the type T[I]) or 1 element 'I' (otherwise).

After that this function concatenates those short sequences into one and returns the result sequence.

Definition at line 1656 of file reduction.hpp.

References concat_sequences().

Referenced by filterSequence().

◆ getInitToIdentityProperties()

template<typename... Reductions, size_t... Is>
std::array<bool, sizeof...(Reductions)> cl::sycl::ext::oneapi::detail::getInitToIdentityProperties ( std::tuple< Reductions... > &  ReduTuple,
std::index_sequence< Is... >   
)

Definition at line 1497 of file reduction.hpp.

Referenced by reduAuxCGFuncImpl(), and reduCGFuncImpl().

◆ getLoadOrder()

constexpr memory_order cl::sycl::ext::oneapi::detail::getLoadOrder ( memory_order  order)
inlineconstexpr

Definition at line 73 of file atomic_ref.hpp.

References cl::sycl::ext::oneapi::memory_order_acq_rel, cl::sycl::ext::oneapi::memory_order_acquire, cl::sycl::ext::oneapi::memory_order_relaxed, cl::sycl::ext::oneapi::memory_order_release, and cl::sycl::ext::oneapi::memory_order_seq_cst.

Referenced by cl::sycl::ext::oneapi::detail::atomic_ref_impl< T, DefaultOrder, DefaultScope, AddressSpace, typename detail::enable_if_t< std::is_floating_point< T >::value > >::fetch_add(), cl::sycl::ext::oneapi::detail::atomic_ref_impl< T *, DefaultOrder, DefaultScope, AddressSpace >::fetch_add(), cl::sycl::ext::oneapi::detail::atomic_ref_impl< T, DefaultOrder, DefaultScope, AddressSpace, typename detail::enable_if_t< std::is_integral< T >::value > >::fetch_max(), cl::sycl::ext::oneapi::detail::atomic_ref_impl< T, DefaultOrder, DefaultScope, AddressSpace, typename detail::enable_if_t< std::is_floating_point< T >::value > >::fetch_max(), cl::sycl::ext::oneapi::detail::atomic_ref_impl< T, DefaultOrder, DefaultScope, AddressSpace, typename detail::enable_if_t< std::is_integral< T >::value > >::fetch_min(), cl::sycl::ext::oneapi::detail::atomic_ref_impl< T, DefaultOrder, DefaultScope, AddressSpace, typename detail::enable_if_t< std::is_floating_point< T >::value > >::fetch_min(), cl::sycl::ext::oneapi::detail::atomic_ref_impl< T, DefaultOrder, DefaultScope, AddressSpace, typename detail::enable_if_t< std::is_floating_point< T >::value > >::fetch_sub(), and cl::sycl::ext::oneapi::detail::atomic_ref_impl< T *, DefaultOrder, DefaultScope, AddressSpace >::fetch_sub().

◆ getReadAccsToPreviousPartialReds()

template<typename... Reductions, size_t... Is>
auto cl::sycl::ext::oneapi::detail::getReadAccsToPreviousPartialReds ( handler CGH,
std::tuple< Reductions... > &  ReduTuple,
std::index_sequence< Is... >   
)

For the given 'Reductions' types pack and indices enumerating them this function returns accessors to buffers holding partial sums generated in the previous kernel invocation.

Definition at line 1475 of file reduction.hpp.

References makeReduTupleT().

Referenced by reduAuxCGFuncImpl().

◆ getReduBOPs()

template<typename... Reductions, size_t... Is>
ReduTupleT<typename Reductions::binary_operation...> cl::sycl::ext::oneapi::detail::getReduBOPs ( std::tuple< Reductions... > &  ReduTuple,
std::index_sequence< Is... >   
)

Definition at line 1491 of file reduction.hpp.

Referenced by reduAuxCGFuncImpl(), and reduCGFuncImpl().

◆ getReduIdentities()

template<typename... Reductions, size_t... Is>
ReduTupleT<typename Reductions::result_type...> cl::sycl::ext::oneapi::detail::getReduIdentities ( std::tuple< Reductions... > &  ReduTuple,
std::index_sequence< Is... >   
)

Definition at line 1484 of file reduction.hpp.

Referenced by reduAuxCGFuncImpl(), and reduCGFuncImpl().

◆ getStdMemoryOrder()

◆ initReduLocalAccs() [1/2]

template<bool UniformPow2WG, typename... LocalAccT, typename... InputAccT, typename... ResultT, size_t... Is>
void cl::sycl::ext::oneapi::detail::initReduLocalAccs ( size_t  LID,
size_t  GID,
size_t  NWorkItems,
size_t  WGSize,
ReduTupleT< InputAccT... >  LocalAccs,
ReduTupleT< LocalAccT... >  InputAccs,
ReduTupleT< ResultT... >  Identities,
std::index_sequence< Is... >   
)

Definition at line 1538 of file reduction.hpp.

References cl::sycl::detail::make_tuple(), and cl::sycl::detail::tie().

◆ initReduLocalAccs() [2/2]

template<bool Pow2WG, typename... LocalAccT, typename... ReducerT, typename... ResultT, size_t... Is>
void cl::sycl::ext::oneapi::detail::initReduLocalAccs ( size_t  LID,
size_t  WGSize,
ReduTupleT< LocalAccT... >  LocalAccs,
const std::tuple< ReducerT... > &  Reducers,
ReduTupleT< ResultT... >  Identities,
std::index_sequence< Is... >   
)

Definition at line 1520 of file reduction.hpp.

References cl::sycl::detail::make_tuple(), and cl::sycl::detail::tie().

◆ makeReduTupleT()

template<typename... Ts>
ReduTupleT<Ts...> cl::sycl::ext::oneapi::detail::makeReduTupleT ( Ts...  Elements)

◆ reduAuxCGFunc() [1/2]

template<typename KernelName , typename KernelType , class Reduction >
enable_if_t<!Reduction::has_fast_atomics, size_t > cl::sycl::ext::oneapi::detail::reduAuxCGFunc ( handler CGH,
size_t  NWorkItems,
size_t  MaxWGSize,
Reduction &  Redu 
)

Implements a command group function that enqueues a kernel that does one iteration of reduction of elements in each of work-groups.

At the end of each work-group the partial sum is written to a global buffer. The function returns the number of the newly generated partial sums.

Definition at line 1385 of file reduction.hpp.

References reduComputeWGSize().

◆ reduAuxCGFunc() [2/2]

template<typename KernelName , typename KernelType , typename... Reductions, size_t... Is>
size_t cl::sycl::ext::oneapi::detail::reduAuxCGFunc ( handler CGH,
size_t  NWorkItems,
size_t  MaxWGSize,
std::tuple< Reductions... > &  ReduTuple,
std::index_sequence< Is... >  ReduIndices 
)

Definition at line 1903 of file reduction.hpp.

References reduComputeWGSize().

◆ reduAuxCGFuncImpl() [1/3]

template<typename KernelName , typename KernelType , bool UniformWG, class Reduction , typename InputT , typename OutputT >
enable_if_t<Reduction::has_fast_reduce && !Reduction::has_fast_atomics> cl::sycl::ext::oneapi::detail::reduAuxCGFuncImpl ( handler CGH,
size_t  NWorkItems,
size_t  NWorkGroups,
size_t  WGSize,
Reduction &  Redu,
InputT  In,
OutputT  Out 
)

Implements a command group function that enqueues a kernel that does one iteration of reduction of elements in each of work-groups.

This version uses ext::oneapi::reduce() algorithm to reduce elements in each of work-groups. At the end of each work-groups the partial sum is written to a global buffer.

Briefly: aux kernel, ext::oneapi::reduce(), reproducible results, FP + ADD/MIN/MAX

Definition at line 1289 of file reduction.hpp.

References cl::sycl::handler::parallel_for().

◆ reduAuxCGFuncImpl() [2/3]

template<typename KernelName , typename KernelType , bool UniformPow2WG, class Reduction , typename InputT , typename OutputT >
enable_if_t<!Reduction::has_fast_reduce && !Reduction::has_fast_atomics> cl::sycl::ext::oneapi::detail::reduAuxCGFuncImpl ( handler CGH,
size_t  NWorkItems,
size_t  NWorkGroups,
size_t  WGSize,
Reduction &  Redu,
InputT  In,
OutputT  Out 
)

Implements a command group function that enqueues a kernel that does one iteration of reduction of elements in each of work-groups.

This version uses tree-reduction algorithm to reduce elements in each of work-groups. At the end of each work-group the partial sum is written to a global buffer.

Briefly: aux kernel, tree-reduction, CUSTOM types/ops.

Definition at line 1324 of file reduction.hpp.

References cl::sycl::handler::parallel_for().

◆ reduAuxCGFuncImpl() [3/3]

template<typename KernelName , typename KernelType , bool UniformPow2WG, bool IsOneWG, typename... Reductions, size_t... Is>
void cl::sycl::ext::oneapi::detail::reduAuxCGFuncImpl ( handler CGH,
size_t  NWorkItems,
size_t  NWorkGroups,
size_t  WGSize,
std::tuple< Reductions... > &  ReduTuple,
std::index_sequence< Is... >  ReduIndices 
)

◆ reduceReduLocalAccs()

template<typename... LocalAccT, typename... BOPsT, size_t... Is>
void cl::sycl::ext::oneapi::detail::reduceReduLocalAccs ( size_t  IndexA,
size_t  IndexB,
ReduTupleT< LocalAccT... >  LocalAccs,
ReduTupleT< BOPsT... >  BOPs,
std::index_sequence< Is... >   
)

Definition at line 1565 of file reduction.hpp.

References cl::sycl::detail::make_tuple(), and cl::sycl::detail::tie().

Referenced by reduAuxCGFuncImpl(), and reduCGFuncImpl().

◆ reduCGFunc() [1/4]

template<typename KernelName , typename KernelType , int Dims, class Reduction >
enable_if_t< Reduction::has_fast_atomics > cl::sycl::ext::oneapi::detail::reduCGFunc ( handler CGH,
KernelType  KernelFunc,
const nd_range< Dims > &  Range,
Reduction &  Redu 
)

◆ reduCGFunc() [2/4]

template<typename KernelName , typename KernelType , int Dims, class Reduction >
enable_if_t<!Reduction::has_fast_atomics > cl::sycl::ext::oneapi::detail::reduCGFunc ( handler CGH,
KernelType  KernelFunc,
const nd_range< Dims > &  Range,
Reduction &  Redu 
)

◆ reduCGFunc() [3/4]

template<typename KernelName , typename KernelType , int Dims, typename... Reductions, size_t... Is>
void cl::sycl::ext::oneapi::detail::reduCGFunc ( handler CGH,
KernelType  KernelFunc,
const nd_range< Dims > &  Range,
std::tuple< Reductions... > &  ReduTuple,
std::index_sequence< Is... >  ReduIndices 
)

◆ reduCGFunc() [4/4]

template<typename KernelName , typename KernelType , int Dims, class Reduction >
void cl::sycl::ext::oneapi::detail::reduCGFunc ( handler CGH,
KernelType  KernelFunc,
const range< Dims > &  Range,
size_t  MaxWGSize,
uint32_t  NumConcurrentWorkGroups,
Reduction &  Redu 
)

Definition at line 1017 of file reduction.hpp.

References cl::sycl::range< dimensions >::size().

◆ reduCGFuncAtomic64()

template<typename KernelName , typename KernelType , int Dims, class Reduction >
enable_if_t< Reduction::has_atomic_add_float64 > cl::sycl::ext::oneapi::detail::reduCGFuncAtomic64 ( handler CGH,
KernelType  KernelFunc,
const nd_range< Dims > &  Range,
Reduction &  Redu 
)

Definition at line 1804 of file reduction.hpp.

◆ reduCGFuncImpl() [1/8]

template<typename KernelName , typename KernelType , int Dims, class Reduction , bool IsPow2WG>
enable_if_t<Reduction::has_fast_reduce && Reduction::has_fast_atomics> cl::sycl::ext::oneapi::detail::reduCGFuncImpl ( handler CGH,
KernelType  KernelFunc,
const nd_range< Dims > &  Range,
Reduction &  ,
typename Reduction::rw_accessor_type  Out 
)

Implements a command group function that enqueues a kernel that calls user's lambda function KernelFunc and also does one iteration of reduction of elements computed in user's lambda function.

This version uses ext::oneapi::reduce() algorithm to reduce elements in each of work-groups, then it calls fast SYCL atomic operations to update the given reduction variable Out.

Briefly: calls user's lambda, ext::oneapi::reduce() + atomic, INT + ADD/MIN/MAX.

Definition at line 1045 of file reduction.hpp.

References cl::sycl::handler::parallel_for().

◆ reduCGFuncImpl() [2/8]

template<typename KernelName , typename KernelType , int Dims, class Reduction , bool IsPow2WG>
enable_if_t<!Reduction::has_fast_reduce && Reduction::has_fast_atomics> cl::sycl::ext::oneapi::detail::reduCGFuncImpl ( handler CGH,
KernelType  KernelFunc,
const nd_range< Dims > &  Range,
Reduction &  ,
typename Reduction::rw_accessor_type  Out 
)

Implements a command group function that enqueues a kernel that calls user's lambda function KernelFunc and also does one iteration of reduction of elements computed in user's lambda function.

This version uses tree-reduction algorithm to reduce elements in each of work-groups, then it calls fast SYCL atomic operations to update user's reduction variable.

Briefly: calls user's lambda, tree-reduction + atomic, INT + AND/OR/XOR.

Definition at line 1072 of file reduction.hpp.

References cl::sycl::nd_range< dimensions >::get_local_range(), and cl::sycl::handler::parallel_for().

◆ reduCGFuncImpl() [3/8]

template<typename KernelName , typename KernelType , int Dims, class Reduction , bool IsPow2WG>
enable_if_t<Reduction::has_fast_reduce && !Reduction::has_fast_atomics> cl::sycl::ext::oneapi::detail::reduCGFuncImpl ( handler CGH,
KernelType  KernelFunc,
const nd_range< Dims > &  Range,
Reduction &  Redu,
typename Reduction::rw_accessor_type  Out 
)

Implements a command group function that enqueues a kernel that calls user's lambda function and does one iteration of reduction of elements in each of work-groups.

This version uses ext::oneapi::reduce() algorithm to reduce elements in each of work-groups. At the end of each work-groups the partial sum is written to a global buffer.

Briefly: user's lambda, ext::oneapi::reduce(), FP + ADD/MIN/MAX.

Definition at line 1162 of file reduction.hpp.

References cl::sycl::nd_range< dimensions >::get_group_range(), and cl::sycl::handler::parallel_for().

◆ reduCGFuncImpl() [4/8]

template<typename KernelName , typename KernelType , int Dims, class Reduction , bool IsPow2WG>
enable_if_t<!Reduction::has_fast_reduce && !Reduction::has_fast_atomics> cl::sycl::ext::oneapi::detail::reduCGFuncImpl ( handler CGH,
KernelType  KernelFunc,
const nd_range< Dims > &  Range,
Reduction &  Redu,
typename Reduction::rw_accessor_type  Out 
)

Implements a command group function that enqueues a kernel that calls user's lambda function.

Parameters
KernelFuncand does one iteration of reduction of elements in each of work-groups. This version uses tree-reduction algorithm to reduce elements in each of work-groups. At the end of each work-group the partial sum is written to a global buffer.

Briefly: user's lambda, tree-reduction, CUSTOM types/ops.

Definition at line 1200 of file reduction.hpp.

References cl::sycl::nd_range< dimensions >::get_group_range(), cl::sycl::nd_range< dimensions >::get_local_range(), and cl::sycl::handler::parallel_for().

◆ reduCGFuncImpl() [5/8]

template<typename KernelName , bool Pow2WG, bool IsOneWG, typename KernelType , int Dims, typename... Reductions, size_t... Is>
void cl::sycl::ext::oneapi::detail::reduCGFuncImpl ( handler CGH,
KernelType  KernelFunc,
const nd_range< Dims > &  Range,
std::tuple< Reductions... > &  ReduTuple,
std::index_sequence< Is... >  ReduIndices 
)

◆ reduCGFuncImpl() [6/8]

template<typename KernelName , typename KernelType , int Dims, class Reduction >
std::enable_if_t<Reduction::has_fast_atomics> cl::sycl::ext::oneapi::detail::reduCGFuncImpl ( handler CGH,
KernelType  KernelFunc,
const range< Dims > &  Range,
const nd_range< 1 > &  NDRange,
Reduction &  Redu 
)

◆ reduCGFuncImpl() [7/8]

template<typename KernelName , typename KernelType , int Dims, class Reduction >
std::enable_if_t<!Reduction::has_fast_atomics && Reduction::has_fast_reduce> cl::sycl::ext::oneapi::detail::reduCGFuncImpl ( handler CGH,
KernelType  KernelFunc,
const range< Dims > &  Range,
const nd_range< 1 > &  NDRange,
Reduction &  Redu 
)

◆ reduCGFuncImpl() [8/8]

template<typename KernelName , typename KernelType , int Dims, class Reduction >
std::enable_if_t<!Reduction::has_fast_atomics && !Reduction::has_fast_reduce> cl::sycl::ext::oneapi::detail::reduCGFuncImpl ( handler CGH,
KernelType  KernelFunc,
const range< Dims > &  Range,
const nd_range< 1 > &  NDRange,
Reduction &  Redu 
)

◆ reduCGFuncImplAtomic64()

template<typename KernelName , typename KernelType , int Dims, class Reduction >
std::enable_if_t<Reduction::has_atomic_add_float64> cl::sycl::ext::oneapi::detail::reduCGFuncImplAtomic64 ( handler CGH,
KernelType  KernelFunc,
const nd_range< Dims > &  Range,
Reduction &  ,
typename Reduction::rw_accessor_type  Out 
)

◆ reduComputeWGSize()

size_t cl::sycl::ext::oneapi::detail::reduComputeWGSize ( size_t  NWorkItems,
size_t  MaxWGSize,
size_t &  NWorkGroups 
)

Definition at line 20 of file reduction.cpp.

Referenced by reduAuxCGFunc().

◆ reductionLoop()

template<typename KernelFunc , int Dims, typename ReducerT >
void cl::sycl::ext::oneapi::detail::reductionLoop ( const range< Dims > &  Range,
ReducerT &  Reducer,
const nd_item< 1 > &  NdId,
KernelFunc F 
)

Called in device code.

This function iterates through the index space Range using stride equal to the global range specified in NdId, which gives much better performance than using stride equal to 1. For each of the index the given F function/functor is called and the reduction value hold in Reducer is accumulated in those calls.

Definition at line 826 of file reduction.hpp.

References cl::sycl::nd_item< dimensions >::get_global_id(), cl::sycl::nd_item< dimensions >::get_global_range(), cl::sycl::detail::getDelinearizedId(), and cl::sycl::range< dimensions >::size().

Referenced by reduCGFuncImpl().

◆ reduGetMaxNumConcurrentWorkGroups() [1/2]

uint32_t cl::sycl::ext::oneapi::detail::reduGetMaxNumConcurrentWorkGroups ( std::shared_ptr< queue_impl Queue)

◆ reduGetMaxNumConcurrentWorkGroups() [2/2]

uint32_t cl::sycl::ext::oneapi::detail::reduGetMaxNumConcurrentWorkGroups ( std::shared_ptr< sycl::detail::queue_impl Queue)

Definition at line 54 of file reduction.cpp.

References cl::sycl::device::get_info(), and cl::sycl::device::is_gpu().

◆ reduGetMaxWGSize() [1/2]

size_t cl::sycl::ext::oneapi::detail::reduGetMaxWGSize ( std::shared_ptr< queue_impl Queue,
size_t  LocalMemBytesPerWorkItem 
)

◆ reduGetMaxWGSize() [2/2]

size_t cl::sycl::ext::oneapi::detail::reduGetMaxWGSize ( std::shared_ptr< sycl::detail::queue_impl Queue,
size_t  LocalMemBytesPerWorkItem 
)

Definition at line 68 of file reduction.cpp.

References cl::sycl::device::get_info().

◆ reduGetMemPerWorkItem()

template<typename... ReductionT, size_t... Is>
size_t cl::sycl::ext::oneapi::detail::reduGetMemPerWorkItem ( std::tuple< ReductionT... > &  ReduTuple,
std::index_sequence< Is... >   
)

Definition at line 1989 of file reduction.hpp.

References reduGetMemPerWorkItemHelper().

Referenced by cl::sycl::handler::parallel_for().

◆ reduGetMemPerWorkItemHelper() [1/2]

template<typename Reduction >
size_t cl::sycl::ext::oneapi::detail::reduGetMemPerWorkItemHelper ( Reduction &  )

Definition at line 1978 of file reduction.hpp.

◆ reduGetMemPerWorkItemHelper() [2/2]

template<typename Reduction , typename... RestT>
size_t cl::sycl::ext::oneapi::detail::reduGetMemPerWorkItemHelper ( Reduction &  ,
RestT...  Rest 
)

Definition at line 1983 of file reduction.hpp.

Referenced by reduGetMemPerWorkItem().

◆ reduSaveFinalResultToUserMem() [1/3]

template<typename KernelName , class Reduction >
std::enable_if_t<!Reduction::is_usm > cl::sycl::ext::oneapi::detail::reduSaveFinalResultToUserMem ( handler CGH,
Reduction &  Redu 
)

Copies the final reduction result kept in read-write accessor to user's accessor.

This method is not called for user's read-write accessors requiring update-write to it.

Definition at line 1419 of file reduction.hpp.

References cl::sycl::handler::copy().

◆ reduSaveFinalResultToUserMem() [2/3]

template<typename KernelName , class Reduction >
std::enable_if_t< Reduction::is_usm > cl::sycl::ext::oneapi::detail::reduSaveFinalResultToUserMem ( handler CGH,
Reduction &  Redu 
)

Copies the final reduction result kept in read-write accessor to user's USM memory.

Definition at line 1435 of file reduction.hpp.

References cl::sycl::handler::single_task().

◆ reduSaveFinalResultToUserMem() [3/3]

template<typename... Reduction, size_t... Is>
std::shared_ptr< event > cl::sycl::ext::oneapi::detail::reduSaveFinalResultToUserMem ( std::shared_ptr< detail::queue_impl Queue,
bool  IsHost,
std::tuple< Reduction... > &  ReduTuple,
std::index_sequence< Is... >   
)

Creates additional kernels that copy the accumulated/final results from reductions accessors to either user's accessor or user's USM memory.

Returns the event to the last kernel copying data or nullptr if no additional kernels created.

Definition at line 1967 of file reduction.hpp.

References reduSaveFinalResultToUserMemHelper().

Referenced by cl::sycl::handler::parallel_for().

◆ reduSaveFinalResultToUserMemHelper() [1/3]

void cl::sycl::ext::oneapi::detail::reduSaveFinalResultToUserMemHelper ( std::vector< event > &  ,
std::shared_ptr< detail::queue_impl ,
bool   
)
inline

Definition at line 1930 of file reduction.hpp.

◆ reduSaveFinalResultToUserMemHelper() [2/3]

template<typename Reduction , typename... RestT>
std::enable_if_t<Reduction::is_usm> cl::sycl::ext::oneapi::detail::reduSaveFinalResultToUserMemHelper ( std::vector< event > &  Events,
std::shared_ptr< detail::queue_impl Queue,
bool  IsHost,
Reduction &  ,
RestT...  Rest 
)

◆ reduSaveFinalResultToUserMemHelper() [3/3]

template<typename Reduction , typename... RestT>
std::enable_if_t<!Reduction::is_usm > cl::sycl::ext::oneapi::detail::reduSaveFinalResultToUserMemHelper ( std::vector< event > &  Events,
std::shared_ptr< detail::queue_impl Queue,
bool  IsHost,
Reduction &  Redu,
RestT...  Rest 
)

◆ tokenize()

std::vector<std::string> cl::sycl::ext::oneapi::detail::tokenize ( const std::string &  Filter,
const std::string &  Delim 
)

◆ tuple_select_elements()

template<typename TupleT , std::size_t... Is>
std::tuple< std::tuple_element_t< Is, TupleT >... > cl::sycl::ext::oneapi::detail::tuple_select_elements ( TupleT  Tuple,
std::index_sequence< Is... >   
)

Utility function: for the given tuple.

Parameters
Tuplethe function returns a new tuple consisting of only elements indexed by the index sequence.

Definition at line 1998 of file reduction.hpp.

Referenced by cl::sycl::handler::parallel_for().

◆ writeReduSumsToOutAccs()

template<bool Pow2WG, bool IsOneWG, typename... Reductions, typename... OutAccT, typename... LocalAccT, typename... BOPsT, typename... Ts, size_t... Is>
void cl::sycl::ext::oneapi::detail::writeReduSumsToOutAccs ( size_t  OutAccIndex,
size_t  WGSize,
std::tuple< Reductions... > *  ,
ReduTupleT< OutAccT... >  OutAccs,
ReduTupleT< LocalAccT... >  LocalAccs,
ReduTupleT< BOPsT... >  BOPs,
ReduTupleT< Ts... >  IdentityVals,
std::array< bool, sizeof...(Reductions)>  IsInitializeToIdentity,
std::index_sequence< Is... >   
)

Definition at line 1577 of file reduction.hpp.

References cl::sycl::detail::make_tuple(), and cl::sycl::detail::tie().