|
template<typename... Ts> |
ReduTupleT< Ts... > | cl::sycl::ext::oneapi::detail::makeReduTupleT (Ts... Elements) |
|
size_t | cl::sycl::ext::oneapi::detail::reduGetMaxWGSize (std::shared_ptr< queue_impl > Queue, size_t LocalMemBytesPerWorkItem) |
|
size_t | cl::sycl::ext::oneapi::detail::reduComputeWGSize (size_t NWorkItems, size_t MaxWGSize, size_t &NWorkGroups) |
|
template<typename KernelFunc , int Dims, typename ReducerT > |
void | cl::sycl::ext::oneapi::detail::reductionLoop (const range< Dims > &Range, ReducerT &Reducer, const nd_item< 1 > &NdId, KernelFunc &F) |
| Called in device code. More...
|
|
template<typename KernelName , typename KernelType , int Dims, class Reduction > |
std::enable_if_t< Reduction::has_fast_atomics > | cl::sycl::ext::oneapi::detail::reduCGFuncImpl (handler &CGH, KernelType KernelFunc, const range< Dims > &Range, const nd_range< 1 > &NDRange, Reduction &Redu) |
|
template<typename KernelName , typename KernelType , int Dims, class Reduction > |
std::enable_if_t<!Reduction::has_fast_atomics &&Reduction::has_fast_reduce > | cl::sycl::ext::oneapi::detail::reduCGFuncImpl (handler &CGH, KernelType KernelFunc, const range< Dims > &Range, const nd_range< 1 > &NDRange, Reduction &Redu) |
|
template<typename KernelName , typename KernelType , int Dims, class Reduction > |
std::enable_if_t<!Reduction::has_fast_atomics &&!Reduction::has_fast_reduce > | cl::sycl::ext::oneapi::detail::reduCGFuncImpl (handler &CGH, KernelType KernelFunc, const range< Dims > &Range, const nd_range< 1 > &NDRange, Reduction &Redu) |
|
template<typename KernelName , typename KernelType , int Dims, class Reduction > |
void | cl::sycl::ext::oneapi::detail::reduCGFunc (handler &CGH, KernelType KernelFunc, const range< Dims > &Range, size_t MaxWGSize, uint32_t NumConcurrentWorkGroups, Reduction &Redu) |
|
template<typename KernelName , typename KernelType , int Dims, class Reduction , bool IsPow2WG> |
enable_if_t< Reduction::has_fast_reduce &&Reduction::has_fast_atomics > | cl::sycl::ext::oneapi::detail::reduCGFuncImpl (handler &CGH, KernelType KernelFunc, const nd_range< Dims > &Range, Reduction &, typename Reduction::rw_accessor_type Out) |
| Implements a command group function that enqueues a kernel that calls user's lambda function KernelFunc and also does one iteration of reduction of elements computed in user's lambda function. More...
|
|
template<typename KernelName , typename KernelType , int Dims, class Reduction , bool IsPow2WG> |
enable_if_t<!Reduction::has_fast_reduce &&Reduction::has_fast_atomics > | cl::sycl::ext::oneapi::detail::reduCGFuncImpl (handler &CGH, KernelType KernelFunc, const nd_range< Dims > &Range, Reduction &, typename Reduction::rw_accessor_type Out) |
| Implements a command group function that enqueues a kernel that calls user's lambda function KernelFunc and also does one iteration of reduction of elements computed in user's lambda function. More...
|
|
template<typename KernelName , typename KernelType , int Dims, class Reduction > |
enable_if_t< Reduction::has_fast_atomics > | cl::sycl::ext::oneapi::detail::reduCGFunc (handler &CGH, KernelType KernelFunc, const nd_range< Dims > &Range, Reduction &Redu) |
|
template<typename KernelName , typename KernelType , int Dims, class Reduction , bool IsPow2WG> |
enable_if_t< Reduction::has_fast_reduce &&!Reduction::has_fast_atomics > | cl::sycl::ext::oneapi::detail::reduCGFuncImpl (handler &CGH, KernelType KernelFunc, const nd_range< Dims > &Range, Reduction &Redu, typename Reduction::rw_accessor_type Out) |
| Implements a command group function that enqueues a kernel that calls user's lambda function and does one iteration of reduction of elements in each of work-groups. More...
|
|
template<typename KernelName , typename KernelType , int Dims, class Reduction , bool IsPow2WG> |
enable_if_t<!Reduction::has_fast_reduce &&!Reduction::has_fast_atomics > | cl::sycl::ext::oneapi::detail::reduCGFuncImpl (handler &CGH, KernelType KernelFunc, const nd_range< Dims > &Range, Reduction &Redu, typename Reduction::rw_accessor_type Out) |
| Implements a command group function that enqueues a kernel that calls user's lambda function. More...
|
|
template<typename KernelName , typename KernelType , int Dims, class Reduction > |
enable_if_t<!Reduction::has_fast_atomics > | cl::sycl::ext::oneapi::detail::reduCGFunc (handler &CGH, KernelType KernelFunc, const nd_range< Dims > &Range, Reduction &Redu) |
|
template<typename KernelName , typename KernelType , bool UniformWG, class Reduction , typename InputT , typename OutputT > |
enable_if_t< Reduction::has_fast_reduce &&!Reduction::has_fast_atomics > | cl::sycl::ext::oneapi::detail::reduAuxCGFuncImpl (handler &CGH, size_t NWorkItems, size_t NWorkGroups, size_t WGSize, Reduction &Redu, InputT In, OutputT Out) |
| Implements a command group function that enqueues a kernel that does one iteration of reduction of elements in each of work-groups. More...
|
|
template<typename KernelName , typename KernelType , bool UniformPow2WG, class Reduction , typename InputT , typename OutputT > |
enable_if_t<!Reduction::has_fast_reduce &&!Reduction::has_fast_atomics > | cl::sycl::ext::oneapi::detail::reduAuxCGFuncImpl (handler &CGH, size_t NWorkItems, size_t NWorkGroups, size_t WGSize, Reduction &Redu, InputT In, OutputT Out) |
| Implements a command group function that enqueues a kernel that does one iteration of reduction of elements in each of work-groups. More...
|
|
template<typename KernelName , typename KernelType , class Reduction > |
enable_if_t<!Reduction::has_fast_atomics, size_t > | cl::sycl::ext::oneapi::detail::reduAuxCGFunc (handler &CGH, size_t NWorkItems, size_t MaxWGSize, Reduction &Redu) |
| Implements a command group function that enqueues a kernel that does one iteration of reduction of elements in each of work-groups. More...
|
|
template<typename KernelName , class Reduction > |
std::enable_if_t<!Reduction::is_usm > | cl::sycl::ext::oneapi::detail::reduSaveFinalResultToUserMem (handler &CGH, Reduction &Redu) |
| Copies the final reduction result kept in read-write accessor to user's accessor. More...
|
|
template<typename KernelName , class Reduction > |
std::enable_if_t< Reduction::is_usm > | cl::sycl::ext::oneapi::detail::reduSaveFinalResultToUserMem (handler &CGH, Reduction &Redu) |
| Copies the final reduction result kept in read-write accessor to user's USM memory. More...
|
|
template<typename... Reductions, size_t... Is> |
auto | cl::sycl::ext::oneapi::detail::createReduLocalAccs (size_t Size, handler &CGH, std::index_sequence< Is... >) |
| For the given 'Reductions' types pack and indices enumerating only the reductions for which local accessors are needed, this function creates those local accessors and returns a tuple consisting of them. More...
|
|
template<bool IsOneWG, typename... Reductions, size_t... Is> |
auto | cl::sycl::ext::oneapi::detail::createReduOutAccs (size_t NWorkGroups, handler &CGH, std::tuple< Reductions... > &ReduTuple, std::index_sequence< Is... >) |
| For the given 'Reductions' types pack and indices enumerating them, this function either creates new temporary accessors for partial sums (if IsOneWG is false) or returns the user's accessor/USM-pointer (if IsOneWG is true). More...
|
|
template<typename... Reductions, size_t... Is> |
auto | cl::sycl::ext::oneapi::detail::getReadAccsToPreviousPartialReds (handler &CGH, std::tuple< Reductions... > &ReduTuple, std::index_sequence< Is... >) |
| For the given 'Reductions' types pack and indices enumerating them this function returns accessors to buffers holding partial sums generated in the previous kernel invocation. More...
|
|
template<typename... Reductions, size_t... Is> |
ReduTupleT< typename Reductions::result_type... > | cl::sycl::ext::oneapi::detail::getReduIdentities (std::tuple< Reductions... > &ReduTuple, std::index_sequence< Is... >) |
|
template<typename... Reductions, size_t... Is> |
ReduTupleT< typename Reductions::binary_operation... > | cl::sycl::ext::oneapi::detail::getReduBOPs (std::tuple< Reductions... > &ReduTuple, std::index_sequence< Is... >) |
|
template<typename... Reductions, size_t... Is> |
std::array< bool, sizeof...(Reductions)> | cl::sycl::ext::oneapi::detail::getInitToIdentityProperties (std::tuple< Reductions... > &ReduTuple, std::index_sequence< Is... >) |
|
template<typename... Reductions, size_t... Is> |
std::tuple< typename Reductions::reducer_type... > | cl::sycl::ext::oneapi::detail::createReducers (ReduTupleT< typename Reductions::result_type... > Identities, ReduTupleT< typename Reductions::binary_operation... > BOPsTuple, std::index_sequence< Is... >) |
|
template<typename KernelType , int Dims, typename... ReducerT, size_t... Is> |
void | cl::sycl::ext::oneapi::detail::callReduUserKernelFunc (KernelType KernelFunc, nd_item< Dims > NDIt, std::tuple< ReducerT... > &Reducers, std::index_sequence< Is... >) |
|
template<bool Pow2WG, typename... LocalAccT, typename... ReducerT, typename... ResultT, size_t... Is> |
void | cl::sycl::ext::oneapi::detail::initReduLocalAccs (size_t LID, size_t WGSize, ReduTupleT< LocalAccT... > LocalAccs, const std::tuple< ReducerT... > &Reducers, ReduTupleT< ResultT... > Identities, std::index_sequence< Is... >) |
|
template<bool UniformPow2WG, typename... LocalAccT, typename... InputAccT, typename... ResultT, size_t... Is> |
void | cl::sycl::ext::oneapi::detail::initReduLocalAccs (size_t LID, size_t GID, size_t NWorkItems, size_t WGSize, ReduTupleT< InputAccT... > LocalAccs, ReduTupleT< LocalAccT... > InputAccs, ReduTupleT< ResultT... > Identities, std::index_sequence< Is... >) |
|
template<typename... LocalAccT, typename... BOPsT, size_t... Is> |
void | cl::sycl::ext::oneapi::detail::reduceReduLocalAccs (size_t IndexA, size_t IndexB, ReduTupleT< LocalAccT... > LocalAccs, ReduTupleT< BOPsT... > BOPs, std::index_sequence< Is... >) |
|
template<bool Pow2WG, bool IsOneWG, typename... Reductions, typename... OutAccT, typename... LocalAccT, typename... BOPsT, typename... Ts, size_t... Is> |
void | cl::sycl::ext::oneapi::detail::writeReduSumsToOutAccs (size_t OutAccIndex, size_t WGSize, std::tuple< Reductions... > *, ReduTupleT< OutAccT... > OutAccs, ReduTupleT< LocalAccT... > LocalAccs, ReduTupleT< BOPsT... > BOPs, ReduTupleT< Ts... > IdentityVals, std::array< bool, sizeof...(Reductions)> IsInitializeToIdentity, std::index_sequence< Is... >) |
|
constexpr std::index_sequence<> | cl::sycl::ext::oneapi::detail::concat_sequences (std::index_sequence<>) |
|
template<size_t I> |
constexpr std::index_sequence< I > | cl::sycl::ext::oneapi::detail::concat_sequences (std::index_sequence< I >) |
|
template<size_t... Is, size_t... Js> |
constexpr std::index_sequence< Is..., Js... > | cl::sycl::ext::oneapi::detail::concat_sequences (std::index_sequence< Is... >, std::index_sequence< Js... >) |
|
template<size_t... Is, size_t... Js, class... Rs> |
constexpr auto | cl::sycl::ext::oneapi::detail::concat_sequences (std::index_sequence< Is... >, std::index_sequence< Js... >, Rs...) |
|
template<typename... T, typename FunctorT , size_t... Is, std::enable_if_t<(sizeof...(Is) > 0), int > Z = 0> |
constexpr auto | cl::sycl::ext::oneapi::detail::filterSequenceHelper (FunctorT, std::index_sequence< Is... >) |
| For each index 'I' from the given indices pack 'Is', this function initially creates a number of short index sequences, where each such sequence is either empty (if the given Functor returns false for the type T[I]) or holds the single element 'I' (otherwise). More...
|
|
template<typename... T, typename FunctorT , size_t... Is> |
constexpr auto | cl::sycl::ext::oneapi::detail::filterSequence (FunctorT F, std::index_sequence< Is... > Indices) |
| For each index 'I' from the given indices pack 'Is' this function returns an index sequence consisting of only those 'I's for which the 'FunctorT' applied to 'T[I]' returns true. More...
|
|
template<bool Pow2WG, bool IsOneWG, typename... Reductions, int Dims, typename... LocalAccT, typename... OutAccT, typename... ReducerT, typename... Ts, typename... BOPsT, size_t... Is> |
void | cl::sycl::ext::oneapi::detail::reduCGFuncImplScalar (nd_item< Dims > NDIt, ReduTupleT< LocalAccT... > LocalAccsTuple, ReduTupleT< OutAccT... > OutAccsTuple, std::tuple< ReducerT... > &ReducersTuple, ReduTupleT< Ts... > IdentitiesTuple, ReduTupleT< BOPsT... > BOPsTuple, std::array< bool, sizeof...(Reductions)> InitToIdentityProps, std::index_sequence< Is... > ReduIndices) |
| All scalar reductions are processed together; there is one loop of log2(N) steps, and each reduction uses its own storage. More...
|
|
template<bool Pow2WG, bool IsOneWG, typename Reduction , int Dims, typename LocalAccT , typename OutAccT , typename ReducerT , typename T , typename BOPT > |
void | cl::sycl::ext::oneapi::detail::reduCGFuncImplArrayHelper (nd_item< Dims > NDIt, LocalAccT LocalReds, OutAccT Out, ReducerT &Reducer, T Identity, BOPT BOp, bool IsInitializeToIdentity) |
| Each array reduction is processed separately. More...
|
|
template<bool Pow2WG, bool IsOneWG, typename... Reductions, int Dims, typename... LocalAccT, typename... OutAccT, typename... ReducerT, typename... Ts, typename... BOPsT, size_t... Is> |
void | cl::sycl::ext::oneapi::detail::reduCGFuncImplArray (nd_item< Dims > NDIt, ReduTupleT< LocalAccT... > LocalAccsTuple, ReduTupleT< OutAccT... > OutAccsTuple, std::tuple< ReducerT... > &ReducersTuple, ReduTupleT< Ts... > IdentitiesTuple, ReduTupleT< BOPsT... > BOPsTuple, std::array< bool, sizeof...(Reductions)> InitToIdentityProps, std::index_sequence< Is... >) |
|
template<typename KernelName , bool Pow2WG, bool IsOneWG, typename KernelType , int Dims, typename... Reductions, size_t... Is> |
void | cl::sycl::ext::oneapi::detail::reduCGFuncImpl (handler &CGH, KernelType KernelFunc, const nd_range< Dims > &Range, std::tuple< Reductions... > &ReduTuple, std::index_sequence< Is... > ReduIndices) |
|
template<typename KernelName , typename KernelType , int Dims, typename... Reductions, size_t... Is> |
void | cl::sycl::ext::oneapi::detail::reduCGFunc (handler &CGH, KernelType KernelFunc, const nd_range< Dims > &Range, std::tuple< Reductions... > &ReduTuple, std::index_sequence< Is... >) |
|
template<typename KernelName , typename KernelType , int Dims, class Reduction > |
std::enable_if_t< Reduction::has_atomic_add_float64 > | cl::sycl::ext::oneapi::detail::reduCGFuncImplAtomic64 (handler &CGH, KernelType KernelFunc, const nd_range< Dims > &Range, Reduction &, typename Reduction::rw_accessor_type Out) |
|
template<typename KernelName , typename KernelType , int Dims, class Reduction > |
enable_if_t< Reduction::has_atomic_add_float64 > | cl::sycl::ext::oneapi::detail::reduCGFuncAtomic64 (handler &CGH, KernelType KernelFunc, const nd_range< Dims > &Range, Reduction &Redu) |
|
void | cl::sycl::ext::oneapi::detail::associateReduAccsWithHandlerHelper (handler &) |
|
template<typename ReductionT > |
void | cl::sycl::ext::oneapi::detail::associateReduAccsWithHandlerHelper (handler &CGH, ReductionT &Redu) |
|
template<typename ReductionT , typename... RestT, enable_if_t<(sizeof...(RestT) > 0), int > Z = 0> |
void | cl::sycl::ext::oneapi::detail::associateReduAccsWithHandlerHelper (handler &CGH, ReductionT &Redu, RestT &... Rest) |
|
template<typename... Reductions, size_t... Is> |
void | cl::sycl::ext::oneapi::detail::associateReduAccsWithHandler (handler &CGH, std::tuple< Reductions... > &ReduTuple, std::index_sequence< Is... >) |
|
template<bool UniformPow2WG, bool IsOneWG, typename... Reductions, int Dims, typename... LocalAccT, typename... InAccT, typename... OutAccT, typename... Ts, typename... BOPsT, size_t... Is> |
void | cl::sycl::ext::oneapi::detail::reduAuxCGFuncImplScalar (nd_item< Dims > NDIt, size_t LID, size_t GID, size_t NWorkItems, size_t WGSize, ReduTupleT< LocalAccT... > LocalAccsTuple, ReduTupleT< InAccT... > InAccsTuple, ReduTupleT< OutAccT... > OutAccsTuple, ReduTupleT< Ts... > IdentitiesTuple, ReduTupleT< BOPsT... > BOPsTuple, std::array< bool, sizeof...(Reductions)> InitToIdentityProps, std::index_sequence< Is... > ReduIndices) |
| All scalar reductions are processed together; there is one loop of log2(N) steps, and each reduction uses its own storage. More...
|
|
template<bool UniformPow2WG, bool IsOneWG, typename Reduction , int Dims, typename LocalAccT , typename InAccT , typename OutAccT , typename T , typename BOPT > |
void | cl::sycl::ext::oneapi::detail::reduAuxCGFuncImplArrayHelper (nd_item< Dims > NDIt, size_t LID, size_t GID, size_t NWorkItems, size_t WGSize, LocalAccT LocalReds, InAccT In, OutAccT Out, T Identity, BOPT BOp, bool IsInitializeToIdentity) |
|
template<bool UniformPow2WG, bool IsOneWG, typename... Reductions, int Dims, typename... LocalAccT, typename... InAccT, typename... OutAccT, typename... Ts, typename... BOPsT, size_t... Is> |
void | cl::sycl::ext::oneapi::detail::reduAuxCGFuncImplArray (nd_item< Dims > NDIt, size_t LID, size_t GID, size_t NWorkItems, size_t WGSize, ReduTupleT< LocalAccT... > LocalAccsTuple, ReduTupleT< InAccT... > InAccsTuple, ReduTupleT< OutAccT... > OutAccsTuple, ReduTupleT< Ts... > IdentitiesTuple, ReduTupleT< BOPsT... > BOPsTuple, std::array< bool, sizeof...(Reductions)> InitToIdentityProps, std::index_sequence< Is... >) |
|
template<typename KernelName , typename KernelType , bool UniformPow2WG, bool IsOneWG, typename... Reductions, size_t... Is> |
void | cl::sycl::ext::oneapi::detail::reduAuxCGFuncImpl (handler &CGH, size_t NWorkItems, size_t NWorkGroups, size_t WGSize, std::tuple< Reductions... > &ReduTuple, std::index_sequence< Is... > ReduIndices) |
|
template<typename KernelName , typename KernelType , typename... Reductions, size_t... Is> |
size_t | cl::sycl::ext::oneapi::detail::reduAuxCGFunc (handler &CGH, size_t NWorkItems, size_t MaxWGSize, std::tuple< Reductions... > &ReduTuple, std::index_sequence< Is... >) |
|
void | cl::sycl::ext::oneapi::detail::reduSaveFinalResultToUserMemHelper (std::vector< event > &, std::shared_ptr< detail::queue_impl >, bool) |
|
template<typename Reduction , typename... RestT> |
std::enable_if_t< Reduction::is_usm > | cl::sycl::ext::oneapi::detail::reduSaveFinalResultToUserMemHelper (std::vector< event > &Events, std::shared_ptr< detail::queue_impl > Queue, bool IsHost, Reduction &, RestT... Rest) |
|
template<typename Reduction , typename... RestT> |
std::enable_if_t<!Reduction::is_usm > | cl::sycl::ext::oneapi::detail::reduSaveFinalResultToUserMemHelper (std::vector< event > &Events, std::shared_ptr< detail::queue_impl > Queue, bool IsHost, Reduction &Redu, RestT... Rest) |
|
template<typename... Reduction, size_t... Is> |
std::shared_ptr< event > | cl::sycl::ext::oneapi::detail::reduSaveFinalResultToUserMem (std::shared_ptr< detail::queue_impl > Queue, bool IsHost, std::tuple< Reduction... > &ReduTuple, std::index_sequence< Is... >) |
| Creates additional kernels that copy the accumulated/final results from reductions accessors to either user's accessor or user's USM memory. More...
|
|
template<typename Reduction > |
size_t | cl::sycl::ext::oneapi::detail::reduGetMemPerWorkItemHelper (Reduction &) |
|
template<typename Reduction , typename... RestT> |
size_t | cl::sycl::ext::oneapi::detail::reduGetMemPerWorkItemHelper (Reduction &, RestT... Rest) |
|
template<typename... ReductionT, size_t... Is> |
size_t | cl::sycl::ext::oneapi::detail::reduGetMemPerWorkItem (std::tuple< ReductionT... > &ReduTuple, std::index_sequence< Is... >) |
|
template<typename TupleT , std::size_t... Is> |
std::tuple< std::tuple_element_t< Is, TupleT >... > | cl::sycl::ext::oneapi::detail::tuple_select_elements (TupleT Tuple, std::index_sequence< Is... >) |
| Utility function: for the given tuple, returns a new tuple consisting of the elements selected by the given index sequence. More...
|
|
template<typename T , class BinaryOperation , int Dims, access::mode AccMode, access::placeholder IsPH> |
detail::reduction_impl< T, BinaryOperation, 0, 1, detail::default_reduction_algorithm< false, IsPH, Dims > > | cl::sycl::ext::oneapi::reduction (accessor< T, Dims, AccMode, access::target::device, IsPH > &Acc, const T &Identity, BinaryOperation BOp) |
| Creates and returns an object implementing the reduction functionality. More...
|
|
template<typename T , class BinaryOperation , int Dims, access::mode AccMode, access::placeholder IsPH> |
std::enable_if_t< detail::IsKnownIdentityOp< T, BinaryOperation >::value, detail::reduction_impl< T, BinaryOperation, 0, 1, detail::default_reduction_algorithm< false, IsPH, Dims > > > | cl::sycl::ext::oneapi::reduction (accessor< T, Dims, AccMode, access::target::device, IsPH > &Acc, BinaryOperation) |
| Creates and returns an object implementing the reduction functionality. More...
|
|
template<typename T , class BinaryOperation > |
detail::reduction_impl< T, BinaryOperation, 0, 1, detail::default_reduction_algorithm< true, access::placeholder::false_t, 1 > > | cl::sycl::ext::oneapi::reduction (T *VarPtr, const T &Identity, BinaryOperation BOp) |
| Creates and returns an object implementing the reduction functionality. More...
|
|
template<typename T , class BinaryOperation > |
std::enable_if_t< detail::IsKnownIdentityOp< T, BinaryOperation >::value, detail::reduction_impl< T, BinaryOperation, 0, 1, detail::default_reduction_algorithm< true, access::placeholder::false_t, 1 > > > | cl::sycl::ext::oneapi::reduction (T *VarPtr, BinaryOperation) |
| Creates and returns an object implementing the reduction functionality. More...
|
|