DPC++ Runtime
Runtime libraries for oneAPI DPC++
reduction.hpp File Reference
#include <sycl/access/access.hpp>
#include <sycl/accessor.hpp>
#include <sycl/aspects.hpp>
#include <sycl/atomic.hpp>
#include <sycl/atomic_ref.hpp>
#include <sycl/buffer.hpp>
#include <sycl/builtins.hpp>
#include <sycl/detail/export.hpp>
#include <sycl/detail/generic_type_traits.hpp>
#include <sycl/detail/impl_utils.hpp>
#include <sycl/detail/item_base.hpp>
#include <sycl/detail/reduction_forward.hpp>
#include <sycl/detail/tuple.hpp>
#include <sycl/device.hpp>
#include <sycl/event.hpp>
#include <sycl/exception.hpp>
#include <sycl/exception_list.hpp>
#include <sycl/ext/codeplay/experimental/fusion_properties.hpp>
#include <sycl/group.hpp>
#include <sycl/group_algorithm.hpp>
#include <sycl/handler.hpp>
#include <sycl/id.hpp>
#include <sycl/kernel.hpp>
#include <sycl/known_identity.hpp>
#include <sycl/marray.hpp>
#include <sycl/memory_enums.hpp>
#include <sycl/multi_ptr.hpp>
#include <sycl/nd_item.hpp>
#include <sycl/nd_range.hpp>
#include <sycl/properties/accessor_properties.hpp>
#include <sycl/properties/reduction_properties.hpp>
#include <sycl/property_list.hpp>
#include <sycl/queue.hpp>
#include <sycl/range.hpp>
#include <sycl/sycl_span.hpp>
#include <sycl/usm.hpp>
#include <algorithm>
#include <array>
#include <assert.h>
#include <cstddef>
#include <memory>
#include <optional>
#include <stdint.h>
#include <string>
#include <tuple>
#include <type_traits>
#include <utility>
#include <variant>
Include dependency graph for reduction.hpp:
This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Classes

struct  sycl::_V1::detail::ReducerTraits< reducer< T, BinaryOperation, Dims, Extent, IdentityContainerT, View, Subst > >
 
class  sycl::_V1::detail::ReducerAccess< ReducerT >
 Helper class for accessing internal reducer member functions. More...
 
class  sycl::_V1::detail::combiner< Reducer >
 Use CRTP to avoid redefining shorthand operators in terms of combine. More...
 
class  sycl::_V1::detail::ReductionIdentityContainer< T, BinaryOperation, ExplicitIdentity, CondT >
 Templated class for common functionality of all reduction implementation classes. More...
 
class  sycl::_V1::detail::ReductionIdentityContainer< T, BinaryOperation, ExplicitIdentity, enable_if_t< IsKnownIdentityOp< T, BinaryOperation >::value > >
 
class  sycl::_V1::detail::ReductionIdentityContainer< T, BinaryOperation, true, enable_if_t<!IsKnownIdentityOp< T, BinaryOperation >::value > >
 
class  sycl::_V1::detail::ReductionIdentityContainer< T, BinaryOperation, false, std::enable_if_t<!IsKnownIdentityOp< T, BinaryOperation >::value > >
 
class  sycl::_V1::detail::ReducerElement< T, BinaryOperation, IsOptional >
 
struct  sycl::_V1::is_device_copyable< detail::ReducerElement< T, BinaryOperation, IsOptional > >
 
class  sycl::_V1::detail::reducer_common< T, BinaryOperation, Dims >
 
struct  sycl::_V1::detail::ReducerToken< BinaryOperation, IdentityContainerT >
 
class  sycl::_V1::reducer< T, BinaryOperation, Dims, Extent, IdentityContainerT, View, std::enable_if_t< Dims==0 &&Extent==1 &&View==false &&!detail::IsKnownIdentityOp< T, BinaryOperation >::value > >
 Specialization of the generic class 'reducer'. More...
 
class  sycl::_V1::reducer< T, BinaryOperation, Dims, Extent, IdentityContainerT, View, std::enable_if_t< Dims==0 &&Extent==1 &&View==false &&detail::IsKnownIdentityOp< T, BinaryOperation >::value > >
 Specialization of the generic class 'reducer'. More...
 
class  sycl::_V1::reducer< T, BinaryOperation, Dims, Extent, IdentityContainerT, View, std::enable_if_t< Dims==0 &&View==true > >
 Component of 'reducer' class for array reductions, representing a single element of the span (as returned by the subscript operator). More...
 
class  sycl::_V1::reducer< T, BinaryOperation, Dims, Extent, IdentityContainerT, View, std::enable_if_t< Dims==1 &&View==false &&!detail::IsKnownIdentityOp< T, BinaryOperation >::value > >
 Specialization of 'reducer' class for array reductions exposing the subscript operator. More...
 
class  sycl::_V1::reducer< T, BinaryOperation, Dims, Extent, IdentityContainerT, View, std::enable_if_t< Dims==1 &&View==false &&detail::IsKnownIdentityOp< T, BinaryOperation >::value > >
 Specialization of 'reducer' class for array reductions accepting a span in cases where the identity value is known. More...
 
struct  sycl::_V1::detail::data_dim_t< T >
 
struct  sycl::_V1::detail::data_dim_t< accessor< T, AccessorDims, Mode, access::target::device, IsPH, PropList > >
 
struct  sycl::_V1::detail::get_red_t< T * >
 
struct  sycl::_V1::detail::get_red_t< buffer< T, Dims, AllocatorT > >
 
class  sycl::_V1::detail::reduction_impl_algo< T, BinaryOperation, Dims, Extent, ExplicitIdentity, RedOutVar >
 
class  sycl::_V1::detail::reduction_impl< T, BinaryOperation, Dims, Extent, ExplicitIdentity, RedOutVar >
 This class encapsulates the reduction variable/accessor, the reduction operator and an optional operator identity. More...
 
struct  sycl::_V1::detail::KernelOneWGTag
 
struct  sycl::_V1::detail::KernelMultipleWGTag
 
struct  sycl::_V1::detail::NDRangeReduction< reduction::strategy::local_atomic_and_atomic_cross_wg >
 
struct  sycl::_V1::detail::NDRangeReduction< reduction::strategy::group_reduce_and_last_wg_detection >
 
struct  sycl::_V1::detail::NDRangeReduction< reduction::strategy::range_basic >
 
struct  sycl::_V1::detail::NDRangeReduction< reduction::strategy::group_reduce_and_atomic_cross_wg >
 
struct  sycl::_V1::detail::NDRangeReduction< reduction::strategy::local_mem_tree_and_atomic_cross_wg >
 
struct  sycl::_V1::detail::NDRangeReduction< reduction::strategy::group_reduce_and_multiple_kernels >
 
struct  sycl::_V1::detail::NDRangeReduction< reduction::strategy::basic >
 
struct  sycl::_V1::detail::IsNonUsmReductionPredicate
 
struct  sycl::_V1::detail::IsNonUsmReductionPredicate::Func< T >
 
struct  sycl::_V1::detail::EmptyReductionPredicate
 
struct  sycl::_V1::detail::EmptyReductionPredicate::Func< T >
 
struct  sycl::_V1::detail::FilterElement< Cond, I >
 
struct  sycl::_V1::detail::IsScalarReduction
 
struct  sycl::_V1::detail::IsScalarReduction::Func< Reduction >
 
struct  sycl::_V1::detail::IsArrayReduction
 
struct  sycl::_V1::detail::IsArrayReduction::Func< Reduction >
 
struct  sycl::_V1::detail::NDRangeReduction< reduction::strategy::multi >
 
struct  sycl::_V1::detail::NDRangeReduction< reduction::strategy::auto_select >
 

Namespaces

 sycl
 
 sycl::_V1
 
 sycl::_V1::detail
 
 sycl::_V1::detail::reduction
 
 sycl::_V1::detail::reduction::main_krn
 
 sycl::_V1::detail::reduction::aux_krn
 

Typedefs

template<typename T , class BinaryOperation >
using sycl::_V1::detail::IsReduOptForFastAtomicFetch = std::bool_constant<((is_sgenfloat_v< T > &&sizeof(T)==4)||is_sgeninteger_v< T >)&&IsValidAtomicType< T >::value &&(IsPlus< T, BinaryOperation >::value||IsMinimum< T, BinaryOperation >::value||IsMaximum< T, BinaryOperation >::value||IsBitOR< T, BinaryOperation >::value||IsBitXOR< T, BinaryOperation >::value||IsBitAND< T, BinaryOperation >::value)>
 
template<typename T , class BinaryOperation >
using sycl::_V1::detail::IsReduOptForAtomic64Op = std::bool_constant<(IsPlus< T, BinaryOperation >::value||IsMinimum< T, BinaryOperation >::value||IsMaximum< T, BinaryOperation >::value) &&is_sgenfloat_v< T > &&sizeof(T)==8 >
 
template<typename T , class BinaryOperation >
using sycl::_V1::detail::IsReduOptForFastReduce = std::bool_constant<((is_sgeninteger_v< T > &&(sizeof(T)==4||sizeof(T)==8))||is_sgenfloat_v< T >)&&(IsPlus< T, BinaryOperation >::value||IsMinimum< T, BinaryOperation >::value||IsMaximum< T, BinaryOperation >::value)>
 
template<typename... Ts>
using sycl::_V1::detail::ReduTupleT = sycl::detail::tuple< Ts... >
 
template<class KernelName >
using sycl::_V1::detail::__sycl_init_mem_for = std::conditional_t< std::is_same_v< KernelName, auto_name >, auto_name, reduction::InitMemKrn< KernelName > >
 A helper to pass undefined (sycl::detail::auto_name) names unmodified. More...
 
template<template< typename, reduction::strategy, typename... > class MainOrAux, class KernelName , reduction::strategy Strategy, class... Ts>
using sycl::_V1::detail::__sycl_reduction_kernel = std::conditional_t< std::is_same_v< KernelName, auto_name >, auto_name, MainOrAux< KernelName, Strategy, Ts... > >
 A helper to pass undefined (sycl::detail::auto_name) names unmodified. More...
 

Enumerations

enum class  sycl::_V1::detail::WorkSizeGuarantees { sycl::_V1::detail::None , sycl::_V1::detail::Equal , sycl::_V1::detail::LessOrEqual }
 

Functions

template<typename... Ts>
ReduTupleT< Ts... > sycl::_V1::detail::makeReduTupleT (Ts... Elements)
 
size_t sycl::_V1::detail::reduGetMaxWGSize (std::shared_ptr< queue_impl > Queue, size_t LocalMemBytesPerWorkItem)
 
size_t sycl::_V1::detail::reduComputeWGSize (size_t NWorkItems, size_t MaxWGSize, size_t &NWorkGroups)
 
size_t sycl::_V1::detail::reduGetPreferredWGSize (std::shared_ptr< queue_impl > &Queue, size_t LocalMemBytesPerWorkItem)
 
template<typename ReducerT >
auto sycl::_V1::detail::getReducerAccess (ReducerT &Reducer)
 
template<class BinaryOp , int Dims, size_t Extent, bool ExplicitIdentity, typename RedOutVar , typename... RestTy>
auto sycl::_V1::detail::make_reduction (RedOutVar RedVar, RestTy &&...Rest)
 
void sycl::_V1::detail::reduction::finalizeHandler (handler &CGH)
 
template<class FunctorTy >
void sycl::_V1::detail::reduction::withAuxHandler (handler &CGH, FunctorTy Func)
 
template<typename KernelName , class Reduction >
void sycl::_V1::detail::reduSaveFinalResultToUserMem (handler &CGH, Reduction &Redu)
 Copies the final reduction result kept in a read-write accessor to the user's USM memory. More...
 
size_t sycl::_V1::detail::GreatestPowerOfTwo (size_t N)
 Computes the greatest power-of-two less than or equal to N. More...
 
template<typename FuncTy >
void sycl::_V1::detail::doTreeReductionHelper (size_t WorkSize, size_t LID, FuncTy Func)
 
template<WorkSizeGuarantees WSGuarantee, int Dim, typename LocalRedsTy , typename BinOpTy , typename AccessFuncTy >
void sycl::_V1::detail::doTreeReduction (size_t WorkSize, nd_item< Dim > NDIt, LocalRedsTy &LocalReds, BinOpTy &BOp, AccessFuncTy AccessFunc)
 
template<typename... LocalAccT, typename... BOPsT, size_t... Is>
void sycl::_V1::detail::doTreeReductionOnTuple (size_t WorkSize, size_t LID, ReduTupleT< LocalAccT... > &LocalAccs, ReduTupleT< BOPsT... > &BOPs, std::index_sequence< Is... >)
 
template<bool IsOneWG, typename... Reductions, size_t... Is>
auto sycl::_V1::detail::createReduOutAccs (size_t NWorkGroups, handler &CGH, std::tuple< Reductions... > &ReduTuple, std::index_sequence< Is... >)
 For the given 'Reductions' types pack and indices enumerating them this function either creates new temporary accessors for partial sums (if IsOneWG is false) or returns the user's accessor/USM-pointer (if IsOneWG is true). More...
 
template<typename OutAccT , typename LocalAccT , typename BOPT , typename IdentityContainerT >
auto sycl::_V1::detail::getLastCombine (OutAccT OutAcc, LocalAccT LocalAcc, BOPT BOP, IdentityContainerT IdentityContainer, bool IsInitializeToIdentity)
 
template<bool IsOneWG, typename... Reductions, typename... OutAccT, typename... LocalAccT, typename... BOPsT, typename... Ts, size_t... Is>
void sycl::_V1::detail::writeReduSumsToOutAccs (size_t OutAccIndex, ReduTupleT< OutAccT... > OutAccs, ReduTupleT< LocalAccT... > LocalAccs, ReduTupleT< BOPsT... > BOPs, ReduTupleT< Ts... > IdentityVals, std::array< bool, sizeof...(Reductions)> IsInitializeToIdentity, std::index_sequence< Is... >)
 
constexpr std::index_sequence<> sycl::_V1::detail::concat_sequences (std::index_sequence<>)
 
template<size_t I>
constexpr std::index_sequence< I > sycl::_V1::detail::concat_sequences (std::index_sequence< I >)
 
template<size_t... Is, size_t... Js>
constexpr std::index_sequence< Is..., Js... > sycl::_V1::detail::concat_sequences (std::index_sequence< Is... >, std::index_sequence< Js... >)
 
template<size_t... Is, size_t... Js, class... Rs>
constexpr auto sycl::_V1::detail::concat_sequences (std::index_sequence< Is... >, std::index_sequence< Js... >, Rs...)
 
template<typename... T, typename FunctorT , size_t... Is, std::enable_if_t<(sizeof...(Is) > 0), int > Z = 0>
constexpr auto sycl::_V1::detail::filterSequenceHelper (FunctorT, std::index_sequence< Is... >)
 For each index 'I' from the given indices pack 'Is' this function initially creates a short index_sequence, which is either empty (if the given Functor returns false for the type T[I]) or contains the single element 'I' (otherwise). More...
 
template<typename... T, typename FunctorT , size_t... Is>
constexpr auto sycl::_V1::detail::filterSequence (FunctorT F, std::index_sequence< Is... > Indices)
 For each index 'I' from the given indices pack 'Is' this function returns an index sequence consisting of only those 'I's for which the 'FunctorT' applied to 'T[I]' returns true. More...
 
template<typename ElementType , typename BOPT >
constexpr auto sycl::_V1::detail::makeAdjustedBOP (BOPT &BOP)
 
template<typename... Reductions, typename... BOPsT, size_t... Is>
constexpr auto sycl::_V1::detail::makeAdjustedBOPs (ReduTupleT< BOPsT... > &BOPsTuple, std::index_sequence< Is... >)
 
template<typename... Reductions, typename... BOPsT>
constexpr auto sycl::_V1::detail::makeAdjustedBOPs (ReduTupleT< BOPsT... > &BOPsTuple)
 
template<bool IsOneWG, typename... Reductions, int Dims, typename... LocalAccT, typename... OutAccT, typename... ReducerT, typename... Ts, typename... BOPsT, size_t... Is>
void sycl::_V1::detail::reduCGFuncImplScalar (nd_item< Dims > NDIt, ReduTupleT< LocalAccT... > LocalAccsTuple, ReduTupleT< OutAccT... > OutAccsTuple, std::tuple< ReducerT... > &ReducersTuple, ReduTupleT< Ts... > IdentitiesTuple, ReduTupleT< BOPsT... > BOPsTuple, std::array< bool, sizeof...(Reductions)> InitToIdentityProps, std::index_sequence< Is... > ReduIndices)
 All scalar reductions are processed together; there is one loop of log2(N) steps, and each reduction uses its own storage. More...
 
template<bool IsOneWG, typename Reduction , int Dims, typename LocalAccT , typename OutAccT , typename ReducerT , typename BOPT >
void sycl::_V1::detail::reduCGFuncImplArrayHelper (nd_item< Dims > NDIt, LocalAccT LocalReds, OutAccT Out, ReducerT &Reducer, BOPT BOp, bool IsInitializeToIdentity)
 Each array reduction is processed separately. More...
 
template<bool IsOneWG, typename... Reductions, int Dims, typename... LocalAccT, typename... OutAccT, typename... ReducerT, typename... BOPsT, size_t... Is>
void sycl::_V1::detail::reduCGFuncImplArray (nd_item< Dims > NDIt, ReduTupleT< LocalAccT... > LocalAccsTuple, ReduTupleT< OutAccT... > OutAccsTuple, std::tuple< ReducerT... > &ReducersTuple, ReduTupleT< BOPsT... > BOPsTuple, std::array< bool, sizeof...(Reductions)> InitToIdentityProps, std::index_sequence< Is... >)
 
template<typename KernelName , typename KernelType , int Dims, typename PropertiesT , typename... Reductions, size_t... Is>
void sycl::_V1::detail::reduCGFuncMulti (handler &CGH, KernelType KernelFunc, const nd_range< Dims > &Range, PropertiesT Properties, std::tuple< Reductions... > &ReduTuple, std::index_sequence< Is... > ReduIndices)
 
template<typename... Reductions, size_t... Is>
void sycl::_V1::detail::associateReduAccsWithHandler (handler &CGH, std::tuple< Reductions... > &ReduTuple, std::index_sequence< Is... >)
 
template<bool IsOneWG, typename... Reductions, int Dims, typename... LocalAccT, typename... InAccT, typename... OutAccT, typename... Ts, typename... BOPsT, size_t... Is>
void sycl::_V1::detail::reduAuxCGFuncImplScalar (nd_item< Dims > NDIt, size_t LID, size_t GID, size_t RemainingWorkSize, ReduTupleT< LocalAccT... > LocalAccsTuple, ReduTupleT< InAccT... > InAccsTuple, ReduTupleT< OutAccT... > OutAccsTuple, ReduTupleT< Ts... > IdentitiesTuple, ReduTupleT< BOPsT... > BOPsTuple, std::array< bool, sizeof...(Reductions)> InitToIdentityProps, std::index_sequence< Is... > ReduIndices)
 All scalar reductions are processed together; there is one loop of log2(N) steps, and each reduction uses its own storage. More...
 
template<bool IsOneWG, typename Reduction , int Dims, typename LocalAccT , typename InAccT , typename OutAccT , typename T , typename BOPT >
void sycl::_V1::detail::reduAuxCGFuncImplArrayHelper (nd_item< Dims > NDIt, size_t LID, size_t GID, size_t RemainingWorkSize, LocalAccT LocalReds, InAccT In, OutAccT Out, T IdentityContainer, BOPT BOp, bool IsInitializeToIdentity)
 
template<bool IsOneWG, typename... Reductions, int Dims, typename... LocalAccT, typename... InAccT, typename... OutAccT, typename... Ts, typename... BOPsT, size_t... Is>
void sycl::_V1::detail::reduAuxCGFuncImplArray (nd_item< Dims > NDIt, size_t LID, size_t GID, size_t RemainingWorkSize, ReduTupleT< LocalAccT... > LocalAccsTuple, ReduTupleT< InAccT... > InAccsTuple, ReduTupleT< OutAccT... > OutAccsTuple, ReduTupleT< Ts... > IdentitiesTuple, ReduTupleT< BOPsT... > BOPsTuple, std::array< bool, sizeof...(Reductions)> InitToIdentityProps, std::index_sequence< Is... >)
 
template<typename KernelName , typename KernelType , typename... Reductions, size_t... Is>
size_t sycl::_V1::detail::reduAuxCGFunc (handler &CGH, size_t NWorkItems, size_t MaxWGSize, std::tuple< Reductions... > &ReduTuple, std::index_sequence< Is... > ReduIndices)
 
template<typename Reduction >
size_t sycl::_V1::detail::reduGetMemPerWorkItemHelper (Reduction &)
 
template<typename Reduction , typename... RestT>
size_t sycl::_V1::detail::reduGetMemPerWorkItemHelper (Reduction &, RestT... Rest)
 
template<typename... ReductionT, size_t... Is>
size_t sycl::_V1::detail::reduGetMemPerWorkItem (std::tuple< ReductionT... > &ReduTuple, std::index_sequence< Is... >)
 
template<typename TupleT , std::size_t... Is>
std::tuple< std::tuple_element_t< Is, TupleT >... > sycl::_V1::detail::tuple_select_elements (TupleT Tuple, std::index_sequence< Is... >)
 Utility function: for the given tuple, returns a new tuple containing only the elements selected by the index sequence Is. More...
 
template<typename KernelName , reduction::strategy Strategy = reduction::strategy::auto_select, int Dims, typename PropertiesT , typename... RestT>
void sycl::_V1::detail::reduction_parallel_for (handler &CGH, nd_range< Dims > NDRange, PropertiesT Properties, RestT... Rest)
 
uint32_t sycl::_V1::detail::reduGetMaxNumConcurrentWorkGroups (std::shared_ptr< queue_impl > Queue)
 
template<typename KernelName , reduction::strategy Strategy = reduction::strategy::auto_select, int Dims, typename PropertiesT , typename... RestT>
void sycl::_V1::detail::reduction_parallel_for (handler &CGH, range< Dims > NDRange, PropertiesT Properties, RestT... Rest)
 
template<typename T , typename AllocatorT , typename BinaryOperation >
auto sycl::_V1::reduction (buffer< T, 1, AllocatorT > Var, handler &CGH, BinaryOperation Combiner, const property_list &PropList={})
 Constructs a reduction object using the given buffer Var, handler CGH, reduction operation Combiner, and optional reduction properties. More...
 
template<typename T , typename BinaryOperation >
auto sycl::_V1::reduction (T *Var, BinaryOperation Combiner, const property_list &PropList={})
 Constructs a reduction object using the reduction variable referenced by the given USM pointer Var, reduction operation Combiner, and optional reduction properties. More...
 
template<typename T , typename AllocatorT , typename BinaryOperation >
auto sycl::_V1::reduction (buffer< T, 1, AllocatorT > Var, handler &CGH, const T &Identity, BinaryOperation Combiner, const property_list &PropList={})
 Constructs a reduction object using the given buffer Var, handler CGH, reduction identity value Identity, reduction operation Combiner, and optional reduction properties. More...
 
template<typename T , typename BinaryOperation >
auto sycl::_V1::reduction (T *Var, const T &Identity, BinaryOperation Combiner, const property_list &PropList={})
 Constructs a reduction object using the reduction variable referenced by the given USM pointer Var, reduction identity value Identity, binary operation Combiner, and optional reduction properties. More...
 
template<typename T , size_t Extent, typename BinaryOperation , typename = std::enable_if_t<Extent != dynamic_extent>>
auto sycl::_V1::reduction (span< T, Extent > Span, BinaryOperation Combiner, const property_list &PropList={})
 Constructs a reduction object using the reduction variable referenced by the given sycl::span Span, reduction operation Combiner, and optional reduction properties. More...
 
template<typename T , size_t Extent, typename BinaryOperation , typename = std::enable_if_t<Extent != dynamic_extent>>
auto sycl::_V1::reduction (span< T, Extent > Span, const T &Identity, BinaryOperation Combiner, const property_list &PropList={})
 Constructs a reduction object using the reduction variable referenced by the given sycl::span Span, reduction identity value Identity, reduction operation Combiner, and optional reduction properties. More...