11 #if (!defined(_HAS_STD_BYTE) || _HAS_STD_BYTE != 0)
21 #ifdef __SYCL_DEVICE_ONLY__
31 #include <system_error>
32 #include <type_traits>
35 inline namespace _V1 {
36 namespace ext::oneapi::experimental {
45 : g(g_), scratch(scratch_) {}
57 template <
size_t Extent>
59 Compare comp_ = Compare())
60 : comp(comp_), scratch(scratch_.data()), scratch_size(scratch_.size()) {}
62 template <
typename Group,
typename Ptr>
64 #ifdef __SYCL_DEVICE_ONLY__
65 using T =
typename sycl::detail::GetValueType<Ptr>::type;
67 sycl::detail::merge_sort(g, first, last - first, comp, scratch);
75 "default_sorter constructor is not supported on host device.");
79 template <
typename Group,
typename T> T
operator()(Group g, T val) {
80 #ifdef __SYCL_DEVICE_ONLY__
81 auto range_size = g.get_local_range().size();
83 size_t local_id = g.get_local_linear_id();
84 T *temp =
reinterpret_cast<T *
>(scratch);
85 ::new (temp + local_id) T(val);
86 sycl::detail::merge_sort(g, temp, range_size, comp,
87 scratch + range_size *
sizeof(T));
95 "default_sorter operator() is not supported on host device.");
100 template <
typename T>
103 return range_size *
sizeof(T) +
alignof(T);
106 template <
typename T,
int dim = 1>
109 return 2 * memory_required<T>(scope,
r.size());
117 template <
typename T, sorting_order = sorting_order::ascending>
128 unsigned int BitsPerPass = 4>
132 uint32_t first_bit = 0;
133 uint32_t last_bit = 0;
134 size_t scratch_size = 0;
136 static constexpr uint32_t bits = BitsPerPass;
139 template <
size_t Extent>
141 const std::bitset<
sizeof(ValT) *CHAR_BIT> mask =
142 std::bitset<
sizeof(ValT) * CHAR_BIT>(
144 : scratch(scratch_.data()), scratch_size(scratch_.size()) {
145 static_assert((std::is_arithmetic<ValT>::value ||
146 std::is_same<ValT, sycl::half>::value ||
147 std::is_same<ValT, sycl::ext::oneapi::bfloat16>::value),
148 "radix sort is not usable");
151 while (first_bit < mask.size() && !mask[first_bit])
154 last_bit = first_bit;
155 while (last_bit < mask.size() && mask[last_bit])
159 template <
typename GroupT,
typename PtrT>
164 #ifdef __SYCL_DEVICE_ONLY__
165 sycl::detail::privateDynamicSort<
false,
168 g, first, first, (last - first) > 0 ? (last - first) : 0,
169 scratch, first_bit, last_bit);
173 "radix_sorter is not supported on host device.");
177 template <
typename GroupT> ValT
operator()(GroupT g, ValT val) {
180 #ifdef __SYCL_DEVICE_ONLY__
182 sycl::detail::privateStaticSort<
false,
186 g, result, result, scratch, first_bit, last_bit);
191 "radix_sorter is not supported on host device.");
199 return range_size *
sizeof(ValT) +
200 (1 << bits) * range_size *
sizeof(uint32_t) +
alignof(uint32_t);
204 template <
int dimensions = 1>
209 return (std::max)(local_range.
size() *
sizeof(ValT),
210 local_range.
size() * (1 << bits) *
sizeof(uint32_t));
default_sorter(sycl::span< std::byte, Extent > scratch_, Compare comp_=Compare())
static constexpr size_t memory_required(sycl::memory_scope scope, sycl::range< dim > r)
static constexpr size_t memory_required(sycl::memory_scope, size_t range_size)
T operator()(Group g, T val)
void operator()(Group g, Ptr first, Ptr last)
sycl::span< std::byte, Extent > get_memory() const
group_with_scratchpad(Group g_, sycl::span< std::byte, Extent > scratch_)
radix_sorter(sycl::span< std::byte, Extent > scratch_, const std::bitset< sizeof(ValT) *CHAR_BIT > mask=std::bitset< sizeof(ValT) *CHAR_BIT >((std::numeric_limits< unsigned long long >::max)()))
void operator()(GroupT g, PtrT first, PtrT last)
static constexpr size_t memory_required(sycl::memory_scope scope, sycl::range< dimensions > local_range)
ValT operator()(GroupT g, ValT val)
static constexpr size_t memory_required(sycl::memory_scope scope, size_t range_size)
Defines the iteration domain of either a single work-group in a parallel dispatch,...
fence_scope
The scope that fence() operation should apply to.
const std::error_category & sycl_category() noexcept