20 size_t &NWorkGroups) {
21 size_t WGSize = MaxWGSize;
22 if (NWorkItems <= WGSize) {
26 NWorkGroups = NWorkItems / WGSize;
27 size_t Rem = NWorkItems % WGSize;
32 size_t NWorkGroupsAlt = NWorkItems / Rem;
33 size_t RemAlt = NWorkItems % Rem;
34 if (RemAlt == 0 && NWorkGroupsAlt <= MaxWGSize) {
39 NWorkGroups = NWorkGroupsAlt;
54 std::shared_ptr<sycl::detail::queue_impl> Queue) {
55 device Dev = Queue->get_device();
56 uint32_t NumThreads = Dev.
get_info<sycl::info::device::max_compute_units>();
59 if (Dev.
is_gpu() && Dev.
get_info<sycl::info::device::host_unified_memory>())
66 size_t LocalMemBytesPerWorkItem) {
67 device Dev = Queue->get_device();
68 size_t MaxWGSize = Dev.
get_info<sycl::info::device::max_work_group_size>();
70 size_t WGSizePerMem = MaxWGSize * 2;
71 size_t WGSize = MaxWGSize;
72 if (LocalMemBytesPerWorkItem != 0) {
73 size_t MemSize = Dev.
get_info<sycl::info::device::local_mem_size>();
74 WGSizePerMem = MemSize / LocalMemBytesPerWorkItem;
79 if ((WGSizePerMem & (WGSizePerMem - 1)) != 0)
81 WGSize = (std::min)(WGSizePerMem, WGSize);
96 if (WGSize >= 4 && WGSizePerMem < MaxWGSize * 2) {
106 size_t LocalMemBytesPerWorkItem) {
107 device Dev = Queue->get_device();
116 using PrefWGConfig = sycl::detail::SYCLConfig<
117 sycl::detail::SYCL_REDUCTION_PREFERRED_WORKGROUP_SIZE>;
120 if (CPUMaxWGSize == 0)
122 size_t DevMaxWGSize =
123 Dev.
get_info<sycl::info::device::max_work_group_size>();
124 return std::min(CPUMaxWGSize, DevMaxWGSize);
130 size_t DevMaxWGSize =
131 Dev.
get_info<sycl::info::device::max_work_group_size>();
138 size_t DevMaxWGSize =
139 Dev.
get_info<sycl::info::device::max_work_group_size>();