DPC++ Runtime
Runtime libraries for oneAPI DPC++
launch_experimental.hpp
Go to the documentation of this file.
1 /***************************************************************************
2  *
3  * Copyright (C) Codeplay Software Ltd.
4  *
5  * Part of the LLVM Project, under the Apache License v2.0 with LLVM
6  * Exceptions. See https://llvm.org/LICENSE.txt for license information.
7  * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  *
15  * SYCLcompat
16  *
17  * launch_experimental.hpp
18  *
19  * Description:
20  * Launch overloads that accept a required sub-group size
21  **************************************************************************/
22 
23 #pragma once
24 
25 #include <syclcompat/device.hpp>
26 #include <syclcompat/dims.hpp>
27 #include <syclcompat/launch.hpp>
28 
29 namespace syclcompat {
30 namespace experimental {
31 
32 //================================================================================================//
33 // Overloads using Local Memory //
34 //================================================================================================//
35 
36 template <auto F, int SubgroupSize, typename... Args>
37 std::enable_if_t<std::is_invocable_v<decltype(F), Args..., char *>, sycl::event>
38 launch(sycl::nd_range<3> launch_range, std::size_t local_memory_size,
39  sycl::queue queue, Args... args) {
40  return queue.submit([&](sycl::handler &cgh) {
41  sycl::local_accessor<char, 1> loc(local_memory_size, cgh);
42  cgh.parallel_for(
43  launch_range,
44  [=](sycl::nd_item<3> it) [[sycl::reqd_sub_group_size(SubgroupSize)]] {
45  [[clang::always_inline]] F(
46  args..., loc.get_multi_ptr<sycl::access::decorated::yes>());
47  });
48  });
49 }
50 
51 template <auto F, int SubgroupSize, int Dim, typename... Args>
52 std::enable_if_t<std::is_invocable_v<decltype(F), Args..., char *>, sycl::event>
53 launch(sycl::nd_range<Dim> launch_range, std::size_t local_memory_size,
54  Args... args) {
55  return launch<F, SubgroupSize, Args...>(
56  ::syclcompat::detail::transform_nd_range(launch_range), local_memory_size,
58 }
59 
60 template <auto F, int SubgroupSize, typename... Args>
61 std::enable_if_t<std::is_invocable_v<decltype(F), Args..., char *>, sycl::event>
62 launch(::syclcompat::dim3 grid_dim, ::syclcompat::dim3 block_dim,
63  std::size_t local_memory_size, Args... args) {
64  return launch<F, SubgroupSize, Args...>(
66  sycl::range<3>(grid_dim * block_dim), sycl::range<3>(block_dim))),
67  local_memory_size, ::syclcompat::get_default_queue(), args...);
68 }
69 
70 //================================================================================================//
71 // Overloads not using Local Memory //
72 //================================================================================================//
73 
74 template <auto F, int SubgroupSize, typename... Args>
75 std::enable_if_t<std::is_invocable_v<decltype(F), Args...>, sycl::event>
76 launch(sycl::nd_range<3> launch_range, sycl::queue queue, Args... args) {
77  return queue.submit([&](sycl::handler &cgh) {
78  cgh.parallel_for(launch_range,
79  [=](sycl::nd_item<3> it)
80  [[sycl::reqd_sub_group_size(SubgroupSize)]] {
81  [[clang::always_inline]] F(args...);
82  });
83  });
84 }
85 
86 template <auto F, int SubgroupSize, int Dim, typename... Args>
87 std::enable_if_t<std::is_invocable_v<decltype(F), Args...>, sycl::event>
88 launch(sycl::nd_range<Dim> launch_range, Args... args) {
89  return launch<F, SubgroupSize, Args...>(
92 }
93 
94 template <auto F, int SubgroupSize, typename... Args>
95 std::enable_if_t<std::is_invocable_v<decltype(F), Args...>, sycl::event>
96 launch(::syclcompat::dim3 grid_dim, ::syclcompat::dim3 block_dim,
97  Args... args) {
98  return launch<F, SubgroupSize, Args...>(
100  sycl::range<3>(grid_dim * block_dim), sycl::range<3>(block_dim))),
102 }
103 
104 } // namespace experimental
105 } // namespace syclcompat
An event object can be used to synchronize memory transfers, enqueues of kernels and signaling barrie...
Definition: event.hpp:44
Command group handler class.
Definition: handler.hpp:468
void parallel_for(range< 1 > NumWorkItems, _KERNELFUNCPARAM(KernelFunc))
Definition: handler.hpp:2011
Identifies an instance of the function object executing at each point in an nd_range.
Definition: nd_item.hpp:48
Defines the iteration domain of both the work-groups and the overall dispatch.
Definition: nd_range.hpp:22
Encapsulates a single SYCL queue which schedules kernels on a SYCL device.
Definition: queue.hpp:110
std::enable_if_t< std::is_invocable_r_v< void, T, handler & >, event > submit(T CGF, const detail::code_location &CodeLoc=detail::code_location::current())
Submits a command group function object to the queue, in order to be scheduled for execution on the d...
Definition: queue.hpp:340
Defines the iteration domain of either a single work-group in a parallel dispatch,...
Definition: range.hpp:26
sycl::nd_range< 3 > transform_nd_range(const sycl::nd_range< Dim > &range)
Definition: launch.hpp:45
std::enable_if_t< std::is_invocable_v< decltype(F), Args..., char * >, sycl::event > launch(sycl::nd_range< 3 > launch_range, std::size_t local_memory_size, sycl::queue queue, Args... args)
static sycl::queue get_default_queue()
Util function to get the default queue of current device in device manager.
Definition: device.hpp:872