39 template <
typename R,
typename... Types>
41 return sizeof...(Types);
48 if constexpr (Dim == 3) {
50 }
else if constexpr (Dim == 2) {
52 {1, local_range[0], local_range[1]}};
57 template <
auto F,
typename... Args>
58 std::enable_if_t<std::is_invocable_v<decltype(F), Args...>,
sycl::event>
61 "Wrong number of arguments to SYCL kernel");
63 std::is_same<std::invoke_result_t<decltype(F), Args...>,
void>::value,
64 "SYCL kernels should return void");
70 template <
auto F,
typename... Args>
74 "Wrong number of arguments to SYCL kernel");
76 using F_t = decltype(F);
77 using f_return_t =
typename std::invoke_result_t<F_t, Args...,
char *>;
78 static_assert(std::is_same<f_return_t, void>::value,
79 "SYCL kernels should return void");
85 [[clang::always_inline]] F(args...,
local_mem);
97 throw std::invalid_argument(
"Global or local size is zero!");
99 for (
size_t i = 0; i < Dim; ++i) {
101 throw std::invalid_argument(
"Work group size larger than global size");
115 template <
auto F,
int Dim,
typename... Args>
116 std::enable_if_t<std::is_invocable_v<decltype(F), Args...>,
sycl::event>
118 return detail::launch<F>(detail::transform_nd_range<Dim>(range), q, args...);
121 template <
auto F,
int Dim,
typename... Args>
122 std::enable_if_t<std::is_invocable_v<decltype(F), Args...>,
sycl::event>
128 template <
auto F,
typename... Args>
129 std::enable_if_t<std::is_invocable_v<decltype(F), Args...>,
sycl::event>
134 template <
auto F,
typename... Args>
135 std::enable_if_t<std::is_invocable_v<decltype(F), Args...>,
sycl::event>
154 template <
auto F,
int Dim,
typename... Args>
157 return detail::launch<F>(detail::transform_nd_range<Dim>(range), mem_size, q,
174 template <
auto F,
int Dim,
typename... Args>
195 template <
auto F,
typename... Args>
217 template <
auto F,
typename... Args>
The file contains implementations of accessor class.
An event object can be used to synchronize memory transfers, enqueues of kernels and signaling barriers.
Command group handler class.
void parallel_for(range< 1 > NumWorkItems, _KERNELFUNCPARAM(KernelFunc))
Identifies an instance of the function object executing at each point in an nd_range.
Defines the iteration domain of both the work-groups and the overall dispatch.
range< Dimensions > get_global_range() const
range< Dimensions > get_local_range() const
Encapsulates a single SYCL queue which schedules kernels on a SYCL device.
event parallel_for(range< 1 > Range, RestT &&...Rest)
parallel_for version with a kernel represented as a lambda + range that specifies global size only.
std::enable_if_t< std::is_invocable_r_v< void, T, handler & >, event > submit(T CGF, const detail::code_location &CodeLoc=detail::code_location::current())
Submits a command group function object to the queue, in order to be scheduled for execution on the device.
Defines the iteration domain of either a single work-group in a parallel dispatch, or the overall dimensions of the dispatch.
constexpr work_group_size_key::value_t< Dim0, Dims... > work_group_size
sycl::nd_range< 3 > transform_nd_range(const sycl::nd_range< Dim > &range)
std::enable_if_t< std::is_invocable_v< decltype(F), Args... >, sycl::event > launch(const sycl::nd_range< 3 > &range, sycl::queue q, Args... args)
constexpr size_t getArgumentCount(R(*f)(Types...))
static sycl::queue get_default_queue()
Util function to get the default queue of current device in device manager.
sycl::nd_range< Dim > compute_nd_range(sycl::range< Dim > global_size_in, sycl::range< Dim > work_group_size)
std::enable_if_t< std::is_invocable_v< decltype(F), Args... >, sycl::event > launch(const sycl::nd_range< Dim > &range, sycl::queue q, Args... args)