DPC++ Runtime
Runtime libraries for oneAPI DPC++
helpers.hpp
Go to the documentation of this file.
1 //==---------------- helpers.hpp - SYCL helpers ----------------------------==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #pragma once
10 
11 #include <CL/__spirv/spirv_types.hpp> // for MemorySemanticsMask
12 #include <sycl/access/access.hpp> // for fence_space
13 #include <sycl/detail/export.hpp> // for __SYCL_EXPORT
14 #include <sycl/memory_enums.hpp> // for memory_order
15 
16 #ifdef __SYCL_DEVICE_ONLY__
18 #endif
19 
20 #include <cstddef> // for size_t
21 #include <memory> // for shared_ptr
22 #include <stdint.h> // for uint32_t
23 #include <type_traits> // for enable_if_t, integral_constant
24 #include <utility> // for forward, integer_sequence, mak...
25 #include <vector> // for vector
26 
27 namespace sycl {
28 inline namespace _V1 {
29 class context;
30 class event;
31 template <int Dims, bool WithOffset> class item;
32 template <int Dims> class group;
33 template <int Dims> class range;
34 template <int Dims> class id;
35 template <int Dims> class nd_item;
36 template <int Dims> class h_item;
37 template <typename Type, std::size_t NumElements> class marray;
38 enum class memory_order;
39 
40 namespace detail {
41 class CGExecKernel;
42 class buffer_impl;
43 class context_impl;
44 class queue_impl;
45 using QueueImplPtr = std::shared_ptr<sycl::detail::queue_impl>;
47 
48 __SYCL_EXPORT void waitEvents(std::vector<sycl::event> DepEvents);
49 
50 __SYCL_EXPORT void
51 markBufferAsInternal(const std::shared_ptr<buffer_impl> &BufImpl);
52 
53 template <typename T> T *declptr() { return static_cast<T *>(nullptr); }
54 
55 // Function to get or store id, item, nd_item, group for the host implementation
56 // Pass nullptr to get stored object. Pass valid address to store object
57 template <typename T> T get_or_store(const T *obj) {
58  static thread_local auto stored = *obj;
59  if (obj != nullptr) {
60  stored = *obj;
61  }
62  return stored;
63 }
64 
65 class Builder {
66 public:
67  Builder() = delete;
68 
69  template <int Dims>
70  static group<Dims>
71  createGroup(const range<Dims> &Global, const range<Dims> &Local,
72  const range<Dims> &Group, const id<Dims> &Index) {
73  return group<Dims>(Global, Local, Group, Index);
74  }
75 
76  template <int Dims>
77  static group<Dims> createGroup(const range<Dims> &Global,
78  const range<Dims> &Local,
79  const id<Dims> &Index) {
80  return group<Dims>(Global, Local, Global / Local, Index);
81  }
82 
83  template <class ResType, typename BitsType>
84  static ResType createSubGroupMask(BitsType Bits, size_t BitsNum) {
85  return ResType(Bits, BitsNum);
86  }
87 
88  template <int Dims, bool WithOffset>
89  static std::enable_if_t<WithOffset, item<Dims, WithOffset>>
90  createItem(const range<Dims> &Extent, const id<Dims> &Index,
91  const id<Dims> &Offset) {
92  return item<Dims, WithOffset>(Extent, Index, Offset);
93  }
94 
95  template <int Dims, bool WithOffset>
96  static std::enable_if_t<!WithOffset, item<Dims, WithOffset>>
97  createItem(const range<Dims> &Extent, const id<Dims> &Index) {
98  return item<Dims, WithOffset>(Extent, Index);
99  }
100 
101  template <int Dims>
103  const item<Dims, false> &Local,
104  const group<Dims> &Group) {
105  return nd_item<Dims>(Global, Local, Group);
106  }
107 
108  template <int Dims>
110  const item<Dims, false> &Local) {
111  return h_item<Dims>(Global, Local);
112  }
113 
114  template <int Dims>
116  const item<Dims, false> &Local,
117  const range<Dims> &Flex) {
118  return h_item<Dims>(Global, Local, Flex);
119  }
120 
121  template <int Dims, bool WithOffset>
123  const id<Dims> &NextIndex) {
124  Item.MImpl.MIndex = NextIndex;
125  }
126 
127 #ifdef __SYCL_DEVICE_ONLY__
128 
129  template <int N>
130  using is_valid_dimensions = std::integral_constant<bool, (N > 0) && (N < 4)>;
131 
132  template <int Dims> static const id<Dims> getElement(id<Dims> *) {
133  static_assert(is_valid_dimensions<Dims>::value, "invalid dimensions");
134  return __spirv::initGlobalInvocationId<Dims, id<Dims>>();
135  }
136 
137  template <int Dims> static const group<Dims> getElement(group<Dims> *) {
138  static_assert(is_valid_dimensions<Dims>::value, "invalid dimensions");
139  range<Dims> GlobalSize{__spirv::initGlobalSize<Dims, range<Dims>>()};
140  range<Dims> LocalSize{__spirv::initWorkgroupSize<Dims, range<Dims>>()};
141  range<Dims> GroupRange{__spirv::initNumWorkgroups<Dims, range<Dims>>()};
142  id<Dims> GroupId{__spirv::initWorkgroupId<Dims, id<Dims>>()};
143  return createGroup<Dims>(GlobalSize, LocalSize, GroupRange, GroupId);
144  }
145 
146  template <int Dims, bool WithOffset>
147  static std::enable_if_t<WithOffset, const item<Dims, WithOffset>> getItem() {
148  static_assert(is_valid_dimensions<Dims>::value, "invalid dimensions");
149  id<Dims> GlobalId{__spirv::initGlobalInvocationId<Dims, id<Dims>>()};
150  range<Dims> GlobalSize{__spirv::initGlobalSize<Dims, range<Dims>>()};
151  id<Dims> GlobalOffset{__spirv::initGlobalOffset<Dims, id<Dims>>()};
152  return createItem<Dims, true>(GlobalSize, GlobalId, GlobalOffset);
153  }
154 
155  template <int Dims, bool WithOffset>
156  static std::enable_if_t<!WithOffset, const item<Dims, WithOffset>> getItem() {
157  static_assert(is_valid_dimensions<Dims>::value, "invalid dimensions");
158  id<Dims> GlobalId{__spirv::initGlobalInvocationId<Dims, id<Dims>>()};
159  range<Dims> GlobalSize{__spirv::initGlobalSize<Dims, range<Dims>>()};
160  return createItem<Dims, false>(GlobalSize, GlobalId);
161  }
162 
163  template <int Dims> static const nd_item<Dims> getElement(nd_item<Dims> *) {
164  static_assert(is_valid_dimensions<Dims>::value, "invalid dimensions");
165  range<Dims> GlobalSize{__spirv::initGlobalSize<Dims, range<Dims>>()};
166  range<Dims> LocalSize{__spirv::initWorkgroupSize<Dims, range<Dims>>()};
167  range<Dims> GroupRange{__spirv::initNumWorkgroups<Dims, range<Dims>>()};
168  id<Dims> GroupId{__spirv::initWorkgroupId<Dims, id<Dims>>()};
169  id<Dims> GlobalId{__spirv::initGlobalInvocationId<Dims, id<Dims>>()};
170  id<Dims> LocalId{__spirv::initLocalInvocationId<Dims, id<Dims>>()};
171  id<Dims> GlobalOffset{__spirv::initGlobalOffset<Dims, id<Dims>>()};
172  group<Dims> Group =
173  createGroup<Dims>(GlobalSize, LocalSize, GroupRange, GroupId);
174  item<Dims, true> GlobalItem =
175  createItem<Dims, true>(GlobalSize, GlobalId, GlobalOffset);
176  item<Dims, false> LocalItem = createItem<Dims, false>(LocalSize, LocalId);
177  return createNDItem<Dims>(GlobalItem, LocalItem, Group);
178  }
179 
180  template <int Dims, bool WithOffset>
181  static auto getElement(item<Dims, WithOffset> *)
182  -> decltype(getItem<Dims, WithOffset>()) {
183  return getItem<Dims, WithOffset>();
184  }
185 
186  template <int Dims>
187  static auto getNDItem() -> decltype(getElement(declptr<nd_item<Dims>>())) {
188  return getElement(declptr<nd_item<Dims>>());
189  }
190 
191 #endif // __SYCL_DEVICE_ONLY__
192 };
193 
194 inline constexpr __spv::MemorySemanticsMask::Flag
197 }
198 
199 inline constexpr uint32_t
201  const __spv::MemorySemanticsMask LocalScopeMask =
203  // Huge ternary operator below is a workaround for constexpr function
204  // requirement that such function can only contain return statement and
205  // nothing more
206  //
207  // It is equivalent to the following code:
208  //
209  // uint32_t Flags =
210  // static_cast<uint32_t>(__spv::MemorySemanticsMask::SequentiallyConsistent);
211  // switch (AccessSpace) {
212  // case access::fence_space::global_space:
213  // Flags |=
214  // static_cast<uint32_t>(__spv::MemorySemanticsMask::CrossWorkgroupMemory);
215  // break;
216  // case access::fence_space::local_space:
217  // Flags |= static_cast<uint32_t>(LocalScopeMask);
218  // break;
219  // case access::fence_space::global_and_local:
220  // default:
221  // Flags |= static_cast<uint32_t>(
222  // __spv::MemorySemanticsMask::CrossWorkgroupMemory) |
223  // static_cast<uint32_t>(LocalScopeMask);
224  // break;
225  // }
226  // return Flags;
227 
228  return (AccessSpace == access::fence_space::global_space)
229  ? static_cast<uint32_t>(
232  : (AccessSpace == access::fence_space::local_space)
233  ? static_cast<uint32_t>(
235  LocalScopeMask)
236  : /* default: (AccessSpace ==
237  access::fence_space::global_and_local) */
238  static_cast<uint32_t>(
241  LocalScopeMask);
242 }
243 
244 // To ensure loop unrolling is done when processing dimensions.
245 template <size_t... Inds, class F>
246 void loop_impl(std::integer_sequence<size_t, Inds...>, F &&f) {
247  (f(std::integral_constant<size_t, Inds>{}), ...);
248 }
249 
250 template <size_t count, class F> void loop(F &&f) {
251  loop_impl(std::make_index_sequence<count>{}, std::forward<F>(f));
252 }
253 inline constexpr bool is_power_of_two(int x) { return (x & (x - 1)) == 0; }
254 
255 std::tuple<const RTDeviceBinaryImage *, ur_program_handle_t>
256 retrieveKernelBinary(const QueueImplPtr &, const char *KernelName,
257  CGExecKernel *CGKernel = nullptr);
258 } // namespace detail
259 
260 } // namespace _V1
261 } // namespace sycl
static std::enable_if_t< WithOffset, item< Dims, WithOffset > > createItem(const range< Dims > &Extent, const id< Dims > &Index, const id< Dims > &Offset)
Definition: helpers.hpp:90
static group< Dims > createGroup(const range< Dims > &Global, const range< Dims > &Local, const range< Dims > &Group, const id< Dims > &Index)
Definition: helpers.hpp:71
static ResType createSubGroupMask(BitsType Bits, size_t BitsNum)
Definition: helpers.hpp:84
static h_item< Dims > createHItem(const item< Dims, false > &Global, const item< Dims, false > &Local, const range< Dims > &Flex)
Definition: helpers.hpp:115
static group< Dims > createGroup(const range< Dims > &Global, const range< Dims > &Local, const id< Dims > &Index)
Definition: helpers.hpp:77
static void updateItemIndex(sycl::item< Dims, WithOffset > &Item, const id< Dims > &NextIndex)
Definition: helpers.hpp:122
static std::enable_if_t<!WithOffset, item< Dims, WithOffset > > createItem(const range< Dims > &Extent, const id< Dims > &Index)
Definition: helpers.hpp:97
static h_item< Dims > createHItem(const item< Dims, false > &Global, const item< Dims, false > &Local)
Definition: helpers.hpp:109
static nd_item< Dims > createNDItem(const item< Dims, true > &Global, const item< Dims, false > &Local, const group< Dims > &Group)
Definition: helpers.hpp:102
Identifies an instance of a group::parallel_for_work_item function object executing at each point in ...
Definition: h_item.hpp:31
A unique identifier of an item in an index space.
Definition: id.hpp:36
Identifies an instance of the function object executing at each point in a range.
Definition: item.hpp:37
Provides a cross-platform math array class template that works on SYCL devices as well as in host C++...
Definition: marray.hpp:49
Identifies an instance of the function object executing at each point in an nd_range.
Definition: nd_item.hpp:48
Defines the iteration domain of either a single work-group in a parallel dispatch,...
Definition: range.hpp:26
constexpr bool is_power_of_two(int x)
Definition: helpers.hpp:253
T get_or_store(const T *obj)
Definition: helpers.hpp:57
void markBufferAsInternal(const std::shared_ptr< buffer_impl > &BufImpl)
Definition: helpers.cpp:33
constexpr __spv::MemorySemanticsMask::Flag getSPIRVMemorySemanticsMask(memory_order)
Definition: helpers.hpp:195
void loop_impl(std::integer_sequence< size_t, Inds... >, F &&f)
Definition: helpers.hpp:246
void waitEvents(std::vector< sycl::event > DepEvents)
Definition: helpers.cpp:27
std::shared_ptr< sycl::detail::queue_impl > QueueImplPtr
Definition: helpers.hpp:45
void loop(F &&f)
Definition: helpers.hpp:250
std::tuple< const RTDeviceBinaryImage *, ur_program_handle_t > retrieveKernelBinary(const QueueImplPtr &, const char *KernelName, CGExecKernel *CGKernel=nullptr)
Definition: helpers.cpp:38
autodecltype(x) x
Definition: access.hpp:18