DPC++ Runtime
Runtime libraries for oneAPI DPC++
nd_item.hpp
Go to the documentation of this file.
1 //==--------- nd_item.hpp --- SYCL iteration nd_item -----------------------==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #pragma once
10 
11 #include <CL/__spirv/spirv_ops.hpp> // for __spirv_ControlBarrier
12 #include <CL/__spirv/spirv_types.hpp> // for Scope
13 #include <CL/__spirv/spirv_vars.hpp> // for initLocalInvocationId
14 #include <sycl/access/access.hpp> // for mode, fence_space
15 #include <sycl/detail/defines.hpp> // for __SYCL_ASSUME_INT
16 #include <sycl/detail/defines_elementary.hpp> // for __SYCL2020_DEPRECATED, __SY...
17 #include <sycl/detail/generic_type_traits.hpp> // for ConvertToOpenCLType_t
18 #include <sycl/detail/helpers.hpp> // for getSPIRVMemorySemanticsMask
19 #include <sycl/detail/type_traits.hpp> // for is_bool, change_base_...
20 #include <sycl/device_event.hpp> // for device_event
21 #include <sycl/exception.hpp> // for make_error_code, errc, exce...
22 #include <sycl/group.hpp> // for group
23 #include <sycl/id.hpp> // for id
24 #include <sycl/item.hpp> // for item
25 #include <sycl/nd_range.hpp> // for nd_range
26 #include <sycl/pointers.hpp> // for decorated_global_ptr, decor...
27 #include <sycl/range.hpp> // for range
28 #include <sycl/sub_group.hpp> // for sub_group
29 
30 #include <cstddef> // for size_t
31 #include <stdint.h> // for uint32_t
32 #include <type_traits> // for enable_if_t, remove_const_t
33 
34 namespace sycl {
35 inline namespace _V1 {
36 namespace detail {
37 class Builder;
38 }
39 
40 namespace ext::oneapi::experimental {
41 template <int Dimensions> class root_group;
42 }
43 
48 template <int Dimensions = 1> class nd_item {
49 public:
50  static constexpr int dimensions = Dimensions;
51 
53 #ifdef __SYCL_DEVICE_ONLY__
54  return __spirv::initGlobalInvocationId<Dimensions, id<Dimensions>>();
55 #else
56  return {};
57 #endif
58  }
59 
60  size_t __SYCL_ALWAYS_INLINE get_global_id(int Dimension) const {
61  size_t Id = get_global_id()[Dimension];
63  return Id;
64  }
65 
67  size_t LinId = 0;
68  id<Dimensions> Index = get_global_id();
70  id<Dimensions> Offset = get_offset();
71  if (1 == Dimensions) {
72  LinId = Index[0] - Offset[0];
73  } else if (2 == Dimensions) {
74  LinId = (Index[0] - Offset[0]) * Extent[1] + Index[1] - Offset[1];
75  } else {
76  LinId = (Index[0] - Offset[0]) * Extent[1] * Extent[2] +
77  (Index[1] - Offset[1]) * Extent[2] + Index[2] - Offset[2];
78  }
79  __SYCL_ASSUME_INT(LinId);
80  return LinId;
81  }
82 
84 #ifdef __SYCL_DEVICE_ONLY__
85  return __spirv::initLocalInvocationId<Dimensions, id<Dimensions>>();
86 #else
87  return {};
88 #endif
89  }
90 
91  size_t __SYCL_ALWAYS_INLINE get_local_id(int Dimension) const {
92  size_t Id = get_local_id()[Dimension];
94  return Id;
95  }
96 
97  size_t get_local_linear_id() const {
98  size_t LinId = 0;
99  id<Dimensions> Index = get_local_id();
101  if (1 == Dimensions) {
102  LinId = Index[0];
103  } else if (2 == Dimensions) {
104  LinId = Index[0] * Extent[1] + Index[1];
105  } else {
106  LinId =
107  Index[0] * Extent[1] * Extent[2] + Index[1] * Extent[2] + Index[2];
108  }
109  __SYCL_ASSUME_INT(LinId);
110  return LinId;
111  }
112 
114  // TODO: ideally Group object should be stateless and have a contructor with
115  // no arguments.
118  }
119 
120  sub_group get_sub_group() const { return sub_group(); }
121 
122  size_t __SYCL_ALWAYS_INLINE get_group(int Dimension) const {
123  size_t Id = get_group_id()[Dimension];
124  __SYCL_ASSUME_INT(Id);
125  return Id;
126  }
127 
129  size_t LinId = 0;
130  id<Dimensions> Index = get_group_id();
132  if (1 == Dimensions) {
133  LinId = Index[0];
134  } else if (2 == Dimensions) {
135  LinId = Index[0] * Extent[1] + Index[1];
136  } else {
137  LinId =
138  Index[0] * Extent[1] * Extent[2] + Index[1] * Extent[2] + Index[2];
139  }
140  __SYCL_ASSUME_INT(LinId);
141  return LinId;
142  }
143 
145 #ifdef __SYCL_DEVICE_ONLY__
146  return __spirv::initNumWorkgroups<Dimensions, range<Dimensions>>();
147 #else
148  return {};
149 #endif
150  }
151 
152  size_t __SYCL_ALWAYS_INLINE get_group_range(int Dimension) const {
153  size_t Range = get_group_range()[Dimension];
154  __SYCL_ASSUME_INT(Range);
155  return Range;
156  }
157 
159 #ifdef __SYCL_DEVICE_ONLY__
160  return __spirv::initGlobalSize<Dimensions, range<Dimensions>>();
161 #else
162  return {};
163 #endif
164  }
165 
166  size_t get_global_range(int Dimension) const {
167  size_t Val = get_global_range()[Dimension];
168  __SYCL_ASSUME_INT(Val);
169  return Val;
170  }
171 
173 #ifdef __SYCL_DEVICE_ONLY__
174  return __spirv::initWorkgroupSize<Dimensions, range<Dimensions>>();
175 #else
176  return {};
177 #endif
178  }
179 
180  size_t get_local_range(int Dimension) const {
181  size_t Id = get_local_range()[Dimension];
182  __SYCL_ASSUME_INT(Id);
183  return Id;
184  }
185 
186  __SYCL2020_DEPRECATED("offsets are deprecated in SYCL 2020")
188 #ifdef __SYCL_DEVICE_ONLY__
189  return __spirv::initGlobalOffset<Dimensions, id<Dimensions>>();
190 #else
191  return {};
192 #endif
193  }
194 
197  get_offset());
198  }
199 
200  void barrier(access::fence_space accessSpace =
202  uint32_t flags = _V1::detail::getSPIRVMemorySemanticsMask(accessSpace);
204  flags);
205  }
206 
209  template <access::mode accessMode = access::mode::read_write>
210  __SYCL2020_DEPRECATED("use sycl::atomic_fence() free function instead")
211  void mem_fence(
212  typename std::enable_if_t<accessMode == access::mode::read ||
213  accessMode == access::mode::write ||
214  accessMode == access::mode::read_write,
215  access::fence_space>
216  accessSpace = access::fence_space::global_and_local) const {
217  uint32_t flags = detail::getSPIRVMemorySemanticsMask(accessSpace);
218  // TODO: currently, there is no good way in SPIR-V to set the memory
219  // barrier only for load operations or only for store operations.
220  // The full read-and-write barrier is used and the template parameter
221  // 'accessMode' is ignored for now. Either SPIR-V or SYCL spec may be
222  // changed to address this discrepancy between SPIR-V and SYCL,
223  // or if we decide that 'accessMode' is the important feature then
224  // we can fix this later, for example, by using OpenCL 1.2 functions
225  // read_mem_fence() and write_mem_fence().
227  }
228 
234  template <typename dataT>
235  __SYCL2020_DEPRECATED("Use decorated multi_ptr arguments instead")
236  std::enable_if_t<!detail::is_bool<dataT>::value,
238  global_ptr<dataT> src,
239  size_t numElements,
240  size_t srcStride) const {
241  using DestT = detail::ConvertToOpenCLType_t<decltype(dest)>;
242  using SrcT = detail::ConvertToOpenCLType_t<decltype(src)>;
243 
245  __spv::Scope::Workgroup, DestT(dest.get()), SrcT(src.get()),
246  numElements, srcStride, 0);
247  return device_event(E);
248  }
249 
255  template <typename dataT>
256  __SYCL2020_DEPRECATED("Use decorated multi_ptr arguments instead")
257  std::enable_if_t<!detail::is_bool<dataT>::value,
259  local_ptr<dataT> src,
260  size_t numElements,
261  size_t destStride)
262  const {
263  using DestT = detail::ConvertToOpenCLType_t<decltype(dest)>;
264  using SrcT = detail::ConvertToOpenCLType_t<decltype(src)>;
265 
267  __spv::Scope::Workgroup, DestT(dest.get()), SrcT(src.get()),
268  numElements, destStride, 0);
269  return device_event(E);
270  }
271 
278  template <typename DestDataT, typename SrcDataT>
279  std::enable_if_t<!detail::is_bool<DestDataT>::value &&
280  std::is_same_v<std::remove_const_t<SrcDataT>, DestDataT>,
281  device_event>
283  decorated_global_ptr<SrcDataT> src, size_t numElements,
284  size_t srcStride) const {
285  using DestT = detail::ConvertToOpenCLType_t<decltype(dest)>;
286  using SrcT = detail::ConvertToOpenCLType_t<decltype(src)>;
287 
289  __spv::Scope::Workgroup, DestT(dest.get()), SrcT(src.get()),
290  numElements, srcStride, 0);
291  return device_event(E);
292  }
293 
300  template <typename DestDataT, typename SrcDataT>
301  std::enable_if_t<!detail::is_bool<DestDataT>::value &&
302  std::is_same_v<std::remove_const_t<SrcDataT>, DestDataT>,
303  device_event>
305  decorated_local_ptr<SrcDataT> src, size_t numElements,
306  size_t destStride) const {
307  using DestT = detail::ConvertToOpenCLType_t<decltype(dest)>;
308  using SrcT = detail::ConvertToOpenCLType_t<decltype(src)>;
309 
311  __spv::Scope::Workgroup, DestT(dest.get()), SrcT(src.get()),
312  numElements, destStride, 0);
313  return device_event(E);
314  }
315 
321  template <typename T, access::address_space DestS, access::address_space SrcS>
322  __SYCL2020_DEPRECATED("Use decorated multi_ptr arguments instead")
323  std::enable_if_t<
324  detail::is_scalar_bool<T>::value,
326  access::decorated::legacy>
327  Dest,
328  multi_ptr<T, SrcS,
329  access::decorated::legacy>
330  Src,
331  size_t NumElements,
332  size_t Stride) const {
333  static_assert(sizeof(bool) == sizeof(uint8_t),
334  "Async copy to/from bool memory is not supported.");
336  reinterpret_cast<uint8_t *>(Dest.get()));
338  reinterpret_cast<uint8_t *>(Src.get()));
339  return async_work_group_copy(DestP, SrcP, NumElements, Stride);
340  }
341 
347  template <typename T, access::address_space DestS, access::address_space SrcS>
348  __SYCL2020_DEPRECATED("Use decorated multi_ptr arguments instead")
349  std::enable_if_t<
350  detail::is_vector_bool<T>::value,
352  access::decorated::legacy>
353  Dest,
354  multi_ptr<T, SrcS,
355  access::decorated::legacy>
356  Src,
357  size_t NumElements,
358  size_t Stride) const {
359  static_assert(sizeof(bool) == sizeof(uint8_t),
360  "Async copy to/from bool memory is not supported.");
362  auto DestP = address_space_cast<DestS, access::decorated::legacy>(
363  reinterpret_cast<VecT *>(Dest.get()));
364  auto SrcP = address_space_cast<SrcS, access::decorated::legacy>(
365  reinterpret_cast<VecT *>(Src.get()));
366  return async_work_group_copy(DestP, SrcP, NumElements, Stride);
367  }
368 
374  template <typename DestT, access::address_space DestS, typename SrcT,
376  std::enable_if_t<detail::is_scalar_bool<DestT>::value &&
377  std::is_same_v<std::remove_const_t<SrcT>, DestT>,
378  device_event>
381  size_t NumElements, size_t Stride) const {
382  static_assert(sizeof(bool) == sizeof(uint8_t),
383  "Async copy to/from bool memory is not supported.");
384  using QualSrcT =
385  std::conditional_t<std::is_const_v<SrcT>, const uint8_t, uint8_t>;
387  detail::cast_AS<typename multi_ptr<uint8_t, DestS,
389  Dest.get_decorated()));
391  detail::cast_AS<typename multi_ptr<QualSrcT, SrcS,
393  Src.get_decorated()));
394  return async_work_group_copy(DestP, SrcP, NumElements, Stride);
395  }
396 
402  template <typename DestT, access::address_space DestS, typename SrcT,
404  std::enable_if_t<detail::is_vector_bool<DestT>::value &&
405  std::is_same_v<std::remove_const_t<SrcT>, DestT>,
406  device_event>
409  size_t NumElements, size_t Stride) const {
410  static_assert(sizeof(bool) == sizeof(uint8_t),
411  "Async copy to/from bool memory is not supported.");
413  using QualSrcVecT =
414  std::conditional_t<std::is_const_v<SrcT>, std::add_const_t<VecT>, VecT>;
418  Dest.get_decorated()));
420  detail::cast_AS<typename multi_ptr<QualSrcVecT, SrcS,
422  Src.get_decorated()));
423  return async_work_group_copy(DestP, SrcP, NumElements, Stride);
424  }
425 
431  template <typename dataT>
432  __SYCL2020_DEPRECATED("Use decorated multi_ptr arguments instead")
434  async_work_group_copy(local_ptr<dataT> dest, global_ptr<dataT> src,
435  size_t numElements) const {
436  return async_work_group_copy(dest, src, numElements, 1);
437  }
438 
444  template <typename dataT>
445  __SYCL2020_DEPRECATED("Use decorated multi_ptr arguments instead")
447  async_work_group_copy(global_ptr<dataT> dest, local_ptr<dataT> src,
448  size_t numElements) const {
449  return async_work_group_copy(dest, src, numElements, 1);
450  }
451 
458  template <typename DestDataT, typename SrcDataT>
459  typename std::enable_if_t<
460  std::is_same_v<DestDataT, std::remove_const_t<SrcDataT>>, device_event>
463  size_t numElements) const {
464  return async_work_group_copy(dest, src, numElements, 1);
465  }
466 
473  template <typename DestDataT, typename SrcDataT>
474  typename std::enable_if_t<
475  std::is_same_v<DestDataT, std::remove_const_t<SrcDataT>>, device_event>
478  size_t numElements) const {
479  return async_work_group_copy(dest, src, numElements, 1);
480  }
481 
482  template <typename... eventTN> void wait_for(eventTN... events) const {
483  waitForHelper(events...);
484  }
485 
489  }
490 
491  nd_item(const nd_item &rhs) = default;
492  nd_item(nd_item &&rhs) = default;
493 
494  nd_item &operator=(const nd_item &rhs) = default;
495  nd_item &operator=(nd_item &&rhs) = default;
496 
497  bool operator==(const nd_item &) const { return true; }
498  bool operator!=(const nd_item &rhs) const { return !((*this) == rhs); }
499 
500 protected:
501  friend class detail::Builder;
502  nd_item() {}
504  const group<Dimensions> &) {}
505 
506  void waitForHelper() const {}
507 
508  void waitForHelper(device_event Event) const { Event.wait(); }
509 
510  template <typename T, typename... Ts>
511  void waitForHelper(T E, Ts... Es) const {
512  waitForHelper(E);
513  waitForHelper(Es...);
514  }
515 
517 #ifdef __SYCL_DEVICE_ONLY__
518  return __spirv::initWorkgroupId<Dimensions, id<Dimensions>>();
519 #else
520  return {};
521 #endif
522  }
523 };
524 } // namespace _V1
525 } // namespace sycl
static group< Dims > createGroup(const range< Dims > &Global, const range< Dims > &Local, const range< Dims > &Group, const id< Dims > &Index)
Definition: helpers.hpp:71
Encapsulates a single SYCL device event which is available only within SYCL kernel functions and can ...
A unique identifier of an item in an index space.
Definition: id.hpp:36
Identifies an instance of the function object executing at each point in a range.
Definition: item.hpp:37
Identifies an instance of the function object executing at each point in an nd_range.
Definition: nd_item.hpp:48
id< Dimensions > get_group_id() const
Definition: nd_item.hpp:516
std::enable_if_t<!detail::is_bool< DestDataT >::value &&std::is_same_v< std::remove_const_t< SrcDataT >, DestDataT >, device_event > async_work_group_copy(decorated_local_ptr< DestDataT > dest, decorated_global_ptr< SrcDataT > src, size_t numElements, size_t srcStride) const
Asynchronously copies a number of elements specified by numElements from the source pointed by src to...
Definition: nd_item.hpp:282
size_t get_local_linear_id() const
Definition: nd_item.hpp:97
size_t get_local_range(int Dimension) const
Definition: nd_item.hpp:180
void waitForHelper(T E, Ts... Es) const
Definition: nd_item.hpp:511
std::enable_if_t< detail::is_scalar_bool< DestT >::value &&std::is_same_v< std::remove_const_t< SrcT >, DestT >, device_event > async_work_group_copy(multi_ptr< DestT, DestS, access::decorated::yes > Dest, multi_ptr< SrcT, SrcS, access::decorated::yes > Src, size_t NumElements, size_t Stride) const
Specialization for scalar bool type.
Definition: nd_item.hpp:379
size_t __SYCL_ALWAYS_INLINE get_group_linear_id() const
Definition: nd_item.hpp:128
nd_item(const nd_item &rhs)=default
size_t get_global_range(int Dimension) const
Definition: nd_item.hpp:166
sycl::ext::oneapi::experimental::root_group< Dimensions > ext_oneapi_get_root_group() const
Definition: nd_item.hpp:487
id< Dimensions > get_local_id() const
Definition: nd_item.hpp:83
nd_item(const item< Dimensions, true > &, const item< Dimensions, false > &, const group< Dimensions > &)
Definition: nd_item.hpp:503
range< Dimensions > get_local_range() const
Definition: nd_item.hpp:172
id< Dimensions > get_global_id() const
Definition: nd_item.hpp:52
sub_group get_sub_group() const
Definition: nd_item.hpp:120
void wait_for(eventTN... events) const
Definition: nd_item.hpp:482
nd_item & operator=(const nd_item &rhs)=default
std::enable_if_t< detail::is_vector_bool< DestT >::value &&std::is_same_v< std::remove_const_t< SrcT >, DestT >, device_event > async_work_group_copy(multi_ptr< DestT, DestS, access::decorated::yes > Dest, multi_ptr< SrcT, SrcS, access::decorated::yes > Src, size_t NumElements, size_t Stride) const
Specialization for vector bool type.
Definition: nd_item.hpp:407
std::enable_if_t<!detail::is_bool< dataT >::value, device_event > async_work_group_copy(local_ptr< dataT > dest, global_ptr< dataT > src, size_t numElements, size_t srcStride) const
Asynchronously copies a number of elements specified by numElements from the source pointed by src to...
Definition: nd_item.hpp:237
std::enable_if_t< std::is_same_v< DestDataT, std::remove_const_t< SrcDataT > >, device_event > async_work_group_copy(decorated_global_ptr< DestDataT > dest, decorated_local_ptr< SrcDataT > src, size_t numElements) const
Asynchronously copies a number of elements specified by numElements from the source pointed by src to...
Definition: nd_item.hpp:476
std::enable_if_t<!detail::is_bool< DestDataT >::value &&std::is_same_v< std::remove_const_t< SrcDataT >, DestDataT >, device_event > async_work_group_copy(decorated_global_ptr< DestDataT > dest, decorated_local_ptr< SrcDataT > src, size_t numElements, size_t destStride) const
Asynchronously copies a number of elements specified by numElements from the source pointed by src to...
Definition: nd_item.hpp:304
nd_range< Dimensions > get_nd_range() const
Definition: nd_item.hpp:195
size_t __SYCL_ALWAYS_INLINE get_global_id(int Dimension) const
Definition: nd_item.hpp:60
void waitForHelper() const
Definition: nd_item.hpp:506
bool operator==(const nd_item &) const
Definition: nd_item.hpp:497
size_t __SYCL_ALWAYS_INLINE get_global_linear_id() const
Definition: nd_item.hpp:66
bool operator!=(const nd_item &rhs) const
Definition: nd_item.hpp:498
nd_item(nd_item &&rhs)=default
void waitForHelper(device_event Event) const
Definition: nd_item.hpp:508
size_t __SYCL_ALWAYS_INLINE get_group_range(int Dimension) const
Definition: nd_item.hpp:152
static constexpr int dimensions
Definition: nd_item.hpp:50
void mem_fence(typename std::enable_if_t< accessMode==access::mode::read||accessMode==access::mode::write||accessMode==access::mode::read_write, access::fence_space > accessSpace=access::fence_space::global_and_local) const
Executes a work-group mem-fence with memory ordering on the local address space, global address space...
Definition: nd_item.hpp:211
id< Dimensions > get_offset() const
Definition: nd_item.hpp:187
group< Dimensions > get_group() const
Definition: nd_item.hpp:113
nd_item & operator=(nd_item &&rhs)=default
size_t __SYCL_ALWAYS_INLINE get_local_id(int Dimension) const
Definition: nd_item.hpp:91
range< Dimensions > get_global_range() const
Definition: nd_item.hpp:158
range< Dimensions > get_group_range() const
Definition: nd_item.hpp:144
size_t __SYCL_ALWAYS_INLINE get_group(int Dimension) const
Definition: nd_item.hpp:122
std::enable_if_t< std::is_same_v< DestDataT, std::remove_const_t< SrcDataT > >, device_event > async_work_group_copy(decorated_local_ptr< DestDataT > dest, decorated_global_ptr< SrcDataT > src, size_t numElements) const
Asynchronously copies a number of elements specified by numElements from the source pointed by src to...
Definition: nd_item.hpp:461
void barrier(access::fence_space accessSpace=access::fence_space::global_and_local) const
Definition: nd_item.hpp:200
Defines the iteration domain of both the work-groups and the overall dispatch.
Definition: nd_range.hpp:22
#define __SYCL_ASSUME_INT(x)
Definition: defines.hpp:17
#define __SYCL_ALWAYS_INLINE
typename change_base_type< T, B >::type change_base_type_t
ToT cast_AS(FromT from)
Definition: access.hpp:330
constexpr __spv::MemorySemanticsMask::Flag getSPIRVMemorySemanticsMask(memory_order)
Definition: helpers.hpp:195
decltype(convertToOpenCLType(std::declval< T >())) ConvertToOpenCLType_t
class __SYCL_EBO __SYCL_SPECIAL_CLASS Dimensions
signed char __SYCL2020_DEPRECATED
Definition: aliases.hpp:94
constexpr mode_tag_t< access_mode::read_write > read_write
Definition: access.hpp:85
std::conditional_t< is_decorated, decorated_type *, std::add_pointer_t< value_type > > pointer
Definition: multi_ptr.hpp:459
Definition: access.hpp:18
__SYCL_CONVERGENT__ __ocl_event_t __SYCL_OpGroupAsyncCopyGlobalToLocal(__spv::Scope::Flag, dataT *Dest, const dataT *Src, size_t NumElements, size_t Stride, __ocl_event_t) noexcept
Definition: spirv_ops.hpp:1344
__SYCL_CONVERGENT__ __DPCPP_SYCL_EXTERNAL void __spirv_ControlBarrier(__spv::Scope Execution, __spv::Scope Memory, uint32_t Semantics) noexcept
Definition: spirv_ops.cpp:27
__SYCL_CONVERGENT__ __DPCPP_SYCL_EXTERNAL void __spirv_MemoryBarrier(__spv::Scope Memory, uint32_t Semantics) noexcept
Definition: spirv_ops.cpp:37
__SYCL_CONVERGENT__ __ocl_event_t __SYCL_OpGroupAsyncCopyLocalToGlobal(__spv::Scope::Flag, dataT *Dest, const dataT *Src, size_t NumElements, size_t Stride, __ocl_event_t) noexcept
Definition: spirv_ops.hpp:1356
void * __ocl_event_t