DPC++ Runtime
Runtime libraries for oneAPI DPC++
group.hpp
Go to the documentation of this file.
1 //==-------------- group.hpp --- SYCL work group ---------------------------==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #pragma once
10 
11 #include <CL/__spirv/spirv_ops.hpp>
17 #include <CL/sycl/detail/spirv.hpp>
18 #include <CL/sycl/device_event.hpp>
19 #include <CL/sycl/h_item.hpp>
20 #include <CL/sycl/id.hpp>
21 #include <CL/sycl/memory_enums.hpp>
22 #include <CL/sycl/pointers.hpp>
23 #include <CL/sycl/range.hpp>
24 #include <stdexcept>
25 #include <type_traits>
26 
28 namespace sycl {
29 namespace detail {
30 class Builder;
31 
32 // Implements a barrier accross work items within a work group.
33 static inline void workGroupBarrier() {
34 #ifdef __SYCL_DEVICE_ONLY__
35  constexpr uint32_t flags =
36  static_cast<uint32_t>(
38  static_cast<uint32_t>(__spv::MemorySemanticsMask::WorkgroupMemory);
40  flags);
41 #endif // __SYCL_DEVICE_ONLY__
42 }
43 
44 } // namespace detail
45 
46 // SYCL 1.2.1rev5, section "4.8.5.3 Parallel For hierarchical invoke":
47 // Quote:
48 // ... To guarantee use of private per-work-item memory, the private_memory
49 // class can be used to wrap the data. This class very simply constructs
50 // private data for a given group across the entire group.The id of the
51 // current work-item is passed to any access to grab the correct data.
52 template <typename T, int Dimensions = 1> class private_memory {
53 public:
54  // Construct based directly off the number of work-items
56 #ifndef __SYCL_DEVICE_ONLY__
57  // serial host => one instance per work-group - allocate space for each WI
58  // in the group:
59  Val.reset(new T[G.get_local_range().size()]);
60 #endif // __SYCL_DEVICE_ONLY__
61  (void)G;
62  }
63 
64  // Access the instance for the current work-item
66 #ifndef __SYCL_DEVICE_ONLY__
67  // Calculate the linear index of current WI and return reference to the
68  // corresponding spot in the value array:
69  size_t Ind = Id.get_physical_local().get_linear_id();
70  return Val.get()[Ind];
71 #else
72  (void)Id;
73  return Val;
74 #endif // __SYCL_DEVICE_ONLY__
75  }
76 
77 private:
78 #ifdef __SYCL_DEVICE_ONLY__
79  // On SYCL device private_memory<T> instance is created per physical WI, so
80  // there is 1:1 correspondence betwen this class instances and per-WI memory.
81  T Val;
82 #else
83  // On serial host there is one private_memory<T> instance per work group, so
84  // it must have space to hold separate value per WI in the group.
85  std::unique_ptr<T[]> Val;
86 #endif // #ifdef __SYCL_DEVICE_ONLY__
87 };
88 
93 template <int Dimensions = 1> class group {
94 public:
95 #ifndef __DISABLE_SYCL_INTEL_GROUP_ALGORITHMS__
98  using linear_id_type = size_t;
99  static constexpr int dimensions = Dimensions;
100 #endif // __DISABLE_SYCL_INTEL_GROUP_ALGORITHMS__
101 
102  static constexpr sycl::memory_scope fence_scope =
103  sycl::memory_scope::work_group;
104 
105  group() = delete;
106 
107  __SYCL2020_DEPRECATED("use sycl::group::get_group_id() instead")
108  id<Dimensions> get_id() const { return index; }
109 
110  __SYCL2020_DEPRECATED("use sycl::group::get_group_id() instead")
111  size_t get_id(int dimension) const { return index[dimension]; }
112 
113  id<Dimensions> get_group_id() const { return index; }
114 
115  size_t get_group_id(int dimension) const { return index[dimension]; }
116 
117  range<Dimensions> get_global_range() const { return globalRange; }
118 
119  size_t get_global_range(int dimension) const {
120  return globalRange[dimension];
121  }
122 
124 #ifdef __SYCL_DEVICE_ONLY__
125  return __spirv::initLocalInvocationId<Dimensions, id<Dimensions>>();
126 #else
127  throw runtime_error("get_local_id() is not implemented on host device",
129  // Implementing get_local_id() on host device requires ABI breaking change.
130  // It requires extending class group with local item which represents
131  // local_id. Currently this local id is only used in nd_item and group
132  // cannot access it.
133 #endif
134  }
135 
136  size_t get_local_linear_id() const {
137  return get_local_linear_id_impl<Dimensions>();
138  }
139 
140  range<Dimensions> get_local_range() const { return localRange; }
141 
142  size_t get_local_range(int dimension) const { return localRange[dimension]; }
143 
144  size_t get_local_linear_range() const {
145  return get_local_linear_range_impl();
146  }
147 
148  range<Dimensions> get_group_range() const { return groupRange; }
149 
150  size_t get_group_range(int dimension) const {
151  return get_group_range()[dimension];
152  }
153 
154  size_t get_group_linear_range() const {
155  return get_group_linear_range_impl();
156  }
157 
158  range<Dimensions> get_max_local_range() const { return get_local_range(); }
159 
160  size_t operator[](int dimension) const { return index[dimension]; }
161 
162  __SYCL2020_DEPRECATED("use sycl::group::get_group_linear_id() instead")
163  size_t get_linear_id() const { return get_group_linear_id(); }
164 
165  size_t get_group_linear_id() const { return get_group_linear_id_impl(); }
166 
167  bool leader() const { return (get_local_linear_id() == 0); }
168 
169  template <typename WorkItemFunctionT>
170  void parallel_for_work_item(WorkItemFunctionT Func) const {
171  // need barriers to enforce SYCL semantics for the work item loop -
172  // compilers are expected to optimize when possible
174 #ifdef __SYCL_DEVICE_ONLY__
175  range<Dimensions> GlobalSize{
176  __spirv::initGlobalSize<Dimensions, range<Dimensions>>()};
177  range<Dimensions> LocalSize{
178  __spirv::initWorkgroupSize<Dimensions, range<Dimensions>>()};
179  id<Dimensions> GlobalId{
180  __spirv::initGlobalInvocationId<Dimensions, id<Dimensions>>()};
181  id<Dimensions> LocalId{
182  __spirv::initLocalInvocationId<Dimensions, id<Dimensions>>()};
183 
184  // no 'iterate' in the device code variant, because
185  // (1) this code is already invoked by each work item as a part of the
186  // enclosing parallel_for_work_group kernel
187  // (2) the range this pfwi iterates over matches work group size exactly
188  item<Dimensions, false> GlobalItem =
189  detail::Builder::createItem<Dimensions, false>(GlobalSize, GlobalId);
190  item<Dimensions, false> LocalItem =
191  detail::Builder::createItem<Dimensions, false>(LocalSize, LocalId);
192  h_item<Dimensions> HItem =
193  detail::Builder::createHItem<Dimensions>(GlobalItem, LocalItem);
194 
195  Func(HItem);
196 #else
197  id<Dimensions> GroupStartID = index * localRange;
198 
199  // ... host variant needs explicit 'iterate' because it is serial
201  localRange, [&](const id<Dimensions> &LocalID) {
202  item<Dimensions, false> GlobalItem =
203  detail::Builder::createItem<Dimensions, false>(
204  globalRange, GroupStartID + LocalID);
205  item<Dimensions, false> LocalItem =
206  detail::Builder::createItem<Dimensions, false>(localRange,
207  LocalID);
208  h_item<Dimensions> HItem =
209  detail::Builder::createHItem<Dimensions>(GlobalItem, LocalItem);
210  Func(HItem);
211  });
212 #endif // __SYCL_DEVICE_ONLY__
213  // Need both barriers here - before and after the parallel_for_work_item
214  // (PFWI). There can be work group scope code after the PFWI which reads
215  // work group local data written within this PFWI. Back Ends are expected to
216  // optimize away unneeded barriers (e.g. two barriers in a row).
218  }
219 
220  template <typename WorkItemFunctionT>
222  WorkItemFunctionT Func) const {
224 #ifdef __SYCL_DEVICE_ONLY__
225  range<Dimensions> GlobalSize{
226  __spirv::initGlobalSize<Dimensions, range<Dimensions>>()};
227  range<Dimensions> LocalSize{
228  __spirv::initWorkgroupSize<Dimensions, range<Dimensions>>()};
229  id<Dimensions> GlobalId{
230  __spirv::initGlobalInvocationId<Dimensions, id<Dimensions>>()};
231  id<Dimensions> LocalId{
232  __spirv::initLocalInvocationId<Dimensions, id<Dimensions>>()};
233 
234  item<Dimensions, false> GlobalItem =
235  detail::Builder::createItem<Dimensions, false>(GlobalSize, GlobalId);
236  item<Dimensions, false> LocalItem =
237  detail::Builder::createItem<Dimensions, false>(LocalSize, LocalId);
238  h_item<Dimensions> HItem = detail::Builder::createHItem<Dimensions>(
239  GlobalItem, LocalItem, flexibleRange);
240 
241  // iterate over flexible range with work group size stride; each item
242  // performs flexibleRange/LocalSize iterations (if the former is divisible
243  // by the latter)
245  LocalId, LocalSize, flexibleRange,
246  [&](const id<Dimensions> &LogicalLocalID) {
247  HItem.setLogicalLocalID(LogicalLocalID);
248  Func(HItem);
249  });
250 #else
251  id<Dimensions> GroupStartID = index * localRange;
252 
254  localRange, [&](const id<Dimensions> &LocalID) {
255  item<Dimensions, false> GlobalItem =
256  detail::Builder::createItem<Dimensions, false>(
257  globalRange, GroupStartID + LocalID);
258  item<Dimensions, false> LocalItem =
259  detail::Builder::createItem<Dimensions, false>(localRange,
260  LocalID);
261  h_item<Dimensions> HItem = detail::Builder::createHItem<Dimensions>(
262  GlobalItem, LocalItem, flexibleRange);
263 
265  LocalID, localRange, flexibleRange,
266  [&](const id<Dimensions> &LogicalLocalID) {
267  HItem.setLogicalLocalID(LogicalLocalID);
268  Func(HItem);
269  });
270  });
271 #endif // __SYCL_DEVICE_ONLY__
273  }
274 
277  template <access::mode accessMode = access::mode::read_write>
278  void mem_fence(
279  typename detail::enable_if_t<accessMode == access::mode::read ||
280  accessMode == access::mode::write ||
281  accessMode == access::mode::read_write,
283  accessSpace = access::fence_space::global_and_local) const {
284  uint32_t flags = detail::getSPIRVMemorySemanticsMask(accessSpace);
285  // TODO: currently, there is no good way in SPIR-V to set the memory
286  // barrier only for load operations or only for store operations.
287  // The full read-and-write barrier is used and the template parameter
288  // 'accessMode' is ignored for now. Either SPIR-V or SYCL spec may be
289  // changed to address this discrepancy between SPIR-V and SYCL,
290  // or if we decide that 'accessMode' is the important feature then
291  // we can fix this later, for example, by using OpenCL 1.2 functions
292  // read_mem_fence() and write_mem_fence().
294  }
295 
301  template <typename dataT>
304  size_t numElements, size_t srcStride) const {
305  using DestT = detail::ConvertToOpenCLType_t<decltype(dest)>;
306  using SrcT = detail::ConvertToOpenCLType_t<decltype(src)>;
307 
309  __spv::Scope::Workgroup, DestT(dest.get()), SrcT(src.get()),
310  numElements, srcStride, 0);
311  return device_event(E);
312  }
313 
319  template <typename dataT>
322  size_t numElements, size_t destStride) const {
323  using DestT = detail::ConvertToOpenCLType_t<decltype(dest)>;
324  using SrcT = detail::ConvertToOpenCLType_t<decltype(src)>;
325 
327  __spv::Scope::Workgroup, DestT(dest.get()), SrcT(src.get()),
328  numElements, destStride, 0);
329  return device_event(E);
330  }
331 
337  template <typename T, access::address_space DestS, access::address_space SrcS>
340  size_t NumElements, size_t Stride) const {
341  static_assert(sizeof(bool) == sizeof(uint8_t),
342  "Async copy to/from bool memory is not supported.");
343  auto DestP =
344  multi_ptr<uint8_t, DestS>(reinterpret_cast<uint8_t *>(Dest.get()));
345  auto SrcP =
346  multi_ptr<uint8_t, SrcS>(reinterpret_cast<uint8_t *>(Src.get()));
347  return async_work_group_copy(DestP, SrcP, NumElements, Stride);
348  }
349 
355  template <typename T, access::address_space DestS, access::address_space SrcS>
358  size_t NumElements, size_t Stride) const {
359  static_assert(sizeof(bool) == sizeof(uint8_t),
360  "Async copy to/from bool memory is not supported.");
362  auto DestP = multi_ptr<VecT, DestS>(reinterpret_cast<VecT *>(Dest.get()));
363  auto SrcP = multi_ptr<VecT, SrcS>(reinterpret_cast<VecT *>(Src.get()));
364  return async_work_group_copy(DestP, SrcP, NumElements, Stride);
365  }
366 
372  template <typename dataT>
374  global_ptr<dataT> src,
375  size_t numElements) const {
376  return async_work_group_copy(dest, src, numElements, 1);
377  }
378 
384  template <typename dataT>
386  local_ptr<dataT> src,
387  size_t numElements) const {
388  return async_work_group_copy(dest, src, numElements, 1);
389  }
390 
391  template <typename... eventTN>
392  void wait_for(eventTN... Events) const {
393  waitForHelper(Events...);
394  }
395 
396  bool operator==(const group<Dimensions> &rhs) const {
397  bool Result = (rhs.globalRange == globalRange) &&
398  (rhs.localRange == localRange) && (rhs.index == index);
399  __SYCL_ASSERT(rhs.groupRange == groupRange &&
400  "inconsistent group class fields");
401  return Result;
402  }
403 
404  bool operator!=(const group<Dimensions> &rhs) const {
405  return !((*this) == rhs);
406  }
407 
408 private:
409  range<Dimensions> globalRange;
410  range<Dimensions> localRange;
411  range<Dimensions> groupRange;
412  id<Dimensions> index;
413 
414  template <int dims = Dimensions>
415  typename detail::enable_if_t<(dims == 1), size_t>
416  get_local_linear_id_impl() const {
417  id<Dimensions> localId = get_local_id();
418  return localId[0];
419  }
420 
421  template <int dims = Dimensions>
422  typename detail::enable_if_t<(dims == 2), size_t>
423  get_local_linear_id_impl() const {
424  id<Dimensions> localId = get_local_id();
425  return localId[0] * groupRange[1] + localId[1];
426  }
427 
428  template <int dims = Dimensions>
429  typename detail::enable_if_t<(dims == 3), size_t>
430  get_local_linear_id_impl() const {
431  id<Dimensions> localId = get_local_id();
432  return (localId[0] * groupRange[1] * groupRange[2]) +
433  (localId[1] * groupRange[2]) + localId[2];
434  }
435 
436  template <int dims = Dimensions>
437  typename detail::enable_if_t<(dims == 1), size_t>
438  get_local_linear_range_impl() const {
439  auto localRange = get_local_range();
440  return localRange[0];
441  }
442 
443  template <int dims = Dimensions>
444  typename detail::enable_if_t<(dims == 2), size_t>
445  get_local_linear_range_impl() const {
446  auto localRange = get_local_range();
447  return localRange[0] * localRange[1];
448  }
449 
450  template <int dims = Dimensions>
451  typename detail::enable_if_t<(dims == 3), size_t>
452  get_local_linear_range_impl() const {
453  auto localRange = get_local_range();
454  return localRange[0] * localRange[1] * localRange[2];
455  }
456 
457  template <int dims = Dimensions>
458  typename detail::enable_if_t<(dims == 1), size_t>
459  get_group_linear_range_impl() const {
460  auto groupRange = get_group_range();
461  return groupRange[0];
462  }
463 
464  template <int dims = Dimensions>
465  typename detail::enable_if_t<(dims == 2), size_t>
466  get_group_linear_range_impl() const {
467  auto groupRange = get_group_range();
468  return groupRange[0] * groupRange[1];
469  }
470 
471  template <int dims = Dimensions>
472  typename detail::enable_if_t<(dims == 3), size_t>
473  get_group_linear_range_impl() const {
474  auto groupRange = get_group_range();
475  return groupRange[0] * groupRange[1] * groupRange[2];
476  }
477 
478  template <int dims = Dimensions>
479  typename detail::enable_if_t<(dims == 1), size_t>
480  get_group_linear_id_impl() const {
481  return index[0];
482  }
483 
484  template <int dims = Dimensions>
485  typename detail::enable_if_t<(dims == 2), size_t>
486  get_group_linear_id_impl() const {
487  return index[0] * groupRange[1] + index[1];
488  }
489 
490  // SYCL specification 1.2.1rev5, section 4.7.6.5 "Buffer accessor":
491  // Whenever a multi-dimensional index is passed to a SYCL accessor the
492  // linear index is calculated based on the index {id1, id2, id3} provided
493  // and the range of the SYCL accessor {r1, r2, r3} according to row-major
494  // ordering as follows:
495  // id3 + (id2 · r3) + (id1 · r3 · r2) (4.3)
496  // section 4.8.1.8 "group class":
497  // size_t get_linear_id()const
498  // Get a linearized version of the work-group id. Calculating a linear
499  // work-group id from a multi-dimensional index follows the equation 4.3.
500  template <int dims = Dimensions>
501  typename detail::enable_if_t<(dims == 3), size_t>
502  get_group_linear_id_impl() const {
503  return (index[0] * groupRange[1] * groupRange[2]) +
504  (index[1] * groupRange[2]) + index[2];
505  }
506 
507  void waitForHelper() const {}
508 
509  void waitForHelper(device_event Event) const {
510  Event.wait();
511  }
512 
513  template <typename T, typename... Ts>
514  void waitForHelper(T E, Ts... Es) const {
515  waitForHelper(E);
516  waitForHelper(Es...);
517  }
518 
519 protected:
520  friend class detail::Builder;
522  const range<Dimensions> GroupRange, const id<Dimensions> &I)
523  : globalRange(G), localRange(L), groupRange(GroupRange), index(I) {
524  // Make sure local range divides global without remainder:
525  __SYCL_ASSERT(((G % L).size() == 0) &&
526  "global range is not multiple of local");
527  __SYCL_ASSERT((((G / L) - GroupRange).size() == 0) &&
528  "inconsistent group constructor arguments");
529  }
530 };
531 
532 template <int Dims>
533 __SYCL_DEPRECATED("use sycl::ext::oneapi::experimental::this_group() instead")
534 group<Dims> this_group() {
535 #ifdef __SYCL_DEVICE_ONLY__
536  return detail::Builder::getElement(detail::declptr<group<Dims>>());
537 #else
538  throw sycl::exception(
539  sycl::make_error_code(sycl::errc::feature_not_supported),
540  "Free function calls are not supported on host device");
541 #endif
542 }
543 
544 namespace ext {
545 namespace oneapi {
546 namespace experimental {
547 template <int Dims> group<Dims> this_group() {
548 #ifdef __SYCL_DEVICE_ONLY__
549  return sycl::detail::Builder::getElement(
551 #else
552  throw sycl::exception(
553  sycl::make_error_code(sycl::errc::feature_not_supported),
554  "Free function calls are not supported on host device");
555 #endif
556 }
557 } // namespace experimental
558 } // namespace oneapi
559 } // namespace ext
560 } // namespace sycl
561 } // __SYCL_INLINE_NAMESPACE(cl)
cl::sycl::group::async_work_group_copy
detail::enable_if_t<!detail::is_bool< dataT >::value, device_event > async_work_group_copy(local_ptr< dataT > dest, global_ptr< dataT > src, size_t numElements, size_t srcStride) const
Asynchronously copies a number of elements specified by numElements from the source pointed by src to...
Definition: group.hpp:303
spirv_ops.hpp
cl::sycl::group::get_local_range
size_t get_local_range(int dimension) const
Definition: group.hpp:142
__spirv_ControlBarrier
__SYCL_CONVERGENT__ SYCL_EXTERNAL void __spirv_ControlBarrier(__spv::Scope Execution, __spv::Scope Memory, uint32_t Semantics) noexcept
Definition: spirv_ops.cpp:26
cl::sycl::group::mem_fence
void mem_fence(typename detail::enable_if_t< accessMode==access::mode::read||accessMode==access::mode::write||accessMode==access::mode::read_write, access::fence_space > accessSpace=access::fence_space::global_and_local) const
Executes a work-group mem-fence with memory ordering on the local address space, global address space...
Definition: group.hpp:278
cl::sycl::detail::Builder
Definition: helpers.hpp:68
__spv::Scope::Workgroup
@ Workgroup
Definition: spirv_types.hpp:30
T
__SYCL2020_DEPRECATED
#define __SYCL2020_DEPRECATED(message)
Definition: defines_elementary.hpp:56
cl::sycl::item
Identifies an instance of the function object executing at each point in a range.
Definition: helpers.hpp:28
cl::sycl::group::async_work_group_copy
detail::enable_if_t<!detail::is_bool< dataT >::value, device_event > async_work_group_copy(global_ptr< dataT > dest, local_ptr< dataT > src, size_t numElements, size_t destStride) const
Asynchronously copies a number of elements specified by numElements from the source pointed by src to...
Definition: group.hpp:321
cl::sycl::private_memory::operator()
T & operator()(const h_item< Dimensions > &Id)
Definition: group.hpp:65
cl::sycl::multi_ptr::get
pointer_t get() const
Definition: multi_ptr.hpp:232
cl::sycl::group::operator!=
bool operator!=(const group< Dimensions > &rhs) const
Definition: group.hpp:404
__SYCL_OpGroupAsyncCopyGlobalToLocal
__SYCL_CONVERGENT__ __ocl_event_t __SYCL_OpGroupAsyncCopyGlobalToLocal(__spv::Scope::Flag, dataT *Dest, dataT *Src, size_t NumElements, size_t Stride, __ocl_event_t) noexcept
Definition: spirv_ops.hpp:880
cl::sycl::id< Dimensions >
cl::sycl::group::get_group_range
size_t get_group_range(int dimension) const
Definition: group.hpp:150
__SYCL_DEPRECATED
#define __SYCL_DEPRECATED(message)
Definition: defines_elementary.hpp:47
cl::sycl::detail::workGroupBarrier
static void workGroupBarrier()
Definition: group.hpp:33
cl::sycl::group
Encapsulates all functionality required to represent a particular work-group within a parallel execut...
Definition: helpers.hpp:29
cl::sycl::detail::declptr
T * declptr()
Definition: helpers.hpp:56
cl::sycl::make_error_code
std::error_code make_error_code(sycl::errc E) noexcept
Constructs an error code using e and sycl_category()
Definition: exception.cpp:121
cl::sycl::group::operator[]
size_t operator[](int dimension) const
Definition: group.hpp:160
cl::sycl::detail::get_local_linear_id
Group::linear_id_type get_local_linear_id(Group g)
cl::sycl::h_item::setLogicalLocalID
void setLogicalLocalID(const id< dimensions > &ID)
Definition: h_item.hpp:124
helpers.hpp
spirv_vars.hpp
sycl
Definition: invoke_simd.hpp:68
cl::sycl::multi_ptr
Provides constructors for address space qualified and non address space qualified pointers to allow i...
Definition: atomic.hpp:32
__ocl_event_t
void * __ocl_event_t
Definition: spirv_types.hpp:174
cl::sycl::h_item::get_physical_local
item< dimensions, false > get_physical_local() const
Definition: h_item.hpp:43
cl::sycl::group::wait_for
void wait_for(eventTN... Events) const
Definition: group.hpp:392
cl::sycl::detail::write
void write(GlobalBufAccessorT &GlobalFlushBuf, size_t FlushBufferSize, unsigned WIOffset, const char *Str, unsigned Len, unsigned Padding=0)
Definition: stream.hpp:110
id.hpp
cl::sycl::range< Dimensions >
cl::sycl::group::leader
bool leader() const
Definition: group.hpp:167
cl::sycl::group::async_work_group_copy
device_event async_work_group_copy(global_ptr< dataT > dest, local_ptr< dataT > src, size_t numElements) const
Asynchronously copies a number of elements specified by numElements from the source pointed by src to...
Definition: group.hpp:385
cl::sycl::group::async_work_group_copy
detail::enable_if_t< detail::is_vector_bool< T >::value, device_event > async_work_group_copy(multi_ptr< T, DestS > Dest, multi_ptr< T, SrcS > Src, size_t NumElements, size_t Stride) const
Specialization for vector bool type.
Definition: group.hpp:357
cl::sycl::group::operator==
bool operator==(const group< Dimensions > &rhs) const
Definition: group.hpp:396
cl::sycl::private_memory
Definition: group.hpp:52
cl::sycl::access::fence_space
fence_space
Definition: access.hpp:37
cl::sycl::detail::ConvertToOpenCLType_t
conditional_t< TryToGetVectorT< SelectMatchingOpenCLType_t< T > >::value, typename TryToGetVectorT< SelectMatchingOpenCLType_t< T > >::type, conditional_t< TryToGetPointerT< SelectMatchingOpenCLType_t< T > >::value, typename TryToGetPointerVecT< SelectMatchingOpenCLType_t< T > >::type, SelectMatchingOpenCLType_t< T > >> ConvertToOpenCLType_t
Definition: generic_type_traits.hpp:472
cl::sycl::memory_scope
memory_scope
Definition: memory_enums.hpp:26
cl::sycl::detail::NDLoop
Generates an NDIMS-dimensional perfect loop nest.
Definition: common.hpp:288
cl::sycl::detail::getSPIRVMemorySemanticsMask
constexpr __spv::MemorySemanticsMask::Flag getSPIRVMemorySemanticsMask(memory_order)
Definition: helpers.hpp:200
cl::sycl::group::get_local_linear_id
size_t get_local_linear_id() const
Definition: group.hpp:136
cl::sycl::h_item
Identifies an instance of a group::parallel_for_work_item function object executing at each point in ...
Definition: helpers.hpp:33
generic_type_traits.hpp
range.hpp
spirv.hpp
cl::sycl::group::get_local_id
id< Dimensions > get_local_id() const
Definition: group.hpp:123
cl::sycl::group< 1 >::linear_id_type
size_t linear_id_type
Definition: group.hpp:98
cl::sycl::group::get_group_id
id< Dimensions > get_group_id() const
Definition: group.hpp:113
cl
We provide new interfaces for matrix muliply in this patch:
Definition: access.hpp:13
__spirv_MemoryBarrier
__SYCL_CONVERGENT__ SYCL_EXTERNAL void __spirv_MemoryBarrier(__spv::Scope Memory, uint32_t Semantics) noexcept
Definition: spirv_ops.cpp:36
cl::sycl::group::async_work_group_copy
device_event async_work_group_copy(local_ptr< dataT > dest, global_ptr< dataT > src, size_t numElements) const
Asynchronously copies a number of elements specified by numElements from the source pointed by src to...
Definition: group.hpp:373
cl::sycl::group::get_global_range
size_t get_global_range(int dimension) const
Definition: group.hpp:119
cl::sycl::group::group
group(const range< Dimensions > &G, const range< Dimensions > &L, const range< Dimensions > GroupRange, const id< Dimensions > &I)
Definition: group.hpp:521
__SYCL_OpGroupAsyncCopyLocalToGlobal
__SYCL_CONVERGENT__ __ocl_event_t __SYCL_OpGroupAsyncCopyLocalToGlobal(__spv::Scope::Flag, dataT *Dest, dataT *Src, size_t NumElements, size_t Stride, __ocl_event_t) noexcept
Definition: spirv_ops.hpp:892
cl::sycl::private_memory::private_memory
private_memory(const group< Dimensions > &G)
Definition: group.hpp:55
cl::sycl::group::get_local_linear_range
size_t get_local_linear_range() const
Definition: group.hpp:144
__spv::MemorySemanticsMask::SequentiallyConsistent
@ SequentiallyConsistent
Definition: spirv_types.hpp:89
cl::sycl::item::get_linear_id
size_t __SYCL_ALWAYS_INLINE get_linear_id() const
Definition: item.hpp:95
cl::sycl::group::get_local_range
range< Dimensions > get_local_range() const
Definition: group.hpp:140
cl::sycl::detail::change_base_type_t
typename change_base_type< T, B >::type change_base_type_t
Definition: type_traits.hpp:110
__spv::MemorySemanticsMask::WorkgroupMemory
@ WorkgroupMemory
Definition: spirv_types.hpp:92
cl::sycl::device_event
Encapsulates a single SYCL device event which is available only within SYCL kernel functions and can ...
Definition: device_event.hpp:22
cl::sycl::group::get_max_local_range
range< Dimensions > get_max_local_range() const
Definition: group.hpp:158
cl::sycl::group::get_group_range
range< Dimensions > get_group_range() const
Definition: group.hpp:148
cl::sycl::group::get_group_linear_range
size_t get_group_linear_range() const
Definition: group.hpp:154
cl::sycl::group::async_work_group_copy
detail::enable_if_t< detail::is_scalar_bool< T >::value, device_event > async_work_group_copy(multi_ptr< T, DestS > Dest, multi_ptr< T, SrcS > Src, size_t NumElements, size_t Stride) const
Specialization for scalar bool type.
Definition: group.hpp:339
device_event.hpp
cl::sycl::group::get_global_range
range< Dimensions > get_global_range() const
Definition: group.hpp:117
memory_enums.hpp
cl::sycl::group::parallel_for_work_item
void parallel_for_work_item(WorkItemFunctionT Func) const
Definition: group.hpp:170
common.hpp
cl::sycl::exception
Definition: exception.hpp:63
h_item.hpp
__SYCL_ASSERT
#define __SYCL_ASSERT(x)
Definition: common.hpp:109
cl::sycl::ext::oneapi::experimental::this_group
group< Dims > this_group()
Definition: group.hpp:547
cl::sycl::group::parallel_for_work_item
void parallel_for_work_item(range< Dimensions > flexibleRange, WorkItemFunctionT Func) const
Definition: group.hpp:221
cl::sycl::detail::enable_if_t
typename std::enable_if< B, T >::type enable_if_t
Definition: stl_type_traits.hpp:24
pointers.hpp
cl::sycl::Dimensions
Dimensions
Definition: backend.hpp:138
PI_INVALID_DEVICE
@ PI_INVALID_DEVICE
Definition: pi.h:94
cl::sycl::group::get_group_linear_id
size_t get_group_linear_id() const
Definition: group.hpp:165
cl::sycl::group::get_group_id
size_t get_group_id(int dimension) const
Definition: group.hpp:115
spirv_types.hpp
__SYCL_INLINE_NAMESPACE
#define __SYCL_INLINE_NAMESPACE(X)
Definition: defines_elementary.hpp:12