DPC++ Runtime
Runtime libraries for oneAPI DPC++
fpga_lsu.hpp
Go to the documentation of this file.
1 //==-------------- fpga_lsu.hpp --- SYCL FPGA LSU Extensions ---------------==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 #pragma once
9 
10 #include "fpga_utils.hpp"
11 #include <sycl/detail/defines.hpp>
13 #include <sycl/pointers.hpp>
14 
15 namespace sycl {
17 namespace ext::intel::experimental {
18 
19 constexpr uint8_t BURST_COALESCE = 0x1;
20 constexpr uint8_t CACHE = 0x2;
21 constexpr uint8_t STATICALLY_COALESCE = 0x4;
22 constexpr uint8_t PREFETCH = 0x8;
23 
24 template <int32_t _N> struct burst_coalesce_impl {
25  static constexpr int32_t value = _N;
26  static constexpr int32_t default_value = 0;
27 };
28 
29 template <int32_t _N> struct cache {
30  static constexpr int32_t value = _N;
31  static constexpr int32_t default_value = 0;
32 };
33 
34 template <int32_t _N> struct prefetch_impl {
35  static constexpr int32_t value = _N;
36  static constexpr int32_t default_value = 0;
37 };
38 
39 template <int32_t _N> struct statically_coalesce_impl {
40  static constexpr int32_t value = _N;
41  static constexpr int32_t default_value = 1;
42 };
43 
44 template <bool _B> using burst_coalesce = burst_coalesce_impl<_B>;
45 template <bool _B> using prefetch = prefetch_impl<_B>;
47 
48 template <class... _mem_access_params> class lsu final {
49 public:
50  lsu() = delete;
51 
52  template <typename _T, access::address_space _space,
53  access::decorated _Is_decorated, typename _propertiesT>
54  static _T load(sycl::multi_ptr<_T, _space, _Is_decorated> Ptr,
55  _propertiesT Properties) {
56  check_space<_space>();
57  check_load();
58 #if defined(__SYCL_DEVICE_ONLY__) && __has_builtin(__builtin_intel_fpga_mem)
59  // Get latency control properties
60  using _latency_anchor_id_prop = typename detail::GetOrDefaultValT<
61  _propertiesT, latency_anchor_id_key,
63  using _latency_constraint_prop = typename detail::GetOrDefaultValT<
64  _propertiesT, latency_constraint_key,
66 
67  // Get latency control property values
68  static constexpr int32_t _anchor_id = _latency_anchor_id_prop::value;
69  static constexpr int32_t _target_anchor = _latency_constraint_prop::target;
70  static constexpr latency_control_type _control_type =
71  _latency_constraint_prop::type;
72  static constexpr int32_t _relative_cycle = _latency_constraint_prop::cycle;
73 
74  int32_t _control_type_code = 0; // latency_control_type::none is default
75  if constexpr (_control_type == latency_control_type::exact) {
76  _control_type_code = 1;
77  } else if constexpr (_control_type == latency_control_type::max) {
78  _control_type_code = 2;
79  } else if constexpr (_control_type == latency_control_type::min) {
80  _control_type_code = 3;
81  }
82 
83  return *__latency_control_mem_wrapper((_T *)Ptr, _anchor_id, _target_anchor,
84  _control_type_code, _relative_cycle);
85 #else
86  (void)Properties;
87  return *Ptr;
88 #endif
89  }
90 
91  template <typename _T, access::address_space _space,
92  access::decorated _Is_decorated>
93  static _T load(sycl::multi_ptr<_T, _space, _Is_decorated> Ptr) {
94  return load<_T, _space>(Ptr, oneapi::experimental::properties{});
95  }
96 
97  template <typename _T, access::address_space _space,
98  access::decorated _Is_decorated, typename _propertiesT>
99  static void store(sycl::multi_ptr<_T, _space, _Is_decorated> Ptr, _T Val,
100  _propertiesT Properties) {
101  check_space<_space>();
102  check_store();
103 #if defined(__SYCL_DEVICE_ONLY__) && __has_builtin(__builtin_intel_fpga_mem)
104  // Get latency control properties
105  using _latency_anchor_id_prop = typename detail::GetOrDefaultValT<
106  _propertiesT, latency_anchor_id_key,
108  using _latency_constraint_prop = typename detail::GetOrDefaultValT<
109  _propertiesT, latency_constraint_key,
111 
112  // Get latency control property values
113  static constexpr int32_t _anchor_id = _latency_anchor_id_prop::value;
114  static constexpr int32_t _target_anchor = _latency_constraint_prop::target;
115  static constexpr latency_control_type _control_type =
116  _latency_constraint_prop::type;
117  static constexpr int32_t _relative_cycle = _latency_constraint_prop::cycle;
118 
119  int32_t _control_type_code = 0; // latency_control_type::none is default
120  if constexpr (_control_type == latency_control_type::exact) {
121  _control_type_code = 1;
122  } else if constexpr (_control_type == latency_control_type::max) {
123  _control_type_code = 2;
124  } else if constexpr (_control_type == latency_control_type::min) {
125  _control_type_code = 3;
126  }
127 
128  *__latency_control_mem_wrapper((_T *)Ptr, _anchor_id, _target_anchor,
129  _control_type_code, _relative_cycle) = Val;
130 #else
131  (void)Properties;
132  *Ptr = Val;
133 #endif
134  }
135 
136  template <typename _T, access::address_space _space,
137  access::decorated _Is_decorated>
138  static void store(sycl::multi_ptr<_T, _space, _Is_decorated> Ptr, _T Val) {
139  store<_T, _space>(Ptr, Val, oneapi::experimental::properties{});
140  }
141 
142 private:
143  static constexpr int32_t _burst_coalesce_val =
144  detail::_GetValue<burst_coalesce_impl, _mem_access_params...>::value;
145  static constexpr uint8_t _burst_coalesce =
146  _burst_coalesce_val == 1 ? BURST_COALESCE : 0;
147 
148  static constexpr int32_t _cache_val =
149  detail::_GetValue<cache, _mem_access_params...>::value;
150  static constexpr uint8_t _cache = (_cache_val > 0) ? CACHE : 0;
151 
152  static constexpr int32_t _statically_coalesce_val =
153  detail::_GetValue<statically_coalesce_impl, _mem_access_params...>::value;
154  static constexpr uint8_t _dont_statically_coalesce =
155  _statically_coalesce_val == 0 ? STATICALLY_COALESCE : 0;
156 
157  static constexpr int32_t _prefetch_val =
158  detail::_GetValue<prefetch_impl, _mem_access_params...>::value;
159  static constexpr uint8_t _prefetch = _prefetch_val ? PREFETCH : 0;
160 
161  static_assert(_cache_val >= 0, "cache size parameter must be non-negative");
162 
163  template <access::address_space _space> static void check_space() {
164  static_assert(
165  _space == access::address_space::global_space ||
166  _space == access::address_space::ext_intel_global_device_space ||
167  _space == access::address_space::ext_intel_global_host_space,
168  "lsu controls are only supported for global_ptr, "
169  "device_ptr, and host_ptr objects");
170  }
171 
172  static void check_load() {
173  static_assert(_cache == 0 || _burst_coalesce == BURST_COALESCE,
174  "unable to implement a cache without a burst coalescer");
175  static_assert(_prefetch == 0 || _burst_coalesce == 0,
176  "unable to implement a prefetcher and a burst coalescer "
177  "simulataneously");
178  static_assert(
179  _prefetch == 0 || _cache == 0,
180  "unable to implement a prefetcher and a cache simulataneously");
181  }
182  static void check_store() {
183  static_assert(_cache == 0, "unable to implement a store LSU with a cache.");
184  static_assert(_prefetch == 0,
185  "unable to implement a store LSU with a prefetcher.");
186  }
187 
188 #if defined(__SYCL_DEVICE_ONLY__) && __has_builtin(__builtin_intel_fpga_mem)
189  // FPGA BE will recognize this function and extract its arguments.
190  // TODO: Pass latency control params via __builtin_intel_fpga_mem when ready.
191  template <typename _T>
192  static _T *__latency_control_mem_wrapper(_T *Ptr, int32_t AnchorID,
193  int32_t TargetAnchor, int32_t Type,
194  int32_t Cycle) {
195  return __builtin_intel_fpga_mem(
196  Ptr, _burst_coalesce | _cache | _dont_statically_coalesce | _prefetch,
197  _cache_val);
198  }
199 #endif
200 };
201 
202 } // namespace ext::intel::experimental
203 } // __SYCL_INLINE_VER_NAMESPACE(_V1)
204 } // namespace sycl
sycl::_V1::ext::intel::experimental::detail::GetOrDefaultValT
Definition: fpga_utils.hpp:40
__SYCL_INLINE_VER_NAMESPACE
#define __SYCL_INLINE_VER_NAMESPACE(X)
Definition: defines_elementary.hpp:11
sycl::_V1::ext::oneapi::experimental::properties
Definition: properties.hpp:126
properties.hpp
sycl
---— Error handling, matching OpenCL plugin semantics.
Definition: access.hpp:14
sycl::_V1::ext::intel::experimental::cache
Definition: fpga_lsu.hpp:29
max
simd< _Tp, _Abi > max(const simd< _Tp, _Abi > &, const simd< _Tp, _Abi > &) noexcept
sycl::_V1::ext::intel::experimental::prefetch_impl
Definition: fpga_lsu.hpp:34
sycl::_V1::ext::intel::experimental::latency_constraint_key
Definition: properties.hpp:32
sycl::_V1::ext::intel::experimental::detail::_GetValue
Definition: fpga_utils.hpp:24
sycl::_V1::ext::intel::experimental::statically_coalesce_impl
Definition: fpga_lsu.hpp:39
sycl::_V1::ext::intel::STATICALLY_COALESCE
constexpr uint8_t STATICALLY_COALESCE
Definition: fpga_lsu.hpp:19
defines.hpp
sycl::_V1::access::target
target
Definition: access.hpp:18
sycl::_V1::ext::intel::experimental::lsu::store
static void store(sycl::multi_ptr< _T, _space, _Is_decorated > Ptr, _T Val, _propertiesT Properties)
Definition: fpga_lsu.hpp:99
sycl::_V1::access::decorated
decorated
Definition: access.hpp:59
fpga_utils.hpp
sycl::_V1::ext::intel::experimental::lsu::store
static void store(sycl::multi_ptr< _T, _space, _Is_decorated > Ptr, _T Val)
Definition: fpga_lsu.hpp:138
sycl::_V1::ext::intel::experimental::lsu
Definition: fpga_lsu.hpp:48
sycl::_V1::ext::intel::experimental::lsu::load
static _T load(sycl::multi_ptr< _T, _space, _Is_decorated > Ptr, _propertiesT Properties)
Definition: fpga_lsu.hpp:54
sycl::_V1::ext::intel::experimental::lsu::load
static _T load(sycl::multi_ptr< _T, _space, _Is_decorated > Ptr)
Definition: fpga_lsu.hpp:93
sycl::_V1::ext::intel::experimental::latency_control_type
latency_control_type
Definition: properties.hpp:18
sycl::_V1::ext::intel::BURST_COALESCE
constexpr uint8_t BURST_COALESCE
Definition: fpga_lsu.hpp:17
sycl::_V1::ext::oneapi::experimental::property_value
Definition: property_utils.hpp:22
sycl::_V1::ext::intel::experimental::latency_anchor_id_key
Definition: properties.hpp:25
sycl::_V1::ext::intel::experimental::burst_coalesce_impl
Definition: fpga_lsu.hpp:24
pointers.hpp
sycl::_V1::ext::intel::PREFETCH
constexpr uint8_t PREFETCH
Definition: fpga_lsu.hpp:20
sycl::_V1::ext::intel::CACHE
constexpr uint8_t CACHE
Definition: fpga_lsu.hpp:18
min
simd< _Tp, _Abi > min(const simd< _Tp, _Abi > &, const simd< _Tp, _Abi > &) noexcept
sycl::_V1::access::address_space
address_space
Definition: access.hpp:47