DPC++ Runtime
Runtime libraries for oneAPI DPC++
fpga_lsu.hpp
Go to the documentation of this file.
1 //==-------------- fpga_lsu.hpp --- SYCL FPGA LSU Extensions ---------------==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 #pragma once
9 
10 #include "fpga_utils.hpp"
12 #include <CL/sycl/pointers.hpp>
14 
16 namespace sycl {
17 namespace ext {
18 namespace intel {
19 namespace experimental {
20 
21 constexpr uint8_t BURST_COALESCE = 0x1;
22 constexpr uint8_t CACHE = 0x2;
23 constexpr uint8_t STATICALLY_COALESCE = 0x4;
24 constexpr uint8_t PREFETCH = 0x8;
25 
26 template <int32_t _N> struct burst_coalesce_impl {
27  static constexpr int32_t value = _N;
28  static constexpr int32_t default_value = 0;
29 };
30 
31 template <int32_t _N> struct cache {
32  static constexpr int32_t value = _N;
33  static constexpr int32_t default_value = 0;
34 };
35 
36 template <int32_t _N> struct prefetch_impl {
37  static constexpr int32_t value = _N;
38  static constexpr int32_t default_value = 0;
39 };
40 
41 template <int32_t _N> struct statically_coalesce_impl {
42  static constexpr int32_t value = _N;
43  static constexpr int32_t default_value = 1;
44 };
45 
46 template <bool _B> using burst_coalesce = burst_coalesce_impl<_B>;
47 template <bool _B> using prefetch = prefetch_impl<_B>;
49 
50 template <class... _mem_access_params> class lsu final {
51 public:
52  lsu() = delete;
53 
54  template <typename _T, access::address_space _space, typename _propertiesT>
55  static _T load(sycl::multi_ptr<_T, _space> Ptr, _propertiesT Properties) {
56  check_space<_space>();
57  check_load();
58 #if defined(__SYCL_DEVICE_ONLY__) && __has_builtin(__builtin_intel_fpga_mem)
59  // Get latency control properties
60  using _latency_anchor_id_prop = typename detail::GetOrDefaultValT<
61  _propertiesT, latency_anchor_id_key,
63  using _latency_constraint_prop = typename detail::GetOrDefaultValT<
64  _propertiesT, latency_constraint_key,
66 
67  // Get latency control property values
68  static constexpr int32_t _anchor_id = _latency_anchor_id_prop::value;
69  static constexpr int32_t _target_anchor = _latency_constraint_prop::target;
70  static constexpr latency_control_type _control_type =
71  _latency_constraint_prop::type;
72  static constexpr int32_t _relative_cycle = _latency_constraint_prop::cycle;
73 
74  int32_t _control_type_code = 0; // latency_control_type::none is default
75  if constexpr (_control_type == latency_control_type::exact) {
76  _control_type_code = 1;
77  } else if constexpr (_control_type == latency_control_type::max) {
78  _control_type_code = 2;
79  } else if constexpr (_control_type == latency_control_type::min) {
80  _control_type_code = 3;
81  }
82 
83  return *__latency_control_mem_wrapper((_T *)Ptr, _anchor_id, _target_anchor,
84  _control_type_code, _relative_cycle);
85 #else
86  (void)Properties;
87  return *Ptr;
88 #endif
89  }
90 
91  template <typename _T, access::address_space _space>
93  return load<_T, _space>(Ptr, oneapi::experimental::properties{});
94  }
95 
96  template <typename _T, access::address_space _space, typename _propertiesT>
97  static void store(sycl::multi_ptr<_T, _space> Ptr, _T Val,
98  _propertiesT Properties) {
99  check_space<_space>();
100  check_store();
101 #if defined(__SYCL_DEVICE_ONLY__) && __has_builtin(__builtin_intel_fpga_mem)
102  // Get latency control properties
103  using _latency_anchor_id_prop = typename detail::GetOrDefaultValT<
104  _propertiesT, latency_anchor_id_key,
106  using _latency_constraint_prop = typename detail::GetOrDefaultValT<
107  _propertiesT, latency_constraint_key,
109 
110  // Get latency control property values
111  static constexpr int32_t _anchor_id = _latency_anchor_id_prop::value;
112  static constexpr int32_t _target_anchor = _latency_constraint_prop::target;
113  static constexpr latency_control_type _control_type =
114  _latency_constraint_prop::type;
115  static constexpr int32_t _relative_cycle = _latency_constraint_prop::cycle;
116 
117  int32_t _control_type_code = 0; // latency_control_type::none is default
118  if constexpr (_control_type == latency_control_type::exact) {
119  _control_type_code = 1;
120  } else if constexpr (_control_type == latency_control_type::max) {
121  _control_type_code = 2;
122  } else if constexpr (_control_type == latency_control_type::min) {
123  _control_type_code = 3;
124  }
125 
126  *__latency_control_mem_wrapper((_T *)Ptr, _anchor_id, _target_anchor,
127  _control_type_code, _relative_cycle) = Val;
128 #else
129  (void)Properties;
130  *Ptr = Val;
131 #endif
132  }
133 
134  template <typename _T, access::address_space _space>
135  static void store(sycl::multi_ptr<_T, _space> Ptr, _T Val) {
136  store<_T, _space>(Ptr, Val, oneapi::experimental::properties{});
137  }
138 
139 private:
140  static constexpr int32_t _burst_coalesce_val =
141  detail::_GetValue<burst_coalesce_impl, _mem_access_params...>::value;
142  static constexpr uint8_t _burst_coalesce =
143  _burst_coalesce_val == 1 ? BURST_COALESCE : 0;
144 
145  static constexpr int32_t _cache_val =
146  detail::_GetValue<cache, _mem_access_params...>::value;
147  static constexpr uint8_t _cache = (_cache_val > 0) ? CACHE : 0;
148 
149  static constexpr int32_t _statically_coalesce_val =
150  detail::_GetValue<statically_coalesce_impl, _mem_access_params...>::value;
151  static constexpr uint8_t _dont_statically_coalesce =
152  _statically_coalesce_val == 0 ? STATICALLY_COALESCE : 0;
153 
154  static constexpr int32_t _prefetch_val =
155  detail::_GetValue<prefetch_impl, _mem_access_params...>::value;
156  static constexpr uint8_t _prefetch = _prefetch_val ? PREFETCH : 0;
157 
158  static_assert(_cache_val >= 0, "cache size parameter must be non-negative");
159 
160  template <access::address_space _space> static void check_space() {
161  static_assert(
162  _space == access::address_space::global_space ||
163  _space == access::address_space::ext_intel_global_device_space ||
164  _space == access::address_space::ext_intel_global_host_space,
165  "lsu controls are only supported for global_ptr, "
166  "device_ptr, and host_ptr objects");
167  }
168 
169  static void check_load() {
170  static_assert(_cache == 0 || _burst_coalesce == BURST_COALESCE,
171  "unable to implement a cache without a burst coalescer");
172  static_assert(_prefetch == 0 || _burst_coalesce == 0,
173  "unable to implement a prefetcher and a burst coalescer "
174  "simulataneously");
175  static_assert(
176  _prefetch == 0 || _cache == 0,
177  "unable to implement a prefetcher and a cache simulataneously");
178  }
179  static void check_store() {
180  static_assert(_cache == 0, "unable to implement a store LSU with a cache.");
181  static_assert(_prefetch == 0,
182  "unable to implement a store LSU with a prefetcher.");
183  }
184 
185 #if defined(__SYCL_DEVICE_ONLY__) && __has_builtin(__builtin_intel_fpga_mem)
186  // FPGA BE will recognize this function and extract its arguments.
187  // TODO: Pass latency control params via __builtin_intel_fpga_mem when ready.
188  template <typename _T>
189  static _T *__latency_control_mem_wrapper(_T *Ptr, int32_t AnchorID,
190  int32_t TargetAnchor, int32_t Type,
191  int32_t Cycle) {
192  return __builtin_intel_fpga_mem(
193  Ptr, _burst_coalesce | _cache | _dont_statically_coalesce | _prefetch,
194  _cache_val);
195  }
196 #endif
197 };
198 
199 } // namespace experimental
200 } // namespace intel
201 } // namespace ext
202 } // namespace sycl
203 } // __SYCL_INLINE_NAMESPACE(cl)
cl::sycl::ext::intel::experimental::lsu::load
static _T load(sycl::multi_ptr< _T, _space > Ptr)
Definition: fpga_lsu.hpp:92
cl::sycl::ext::intel::experimental::latency_anchor_id_key
Definition: properties.hpp:25
cl::sycl::ext::intel::PREFETCH
constexpr uint8_t PREFETCH
Definition: fpga_lsu.hpp:21
cl::sycl::ext::intel::experimental::latency_constraint_key
Definition: properties.hpp:32
cl::sycl::ext::intel::experimental::burst_coalesce_impl
Definition: fpga_lsu.hpp:26
cl::sycl::ext::intel::experimental::detail::_GetValue
Definition: fpga_utils.hpp:23
properties.hpp
cl::sycl::ext::intel::STATICALLY_COALESCE
constexpr uint8_t STATICALLY_COALESCE
Definition: fpga_lsu.hpp:20
sycl
Definition: invoke_simd.hpp:68
cl::sycl::multi_ptr
Provides constructors for address space qualified and non address space qualified pointers to allow i...
Definition: atomic.hpp:33
max
simd< _Tp, _Abi > max(const simd< _Tp, _Abi > &, const simd< _Tp, _Abi > &) noexcept
cl::sycl::ext::intel::experimental::statically_coalesce_impl
Definition: fpga_lsu.hpp:41
cl::sycl::ext::intel::experimental::prefetch_impl
Definition: fpga_lsu.hpp:36
defines.hpp
cl::sycl::access::target
target
Definition: access.hpp:17
cl
We provide new interfaces for matrix muliply in this patch:
Definition: access.hpp:13
cl::sycl::ext::intel::experimental::latency_control_type
latency_control_type
Definition: properties.hpp:18
cl::sycl::ext::intel::experimental::lsu::store
static void store(sycl::multi_ptr< _T, _space > Ptr, _T Val)
Definition: fpga_lsu.hpp:135
cl::sycl::ext::intel::experimental::detail::GetOrDefaultValT
Definition: fpga_utils.hpp:39
fpga_utils.hpp
cl::sycl::ext::intel::experimental::cache
Definition: fpga_lsu.hpp:31
cl::sycl::ext::oneapi::experimental::properties
Definition: properties.hpp:128
cl::sycl::ext::intel::BURST_COALESCE
constexpr uint8_t BURST_COALESCE
Definition: fpga_lsu.hpp:18
cl::sycl::ext::intel::experimental::lsu::load
static _T load(sycl::multi_ptr< _T, _space > Ptr, _propertiesT Properties)
Definition: fpga_lsu.hpp:55
cl::sycl::ext::intel::CACHE
constexpr uint8_t CACHE
Definition: fpga_lsu.hpp:19
pointers.hpp
cl::sycl::ext::oneapi::experimental::property_value
Definition: property_utils.hpp:23
cl::sycl::ext::intel::experimental::lsu
Definition: fpga_lsu.hpp:50
min
simd< _Tp, _Abi > min(const simd< _Tp, _Abi > &, const simd< _Tp, _Abi > &) noexcept
__SYCL_INLINE_NAMESPACE
#define __SYCL_INLINE_NAMESPACE(X)
Definition: defines_elementary.hpp:12
cl::sycl::ext::intel::experimental::lsu::store
static void store(sycl::multi_ptr< _T, _space > Ptr, _T Val, _propertiesT Properties)
Definition: fpga_lsu.hpp:97