17 namespace ext::intel::experimental {
25 static constexpr int32_t value = _N;
26 static constexpr int32_t default_value = 0;
29 template <
int32_t _N>
struct cache {
30 static constexpr int32_t value = _N;
31 static constexpr int32_t default_value = 0;
35 static constexpr int32_t value = _N;
36 static constexpr int32_t default_value = 0;
40 static constexpr int32_t value = _N;
41 static constexpr int32_t default_value = 1;
48 template <
class... _mem_access_params>
class lsu final {
54 static _T
load(sycl::multi_ptr<_T, _space, _Is_decorated> Ptr,
55 _propertiesT Properties) {
56 check_space<_space>();
58 #if defined(__SYCL_DEVICE_ONLY__) && __has_builtin(__builtin_intel_fpga_mem)
68 static constexpr int32_t _anchor_id = _latency_anchor_id_prop::value;
71 _latency_constraint_prop::type;
72 static constexpr int32_t _relative_cycle = _latency_constraint_prop::cycle;
74 int32_t _control_type_code = 0;
75 if constexpr (_control_type == latency_control_type::exact) {
76 _control_type_code = 1;
78 _control_type_code = 2;
80 _control_type_code = 3;
83 return *__latency_control_mem_wrapper((_T *)Ptr, _anchor_id, _target_anchor,
84 _control_type_code, _relative_cycle);
93 static _T
load(sycl::multi_ptr<_T, _space, _Is_decorated> Ptr) {
99 static void store(sycl::multi_ptr<_T, _space, _Is_decorated> Ptr, _T Val,
100 _propertiesT Properties) {
101 check_space<_space>();
103 #if defined(__SYCL_DEVICE_ONLY__) && __has_builtin(__builtin_intel_fpga_mem)
113 static constexpr int32_t _anchor_id = _latency_anchor_id_prop::value;
116 _latency_constraint_prop::type;
117 static constexpr int32_t _relative_cycle = _latency_constraint_prop::cycle;
119 int32_t _control_type_code = 0;
120 if constexpr (_control_type == latency_control_type::exact) {
121 _control_type_code = 1;
123 _control_type_code = 2;
125 _control_type_code = 3;
128 *__latency_control_mem_wrapper((_T *)Ptr, _anchor_id, _target_anchor,
129 _control_type_code, _relative_cycle) = Val;
138 static void store(sycl::multi_ptr<_T, _space, _Is_decorated> Ptr, _T Val) {
143 static constexpr int32_t _burst_coalesce_val =
145 static constexpr uint8_t _burst_coalesce =
148 static constexpr int32_t _cache_val =
150 static constexpr uint8_t _cache = (_cache_val > 0) ?
CACHE : 0;
152 static constexpr int32_t _statically_coalesce_val =
154 static constexpr uint8_t _dont_statically_coalesce =
157 static constexpr int32_t _prefetch_val =
159 static constexpr uint8_t _prefetch = _prefetch_val ?
PREFETCH : 0;
161 static_assert(_cache_val >= 0,
"cache size parameter must be non-negative");
163 template <access::address_space _space>
static void check_space() {
165 _space == access::address_space::global_space ||
166 _space == access::address_space::ext_intel_global_device_space ||
167 _space == access::address_space::ext_intel_global_host_space,
168 "lsu controls are only supported for global_ptr, "
169 "device_ptr, and host_ptr objects");
172 static void check_load() {
174 "unable to implement a cache without a burst coalescer");
175 static_assert(_prefetch == 0 || _burst_coalesce == 0,
176 "unable to implement a prefetcher and a burst coalescer "
179 _prefetch == 0 || _cache == 0,
180 "unable to implement a prefetcher and a cache simulataneously");
182 static void check_store() {
183 static_assert(_cache == 0,
"unable to implement a store LSU with a cache.");
184 static_assert(_prefetch == 0,
185 "unable to implement a store LSU with a prefetcher.");
188 #if defined(__SYCL_DEVICE_ONLY__) && __has_builtin(__builtin_intel_fpga_mem)
191 template <
typename _T>
192 static _T *__latency_control_mem_wrapper(_T *Ptr, int32_t AnchorID,
193 int32_t TargetAnchor, int32_t Type,
195 return __builtin_intel_fpga_mem(
196 Ptr, _burst_coalesce | _cache | _dont_statically_coalesce | _prefetch,