DPC++ Runtime
Runtime libraries for oneAPI DPC++
kernel_program_cache.hpp
Go to the documentation of this file.
1 //==--- kernel_program_cache.hpp - Cache for kernel and program -*- C++-*---==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #pragma once
10 
11 #include "sycl/exception.hpp"
13 #include <detail/platform_impl.hpp>
14 #include <sycl/detail/common.hpp>
15 #include <sycl/detail/locked.hpp>
16 #include <sycl/detail/os_util.hpp>
17 #include <sycl/detail/ur.hpp>
18 #include <sycl/detail/util.hpp>
19 
20 #include <atomic>
21 #include <condition_variable>
22 #include <mutex>
23 #include <type_traits>
24 
25 #include <boost/unordered/unordered_flat_map.hpp>
26 #include <boost/unordered_map.hpp>
27 
28 // For testing purposes
29 class MockKernelProgramCache;
30 
31 namespace sycl {
32 inline namespace _V1 {
33 namespace detail {
34 class context_impl;
36 public:
39  struct BuildError {
40  std::string Msg;
41  int32_t Code;
42 
43  bool isFilledIn() const { return !Msg.empty(); }
44  };
45 
48 
53  template <typename T> struct BuildResult {
54  T Val;
55  std::atomic<BuildState> State{BuildState::BS_Initial};
56  BuildError Error{"", 0};
57 
68  std::condition_variable MBuildCV;
70  std::mutex MBuildResultMutex;
71 
74  BuildState To;
75  std::unique_lock<std::mutex> Lock(MBuildResultMutex);
76  MBuildCV.wait(Lock, [&] {
77  To = State;
78  return State != From;
79  });
80  return To;
81  }
82 
83  void updateAndNotify(BuildState DesiredState) {
84  {
85  std::lock_guard<std::mutex> Lock(MBuildResultMutex);
86  State.store(DesiredState);
87  }
88  MBuildCV.notify_all();
89  }
90  };
91 
92  struct ProgramBuildResult : public BuildResult<ur_program_handle_t> {
95  Val = nullptr;
96  }
98  : Plugin(Plugin) {
99  Val = nullptr;
100  this->State.store(InitialState);
101  }
103  if (Val) {
104  ur_result_t Err = Plugin->call_nocheck(urProgramRelease, Val);
106  }
107  }
108  };
109  using ProgramBuildResultPtr = std::shared_ptr<ProgramBuildResult>;
110 
111  /* Drop LinkOptions and CompileOptions from CacheKey since they are only used
112  * when debugging environment variables are set and we can just ignore them
113  * since all kernels will have their build options overridden with the same
114  * string*/
116  std::pair<std::pair<SerializedObj, std::uintptr_t>, ur_device_handle_t>;
117  using CommonProgramKeyT = std::pair<std::uintptr_t, ur_device_handle_t>;
118 
119  struct ProgramCache {
120  ::boost::unordered_map<ProgramCacheKeyT, ProgramBuildResultPtr> Cache;
121  ::boost::unordered_multimap<CommonProgramKeyT, ProgramCacheKeyT> KeyMap;
122 
123  size_t size() const noexcept { return Cache.size(); }
124  };
125 
127 
129  std::pair<ur_kernel_handle_t, const KernelArgMask *>;
130  struct KernelBuildResult : public BuildResult<KernelArgMaskPairT> {
133  Val.first = nullptr;
134  }
136  if (Val.first) {
137  ur_result_t Err = Plugin->call_nocheck(urKernelRelease, Val.first);
139  }
140  }
141  };
142  using KernelBuildResultPtr = std::shared_ptr<KernelBuildResult>;
143 
145  ::boost::unordered_map<std::string, KernelBuildResultPtr>;
146  using KernelCacheT =
147  ::boost::unordered_map<ur_program_handle_t, KernelByNameT>;
148 
150  std::tuple<SerializedObj, ur_device_handle_t, std::string, std::string>;
152  std::tuple<ur_kernel_handle_t, std::mutex *, const KernelArgMask *,
153  ur_program_handle_t>;
154  // This container is used as a fast path for retrieving cached kernels.
155  // unordered_flat_map is used here to reduce lookup overhead.
156  // The slow path is used only once for each newly created kernel, so the
157  // higher overhead of insertion that comes with unordered_flat_map is more
158  // of an issue there. For that reason, those use regular unordered maps.
160  ::boost::unordered_flat_map<KernelFastCacheKeyT, KernelFastCacheValT>;
161 
162  ~KernelProgramCache() = default;
163 
164  void setContextPtr(const ContextPtr &AContext) { MParentContext = AContext; }
165 
167  return {MCachedPrograms, MProgramCacheMutex};
168  }
169 
171  return {MKernelsPerProgramCache, MKernelsPerProgramCacheMutex};
172  }
173 
174  std::pair<ProgramBuildResultPtr, bool>
176  auto LockedCache = acquireCachedPrograms();
177  auto &ProgCache = LockedCache.get();
178  auto [It, DidInsert] = ProgCache.Cache.try_emplace(CacheKey, nullptr);
179  if (DidInsert) {
180  It->second = std::make_shared<ProgramBuildResult>(getPlugin());
181  // Save reference between the common key and the full key.
182  CommonProgramKeyT CommonKey =
183  std::make_pair(CacheKey.first.second, CacheKey.second);
184  ProgCache.KeyMap.emplace(CommonKey, CacheKey);
185  }
186  return std::make_pair(It->second, DidInsert);
187  }
188 
189  // Used in situation where you have several cache keys corresponding to the
190  // same program. An example would be a multi-device build, or use of virtual
191  // functions in kernels.
192  //
193  // Returns whether or not an insertion took place.
194  bool insertBuiltProgram(const ProgramCacheKeyT &CacheKey,
195  ur_program_handle_t Program) {
196  auto LockedCache = acquireCachedPrograms();
197  auto &ProgCache = LockedCache.get();
198  auto [It, DidInsert] = ProgCache.Cache.try_emplace(CacheKey, nullptr);
199  if (DidInsert) {
200  It->second = std::make_shared<ProgramBuildResult>(getPlugin(),
202  It->second->Val = Program;
203  // Save reference between the common key and the full key.
204  CommonProgramKeyT CommonKey =
205  std::make_pair(CacheKey.first.second, CacheKey.second);
206  ProgCache.KeyMap.emplace(CommonKey, CacheKey);
207  }
208  return DidInsert;
209  }
210 
211  std::pair<KernelBuildResultPtr, bool>
212  getOrInsertKernel(ur_program_handle_t Program,
213  const std::string &KernelName) {
214  auto LockedCache = acquireKernelsPerProgramCache();
215  auto &Cache = LockedCache.get()[Program];
216  auto [It, DidInsert] = Cache.try_emplace(KernelName, nullptr);
217  if (DidInsert)
218  It->second = std::make_shared<KernelBuildResult>(getPlugin());
219  return std::make_pair(It->second, DidInsert);
220  }
221 
222  template <typename KeyT>
224  std::unique_lock<std::mutex> Lock(MKernelFastCacheMutex);
225  auto It = MKernelFastCache.find(CacheKey);
226  if (It != MKernelFastCache.end()) {
227  return It->second;
228  }
229  return std::make_tuple(nullptr, nullptr, nullptr, nullptr);
230  }
231 
232  template <typename KeyT, typename ValT>
233  void saveKernel(KeyT &&CacheKey, ValT &&CacheVal) {
234  std::unique_lock<std::mutex> Lock(MKernelFastCacheMutex);
235  // if no insertion took place, thus some other thread has already inserted
236  // smth in the cache
237  MKernelFastCache.emplace(CacheKey, CacheVal);
238  }
239 
243  void reset() {
244  std::lock_guard<std::mutex> L1(MProgramCacheMutex);
245  std::lock_guard<std::mutex> L2(MKernelsPerProgramCacheMutex);
246  std::lock_guard<std::mutex> L3(MKernelFastCacheMutex);
247  MCachedPrograms = ProgramCache{};
248  MKernelsPerProgramCache = KernelCacheT{};
249  MKernelFastCache = KernelFastCacheT{};
250  }
251 
274  template <errc Errc, typename GetCachedBuildFT, typename BuildFT>
275  auto getOrBuild(GetCachedBuildFT &&GetCachedBuild, BuildFT &&Build) {
277  constexpr size_t MaxAttempts = 2;
278  for (size_t AttemptCounter = 0;; ++AttemptCounter) {
279  auto Res = GetCachedBuild();
280  auto &BuildResult = Res.first;
283  if (!BuildResult->State.compare_exchange_strong(Expected, Desired)) {
284  // no insertion took place, thus some other thread has already inserted
285  // smth in the cache
287 
288  // Build succeeded.
289  if (NewState == BuildState::BS_Done)
290  return BuildResult;
291 
292  // Build failed, or this is the last attempt.
293  if (NewState == BuildState::BS_Failed ||
294  AttemptCounter + 1 == MaxAttempts) {
296  throw detail::set_ur_error(
299  else
300  throw exception();
301  }
302 
303  // NewState == BuildState::BS_Initial
304  // Build state was set back to the initial state,
305  // which means to go back to the beginning of the
306  // loop and try again.
307  continue;
308  }
309 
310  // only the building thread will run this
311  try {
312  BuildResult->Val = Build();
313 
315  return BuildResult;
316  } catch (const exception &Ex) {
317  BuildResult->Error.Msg = Ex.what();
319  if (Ex.code() == errc::memory_allocation ||
320  BuildResult->Error.Code == UR_RESULT_ERROR_OUT_OF_RESOURCES ||
321  BuildResult->Error.Code == UR_RESULT_ERROR_OUT_OF_HOST_MEMORY) {
322  reset();
324  continue;
325  }
326 
328  std::rethrow_exception(std::current_exception());
329  } catch (...) {
331  std::rethrow_exception(std::current_exception());
332  }
333  }
334  }
335 
336 private:
337  std::mutex MProgramCacheMutex;
338  std::mutex MKernelsPerProgramCacheMutex;
339 
340  ProgramCache MCachedPrograms;
341  KernelCacheT MKernelsPerProgramCache;
342  ContextPtr MParentContext;
343 
344  std::mutex MKernelFastCacheMutex;
345  KernelFastCacheT MKernelFastCache;
346  friend class ::MockKernelProgramCache;
347 
348  const PluginPtr &getPlugin();
349 };
350 } // namespace detail
351 } // namespace _V1
352 } // namespace sycl
std::shared_ptr< KernelBuildResult > KernelBuildResultPtr
::boost::unordered_map< ur_program_handle_t, KernelByNameT > KernelCacheT
std::pair< std::uintptr_t, ur_device_handle_t > CommonProgramKeyT
std::shared_ptr< ProgramBuildResult > ProgramBuildResultPtr
bool insertBuiltProgram(const ProgramCacheKeyT &CacheKey, ur_program_handle_t Program)
::boost::unordered_flat_map< KernelFastCacheKeyT, KernelFastCacheValT > KernelFastCacheT
BuildState
Denotes the state of a build.
void setContextPtr(const ContextPtr &AContext)
auto getOrBuild(GetCachedBuildFT &&GetCachedBuild, BuildFT &&Build)
Try to fetch entity (kernel or program) from cache.
std::pair< KernelBuildResultPtr, bool > getOrInsertKernel(ur_program_handle_t Program, const std::string &KernelName)
std::pair< std::pair< SerializedObj, std::uintptr_t >, ur_device_handle_t > ProgramCacheKeyT
std::pair< ur_kernel_handle_t, const KernelArgMask * > KernelArgMaskPairT
std::tuple< ur_kernel_handle_t, std::mutex *, const KernelArgMask *, ur_program_handle_t > KernelFastCacheValT
Locked< KernelCacheT > acquireKernelsPerProgramCache()
KernelFastCacheValT tryToGetKernelFast(KeyT &&CacheKey)
::boost::unordered_map< std::string, KernelBuildResultPtr > KernelByNameT
void saveKernel(KeyT &&CacheKey, ValT &&CacheVal)
std::pair< ProgramBuildResultPtr, bool > getOrInsertProgram(const ProgramCacheKeyT &CacheKey)
std::tuple< SerializedObj, ur_device_handle_t, std::string, std::string > KernelFastCacheKeyT
Represents a reference to value with appropriate lock acquired.
Definition: locked.hpp:23
const char * what() const noexcept final
Definition: exception.cpp:49
const std::error_code & code() const noexcept
Definition: exception.cpp:43
std::vector< bool > KernelArgMask
std::shared_ptr< plugin > PluginPtr
Definition: ur.hpp:60
constexpr tuple< Ts... > make_tuple(Ts... Args)
Definition: tuple.hpp:35
int32_t get_ur_error(const exception &e)
Definition: exception.hpp:156
exception set_ur_error(exception &&e, int32_t ur_err)
Definition: exception.hpp:157
std::error_code make_error_code(sycl::errc E) noexcept
Constructs an error code using e and sycl_category()
Definition: exception.cpp:65
Definition: access.hpp:18
#define __SYCL_CHECK_OCL_CODE_NO_EXC(X)
Definition: plugin.hpp:36
_Abi const simd< _Tp, _Abi > & noexcept
Definition: simd.hpp:1324
Denotes pointer to some entity with its general state and build error.
std::mutex MBuildResultMutex
A mutex to be employed along with MBuildCV.
BuildState waitUntilTransition(BuildState From=BuildState::BS_InProgress)
std::condition_variable MBuildCV
Condition variable to signal that build result is ready.
ProgramBuildResult(const PluginPtr &Plugin, BuildState InitialState)
::boost::unordered_map< ProgramCacheKeyT, ProgramBuildResultPtr > Cache
::boost::unordered_multimap< CommonProgramKeyT, ProgramCacheKeyT > KeyMap
C++ utilities for Unified Runtime integration.