DPC++ Runtime
Runtime libraries for oneAPI DPC++
kernel_program_cache.hpp
Go to the documentation of this file.
1 //==--- kernel_program_cache.hpp - Cache for kernel and program -*- C++-*---==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #pragma once
10 
12 #include <detail/platform_impl.hpp>
13 #include <sycl/detail/common.hpp>
14 #include <sycl/detail/locked.hpp>
15 #include <sycl/detail/os_util.hpp>
16 #include <sycl/detail/pi.hpp>
17 #include <sycl/detail/util.hpp>
18 
19 #include <atomic>
20 #include <condition_variable>
21 #include <mutex>
22 #include <type_traits>
23 
24 #include <boost/unordered/unordered_flat_map.hpp>
25 #include <boost/unordered_map.hpp>
26 
27 // For testing purposes
28 class MockKernelProgramCache;
29 
30 namespace sycl {
31 inline namespace _V1 {
32 namespace detail {
33 class context_impl;
35 public:
38  struct BuildError {
39  std::string Msg;
41 
42  bool isFilledIn() const { return !Msg.empty(); }
43  };
44 
47 
52  template <typename T> struct BuildResult {
53  T Val;
54  std::atomic<BuildState> State{BuildState::BS_Initial};
55  BuildError Error{"", 0};
56 
67  std::condition_variable MBuildCV;
69  std::mutex MBuildResultMutex;
70 
73  BuildState To;
74  std::unique_lock<std::mutex> Lock(MBuildResultMutex);
75  MBuildCV.wait(Lock, [&] {
76  To = State;
77  return State != From;
78  });
79  return To;
80  }
81 
82  void updateAndNotify(BuildState DesiredState) {
83  {
84  std::lock_guard<std::mutex> Lock(MBuildResultMutex);
85  State.store(DesiredState);
86  }
87  MBuildCV.notify_all();
88  }
89  };
90 
91  struct ProgramBuildResult : public BuildResult<sycl::detail::pi::PiProgram> {
94  Val = nullptr;
95  }
97  : Plugin(Plugin) {
98  Val = nullptr;
99  this->State.store(InitialState);
100  }
102  if (Val) {
104  Plugin->call_nocheck<PiApiKind::piProgramRelease>(Val);
106  }
107  }
108  };
109  using ProgramBuildResultPtr = std::shared_ptr<ProgramBuildResult>;
110 
111  /* Drop LinkOptions and CompileOptions from CacheKey since they are only used
112  * when debugging environment variables are set and we can just ignore them
113  * since all kernels will have their build options overridden with the same
114  * string*/
115  using ProgramCacheKeyT = std::pair<std::pair<SerializedObj, std::uintptr_t>,
118  std::pair<std::uintptr_t, sycl::detail::pi::PiDevice>;
119 
120  struct ProgramCache {
121  ::boost::unordered_map<ProgramCacheKeyT, ProgramBuildResultPtr> Cache;
122  ::boost::unordered_multimap<CommonProgramKeyT, ProgramCacheKeyT> KeyMap;
123 
124  size_t size() const noexcept { return Cache.size(); }
125  };
126 
128 
130  std::pair<sycl::detail::pi::PiKernel, const KernelArgMask *>;
131  struct KernelBuildResult : public BuildResult<KernelArgMaskPairT> {
134  Val.first = nullptr;
135  }
137  if (Val.first) {
139  Plugin->call_nocheck<PiApiKind::piKernelRelease>(Val.first);
141  }
142  }
143  };
144  using KernelBuildResultPtr = std::shared_ptr<KernelBuildResult>;
145 
147  ::boost::unordered_map<std::string, KernelBuildResultPtr>;
148  using KernelCacheT =
149  ::boost::unordered_map<sycl::detail::pi::PiProgram, KernelByNameT>;
150 
152  std::tuple<SerializedObj, sycl::detail::pi::PiDevice, std::string,
153  std::string>;
155  std::tuple<sycl::detail::pi::PiKernel, std::mutex *,
157  // This container is used as a fast path for retrieving cached kernels.
158  // unordered_flat_map is used here to reduce lookup overhead.
159  // The slow path is used only once for each newly created kernel, so the
160  // higher overhead of insertion that comes with unordered_flat_map is more
161  // of an issue there. For that reason, those use regular unordered maps.
163  ::boost::unordered_flat_map<KernelFastCacheKeyT, KernelFastCacheValT>;
164 
165  ~KernelProgramCache() = default;
166 
167  void setContextPtr(const ContextPtr &AContext) { MParentContext = AContext; }
168 
170  return {MCachedPrograms, MProgramCacheMutex};
171  }
172 
174  return {MKernelsPerProgramCache, MKernelsPerProgramCacheMutex};
175  }
176 
177  std::pair<ProgramBuildResultPtr, bool>
179  auto LockedCache = acquireCachedPrograms();
180  auto &ProgCache = LockedCache.get();
181  auto [It, DidInsert] = ProgCache.Cache.try_emplace(CacheKey, nullptr);
182  if (DidInsert) {
183  It->second = std::make_shared<ProgramBuildResult>(getPlugin());
184  // Save reference between the common key and the full key.
185  CommonProgramKeyT CommonKey =
186  std::make_pair(CacheKey.first.second, CacheKey.second);
187  ProgCache.KeyMap.emplace(CommonKey, CacheKey);
188  }
189  return std::make_pair(It->second, DidInsert);
190  }
191 
192  // Used in situation where you have several cache keys corresponding to the
193  // same program. An example would be a multi-device build, or use of virtual
194  // functions in kernels.
195  //
196  // Returns whether or not an insertion took place.
197  bool insertBuiltProgram(const ProgramCacheKeyT &CacheKey,
198  sycl::detail::pi::PiProgram Program) {
199  auto LockedCache = acquireCachedPrograms();
200  auto &ProgCache = LockedCache.get();
201  auto [It, DidInsert] = ProgCache.Cache.try_emplace(CacheKey, nullptr);
202  if (DidInsert) {
203  It->second = std::make_shared<ProgramBuildResult>(getPlugin(),
205  It->second->Val = Program;
206  // Save reference between the common key and the full key.
207  CommonProgramKeyT CommonKey =
208  std::make_pair(CacheKey.first.second, CacheKey.second);
209  ProgCache.KeyMap.emplace(CommonKey, CacheKey);
210  }
211  return DidInsert;
212  }
213 
214  std::pair<KernelBuildResultPtr, bool>
216  const std::string &KernelName) {
217  auto LockedCache = acquireKernelsPerProgramCache();
218  auto &Cache = LockedCache.get()[Program];
219  auto [It, DidInsert] = Cache.try_emplace(KernelName, nullptr);
220  if (DidInsert)
221  It->second = std::make_shared<KernelBuildResult>(getPlugin());
222  return std::make_pair(It->second, DidInsert);
223  }
224 
225  template <typename KeyT>
227  std::unique_lock<std::mutex> Lock(MKernelFastCacheMutex);
228  auto It = MKernelFastCache.find(CacheKey);
229  if (It != MKernelFastCache.end()) {
230  return It->second;
231  }
232  return std::make_tuple(nullptr, nullptr, nullptr, nullptr);
233  }
234 
235  template <typename KeyT, typename ValT>
236  void saveKernel(KeyT &&CacheKey, ValT &&CacheVal) {
237  std::unique_lock<std::mutex> Lock(MKernelFastCacheMutex);
238  // if no insertion took place, thus some other thread has already inserted
239  // smth in the cache
240  MKernelFastCache.emplace(CacheKey, CacheVal);
241  }
242 
246  void reset() {
247  std::lock_guard<std::mutex> L1(MProgramCacheMutex);
248  std::lock_guard<std::mutex> L2(MKernelsPerProgramCacheMutex);
249  std::lock_guard<std::mutex> L3(MKernelFastCacheMutex);
250  MCachedPrograms = ProgramCache{};
251  MKernelsPerProgramCache = KernelCacheT{};
252  MKernelFastCache = KernelFastCacheT{};
253  }
254 
277  template <errc Errc, typename GetCachedBuildFT, typename BuildFT>
278  auto getOrBuild(GetCachedBuildFT &&GetCachedBuild, BuildFT &&Build) {
280  constexpr size_t MaxAttempts = 2;
281  for (size_t AttemptCounter = 0;; ++AttemptCounter) {
282  auto Res = GetCachedBuild();
283  auto &BuildResult = Res.first;
286  if (!BuildResult->State.compare_exchange_strong(Expected, Desired)) {
287  // no insertion took place, thus some other thread has already inserted
288  // smth in the cache
290 
291  // Build succeeded.
292  if (NewState == BuildState::BS_Done)
293  return BuildResult;
294 
295  // Build failed, or this is the last attempt.
296  if (NewState == BuildState::BS_Failed ||
297  AttemptCounter + 1 == MaxAttempts) {
299  throw detail::set_pi_error(
302  else
303  throw exception();
304  }
305 
306  // NewState == BuildState::BS_Initial
307  // Build state was set back to the initial state,
308  // which means to go back to the beginning of the
309  // loop and try again.
310  continue;
311  }
312 
313  // only the building thread will run this
314  try {
315  BuildResult->Val = Build();
316 
318  return BuildResult;
319  } catch (const exception &Ex) {
320  BuildResult->Error.Msg = Ex.what();
322  if (Ex.code() == errc::memory_allocation ||
323  BuildResult->Error.Code == PI_ERROR_OUT_OF_RESOURCES ||
324  BuildResult->Error.Code == PI_ERROR_OUT_OF_HOST_MEMORY) {
325  reset();
327  continue;
328  }
329 
331  std::rethrow_exception(std::current_exception());
332  } catch (...) {
334  std::rethrow_exception(std::current_exception());
335  }
336  }
337  }
338 
339 private:
340  std::mutex MProgramCacheMutex;
341  std::mutex MKernelsPerProgramCacheMutex;
342 
343  ProgramCache MCachedPrograms;
344  KernelCacheT MKernelsPerProgramCache;
345  ContextPtr MParentContext;
346 
347  std::mutex MKernelFastCacheMutex;
348  KernelFastCacheT MKernelFastCache;
349  friend class ::MockKernelProgramCache;
350 
351  const PluginPtr &getPlugin();
352 };
353 } // namespace detail
354 } // namespace _V1
355 } // namespace sycl
std::shared_ptr< KernelBuildResult > KernelBuildResultPtr
std::shared_ptr< ProgramBuildResult > ProgramBuildResultPtr
::boost::unordered_flat_map< KernelFastCacheKeyT, KernelFastCacheValT > KernelFastCacheT
BuildState
Denotes the state of a build.
std::pair< std::pair< SerializedObj, std::uintptr_t >, sycl::detail::pi::PiDevice > ProgramCacheKeyT
void setContextPtr(const ContextPtr &AContext)
auto getOrBuild(GetCachedBuildFT &&GetCachedBuild, BuildFT &&Build)
Try to fetch entity (kernel or program) from cache.
std::pair< KernelBuildResultPtr, bool > getOrInsertKernel(sycl::detail::pi::PiProgram Program, const std::string &KernelName)
Locked< KernelCacheT > acquireKernelsPerProgramCache()
::boost::unordered_map< sycl::detail::pi::PiProgram, KernelByNameT > KernelCacheT
KernelFastCacheValT tryToGetKernelFast(KeyT &&CacheKey)
std::tuple< SerializedObj, sycl::detail::pi::PiDevice, std::string, std::string > KernelFastCacheKeyT
bool insertBuiltProgram(const ProgramCacheKeyT &CacheKey, sycl::detail::pi::PiProgram Program)
std::pair< std::uintptr_t, sycl::detail::pi::PiDevice > CommonProgramKeyT
::boost::unordered_map< std::string, KernelBuildResultPtr > KernelByNameT
void saveKernel(KeyT &&CacheKey, ValT &&CacheVal)
std::tuple< sycl::detail::pi::PiKernel, std::mutex *, const KernelArgMask *, sycl::detail::pi::PiProgram > KernelFastCacheValT
std::pair< ProgramBuildResultPtr, bool > getOrInsertProgram(const ProgramCacheKeyT &CacheKey)
std::pair< sycl::detail::pi::PiKernel, const KernelArgMask * > KernelArgMaskPairT
Represents a reference to value with appropriate lock acquired.
Definition: locked.hpp:23
const char * what() const noexcept final
Definition: exception.cpp:48
const std::error_code & code() const noexcept
Definition: exception.cpp:42
::pi_device PiDevice
Definition: pi.hpp:110
::pi_kernel PiKernel
Definition: pi.hpp:117
std::vector< bool > KernelArgMask
exception set_pi_error(exception &&e, pi_int32 pi_err)
Definition: exception.cpp:70
std::shared_ptr< plugin > PluginPtr
Definition: pi.hpp:47
constexpr tuple< Ts... > make_tuple(Ts... Args)
Definition: tuple.hpp:35
std::vector< unsigned char > SerializedObj
Definition: util.hpp:69
pi_int32 get_pi_error(const exception &e)
Definition: exception.cpp:69
std::error_code make_error_code(sycl::errc E) noexcept
Constructs an error code using e and sycl_category()
Definition: exception.cpp:64
Definition: access.hpp:18
int32_t pi_int32
Definition: pi.h:248
_pi_result
Definition: pi.h:260
pi_result piKernelRelease(pi_kernel kernel)
Definition: pi_cuda.cpp:552
pi_result piProgramRelease(pi_program program)
Definition: pi_cuda.cpp:324
C++ wrapper of extern "C" PI interfaces.
#define __SYCL_CHECK_OCL_CODE_NO_EXC(X)
Definition: plugin.hpp:35
_Abi const simd< _Tp, _Abi > & noexcept
Definition: simd.hpp:1324
Denotes pointer to some entity with its general state and build error.
std::mutex MBuildResultMutex
A mutex to be employed along with MBuildCV.
BuildState waitUntilTransition(BuildState From=BuildState::BS_InProgress)
std::condition_variable MBuildCV
Condition variable to signal that build result is ready.
ProgramBuildResult(const PluginPtr &Plugin, BuildState InitialState)
::boost::unordered_map< ProgramCacheKeyT, ProgramBuildResultPtr > Cache
::boost::unordered_multimap< CommonProgramKeyT, ProgramCacheKeyT > KeyMap