DPC++ Runtime
Runtime libraries for oneAPI DPC++
kernel_program_cache.hpp
Go to the documentation of this file.
1 //==--- kernel_program_cache.hpp - Cache for kernel and program -*- C++-*---==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #pragma once
10 
12 #include <detail/platform_impl.hpp>
13 #include <sycl/detail/common.hpp>
14 #include <sycl/detail/locked.hpp>
15 #include <sycl/detail/os_util.hpp>
16 #include <sycl/detail/pi.hpp>
17 #include <sycl/detail/util.hpp>
18 
19 #include <atomic>
20 #include <condition_variable>
21 #include <mutex>
22 #include <type_traits>
23 
24 #include <boost/unordered/unordered_flat_map.hpp>
25 #include <boost/unordered_map.hpp>
26 
27 // For testing purposes
28 class MockKernelProgramCache;
29 
30 namespace sycl {
31 inline namespace _V1 {
32 namespace detail {
33 class context_impl;
35 public:
38  struct BuildError {
39  std::string Msg;
41 
42  bool isFilledIn() const { return !Msg.empty(); }
43  };
44 
47 
52  template <typename T> struct BuildResult {
53  T Val;
54  std::atomic<BuildState> State{BuildState::BS_Initial};
55  BuildError Error{"", 0};
56 
67  std::condition_variable MBuildCV;
69  std::mutex MBuildResultMutex;
70 
73  BuildState To;
74  std::unique_lock<std::mutex> Lock(MBuildResultMutex);
75  MBuildCV.wait(Lock, [&] {
76  To = State;
77  return State != From;
78  });
79  return To;
80  }
81 
82  void updateAndNotify(BuildState DesiredState) {
83  {
84  std::lock_guard<std::mutex> Lock(MBuildResultMutex);
85  State.store(DesiredState);
86  }
87  MBuildCV.notify_all();
88  }
89  };
90 
91  struct ProgramBuildResult : public BuildResult<sycl::detail::pi::PiProgram> {
94  Val = nullptr;
95  }
97  if (Val) {
99  Plugin->call_nocheck<PiApiKind::piProgramRelease>(Val);
101  }
102  }
103  };
104  using ProgramBuildResultPtr = std::shared_ptr<ProgramBuildResult>;
105 
106  /* Drop LinkOptions and CompileOptions from CacheKey since they are only used
107  * when debugging environment variables are set and we can just ignore them
108  * since all kernels will have their build options overridden with the same
109  * string*/
110  using ProgramCacheKeyT = std::pair<std::pair<SerializedObj, std::uintptr_t>,
113  std::pair<std::uintptr_t, sycl::detail::pi::PiDevice>;
114 
115  struct ProgramCache {
116  ::boost::unordered_map<ProgramCacheKeyT, ProgramBuildResultPtr> Cache;
117  ::boost::unordered_multimap<CommonProgramKeyT, ProgramCacheKeyT> KeyMap;
118 
119  size_t size() const noexcept { return Cache.size(); }
120  };
121 
123 
125  std::pair<sycl::detail::pi::PiKernel, const KernelArgMask *>;
126  struct KernelBuildResult : public BuildResult<KernelArgMaskPairT> {
129  Val.first = nullptr;
130  }
132  if (Val.first) {
134  Plugin->call_nocheck<PiApiKind::piKernelRelease>(Val.first);
136  }
137  }
138  };
139  using KernelBuildResultPtr = std::shared_ptr<KernelBuildResult>;
140 
142  ::boost::unordered_map<std::string, KernelBuildResultPtr>;
143  using KernelCacheT =
144  ::boost::unordered_map<sycl::detail::pi::PiProgram, KernelByNameT>;
145 
147  std::tuple<SerializedObj, sycl::detail::pi::PiDevice, std::string,
148  std::string>;
150  std::tuple<sycl::detail::pi::PiKernel, std::mutex *,
152  // This container is used as a fast path for retrieving cached kernels.
153  // unordered_flat_map is used here to reduce lookup overhead.
154  // The slow path is used only once for each newly created kernel, so the
155  // higher overhead of insertion that comes with unordered_flat_map is more
156  // of an issue there. For that reason, those use regular unordered maps.
158  ::boost::unordered_flat_map<KernelFastCacheKeyT, KernelFastCacheValT>;
159 
160  ~KernelProgramCache() = default;
161 
162  void setContextPtr(const ContextPtr &AContext) { MParentContext = AContext; }
163 
165  return {MCachedPrograms, MProgramCacheMutex};
166  }
167 
169  return {MKernelsPerProgramCache, MKernelsPerProgramCacheMutex};
170  }
171 
172  std::pair<ProgramBuildResultPtr, bool>
174  auto LockedCache = acquireCachedPrograms();
175  auto &ProgCache = LockedCache.get();
176  auto [It, DidInsert] = ProgCache.Cache.try_emplace(CacheKey, nullptr);
177  if (DidInsert) {
178  It->second = std::make_shared<ProgramBuildResult>(getPlugin());
179  // Save reference between the common key and the full key.
180  CommonProgramKeyT CommonKey =
181  std::make_pair(CacheKey.first.second, CacheKey.second);
182  ProgCache.KeyMap.emplace(CommonKey, CacheKey);
183  }
184  return std::make_pair(It->second, DidInsert);
185  }
186 
187  std::pair<KernelBuildResultPtr, bool>
189  const std::string &KernelName) {
190  auto LockedCache = acquireKernelsPerProgramCache();
191  auto &Cache = LockedCache.get()[Program];
192  auto [It, DidInsert] = Cache.try_emplace(KernelName, nullptr);
193  if (DidInsert)
194  It->second = std::make_shared<KernelBuildResult>(getPlugin());
195  return std::make_pair(It->second, DidInsert);
196  }
197 
198  template <typename KeyT>
200  std::unique_lock<std::mutex> Lock(MKernelFastCacheMutex);
201  auto It = MKernelFastCache.find(CacheKey);
202  if (It != MKernelFastCache.end()) {
203  return It->second;
204  }
205  return std::make_tuple(nullptr, nullptr, nullptr, nullptr);
206  }
207 
208  template <typename KeyT, typename ValT>
209  void saveKernel(KeyT &&CacheKey, ValT &&CacheVal) {
210  std::unique_lock<std::mutex> Lock(MKernelFastCacheMutex);
211  // if no insertion took place, thus some other thread has already inserted
212  // smth in the cache
213  MKernelFastCache.emplace(CacheKey, CacheVal);
214  }
215 
219  void reset() {
220  std::lock_guard<std::mutex> L1(MProgramCacheMutex);
221  std::lock_guard<std::mutex> L2(MKernelsPerProgramCacheMutex);
222  std::lock_guard<std::mutex> L3(MKernelFastCacheMutex);
223  MCachedPrograms = ProgramCache{};
224  MKernelsPerProgramCache = KernelCacheT{};
225  MKernelFastCache = KernelFastCacheT{};
226  }
227 
250  template <typename ExceptionT, typename GetCachedBuildFT, typename BuildFT>
251  auto getOrBuild(GetCachedBuildFT &&GetCachedBuild, BuildFT &&Build) {
253  constexpr size_t MaxAttempts = 2;
254  for (size_t AttemptCounter = 0;; ++AttemptCounter) {
255  auto Res = GetCachedBuild();
256  auto &BuildResult = Res.first;
259  if (!BuildResult->State.compare_exchange_strong(Expected, Desired)) {
260  // no insertion took place, thus some other thread has already inserted
261  // smth in the cache
263 
264  // Build succeeded.
265  if (NewState == BuildState::BS_Done)
266  return BuildResult;
267 
268  // Build failed, or this is the last attempt.
269  if (NewState == BuildState::BS_Failed ||
270  AttemptCounter + 1 == MaxAttempts) {
272  throw ExceptionT(BuildResult->Error.Msg, BuildResult->Error.Code);
273  else
274  throw exception();
275  }
276 
277  // NewState == BuildState::BS_Initial
278  // Build state was set back to the initial state,
279  // which means to go back to the beginning of the
280  // loop and try again.
281  continue;
282  }
283 
284  // only the building thread will run this
285  try {
286  BuildResult->Val = Build();
287 
289  return BuildResult;
290  } catch (const exception &Ex) {
291  BuildResult->Error.Msg = Ex.what();
293  if (BuildResult->Error.Code == PI_ERROR_OUT_OF_RESOURCES) {
294  reset();
296  continue;
297  }
298 
300  std::rethrow_exception(std::current_exception());
301  } catch (...) {
303  std::rethrow_exception(std::current_exception());
304  }
305  }
306  }
307 
308 private:
309  std::mutex MProgramCacheMutex;
310  std::mutex MKernelsPerProgramCacheMutex;
311 
312  ProgramCache MCachedPrograms;
313  KernelCacheT MKernelsPerProgramCache;
314  ContextPtr MParentContext;
315 
316  std::mutex MKernelFastCacheMutex;
317  KernelFastCacheT MKernelFastCache;
318  friend class ::MockKernelProgramCache;
319 
320  const PluginPtr &getPlugin();
321 };
322 } // namespace detail
323 } // namespace _V1
324 } // namespace sycl
std::shared_ptr< KernelBuildResult > KernelBuildResultPtr
std::shared_ptr< ProgramBuildResult > ProgramBuildResultPtr
::boost::unordered_flat_map< KernelFastCacheKeyT, KernelFastCacheValT > KernelFastCacheT
BuildState
Denotes the state of a build.
std::pair< std::pair< SerializedObj, std::uintptr_t >, sycl::detail::pi::PiDevice > ProgramCacheKeyT
void setContextPtr(const ContextPtr &AContext)
auto getOrBuild(GetCachedBuildFT &&GetCachedBuild, BuildFT &&Build)
Try to fetch entity (kernel or program) from cache.
std::pair< KernelBuildResultPtr, bool > getOrInsertKernel(sycl::detail::pi::PiProgram Program, const std::string &KernelName)
Locked< KernelCacheT > acquireKernelsPerProgramCache()
::boost::unordered_map< sycl::detail::pi::PiProgram, KernelByNameT > KernelCacheT
KernelFastCacheValT tryToGetKernelFast(KeyT &&CacheKey)
std::tuple< SerializedObj, sycl::detail::pi::PiDevice, std::string, std::string > KernelFastCacheKeyT
std::pair< std::uintptr_t, sycl::detail::pi::PiDevice > CommonProgramKeyT
::boost::unordered_map< std::string, KernelBuildResultPtr > KernelByNameT
void saveKernel(KeyT &&CacheKey, ValT &&CacheVal)
std::tuple< sycl::detail::pi::PiKernel, std::mutex *, const KernelArgMask *, sycl::detail::pi::PiProgram > KernelFastCacheValT
std::pair< ProgramBuildResultPtr, bool > getOrInsertProgram(const ProgramCacheKeyT &CacheKey)
std::pair< sycl::detail::pi::PiKernel, const KernelArgMask * > KernelArgMaskPairT
Represents a reference to value with appropriate lock acquired.
Definition: locked.hpp:23
const char * what() const noexcept final
Definition: exception.cpp:76
cl_int get_cl_code() const
Definition: exception.cpp:87
#define __SYCL_CHECK_OCL_CODE_NO_EXC(X)
Definition: common.hpp:239
::pi_device PiDevice
Definition: pi.hpp:131
::pi_kernel PiKernel
Definition: pi.hpp:138
std::vector< bool > KernelArgMask
std::shared_ptr< plugin > PluginPtr
Definition: pi.hpp:48
constexpr tuple< Ts... > make_tuple(Ts... Args)
Definition: tuple.hpp:35
std::vector< unsigned char > SerializedObj
Definition: util.hpp:68
Definition: access.hpp:18
int32_t pi_int32
Definition: pi.h:199
_pi_result
Definition: pi.h:211
pi_result piKernelRelease(pi_kernel kernel)
Definition: pi_cuda.cpp:531
pi_result piProgramRelease(pi_program program)
Definition: pi_cuda.cpp:324
C++ wrapper of extern "C" PI interfaces.
_Abi const simd< _Tp, _Abi > & noexcept
Definition: simd.hpp:1324
Denotes pointer to some entity with its general state and build error.
std::mutex MBuildResultMutex
A mutex to be employed along with MBuildCV.
BuildState waitUntilTransition(BuildState From=BuildState::BS_InProgress)
std::condition_variable MBuildCV
Condition variable to signal that build result is ready.
::boost::unordered_map< ProgramCacheKeyT, ProgramBuildResultPtr > Cache
::boost::unordered_multimap< CommonProgramKeyT, ProgramCacheKeyT > KeyMap