DPC++ Runtime
Runtime libraries for oneAPI DPC++
global_handler.cpp
Go to the documentation of this file.
1 //==--------- global_handler.cpp --- Global objects handler ----------------==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifdef ENABLE_STACK_TRACE
10 #include "llvm/ADT/StringRef.h"
11 #include "llvm/Support/Signals.h"
12 #endif
13 
14 #include <detail/config.hpp>
16 #include <detail/platform_impl.hpp>
17 #include <detail/plugin.hpp>
20 #include <detail/thread_pool.hpp>
21 #include <detail/xpti_registry.hpp>
23 #include <sycl/detail/pi.hpp>
24 #include <sycl/detail/spinlock.hpp>
25 
26 #ifdef _WIN32
27 #include <windows.h>
28 #endif
29 
30 #include <vector>
31 
32 namespace sycl {
34 namespace detail {
35 
// Scoped-lock alias used throughout this file to hold a SpinLock for the
// duration of a block.
36 using LockGuard = std::lock_guard<SpinLock>;
// Out-of-class definition of GlobalHandler's static spin lock. In this file it
// is taken by ~ObjectUsageCounter and by the non-Windows shutdown() before
// they touch the GlobalHandler instance pointer.
37 SpinLock GlobalHandler::MSyclGlobalHandlerProtector{};
38 
39 // Utility class to track references on object.
40 // Used for GlobalHandler now and created as thread_local object on the first
41 // Scheduler usage. Origin idea is to track usage of Scheduler from main and
42 // other used threads - they increment MCounter; and to use but not add extra
43 // reference by our thread_pool threads. For this control MIncrementCounter
44 // class member is used.
46 public:
47  ObjectUsageCounter(bool ModifyCounter) : MModifyCounter(ModifyCounter) {
48  if (MModifyCounter)
49  MCounter++;
50  }
52  if (!MModifyCounter)
53  return;
54 
55  MCounter--;
56  if (!MCounter) {
57  LockGuard Guard(GlobalHandler::MSyclGlobalHandlerProtector);
58  GlobalHandler *RTGlobalObjHandler = GlobalHandler::getInstancePtr();
59  if (RTGlobalObjHandler) {
60  RTGlobalObjHandler->prepareSchedulerToRelease();
61  }
62  }
63  }
64 
65 private:
66  static std::atomic_uint MCounter;
67  bool MModifyCounter;
68 };
69 std::atomic_uint ObjectUsageCounter::MCounter{0};
70 
// Both special members are compiler-generated; they are defined out of line
// here rather than in the class definition.
71 GlobalHandler::GlobalHandler() = default;
72 GlobalHandler::~GlobalHandler() = default;
73 
74 void GlobalHandler::InitXPTI() {
75 #ifdef XPTI_ENABLE_INSTRUMENTATION
76  // Let subscribers know a new stream is being initialized
77  getXPTIRegistry().initializeStream(SYCL_STREAM_NAME, GMajVer, GMinVer,
78  GVerStr);
79  xpti::payload_t SYCLPayload("SYCL Runtime Exceptions");
80  uint64_t SYCLInstanceNo;
81  GSYCLCallEvent = xptiMakeEvent("SYCL Try-catch Exceptions", &SYCLPayload,
82  xpti::trace_algorithm_event, xpti_at::active,
83  &SYCLInstanceNo);
84 #endif
85 }
86 
87 void GlobalHandler::TraceEventXPTI(const char *Message) {
88 #ifdef XPTI_ENABLE_INSTRUMENTATION
89  if (!Message)
90  return;
91  if (xptiTraceEnabled()) {
92  // We have to handle the cases where: (1) we may have just the code location
93  // set and not UID and (2) UID set
95  auto CodeLocation = Tls.query();
96 
97  // Creating a tracepoint will convert a CodeLocation to UID, if not set
98  xpti::framework::tracepoint_t TP(
99  CodeLocation.fileName(), CodeLocation.functionName(),
100  CodeLocation.lineNumber(), CodeLocation.columnNumber(), nullptr);
101 
102  // The call to notify will have the signature of:
103  // (1) the stream defined in .stream()
104  // (2) The trace type equal to what is set by .trace_type()
105  // (3) Parent event set to NULL
106  // (4) Current event set to one created from CodeLocation and UID
107  // (5) An instance ID that records the number of times this code location
108  // has been seen (6) The message generated by the exception handler
109  TP.stream(SYCL_STREAM_NAME)
110  .trace_type(xpti::trace_point_type_t::diagnostics)
111  .notify(static_cast<const void *>(Message));
112  }
113 
114 #endif
115 }
116 
117 GlobalHandler *&GlobalHandler::getInstancePtr() {
118  static GlobalHandler *RTGlobalObjHandler = new GlobalHandler();
119  return RTGlobalObjHandler;
120 }
121 
122 GlobalHandler &GlobalHandler::instance() {
123  GlobalHandler *RTGlobalObjHandler = GlobalHandler::getInstancePtr();
124  assert(RTGlobalObjHandler && "Handler must not be deallocated earlier");
125 
126 #ifdef XPTI_ENABLE_INSTRUMENTATION
127  static std::once_flag InitXPTIFlag;
128  if (xptiTraceEnabled()) {
129  std::call_once(InitXPTIFlag, [&]() { RTGlobalObjHandler->InitXPTI(); });
130  }
131 #endif
132  return *RTGlobalObjHandler;
133 }
134 
135 template <typename T, typename... Types>
136 T &GlobalHandler::getOrCreate(InstWithLock<T> &IWL, Types... Args) {
137  const LockGuard Lock{IWL.Lock};
138 
139  if (!IWL.Inst)
140  IWL.Inst = std::make_unique<T>(Args...);
141 
142  return *IWL.Inst;
143 }
144 
145 void GlobalHandler::attachScheduler(Scheduler *Scheduler) {
146  // The method is used in unit tests only. Do not protect with lock since
147  // releaseResources will cause dead lock due to host queue release
148  if (MScheduler.Inst)
149  prepareSchedulerToRelease();
150  MScheduler.Inst.reset(Scheduler);
151 }
152 
// Installs LLVM's print-stack-trace-on-error-signal handler, at most once per
// process. A no-op unless the runtime is built with ENABLE_STACK_TRACE.
static void enableOnCrashStackPrinting() {
#ifdef ENABLE_STACK_TRACE
  static std::once_flag PrintStackFlag;
  std::call_once(PrintStackFlag, []() {
    llvm::sys::PrintStackTraceOnErrorSignal(llvm::StringRef());
  });
#endif
}
161 
162 Scheduler &GlobalHandler::getScheduler() {
163  getOrCreate(MScheduler);
164  registerSchedulerUsage();
165  // On Windows the registration of the signal handler before main function
166  // (e.g. from DLLMain or from constructors of program scope objects) doesn't
167  // work. So, registering signal handler here because:
168  // 1) getScheduler is likely to be called for any non-trivial application;
169  // 2) first call to getScheduler is likely to be done after main starts.
170  // The same is done in getPlugins.
172  return *MScheduler.Inst;
173 }
174 
175 void GlobalHandler::registerSchedulerUsage(bool ModifyCounter) {
176  thread_local ObjectUsageCounter SchedulerCounter(ModifyCounter);
177 }
178 
179 ProgramManager &GlobalHandler::getProgramManager() {
180  return getOrCreate(MProgramManager);
181 }
182 
183 std::unordered_map<PlatformImplPtr, ContextImplPtr> &
184 GlobalHandler::getPlatformToDefaultContextCache() {
185  return getOrCreate(MPlatformToDefaultContextCache);
186 }
187 
188 std::mutex &GlobalHandler::getPlatformToDefaultContextCacheMutex() {
189  return getOrCreate(MPlatformToDefaultContextCacheMutex);
190 }
191 
192 Sync &GlobalHandler::getSync() { return getOrCreate(MSync); }
193 
194 std::vector<PlatformImplPtr> &GlobalHandler::getPlatformCache() {
195  return getOrCreate(MPlatformCache);
196 }
197 
198 std::mutex &GlobalHandler::getPlatformMapMutex() {
199  return getOrCreate(MPlatformMapMutex);
200 }
201 
202 std::mutex &GlobalHandler::getFilterMutex() {
203  return getOrCreate(MFilterMutex);
204 }
205 std::vector<plugin> &GlobalHandler::getPlugins() {
207  return getOrCreate(MPlugins);
208 }
210 GlobalHandler::getDeviceFilterList(const std::string &InitValue) {
211  return getOrCreate(MDeviceFilterList, InitValue);
212 }
213 
215 GlobalHandler::getOneapiDeviceSelectorTargets(const std::string &InitValue) {
216  return getOrCreate(MOneapiDeviceSelectorTargets, InitValue);
217 }
218 
219 XPTIRegistry &GlobalHandler::getXPTIRegistry() {
220  return getOrCreate(MXPTIRegistry);
221 }
222 
// Returns the host-task ThreadPool owned by this handler. On first use the
// pool is created through getOrCreate with `Size` passed as its constructor
// argument.
// NOTE(review): `Size` is not declared anywhere in this function as shown; the
// statement that defines it appears to be missing from this listing (the
// embedded numbering skips 224). Restore it from the upstream file - TODO
// confirm.
223 ThreadPool &GlobalHandler::getHostTaskThreadPool() {
225  ThreadPool &TP = getOrCreate(MHostTaskThreadPool, Size);
226 
227  return TP;
228 }
229 
230 void GlobalHandler::releaseDefaultContexts() {
231  // Release shared-pointers to SYCL objects.
232 #ifndef _WIN32
233  MPlatformToDefaultContextCache.Inst.reset(nullptr);
234 #else
235  // Windows does not maintain dependencies between dynamically loaded libraries
236  // and can unload SYCL runtime dependencies before sycl.dll's DllMain has
237  // finished. To avoid calls to nowhere, intentionally leak platform to device
238  // cache. This will prevent destructors from being called, thus no PI cleanup
239  // routines will be called in the end.
240  // Update: the win_proxy_loader addresses this for SYCL's own dependencies,
241  // but the GPU device dlls seem to manually load yet another DLL which may
242  // have been released when this function is called. So we still release() and
243  // leak until that is addressed. context destructs fine on CPU device.
244  MPlatformToDefaultContextCache.Inst.release();
245 #endif
246 }
247 
250  GlobalHandler::instance().releaseDefaultContexts();
251  }
252 };
253 
254 void GlobalHandler::registerDefaultContextReleaseHandler() {
256 }
257 
258 // Note: Split from shutdown so it is available to the unittests for ensuring
259 // that the mock plugin is the lone plugin.
260 void GlobalHandler::unloadPlugins() {
261  // Call to GlobalHandler::instance().getPlugins() initializes plugins. If
262  // user application has loaded SYCL runtime, and never called any APIs,
263  // there's no need to load and unload plugins.
264  if (MPlugins.Inst) {
265  for (plugin &Plugin : getPlugins()) {
266  // PluginParameter is reserved for future use that can control
267  // some parameters in the plugin tear-down process.
268  // Currently, it is not used.
269  void *PluginParameter = nullptr;
270  Plugin.call<PiApiKind::piTearDown>(PluginParameter);
271  Plugin.unload();
272  }
273  }
274  // Clear after unload to avoid uses after unload.
275  getPlugins().clear();
276 }
277 
278 void GlobalHandler::prepareSchedulerToRelease() {
279 #ifndef _WIN32
280  drainThreadPool();
281  if (MScheduler.Inst)
282  MScheduler.Inst->releaseResources();
283 #endif
284 }
285 
286 void GlobalHandler::drainThreadPool() {
287  if (MHostTaskThreadPool.Inst)
288  MHostTaskThreadPool.Inst->drain();
289 }
290 
291 #ifdef _WIN32
292  // because of something not-yet-understood on Windows
293  // threads may be shutdown once the end of main() is reached
294  // making an orderly shutdown difficult. Fortunately, Windows
295  // itself is very aggressive about reclaiming memory. Thus,
296  // we focus solely on unloading the plugins, so as to not
297  // accidentally retain device handles. etc
298 void shutdown(){
299  GlobalHandler *&Handler = GlobalHandler::getInstancePtr();
300  Handler->unloadPlugins();
301 }
302 #else
303 void shutdown() {
304  const LockGuard Lock{GlobalHandler::MSyclGlobalHandlerProtector};
305  GlobalHandler *&Handler = GlobalHandler::getInstancePtr();
306  if (!Handler)
307  return;
308 
309  // Ensure neither host task is working so that no default context is accessed
310  // upon its release
311  Handler->prepareSchedulerToRelease();
312 
313  if (Handler->MHostTaskThreadPool.Inst)
314  Handler->MHostTaskThreadPool.Inst->finishAndWait();
315 
316  // If default contexts are requested after the first default contexts have
317  // been released there may be a new default context. These must be released
318  // prior to closing the plugins.
319  // Note: Releasing a default context here may cause failures in plugins with
320  // global state as the global state may have been released.
321  Handler->releaseDefaultContexts();
322 
323  // First, release resources, that may access plugins.
324  Handler->MPlatformCache.Inst.reset(nullptr);
325  Handler->MScheduler.Inst.reset(nullptr);
326  Handler->MProgramManager.Inst.reset(nullptr);
327 
328  // Clear the plugins and reset the instance if it was there.
329  Handler->unloadPlugins();
330  if (Handler->MPlugins.Inst)
331  Handler->MPlugins.Inst.reset(nullptr);
332 
333  // Release the rest of global resources.
334  delete Handler;
335  Handler = nullptr;
336 }
337 #endif
338 
339 #ifdef _WIN32
340 extern "C" __SYCL_EXPORT BOOL WINAPI DllMain(HINSTANCE hinstDLL,
341  DWORD fdwReason,
342  LPVOID lpReserved) {
343  bool PrintPiTrace = false;
344  static const char *PiTrace = std::getenv("SYCL_PI_TRACE");
345  static const int PiTraceValue = PiTrace ? std::stoi(PiTrace) : 0;
346  if (PiTraceValue == -1 || PiTraceValue == 2) { // Means print all PI traces
347  PrintPiTrace = true;
348  }
349 
350  // Perform actions based on the reason for calling.
351  switch (fdwReason) {
352  case DLL_PROCESS_DETACH:
353  if (PrintPiTrace)
354  std::cout << "---> DLL_PROCESS_DETACH syclx.dll\n" << std::endl;
355 
356 #ifdef XPTI_ENABLE_INSTRUMENTATION
357  if (xptiTraceEnabled())
358  return TRUE; // When doing xpti tracing, we can't safely call shutdown.
359  // TODO: figure out what XPTI is doing that prevents release.
360 #endif
361 
362  shutdown();
363  break;
364  case DLL_PROCESS_ATTACH:
365  if (PrintPiTrace)
366  std::cout << "---> DLL_PROCESS_ATTACH syclx.dll\n" << std::endl;
367  case DLL_THREAD_ATTACH:
368  case DLL_THREAD_DETACH:
369  break;
370  }
371  return TRUE; // Successful DLL_PROCESS_ATTACH.
372 }
373 #else
374 // Setting low priority on destructor ensures it runs after all other global
375 // destructors. Priorities 0-100 are reserved by the compiler. The priority
376 // value 110 allows SYCL users to run their destructors after runtime library
377 // deinitialization.
378 __attribute__((destructor(110))) static void syclUnload() { shutdown(); }
379 #endif
380 } // namespace detail
381 } // __SYCL_INLINE_VER_NAMESPACE(_V1)
382 } // namespace sycl
sycl::_V1::detail::tls_code_loc_t::query
const detail::code_location & query()
Query the information in the TLS slot.
Definition: common.cpp:55
sycl::_V1::detail::SYCL_STREAM_NAME
constexpr const char * SYCL_STREAM_NAME
Definition: xpti_registry.hpp:29
sycl::_V1::detail::DefaultContextReleaseHandler::~DefaultContextReleaseHandler
~DefaultContextReleaseHandler()
Definition: global_handler.cpp:249
device_filter.hpp
config.hpp
__SYCL_INLINE_VER_NAMESPACE
#define __SYCL_INLINE_VER_NAMESPACE(X)
Definition: defines_elementary.hpp:11
xpti_registry.hpp
sycl::_V1::detail::GlobalHandler::releaseDefaultContexts
void releaseDefaultContexts()
Definition: global_handler.cpp:230
PrintPiTrace
static bool PrintPiTrace
Definition: pi_esimd_emulator.cpp:114
piTearDown
pi_result piTearDown(void *PluginParameter)
API to notify that the plugin should clean up its resources.
Definition: pi_esimd_emulator.cpp:2059
sycl
---— Error handling, matching OpenCL plugin semantics.
Definition: access.hpp:14
plugin.hpp
pi.hpp
scheduler.hpp
sycl::_V1::detail::__attribute__
__attribute__((destructor(110))) static void syclUnload()
Definition: global_handler.cpp:378
sycl::_V1::detail::GlobalHandler::prepareSchedulerToRelease
void prepareSchedulerToRelease()
Definition: global_handler.cpp:278
sycl::_V1::detail::XPTIRegistry
Definition: xpti_registry.hpp:59
std::get
constexpr tuple_element< I, tuple< Types... > >::type & get(sycl::detail::tuple< Types... > &Arg) noexcept
Definition: tuple.hpp:199
sycl::_V1::detail::plugin
The plugin class provides a unified interface to the underlying low-level runtimes for the device-agn...
Definition: plugin.hpp:90
sycl::_V1::detail::SpinLock
SpinLock is a synchronization primitive, that uses atomic variable and causes thread trying acquire l...
Definition: spinlock.hpp:27
sycl::_V1::detail::ObjectUsageCounter
Definition: global_handler.cpp:45
sycl::_V1::detail::enableOnCrashStackPrinting
static void enableOnCrashStackPrinting()
Definition: global_handler.cpp:153
platform_impl.hpp
sycl::_V1::detail::shutdown
void shutdown()
Definition: global_handler.cpp:303
sycl::_V1::detail::LockGuard
std::lock_guard< SpinLock > LockGuard
Definition: global_handler.cpp:36
sycl::_V1::detail::tls_code_loc_t
Data type that manages the code_location information in TLS.
Definition: common.hpp:152
sycl::_V1::handler
Command group handler class.
Definition: handler.hpp:315
global_handler.hpp
spinlock.hpp
sycl::_V1::detail::Sync
Groups and provides access to all the locks used the SYCL runtime.
Definition: util.hpp:24
GVerStr
constexpr auto GVerStr
Definition: tracing.cpp:33
sycl::_V1::detail::ObjectUsageCounter::ObjectUsageCounter
ObjectUsageCounter(bool ModifyCounter)
Definition: global_handler.cpp:47
GMajVer
constexpr int GMajVer
Definition: tracing.cpp:34
program_manager.hpp
sycl::_V1::detail::GlobalHandler::InitXPTI
void InitXPTI()
Definition: global_handler.cpp:74
sycl::_V1::detail::DefaultContextReleaseHandler
Definition: global_handler.cpp:248
sycl::_V1::detail::device_filter_list
Definition: device_filter.hpp:84
GMinVer
constexpr int GMinVer
Definition: tracing.cpp:35
sycl::_V1::detail::ods_target_list
Definition: device_filter.hpp:55
sycl::_V1::detail::GlobalHandler
Wrapper class for global data structures with non-trivial destructors.
Definition: global_handler.hpp:46
sycl::_V1::detail::ThreadPool
Definition: thread_pool.hpp:25
sycl::_V1::detail::ObjectUsageCounter::~ObjectUsageCounter
~ObjectUsageCounter()
Definition: global_handler.cpp:51
sycl::_V1::detail::ProgramManager
Definition: program_manager.hpp:81
sycl::_V1::detail::Scheduler
DPC++ graph scheduler class.
Definition: scheduler.hpp:363
sycl::_V1::detail::GlobalHandler::unloadPlugins
void unloadPlugins()
Definition: global_handler.cpp:260
std::cout
__SYCL_EXTERN_STREAM_ATTRS ostream cout
Linked to standard output.
thread_pool.hpp
PiTrace
static void PiTrace(std::string TraceString)
Definition: pi_esimd_emulator.cpp:116