DPC++ Runtime
Runtime libraries for oneAPI DPC++
global_handler.cpp
Go to the documentation of this file.
1 //==--------- global_handler.cpp --- Global objects handler ----------------==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifdef ENABLE_STACK_TRACE
10 #include "llvm/ADT/StringRef.h"
11 #include "llvm/Support/Signals.h"
12 #endif
13 
14 #include <detail/config.hpp>
16 #include <detail/platform_impl.hpp>
17 #include <detail/plugin.hpp>
20 #include <detail/thread_pool.hpp>
21 #include <detail/xpti_registry.hpp>
23 #include <sycl/detail/pi.hpp>
24 #include <sycl/detail/spinlock.hpp>
25 
26 #ifdef _WIN32
27 #include <windows.h>
28 #endif
29 
30 #include <vector>
31 
32 namespace sycl {
33 inline namespace _V1 {
34 namespace detail {
35 
36 using LockGuard = std::lock_guard<SpinLock>;
37 SpinLock GlobalHandler::MSyclGlobalHandlerProtector{};
38 
39 // Utility class to track references on object.
40 // Used for GlobalHandler now and created as thread_local object on the first
41 // Scheduler usage. The original idea is to track usage of the Scheduler from
42 // main and other used threads - they increment MCounter - and to let our
43 // thread_pool threads use it without adding an extra reference. The
44 // MModifyCounter class member controls this.
46 public:
47  ObjectUsageCounter(bool ModifyCounter) : MModifyCounter(ModifyCounter) {
48  if (MModifyCounter)
49  MCounter++;
50  }
52  if (!MModifyCounter)
53  return;
54 
55  LockGuard Guard(GlobalHandler::MSyclGlobalHandlerProtector);
56  MCounter--;
57  GlobalHandler *RTGlobalObjHandler = GlobalHandler::getInstancePtr();
58  if (RTGlobalObjHandler) {
59  RTGlobalObjHandler->prepareSchedulerToRelease(!MCounter);
60  }
61  }
62 
63 private:
64  static std::atomic_uint MCounter;
65  bool MModifyCounter;
66 };
67 std::atomic_uint ObjectUsageCounter::MCounter{0};
68 
69 GlobalHandler::GlobalHandler() = default;
70 GlobalHandler::~GlobalHandler() = default;
71 
73 #ifdef XPTI_ENABLE_INSTRUMENTATION
74  // Let subscribers know a new stream is being initialized
76  GVerStr);
77  xpti::payload_t SYCLPayload("SYCL Runtime Exceptions");
78  uint64_t SYCLInstanceNo;
79  GSYCLCallEvent = xptiMakeEvent("SYCL Try-catch Exceptions", &SYCLPayload,
80  xpti::trace_algorithm_event, xpti_at::active,
81  &SYCLInstanceNo);
82 #endif
83 }
84 
85 void GlobalHandler::TraceEventXPTI(const char *Message) {
86  if (!Message)
87  return;
88 #ifdef XPTI_ENABLE_INSTRUMENTATION
89  static std::once_flag InitXPTIFlag;
90  if (xptiTraceEnabled()) {
91  std::call_once(InitXPTIFlag, [&]() { InitXPTI(); });
92 
93  // We have to handle the cases where: (1) we may have just the code location
94  // set and not UID and (2) UID set
96  auto CodeLocation = Tls.query();
97 
98  // Creating a tracepoint will convert a CodeLocation to UID, if not set
99  xpti::framework::tracepoint_t TP(
100  CodeLocation.fileName(), CodeLocation.functionName(),
101  CodeLocation.lineNumber(), CodeLocation.columnNumber(), nullptr);
102 
103  // The call to notify will have the signature of:
104  // (1) the stream defined in .stream()
105  // (2) The trace type equal to what is set by .trace_type()
106  // (3) Parent event set to NULL
107  // (4) Current event set to one created from CodeLocation and UID
108  // (5) An instance ID that records the number of times this code location
109  // has been seen (6) The message generated by the exception handler
110  TP.stream(SYCL_STREAM_NAME)
111  .trace_type(xpti::trace_point_type_t::diagnostics)
112  .notify(static_cast<const void *>(Message));
113  }
114 
115 #endif
116 }
117 
118 GlobalHandler *&GlobalHandler::getInstancePtr() {
119  static GlobalHandler *RTGlobalObjHandler = new GlobalHandler();
120  return RTGlobalObjHandler;
121 }
122 
124  GlobalHandler *RTGlobalObjHandler = GlobalHandler::getInstancePtr();
125  assert(RTGlobalObjHandler && "Handler must not be deallocated earlier");
126  return *RTGlobalObjHandler;
127 }
128 
129 template <typename T, typename... Types>
130 T &GlobalHandler::getOrCreate(InstWithLock<T> &IWL, Types... Args) {
131  const LockGuard Lock{IWL.Lock};
132 
133  if (!IWL.Inst)
134  IWL.Inst = std::make_unique<T>(Args...);
135 
136  return *IWL.Inst;
137 }
138 
140  // The method is used in unit tests only. Do not protect with lock since
141  // releaseResources will cause dead lock due to host queue release
142  if (MScheduler.Inst)
144  MScheduler.Inst.reset(Scheduler);
145 }
146 
148 #ifdef ENABLE_STACK_TRACE
149  static std::once_flag PrintStackFlag;
150  std::call_once(PrintStackFlag, []() {
151  llvm::sys::PrintStackTraceOnErrorSignal(llvm::StringRef());
152  });
153 #endif
154 }
155 
157  getOrCreate(MScheduler);
159  // On Windows the registration of the signal handler before main function
160  // (e.g. from DLLMain or from constructors of program scope objects) doesn't
161  // work. So, registering signal handler here because:
162  // 1) getScheduler is likely to be called for any non-trivial application;
163  // 2) first call to getScheduler is likely to be done after main starts.
164  // The same is done in getPlugins.
166  return *MScheduler.Inst;
167 }
168 
169 bool GlobalHandler::isSchedulerAlive() const { return MScheduler.Inst.get(); }
170 
171 void GlobalHandler::registerSchedulerUsage(bool ModifyCounter) {
172  thread_local ObjectUsageCounter SchedulerCounter(ModifyCounter);
173 }
174 
176  return getOrCreate(MProgramManager);
177 }
178 
179 std::unordered_map<PlatformImplPtr, ContextImplPtr> &
181  return getOrCreate(MPlatformToDefaultContextCache);
182 }
183 
185  return getOrCreate(MPlatformToDefaultContextCacheMutex);
186 }
187 
188 Sync &GlobalHandler::getSync() { return getOrCreate(MSync); }
189 
190 std::vector<PlatformImplPtr> &GlobalHandler::getPlatformCache() {
191  return getOrCreate(MPlatformCache);
192 }
193 
195  return getOrCreate(MPlatformMapMutex);
196 }
197 
199  return getOrCreate(MFilterMutex);
200 }
201 std::vector<PluginPtr> &GlobalHandler::getPlugins() {
203  return getOrCreate(MPlugins);
204 }
205 
207 GlobalHandler::getOneapiDeviceSelectorTargets(const std::string &InitValue) {
208  return getOrCreate(MOneapiDeviceSelectorTargets, InitValue);
209 }
210 
212  return getOrCreate(MXPTIRegistry);
213 }
214 
217  ThreadPool &TP = getOrCreate(MHostTaskThreadPool, Size);
218 
219  return TP;
220 }
221 
223  // Release shared-pointers to SYCL objects.
224  // Note that on Windows the destruction of the default context
225  // races with the detaching of the DLL object that calls piTearDown.
226 
227  MPlatformToDefaultContextCache.Inst.reset(nullptr);
228 }
229 
233  }
234 };
235 
238 }
239 
240 // Note: Split from shutdown so it is available to the unittests for ensuring
241 // that the mock plugin is the lone plugin.
243  // Call to GlobalHandler::instance().getPlugins() initializes plugins. If
244  // user application has loaded SYCL runtime, and never called any APIs,
245  // there's no need to load and unload plugins.
246  if (MPlugins.Inst) {
247  for (const PluginPtr &Plugin : getPlugins()) {
248  // PluginParameter for Teardown is the boolean tracking if a
249  // given plugin has been teardown successfully.
250  // This tracking prevents usage of this plugin after teardown
251  // has been completed to avoid invalid resource access.
252  Plugin->call<PiApiKind::piTearDown>(&Plugin->pluginReleased);
253  Plugin->unload();
254  }
255  }
256  // Clear after unload to avoid uses after unload.
257  getPlugins().clear();
258 }
259 
261 #ifndef _WIN32
262  if (Blocking)
263  drainThreadPool();
264  if (MScheduler.Inst)
265  MScheduler.Inst->releaseResources(Blocking ? BlockingT::BLOCKING
267 #endif
268 }
269 
271  if (MHostTaskThreadPool.Inst)
272  MHostTaskThreadPool.Inst->drain();
273 }
274 
275 #ifdef _WIN32
276 // because of something not-yet-understood on Windows
277 // threads may be shutdown once the end of main() is reached
278 // making an orderly shutdown difficult. Fortunately, Windows
279 // itself is very aggressive about reclaiming memory. Thus,
280 // we focus solely on unloading the plugins, so as to not
281 // accidentally retain device handles. etc
282 void shutdown() {
283  GlobalHandler *&Handler = GlobalHandler::getInstancePtr();
284  Handler->unloadPlugins();
285 }
286 #else
287 void shutdown() {
288  const LockGuard Lock{GlobalHandler::MSyclGlobalHandlerProtector};
289  GlobalHandler *&Handler = GlobalHandler::getInstancePtr();
290  if (!Handler)
291  return;
292 
293  // Ensure neither host task is working so that no default context is accessed
294  // upon its release
295  Handler->prepareSchedulerToRelease(true);
296 
297  if (Handler->MHostTaskThreadPool.Inst)
298  Handler->MHostTaskThreadPool.Inst->finishAndWait();
299 
300  // If default contexts are requested after the first default contexts have
301  // been released there may be a new default context. These must be released
302  // prior to closing the plugins.
303  // Note: Releasing a default context here may cause failures in plugins with
304  // global state as the global state may have been released.
305  Handler->releaseDefaultContexts();
306 
307  // First, release resources, that may access plugins.
308  Handler->MPlatformCache.Inst.reset(nullptr);
309  Handler->MScheduler.Inst.reset(nullptr);
310  Handler->MProgramManager.Inst.reset(nullptr);
311 
312  // Clear the plugins and reset the instance if it was there.
313  Handler->unloadPlugins();
314  if (Handler->MPlugins.Inst)
315  Handler->MPlugins.Inst.reset(nullptr);
316 
317  Handler->MXPTIRegistry.Inst.reset(nullptr);
318 
319  // Release the rest of global resources.
320  delete Handler;
321  Handler = nullptr;
322 }
323 #endif
324 
325 #ifdef _WIN32
326 extern "C" __SYCL_EXPORT BOOL WINAPI DllMain(HINSTANCE hinstDLL,
327  DWORD fdwReason,
328  LPVOID lpReserved) {
329  bool PrintPiTrace = false;
330  static const char *PiTrace = std::getenv("SYCL_PI_TRACE");
331  static const int PiTraceValue = PiTrace ? std::stoi(PiTrace) : 0;
332  if (PiTraceValue == -1 || PiTraceValue == 2) { // Means print all PI traces
333  PrintPiTrace = true;
334  }
335 
336  // Perform actions based on the reason for calling.
337  switch (fdwReason) {
338  case DLL_PROCESS_DETACH:
339  if (PrintPiTrace)
340  std::cout << "---> DLL_PROCESS_DETACH syclx.dll\n" << std::endl;
341 
342 #ifdef XPTI_ENABLE_INSTRUMENTATION
343  if (xptiTraceEnabled())
344  return TRUE; // When doing xpti tracing, we can't safely call shutdown.
345  // TODO: figure out what XPTI is doing that prevents release.
346 #endif
347 
348  shutdown();
349  break;
350  case DLL_PROCESS_ATTACH:
351  if (PrintPiTrace)
352  std::cout << "---> DLL_PROCESS_ATTACH syclx.dll\n" << std::endl;
353  break;
354  case DLL_THREAD_ATTACH:
355  break;
356  case DLL_THREAD_DETACH:
357  break;
358  }
359  return TRUE; // Successful DLL_PROCESS_ATTACH.
360 }
361 #else
362 // Setting low priority on destructor ensures it runs after all other global
363 // destructors. Priorities 0-100 are reserved by the compiler. The priority
364 // value 110 allows SYCL users to run their destructors after runtime library
365 // deinitialization.
366 __attribute__((destructor(110))) static void syclUnload() { shutdown(); }
367 #endif
368 } // namespace detail
369 } // namespace _V1
370 } // namespace sycl
Wrapper class for global data structures with non-trivial destructors.
ods_target_list & getOneapiDeviceSelectorTargets(const std::string &InitValue)
static void registerDefaultContextReleaseHandler()
std::vector< PlatformImplPtr > & getPlatformCache()
std::unordered_map< PlatformImplPtr, ContextImplPtr > & getPlatformToDefaultContextCache()
std::vector< PluginPtr > & getPlugins()
void prepareSchedulerToRelease(bool Blocking)
void TraceEventXPTI(const char *Message)
void attachScheduler(Scheduler *Scheduler)
void registerSchedulerUsage(bool ModifyCounter=true)
std::mutex & getPlatformToDefaultContextCacheMutex()
static GlobalHandler & instance()
static const char * get()
Definition: config.hpp:115
DPC++ graph scheduler class.
Definition: scheduler.hpp:367
SpinLock is a synchronization primitive, that uses atomic variable and causes thread trying acquire l...
Definition: spinlock.hpp:27
Groups and provides access to all the locks used the SYCL runtime.
Definition: util.hpp:25
void initializeStream(const std::string &StreamName, uint32_t MajVer, uint32_t MinVer, const std::string &VerStr)
Notifies XPTI subscribers about new stream.
Data type that manages the code_location information in TLS.
Definition: common.hpp:129
const detail::code_location & query()
Query the information in the TLS slot.
Definition: common.cpp:54
Command group handler class.
Definition: handler.hpp:458
__SYCL_EXTERN_STREAM_ATTRS ostream cout
Linked to standard output.
constexpr const char * SYCL_STREAM_NAME
std::shared_ptr< plugin > PluginPtr
Definition: pi.hpp:48
__attribute__((destructor(110))) static void syclUnload()
std::lock_guard< SpinLock > LockGuard
static void enableOnCrashStackPrinting()
Definition: access.hpp:18
pi_result piTearDown(void *PluginParameter)
API to notify that the plugin should clean up its resources.
Definition: pi_cuda.cpp:1243
C++ wrapper of extern "C" PI interfaces.
constexpr int GMajVer
Definition: tracing.cpp:27
constexpr int GMinVer
Definition: tracing.cpp:28
constexpr auto GVerStr
Definition: tracing.cpp:26