DPC++ Runtime
Runtime libraries for oneAPI DPC++
event_impl.cpp
Go to the documentation of this file.
1 //==---------------- event_impl.cpp - SYCL event ---------------------------==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include <detail/event_impl.hpp>
10 #include <detail/event_info.hpp>
11 #include <detail/plugin.hpp>
12 #include <detail/queue_impl.hpp>
14 #include <sycl/context.hpp>
15 #include <sycl/device_selector.hpp>
16 
17 #include "detail/config.hpp"
18 
19 #include <chrono>
20 
21 #ifdef XPTI_ENABLE_INSTRUMENTATION
22 #include "xpti/xpti_trace_framework.hpp"
23 #include <atomic>
24 #include <detail/xpti_registry.hpp>
25 #include <sstream>
26 #endif
27 
28 namespace sycl {
29 inline namespace _V1 {
30 namespace detail {
31 #ifdef XPTI_ENABLE_INSTRUMENTATION
32 extern xpti::trace_event_data_t *GSYCLGraphEvent;
33 #endif
34 
35 // If we do not yet have a context, use the default one.
38  return;
39 
40  if (MHostEvent) {
42  this->setContextImpl(detail::getSyclObjImpl(HostQueue->get_context()));
43  } else {
44  const device SyclDevice;
46  detail::getSyclObjImpl(SyclDevice)));
47  }
48 }
49 
51  // Treat all devices that don't support interoperability as host devices to
52  // avoid attempts to call method get on such events.
53  return MHostEvent;
54 }
55 
57  if (MEvent)
59 }
60 
62  if (!MHostEvent && MEvent) {
63  // Wait for the native event
65  } else if (MState == HES_Discarded) {
66  // Waiting for the discarded event is invalid
67  throw sycl::exception(
69  "waitInternal method cannot be used for a discarded event.");
70  } else if (MState != HES_Complete) {
71  // Wait for the host event
72  std::unique_lock<std::mutex> lock(MMutex);
73  cv.wait(lock, [this] { return MState == HES_Complete; });
74  }
75 
76  // Wait for connected events(e.g. streams prints)
77  for (const EventImplPtr &Event : MPostCompleteEvents)
78  Event->wait(Event);
79 }
80 
82  if (MHostEvent || !MEvent) {
83  {
84  std::unique_lock<std::mutex> lock(MMutex);
85 #ifndef NDEBUG
86  int Expected = HES_NotComplete;
87  int Desired = HES_Complete;
88 
89  bool Succeeded = MState.compare_exchange_strong(Expected, Desired);
90 
91  assert(Succeeded && "Unexpected state of event");
92 #else
93  MState.store(static_cast<int>(HES_Complete));
94 #endif
95  }
96  cv.notify_all();
97  return;
98  }
99 
100  assert(false && "setComplete is not supported for non-host event");
101 }
102 
// Returns the current std::chrono::high_resolution_clock reading as a count
// of nanoseconds elapsed since that clock's epoch.
static inline uint64_t getTimestamp() {
  const auto SinceEpoch =
      std::chrono::high_resolution_clock::now().time_since_epoch();
  const std::chrono::nanoseconds Nanos =
      std::chrono::duration_cast<std::chrono::nanoseconds>(SinceEpoch);
  return Nanos.count();
}
108 
110  return MEvent;
111 }
113 
116  return MContext;
117 }
118 
121  return MContext->getPlugin();
122 }
123 
125 
127  MHostEvent = Context->is_host();
128  MContext = Context;
129  MIsContextInitialized = true;
130 }
131 
133  const context &SyclContext)
134  : MIsContextInitialized(true), MEvent(Event),
135  MContext(detail::getSyclObjImpl(SyclContext)), MHostEvent(false),
136  MIsFlushed(true), MState(HES_Complete) {
137 
138  if (MContext->is_host()) {
139  throw sycl::exception(sycl::make_error_code(sycl::errc::invalid),
140  "The syclContext must match the OpenCL context "
141  "associated with the clEvent. " +
142  codeToString(PI_ERROR_INVALID_CONTEXT));
143  }
144 
145  sycl::detail::pi::PiContext TempContext;
148  &TempContext, nullptr);
149  if (MContext->getHandleRef() != TempContext) {
150  throw sycl::exception(sycl::make_error_code(sycl::errc::invalid),
151  "The syclContext must match the OpenCL context "
152  "associated with the clEvent. " +
153  codeToString(PI_ERROR_INVALID_CONTEXT));
154  }
155 }
156 
158  this->setContextImpl(Queue->getContextImplPtr());
159  this->associateWithQueue(Queue);
160 }
161 
163  MQueue = Queue;
164  MIsProfilingEnabled = Queue->is_host() || Queue->MIsProfilingEnabled;
165  MFallbackProfiling = MIsProfilingEnabled && Queue->isProfilingFallback();
166  if (Queue->is_host()) {
167  MState.store(HES_NotComplete);
168  if (Queue->has_property<property::queue::enable_profiling>()) {
170  if (!MHostProfilingInfo)
171  throw sycl::exception(sycl::make_error_code(sycl::errc::runtime),
172  "Out of host memory " +
173  codeToString(PI_ERROR_OUT_OF_HOST_MEMORY));
174  }
175  return;
176  }
177  MState.store(HES_Complete);
178 }
179 
180 void *event_impl::instrumentationProlog(std::string &Name, int32_t StreamID,
181  uint64_t &IId) const {
182  void *TraceEvent = nullptr;
183 #ifdef XPTI_ENABLE_INSTRUMENTATION
184  constexpr uint16_t NotificationTraceType = xpti::trace_wait_begin;
185  if (!xptiCheckTraceEnabled(StreamID, NotificationTraceType))
186  return TraceEvent;
187  // Use a thread-safe counter to get a unique instance ID for the wait() on the
188  // event
189  static std::atomic<uint64_t> InstanceID = {1};
190  xpti::trace_event_data_t *WaitEvent = nullptr;
191 
192  // Create a string with the event address so it
193  // can be associated with other debug data
194  xpti::utils::StringHelper SH;
195  Name = SH.nameWithAddress<sycl::detail::pi::PiEvent>("event.wait", MEvent);
196 
197  // We can emit the wait associated with the graph if the
198  // event does not have a command object or associated with
199  // the command object, if it exists
200  if (MCommand) {
201  Command *Cmd = (Command *)MCommand;
202  WaitEvent = Cmd->MTraceEvent ? static_cast<xpti_td *>(Cmd->MTraceEvent)
203  : GSYCLGraphEvent;
204  } else
205  WaitEvent = GSYCLGraphEvent;
206 
207  // Record the current instance ID for use by Epilog
208  IId = InstanceID++;
209  xptiNotifySubscribers(StreamID, NotificationTraceType, nullptr, WaitEvent,
210  IId, static_cast<const void *>(Name.c_str()));
211  TraceEvent = (void *)WaitEvent;
212 #endif
213  return TraceEvent;
214 }
215 
216 void event_impl::instrumentationEpilog(void *TelemetryEvent,
217  const std::string &Name,
218  int32_t StreamID, uint64_t IId) const {
219 #ifdef XPTI_ENABLE_INSTRUMENTATION
220  constexpr uint16_t NotificationTraceType = xpti::trace_wait_end;
221  if (!(xptiCheckTraceEnabled(StreamID, NotificationTraceType) &&
222  TelemetryEvent))
223  return;
224  // Close the wait() scope
225  xpti::trace_event_data_t *TraceEvent =
226  (xpti::trace_event_data_t *)TelemetryEvent;
227  xptiNotifySubscribers(StreamID, NotificationTraceType, nullptr, TraceEvent,
228  IId, static_cast<const void *>(Name.c_str()));
229 #endif
230 }
231 
232 void event_impl::wait(std::shared_ptr<sycl::detail::event_impl> Self) {
233  if (MState == HES_Discarded)
235  "wait method cannot be used for a discarded event.");
236 
237  if (MGraph.lock()) {
239  "wait method cannot be used for an event associated "
240  "with a command graph.");
241  }
242 
243 #ifdef XPTI_ENABLE_INSTRUMENTATION
244  void *TelemetryEvent = nullptr;
245  uint64_t IId = 0;
246  std::string Name;
247  int32_t StreamID = xptiRegisterStream(SYCL_STREAM_NAME);
248  TelemetryEvent = instrumentationProlog(Name, StreamID, IId);
249 #endif
250 
251  if (MEvent)
252  // presence of MEvent means the command has been enqueued, so no need to
253  // go via the slow path event waiting in the scheduler
254  waitInternal();
255  else if (MCommand)
257 
258 #ifdef XPTI_ENABLE_INSTRUMENTATION
259  instrumentationEpilog(TelemetryEvent, Name, StreamID, IId);
260 #endif
261 }
262 
264  std::shared_ptr<sycl::detail::event_impl> Self) {
265  wait(Self);
266 
267  if (QueueImplPtr SubmittedQueue = MSubmittedQueue.lock())
268  SubmittedQueue->throw_asynchronous();
269 }
270 
272  std::weak_ptr<queue_impl> EmptyPtr;
273 
274  if (!EmptyPtr.owner_before(MQueue) && !MQueue.owner_before(EmptyPtr)) {
275  throw sycl::exception(make_error_code(sycl::errc::invalid),
276  "Profiling information is unavailable as the event "
277  "has no associated queue.");
278  }
279  if (!MIsProfilingEnabled) {
280  throw sycl::exception(
281  make_error_code(sycl::errc::invalid),
282  "Profiling information is unavailable as the queue associated with "
283  "the event does not have the 'enable_profiling' property.");
284  }
285 }
286 
287 template <>
288 uint64_t
289 event_impl::get_profiling_info<info::event_profiling::command_submit>() {
290  checkProfilingPreconditions();
291 
292  // The delay between the submission and the actual start of a CommandBuffer
293  // can be short. Consequently, the submission time, which is based on
294  // an estimated clock and not on the real device clock, may be ahead of the
295  // start time, which is based on the actual device clock.
296  // MSubmitTime is set in a critical performance path.
297  // Force reading the device clock when setting MSubmitTime may deteriorate
298  // the performance.
299  // Since submit time is an estimated time, we implement this little hack
300  // that allows all profiled time to be meaningful.
301  // (Note that the observed time deviation between the estimated clock and
302  // the real device clock is typically less than 0.5ms. The approximation we
303  // made by forcing the re-sync of submit time to start time is less than
304  // 0.5ms. These timing values were obtained empirically using an integrated
305  // Intel GPU).
306  if (MEventFromSubmittedExecCommandBuffer && !MHostEvent && MEvent) {
307  uint64_t StartTime =
308  get_event_profiling_info<info::event_profiling::command_start>(
309  this->getHandleRef(), this->getPlugin());
310  if (StartTime < MSubmitTime)
311  MSubmitTime = StartTime;
312  }
313  return MSubmitTime;
314 }
315 
316 template <>
317 uint64_t
318 event_impl::get_profiling_info<info::event_profiling::command_start>() {
319  checkProfilingPreconditions();
320 
321  // For nop command start time is equal to submission time.
322  if (isNOP() && MSubmitTime)
323  return MSubmitTime;
324 
325  if (!MHostEvent) {
326  if (MEvent) {
327  auto StartTime =
328  get_event_profiling_info<info::event_profiling::command_start>(
329  this->getHandleRef(), this->getPlugin());
330  if (!MFallbackProfiling) {
331  return StartTime;
332  } else {
333  auto DeviceBaseTime =
334  get_event_profiling_info<info::event_profiling::command_submit>(
335  this->getHandleRef(), this->getPlugin());
336  return MHostBaseTime - DeviceBaseTime + StartTime;
337  }
338  }
339  return 0;
340  }
341  if (!MHostProfilingInfo)
342  throw sycl::exception(
343  sycl::make_error_code(sycl::errc::invalid),
344  "Profiling info is not available. " +
345  codeToString(PI_ERROR_PROFILING_INFO_NOT_AVAILABLE));
346  return MHostProfilingInfo->getStartTime();
347 }
348 
349 template <>
350 uint64_t event_impl::get_profiling_info<info::event_profiling::command_end>() {
351  checkProfilingPreconditions();
352 
353  // For nop command end time is equal to submission time.
354  if (isNOP() && MSubmitTime)
355  return MSubmitTime;
356 
357  if (!MHostEvent) {
358  if (MEvent) {
359  auto EndTime =
360  get_event_profiling_info<info::event_profiling::command_end>(
361  this->getHandleRef(), this->getPlugin());
362  if (!MFallbackProfiling) {
363  return EndTime;
364  } else {
365  auto DeviceBaseTime =
366  get_event_profiling_info<info::event_profiling::command_submit>(
367  this->getHandleRef(), this->getPlugin());
368  return MHostBaseTime - DeviceBaseTime + EndTime;
369  }
370  }
371  return 0;
372  }
373  if (!MHostProfilingInfo)
374  throw sycl::exception(
375  sycl::make_error_code(sycl::errc::invalid),
376  "Profiling info is not available. " +
377  codeToString(PI_ERROR_PROFILING_INFO_NOT_AVAILABLE));
378  return MHostProfilingInfo->getEndTime();
379 }
380 
381 template <> uint32_t event_impl::get_info<info::event::reference_count>() {
382  if (!MHostEvent && MEvent) {
383  return get_event_info<info::event::reference_count>(this->getHandleRef(),
384  this->getPlugin());
385  }
386  return 0;
387 }
388 
389 template <>
391 event_impl::get_info<info::event::command_execution_status>() {
392  if (MState == HES_Discarded)
394 
395  if (!MHostEvent) {
396  // Command is enqueued and PiEvent is ready
397  if (MEvent)
398  return get_event_info<info::event::command_execution_status>(
399  this->getHandleRef(), this->getPlugin());
400  // Command is blocked and not enqueued, PiEvent is not assigned yet
401  else if (MCommand)
402  return sycl::info::event_command_status::submitted;
403  }
404 
405  return MHostEvent && MState.load() != HES_Complete
406  ? sycl::info::event_command_status::submitted
408 }
409 
410 void HostProfilingInfo::start() { StartTime = getTimestamp(); }
411 
412 void HostProfilingInfo::end() { EndTime = getTimestamp(); }
413 
416 
417  auto Plugin = getPlugin();
418  if (!MIsInitialized) {
419  MIsInitialized = true;
420  auto TempContext = MContext.get()->getHandleRef();
421  Plugin->call<PiApiKind::piEventCreate>(TempContext, &MEvent);
422  }
423  if (MContext->getBackend() == backend::opencl)
424  Plugin->call<PiApiKind::piEventRetain>(getHandleRef());
425  pi_native_handle Handle;
426  Plugin->call<PiApiKind::piextEventGetNativeHandle>(getHandleRef(), &Handle);
427  return Handle;
428 }
429 
430 std::vector<EventImplPtr> event_impl::getWaitList() {
431  if (MState == HES_Discarded)
432  throw sycl::exception(
434  "get_wait_list() cannot be used for a discarded event.");
435 
436  std::lock_guard<std::mutex> Lock(MMutex);
437 
438  std::vector<EventImplPtr> Result;
439  Result.reserve(MPreparedDepsEvents.size() + MPreparedHostDepsEvents.size());
440  Result.insert(Result.end(), MPreparedDepsEvents.begin(),
441  MPreparedDepsEvents.end());
442  Result.insert(Result.end(), MPreparedHostDepsEvents.begin(),
444 
445  return Result;
446 }
447 
448 void event_impl::flushIfNeeded(const QueueImplPtr &UserQueue) {
449  // Some events might not have a native handle underneath even at this point,
450  // e.g. those produced by memset with 0 size (no PI call is made).
451  if (MIsFlushed || !MEvent)
452  return;
453 
454  QueueImplPtr Queue = MQueue.lock();
455  // If the queue has been released, all of the commands have already been
456  // implicitly flushed by piQueueRelease.
457  if (!Queue) {
458  MIsFlushed = true;
459  return;
460  }
461  if (Queue == UserQueue)
462  return;
463 
464  // Check if the task for this event has already been submitted.
468  nullptr);
469  if (Status == PI_EVENT_QUEUED) {
470  getPlugin()->call<PiApiKind::piQueueFlush>(Queue->getHandleRef());
471  }
472  MIsFlushed = true;
473 }
474 
476  std::lock_guard<std::mutex> Lock(MMutex);
477  MPreparedDepsEvents.clear();
478  MPreparedHostDepsEvents.clear();
479 }
480 
482  std::lock_guard<std::mutex> Lock(MMutex);
483  for (auto &Event : MPreparedDepsEvents) {
484  Event->cleanupDependencyEvents();
485  }
486  for (auto &Event : MPreparedHostDepsEvents) {
487  Event->cleanupDependencyEvents();
488  }
489 }
490 
492  if (!MIsProfilingEnabled)
493  return;
494  if (!MFallbackProfiling) {
495  if (QueueImplPtr Queue = MQueue.lock()) {
496  try {
497  MSubmitTime = Queue->getDeviceImplPtr()->getCurrentDeviceTime();
498  } catch (sycl::exception &e) {
499  if (e.code() == sycl::errc::feature_not_supported)
500  throw sycl::exception(
502  std::string("Unable to get command group submission time: ") +
503  e.what());
504  std::rethrow_exception(std::current_exception());
505  }
506  }
507  } else {
508  // Capture the host timestamp for a return value of function call
509  // <info::event_profiling::command_submit>. See MFallbackProfiling
511  }
512 }
513 
516  return;
517  // Capture a host timestamp to use normalize profiling time in
518  // <command_start> and <command_end>. See MFallbackProfiling
520 }
521 
523 
525  return get_info<info::event::command_execution_status>() ==
527 }
528 
529 } // namespace detail
530 } // namespace _V1
531 } // namespace sycl
The context class represents a SYCL context on which kernel functions may be executed.
Definition: context.hpp:51
The Command class represents some action that needs to be performed on one or more memory objects.
Definition: commands.hpp:107
void * MTraceEvent
The event for node_create and task_begin.
Definition: commands.hpp:351
Profiling info for the host execution.
void end()
Measures event's end time.
Definition: event_impl.cpp:412
void start()
Measures event's start time.
Definition: event_impl.cpp:410
void waitForEvent(const EventImplPtr &Event)
Waits for the event.
Definition: scheduler.cpp:269
QueueImplPtr getDefaultHostQueue()
Definition: scheduler.hpp:448
static Scheduler & getInstance()
Definition: scheduler.cpp:261
void checkProfilingPreconditions() const
Definition: event_impl.cpp:271
std::vector< EventImplPtr > getWaitList()
Returns vector of event_impl that this event_impl depends on.
Definition: event_impl.cpp:430
void * instrumentationProlog(std::string &Name, int32_t StreamID, uint64_t &instance_id) const
Definition: event_impl.cpp:180
std::vector< EventImplPtr > MPostCompleteEvents
Definition: event_impl.hpp:351
void cleanDepEventsThroughOneLevel()
Cleans dependencies of this event's dependencies.
Definition: event_impl.cpp:481
void setComplete()
Marks this event as completed.
Definition: event_impl.cpp:81
pi_native_handle getNative()
Gets the native handle of the SYCL event.
Definition: event_impl.cpp:414
std::unique_ptr< HostProfilingInfo > MHostProfilingInfo
Definition: event_impl.hpp:338
void setContextImpl(const ContextImplPtr &Context)
Associate event with the context.
Definition: event_impl.cpp:126
void setHostEnqueueTime()
Calling this function to capture the host timestamp to use profiling base time.
Definition: event_impl.cpp:514
std::weak_ptr< ext::oneapi::experimental::detail::graph_impl > MGraph
Store the command graph associated with this event, if any.
Definition: event_impl.hpp:367
const ContextImplPtr & getContextImpl()
Returns context that is associated with this event.
Definition: event_impl.cpp:114
void flushIfNeeded(const QueueImplPtr &UserQueue)
Performs a flush on the queue associated with this event if the user queue is different and the task ...
Definition: event_impl.cpp:448
std::weak_ptr< queue_impl > MSubmittedQueue
Definition: event_impl.hpp:345
std::atomic< int > MState
Definition: event_impl.hpp:360
sycl::detail::pi::PiEvent MEvent
Definition: event_impl.hpp:332
void instrumentationEpilog(void *TelementryEvent, const std::string &Name, int32_t StreamID, uint64_t IId) const
Definition: event_impl.cpp:216
event_impl(std::optional< HostEventState > State=HES_Complete)
Constructs a ready SYCL event.
Definition: event_impl.hpp:51
void associateWithQueue(const QueueImplPtr &Queue)
Associate event with provided queue.
Definition: event_impl.cpp:162
bool isCompleted()
Checks if this event is complete.
Definition: event_impl.cpp:524
std::vector< EventImplPtr > MPreparedHostDepsEvents
Definition: event_impl.hpp:349
void setStateIncomplete()
Clear the event state.
Definition: event_impl.cpp:124
std::condition_variable cv
Definition: event_impl.hpp:363
void waitInternal()
Waits for the event with respect to device type.
Definition: event_impl.cpp:61
std::vector< EventImplPtr > MPreparedDepsEvents
Dependency events prepared for waiting by backend.
Definition: event_impl.hpp:348
void setSubmissionTime()
Calling this function queries the current device timestamp and sets it as submission time for the com...
Definition: event_impl.cpp:491
void cleanupDependencyEvents()
Cleans dependencies of this event_impl.
Definition: event_impl.cpp:475
void wait_and_throw(std::shared_ptr< sycl::detail::event_impl > Self)
Waits for the event.
Definition: event_impl.cpp:263
void wait(std::shared_ptr< sycl::detail::event_impl > Self)
Waits for the event.
Definition: event_impl.cpp:232
const PluginPtr & getPlugin()
Definition: event_impl.cpp:119
sycl::detail::pi::PiEvent & getHandleRef()
Returns raw interoperability event handle.
Definition: event_impl.cpp:112
bool is_host()
Checks if this event is a SYCL host event.
Definition: event_impl.cpp:50
std::atomic< bool > MIsFlushed
Indicates that the task associated with this event has been submitted by the queue to the device.
Definition: event_impl.hpp:355
std::weak_ptr< queue_impl > MQueue
Definition: event_impl.hpp:340
static ContextImplPtr getDefaultOrNew(const DeviceImplPtr &Device)
Definition: queue_impl.hpp:74
The SYCL device class encapsulates a single SYCL device on which kernels may be executed.
Definition: device.hpp:66
const char * what() const noexcept final
Definition: exception.cpp:89
const std::error_code & code() const noexcept
Definition: exception.cpp:83
::pi_context PiContext
Definition: pi.hpp:135
constexpr const char * SYCL_STREAM_NAME
std::string codeToString(pi_int32 code)
Definition: common.hpp:153
static uint64_t getTimestamp()
Definition: event_impl.cpp:103
std::shared_ptr< sycl::detail::context_impl > ContextImplPtr
Definition: event_impl.hpp:32
static const PluginPtr & getPlugin(backend Backend)
Definition: backend.cpp:32
decltype(Obj::impl) getSyclObjImpl(const Obj &SyclObject)
Definition: impl_utils.hpp:30
std::shared_ptr< event_impl > EventImplPtr
Definition: cg.hpp:43
std::shared_ptr< plugin > PluginPtr
Definition: pi.hpp:48
std::string string
Definition: handler.hpp:426
std::shared_ptr< sycl::detail::queue_impl > QueueImplPtr
Definition: event_impl.hpp:34
std::error_code make_error_code(sycl::errc E) noexcept
Constructs an error code using e and sycl_category()
Definition: exception.cpp:107
Definition: access.hpp:18
int32_t pi_int32
Definition: pi.h:204
uintptr_t pi_native_handle
Definition: pi.h:209
pi_result piextEventGetNativeHandle(pi_event event, pi_native_handle *nativeHandle)
Gets the native handle of a PI event object.
Definition: pi_cuda.cpp:621
pi_result piEventGetInfo(pi_event event, pi_event_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
Definition: pi_cuda.cpp:583
@ PI_EVENT_INFO_COMMAND_EXECUTION_STATUS
Definition: pi.h:537
@ PI_EVENT_INFO_CONTEXT
Definition: pi.h:535
pi_result piEventsWait(pi_uint32 num_events, const pi_event *event_list)
Definition: pi_cuda.cpp:598
pi_result piQueueFlush(pi_queue command_queue)
Definition: pi_cuda.cpp:188
pi_result piEventRelease(pi_event event)
Definition: pi_cuda.cpp:617
_pi_event_status
Definition: pi.h:224
@ PI_EVENT_QUEUED
Definition: pi.h:228
pi_result piEventRetain(pi_event event)
Definition: pi_cuda.cpp:615
decltype(piEventCreate) piEventCreate