DPC++ Runtime
Runtime libraries for oneAPI DPC++
event_impl.cpp
Go to the documentation of this file.
1 //==---------------- event_impl.cpp - SYCL event ---------------------------==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include <detail/event_impl.hpp>
10 #include <detail/event_info.hpp>
11 #include <detail/plugin.hpp>
12 #include <detail/queue_impl.hpp>
14 #include <sycl/context.hpp>
15 #include <sycl/device_selector.hpp>
16 
17 #include "detail/config.hpp"
18 
19 #include <chrono>
20 
21 #ifdef XPTI_ENABLE_INSTRUMENTATION
22 #include "xpti/xpti_trace_framework.hpp"
23 #include <atomic>
24 #include <detail/xpti_registry.hpp>
25 #include <sstream>
26 #endif
27 
28 namespace sycl {
29 inline namespace _V1 {
30 namespace detail {
31 #ifdef XPTI_ENABLE_INSTRUMENTATION
32 extern xpti::trace_event_data_t *GSYCLGraphEvent;
33 #endif
34 
35 // If we do not yet have a context, use the default one.
38  return;
39 
40  if (MHostEvent) {
42  this->setContextImpl(detail::getSyclObjImpl(HostQueue->get_context()));
43  } else {
44  const device SyclDevice;
46  detail::getSyclObjImpl(SyclDevice)));
47  }
48 }
49 
51  // Treat all devices that don't support interoperability as host devices to
52  // avoid attempts to call method get on such events.
53  return MHostEvent;
54 }
55 
57  if (MEvent)
59 }
60 
62  if (!MHostEvent && MEvent) {
63  // Wait for the native event
65  } else if (MState == HES_Discarded) {
66  // Waiting for the discarded event is invalid
67  throw sycl::exception(
69  "waitInternal method cannot be used for a discarded event.");
70  } else if (MState != HES_Complete) {
71  // Wait for the host event
72  std::unique_lock<std::mutex> lock(MMutex);
73  cv.wait(lock, [this] { return MState == HES_Complete; });
74  }
75 
76  // Wait for connected events(e.g. streams prints)
77  for (const EventImplPtr &Event : MPostCompleteEvents)
78  Event->wait(Event);
79 }
80 
82  if (MHostEvent || !MEvent) {
83  {
84  std::unique_lock<std::mutex> lock(MMutex);
85 #ifndef NDEBUG
86  int Expected = HES_NotComplete;
87  int Desired = HES_Complete;
88 
89  bool Succeeded = MState.compare_exchange_strong(Expected, Desired);
90 
91  assert(Succeeded && "Unexpected state of event");
92 #else
93  MState.store(static_cast<int>(HES_Complete));
94 #endif
95  }
96  cv.notify_all();
97  return;
98  }
99 
100  assert(false && "setComplete is not supported for non-host event");
101 }
102 
/// Reads std::chrono::high_resolution_clock and returns the elapsed time since
/// that clock's epoch, expressed as a count of nanoseconds.
static inline uint64_t getTimestamp() {
  const std::chrono::nanoseconds SinceEpoch =
      std::chrono::duration_cast<std::chrono::nanoseconds>(
          std::chrono::high_resolution_clock::now().time_since_epoch());
  return SinceEpoch.count();
}
108 
110  return MEvent;
111 }
113 
116  return MContext;
117 }
118 
121  return MContext->getPlugin();
122 }
123 
125 
127  MHostEvent = Context->is_host();
128  MContext = Context;
129  MIsContextInitialized = true;
130 }
131 
133  const context &SyclContext)
134  : MIsContextInitialized(true), MEvent(Event),
135  MContext(detail::getSyclObjImpl(SyclContext)), MHostEvent(false),
136  MIsFlushed(true), MState(HES_Complete) {
137 
138  if (MContext->is_host()) {
139  throw sycl::exception(sycl::make_error_code(sycl::errc::invalid),
140  "The syclContext must match the OpenCL context "
141  "associated with the clEvent. " +
142  codeToString(PI_ERROR_INVALID_CONTEXT));
143  }
144 
145  sycl::detail::pi::PiContext TempContext;
148  &TempContext, nullptr);
149  if (MContext->getHandleRef() != TempContext) {
150  throw sycl::exception(sycl::make_error_code(sycl::errc::invalid),
151  "The syclContext must match the OpenCL context "
152  "associated with the clEvent. " +
153  codeToString(PI_ERROR_INVALID_CONTEXT));
154  }
155 }
156 
158  : MQueue{Queue},
159  MIsProfilingEnabled{Queue->is_host() || Queue->MIsProfilingEnabled},
160  MFallbackProfiling{MIsProfilingEnabled && Queue->isProfilingFallback()} {
161  this->setContextImpl(Queue->getContextImplPtr());
162  if (Queue->is_host()) {
163  MState.store(HES_NotComplete);
164  if (Queue->has_property<property::queue::enable_profiling>()) {
166  if (!MHostProfilingInfo)
167  throw sycl::exception(sycl::make_error_code(sycl::errc::runtime),
168  "Out of host memory " +
169  codeToString(PI_ERROR_OUT_OF_HOST_MEMORY));
170  }
171  return;
172  }
173  MState.store(HES_Complete);
174 }
175 
176 void *event_impl::instrumentationProlog(std::string &Name, int32_t StreamID,
177  uint64_t &IId) const {
178  void *TraceEvent = nullptr;
179 #ifdef XPTI_ENABLE_INSTRUMENTATION
180  constexpr uint16_t NotificationTraceType = xpti::trace_wait_begin;
181  if (!xptiCheckTraceEnabled(StreamID, NotificationTraceType))
182  return TraceEvent;
183  // Use a thread-safe counter to get a unique instance ID for the wait() on the
184  // event
185  static std::atomic<uint64_t> InstanceID = {1};
186  xpti::trace_event_data_t *WaitEvent = nullptr;
187 
188  // Create a string with the event address so it
189  // can be associated with other debug data
190  xpti::utils::StringHelper SH;
191  Name = SH.nameWithAddress<sycl::detail::pi::PiEvent>("event.wait", MEvent);
192 
193  // We can emit the wait associated with the graph if the
194  // event does not have a command object or associated with
195  // the command object, if it exists
196  if (MCommand) {
197  Command *Cmd = (Command *)MCommand;
198  WaitEvent = Cmd->MTraceEvent ? static_cast<xpti_td *>(Cmd->MTraceEvent)
199  : GSYCLGraphEvent;
200  } else
201  WaitEvent = GSYCLGraphEvent;
202 
203  // Record the current instance ID for use by Epilog
204  IId = InstanceID++;
205  xptiNotifySubscribers(StreamID, NotificationTraceType, nullptr, WaitEvent,
206  IId, static_cast<const void *>(Name.c_str()));
207  TraceEvent = (void *)WaitEvent;
208 #endif
209  return TraceEvent;
210 }
211 
212 void event_impl::instrumentationEpilog(void *TelemetryEvent,
213  const std::string &Name,
214  int32_t StreamID, uint64_t IId) const {
215 #ifdef XPTI_ENABLE_INSTRUMENTATION
216  constexpr uint16_t NotificationTraceType = xpti::trace_wait_end;
217  if (!(xptiCheckTraceEnabled(StreamID, NotificationTraceType) &&
218  TelemetryEvent))
219  return;
220  // Close the wait() scope
221  xpti::trace_event_data_t *TraceEvent =
222  (xpti::trace_event_data_t *)TelemetryEvent;
223  xptiNotifySubscribers(StreamID, NotificationTraceType, nullptr, TraceEvent,
224  IId, static_cast<const void *>(Name.c_str()));
225 #endif
226 }
227 
228 void event_impl::wait(std::shared_ptr<sycl::detail::event_impl> Self) {
229  if (MState == HES_Discarded)
231  "wait method cannot be used for a discarded event.");
232 
233  if (MGraph.lock()) {
235  "wait method cannot be used for an event associated "
236  "with a command graph.");
237  }
238 
239 #ifdef XPTI_ENABLE_INSTRUMENTATION
240  void *TelemetryEvent = nullptr;
241  uint64_t IId = 0;
242  std::string Name;
243  int32_t StreamID = xptiRegisterStream(SYCL_STREAM_NAME);
244  TelemetryEvent = instrumentationProlog(Name, StreamID, IId);
245 #endif
246 
247  if (MEvent)
248  // presence of MEvent means the command has been enqueued, so no need to
249  // go via the slow path event waiting in the scheduler
250  waitInternal();
251  else if (MCommand)
253 
254 #ifdef XPTI_ENABLE_INSTRUMENTATION
255  instrumentationEpilog(TelemetryEvent, Name, StreamID, IId);
256 #endif
257 }
258 
260  std::shared_ptr<sycl::detail::event_impl> Self) {
261  wait(Self);
262 
263  if (QueueImplPtr SubmittedQueue = MSubmittedQueue.lock())
264  SubmittedQueue->throw_asynchronous();
265 }
266 
268  std::weak_ptr<queue_impl> EmptyPtr;
269 
270  if (!EmptyPtr.owner_before(MQueue) && !MQueue.owner_before(EmptyPtr)) {
271  throw sycl::exception(make_error_code(sycl::errc::invalid),
272  "Profiling information is unavailable as the event "
273  "has no associated queue.");
274  }
275  if (!MIsProfilingEnabled) {
276  throw sycl::exception(
277  make_error_code(sycl::errc::invalid),
278  "Profiling information is unavailable as the queue associated with "
279  "the event does not have the 'enable_profiling' property.");
280  }
281 }
282 
283 template <>
284 uint64_t
285 event_impl::get_profiling_info<info::event_profiling::command_submit>() {
286  checkProfilingPreconditions();
287  // The delay between the submission and the actual start of a CommandBuffer
288  // can be short. Consequently, the submission time, which is based on
289  // an estimated clock and not on the real device clock, may be ahead of the
290  // start time, which is based on the actual device clock.
291  // MSubmitTime is set in a critical performance path.
292  // Force reading the device clock when setting MSubmitTime may deteriorate
293  // the performance.
294  // Since submit time is an estimated time, we implement this little hack
295  // that allows all profiled time to be meaningful.
296  // (Note that the observed time deviation between the estimated clock and
297  // the real device clock is typically less than 0.5ms. The approximation we
298  // made by forcing the re-sync of submit time to start time is less than
299  // 0.5ms. These timing values were obtained empirically using an integrated
300  // Intel GPU).
301  if (MEventFromSubmittedExecCommandBuffer && !MHostEvent && MEvent) {
302  uint64_t StartTime =
303  get_event_profiling_info<info::event_profiling::command_start>(
304  this->getHandleRef(), this->getPlugin());
305  if (StartTime < MSubmitTime)
306  MSubmitTime = StartTime;
307  }
308  return MSubmitTime;
309 }
310 
311 template <>
312 uint64_t
313 event_impl::get_profiling_info<info::event_profiling::command_start>() {
314  checkProfilingPreconditions();
315  if (!MHostEvent) {
316  if (MEvent) {
317  auto StartTime =
318  get_event_profiling_info<info::event_profiling::command_start>(
319  this->getHandleRef(), this->getPlugin());
320  if (!MFallbackProfiling) {
321  return StartTime;
322  } else {
323  auto DeviceBaseTime =
324  get_event_profiling_info<info::event_profiling::command_submit>(
325  this->getHandleRef(), this->getPlugin());
326  return MHostBaseTime - DeviceBaseTime + StartTime;
327  }
328  }
329  return 0;
330  }
331  if (!MHostProfilingInfo)
332  throw sycl::exception(
333  sycl::make_error_code(sycl::errc::invalid),
334  "Profiling info is not available. " +
335  codeToString(PI_ERROR_PROFILING_INFO_NOT_AVAILABLE));
336  return MHostProfilingInfo->getStartTime();
337 }
338 
339 template <>
340 uint64_t event_impl::get_profiling_info<info::event_profiling::command_end>() {
341  checkProfilingPreconditions();
342  if (!MHostEvent) {
343  if (MEvent) {
344  auto EndTime =
345  get_event_profiling_info<info::event_profiling::command_end>(
346  this->getHandleRef(), this->getPlugin());
347  if (!MFallbackProfiling) {
348  return EndTime;
349  } else {
350  auto DeviceBaseTime =
351  get_event_profiling_info<info::event_profiling::command_submit>(
352  this->getHandleRef(), this->getPlugin());
353  return MHostBaseTime - DeviceBaseTime + EndTime;
354  }
355  }
356  return 0;
357  }
358  if (!MHostProfilingInfo)
359  throw sycl::exception(
360  sycl::make_error_code(sycl::errc::invalid),
361  "Profiling info is not available. " +
362  codeToString(PI_ERROR_PROFILING_INFO_NOT_AVAILABLE));
363  return MHostProfilingInfo->getEndTime();
364 }
365 
366 template <> uint32_t event_impl::get_info<info::event::reference_count>() {
367  if (!MHostEvent && MEvent) {
368  return get_event_info<info::event::reference_count>(this->getHandleRef(),
369  this->getPlugin());
370  }
371  return 0;
372 }
373 
374 template <>
376 event_impl::get_info<info::event::command_execution_status>() {
377  if (MState == HES_Discarded)
379 
380  if (!MHostEvent) {
381  // Command is enqueued and PiEvent is ready
382  if (MEvent)
383  return get_event_info<info::event::command_execution_status>(
384  this->getHandleRef(), this->getPlugin());
385  // Command is blocked and not enqueued, PiEvent is not assigned yet
386  else if (MCommand)
387  return sycl::info::event_command_status::submitted;
388  }
389 
390  return MHostEvent && MState.load() != HES_Complete
391  ? sycl::info::event_command_status::submitted
393 }
394 
395 void HostProfilingInfo::start() { StartTime = getTimestamp(); }
396 
397 void HostProfilingInfo::end() { EndTime = getTimestamp(); }
398 
401 
402  auto Plugin = getPlugin();
403  if (!MIsInitialized) {
404  MIsInitialized = true;
405  auto TempContext = MContext.get()->getHandleRef();
406  Plugin->call<PiApiKind::piEventCreate>(TempContext, &MEvent);
407  }
408  if (MContext->getBackend() == backend::opencl)
409  Plugin->call<PiApiKind::piEventRetain>(getHandleRef());
410  pi_native_handle Handle;
411  Plugin->call<PiApiKind::piextEventGetNativeHandle>(getHandleRef(), &Handle);
412  return Handle;
413 }
414 
415 std::vector<EventImplPtr> event_impl::getWaitList() {
416  if (MState == HES_Discarded)
417  throw sycl::exception(
419  "get_wait_list() cannot be used for a discarded event.");
420 
421  std::lock_guard<std::mutex> Lock(MMutex);
422 
423  std::vector<EventImplPtr> Result;
424  Result.reserve(MPreparedDepsEvents.size() + MPreparedHostDepsEvents.size());
425  Result.insert(Result.end(), MPreparedDepsEvents.begin(),
426  MPreparedDepsEvents.end());
427  Result.insert(Result.end(), MPreparedHostDepsEvents.begin(),
429 
430  return Result;
431 }
432 
433 void event_impl::flushIfNeeded(const QueueImplPtr &UserQueue) {
434  // Some events might not have a native handle underneath even at this point,
435  // e.g. those produced by memset with 0 size (no PI call is made).
436  if (MIsFlushed || !MEvent)
437  return;
438 
439  QueueImplPtr Queue = MQueue.lock();
440  // If the queue has been released, all of the commands have already been
441  // implicitly flushed by piQueueRelease.
442  if (!Queue) {
443  MIsFlushed = true;
444  return;
445  }
446  if (Queue == UserQueue)
447  return;
448 
449  // Check if the task for this event has already been submitted.
453  nullptr);
454  if (Status == PI_EVENT_QUEUED) {
455  getPlugin()->call<PiApiKind::piQueueFlush>(Queue->getHandleRef());
456  }
457  MIsFlushed = true;
458 }
459 
461  std::lock_guard<std::mutex> Lock(MMutex);
462  MPreparedDepsEvents.clear();
463  MPreparedHostDepsEvents.clear();
464 }
465 
467  std::lock_guard<std::mutex> Lock(MMutex);
468  for (auto &Event : MPreparedDepsEvents) {
469  Event->cleanupDependencyEvents();
470  }
471  for (auto &Event : MPreparedHostDepsEvents) {
472  Event->cleanupDependencyEvents();
473  }
474 }
475 
477  if (!MIsProfilingEnabled)
478  return;
479  if (!MFallbackProfiling) {
480  if (QueueImplPtr Queue = MQueue.lock()) {
481  try {
482  MSubmitTime = Queue->getDeviceImplPtr()->getCurrentDeviceTime();
483  } catch (sycl::exception &e) {
484  if (e.code() == sycl::errc::feature_not_supported)
485  throw sycl::exception(
487  std::string("Unable to get command group submission time: ") +
488  e.what());
489  std::rethrow_exception(std::current_exception());
490  }
491  }
492  } else {
493  // Capture the host timestamp for a return value of function call
494  // <info::event_profiling::command_submit>. See MFallbackProfiling
496  }
497 }
498 
501  return;
502  // Capture a host timestamp to use normalize profiling time in
503  // <command_start> and <command_end>. See MFallbackProfiling
505 }
506 
508 
510  return get_info<info::event::command_execution_status>() ==
512 }
513 
514 } // namespace detail
515 } // namespace _V1
516 } // namespace sycl
The context class represents a SYCL context on which kernel functions may be executed.
Definition: context.hpp:51
The Command class represents some action that needs to be performed on one or more memory objects.
Definition: commands.hpp:102
void * MTraceEvent
The event for node_create and task_begin.
Definition: commands.hpp:345
Profiling info for the host execution.
void end()
Measures event's end time.
Definition: event_impl.cpp:397
void start()
Measures event's start time.
Definition: event_impl.cpp:395
void waitForEvent(const EventImplPtr &Event)
Waits for the event.
Definition: scheduler.cpp:268
QueueImplPtr getDefaultHostQueue()
Definition: scheduler.hpp:444
static Scheduler & getInstance()
Definition: scheduler.cpp:260
void checkProfilingPreconditions() const
Definition: event_impl.cpp:267
std::vector< EventImplPtr > getWaitList()
Returns vector of event_impl that this event_impl depends on.
Definition: event_impl.cpp:415
void * instrumentationProlog(std::string &Name, int32_t StreamID, uint64_t &instance_id) const
Definition: event_impl.cpp:176
std::vector< EventImplPtr > MPostCompleteEvents
Definition: event_impl.hpp:326
void cleanDepEventsThroughOneLevel()
Cleans dependencies of this event's dependencies.
Definition: event_impl.cpp:466
void setComplete()
Marks this event as completed.
Definition: event_impl.cpp:81
pi_native_handle getNative()
Gets the native handle of the SYCL event.
Definition: event_impl.cpp:399
std::unique_ptr< HostProfilingInfo > MHostProfilingInfo
Definition: event_impl.hpp:313
void setContextImpl(const ContextImplPtr &Context)
Associate event with the context.
Definition: event_impl.cpp:126
void setHostEnqueueTime()
Call this function to capture the host timestamp to use as the profiling base time.
Definition: event_impl.cpp:499
std::weak_ptr< ext::oneapi::experimental::detail::graph_impl > MGraph
Store the command graph associated with this event, if any.
Definition: event_impl.hpp:342
const ContextImplPtr & getContextImpl()
Returns context that is associated with this event.
Definition: event_impl.cpp:114
void flushIfNeeded(const QueueImplPtr &UserQueue)
Performs a flush on the queue associated with this event if the user queue is different and the task ...
Definition: event_impl.cpp:433
std::weak_ptr< queue_impl > MSubmittedQueue
Definition: event_impl.hpp:320
std::atomic< int > MState
Definition: event_impl.hpp:335
sycl::detail::pi::PiEvent MEvent
Definition: event_impl.hpp:307
void instrumentationEpilog(void *TelementryEvent, const std::string &Name, int32_t StreamID, uint64_t IId) const
Definition: event_impl.cpp:212
event_impl(std::optional< HostEventState > State=HES_Complete)
Constructs a ready SYCL event.
Definition: event_impl.hpp:52
bool isCompleted()
Checks if this event is complete.
Definition: event_impl.cpp:509
std::vector< EventImplPtr > MPreparedHostDepsEvents
Definition: event_impl.hpp:324
void setStateIncomplete()
Clear the event state.
Definition: event_impl.cpp:124
std::condition_variable cv
Definition: event_impl.hpp:338
void waitInternal()
Waits for the event with respect to device type.
Definition: event_impl.cpp:61
std::vector< EventImplPtr > MPreparedDepsEvents
Dependency events prepared for waiting by backend.
Definition: event_impl.hpp:323
void setSubmissionTime()
Calling this function queries the current device timestamp and sets it as submission time for the com...
Definition: event_impl.cpp:476
void cleanupDependencyEvents()
Cleans dependencies of this event_impl.
Definition: event_impl.cpp:460
void wait_and_throw(std::shared_ptr< sycl::detail::event_impl > Self)
Waits for the event.
Definition: event_impl.cpp:259
void wait(std::shared_ptr< sycl::detail::event_impl > Self)
Waits for the event.
Definition: event_impl.cpp:228
const PluginPtr & getPlugin()
Definition: event_impl.cpp:119
sycl::detail::pi::PiEvent & getHandleRef()
Returns raw interoperability event handle.
Definition: event_impl.cpp:112
bool is_host()
Checks if this event is a SYCL host event.
Definition: event_impl.cpp:50
std::atomic< bool > MIsFlushed
Indicates that the task associated with this event has been submitted by the queue to the device.
Definition: event_impl.hpp:330
std::weak_ptr< queue_impl > MQueue
Definition: event_impl.hpp:315
static ContextImplPtr getDefaultOrNew(const DeviceImplPtr &Device)
Definition: queue_impl.hpp:75
The SYCL device class encapsulates a single SYCL device on which kernels may be executed.
Definition: device.hpp:59
const char * what() const noexcept final
Definition: exception.cpp:76
const std::error_code & code() const noexcept
Definition: exception.cpp:70
::pi_context PiContext
Definition: pi.hpp:135
constexpr const char * SYCL_STREAM_NAME
std::string codeToString(pi_int32 code)
Definition: common.hpp:153
static uint64_t getTimestamp()
Definition: event_impl.cpp:103
std::shared_ptr< sycl::detail::context_impl > ContextImplPtr
Definition: event_impl.hpp:33
static const PluginPtr & getPlugin(backend Backend)
Definition: backend.cpp:32
decltype(Obj::impl) getSyclObjImpl(const Obj &SyclObject)
Definition: impl_utils.hpp:30
std::shared_ptr< event_impl > EventImplPtr
Definition: cg.hpp:43
std::shared_ptr< plugin > PluginPtr
Definition: pi.hpp:48
std::shared_ptr< sycl::detail::queue_impl > QueueImplPtr
Definition: event_impl.hpp:35
std::error_code make_error_code(sycl::errc E) noexcept
Constructs an error code using e and sycl_category()
Definition: exception.cpp:94
Definition: access.hpp:18
int32_t pi_int32
Definition: pi.h:199
uintptr_t pi_native_handle
Definition: pi.h:204
pi_result piextEventGetNativeHandle(pi_event event, pi_native_handle *nativeHandle)
Gets the native handle of a PI event object.
Definition: pi_cuda.cpp:604
pi_result piEventGetInfo(pi_event event, pi_event_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
Definition: pi_cuda.cpp:566
@ PI_EVENT_INFO_COMMAND_EXECUTION_STATUS
Definition: pi.h:528
@ PI_EVENT_INFO_CONTEXT
Definition: pi.h:526
pi_result piEventsWait(pi_uint32 num_events, const pi_event *event_list)
Definition: pi_cuda.cpp:581
pi_result piQueueFlush(pi_queue command_queue)
Definition: pi_cuda.cpp:188
pi_result piEventRelease(pi_event event)
Definition: pi_cuda.cpp:600
_pi_event_status
Definition: pi.h:219
@ PI_EVENT_QUEUED
Definition: pi.h:223
pi_result piEventRetain(pi_event event)
Definition: pi_cuda.cpp:598
decltype(piEventCreate) piEventCreate