DPC++ Runtime
Runtime libraries for oneAPI DPC++
handler.cpp
Go to the documentation of this file.
1 //==-------- handler.cpp --- SYCL command group handler --------------------==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include <algorithm>
10 
14 #include <CL/sycl/detail/pi.hpp>
15 #include <CL/sycl/event.hpp>
16 #include <CL/sycl/handler.hpp>
18 #include <CL/sycl/stream.hpp>
19 #include <detail/config.hpp>
21 #include <detail/handler_impl.hpp>
23 #include <detail/kernel_impl.hpp>
24 #include <detail/queue_impl.hpp>
27 
29 namespace sycl {
30 
// Convenience constructor: the queue itself acts as the primary submission
// queue and there is no secondary queue.
handler::handler(std::shared_ptr<detail::queue_impl> Queue, bool IsHost)
    : handler(Queue, Queue, nullptr, IsHost) {}
33 
// Main constructor: records the queue this handler submits to and stores a
// handler_impl (holding the primary/secondary submission queues) as the first
// extended member of MSharedPtrStorage.
handler::handler(std::shared_ptr<detail::queue_impl> Queue,
                 std::shared_ptr<detail::queue_impl> PrimaryQueue,
                 std::shared_ptr<detail::queue_impl> SecondaryQueue,
                 bool IsHost)
    : MQueue(std::move(Queue)), MIsHost(IsHost) {
  // Create extended members and insert handler_impl
  // TODO: When allowed to break ABI the handler_impl should be made a member
  // of the handler class.
  auto ExtendedMembers =
      std::make_shared<std::vector<detail::ExtendedMemberT>>();
  detail::ExtendedMemberT HandlerImplMember = {
      // NOTE(review): the member-type tag line (presumably
      // detail::ExtendedMembersType::HANDLER_IMPL,) is missing from this
      // extraction — verify against upstream handler.cpp.
      std::make_shared<detail::handler_impl>(std::move(PrimaryQueue),
                                             std::move(SecondaryQueue))};
  ExtendedMembers->push_back(std::move(HandlerImplMember));
  MSharedPtrStorage.push_back(std::move(ExtendedMembers));
}
51 
// NOTE(review): the declarator line of this file-local helper is missing from
// this extraction (presumably
// `static detail::ExtendedMemberT &getHandlerImplMember(`) — verify upstream.
// Fetches the extended-member slot holding the handler_impl; the handler
// constructor stores it as extended member 0.
    std::vector<std::shared_ptr<const void>> &SharedPtrStorage) {
  assert(!SharedPtrStorage.empty());
  std::shared_ptr<std::vector<detail::ExtendedMemberT>> ExtendedMembersVec =
      detail::convertToExtendedMembers(SharedPtrStorage[0]);
  assert(ExtendedMembersVec->size() > 0);
  auto &HandlerImplMember = (*ExtendedMembersVec)[0];
  // Slot 0 is expected to always hold the handler_impl.
  assert(detail::ExtendedMembersType::HANDLER_IMPL == HandlerImplMember.MType);
  return HandlerImplMember;
}
62 
64 std::shared_ptr<detail::handler_impl> handler::getHandlerImpl() const {
65  std::lock_guard<std::mutex> Lock(
66  detail::GlobalHandler::instance().getHandlerExtendedMembersMutex());
67  return std::static_pointer_cast<detail::handler_impl>(
68  getHandlerImplMember(MSharedPtrStorage).MData);
69 }
70 
72 std::shared_ptr<detail::handler_impl> handler::evictHandlerImpl() const {
73  std::lock_guard<std::mutex> Lock(
74  detail::GlobalHandler::instance().getHandlerExtendedMembersMutex());
75  auto &HandlerImplMember = getHandlerImplMember(MSharedPtrStorage);
76  auto Impl =
77  std::static_pointer_cast<detail::handler_impl>(HandlerImplMember.MData);
78 
79  // Reset the data of the member.
80  // NOTE: We let it stay because removing the front can be expensive. This will
81  // be improved when the impl is made a member of handler. In fact eviction is
82  // likely to not be needed when that happens.
83  HandlerImplMember.MData.reset();
84 
85  return Impl;
86 }
87 
88 // Sets the submission state to indicate that an explicit kernel bundle has been
89 // set. Throws a sycl::exception with errc::invalid if the current state
90 // indicates that a specialization constant has been set.
91 void handler::setStateExplicitKernelBundle() {
92  getHandlerImpl()->setStateExplicitKernelBundle();
93 }
94 
95 // Sets the submission state to indicate that a specialization constant has been
96 // set. Throws a sycl::exception with errc::invalid if the current state
97 // indicates that an explicit kernel bundle has been set.
98 void handler::setStateSpecConstSet() {
99  getHandlerImpl()->setStateSpecConstSet();
100 }
101 
102 // Returns true if the submission state is EXPLICIT_KERNEL_BUNDLE_STATE and
103 // false otherwise.
104 bool handler::isStateExplicitKernelBundle() const {
105  return getHandlerImpl()->isStateExplicitKernelBundle();
106 }
107 
// Returns a shared_ptr to kernel_bundle stored in the extended members vector.
// If there is no kernel_bundle created:
// returns newly created kernel_bundle if Insert is true
// returns shared_ptr(nullptr) if Insert is false
std::shared_ptr<detail::kernel_bundle_impl>
handler::getOrInsertHandlerKernelBundle(bool Insert) const {

  std::lock_guard<std::mutex> Lock(
      detail::GlobalHandler::instance().getHandlerExtendedMembersMutex());

  assert(!MSharedPtrStorage.empty());

  std::shared_ptr<std::vector<detail::ExtendedMemberT>> ExtendedMembersVec =
      detail::convertToExtendedMembers(MSharedPtrStorage[0]);
  // Look for the kernel bundle in extended members
  std::shared_ptr<detail::kernel_bundle_impl> KernelBundleImpPtr;
  for (const detail::ExtendedMemberT &EMember : *ExtendedMembersVec)
  // NOTE(review): the member-type check opening this scope is missing from
  // this extraction (presumably an
  // `if (... HANDLER_KERNEL_BUNDLE == EMember.MType) {`) — verify upstream.
    KernelBundleImpPtr =
        std::static_pointer_cast<detail::kernel_bundle_impl>(EMember.MData);
    break;
  }

  // No kernel bundle yet, create one
  if (!KernelBundleImpPtr && Insert) {
    // Create an empty kernel bundle to add kernels to later
    KernelBundleImpPtr =
        detail::getSyclObjImpl(get_kernel_bundle<bundle_state::input>(
            MQueue->get_context(), {MQueue->get_device()}, {}));

    detail::ExtendedMemberT EMember = {
        // NOTE(review): the initializer line(s) (member-type tag and data)
        // are missing from this extraction — verify upstream.
    ExtendedMembersVec->push_back(EMember);
  }

  return KernelBundleImpPtr;
}
145 
// Sets kernel bundle to the provided one. Either replaces the existing one or
// creates a new entry in the extended members vector.
void handler::setHandlerKernelBundle(
    const std::shared_ptr<detail::kernel_bundle_impl> &NewKernelBundleImpPtr) {
  assert(!MSharedPtrStorage.empty());

  std::lock_guard<std::mutex> Lock(
      detail::GlobalHandler::instance().getHandlerExtendedMembersMutex());

  std::shared_ptr<std::vector<detail::ExtendedMemberT>> ExendedMembersVec =
      detail::convertToExtendedMembers(MSharedPtrStorage[0]);

  // Look for kernel bundle in extended members and overwrite it.
  for (detail::ExtendedMemberT &EMember : *ExendedMembersVec) {
    // NOTE(review): the member-type check opening this scope is missing from
    // this extraction — verify upstream.
      EMember.MData = NewKernelBundleImpPtr;
      return;
    }
  }

  // Kernel bundle was not found, so we add it.
  detail::ExtendedMemberT EMember = {
      // NOTE(review): the member-type tag line is missing from this
      // extraction — verify upstream.
      NewKernelBundleImpPtr};
  ExendedMembersVec->push_back(EMember);
}
172 
// Finalizes the command group: builds/uses the kernel bundle if one is
// attached, takes a scheduler-bypassing fast path for dependency-free kernel
// submissions, and otherwise constructs the CG object matching the recorded
// command type and hands it to the scheduler.
// NOTE(review): this extraction has dropped a number of source lines (case
// labels and parts of multi-line expressions); each gap is marked below and
// must be verified against the upstream handler.cpp.
event handler::finalize() {
  // This block of code is needed only for reduction implementation.
  // It is harmless (does nothing) for everything else.
  if (MIsFinalized)
    return MLastEvent;
  MIsFinalized = true;

  std::shared_ptr<detail::kernel_bundle_impl> KernelBundleImpPtr = nullptr;
  // Kernel_bundles could not be used before CGType version 1
  if (getCGTypeVersion(MCGType) >
      static_cast<unsigned int>(detail::CG::CG_VERSION::V0)) {
    // If there were uses of set_specialization_constant build the kernel_bundle
    KernelBundleImpPtr = getOrInsertHandlerKernelBundle(/*Insert=*/false);
    if (KernelBundleImpPtr) {
      // Make sure implicit non-interop kernel bundles have the kernel
      if (!KernelBundleImpPtr->isInterop() &&
          !getHandlerImpl()->isStateExplicitKernelBundle()) {
        kernel_id KernelID =
        // [extraction gap: initializer line missing — presumably a
        //  ProgramManager getSYCLKernelID(MKernelName) call; verify upstream]
        bool KernelInserted =
            KernelBundleImpPtr->add_kernel(KernelID, MQueue->get_device());
        // If kernel was not inserted and the bundle is in input mode we try
        // building it and trying to find the kernel in executable mode
        if (!KernelInserted &&
            KernelBundleImpPtr->get_bundle_state() == bundle_state::input) {
          auto KernelBundle =
              detail::createSyclObjFromImpl<kernel_bundle<bundle_state::input>>(
                  KernelBundleImpPtr);
          kernel_bundle<bundle_state::executable> ExecKernelBundle =
              build(KernelBundle);
          KernelBundleImpPtr = detail::getSyclObjImpl(ExecKernelBundle);
          setHandlerKernelBundle(KernelBundleImpPtr);
          KernelInserted =
              KernelBundleImpPtr->add_kernel(KernelID, MQueue->get_device());
        }
        // If the kernel was not found in executable mode we throw an exception
        if (!KernelInserted)
          throw sycl::exception(make_error_code(errc::runtime),
                                "Failed to add kernel to kernel bundle.");
      }

      switch (KernelBundleImpPtr->get_bundle_state()) {
      case bundle_state::input: {
        // Underlying level expects kernel_bundle to be in executable state
        kernel_bundle<bundle_state::executable> ExecBundle = build(
            detail::createSyclObjFromImpl<kernel_bundle<bundle_state::input>>(
                KernelBundleImpPtr));
        KernelBundleImpPtr = detail::getSyclObjImpl(ExecBundle);
        setHandlerKernelBundle(KernelBundleImpPtr);
        break;
      }
      // [extraction gap: case label missing — presumably the executable
      //  state; verify upstream]
        // Nothing to do
        break;
      // [extraction gap: label missing — presumably `default:`]
        assert(0 && "Expected that the bundle is either in input or executable "
                    "states.");
        break;
      }
    }
  }

  const auto &type = getType();
  if (type == detail::CG::Kernel &&
      MRequirements.size() + MEvents.size() + MStreamStorage.size() == 0) {
    // if user does not add a new dependency to the dependency graph, i.e.
    // the graph is not changed, then this faster path is used to submit kernel
    // bypassing scheduler and avoiding CommandGroup, Command objects creation.

    std::vector<RT::PiEvent> RawEvents;
    detail::EventImplPtr NewEvent;
    RT::PiEvent *OutEvent = nullptr;

    auto EnqueueKernel = [&]() {
      // 'Result' for single point of return
      cl_int Result = CL_INVALID_VALUE;

      if (MQueue->is_host()) {
        MHostKernel->call(
            MNDRDesc, (NewEvent) ? NewEvent->getHostProfilingInfo() : nullptr);
        Result = CL_SUCCESS;
      } else {
        if (MQueue->getPlugin().getBackend() ==
        // [extraction gap: comparison operand missing — the cross-reference
        //  data suggests backend::ext_intel_esimd_emulator; verify upstream]
          MQueue->getPlugin().call<detail::PiApiKind::piEnqueueKernelLaunch>(
              nullptr, reinterpret_cast<pi_kernel>(MHostKernel->getPtr()),
              MNDRDesc.Dims, &MNDRDesc.GlobalOffset[0], &MNDRDesc.GlobalSize[0],
              &MNDRDesc.LocalSize[0], 0, nullptr, nullptr);
          Result = CL_SUCCESS;
        } else {
          Result = enqueueImpKernel(MQueue, MNDRDesc, MArgs, KernelBundleImpPtr,
                                    MKernel, MKernelName, MOSModuleHandle,
                                    RawEvents, OutEvent, nullptr);
        }
      }
      // assert(Result != CL_INVALID_VALUE);
      return Result;
    };

    bool DiscardEvent = false;
    if (MQueue->has_discard_events_support()) {
      // Kernel only uses assert if it's non interop one
      bool KernelUsesAssert =
          !(MKernel && MKernel->isInterop()) &&
      // [extraction gap: sub-expression head missing — presumably a
      //  ProgramManager kernelUsesAssert( call; verify upstream]
              MOSModuleHandle, MKernelName);
      DiscardEvent = !KernelUsesAssert;
    }

    if (DiscardEvent) {
      if (CL_SUCCESS != EnqueueKernel())
        throw runtime_error("Enqueue process failed.", PI_INVALID_OPERATION);
    } else {
      NewEvent = std::make_shared<detail::event_impl>(MQueue);
      NewEvent->setContextImpl(MQueue->getContextImplPtr());
      OutEvent = &NewEvent->getHandleRef();

      if (CL_SUCCESS != EnqueueKernel())
        throw runtime_error("Enqueue process failed.", PI_INVALID_OPERATION);
      else if (NewEvent->is_host() || NewEvent->getHandleRef() == nullptr)
        NewEvent->setComplete();

      MLastEvent = detail::createSyclObjFromImpl<event>(NewEvent);
    }
    return MLastEvent;
  }

  // Evict handler_impl from extended members to make sure the command group
  // does not keep it alive.
  std::shared_ptr<detail::handler_impl> Impl = evictHandlerImpl();

  std::unique_ptr<detail::CG> CommandGroup;
  switch (type) {
  case detail::CG::Kernel:
  // [extraction gap: an additional case label (with opening brace) appears to
  //  be missing here; verify upstream]
    // Copy kernel name here instead of move so that it's available after
    // running of this method by reductions implementation. This allows for
    // assert feature to check if kernel uses assertions
    CommandGroup.reset(new detail::CGExecKernel(
        std::move(MNDRDesc), std::move(MHostKernel), std::move(MKernel),
        std::move(MArgsStorage), std::move(MAccStorage),
        std::move(MSharedPtrStorage), std::move(MRequirements),
        std::move(MEvents), std::move(MArgs), MKernelName, MOSModuleHandle,
        std::move(MStreamStorage), std::move(Impl->MAuxiliaryResources),
        MCGType, MCodeLoc));
    break;
  }
  // [extraction gap: case label missing — an interop-task command type;
  //  verify upstream]
    CommandGroup.reset(new detail::CGInteropTask(
        std::move(MInteropTask), std::move(MArgsStorage),
        std::move(MAccStorage), std::move(MSharedPtrStorage),
        std::move(MRequirements), std::move(MEvents), MCGType, MCodeLoc));
    break;
  // [extraction gap: case labels missing — copy command types; verify
  //  upstream]
    CommandGroup.reset(new detail::CGCopy(
        MCGType, MSrcPtr, MDstPtr, std::move(MArgsStorage),
        std::move(MAccStorage), std::move(MSharedPtrStorage),
        std::move(MRequirements), std::move(MEvents), MCodeLoc));
    break;
  case detail::CG::Fill:
    CommandGroup.reset(new detail::CGFill(
        std::move(MPattern), MDstPtr, std::move(MArgsStorage),
        std::move(MAccStorage), std::move(MSharedPtrStorage),
        std::move(MRequirements), std::move(MEvents), MCodeLoc));
    break;
  // [extraction gap: case label missing — presumably an update-host command
  //  type; verify upstream]
    CommandGroup.reset(new detail::CGUpdateHost(
        MDstPtr, std::move(MArgsStorage), std::move(MAccStorage),
        std::move(MSharedPtrStorage), std::move(MRequirements),
        std::move(MEvents), MCodeLoc));
    break;
  case detail::CG::CopyUSM:
    CommandGroup.reset(new detail::CGCopyUSM(
        MSrcPtr, MDstPtr, MLength, std::move(MArgsStorage),
        std::move(MAccStorage), std::move(MSharedPtrStorage),
        std::move(MRequirements), std::move(MEvents), MCodeLoc));
    break;
  case detail::CG::FillUSM:
    CommandGroup.reset(new detail::CGFillUSM(
        std::move(MPattern), MDstPtr, MLength, std::move(MArgsStorage),
        std::move(MAccStorage), std::move(MSharedPtrStorage),
        std::move(MRequirements), std::move(MEvents), MCodeLoc));
    break;
  // [extraction gap: case label missing — presumably PrefetchUSM; verify
  //  upstream]
    CommandGroup.reset(new detail::CGPrefetchUSM(
        MDstPtr, MLength, std::move(MArgsStorage), std::move(MAccStorage),
        std::move(MSharedPtrStorage), std::move(MRequirements),
        std::move(MEvents), MCodeLoc));
    break;
  // [extraction gap: case label missing — presumably AdviseUSM; verify
  //  upstream]
    CommandGroup.reset(new detail::CGAdviseUSM(
        MDstPtr, MLength, std::move(MArgsStorage), std::move(MAccStorage),
        std::move(MSharedPtrStorage), std::move(MRequirements),
        std::move(MEvents), MCGType, MCodeLoc));
    break;
  // [extraction gap: case label missing — a host-task command type; verify
  //  upstream]
    CommandGroup.reset(new detail::CGHostTask(
        std::move(MHostTask), MQueue, MQueue->getContextImplPtr(),
        std::move(MArgs), std::move(MArgsStorage), std::move(MAccStorage),
        std::move(MSharedPtrStorage), std::move(MRequirements),
        std::move(MEvents), MCGType, MCodeLoc));
    break;
  case detail::CG::Barrier:
  // [extraction gap: case label missing — presumably BarrierWaitlist; verify
  //  upstream]
    CommandGroup.reset(new detail::CGBarrier(
        std::move(MEventsWaitWithBarrier), std::move(MArgsStorage),
        std::move(MAccStorage), std::move(MSharedPtrStorage),
        std::move(MRequirements), std::move(MEvents), MCGType, MCodeLoc));
    break;
  case detail::CG::None:
  // [extraction gap: a guarding line is missing here — the cross-reference
  //  data suggests a pi trace-level check opening this scope; verify
  //  upstream]
    std::cout << "WARNING: An empty command group is submitted." << std::endl;
  }
  // An empty command group produces an already-constructed (host) event.
  detail::EventImplPtr Event =
      std::make_shared<cl::sycl::detail::event_impl>();
  MLastEvent = detail::createSyclObjFromImpl<event>(Event);
  return MLastEvent;
  }

  if (!CommandGroup)
    throw sycl::runtime_error(
        "Internal Error. Command group cannot be constructed.",
// [extraction gap: error-code argument line missing — verify upstream]

// [extraction gap: statement head missing — the cross-reference data suggests
//  a Scheduler addCG( call producing `Event`; verify upstream]
      std::move(CommandGroup), std::move(MQueue));

  MLastEvent = detail::createSyclObjFromImpl<event>(Event);
  return MLastEvent;
}
405 
406 void handler::addReduction(const std::shared_ptr<const void> &ReduObj) {
407  getHandlerImpl()->MAuxiliaryResources.push_back(ReduObj);
408 }
409 
// Registers an accessor with this handler: records it as a memory requirement,
// keeps its impl alive, and adds it to the associated-accessors list.
void handler::associateWithHandler(detail::AccessorBaseHost *AccBase,
                                   access::target AccTarget) {
  // NOTE(review): the line materializing AccImpl from AccBase (presumably
  // `detail::AccessorImplPtr AccImpl = detail::getSyclObjImpl(*AccBase);`)
  // is missing from this extraction — verify upstream.
  detail::Requirement *Req = AccImpl.get();
  // Add accessor to the list of requirements.
  MRequirements.push_back(Req);
  // Store copy of the accessor.
  MAccStorage.push_back(std::move(AccImpl));
  // Add an accessor to the handler list of associated accessors.
  // For associated accessors the index argument carries no meaning.
  MAssociatedAccesors.emplace_back(detail::kernel_param_kind_t::kind_accessor,
                                   Req, static_cast<int>(AccTarget),
                                   /*index*/ 0);
}
424 
// Appends the kernel arguments produced by a single global accessor to Args:
// the accessor itself and, for non-ESIMD kernels not created from source,
// three size_t[MDims] payloads (access range, memory range, offset).
// IndexShift is advanced by the number of extra arguments appended.
static void addArgsForGlobalAccessor(detail::Requirement *AccImpl, size_t Index,
                                     size_t &IndexShift, int Size,
                                     bool IsKernelCreatedFromSource,
                                     size_t GlobalSize,
                                     std::vector<detail::ArgDesc> &Args,
                                     bool isESIMD) {
  // NOTE(review): a line is missing here in this extraction (presumably
  // `using detail::kernel_param_kind_t;`) — verify upstream.
  if (AccImpl->PerWI)
    AccImpl->resize(GlobalSize);

  Args.emplace_back(kernel_param_kind_t::kind_accessor, AccImpl, Size,
                    Index + IndexShift);

  // TODO ESIMD currently does not support offset, memory and access ranges -
  // accessor::init for ESIMD-mode accessor has a single field, translated
  // to a single kernel argument set above.
  if (!isESIMD && !IsKernelCreatedFromSource) {
    // Dimensionality of the buffer is 1 when dimensionality of the
    // accessor is 0.
    const size_t SizeAccField =
        sizeof(size_t) * (AccImpl->MDims == 0 ? 1 : AccImpl->MDims);
    ++IndexShift;
    Args.emplace_back(kernel_param_kind_t::kind_std_layout,
                      &AccImpl->MAccessRange[0], SizeAccField,
                      Index + IndexShift);
    ++IndexShift;
    Args.emplace_back(kernel_param_kind_t::kind_std_layout,
                      &AccImpl->MMemoryRange[0], SizeAccField,
                      Index + IndexShift);
    ++IndexShift;
    Args.emplace_back(kernel_param_kind_t::kind_std_layout,
                      &AccImpl->MOffset[0], SizeAccField, Index + IndexShift);
  }
}
459 
460 // TODO remove this one once ABI breaking changes are allowed.
461 void handler::processArg(void *Ptr, const detail::kernel_param_kind_t &Kind,
462  const int Size, const size_t Index, size_t &IndexShift,
463  bool IsKernelCreatedFromSource) {
464  processArg(Ptr, Kind, Size, Index, IndexShift, IsKernelCreatedFromSource,
465  false);
466 }
467 
// Translates one raw kernel argument (identified by Kind) into one or more
// MArgs entries; accessor-like kinds may expand into several entries and
// advance IndexShift accordingly.
// NOTE(review): several case labels are missing from this extraction; each
// gap is marked below — verify against upstream handler.cpp.
void handler::processArg(void *Ptr, const detail::kernel_param_kind_t &Kind,
                         const int Size, const size_t Index, size_t &IndexShift,
                         bool IsKernelCreatedFromSource, bool IsESIMD) {

  switch (Kind) {
  // [extraction gap: case label(s) missing — kinds forwarded verbatim;
  //  verify upstream]
    MArgs.emplace_back(Kind, Ptr, Size, Index + IndexShift);
    break;
  }
  // [extraction gap: case label missing — presumably the stream kind; verify
  //  upstream]
    // Stream contains several accessors inside.
    stream *S = static_cast<stream *>(Ptr);

    detail::AccessorBaseHost *GBufBase =
        static_cast<detail::AccessorBaseHost *>(&S->GlobalBuf);
    detail::AccessorImplPtr GBufImpl = detail::getSyclObjImpl(*GBufBase);
    detail::Requirement *GBufReq = GBufImpl.get();
    addArgsForGlobalAccessor(GBufReq, Index, IndexShift, Size,
                             IsKernelCreatedFromSource,
                             MNDRDesc.GlobalSize.size(), MArgs, IsESIMD);
    ++IndexShift;
    detail::AccessorBaseHost *GOffsetBase =
        static_cast<detail::AccessorBaseHost *>(&S->GlobalOffset);
    detail::AccessorImplPtr GOfssetImpl = detail::getSyclObjImpl(*GOffsetBase);
    detail::Requirement *GOffsetReq = GOfssetImpl.get();
    addArgsForGlobalAccessor(GOffsetReq, Index, IndexShift, Size,
                             IsKernelCreatedFromSource,
                             MNDRDesc.GlobalSize.size(), MArgs, IsESIMD);
    ++IndexShift;
    detail::AccessorBaseHost *GFlushBase =
        static_cast<detail::AccessorBaseHost *>(&S->GlobalFlushBuf);
    detail::AccessorImplPtr GFlushImpl = detail::getSyclObjImpl(*GFlushBase);
    detail::Requirement *GFlushReq = GFlushImpl.get();

    size_t GlobalSize = MNDRDesc.GlobalSize.size();
    // If work group size wasn't set explicitly then it must be received
    // from kernel attribute or set to default values.
    // For now we can't get this attribute here.
    // So we just suppose that WG size is always default for stream.
    // TODO adjust MNDRDesc when device image contains kernel's attribute
    if (GlobalSize == 0) {
      // Suppose that work group size is 1 for every dimension
      GlobalSize = MNDRDesc.NumWorkGroups.size();
    }
    addArgsForGlobalAccessor(GFlushReq, Index, IndexShift, Size,
                             IsKernelCreatedFromSource, GlobalSize, MArgs,
                             IsESIMD);
    ++IndexShift;
    MArgs.emplace_back(kernel_param_kind_t::kind_std_layout,
                       &S->FlushBufferSize, sizeof(S->FlushBufferSize),
                       Index + IndexShift);

    break;
  }
  // [extraction gap: case label missing — presumably the accessor kind;
  //  verify upstream]
    // For args kind of accessor Size is information about accessor.
    // The first 11 bits of Size encodes the accessor target.
    const access::target AccTarget = static_cast<access::target>(Size & 0x7ff);
    switch (AccTarget) {
    // [extraction gap: case labels missing — global-memory targets; verify
    //  upstream]
      detail::Requirement *AccImpl = static_cast<detail::Requirement *>(Ptr);
      addArgsForGlobalAccessor(AccImpl, Index, IndexShift, Size,
                               IsKernelCreatedFromSource,
                               MNDRDesc.GlobalSize.size(), MArgs, IsESIMD);
      break;
    }
    case access::target::local: {
      detail::LocalAccessorImplHost *LAcc =
          static_cast<detail::LocalAccessorImplHost *>(Ptr);

      range<3> &Size = LAcc->MSize;
      const int Dims = LAcc->MDims;
      int SizeInBytes = LAcc->MElemSize;
      for (int I = 0; I < Dims; ++I)
        SizeInBytes *= Size[I];
      // Some backends do not accept zero-sized local memory arguments, so we
      // make it a minimum allocation of 1 byte.
      SizeInBytes = std::max(SizeInBytes, 1);
      // Local memory is passed by size only; the pointer is null.
      MArgs.emplace_back(kernel_param_kind_t::kind_std_layout, nullptr,
                         SizeInBytes, Index + IndexShift);
      if (!IsKernelCreatedFromSource) {
        ++IndexShift;
        const size_t SizeAccField = Dims * sizeof(Size[0]);
        MArgs.emplace_back(kernel_param_kind_t::kind_std_layout, &Size,
                           SizeAccField, Index + IndexShift);
        ++IndexShift;
        MArgs.emplace_back(kernel_param_kind_t::kind_std_layout, &Size,
                           SizeAccField, Index + IndexShift);
        ++IndexShift;
        MArgs.emplace_back(kernel_param_kind_t::kind_std_layout, &Size,
                           SizeAccField, Index + IndexShift);
      }
      break;
    }
    // [extraction gap: case labels missing — image targets; verify upstream]
      detail::Requirement *AccImpl = static_cast<detail::Requirement *>(Ptr);
      MArgs.emplace_back(Kind, AccImpl, Size, Index + IndexShift);
      if (!IsKernelCreatedFromSource) {
        // TODO Handle additional kernel arguments for image class
        // if the compiler front-end adds them.
      }
      break;
    }
    // [extraction gap: label missing — presumably unsupported targets /
    //  default; verify upstream]
      throw cl::sycl::invalid_parameter_error(
          "Unsupported accessor target case.", PI_INVALID_OPERATION);
      break;
    }
    }
    break;
  }
  // [extraction gap: case label missing — presumably the sampler kind;
  //  verify upstream]
    MArgs.emplace_back(kernel_param_kind_t::kind_sampler, Ptr, sizeof(sampler),
                       Index + IndexShift);
    break;
  }
  // [extraction gap: case label missing — presumably the pointer kind;
  //  verify upstream]
    MArgs.emplace_back(
    // [extraction gap: argument line missing — verify upstream]
        Index + IndexShift);
    break;
  }
  // [extraction gap: label missing — presumably `default:`]
    throw runtime_error("Invalid kernel param kind", PI_INVALID_VALUE);
    break;
  }
}
600 
// The argument can take up more space to store additional information about
// MAccessRange, MMemoryRange, and MOffset added with addArgsForGlobalAccessor.
// We use the worst-case estimate because the lifetime of the vector is short.
// In processArg the kind_stream case introduces the maximum number of
// additional arguments: it adds 12 entries on top of the currently processed
// argument, hence the worst-case estimate is 12 + 1 = 13.
// TODO: the constant can be removed if the size of MArgs will be calculated at
// compile time.
inline constexpr size_t MaxNumAdditionalArgs = 13;
610 
611 void handler::extractArgsAndReqs() {
612  assert(MKernel && "MKernel is not initialized");
613  std::vector<detail::ArgDesc> UnPreparedArgs = std::move(MArgs);
614  MArgs.clear();
615 
616  std::sort(
617  UnPreparedArgs.begin(), UnPreparedArgs.end(),
618  [](const detail::ArgDesc &first, const detail::ArgDesc &second) -> bool {
619  return (first.MIndex < second.MIndex);
620  });
621 
622  const bool IsKernelCreatedFromSource = MKernel->isCreatedFromSource();
623  MArgs.reserve(MaxNumAdditionalArgs * UnPreparedArgs.size());
624 
625  size_t IndexShift = 0;
626  for (size_t I = 0; I < UnPreparedArgs.size(); ++I) {
627  void *Ptr = UnPreparedArgs[I].MPtr;
628  const detail::kernel_param_kind_t &Kind = UnPreparedArgs[I].MType;
629  const int &Size = UnPreparedArgs[I].MSize;
630  const int Index = UnPreparedArgs[I].MIndex;
631  processArg(Ptr, Kind, Size, Index, IndexShift, IsKernelCreatedFromSource,
632  false);
633  }
634 }
635 
636 // TODO remove once ABI breaking changes are allowed
637 void handler::extractArgsAndReqsFromLambda(
638  char *LambdaPtr, size_t KernelArgsNum,
639  const detail::kernel_param_desc_t *KernelArgs) {
640  extractArgsAndReqsFromLambda(LambdaPtr, KernelArgsNum, KernelArgs, false);
641 }
642 
// Populates MArgs from the captured fields of a kernel lambda, using the
// compiler-generated per-argument descriptors (kind/offset/info).
void handler::extractArgsAndReqsFromLambda(
    char *LambdaPtr, size_t KernelArgsNum,
    const detail::kernel_param_desc_t *KernelArgs, bool IsESIMD) {
  // Lambda kernels are never "created from source".
  const bool IsKernelCreatedFromSource = false;
  size_t IndexShift = 0;
  // Worst case: every argument may expand into MaxNumAdditionalArgs entries.
  MArgs.reserve(MaxNumAdditionalArgs * KernelArgsNum);

  for (size_t I = 0; I < KernelArgsNum; ++I) {
    void *Ptr = LambdaPtr + KernelArgs[I].offset;
    const detail::kernel_param_kind_t &Kind = KernelArgs[I].kind;
    const int &Size = KernelArgs[I].info;
    // NOTE(review): the condition opening this scope is missing from this
    // extraction (the stray closing brace below suggests an
    // `if (Kind == ...kind_accessor) {`) — verify upstream.
      // For args kind of accessor Size is information about accessor.
      // The first 11 bits of Size encodes the accessor target.
      const access::target AccTarget =
          static_cast<access::target>(Size & 0x7ff);
      // Replace the captured accessor object with its impl (Requirement)
      // pointer before generic processing.
      if ((AccTarget == access::target::device ||
           AccTarget == access::target::constant_buffer) ||
          (AccTarget == access::target::image ||
           AccTarget == access::target::image_array)) {
        detail::AccessorBaseHost *AccBase =
            static_cast<detail::AccessorBaseHost *>(Ptr);
        Ptr = detail::getSyclObjImpl(*AccBase).get();
      } else if (AccTarget == access::target::local) {
        detail::LocalAccessorBaseHost *LocalAccBase =
            static_cast<detail::LocalAccessorBaseHost *>(Ptr);
        Ptr = detail::getSyclObjImpl(*LocalAccBase).get();
      }
    }
    processArg(Ptr, Kind, Size, I, IndexShift, IsKernelCreatedFromSource,
               IsESIMD);
  }
}
676 
677 // Calling methods of kernel_impl requires knowledge of class layout.
678 // As this is impossible in header, there's a function that calls necessary
679 // method inside the library and returns the result.
680 std::string handler::getKernelName() {
681  return MKernel->get_info<info::kernel::function_name>();
682 }
683 
// Verifies that the explicitly-set kernel bundle (if any) contains a kernel
// with the given name; throws a sycl::exception otherwise.
void handler::verifyUsedKernelBundle(const std::string &KernelName) {
  auto UsedKernelBundleImplPtr =
      getOrInsertHandlerKernelBundle(/*Insert=*/false);
  if (!UsedKernelBundleImplPtr)
    return;

  // Implicit kernel bundles are populated late so we ignore them
  if (!getHandlerImpl()->isStateExplicitKernelBundle())
    return;

  kernel_id KernelID = detail::get_kernel_id_impl(KernelName);
  // NOTE(review): the line defining `Dev` is missing from this extraction
  // (presumably `device Dev = MQueue->get_device();`) — verify upstream.
  if (!UsedKernelBundleImplPtr->has_kernel(KernelID, Dev))
    throw sycl::exception(
    // [extraction gap: error-code argument line missing — verify upstream]
        "The kernel bundle in use does not contain the kernel");
}
701 
702 void handler::ext_oneapi_barrier(const std::vector<event> &WaitList) {
703  throwIfActionIsCreated();
704  MCGType = detail::CG::BarrierWaitlist;
705  MEventsWaitWithBarrier.resize(WaitList.size());
706  std::transform(
707  WaitList.begin(), WaitList.end(), MEventsWaitWithBarrier.begin(),
708  [](const event &Event) { return detail::getSyclObjImpl(Event); });
709 }
710 
711 __SYCL2020_DEPRECATED("use 'ext_oneapi_barrier' instead")
712 void handler::barrier(const std::vector<event> &WaitList) {
713  handler::ext_oneapi_barrier(WaitList);
714 }
715 
716 using namespace sycl::detail;
// Returns whether parallel_for range rounding is disabled via configuration.
bool handler::DisableRangeRounding() {
  // NOTE(review): the return statement is missing from this extraction
  // (presumably a SYCLConfig<...>::get()-based check) — verify upstream.
}
720 
// Returns whether range-rounding decisions should be traced.
bool handler::RangeRoundingTrace() {
  // NOTE(review): the return statement is missing from this extraction
  // (presumably a SYCLConfig<...>::get()-based check) — verify upstream.
}
724 
// Fills the out-parameters with the configured range-rounding parameters.
void handler::GetRangeRoundingSettings(size_t &MinFactor, size_t &GoodFactor,
                                       size_t &MinRange) {
  // NOTE(review): the call head is missing from this extraction — per the
  // cross-reference data this forwards to
  // SYCLConfig<SYCL_PARALLEL_FOR_RANGE_ROUNDING_PARAMS>::GetSettings(.
      MinFactor, GoodFactor, MinRange);
}
730 
731 void handler::memcpy(void *Dest, const void *Src, size_t Count) {
732  throwIfActionIsCreated();
733  MSrcPtr = const_cast<void *>(Src);
734  MDstPtr = Dest;
735  MLength = Count;
736  setType(detail::CG::CopyUSM);
737 }
738 
739 void handler::memset(void *Dest, int Value, size_t Count) {
740  throwIfActionIsCreated();
741  MDstPtr = Dest;
742  MPattern.push_back(static_cast<char>(Value));
743  MLength = Count;
744  setType(detail::CG::FillUSM);
745 }
746 
747 void handler::prefetch(const void *Ptr, size_t Count) {
748  throwIfActionIsCreated();
749  MDstPtr = const_cast<void *>(Ptr);
750  MLength = Count;
751  setType(detail::CG::PrefetchUSM);
752 }
753 
// Records a memory-advice hint for the given USM range. The advice value is
// stored as an extended member so it can be retrieved when the command group
// is processed.
void handler::mem_advise(const void *Ptr, size_t Count, int Advice) {
  throwIfActionIsCreated();
  MDstPtr = const_cast<void *>(Ptr);
  MLength = Count;
  setType(detail::CG::AdviseUSM);

  assert(!MSharedPtrStorage.empty());

  std::lock_guard<std::mutex> Lock(
      detail::GlobalHandler::instance().getHandlerExtendedMembersMutex());

  std::shared_ptr<std::vector<detail::ExtendedMemberT>> ExtendedMembersVec =
      detail::convertToExtendedMembers(MSharedPtrStorage[0]);

  detail::ExtendedMemberT EMember = {
      // NOTE(review): the member-type tag line is missing from this
      // extraction — verify upstream.
      std::make_shared<pi_mem_advice>(pi_mem_advice(Advice))};

  ExtendedMembersVec->push_back(EMember);
}
774 
// NOTE(review): the signature line is missing from this extraction
// (presumably `void handler::use_kernel_bundle(`) — verify upstream.
// Attaches an executable kernel bundle to this handler after checking that
// its context matches both submission queues.
    const kernel_bundle<bundle_state::executable> &ExecBundle) {

  std::shared_ptr<detail::queue_impl> PrimaryQueue =
      getHandlerImpl()->MSubmissionPrimaryQueue;
  if (PrimaryQueue->get_context() != ExecBundle.get_context())
    throw sycl::exception(
    // [extraction gap: error-code argument line missing — verify upstream]
        "Context associated with the primary queue is different from the "
        "context associated with the kernel bundle");

  std::shared_ptr<detail::queue_impl> SecondaryQueue =
      getHandlerImpl()->MSubmissionSecondaryQueue;
  if (SecondaryQueue &&
      SecondaryQueue->get_context() != ExecBundle.get_context())
    throw sycl::exception(
    // [extraction gap: error-code argument line missing — verify upstream]
        "Context associated with the secondary queue is different from the "
        "context associated with the kernel bundle");

  // Using an explicit bundle is mutually exclusive with setting
  // specialization constants directly on the handler.
  setStateExplicitKernelBundle();
  setHandlerKernelBundle(detail::getSyclObjImpl(ExecBundle));
}
798 
// NOTE(review): the signature line is missing from this extraction
// (presumably `void handler::depends_on(event Event) {`) — verify upstream.
// Adds a single event dependency to the command group; discarded events
// cannot be depended on.
  auto EventImpl = detail::getSyclObjImpl(Event);
  if (EventImpl->isDiscarded()) {
    // [extraction gap: throw-statement head missing — presumably
    //  `throw sycl::exception(make_error_code(errc::invalid),`; verify
    //  upstream]
        "Queue operation cannot depend on discarded event.");
  }
  MEvents.push_back(EventImpl);
}
807 
// Adds a list of event dependencies to the command group; discarded events
// cannot be depended on.
void handler::depends_on(const std::vector<event> &Events) {
  for (const event &Event : Events) {
    auto EventImpl = detail::getSyclObjImpl(Event);
    if (EventImpl->isDiscarded()) {
      throw sycl::exception(
      // [extraction gap: error-code argument line missing — presumably
      //  `make_error_code(errc::invalid),`; verify upstream]
          "Queue operation cannot depend on discarded event.");
    }
    MEvents.push_back(EventImpl);
  }
}
819 
820 } // namespace sycl
821 } // __SYCL_INLINE_NAMESPACE(cl)
cl::sycl::detail::CG::CopyUSM
@ CopyUSM
Definition: cg.hpp:167
cl::sycl::detail::Scheduler::addCG
EventImplPtr addCG(std::unique_ptr< detail::CG > CommandGroup, QueueImplPtr Queue)
Registers a command group, and adds it to the dependency graph.
Definition: scheduler.cpp:73
cl::sycl::handler::mem_advise
void mem_advise(const void *Ptr, size_t Length, int Advice)
Provides additional information to the underlying runtime about how different allocations are used.
Definition: handler.cpp:754
cl::sycl::detail::AccessorImplHost::MDims
unsigned int MDims
Definition: accessor_impl.hpp:114
piEnqueueKernelLaunch
pi_result piEnqueueKernelLaunch(pi_queue queue, pi_kernel kernel, pi_uint32 work_dim, const size_t *global_work_offset, const size_t *global_work_size, const size_t *local_work_size, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
Definition: pi_esimd_emulator.cpp:1759
cl::sycl::kernel_bundle< bundle_state::executable >
cl::sycl::detail::SYCLConfig::get
static const char * get()
Definition: config.hpp:109
cl::sycl::kernel_bundle::get_context
context get_context() const noexcept
Definition: kernel_bundle.hpp:212
cl::sycl::detail::CG::BarrierWaitlist
@ BarrierWaitlist
Definition: cg.hpp:163
cl::sycl::info::device
device
Definition: info_desc.hpp:53
cl::sycl::event
An event object can be used to synchronize memory transfers, enqueues of kernels and signaling barrie...
Definition: event.hpp:31
cl::sycl::detail::GlobalHandler::instance
static GlobalHandler & instance()
Definition: global_handler.cpp:35
PI_INVALID_OPERATION
@ PI_INVALID_OPERATION
Definition: pi.h:88
cl::sycl::build
kernel_bundle< bundle_state::executable > build(const kernel_bundle< bundle_state::input > &InputBundle, const std::vector< device > &Devs, const property_list &PropList={})
Definition: kernel_bundle.hpp:714
cl::sycl::handler::memcpy
void memcpy(void *Dest, const void *Src, size_t Count)
Copies data from one memory region to another, both pointed by USM pointers.
Definition: handler.cpp:731
cl::sycl::detail::ExtendedMemberT
Definition: cg.hpp:107
config.hpp
cl::sycl::detail::createSyclObjFromImpl
T createSyclObjFromImpl(decltype(T::impl) ImplObj)
Definition: common.hpp:221
cl::sycl::detail::pi::PI_TRACE_ALL
@ PI_TRACE_ALL
Definition: pi.hpp:58
cl::sycl::access::target::local
@ local
cl::sycl::access::target::host_image
@ host_image
cl::sycl::bundle_state::object
@ object
cl::sycl::detail::ProgramManager::kernelUsesAssert
bool kernelUsesAssert(OSModuleHandle M, const std::string &KernelName) const
Definition: program_manager.cpp:1080
cl::sycl::backend::ext_intel_esimd_emulator
@ ext_intel_esimd_emulator
cl::__ESIMD_NS::barrier
__ESIMD_API void barrier()
Generic work-group barrier.
Definition: memory.hpp:891
cl::sycl::detail::kernel_param_kind_t::kind_sampler
@ kind_sampler
cl::sycl::detail::ExtendedMembersType::HANDLER_IMPL
@ HANDLER_IMPL
cl::sycl::addArgsForGlobalAccessor
static void addArgsForGlobalAccessor(detail::Requirement *AccImpl, size_t Index, size_t &IndexShift, int Size, bool IsKernelCreatedFromSource, size_t GlobalSize, std::vector< detail::ArgDesc > &Args, bool isESIMD)
Definition: handler.cpp:425
cl::sycl::detail::SYCLConfig< SYCL_PARALLEL_FOR_RANGE_ROUNDING_PARAMS >::GetSettings
static void GetSettings(size_t &MinFactor, size_t &GoodFactor, size_t &MinRange)
Definition: config.hpp:235
event.hpp
cl::sycl::access::target::image_array
@ image_array
cl::sycl::make_error_code
std::error_code make_error_code(sycl::errc E) noexcept
Constructs an error code using e and sycl_category()
Definition: exception.cpp:121
cl::sycl::detail::ProgramManager::getSYCLKernelID
kernel_id getSYCLKernelID(const std::string &KernelName)
Definition: program_manager.cpp:1442
cl::sycl::detail::CG::AdviseUSM
@ AdviseUSM
Definition: cg.hpp:172
helpers.hpp
sycl
Definition: invoke_simd.hpp:68
cl::sycl::detail::kernel_param_kind_t::kind_pointer
@ kind_pointer
queue_impl.hpp
pi.hpp
cl::sycl::bundle_state::input
@ input
cl::sycl::MaxNumAdditionalArgs
constexpr size_t MaxNumAdditionalArgs
Definition: handler.cpp:609
scheduler.hpp
_pi_kernel
Implementation of a PI Kernel for CUDA.
Definition: pi_cuda.hpp:626
cl::sycl::detail::Requirement
AccessorImplHost Requirement
Definition: accessor_impl.hpp:218
handler_impl.hpp
cl::sycl::handler::depends_on
void depends_on(event Event)
Registers event dependencies on this command group.
Definition: handler.cpp:799
cl::sycl::detail::CG::CodeplayHostTask
@ CodeplayHostTask
Definition: cg.hpp:171
cl::sycl::detail::CG::FillUSM
@ FillUSM
Definition: cg.hpp:168
cl::sycl::detail::CG::PrefetchUSM
@ PrefetchUSM
Definition: cg.hpp:169
cl::sycl::detail::AccessorImplHost
Definition: accessor_impl.hpp:74
cl::sycl::detail::AccessorImplHost::PerWI
bool PerWI
Definition: accessor_impl.hpp:123
cl::sycl::detail::enqueueImpKernel
cl_int enqueueImpKernel(const QueueImplPtr &Queue, NDRDescT &NDRDesc, std::vector< ArgDesc > &Args, const std::shared_ptr< detail::kernel_bundle_impl > &KernelBundleImplPtr, const std::shared_ptr< detail::kernel_impl > &MSyclKernel, const std::string &KernelName, const detail::OSModuleHandle &OSModuleHandle, std::vector< RT::PiEvent > &RawEvents, RT::PiEvent *OutEvent, const std::function< void *(Requirement *Req)> &getMemAllocationFunc)
Definition: commands.cpp:2103
cl::sycl::detail::getCGTypeVersion
constexpr unsigned char getCGTypeVersion(unsigned int Type)
Definition: cg.hpp:139
cl::sycl::handler::memset
void memset(void *Dest, int Value, size_t Count)
Fills the memory pointed by a USM pointer with the value specified.
Definition: handler.cpp:739
cl::sycl::access::target::host_buffer
@ host_buffer
cl::sycl::detail::Scheduler::getInstance
static Scheduler & getInstance()
Definition: scheduler.cpp:209
cl::sycl::handler::prefetch
void prefetch(const void *Ptr, size_t Count)
Provides hints to the runtime library that data should be made available on a device earlier than Uni...
Definition: handler.cpp:747
cl::sycl::detail::AccessorImplHost::MOffset
id< 3 > MOffset
Definition: accessor_impl.hpp:105
cl::sycl::detail::CG::CG_VERSION::V0
@ V0
cl::sycl::detail::ArgDesc
Definition: cg_types.hpp:27
cl::sycl::handler::use_kernel_bundle
void use_kernel_bundle(const kernel_bundle< bundle_state::executable > &ExecBundle)
Definition: handler.cpp:775
cl::sycl::detail::AccessorImplPtr
std::shared_ptr< AccessorImplHost > AccessorImplPtr
Definition: accessor_impl.hpp:130
cl::sycl::detail::CG::CopyAccToAcc
@ CopyAccToAcc
Definition: cg.hpp:161
cl::sycl::handler::ext_oneapi_barrier
void ext_oneapi_barrier()
Prevents any commands submitted afterward to this queue from executing until all commands previously ...
Definition: handler.hpp:2537
cl::sycl::detail::CG::Barrier
@ Barrier
Definition: cg.hpp:162
cl::sycl::detail::EventImplPtr
std::shared_ptr< detail::event_impl > EventImplPtr
Definition: memory_manager.hpp:31
commands.hpp
cl::sycl::detail::kernel_param_kind_t::kind_std_layout
@ kind_std_layout
cl::sycl::getHandlerImplMember
static detail::ExtendedMemberT & getHandlerImplMember(std::vector< std::shared_ptr< const void >> &SharedPtrStorage)
Definition: handler.cpp:52
cl::sycl::access::target
target
Definition: access.hpp:17
cl::sycl::bundle_state::executable
@ executable
cl
We provide new interfaces for matrix muliply in this patch:
Definition: access.hpp:13
cl::sycl::detail::kernel_param_kind_t::kind_specialization_constants_buffer
@ kind_specialization_constants_buffer
global_handler.hpp
cl::sycl::detail::__SYCL2020_DEPRECATED
struct __SYCL2020_DEPRECATED("This type isn't device copyable in SYCL 2020") IsDeprecatedDeviceCopyable< T
cl::sycl::detail::ProgramManager::getInstance
static ProgramManager & getInstance()
Definition: program_manager.cpp:63
cl::sycl::detail::kernel_param_kind_t::kind_stream
@ kind_stream
pi_mem_advice
_pi_mem_advice pi_mem_advice
Definition: pi.h:649
cl::sycl::handler
Command group handler class.
Definition: handler.hpp:361
cl::sycl::detail::CG::None
@ None
Definition: cg.hpp:157
cl::sycl::detail::kernel_param_kind_t::kind_accessor
@ kind_accessor
cl::sycl::access::target::constant_buffer
@ constant_buffer
PI_INVALID_VALUE
@ PI_INVALID_VALUE
Definition: pi.h:91
cl::sycl::access::target::image
@ image
cl::sycl::detail::ExtendedMembersType::HANDLER_KERNEL_BUNDLE
@ HANDLER_KERNEL_BUNDLE
cl::sycl::detail::CG::UpdateHost
@ UpdateHost
Definition: cg.hpp:165
cl::sycl::detail::CG::RunOnHostIntel
@ RunOnHostIntel
Definition: cg.hpp:166
cl::sycl::detail::getSyclObjImpl
decltype(Obj::impl) getSyclObjImpl(const Obj &SyclObject)
Definition: common.hpp:204
cl::sycl::cl_int
std::int32_t cl_int
Definition: aliases.hpp:82
handler.hpp
cl::sycl::detail::CG::CopyPtrToAcc
@ CopyPtrToAcc
Definition: cg.hpp:160
cl::sycl::detail::AccessorImplHost::resize
void resize(size_t GlobalSize)
Definition: accessor_impl.cpp:27
cl::sycl::errc::kernel_not_supported
@ kernel_not_supported
std
Definition: accessor.hpp:2616
kernel_desc.hpp
_pi_event
PI Event mapping to CUevent.
Definition: pi_cuda.hpp:460
cl::sycl::detail::AccessorImplHost::MMemoryRange
range< 3 > MMemoryRange
Definition: accessor_impl.hpp:109
cl::sycl::access::target::device
@ device
cl::sycl::detail::kernel_param_kind_t::kind_invalid
@ kind_invalid
cl::sycl::detail::CG::CopyAccToPtr
@ CopyAccToPtr
Definition: cg.hpp:159
cl::sycl::detail::ExtendedMembersType::HANDLER_MEM_ADVICE
@ HANDLER_MEM_ADVICE
cl::sycl::detail::getDeviceFromHandler
device getDeviceFromHandler(handler &CommandGroupHandlerRef)
Definition: accessor.cpp:15
cl::sycl::detail::convertToExtendedMembers
static std::shared_ptr< std::vector< ExtendedMemberT > > convertToExtendedMembers(const std::shared_ptr< const void > &SPtr)
Definition: cg.hpp:113
cl::sycl::errc::invalid
@ invalid
kernel_bundle_impl.hpp
cl::sycl::detail::kernel_param_kind_t
kernel_param_kind_t
Definition: kernel_desc.hpp:25
cl::sycl::detail::CG::Kernel
@ Kernel
Definition: cg.hpp:158
cl::sycl::detail::CG::CodeplayInteropTask
@ CodeplayInteropTask
Definition: cg.hpp:170
stream.hpp
cl::sycl::detail::CG::Fill
@ Fill
Definition: cg.hpp:164
info_desc.hpp
common.hpp
cl::sycl::info::kernel::function_name
@ function_name
cl::sycl::exception
Definition: exception.hpp:63
kernel_impl.hpp
cl::sycl::detail::AccessorImplHost::MAccessRange
range< 3 > MAccessRange
Definition: accessor_impl.hpp:107
cl::sycl::detail::pi::trace
bool trace(TraceLevel level)
Definition: pi.cpp:368
cl::sycl::errc::runtime
@ runtime
cl::sycl::detail::get_kernel_id_impl
kernel_id get_kernel_id_impl(std::string KernelName)
Definition: kernel_bundle.cpp:116
__SYCL_INLINE_NAMESPACE
#define __SYCL_INLINE_NAMESPACE(X)
Definition: defines_elementary.hpp:12