DPC++ Runtime
Runtime libraries for oneAPI Data Parallel C++
handler.cpp
Go to the documentation of this file.
1 //==-------- handler.cpp --- SYCL command group handler --------------------==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include <algorithm>
10 
14 #include <CL/sycl/event.hpp>
15 #include <CL/sycl/handler.hpp>
17 #include <CL/sycl/stream.hpp>
18 #include <detail/config.hpp>
20 #include <detail/handler_impl.hpp>
22 #include <detail/kernel_impl.hpp>
23 #include <detail/queue_impl.hpp>
25 
27 namespace sycl {
28 
29 handler::handler(std::shared_ptr<detail::queue_impl> Queue, bool IsHost)
30  : handler(Queue, Queue, nullptr, IsHost) {}
31 
32 handler::handler(std::shared_ptr<detail::queue_impl> Queue,
33  std::shared_ptr<detail::queue_impl> PrimaryQueue,
34  std::shared_ptr<detail::queue_impl> SecondaryQueue,
35  bool IsHost)
36  : MQueue(std::move(Queue)), MIsHost(IsHost) {
37  // Create extended members and insert handler_impl
38  // TODO: When allowed to break ABI the handler_impl should be made a member
39  // of the handler class.
40  auto ExtendedMembers =
41  std::make_shared<std::vector<detail::ExtendedMemberT>>();
42  detail::ExtendedMemberT HandlerImplMember = {
44  std::make_shared<detail::handler_impl>(std::move(PrimaryQueue),
45  std::move(SecondaryQueue))};
46  ExtendedMembers->push_back(std::move(HandlerImplMember));
47  MSharedPtrStorage.push_back(std::move(ExtendedMembers));
48 }
49 
51 std::shared_ptr<detail::handler_impl> handler::getHandlerImpl() const {
52  std::lock_guard<std::mutex> Lock(
53  detail::GlobalHandler::instance().getHandlerExtendedMembersMutex());
54 
55  assert(!MSharedPtrStorage.empty());
56 
57  std::shared_ptr<std::vector<detail::ExtendedMemberT>> ExtendedMembersVec =
58  detail::convertToExtendedMembers(MSharedPtrStorage[0]);
59 
60  assert(ExtendedMembersVec->size() > 0);
61 
62  auto HandlerImplMember = (*ExtendedMembersVec)[0];
63 
64  assert(detail::ExtendedMembersType::HANDLER_IMPL == HandlerImplMember.MType);
65 
66  return std::static_pointer_cast<detail::handler_impl>(
67  HandlerImplMember.MData);
68 }
69 
70 // Sets the submission state to indicate that an explicit kernel bundle has been
71 // set. Throws a sycl::exception with errc::invalid if the current state
72 // indicates that a specialization constant has been set.
73 void handler::setStateExplicitKernelBundle() {
74  getHandlerImpl()->setStateExplicitKernelBundle();
75 }
76 
77 // Sets the submission state to indicate that a specialization constant has been
78 // set. Throws a sycl::exception with errc::invalid if the current state
79 // indicates that an explicit kernel bundle has been set.
80 void handler::setStateSpecConstSet() {
81  getHandlerImpl()->setStateSpecConstSet();
82 }
83 
84 // Returns true if the submission state is EXPLICIT_KERNEL_BUNDLE_STATE and
85 // false otherwise.
86 bool handler::isStateExplicitKernelBundle() const {
87  return getHandlerImpl()->isStateExplicitKernelBundle();
88 }
89 
90 // Returns a shared_ptr to kernel_bundle stored in the extended members vector.
91 // If there is no kernel_bundle created:
92 // returns newly created kernel_bundle if Insert is true
93 // returns shared_ptr(nullptr) if Insert is false
94 std::shared_ptr<detail::kernel_bundle_impl>
95 handler::getOrInsertHandlerKernelBundle(bool Insert) const {
96 
97  std::lock_guard<std::mutex> Lock(
98  detail::GlobalHandler::instance().getHandlerExtendedMembersMutex());
99 
100  assert(!MSharedPtrStorage.empty());
101 
102  std::shared_ptr<std::vector<detail::ExtendedMemberT>> ExtendedMembersVec =
103  detail::convertToExtendedMembers(MSharedPtrStorage[0]);
104  // Look for the kernel bundle in extended members
105  std::shared_ptr<detail::kernel_bundle_impl> KernelBundleImpPtr;
106  for (const detail::ExtendedMemberT &EMember : *ExtendedMembersVec)
108  KernelBundleImpPtr =
109  std::static_pointer_cast<detail::kernel_bundle_impl>(EMember.MData);
110  break;
111  }
112 
113  // No kernel bundle yet, create one
114  if (!KernelBundleImpPtr && Insert) {
115  KernelBundleImpPtr = detail::getSyclObjImpl(
116  get_kernel_bundle<bundle_state::input>(MQueue->get_context()));
117  if (KernelBundleImpPtr->empty()) {
118  KernelBundleImpPtr = detail::getSyclObjImpl(
119  get_kernel_bundle<bundle_state::executable>(MQueue->get_context()));
120  }
121 
122  detail::ExtendedMemberT EMember = {
124  ExtendedMembersVec->push_back(EMember);
125  }
126 
127  return KernelBundleImpPtr;
128 }
129 
130 // Sets kernel bundle to the provided one. Either replaces existing one or
131 // create a new entry in the extended members vector.
132 void handler::setHandlerKernelBundle(
133  const std::shared_ptr<detail::kernel_bundle_impl> &NewKernelBundleImpPtr) {
134  assert(!MSharedPtrStorage.empty());
135 
136  std::lock_guard<std::mutex> Lock(
137  detail::GlobalHandler::instance().getHandlerExtendedMembersMutex());
138 
139  std::shared_ptr<std::vector<detail::ExtendedMemberT>> ExendedMembersVec =
140  detail::convertToExtendedMembers(MSharedPtrStorage[0]);
141 
142  // Look for kernel bundle in extended members and overwrite it.
143  for (detail::ExtendedMemberT &EMember : *ExendedMembersVec) {
145  EMember.MData = NewKernelBundleImpPtr;
146  return;
147  }
148  }
149 
150  // Kernel bundle was set found so we add it.
151  detail::ExtendedMemberT EMember = {
153  NewKernelBundleImpPtr};
154  ExendedMembersVec->push_back(EMember);
155 }
156 
157 event handler::finalize() {
158  // This block of code is needed only for reduction implementation.
159  // It is harmless (does nothing) for everything else.
160  if (MIsFinalized)
161  return MLastEvent;
162  MIsFinalized = true;
163 
164  // Kernel_bundles could not be used before CGType version 1
165  if (getCGTypeVersion(MCGType) >
166  static_cast<unsigned int>(detail::CG::CG_VERSION::V0)) {
167  // If there were uses of set_specialization_constant build the kernel_bundle
168  std::shared_ptr<detail::kernel_bundle_impl> KernelBundleImpPtr =
169  getOrInsertHandlerKernelBundle(/*Insert=*/false);
170  if (KernelBundleImpPtr) {
171  switch (KernelBundleImpPtr->get_bundle_state()) {
172  case bundle_state::input: {
173  // Underlying level expects kernel_bundle to be in executable state
174  kernel_bundle<bundle_state::executable> ExecBundle = build(
175  detail::createSyclObjFromImpl<kernel_bundle<bundle_state::input>>(
176  KernelBundleImpPtr));
177  setHandlerKernelBundle(detail::getSyclObjImpl(ExecBundle));
178  break;
179  }
181  // Nothing to do
182  break;
184  assert(0 && "Expected that the bundle is either in input or executable "
185  "states.");
186  break;
187  }
188  }
189  }
190 
191  std::unique_ptr<detail::CG> CommandGroup;
192  switch (getType()) {
193  case detail::CG::Kernel:
195  // Copy kernel name here instead of move so that it's available after
196  // running of this method by reductions implementation. This allows for
197  // assert feature to check if kernel uses assertions
198  CommandGroup.reset(new detail::CGExecKernel(
199  std::move(MNDRDesc), std::move(MHostKernel), std::move(MKernel),
200  std::move(MArgsStorage), std::move(MAccStorage),
201  std::move(MSharedPtrStorage), std::move(MRequirements),
202  std::move(MEvents), std::move(MArgs), MKernelName, MOSModuleHandle,
203  std::move(MStreamStorage), MCGType, MCodeLoc));
204  break;
205  }
207  CommandGroup.reset(new detail::CGInteropTask(
208  std::move(MInteropTask), std::move(MArgsStorage),
209  std::move(MAccStorage), std::move(MSharedPtrStorage),
210  std::move(MRequirements), std::move(MEvents), MCGType, MCodeLoc));
211  break;
215  CommandGroup.reset(new detail::CGCopy(
216  MCGType, MSrcPtr, MDstPtr, std::move(MArgsStorage),
217  std::move(MAccStorage), std::move(MSharedPtrStorage),
218  std::move(MRequirements), std::move(MEvents), MCodeLoc));
219  break;
220  case detail::CG::Fill:
221  CommandGroup.reset(new detail::CGFill(
222  std::move(MPattern), MDstPtr, std::move(MArgsStorage),
223  std::move(MAccStorage), std::move(MSharedPtrStorage),
224  std::move(MRequirements), std::move(MEvents), MCodeLoc));
225  break;
227  CommandGroup.reset(new detail::CGUpdateHost(
228  MDstPtr, std::move(MArgsStorage), std::move(MAccStorage),
229  std::move(MSharedPtrStorage), std::move(MRequirements),
230  std::move(MEvents), MCodeLoc));
231  break;
232  case detail::CG::CopyUSM:
233  CommandGroup.reset(new detail::CGCopyUSM(
234  MSrcPtr, MDstPtr, MLength, std::move(MArgsStorage),
235  std::move(MAccStorage), std::move(MSharedPtrStorage),
236  std::move(MRequirements), std::move(MEvents), MCodeLoc));
237  break;
238  case detail::CG::FillUSM:
239  CommandGroup.reset(new detail::CGFillUSM(
240  std::move(MPattern), MDstPtr, MLength, std::move(MArgsStorage),
241  std::move(MAccStorage), std::move(MSharedPtrStorage),
242  std::move(MRequirements), std::move(MEvents), MCodeLoc));
243  break;
245  CommandGroup.reset(new detail::CGPrefetchUSM(
246  MDstPtr, MLength, std::move(MArgsStorage), std::move(MAccStorage),
247  std::move(MSharedPtrStorage), std::move(MRequirements),
248  std::move(MEvents), MCodeLoc));
249  break;
251  CommandGroup.reset(new detail::CGAdviseUSM(
252  MDstPtr, MLength, std::move(MArgsStorage), std::move(MAccStorage),
253  std::move(MSharedPtrStorage), std::move(MRequirements),
254  std::move(MEvents), MCGType, MCodeLoc));
255  break;
257  CommandGroup.reset(new detail::CGHostTask(
258  std::move(MHostTask), MQueue, MQueue->getContextImplPtr(),
259  std::move(MArgs), std::move(MArgsStorage), std::move(MAccStorage),
260  std::move(MSharedPtrStorage), std::move(MRequirements),
261  std::move(MEvents), MCGType, MCodeLoc));
262  break;
263  case detail::CG::Barrier:
265  CommandGroup.reset(new detail::CGBarrier(
266  std::move(MEventsWaitWithBarrier), std::move(MArgsStorage),
267  std::move(MAccStorage), std::move(MSharedPtrStorage),
268  std::move(MRequirements), std::move(MEvents), MCGType, MCodeLoc));
269  break;
270  case detail::CG::None:
272  std::cout << "WARNING: An empty command group is submitted." << std::endl;
273  }
274  detail::EventImplPtr Event =
275  std::make_shared<cl::sycl::detail::event_impl>();
276  MLastEvent = detail::createSyclObjFromImpl<event>(Event);
277  return MLastEvent;
278  }
279 
280  if (!CommandGroup)
281  throw sycl::runtime_error(
282  "Internal Error. Command group cannot be constructed.",
284 
286  std::move(CommandGroup), std::move(MQueue));
287 
288  MLastEvent = detail::createSyclObjFromImpl<event>(Event);
289  return MLastEvent;
290 }
291 
292 void handler::associateWithHandler(detail::AccessorBaseHost *AccBase,
293  access::target AccTarget) {
295  detail::Requirement *Req = AccImpl.get();
296  // Add accessor to the list of requirements.
297  MRequirements.push_back(Req);
298  // Store copy of the accessor.
299  MAccStorage.push_back(std::move(AccImpl));
300  // Add an accessor to the handler list of associated accessors.
301  // For associated accessors index does not means nothing.
302  MAssociatedAccesors.emplace_back(detail::kernel_param_kind_t::kind_accessor,
303  Req, static_cast<int>(AccTarget),
304  /*index*/ 0);
305 }
306 
307 static void addArgsForGlobalAccessor(detail::Requirement *AccImpl, size_t Index,
308  size_t &IndexShift, int Size,
309  bool IsKernelCreatedFromSource,
310  size_t GlobalSize,
311  std::vector<detail::ArgDesc> &Args,
312  bool isESIMD) {
314  if (AccImpl->PerWI)
315  AccImpl->resize(GlobalSize);
316 
317  Args.emplace_back(kernel_param_kind_t::kind_accessor, AccImpl, Size,
318  Index + IndexShift);
319 
320  // TODO ESIMD currently does not suport offset, memory and access ranges -
321  // accessor::init for ESIMD-mode accessor has a single field, translated
322  // to a single kernel argument set above.
323  if (!isESIMD && !IsKernelCreatedFromSource) {
324  // Dimensionality of the buffer is 1 when dimensionality of the
325  // accessor is 0.
326  const size_t SizeAccField =
327  sizeof(size_t) * (AccImpl->MDims == 0 ? 1 : AccImpl->MDims);
328  ++IndexShift;
329  Args.emplace_back(kernel_param_kind_t::kind_std_layout,
330  &AccImpl->MAccessRange[0], SizeAccField,
331  Index + IndexShift);
332  ++IndexShift;
333  Args.emplace_back(kernel_param_kind_t::kind_std_layout,
334  &AccImpl->MMemoryRange[0], SizeAccField,
335  Index + IndexShift);
336  ++IndexShift;
337  Args.emplace_back(kernel_param_kind_t::kind_std_layout,
338  &AccImpl->MOffset[0], SizeAccField, Index + IndexShift);
339  }
340 }
341 
342 // TODO remove this one once ABI breaking changes are allowed.
343 void handler::processArg(void *Ptr, const detail::kernel_param_kind_t &Kind,
344  const int Size, const size_t Index, size_t &IndexShift,
345  bool IsKernelCreatedFromSource) {
346  processArg(Ptr, Kind, Size, Index, IndexShift, IsKernelCreatedFromSource,
347  false);
348 }
349 
350 void handler::processArg(void *Ptr, const detail::kernel_param_kind_t &Kind,
351  const int Size, const size_t Index, size_t &IndexShift,
352  bool IsKernelCreatedFromSource, bool IsESIMD) {
354 
355  switch (Kind) {
358  MArgs.emplace_back(Kind, Ptr, Size, Index + IndexShift);
359  break;
360  }
362  // Stream contains several accessors inside.
363  stream *S = static_cast<stream *>(Ptr);
364 
365  detail::AccessorBaseHost *GBufBase =
366  static_cast<detail::AccessorBaseHost *>(&S->GlobalBuf);
367  detail::AccessorImplPtr GBufImpl = detail::getSyclObjImpl(*GBufBase);
368  detail::Requirement *GBufReq = GBufImpl.get();
369  addArgsForGlobalAccessor(GBufReq, Index, IndexShift, Size,
370  IsKernelCreatedFromSource,
371  MNDRDesc.GlobalSize.size(), MArgs, IsESIMD);
372  ++IndexShift;
373  detail::AccessorBaseHost *GOffsetBase =
374  static_cast<detail::AccessorBaseHost *>(&S->GlobalOffset);
375  detail::AccessorImplPtr GOfssetImpl = detail::getSyclObjImpl(*GOffsetBase);
376  detail::Requirement *GOffsetReq = GOfssetImpl.get();
377  addArgsForGlobalAccessor(GOffsetReq, Index, IndexShift, Size,
378  IsKernelCreatedFromSource,
379  MNDRDesc.GlobalSize.size(), MArgs, IsESIMD);
380  ++IndexShift;
381  detail::AccessorBaseHost *GFlushBase =
382  static_cast<detail::AccessorBaseHost *>(&S->GlobalFlushBuf);
383  detail::AccessorImplPtr GFlushImpl = detail::getSyclObjImpl(*GFlushBase);
384  detail::Requirement *GFlushReq = GFlushImpl.get();
385  addArgsForGlobalAccessor(GFlushReq, Index, IndexShift, Size,
386  IsKernelCreatedFromSource,
387  MNDRDesc.GlobalSize.size(), MArgs, IsESIMD);
388  ++IndexShift;
389  MArgs.emplace_back(kernel_param_kind_t::kind_std_layout,
390  &S->FlushBufferSize, sizeof(S->FlushBufferSize),
391  Index + IndexShift);
392 
393  break;
394  }
396  // For args kind of accessor Size is information about accessor.
397  // The first 11 bits of Size encodes the accessor target.
398  const access::target AccTarget = static_cast<access::target>(Size & 0x7ff);
399  switch (AccTarget) {
402  detail::Requirement *AccImpl = static_cast<detail::Requirement *>(Ptr);
403  addArgsForGlobalAccessor(AccImpl, Index, IndexShift, Size,
404  IsKernelCreatedFromSource,
405  MNDRDesc.GlobalSize.size(), MArgs, IsESIMD);
406  break;
407  }
408  case access::target::local: {
409  detail::LocalAccessorImplHost *LAcc =
410  static_cast<detail::LocalAccessorImplHost *>(Ptr);
411 
412  range<3> &Size = LAcc->MSize;
413  const int Dims = LAcc->MDims;
414  int SizeInBytes = LAcc->MElemSize;
415  for (int I = 0; I < Dims; ++I)
416  SizeInBytes *= Size[I];
417  MArgs.emplace_back(kernel_param_kind_t::kind_std_layout, nullptr,
418  SizeInBytes, Index + IndexShift);
419  if (!IsKernelCreatedFromSource) {
420  ++IndexShift;
421  const size_t SizeAccField = Dims * sizeof(Size[0]);
422  MArgs.emplace_back(kernel_param_kind_t::kind_std_layout, &Size,
423  SizeAccField, Index + IndexShift);
424  ++IndexShift;
425  MArgs.emplace_back(kernel_param_kind_t::kind_std_layout, &Size,
426  SizeAccField, Index + IndexShift);
427  ++IndexShift;
428  MArgs.emplace_back(kernel_param_kind_t::kind_std_layout, &Size,
429  SizeAccField, Index + IndexShift);
430  }
431  break;
432  }
435  detail::Requirement *AccImpl = static_cast<detail::Requirement *>(Ptr);
436  MArgs.emplace_back(Kind, AccImpl, Size, Index + IndexShift);
437  if (!IsKernelCreatedFromSource) {
438  // TODO Handle additional kernel arguments for image class
439  // if the compiler front-end adds them.
440  }
441  break;
442  }
445  throw cl::sycl::invalid_parameter_error(
446  "Unsupported accessor target case.", PI_INVALID_OPERATION);
447  break;
448  }
449  }
450  break;
451  }
453  MArgs.emplace_back(kernel_param_kind_t::kind_sampler, Ptr, sizeof(sampler),
454  Index + IndexShift);
455  break;
456  }
458  MArgs.emplace_back(
460  Index + IndexShift);
461  break;
462  }
464  throw runtime_error("Invalid kernel param kind", PI_INVALID_VALUE);
465  break;
466  }
467 }
468 
469 // The argument can take up more space to store additional information about
470 // MAccessRange, MMemoryRange, and MOffset added with addArgsForGlobalAccessor.
471 // We use the worst-case estimate because the lifetime of the vector is short.
472 // In processArg the kind_stream case introduces the maximum number of
473 // additional arguments. The case adds additional 12 arguments to the currently
474 // processed argument, hence worst-case estimate is 12+1=13.
475 // TODO: the constant can be removed if the size of MArgs will be calculated at
476 // compile time.
477 inline constexpr size_t MaxNumAdditionalArgs = 13;
478 
479 void handler::extractArgsAndReqs() {
480  assert(MKernel && "MKernel is not initialized");
481  std::vector<detail::ArgDesc> UnPreparedArgs = std::move(MArgs);
482  MArgs.clear();
483 
484  std::sort(
485  UnPreparedArgs.begin(), UnPreparedArgs.end(),
486  [](const detail::ArgDesc &first, const detail::ArgDesc &second) -> bool {
487  return (first.MIndex < second.MIndex);
488  });
489 
490  const bool IsKernelCreatedFromSource = MKernel->isCreatedFromSource();
491  MArgs.reserve(MaxNumAdditionalArgs * UnPreparedArgs.size());
492 
493  size_t IndexShift = 0;
494  for (size_t I = 0; I < UnPreparedArgs.size(); ++I) {
495  void *Ptr = UnPreparedArgs[I].MPtr;
496  const detail::kernel_param_kind_t &Kind = UnPreparedArgs[I].MType;
497  const int &Size = UnPreparedArgs[I].MSize;
498  const int Index = UnPreparedArgs[I].MIndex;
499  processArg(Ptr, Kind, Size, Index, IndexShift, IsKernelCreatedFromSource,
500  false);
501  }
502 }
503 
504 // TODO remove once ABI breaking changes are allowed
505 void handler::extractArgsAndReqsFromLambda(
506  char *LambdaPtr, size_t KernelArgsNum,
507  const detail::kernel_param_desc_t *KernelArgs) {
508  extractArgsAndReqsFromLambda(LambdaPtr, KernelArgsNum, KernelArgs, false);
509 }
510 
511 void handler::extractArgsAndReqsFromLambda(
512  char *LambdaPtr, size_t KernelArgsNum,
513  const detail::kernel_param_desc_t *KernelArgs, bool IsESIMD) {
514  const bool IsKernelCreatedFromSource = false;
515  size_t IndexShift = 0;
516  MArgs.reserve(MaxNumAdditionalArgs * KernelArgsNum);
517 
518  for (size_t I = 0; I < KernelArgsNum; ++I) {
519  void *Ptr = LambdaPtr + KernelArgs[I].offset;
520  const detail::kernel_param_kind_t &Kind = KernelArgs[I].kind;
521  const int &Size = KernelArgs[I].info;
523  // For args kind of accessor Size is information about accessor.
524  // The first 11 bits of Size encodes the accessor target.
525  const access::target AccTarget =
526  static_cast<access::target>(Size & 0x7ff);
527  if ((AccTarget == access::target::device ||
528  AccTarget == access::target::constant_buffer) ||
529  (AccTarget == access::target::image ||
530  AccTarget == access::target::image_array)) {
531  detail::AccessorBaseHost *AccBase =
532  static_cast<detail::AccessorBaseHost *>(Ptr);
533  Ptr = detail::getSyclObjImpl(*AccBase).get();
534  } else if (AccTarget == access::target::local) {
535  detail::LocalAccessorBaseHost *LocalAccBase =
536  static_cast<detail::LocalAccessorBaseHost *>(Ptr);
537  Ptr = detail::getSyclObjImpl(*LocalAccBase).get();
538  }
539  }
540  processArg(Ptr, Kind, Size, I, IndexShift, IsKernelCreatedFromSource,
541  IsESIMD);
542  }
543 }
544 
545 // Calling methods of kernel_impl requires knowledge of class layout.
546 // As this is impossible in header, there's a function that calls necessary
547 // method inside the library and returns the result.
548 std::string handler::getKernelName() {
549  return MKernel->get_info<info::kernel::function_name>();
550 }
551 
552 void handler::verifyUsedKernelBundle(const std::string &KernelName) {
553  auto UsedKernelBundleImplPtr =
554  getOrInsertHandlerKernelBundle(/*Insert=*/false);
555  if (!UsedKernelBundleImplPtr)
556  return;
557 
558  kernel_id KernelID = detail::get_kernel_id_impl(KernelName);
560  if (!UsedKernelBundleImplPtr->has_kernel(KernelID, Dev))
561  throw sycl::exception(
563  "The kernel bundle in use does not contain the kernel");
564 }
565 
566 void handler::ext_oneapi_barrier(const std::vector<event> &WaitList) {
567  throwIfActionIsCreated();
568  MCGType = detail::CG::BarrierWaitlist;
569  MEventsWaitWithBarrier.resize(WaitList.size());
570  std::transform(
571  WaitList.begin(), WaitList.end(), MEventsWaitWithBarrier.begin(),
572  [](const event &Event) { return detail::getSyclObjImpl(Event); });
573 }
574 
575 __SYCL2020_DEPRECATED("use 'ext_oneapi_barrier' instead")
576 void handler::barrier(const std::vector<event> &WaitList) {
577  handler::ext_oneapi_barrier(WaitList);
578 }
579 
580 using namespace sycl::detail;
581 bool handler::DisableRangeRounding() {
583 }
584 
585 bool handler::RangeRoundingTrace() {
587 }
588 
589 void handler::GetRangeRoundingSettings(size_t &MinFactor, size_t &GoodFactor,
590  size_t &MinRange) {
592  MinFactor, GoodFactor, MinRange);
593 }
594 
595 void handler::memcpy(void *Dest, const void *Src, size_t Count) {
596  throwIfActionIsCreated();
597  MSrcPtr = const_cast<void *>(Src);
598  MDstPtr = Dest;
599  MLength = Count;
600  setType(detail::CG::CopyUSM);
601 }
602 
603 void handler::memset(void *Dest, int Value, size_t Count) {
604  throwIfActionIsCreated();
605  MDstPtr = Dest;
606  MPattern.push_back(static_cast<char>(Value));
607  MLength = Count;
608  setType(detail::CG::FillUSM);
609 }
610 
611 void handler::prefetch(const void *Ptr, size_t Count) {
612  throwIfActionIsCreated();
613  MDstPtr = const_cast<void *>(Ptr);
614  MLength = Count;
615  setType(detail::CG::PrefetchUSM);
616 }
617 
618 void handler::mem_advise(const void *Ptr, size_t Count, int Advice) {
619  throwIfActionIsCreated();
620  MDstPtr = const_cast<void *>(Ptr);
621  MLength = Count;
622  setType(detail::CG::AdviseUSM);
623 
624  assert(!MSharedPtrStorage.empty());
625 
626  std::lock_guard<std::mutex> Lock(
627  detail::GlobalHandler::instance().getHandlerExtendedMembersMutex());
628 
629  std::shared_ptr<std::vector<detail::ExtendedMemberT>> ExtendedMembersVec =
630  detail::convertToExtendedMembers(MSharedPtrStorage[0]);
631 
632  detail::ExtendedMemberT EMember = {
634  std::make_shared<pi_mem_advice>(pi_mem_advice(Advice))};
635 
636  ExtendedMembersVec->push_back(EMember);
637 }
638 
640  const kernel_bundle<bundle_state::executable> &ExecBundle) {
641 
642  std::shared_ptr<detail::queue_impl> PrimaryQueue =
643  getHandlerImpl()->MSubmissionPrimaryQueue;
644  if (PrimaryQueue->get_context() != ExecBundle.get_context())
645  throw sycl::exception(
647  "Context associated with the primary queue is different from the "
648  "context associated with the kernel bundle");
649 
650  std::shared_ptr<detail::queue_impl> SecondaryQueue =
651  getHandlerImpl()->MSubmissionSecondaryQueue;
652  if (SecondaryQueue &&
653  SecondaryQueue->get_context() != ExecBundle.get_context())
654  throw sycl::exception(
656  "Context associated with the secondary queue is different from the "
657  "context associated with the kernel bundle");
658 
659  setStateExplicitKernelBundle();
660  setHandlerKernelBundle(detail::getSyclObjImpl(ExecBundle));
661 }
662 
663 } // namespace sycl
664 } // __SYCL_INLINE_NAMESPACE(cl)
cl::sycl::detail::CG::CopyUSM
@ CopyUSM
Definition: cg.hpp:167
cl::sycl::detail::Scheduler::addCG
EventImplPtr addCG(std::unique_ptr< detail::CG > CommandGroup, QueueImplPtr Queue)
Registers a command group, and adds it to the dependency graph.
Definition: scheduler.cpp:71
cl::sycl::handler::mem_advise
void mem_advise(const void *Ptr, size_t Length, int Advice)
Provides additional information to the underlying runtime about how different allocations are used.
Definition: handler.cpp:618
cl::sycl::detail::AccessorImplHost::MDims
unsigned int MDims
Definition: accessor_impl.hpp:116
cl::sycl::kernel_bundle
The kernel_bundle class represents collection of device images in a particular state.
Definition: kernel.hpp:28
cl::sycl::detail::SYCLConfig::get
static const char * get()
Definition: config.hpp:108
cl::sycl::kernel_bundle::get_context
context get_context() const noexcept
Definition: kernel_bundle.hpp:211
cl::sycl::detail::CG::BarrierWaitlist
@ BarrierWaitlist
Definition: cg.hpp:163
cl::sycl::info::device
device
Definition: info_desc.hpp:49
cl::sycl::event
An event object can be used to synchronize memory transfers, enqueues of kernels and signaling barrie...
Definition: event.hpp:31
cl::sycl::detail::GlobalHandler::instance
static GlobalHandler & instance()
Definition: global_handler.cpp:33
PI_INVALID_OPERATION
@ PI_INVALID_OPERATION
Definition: pi.h:84
cl::sycl::build
kernel_bundle< bundle_state::executable > build(const kernel_bundle< bundle_state::input > &InputBundle, const std::vector< device > &Devs, const property_list &PropList={})
Definition: kernel_bundle.hpp:668
cl::sycl::handler::memcpy
void memcpy(void *Dest, const void *Src, size_t Count)
Copies data from one memory region to another, both pointed by USM pointers.
Definition: handler.cpp:595
cl::sycl::detail::ExtendedMemberT
Definition: cg.hpp:107
config.hpp
cl::sycl::detail::createSyclObjFromImpl
T createSyclObjFromImpl(decltype(T::impl) ImplObj)
Definition: common.hpp:199
cl::sycl::detail::pi::PI_TRACE_ALL
@ PI_TRACE_ALL
Definition: pi.hpp:58
cl::sycl::access::target::local
@ local
cl::sycl::access::target::host_image
@ host_image
cl::sycl::bundle_state::object
@ object
cl::sycl::detail::kernel_param_kind_t::kind_sampler
@ kind_sampler
cl::sycl::detail::ExtendedMembersType::HANDLER_IMPL
@ HANDLER_IMPL
cl::sycl::addArgsForGlobalAccessor
static void addArgsForGlobalAccessor(detail::Requirement *AccImpl, size_t Index, size_t &IndexShift, int Size, bool IsKernelCreatedFromSource, size_t GlobalSize, std::vector< detail::ArgDesc > &Args, bool isESIMD)
Definition: handler.cpp:307
cl::sycl::detail::SYCLConfig< SYCL_PARALLEL_FOR_RANGE_ROUNDING_PARAMS >::GetSettings
static void GetSettings(size_t &MinFactor, size_t &GoodFactor, size_t &MinRange)
Definition: config.hpp:207
event.hpp
cl::sycl::access::target::image_array
@ image_array
cl::sycl::make_error_code
std::error_code make_error_code(sycl::errc E) noexcept
Constructs an error code using e and sycl_category()
Definition: exception.cpp:121
cl::sycl::detail::CG::AdviseUSM
@ AdviseUSM
Definition: cg.hpp:172
helpers.hpp
cl::sycl::detail::kernel_param_kind_t::kind_pointer
@ kind_pointer
queue_impl.hpp
cl::sycl::bundle_state::input
@ input
cl::sycl::MaxNumAdditionalArgs
constexpr size_t MaxNumAdditionalArgs
Definition: handler.cpp:477
scheduler.hpp
cl::sycl::detail::Requirement
AccessorImplHost Requirement
Definition: accessor_impl.hpp:208
handler_impl.hpp
cl::sycl::detail::CG::CodeplayHostTask
@ CodeplayHostTask
Definition: cg.hpp:171
cl::sycl::detail::CG::FillUSM
@ FillUSM
Definition: cg.hpp:168
cl::sycl::detail::CG::PrefetchUSM
@ PrefetchUSM
Definition: cg.hpp:169
cl::sycl::detail::AccessorImplHost
Definition: accessor_impl.hpp:76
cl::sycl::detail::AccessorImplHost::PerWI
bool PerWI
Definition: accessor_impl.hpp:125
cl::sycl::detail::getCGTypeVersion
constexpr unsigned char getCGTypeVersion(unsigned int Type)
Definition: cg.hpp:139
cl::sycl::handler::memset
void memset(void *Dest, int Value, size_t Count)
Fills the memory pointed by a USM pointer with the value specified.
Definition: handler.cpp:603
cl::sycl::access::target::host_buffer
@ host_buffer
cl::sycl::detail::Scheduler::getInstance
static Scheduler & getInstance()
Definition: scheduler.cpp:258
cl::sycl::handler::prefetch
void prefetch(const void *Ptr, size_t Count)
Provides hints to the runtime library that data should be made available on a device earlier than Uni...
Definition: handler.cpp:611
cl::sycl::detail::AccessorImplHost::MOffset
id< 3 > MOffset
Definition: accessor_impl.hpp:107
cl::sycl::detail::CG::CG_VERSION::V0
@ V0
cl::sycl::detail::ArgDesc
Definition: cg_types.hpp:27
cl::sycl::handler::use_kernel_bundle
void use_kernel_bundle(const kernel_bundle< bundle_state::executable > &ExecBundle)
Definition: handler.cpp:639
cl::sycl::detail::AccessorImplPtr
std::shared_ptr< AccessorImplHost > AccessorImplPtr
Definition: accessor_impl.hpp:132
cl::sycl::detail::CG::CopyAccToAcc
@ CopyAccToAcc
Definition: cg.hpp:161
cl::sycl::handler::ext_oneapi_barrier
void ext_oneapi_barrier()
Prevents any commands submitted afterward to this queue from executing until all commands previously ...
Definition: handler.hpp:2399
cl::sycl::detail::CG::Barrier
@ Barrier
Definition: cg.hpp:162
cl::sycl::detail::EventImplPtr
std::shared_ptr< detail::event_impl > EventImplPtr
Definition: memory_manager.hpp:30
cl::sycl::detail::kernel_param_kind_t::kind_std_layout
@ kind_std_layout
cl::sycl::access::target
target
Definition: access.hpp:17
cl::sycl::bundle_state::executable
@ executable
cl
We provide new interfaces for matrix muliply in this patch:
Definition: access.hpp:13
cl::sycl::detail::kernel_param_kind_t::kind_specialization_constants_buffer
@ kind_specialization_constants_buffer
global_handler.hpp
cl::sycl::detail::__SYCL2020_DEPRECATED
struct __SYCL2020_DEPRECATED("This type isn't device copyable in SYCL 2020") IsDeprecatedDeviceCopyable< T
cl::sycl::detail::kernel_param_kind_t::kind_stream
@ kind_stream
pi_mem_advice
_pi_mem_advice pi_mem_advice
Definition: pi.h:596
cl::sycl::handler
Command group handler class.
Definition: handler.hpp:361
cl::sycl::detail::CG::None
@ None
Definition: cg.hpp:157
cl::sycl::detail::kernel_param_kind_t::kind_accessor
@ kind_accessor
cl::sycl::access::target::constant_buffer
@ constant_buffer
PI_INVALID_VALUE
@ PI_INVALID_VALUE
Definition: pi.h:87
cl::sycl::access::target::image
@ image
cl::sycl::detail::ExtendedMembersType::HANDLER_KERNEL_BUNDLE
@ HANDLER_KERNEL_BUNDLE
cl::sycl::detail::CG::UpdateHost
@ UpdateHost
Definition: cg.hpp:165
cl::sycl::detail::CG::RunOnHostIntel
@ RunOnHostIntel
Definition: cg.hpp:166
cl::sycl::detail::getSyclObjImpl
decltype(Obj::impl) getSyclObjImpl(const Obj &SyclObject)
Definition: common.hpp:182
handler.hpp
cl::sycl::detail::CG::CopyPtrToAcc
@ CopyPtrToAcc
Definition: cg.hpp:160
cl::sycl::detail::AccessorImplHost::resize
void resize(size_t GlobalSize)
Definition: accessor_impl.cpp:26
cl::sycl::errc::kernel_not_supported
@ kernel_not_supported
std
Definition: accessor.hpp:2358
kernel_desc.hpp
cl::sycl::detail::AccessorImplHost::MMemoryRange
range< 3 > MMemoryRange
Definition: accessor_impl.hpp:111
cl::sycl::access::target::device
@ device
cl::sycl::detail::kernel_param_kind_t::kind_invalid
@ kind_invalid
cl::sycl::detail::CG::CopyAccToPtr
@ CopyAccToPtr
Definition: cg.hpp:159
cl::sycl::detail::ExtendedMembersType::HANDLER_MEM_ADVICE
@ HANDLER_MEM_ADVICE
cl::sycl::detail::getDeviceFromHandler
device getDeviceFromHandler(handler &CommandGroupHandlerRef)
Definition: accessor.cpp:15
cl::sycl::detail::convertToExtendedMembers
static std::shared_ptr< std::vector< ExtendedMemberT > > convertToExtendedMembers(const std::shared_ptr< const void > &SPtr)
Definition: cg.hpp:113
cl::sycl::errc::invalid
@ invalid
kernel_bundle_impl.hpp
cl::sycl::detail::kernel_param_kind_t
kernel_param_kind_t
Definition: kernel_desc.hpp:25
cl::sycl::detail::CG::Kernel
@ Kernel
Definition: cg.hpp:158
cl::sycl::detail::CG::CodeplayInteropTask
@ CodeplayInteropTask
Definition: cg.hpp:170
stream.hpp
cl::sycl::detail::CG::Fill
@ Fill
Definition: cg.hpp:164
info_desc.hpp
common.hpp
cl::sycl::info::kernel::function_name
@ function_name
cl::sycl::exception
Definition: exception.hpp:63
kernel_impl.hpp
cl::sycl::detail::AccessorImplHost::MAccessRange
range< 3 > MAccessRange
Definition: accessor_impl.hpp:109
cl::sycl::detail::pi::trace
bool trace(TraceLevel level)
Definition: pi.cpp:354
cl::sycl::detail::get_kernel_id_impl
kernel_id get_kernel_id_impl(std::string KernelName)
Definition: kernel_bundle.cpp:116
__SYCL_INLINE_NAMESPACE
#define __SYCL_INLINE_NAMESPACE(X)
Definition: defines_elementary.hpp:12