28 inline namespace _V1 {
33 : MOwnedByRuntime(true), MAsyncHandler(AsyncHandler), MDevices(1, Device),
38 MSupportBufferLocationByDevices(NotChecked) {
45 : MOwnedByRuntime(true), MAsyncHandler(AsyncHandler), MDevices(Devices),
46 MContext(nullptr), MPlatform(), MPropList(PropList), MHostContext(false),
47 MSupportBufferLocationByDevices(NotChecked) {
49 std::vector<sycl::detail::pi::PiDevice> DeviceIds;
50 for (
const auto &D : MDevices) {
51 if (D.has(aspect::ext_oneapi_is_composite)) {
57 std::vector<device> ComponentDevices = D.get_info<
58 ext::oneapi::experimental::info::device::component_devices>();
59 for (
const auto &CD : ComponentDevices)
67 const bool UseCUDAPrimaryContext = MPropList.
has_property<
68 ext::oneapi::cuda::property::context::use_primary_context>();
75 Props, DeviceIds.size(), DeviceIds.data(),
nullptr,
nullptr, &MContext);
78 DeviceIds.data(),
nullptr,
87 const std::vector<sycl::device> &DeviceList,
89 : MOwnedByRuntime(OwnedByRuntime), MAsyncHandler(AsyncHandler),
90 MDevices(DeviceList), MContext(
PiContext), MPlatform(),
91 MHostContext(false), MSupportBufferLocationByDevices(NotChecked) {
92 if (!MDevices.empty()) {
95 std::vector<sycl::detail::pi::PiDevice> DeviceIds;
96 uint32_t DevicesNum = 0;
101 DeviceIds.resize(DevicesNum);
108 if (!DeviceIds.empty()) {
109 std::shared_ptr<detail::platform_impl> Platform =
112 MDevices.emplace_back(createSyclObjFromImpl<device>(
113 Platform->getOrMakeDeviceImpl(Dev, Platform)));
115 MPlatform = Platform;
117 throw invalid_parameter_error(
118 "No devices in the provided device list and native context.",
119 PI_ERROR_INVALID_VALUE);
136 throw invalid_object_error(
137 "This instance of context doesn't support OpenCL interoperability.",
138 PI_ERROR_INVALID_CONTEXT);
142 return pi::cast<cl_context>(MContext);
149 for (
auto &DeviceGlobalInitializer : MDeviceGlobalInitializers)
150 DeviceGlobalInitializer.second.ClearEvents(
getPlugin());
152 for (
const void *DeviceGlobal : MAssociatedDeviceGlobals) {
158 for (
auto LibProg : MCachedLibPrograms) {
159 assert(LibProg.second &&
"Null program must not be kept in the cache");
169 return MAsyncHandler;
173 uint32_t context_impl::get_info<info::context::reference_count>()
const {
176 return get_context_info<info::context::reference_count>(this->getHandleRef(),
179 template <>
platform context_impl::get_info<info::context::platform>()
const {
181 return createSyclObjFromImpl<platform>(
183 return createSyclObjFromImpl<platform>(MPlatform);
186 std::vector<sycl::device>
187 context_impl::get_info<info::context::devices>()
const {
191 std::vector<sycl::memory_order>
192 context_impl::get_info<info::context::atomic_memory_order_capabilities>()
194 std::vector<sycl::memory_order> CapabilityList{
195 sycl::memory_order::relaxed, sycl::memory_order::acquire,
197 sycl::memory_order::seq_cst};
199 return CapabilityList;
203 MDevices, CapabilityList);
205 return CapabilityList;
208 std::vector<sycl::memory_scope>
209 context_impl::get_info<info::context::atomic_memory_scope_capabilities>()
211 std::vector<sycl::memory_scope> CapabilityList{
214 sycl::memory_scope::system};
216 return CapabilityList;
220 MDevices, CapabilityList);
222 return CapabilityList;
225 std::vector<sycl::memory_order>
226 context_impl::get_info<info::context::atomic_fence_order_capabilities>()
const {
227 std::vector<sycl::memory_order> CapabilityList{
228 sycl::memory_order::relaxed, sycl::memory_order::acquire,
230 sycl::memory_order::seq_cst};
232 return CapabilityList;
235 info::device::atomic_fence_order_capabilities>(
236 MDevices, CapabilityList);
238 return CapabilityList;
241 std::vector<sycl::memory_scope>
242 context_impl::get_info<info::context::atomic_fence_scope_capabilities>()
const {
243 std::vector<sycl::memory_scope> CapabilityList{
246 sycl::memory_scope::system};
248 return CapabilityList;
251 info::device::atomic_fence_scope_capabilities>(
252 MDevices, CapabilityList);
254 return CapabilityList;
258 typename info::platform::version::return_type
259 context_impl::get_backend_info<info::platform::version>()
const {
262 "the info::platform::version info descriptor can "
263 "only be queried with an OpenCL backend");
265 return MDevices[0].get_platform().get_info<info::platform::version>();
269 std::vector<device> &Devices);
272 typename info::device::version::return_type
273 context_impl::get_backend_info<info::device::version>()
const {
276 "the info::device::version info descriptor can only "
277 "be queried with an OpenCL backend");
279 auto Devices = get_info<info::context::devices>();
280 if (Devices.empty()) {
281 return "No available device";
289 typename info::device::backend_version::return_type
290 context_impl::get_backend_info<info::device::backend_version>()
const {
293 "the info::device::backend_version info descriptor "
294 "can only be queried with a Level Zero backend");
308 return MKernelProgramCache;
312 std::shared_ptr<detail::device_impl> Device)
const {
313 for (
auto D : MDevices)
338 if (MSupportBufferLocationByDevices !=
NotChecked)
339 return MSupportBufferLocationByDevices ==
Supported ? true :
false;
341 MSupportBufferLocationByDevices =
Supported;
342 for (
auto &Device : MDevices) {
343 if (!Device.has_extension(
"cl_intel_mem_alloc_buffer_location")) {
348 return MSupportBufferLocationByDevices ==
Supported ? true :
false;
352 std::lock_guard<std::mutex> Lock{MAssociatedDeviceGlobalsMutex};
353 MAssociatedDeviceGlobals.insert(DeviceGlobalPtr);
359 std::lock_guard<std::mutex> Lock(MDeviceGlobalInitializersMutex);
360 for (
const device &Dev : Devs) {
362 MDeviceGlobalInitializers.emplace(Key, BinImage);
367 pi::PiProgram NativePrg,
const std::shared_ptr<queue_impl> &QueueImpl) {
369 const DeviceImplPtr &DeviceImpl = QueueImpl->getDeviceImplPtr();
370 std::lock_guard<std::mutex> NativeProgramLock(MDeviceGlobalInitializersMutex);
371 auto ImgIt = MDeviceGlobalInitializers.find(
372 std::make_pair(NativePrg, DeviceImpl->getHandleRef()));
373 if (ImgIt == MDeviceGlobalInitializers.end() ||
374 ImgIt->second.MDeviceGlobalsFullyInitialized)
377 DeviceGlobalInitializer &InitRef = ImgIt->second;
379 std::lock_guard<std::mutex> InitLock(InitRef.MDeviceGlobalInitMutex);
380 std::vector<sycl::detail::pi::PiEvent> &InitEventsRef =
381 InitRef.MDeviceGlobalInitEvents;
382 if (!InitEventsRef.empty()) {
384 auto NewEnd = std::remove_if(
385 InitEventsRef.begin(), InitEventsRef.end(),
387 return get_event_info<info::event::command_execution_status>(
388 Event, Plugin) == info::event_command_status::complete;
391 for (
auto EventIt = NewEnd; EventIt != InitEventsRef.end(); ++EventIt)
394 InitEventsRef.erase(NewEnd, InitEventsRef.end());
396 if (InitEventsRef.empty())
397 InitRef.MDeviceGlobalsFullyInitialized =
true;
398 return InitEventsRef;
399 }
else if (InitRef.MDeviceGlobalsFullyInitialized) {
407 auto DeviceGlobals = InitRef.MBinImage->getDeviceGlobals();
408 std::vector<std::string> DeviceGlobalIds;
409 DeviceGlobalIds.reserve(DeviceGlobals.size());
411 DeviceGlobalIds.push_back(DeviceGlobal->Name);
412 std::vector<DeviceGlobalMapEntry *> DeviceGlobalEntries =
419 if (DeviceGlobalEntries.empty()) {
420 InitRef.MDeviceGlobalsFullyInitialized =
true;
427 InitEventsRef.reserve(DeviceGlobalEntries.size());
435 DeviceGlobalEntry->getOrAllocateDeviceGlobalUSM(QueueImpl);
443 InitEventsRef.push_back(ZIEvent.TransferOwnership());
448 void *
const &USMPtr = DeviceGlobalUSM.
getPtr();
450 QueueImpl->getHandleRef(), NativePrg,
451 DeviceGlobalEntry->MUniqueId.c_str(),
false,
sizeof(
void *), 0,
452 &USMPtr, 0,
nullptr, &InitEvent);
454 InitEventsRef.push_back(InitEvent);
456 return InitEventsRef;
460 void context_impl::DeviceGlobalInitializer::ClearEvents(
464 MDeviceGlobalInitEvents.clear();
468 const std::shared_ptr<device_impl> &DeviceImpl,
const void *DeviceGlobalPtr,
469 const void *Src,
size_t DeviceGlobalTSize,
bool IsDeviceImageScoped,
470 size_t NumBytes,
size_t Offset) {
471 std::optional<sycl::detail::pi::PiDevice> KeyDevice = std::nullopt;
472 if (IsDeviceImageScoped)
473 KeyDevice = DeviceImpl->getHandleRef();
474 auto Key = std::make_pair(DeviceGlobalPtr, KeyDevice);
476 std::lock_guard<std::mutex> InitLock(MDeviceGlobalUnregisteredDataMutex);
478 auto UnregisteredDataIt = MDeviceGlobalUnregisteredData.find(Key);
479 if (UnregisteredDataIt == MDeviceGlobalUnregisteredData.end()) {
480 std::unique_ptr<std::byte[]> NewData =
481 std::make_unique<std::byte[]>(DeviceGlobalTSize);
483 MDeviceGlobalUnregisteredData.insert({Key, std::move(NewData)}).first;
485 std::byte *ValuePtr = UnregisteredDataIt->second.get();
486 std::memcpy(ValuePtr + Offset, Src, NumBytes);
490 const std::shared_ptr<device_impl> &DeviceImpl,
void *Dest,
491 const void *DeviceGlobalPtr,
bool IsDeviceImageScoped,
size_t NumBytes,
494 std::optional<sycl::detail::pi::PiDevice> KeyDevice = std::nullopt;
495 if (IsDeviceImageScoped)
496 KeyDevice = DeviceImpl->getHandleRef();
497 auto Key = std::make_pair(DeviceGlobalPtr, KeyDevice);
499 std::lock_guard<std::mutex> InitLock(MDeviceGlobalUnregisteredDataMutex);
501 auto UnregisteredDataIt = MDeviceGlobalUnregisteredData.find(Key);
502 if (UnregisteredDataIt == MDeviceGlobalUnregisteredData.end()) {
505 char *FillableDest =
reinterpret_cast<char *
>(Dest);
506 std::fill(FillableDest, FillableDest + NumBytes, 0);
509 std::byte *ValuePtr = UnregisteredDataIt->second.get();
510 std::memcpy(Dest, ValuePtr + Offset, NumBytes);
514 const device &Device,
const std::set<std::uintptr_t> &ImgIdentifiers,
515 const std::string &ObjectTypeName) {
520 auto &KeyMap = LockedCache.get().KeyMap;
521 auto &Cache = LockedCache.get().Cache;
524 for (std::uintptr_t ImageIDs : ImgIdentifiers) {
525 auto OuterKey = std::make_pair(ImageIDs, DevHandle);
526 size_t NProgs = KeyMap.count(OuterKey);
532 if (NProgs > 1 || (BuildRes && NProgs == 1))
534 "More than one image exists with the " +
535 ObjectTypeName +
".");
537 auto KeyMappingsIt = KeyMap.find(OuterKey);
538 assert(KeyMappingsIt != KeyMap.end());
539 auto CachedProgIt = Cache.find(KeyMappingsIt->second);
540 assert(CachedProgIt != Cache.end());
541 BuildRes = CachedProgIt->second;
547 BuildState NewState = BuildRes->waitUntilTransition();
548 if (NewState == BuildState::BS_Failed)
549 throw compile_program_error(BuildRes->Error.Msg, BuildRes->Error.Code);
551 assert(NewState == BuildState::BS_Done);
552 return BuildRes->Val;
555 std::optional<sycl::detail::pi::PiProgram>
562 std::optional<sycl::detail::pi::PiProgram>
566 std::set<std::uintptr_t> ImgIdentifiers;
std::shared_ptr< ProgramBuildResult > ProgramBuildResultPtr
Locked< ProgramCache > acquireCachedPrograms()
BuildState
Denotes the state of a build.
void setContextPtr(const ContextPtr &AContext)
static ProgramManager & getInstance()
DeviceGlobalMapEntry * getDeviceGlobalEntry(const void *DeviceGlobalPtr)
std::vector< DeviceGlobalMapEntry * > getDeviceGlobalEntries(const std::vector< std::string > &UniqueIds, bool ExcludeDeviceImageScopeDecorated=false)
std::uintptr_t getImageID() const
bool hasDevice(std::shared_ptr< detail::device_impl > Device) const
Returns true if and only if context contains the given device.
context_impl(const device &Device, async_handler AsyncHandler, const property_list &PropList)
Constructs a context_impl using a single SYCL device.
KernelProgramCache & getKernelProgramCache() const
void addDeviceGlobalInitializer(sycl::detail::pi::PiProgram Program, const std::vector< device > &Devs, const RTDeviceBinaryImage *BinImage)
Adds a device global initializer.
pi_native_handle getNative() const
Gets the native handle of the SYCL context.
bool isBufferLocationSupported() const
void addAssociatedDeviceGlobal(const void *DeviceGlobalPtr)
Adds an associated device global to the tracked associates.
std::optional< sycl::detail::pi::PiProgram > getProgramForDeviceGlobal(const device &Device, DeviceGlobalMapEntry *DeviceGlobalEntry)
Gets a program associated with a device global from the cache.
std::optional< sycl::detail::pi::PiProgram > getProgramForDevImgs(const device &Device, const std::set< std::uintptr_t > &ImgIdentifiers, const std::string &ObjectTypeName)
Gets a program associated with Dev / Images pairs.
void memcpyFromHostOnlyDeviceGlobal(const std::shared_ptr< device_impl > &DeviceImpl, void *Dest, const void *DeviceGlobalPtr, bool IsDeviceImageScoped, size_t NumBytes, size_t Offset)
void memcpyToHostOnlyDeviceGlobal(const std::shared_ptr< device_impl > &DeviceImpl, const void *DeviceGlobalPtr, const void *Src, size_t DeviceGlobalTSize, bool IsDeviceImageScoped, size_t NumBytes, size_t Offset)
backend getBackend() const
std::vector< sycl::detail::pi::PiEvent > initializeDeviceGlobals(pi::PiProgram NativePrg, const std::shared_ptr< queue_impl > &QueueImpl)
Initializes device globals for a program on the associated queue.
std::optional< sycl::detail::pi::PiProgram > getProgramForHostPipe(const device &Device, HostPipeMapEntry *HostPipeEntry)
Gets a program associated with a HostPipe Entry from the cache.
const PluginPtr & getPlugin() const
bool is_host() const
Checks if this context is a host context.
cl_context get() const
Gets OpenCL interoperability context handle.
const async_handler & get_async_handler() const
Gets asynchronous exception handler.
DeviceImplPtr findMatchingDeviceImpl(sycl::detail::pi::PiDevice &DevicePI) const
Given a PiDevice, returns the matching shared_ptr<device_impl> within this context.
sycl::detail::pi::PiContext & getHandleRef()
Gets the underlying context object (if any) without reference count modification.
The SYCL device class encapsulates a single SYCL device on which kernels may be executed.
detail::is_device_info_desc< Param >::return_type get_info() const
Queries this SYCL device for information requested by the template parameter param.
Objects of the property_list class are containers for the SYCL properties.
bool has_property() const noexcept
#define __SYCL_PI_CONTEXT_PROPERTIES_CUDA_PRIMARY
class __SYCL2020_DEPRECATED("Host device is no longer supported.") host_selector int default_selector_v(const device &dev)
Selects SYCL host device.
static const PluginPtr & getPlugin(backend Backend)
decltype(Obj::impl) getSyclObjImpl(const Obj &SyclObject)
std::function< int(const sycl::device &)> DSelectorInvocableType
device select_device(const DSelectorInvocableType &DeviceSelectorInvocable)
void GetCapabilitiesIntersectionSet(const std::vector< sycl::device > &Devices, std::vector< T > &CapabilityList)
std::shared_ptr< plugin > PluginPtr
std::shared_ptr< device_impl > DeviceImplPtr
std::function< void(sycl::exception_list)> async_handler
std::error_code make_error_code(sycl::errc E) noexcept
Constructs an error code using E and sycl_category().
static sycl::event fill(sycl::queue q, void *dev_ptr, const T &pattern, size_t count)
Set pattern to the first count elements of type T starting from dev_ptr.
uintptr_t pi_native_handle
pi_result piProgramRelease(pi_program program)
pi_result piextEnqueueDeviceGlobalVariableWrite(pi_queue queue, pi_program program, const char *name, pi_bool blocking_write, size_t count, size_t offset, const void *src, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
Device global variable.
pi_result piContextRetain(pi_context context)
intptr_t pi_context_properties
pi_result piContextCreate(const pi_context_properties *properties, pi_uint32 num_devices, const pi_device *devices, void(*pfn_notify)(const char *errinfo, const void *private_info, size_t cb, void *user_data), void *user_data, pi_context *ret_context)
pi_result piextContextGetNativeHandle(pi_context context, pi_native_handle *nativeHandle)
Gets the native handle of a PI context object.
@ PI_CONTEXT_INFO_NUM_DEVICES
@ PI_CONTEXT_INFO_DEVICES
pi_result piEventRelease(pi_event event)
pi_result piContextRelease(pi_context context)
pi_result piContextGetInfo(pi_context context, pi_context_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
C++ wrapper of extern "C" PI interfaces.
std::set< std::uintptr_t > MImageIdentifiers
void removeAssociatedResources(const context_impl *CtxImpl)
OwnedPiEvent getInitEvent(const PluginPtr &Plugin)
void *const & getPtr() const noexcept
RTDeviceBinaryImage * getDevBinImage()