18 #ifndef PI_LEVEL_ZERO_HPP
19 #define PI_LEVEL_ZERO_HPP
23 #define _PI_LEVEL_ZERO_PLUGIN_VERSION 1
25 #define _PI_LEVEL_ZERO_PLUGIN_VERSION_STRING \
26 _PI_PLUGIN_VERSION_STRING(_PI_LEVEL_ZERO_PLUGIN_VERSION)
38 #include <shared_mutex>
40 #include <unordered_map>
41 #include <unordered_set>
44 #include <level_zero/ze_api.h>
45 #include <level_zero/zes_api.h>
49 template <
class To,
class From> To
pi_cast(From Value) {
51 assert(
sizeof(From) ==
sizeof(To));
55 template <> uint32_t
pi_cast(uint64_t Value) {
57 uint32_t CastedValue = (uint32_t)(Value);
58 assert((uint64_t)CastedValue == Value);
64 [[noreturn]]
void die(
const char *Message) {
65 std::cerr <<
"die: " << Message << std::endl;
75 return ZE_STRUCTURE_TYPE_EVENT_POOL_DESC;
78 return ZE_STRUCTURE_TYPE_FENCE_DESC;
81 return ZE_STRUCTURE_TYPE_COMMAND_LIST_DESC;
84 return ZE_STRUCTURE_TYPE_CONTEXT_DESC;
89 return ZE_STRUCTURE_TYPE_RELAXED_ALLOCATION_LIMITS_EXP_DESC;
92 return ZE_STRUCTURE_TYPE_HOST_MEM_ALLOC_DESC;
96 return ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC;
99 return ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC;
102 return ZE_STRUCTURE_TYPE_IMAGE_DESC;
105 return ZE_STRUCTURE_TYPE_MODULE_DESC;
109 return ZE_STRUCTURE_TYPE_MODULE_PROGRAM_EXP_DESC;
112 return ZE_STRUCTURE_TYPE_KERNEL_DESC;
115 return ZE_STRUCTURE_TYPE_EVENT_DESC;
118 return ZE_STRUCTURE_TYPE_SAMPLER_DESC;
121 return ZE_STRUCTURE_TYPE_DRIVER_PROPERTIES;
124 return ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES;
128 return ZE_STRUCTURE_TYPE_DEVICE_COMPUTE_PROPERTIES;
132 return ZE_STRUCTURE_TYPE_COMMAND_QUEUE_GROUP_PROPERTIES;
136 return ZE_STRUCTURE_TYPE_DEVICE_IMAGE_PROPERTIES;
140 return ZE_STRUCTURE_TYPE_DEVICE_MODULE_PROPERTIES;
144 return ZE_STRUCTURE_TYPE_DEVICE_CACHE_PROPERTIES;
148 return ZE_STRUCTURE_TYPE_DEVICE_MEMORY_PROPERTIES;
152 return ZE_STRUCTURE_TYPE_DEVICE_MEMORY_ACCESS_PROPERTIES;
155 return ZE_STRUCTURE_TYPE_MODULE_PROPERTIES;
158 return ZE_STRUCTURE_TYPE_KERNEL_PROPERTIES;
162 return ZE_STRUCTURE_TYPE_MEMORY_ALLOCATION_PROPERTIES;
166 return ZES_STRUCTURE_TYPE_PCI_PROPERTIES;
173 this->stype = getZeStructureType<T>();
174 this->pNext =
nullptr;
179 this->stype = getZesStructureType<T>();
180 this->pNext =
nullptr;
188 const char *Ret = std::getenv(
"SYCL_PI_LEVEL_ZERO_SINGLE_THREAD_MODE");
189 const bool RetVal = Ret ? std::stoi(Ret) : 0;
200 std::shared_mutex::lock();
207 std::shared_mutex::unlock();
212 std::shared_mutex::lock_shared();
219 std::shared_mutex::unlock_shared();
235 std::mutex::unlock();
304 std::atomic<pi_uint32> RefCount;
403 void *
allocate(
size_t Size)
override final;
404 void *
allocate(
size_t Size,
size_t Alignment)
override final;
405 void deallocate(
void *Ptr,
bool OwnZeMemHandle)
override final;
459 : ZeDevice{
Device}, Platform{Plt}, RootDevice{ParentDevice},
460 ZeDeviceProperties{}, ZeDeviceComputeProperties{} {
478 int32_t ZeOrdinal{-1};
490 std::vector<queue_group_info_t> QueueGroup =
491 std::vector<queue_group_info_t>(queue_group_info_t::Size);
495 return QueueGroup[queue_group_info_t::MainCopy].ZeOrdinal >= 0;
500 return QueueGroup[queue_group_info_t::LinkCopy].ZeOrdinal >= 0;
505 return hasMainCopyEngine() || hasLinkCopyEngine();
513 int SubSubDeviceIndex = -1);
557 ze_fence_handle_t ZeFence{
nullptr};
563 bool ZeFenceInUse{
false};
568 uint32_t ZeQueueGroupOrdinal{0};
577 std::vector<pi_event> EventList{};
578 size_t size()
const {
return EventList.size(); }
583 typedef std::unordered_map<ze_command_list_handle_t, pi_command_list_info_t>
590 const pi_device *Devs,
bool OwnZeContext)
591 : ZeContext{ZeContext},
592 OwnZeContext{OwnZeContext}, Devices{Devs, Devs + NumDevices},
593 SingleRootDevice(getRootDevice()), ZeCommandListInit{
nullptr} {
598 for (uint32_t I = 0; I < NumDevices; I++) {
600 SharedMemAllocContexts.emplace(
604 SharedReadOnlyMemAllocContexts.emplace(
608 DeviceMemAllocContexts.emplace(
616 HostMemAllocContext = std::make_unique<USMAllocContext>(
620 if (SingleRootDevice && DeviceMemAllocContexts.find(SingleRootDevice) ==
621 DeviceMemAllocContexts.end()) {
622 SharedMemAllocContexts.emplace(
626 DeviceMemAllocContexts.emplace(
689 std::unordered_map<ze_device_handle_t, std::list<ze_command_list_handle_t>>
691 std::unordered_map<ze_device_handle_t, std::list<ze_command_list_handle_t>>
714 bool AllowBatching =
false);
720 pi_result getFreeSlotInExistingOrNewPool(ze_event_pool_handle_t &,
size_t &,
722 bool ProfilingEnabled);
776 std::vector<std::list<ze_event_pool_handle_t>> ZeEventPoolCache{4};
777 auto getZeEventPoolCache(
bool HostVisible,
bool WithProfiling) {
779 return WithProfiling ? &ZeEventPoolCache[0] : &ZeEventPoolCache[1];
781 return WithProfiling ? &ZeEventPoolCache[2] : &ZeEventPoolCache[3];
786 std::unordered_map<ze_event_pool_handle_t, pi_uint32>
787 NumEventsAvailableInEventPool;
792 std::unordered_map<ze_event_pool_handle_t, pi_uint32>
793 NumEventsUnreleasedInEventPool;
801 _pi_queue(std::vector<ze_command_queue_handle_t> &ComputeQueues,
802 std::vector<ze_command_queue_handle_t> &CopyQueues,
817 : Queue(Queue), Type(Type) {}
821 bool isCopy()
const {
return Type != queue_type::Compute; }
833 uint32_t getQueueIndex(uint32_t *QueueGroupOrdinal, uint32_t *QueueIndex);
// Three queue-index bookkeeping values, all starting at zero.
// NOTE(review): presumably Lower/Upper bound the usable index range and
// NextIndex is the next index to hand out; confirm against getQueueIndex().
uint32_t UpperIndex = 0;
uint32_t LowerIndex = 0;
uint32_t NextIndex = 0;
860 return UseCopyEngine ? CopyQueueGroup : ComputeQueueGroup;
866 bool useCopyEngine(
bool PreferCopyEngine =
true)
const;
893 void CaptureIndirectAccesses();
931 bool isBatchingAllowed(
bool IsCopy)
const;
937 bool isInOrderQueue()
const;
// Non-const, so it may modify queue state; IsCopy selects the copy or the
// non-copy case.
// NOTE(review): presumably tunes the batch size after a batch closed because
// it was full; the definition is not visible from here, so confirm.
void adjustBatchSizeForFullBatch(bool IsCopy);
// Non-const, so it may modify queue state; IsCopy selects the copy or the
// non-copy case.
// NOTE(review): presumably tunes the batch size after a batch was closed
// before filling up; the definition is not visible from here, so confirm.
void adjustBatchSizeForPartialBatch(bool IsCopy);
958 std::vector<_pi_event *> &EventListToCleanup);
965 auto CommandBatch = (IsCopy) ? CopyCommandBatch : ComputeCommandBatch;
980 bool IsBlocking =
false,
981 bool OKToBatchCommand =
false);
988 pi_result executeOpenCommandList(
bool IsCopy);
998 if (
auto Res = executeOpenCommandList(IsCopy{
false}))
1000 if (
auto Res = executeOpenCommandList(IsCopy{
true}))
1031 virtual pi_result getZeHandle(
char *&ZeHandle, access_mode_t,
1035 virtual pi_result getZeHandlePtr(
char **&ZeHandlePtr, access_mode_t,
1039 virtual bool isImage()
const = 0;
1053 bool ImportedHostPtr =
false)
1063 ZE_DEVICE_PROPERTY_FLAG_INTEGRATED;
1067 MapHostPtr = HostPtr;
1070 if (ImportedHostPtr) {
1071 Allocations[
nullptr].ZeHandle = HostPtr;
1072 Allocations[
nullptr].Valid =
true;
1082 Allocations[LastDeviceWithValidAllocation].Valid =
true;
1087 :
_pi_mem(Parent->
Context), Size(Size), SubBuffer{Parent, Origin} {}
1091 char *ZeMemHandle,
bool OwnZeMemHandle)
1095 Allocations[
Device].ZeHandle = ZeMemHandle;
1096 Allocations[
Device].Valid =
true;
1097 Allocations[
Device].ReleaseAction =
1098 OwnZeMemHandle ? allocation_t::free_native : allocation_t::keep;
1105 ZE_DEVICE_PROPERTY_FLAG_INTEGRATED) {
1107 MapHostPtr = ZeMemHandle;
1110 LastDeviceWithValidAllocation =
Device;
1120 virtual pi_result getZeHandle(
char *&ZeHandle, access_mode_t,
1122 virtual pi_result getZeHandlePtr(
char **&ZeHandlePtr, access_mode_t,
1136 char *ZeHandle{
nullptr};
1146 } ReleaseAction{
free};
1160 char *MapHostPtr{
nullptr};
1177 size_t getAlignment()
const;
1189 :
_pi_mem(Ctx), ZeImage{Image} {}
1193 ZeHandle = pi_cast<char *>(ZeImage);
1198 ZeHandlePtr = pi_cast<char **>(&ZeImage);
1243 pi_queue CurQueue,
bool UseCopyEngine);
1248 pi_result collectEventsForReleaseAndDestroyPiZeEventList(
1249 std::list<pi_event> &EventsToBeReleased);
1257 this->Length = other.
Length;
1265 : ZeEvent{ZeEvent}, OwnZeEvent{OwnZeEvent}, ZeEventPool{ZeEventPool},
1266 CommandType{CommandType},
Context{
Context}, CommandData{
nullptr} {}
// Starts out false.
// NOTE(review): presumably set once the event's cleanup has run so it is not
// repeated; confirm against the code that writes this flag.
bool CleanedUp = false;
// Starts out false.
// NOTE(review): presumably records that the event has been observed as
// complete; confirm against the code that writes this flag.
bool Completed = false;
1363 ZeSpecConstants.numConstants = Program->
SpecConstants.size();
1364 ZeSpecContantsIds.reserve(ZeSpecConstants.numConstants);
1365 ZeSpecContantsValues.reserve(ZeSpecConstants.numConstants);
1368 ZeSpecContantsIds.push_back(SpecConstant.first);
1369 ZeSpecContantsValues.push_back(SpecConstant.second);
1371 ZeSpecConstants.pConstantIds = ZeSpecContantsIds.data();
1372 ZeSpecConstants.pConstantValues = ZeSpecContantsValues.data();
1375 const ze_module_constants_t *
ze() {
return &ZeSpecConstants; }
1378 std::vector<uint32_t> ZeSpecContantsIds;
1379 std::vector<const void *> ZeSpecContantsValues;
1380 ze_module_constants_t ZeSpecConstants;
1386 OwnZeModule{
true}, State{St}, Code{
new uint8_t[Length]},
1387 CodeLength{Length}, ZeModule{
nullptr}, ZeBuildLog{
nullptr} {
1393 ze_module_build_log_handle_t ZeBuildLog)
1394 :
Context{
Context}, OwnZeModule{
true}, State{St}, ZeModule{ZeModule},
1395 ZeBuildLog{ZeBuildLog} {}
1401 ZeModule{ZeModule}, ZeBuildLog{
nullptr} {}
1406 State{St}, ZeModule{
nullptr}, ZeBuildLog{
nullptr} {}
1446 : ZeKernel{Kernel}, OwnZeKernel{OwnZeKernel}, Program{Program},
1447 MemAllocs{}, SubmissionsCount{0} {}
1471 size_t operator()(
const std::pair<void *const, MemAllocRecord> *P)
const {
1472 return std::hash<void *>()(P->first);
1528 #endif // PI_LEVEL_ZERO_HPP