21 #include <hip/hip_runtime.h>
33 inline void getArrayDesc(hipArray *array, hipArray_Format &format,
35 #if defined(__HIP_PLATFORM_AMD__)
36 format = array->Format;
37 channels = array->NumChannels;
38 #elif defined(__HIP_PLATFORM_NVIDIA__)
39 CUDA_ARRAY_DESCRIPTOR arrayDesc;
40 cuArrayGetDescriptor(&arrayDesc, (CUarray)array);
42 format = arrayDesc.Format;
43 channels = arrayDesc.NumChannels;
45 #error("Must define exactly one of __HIP_PLATFORM_AMD__ or __HIP_PLATFORM_NVIDIA__");
52 #if defined(__HIP_PLATFORM_NVIDIA__) && !defined(__CUDACC__)
53 inline static hipError_t
54 hipArray3DCreate(hiparray *pHandle,
55 const HIP_ARRAY3D_DESCRIPTOR *pAllocateArray) {
56 return hipCUResultTohipError(cuArray3DCreate(pHandle, pAllocateArray));
67 #if defined(__HIP_PLATFORM_NVIDIA__)
68 typedef CUarray hipCUarray;
69 #elif defined(__HIP_PLATFORM_AMD__)
70 typedef hipArray *hipCUarray;
72 #error("Must define exactly one of __HIP_PLATFORM_AMD__ or __HIP_PLATFORM_NVIDIA__");
76 #if defined(__HIP_PLATFORM_NVIDIA__)
77 #define hipMemoryType CUmemorytype
78 #define hipMemoryTypeHost CU_MEMORYTYPE_HOST
79 #define hipMemoryTypeDevice CU_MEMORYTYPE_DEVICE
80 #define hipMemoryTypeArray CU_MEMORYTYPE_ARRAY
81 #define hipMemoryTypeUnified CU_MEMORYTYPE_UNIFIED
84 std::string getHipVersionString() {
86 if (hipDriverGetVersion(&driver_version) != hipSuccess) {
90 std::stringstream stream;
100 case hipErrorInvalidContext:
101 return PI_ERROR_INVALID_CONTEXT;
102 case hipErrorInvalidDevice:
103 return PI_ERROR_INVALID_DEVICE;
104 case hipErrorInvalidValue:
105 return PI_ERROR_INVALID_VALUE;
106 case hipErrorOutOfMemory:
107 return PI_ERROR_OUT_OF_HOST_MEMORY;
108 case hipErrorLaunchOutOfResources:
109 return PI_ERROR_OUT_OF_RESOURCES;
111 return PI_ERROR_UNKNOWN;
129 pi_result hip_piPluginGetLastError(
char **message) {
137 template <
typename Func>
139 std::size_t num_events_in_wait_list, Func &&f) {
141 if (event_wait_list ==
nullptr || num_events_in_wait_list == 0) {
142 return PI_ERROR_INVALID_EVENT_WAIT_LIST;
146 if (num_events_in_wait_list == 1) {
147 return f(event_wait_list[0]);
150 std::vector<pi_event> events{event_wait_list,
151 event_wait_list + num_events_in_wait_list};
155 return e0->get_queue()->stream_ < e1->get_queue()->stream_ ||
156 (e0->get_queue()->stream_ == e1->get_queue()->stream_ &&
157 e0->get_event_id() > e1->get_event_id());
161 hipStream_t lastSeenStream = 0;
163 if (!
event || (!first &&
event->get_queue()->stream_ == lastSeenStream)) {
168 lastSeenStream =
event->get_queue()->stream_;
170 auto result = f(
event);
171 if (result != PI_SUCCESS) {
186 pi_result check_error(hipError_t result,
const char *
function,
int line,
188 if (result == hipSuccess) {
192 const char *errorString =
nullptr;
193 const char *errorName =
nullptr;
194 errorName = hipGetErrorName(result);
195 errorString = hipGetErrorString(result);
196 std::cerr <<
"\nPI HIP ERROR:"
197 <<
"\n\tValue: " << result
198 <<
"\n\tName: " << errorName
199 <<
"\n\tDescription: " << errorString
200 <<
"\n\tFunction: " <<
function
201 <<
"\n\tSource Location: " << file <<
":" <<
line <<
"\n"
204 if (std::getenv(
"PI_HIP_ABORT") !=
nullptr) {
208 throw map_error(result);
212 #define PI_CHECK_ERROR(result) check_error(result, __func__, __LINE__, __FILE__)
220 class ScopedContext {
226 ScopedContext(
pi_context ctxt) : placedContext_{ctxt}, needToRecover_{
false} {
228 if (!placedContext_) {
229 throw PI_ERROR_INVALID_CONTEXT;
232 hipCtx_t desired = placedContext_->
get();
233 PI_CHECK_ERROR(hipCtxGetCurrent(&original_));
234 if (original_ != desired) {
236 PI_CHECK_ERROR(hipCtxSetCurrent(desired));
237 if (original_ ==
nullptr) {
245 needToRecover_ =
true;
251 if (needToRecover_) {
252 PI_CHECK_ERROR(hipCtxSetCurrent(original_));
258 template <
typename T,
typename Assign>
259 pi_result getInfoImpl(
size_t param_value_size,
void *param_value,
260 size_t *param_value_size_ret,
T value,
size_t value_size,
261 Assign &&assign_func) {
263 if (param_value !=
nullptr) {
265 if (param_value_size < value_size) {
266 return PI_ERROR_INVALID_VALUE;
269 assign_func(param_value, value, value_size);
272 if (param_value_size_ret !=
nullptr) {
273 *param_value_size_ret = value_size;
279 template <
typename T>
280 pi_result getInfo(
size_t param_value_size,
void *param_value,
281 size_t *param_value_size_ret,
T value) {
283 auto assignment = [](
void *param_value,
T value,
size_t value_size) {
285 *
static_cast<T *
>(param_value) = value;
288 return getInfoImpl(param_value_size, param_value, param_value_size_ret, value,
289 sizeof(
T), std::move(assignment));
292 template <
typename T>
293 pi_result getInfoArray(
size_t array_length,
size_t param_value_size,
294 void *param_value,
size_t *param_value_size_ret,
297 auto assignment = [](
void *param_value,
T *value,
size_t value_size) {
298 memcpy(param_value,
static_cast<const void *
>(value), value_size);
301 return getInfoImpl(param_value_size, param_value, param_value_size_ret, value,
302 array_length *
sizeof(
T), std::move(assignment));
306 pi_result getInfo<const char *>(
size_t param_value_size,
void *param_value,
307 size_t *param_value_size_ret,
309 return getInfoArray(strlen(value) + 1, param_value_size, param_value,
310 param_value_size_ret, value);
316 hipDeviceGetAttribute(&value, attribute,
device->get()) == hipSuccess);
321 void simpleGuessLocalWorkSize(
size_t *threadsPerBlock,
322 const size_t *global_work_size,
323 const size_t maxThreadsPerBlock[3],
325 assert(threadsPerBlock !=
nullptr);
326 assert(global_work_size !=
nullptr);
327 assert(
kernel !=
nullptr);
336 threadsPerBlock[0] = std::min(maxThreadsPerBlock[0], global_work_size[0]);
340 while (0u != (global_work_size[0] % threadsPerBlock[0])) {
341 --threadsPerBlock[0];
357 [[noreturn]]
void die(
const char *Message) {
358 std::cerr <<
"pi_die: " << Message << std::endl;
364 std::cerr <<
"pi_print: " << Message << std::endl;
367 void assertion(
bool Condition,
const char *Message) {
399 : commandType_{type}, refCount_{1}, isCompleted_{
false}, isRecorded_{
false},
400 isStarted_{
false}, evEnd_{
nullptr}, evStart_{
nullptr}, evQueued_{
nullptr},
407 PI_CHECK_ERROR(hipEventCreateWithFlags(
408 &evEnd_, profilingEnabled ? hipEventDefault : hipEventDisableTiming));
410 if (profilingEnabled) {
411 PI_CHECK_ERROR(hipEventCreateWithFlags(&evQueued_, hipEventDefault));
412 PI_CHECK_ERROR(hipEventCreateWithFlags(&evStart_, hipEventDefault));
415 if (queue_ !=
nullptr) {
422 if (queue_ !=
nullptr) {
429 assert(!is_started());
435 PI_CHECK_ERROR(hipEventRecord(evQueued_, 0));
436 PI_CHECK_ERROR(hipEventRecord(evStart_, queue_->get()));
451 const hipError_t ret = hipEventQuery(evEnd_);
452 if (ret != hipSuccess && ret != hipErrorNotReady) {
456 if (ret == hipErrorNotReady) {
464 float miliSeconds = 0.0f;
465 assert(is_started());
467 PI_CHECK_ERROR(hipEventElapsedTime(&miliSeconds, evStart_, evEnd_));
468 return static_cast<pi_uint64>(miliSeconds * 1.0e6);
472 float miliSeconds = 0.0f;
473 assert(is_started());
476 hipEventElapsedTime(&miliSeconds, context_->evBase_, evStart_));
477 return static_cast<pi_uint64>(miliSeconds * 1.0e6);
481 float miliSeconds = 0.0f;
482 assert(is_started() && is_recorded());
484 PI_CHECK_ERROR(hipEventElapsedTime(&miliSeconds, context_->evBase_, evEnd_));
485 return static_cast<pi_uint64>(miliSeconds * 1.0e6);
490 if (is_recorded() || !is_started()) {
491 return PI_ERROR_INVALID_EVENT;
494 pi_result result = PI_ERROR_INVALID_OPERATION;
497 return PI_ERROR_INVALID_QUEUE;
500 hipStream_t hipStream = queue_->get();
503 eventId_ = queue_->get_next_event_id();
506 "Unrecoverable program state reached in event identifier overflow");
508 result = PI_CHECK_ERROR(hipEventRecord(evEnd_, hipStream));
513 if (result == PI_SUCCESS) {
523 retErr = PI_CHECK_ERROR(hipEventSynchronize(evEnd_));
533 assert(queue_ !=
nullptr);
534 PI_CHECK_ERROR(hipEventDestroy(evEnd_));
537 PI_CHECK_ERROR(hipEventDestroy(evQueued_));
538 PI_CHECK_ERROR(hipEventDestroy(evStart_));
549 return PI_CHECK_ERROR(hipStreamWaitEvent(
queue->get(), event->get(), 0));
553 : module_{
nullptr}, binary_{},
554 binarySizeInBytes_{0}, refCount_{1}, context_{ctxt} {
562 "Re-setting program binary data which has already been set");
572 constexpr
const unsigned int numberOfOptions = 4u;
574 hipJitOption options[numberOfOptions];
575 void *optionVals[numberOfOptions];
578 options[0] = hipJitOptionInfoLogBuffer;
581 options[1] = hipJitOptionInfoLogBufferSizeBytes;
584 options[2] = hipJitOptionErrorLogBuffer;
587 options[3] = hipJitOptionErrorLogBufferSizeBytes;
590 auto result = PI_CHECK_ERROR(
592 numberOfOptions, options, optionVals));
594 const auto success = (result == PI_SUCCESS);
600 return success ? PI_SUCCESS : PI_ERROR_BUILD_PROGRAM_FAILURE;
655 Other.Captive =
nullptr;
663 if (Captive !=
nullptr) {
665 if (ret != PI_SUCCESS) {
671 "Unrecoverable program state reached in hip_piMemRelease");
679 Captive = Other.Captive;
680 Other.Captive =
nullptr;
704 static std::once_flag initFlag;
706 static std::vector<_pi_platform> platformIds;
708 if (num_entries == 0 and platforms !=
nullptr) {
709 return PI_ERROR_INVALID_VALUE;
711 if (platforms ==
nullptr and num_platforms ==
nullptr) {
712 return PI_ERROR_INVALID_VALUE;
720 if (hipInit(0) != hipSuccess) {
725 hipError_t hipErrorCode = hipGetDeviceCount(&numDevices);
726 if (hipErrorCode == hipErrorNoDevice) {
730 err = PI_CHECK_ERROR(hipErrorCode);
731 if (numDevices == 0) {
736 numPlatforms = numDevices;
737 platformIds.resize(numDevices);
739 for (
int i = 0; i < numDevices; ++i) {
741 err = PI_CHECK_ERROR(hipDeviceGet(&
device, i));
742 platformIds[i].devices_.emplace_back(
745 }
catch (
const std::bad_alloc &) {
747 for (
int i = 0; i < numDevices; ++i) {
748 platformIds[i].devices_.clear();
751 err = PI_ERROR_OUT_OF_HOST_MEMORY;
754 for (
int i = 0; i < numDevices; ++i) {
755 platformIds[i].devices_.clear();
763 if (num_platforms !=
nullptr) {
764 *num_platforms = numPlatforms;
767 if (platforms !=
nullptr) {
768 for (
unsigned i = 0; i < std::min(num_entries, numPlatforms); ++i) {
769 platforms[i] = &platformIds[i];
777 return PI_ERROR_OUT_OF_RESOURCES;
783 size_t param_value_size,
void *param_value,
784 size_t *param_value_size_ret) {
787 switch (param_name) {
789 return getInfo(param_value_size, param_value, param_value_size_ret,
792 return getInfo(param_value_size, param_value, param_value_size_ret,
795 return getInfo(param_value_size, param_value, param_value_size_ret,
798 auto version = getHipVersionString();
799 return getInfo(param_value_size, param_value, param_value_size_ret,
803 return getInfo(param_value_size, param_value, param_value_size_ret,
"");
824 const bool returnDevices = askingForDefault || askingForGPU;
826 size_t numDevices = returnDevices ?
platform->devices_.size() : 0;
830 *num_devices = numDevices;
833 if (returnDevices && devices) {
834 for (
size_t i = 0; i < std::min(
size_t(num_entries), numDevices); ++i) {
835 devices[i] =
platform->devices_[i].get();
843 return PI_ERROR_OUT_OF_RESOURCES;
855 size_t param_value_size,
void *param_value,
856 size_t *param_value_size_ret) {
858 switch (param_name) {
860 return getInfo(param_value_size, param_value, param_value_size_ret, 1);
862 return getInfo(param_value_size, param_value, param_value_size_ret,
865 return getInfo(param_value_size, param_value, param_value_size_ret,
866 context->get_reference_count());
872 return PI_ERROR_OUT_OF_RESOURCES;
877 assert(
context->get_reference_count() > 0);
879 context->increment_reference_count();
885 context->set_extended_deleter(
function, user_data);
899 (void)out_num_devices;
901 return PI_ERROR_INVALID_OPERATION;
914 if (num_binaries < 1) {
920 #if defined(__HIP_PLATFORM_AMD__)
922 #elif defined(__HIP_PLATFORM_NVIDIA__)
925 #error("Must define exactly one of __HIP_PLATFORM_AMD__ or __HIP_PLATFORM_NVIDIA__");
928 for (
pi_uint32 i = 0; i < num_binaries; i++) {
929 if (strcmp(binaries[i]->DeviceTargetSpec, binary_type) == 0) {
930 *selected_binary = i;
936 return PI_ERROR_INVALID_BINARY;
941 const char *func_name,
945 assert(func_pointer_ret !=
nullptr);
948 hipError_t ret = hipModuleGetFunction(&func, program->
get(), func_name);
949 *func_pointer_ret =
reinterpret_cast<pi_uint64>(func);
952 if (ret != hipSuccess && ret != hipErrorNotFound)
953 retError = PI_CHECK_ERROR(ret);
954 if (ret == hipErrorNotFound) {
955 *func_pointer_ret = 0;
956 retError = PI_ERROR_INVALID_KERNEL_NAME;
970 size_t param_value_size,
void *param_value,
971 size_t *param_value_size_ret) {
973 static constexpr
pi_uint32 max_work_item_dimensions = 3u;
975 assert(
device !=
nullptr);
977 switch (param_name) {
979 return getInfo(param_value_size, param_value, param_value_size_ret,
983 #if defined(__HIP_PLATFORM_AMD__)
985 #elif defined(__HIP_PLATFORM_NVIDIA__)
991 return getInfo(param_value_size, param_value, param_value_size_ret,
995 int compute_units = 0;
997 hipDeviceGetAttribute(&compute_units,
998 hipDeviceAttributeMultiprocessorCount,
999 device->get()) == hipSuccess);
1001 return getInfo(param_value_size, param_value, param_value_size_ret,
1005 return getInfo(param_value_size, param_value, param_value_size_ret,
1006 max_work_item_dimensions);
1009 size_t return_sizes[max_work_item_dimensions];
1011 int max_x = 0, max_y = 0, max_z = 0;
1013 hipDeviceGetAttribute(&max_x, hipDeviceAttributeMaxBlockDimX,
1014 device->get()) == hipSuccess);
1018 hipDeviceGetAttribute(&max_y, hipDeviceAttributeMaxBlockDimY,
1019 device->get()) == hipSuccess);
1023 hipDeviceGetAttribute(&max_z, hipDeviceAttributeMaxBlockDimZ,
1024 device->get()) == hipSuccess);
1027 return_sizes[0] = size_t(max_x);
1028 return_sizes[1] = size_t(max_y);
1029 return_sizes[2] = size_t(max_z);
1030 return getInfoArray(max_work_item_dimensions, param_value_size, param_value,
1031 param_value_size_ret, return_sizes);
1035 size_t return_sizes[max_work_item_dimensions];
1036 int max_x = 0, max_y = 0, max_z = 0;
1038 hipDeviceGetAttribute(&max_x, hipDeviceAttributeMaxGridDimX,
1039 device->get()) == hipSuccess);
1043 hipDeviceGetAttribute(&max_y, hipDeviceAttributeMaxGridDimY,
1044 device->get()) == hipSuccess);
1048 hipDeviceGetAttribute(&max_z, hipDeviceAttributeMaxGridDimZ,
1049 device->get()) == hipSuccess);
1052 return_sizes[0] = size_t(max_x);
1053 return_sizes[1] = size_t(max_y);
1054 return_sizes[2] = size_t(max_z);
1055 return getInfoArray(max_work_item_dimensions, param_value_size, param_value,
1056 param_value_size_ret, return_sizes);
1060 int max_work_group_size = 0;
1062 hipDeviceGetAttribute(&max_work_group_size,
1063 hipDeviceAttributeMaxThreadsPerBlock,
1064 device->get()) == hipSuccess);
1068 return getInfo(param_value_size, param_value, param_value_size_ret,
1069 size_t(max_work_group_size));
1072 return getInfo(param_value_size, param_value, param_value_size_ret, 1u);
1075 return getInfo(param_value_size, param_value, param_value_size_ret, 1u);
1078 return getInfo(param_value_size, param_value, param_value_size_ret, 1u);
1081 return getInfo(param_value_size, param_value, param_value_size_ret, 1u);
1084 return getInfo(param_value_size, param_value, param_value_size_ret, 1u);
1087 return getInfo(param_value_size, param_value, param_value_size_ret, 1u);
1090 return getInfo(param_value_size, param_value, param_value_size_ret, 0u);
1093 return getInfo(param_value_size, param_value, param_value_size_ret, 1u);
1096 return getInfo(param_value_size, param_value, param_value_size_ret, 1u);
1099 return getInfo(param_value_size, param_value, param_value_size_ret, 1u);
1102 return getInfo(param_value_size, param_value, param_value_size_ret, 1u);
1105 return getInfo(param_value_size, param_value, param_value_size_ret, 1u);
1108 return getInfo(param_value_size, param_value, param_value_size_ret, 1u);
1111 return getInfo(param_value_size, param_value, param_value_size_ret, 0u);
1115 int max_threads = 0;
1117 hipDeviceGetAttribute(&max_threads,
1118 hipDeviceAttributeMaxThreadsPerBlock,
1119 device->get()) == hipSuccess);
1122 hipDeviceGetAttribute(&warpSize, hipDeviceAttributeWarpSize,
1123 device->get()) == hipSuccess);
1124 int maxWarps = (max_threads + warpSize - 1) / warpSize;
1125 return getInfo(param_value_size, param_value, param_value_size_ret,
1126 static_cast<uint32_t
>(maxWarps));
1133 hipDeviceGetAttribute(&major, hipDeviceAttributeComputeCapabilityMajor,
1134 device->get()) == hipSuccess);
1135 bool ifp = (major >= 7);
1136 return getInfo(param_value_size, param_value, param_value_size_ret, ifp);
1141 hipDeviceGetAttribute(&warpSize, hipDeviceAttributeWarpSize,
1142 device->get()) == hipSuccess);
1143 size_t sizes[1] = {
static_cast<size_t>(warpSize)};
1144 return getInfoArray<size_t>(1, param_value_size, param_value,
1145 param_value_size_ret, sizes);
1150 hipDeviceGetAttribute(&clock_freq, hipDeviceAttributeClockRate,
1151 device->get()) == hipSuccess);
1153 return getInfo(param_value_size, param_value, param_value_size_ret,
1157 auto bits =
pi_uint32{std::numeric_limits<uintptr_t>::digits};
1158 return getInfo(param_value_size, param_value, param_value_size_ret, bits);
1171 auto quarter_global =
static_cast<pi_uint32>(global / 4u);
1173 auto max_alloc = std::max(std::min(1024u * 1024u * 1024u, quarter_global),
1174 32u * 1024u * 1024u);
1176 return getInfo(param_value_size, param_value, param_value_size_ret,
1180 return getInfo(param_value_size, param_value, param_value_size_ret,
1187 return getInfo(param_value_size, param_value, param_value_size_ret, 128u);
1193 return getInfo(param_value_size, param_value, param_value_size_ret, 128u);
1200 hipDeviceGetAttribute(&tex_height, hipDeviceAttributeMaxTexture2DHeight,
1201 device->get()) == hipSuccess);
1203 int surf_height = 0;
1205 hipDeviceGetAttribute(&surf_height,
1206 hipDeviceAttributeMaxTexture2DHeight,
1207 device->get()) == hipSuccess);
1210 int min = std::min(tex_height, surf_height);
1212 return getInfo(param_value_size, param_value, param_value_size_ret,
min);
1218 hipDeviceGetAttribute(&tex_width, hipDeviceAttributeMaxTexture2DWidth,
1219 device->get()) == hipSuccess);
1223 hipDeviceGetAttribute(&surf_width, hipDeviceAttributeMaxTexture2DWidth,
1224 device->get()) == hipSuccess);
1227 int min = std::min(tex_width, surf_width);
1229 return getInfo(param_value_size, param_value, param_value_size_ret,
min);
1235 hipDeviceGetAttribute(&tex_height, hipDeviceAttributeMaxTexture3DHeight,
1236 device->get()) == hipSuccess);
1238 int surf_height = 0;
1240 hipDeviceGetAttribute(&surf_height,
1241 hipDeviceAttributeMaxTexture3DHeight,
1242 device->get()) == hipSuccess);
1245 int min = std::min(tex_height, surf_height);
1247 return getInfo(param_value_size, param_value, param_value_size_ret,
min);
1253 hipDeviceGetAttribute(&tex_width, hipDeviceAttributeMaxTexture3DWidth,
1254 device->get()) == hipSuccess);
1258 hipDeviceGetAttribute(&surf_width, hipDeviceAttributeMaxTexture3DWidth,
1259 device->get()) == hipSuccess);
1262 int min = std::min(tex_width, surf_width);
1264 return getInfo(param_value_size, param_value, param_value_size_ret,
min);
1270 hipDeviceGetAttribute(&tex_depth, hipDeviceAttributeMaxTexture3DDepth,
1271 device->get()) == hipSuccess);
1275 hipDeviceGetAttribute(&surf_depth, hipDeviceAttributeMaxTexture3DDepth,
1276 device->get()) == hipSuccess);
1279 int min = std::min(tex_depth, surf_depth);
1281 return getInfo(param_value_size, param_value, param_value_size_ret,
min);
1287 hipDeviceGetAttribute(&tex_width, hipDeviceAttributeMaxTexture1DWidth,
1288 device->get()) == hipSuccess);
1292 hipDeviceGetAttribute(&surf_width, hipDeviceAttributeMaxTexture1DWidth,
1293 device->get()) == hipSuccess);
1296 int min = std::min(tex_width, surf_width);
1298 return getInfo(param_value_size, param_value, param_value_size_ret,
min);
1301 return getInfo(param_value_size, param_value, param_value_size_ret,
1307 return getInfo(param_value_size, param_value, param_value_size_ret, 128u);
1312 return getInfo(param_value_size, param_value, param_value_size_ret,
1316 int mem_base_addr_align = 0;
1318 hipDeviceGetAttribute(&mem_base_addr_align,
1319 hipDeviceAttributeTextureAlignment,
1320 device->get()) == hipSuccess);
1322 mem_base_addr_align *= 8;
1323 return getInfo(param_value_size, param_value, param_value_size_ret,
1324 mem_base_addr_align);
1327 return getInfo(param_value_size, param_value, param_value_size_ret, 0u);
1333 return getInfo(param_value_size, param_value, param_value_size_ret, config);
1338 return getInfo(param_value_size, param_value, param_value_size_ret, config);
1341 return getInfo(param_value_size, param_value, param_value_size_ret,
1347 return getInfo(param_value_size, param_value, param_value_size_ret, 128u);
1352 hipDeviceGetAttribute(&cache_size, hipDeviceAttributeL2CacheSize,
1353 device->get()) == hipSuccess);
1356 return getInfo(param_value_size, param_value, param_value_size_ret,
1364 return getInfo(param_value_size, param_value, param_value_size_ret,
1368 unsigned int constant_memory = 0;
1375 hipDeviceGetAttribute(
reinterpret_cast<int *
>(&constant_memory),
1376 hipDeviceAttributeTotalConstantMemory,
1377 device->get()) == hipSuccess);
1379 return getInfo(param_value_size, param_value, param_value_size_ret,
1386 return getInfo(param_value_size, param_value, param_value_size_ret, 9u);
1389 return getInfo(param_value_size, param_value, param_value_size_ret,
1396 int local_mem_size = 0;
1398 hipDeviceGetAttribute(&local_mem_size,
1399 hipDeviceAttributeMaxSharedMemoryPerBlock,
1400 device->get()) == hipSuccess);
1402 return getInfo(param_value_size, param_value, param_value_size_ret,
1406 int ecc_enabled = 0;
1408 hipDeviceGetAttribute(&ecc_enabled, hipDeviceAttributeEccEnabled,
1409 device->get()) == hipSuccess);
1412 auto result =
static_cast<pi_bool>(ecc_enabled);
1413 return getInfo(param_value_size, param_value, param_value_size_ret, result);
1416 int is_integrated = 0;
1418 hipDeviceGetAttribute(&is_integrated, hipDeviceAttributeIntegrated,
1419 device->get()) == hipSuccess);
1422 (is_integrated == 1));
1423 auto result =
static_cast<pi_bool>(is_integrated);
1424 return getInfo(param_value_size, param_value, param_value_size_ret, result);
1429 return getInfo(param_value_size, param_value, param_value_size_ret,
1433 return getInfo(param_value_size, param_value, param_value_size_ret,
1437 return getInfo(param_value_size, param_value, param_value_size_ret,
1441 return getInfo(param_value_size, param_value, param_value_size_ret,
1445 return getInfo(param_value_size, param_value, param_value_size_ret,
1449 return getInfo(param_value_size, param_value, param_value_size_ret,
1454 return getInfo(param_value_size, param_value, param_value_size_ret,
1461 return getInfo(param_value_size, param_value, param_value_size_ret,
1467 return getInfo(param_value_size, param_value, param_value_size_ret,
1473 return getInfo(param_value_size, param_value, param_value_size_ret,
"");
1476 return getInfo(param_value_size, param_value, param_value_size_ret,
1480 static constexpr
size_t MAX_DEVICE_NAME_LENGTH = 256u;
1481 char name[MAX_DEVICE_NAME_LENGTH];
1483 hipDeviceGetName(name, MAX_DEVICE_NAME_LENGTH,
device->get()) ==
1488 if (strlen(name) == 0) {
1489 hipDeviceProp_t props;
1491 hipGetDeviceProperties(&props,
device->get()) == hipSuccess);
1493 return getInfoArray(strlen(props.gcnArchName) + 1, param_value_size,
1494 param_value, param_value_size_ret, props.gcnArchName);
1496 return getInfoArray(strlen(name) + 1, param_value_size, param_value,
1497 param_value_size_ret, name);
1500 return getInfo(param_value_size, param_value, param_value_size_ret,
1504 auto version = getHipVersionString();
1505 return getInfo(param_value_size, param_value, param_value_size_ret,
1509 return getInfo(param_value_size, param_value, param_value_size_ret,
"HIP");
1512 return getInfo(param_value_size, param_value, param_value_size_ret,
1513 device->get_reference_count());
1516 return getInfo(param_value_size, param_value, param_value_size_ret,
1520 return getInfo(param_value_size, param_value, param_value_size_ret,
"");
1527 std::string SupportedExtensions =
"";
1529 SupportedExtensions +=
" ";
1531 return getInfo(param_value_size, param_value, param_value_size_ret,
1532 SupportedExtensions.c_str());
1536 return getInfo(param_value_size, param_value, param_value_size_ret,
1540 return getInfo(param_value_size, param_value, param_value_size_ret,
1544 return getInfo(param_value_size, param_value, param_value_size_ret,
1548 return getInfo(param_value_size, param_value, param_value_size_ret, 0u);
1551 return getInfo(param_value_size, param_value, param_value_size_ret,
1555 return getInfo(param_value_size, param_value, param_value_size_ret, 0u);
1558 return getInfo(param_value_size, param_value, param_value_size_ret,
1573 if (getAttribute(
device, hipDeviceAttributeComputeCapabilityMajor) >= 6) {
1585 return getInfo(param_value_size, param_value, param_value_size_ret, value);
1596 return getInfo(param_value_size, param_value, param_value_size_ret, value);
1605 if (getAttribute(
device, hipDeviceAttributeManagedMemory)) {
1609 if (getAttribute(
device, hipDeviceAttributeConcurrentManagedAccess)) {
1613 if (getAttribute(
device, hipDeviceAttributeComputeCapabilityMajor) >= 6) {
1619 return getInfo(param_value_size, param_value, param_value_size_ret, value);
1631 if (getAttribute(
device, hipDeviceAttributeManagedMemory)) {
1635 if (getAttribute(
device, hipDeviceAttributeConcurrentManagedAccess)) {
1641 if (getAttribute(
device, hipDeviceAttributeComputeCapabilityMajor) >= 6) {
1649 return getInfo(param_value_size, param_value, param_value_size_ret, value);
1659 if (getAttribute(
device, hipDeviceAttributePageableMemoryAccess)) {
1664 return getInfo(param_value_size, param_value, param_value_size_ret, value);
1681 return PI_ERROR_INVALID_VALUE;
1718 "Creation of PI device from native handle not implemented");
1745 const void *private_info,
1746 size_t cb,
void *user_data),
1749 assert(devices !=
nullptr);
1752 assert(user_data ==
nullptr);
1753 assert(num_devices == 1);
1755 assert(retcontext !=
nullptr);
1759 bool property_hip_primary =
false;
1760 while (properties && (0 != *properties)) {
1770 property_hip_primary =
static_cast<bool>(value);
1775 "Unknown piContextCreate property in property list");
1776 return PI_ERROR_INVALID_VALUE;
1780 std::unique_ptr<_pi_context> piContextPtr{
nullptr};
1782 hipCtx_t current =
nullptr;
1784 if (property_hip_primary) {
1789 PI_CHECK_ERROR(hipDevicePrimaryCtxRetain(&Ctxt, devices[0]->
get()));
1790 piContextPtr = std::unique_ptr<_pi_context>(
1792 errcode_ret = PI_CHECK_ERROR(hipCtxPushCurrent(Ctxt));
1795 hipCtx_t newContext;
1796 PI_CHECK_ERROR(hipCtxGetCurrent(¤t));
1797 errcode_ret = PI_CHECK_ERROR(
1798 hipCtxCreate(&newContext, hipDeviceMapHost, devices[0]->
get()));
1799 piContextPtr = std::unique_ptr<_pi_context>(
new _pi_context{
1805 hipEventCreateWithFlags(&piContextPtr->evBase_, hipEventDefault));
1806 PI_CHECK_ERROR(hipEventRecord(piContextPtr->evBase_, 0));
1812 if (current !=
nullptr) {
1813 PI_CHECK_ERROR(hipCtxSetCurrent(current));
1816 *retcontext = piContextPtr.release();
1820 errcode_ret = PI_ERROR_OUT_OF_RESOURCES;
1827 assert(ctxt !=
nullptr);
1834 std::unique_ptr<_pi_context>
context{ctxt};
1836 PI_CHECK_ERROR(hipEventDestroy(
context->evBase_));
1839 hipCtx_t hipCtxt = ctxt->
get();
1842 #if defined(__HIP_PLATFORM_NVIDIA__)
1843 hipCtx_t current =
nullptr;
1844 PI_CHECK_ERROR(hipCtxGetCurrent(¤t));
1845 if (hipCtxt != current) {
1846 PI_CHECK_ERROR(hipCtxPushCurrent(hipCtxt));
1848 PI_CHECK_ERROR(hipCtxSynchronize());
1849 PI_CHECK_ERROR(hipCtxGetCurrent(¤t));
1850 if (hipCtxt == current) {
1851 PI_CHECK_ERROR(hipCtxPopCurrent(¤t));
1854 return PI_CHECK_ERROR(hipCtxDestroy(hipCtxt));
1859 PI_CHECK_ERROR(hipCtxPopCurrent(¤t));
1860 return PI_CHECK_ERROR(hipDevicePrimaryCtxRelease(hipDev));
1863 hipCtx_t hipCtxt = ctxt->
get();
1864 return PI_CHECK_ERROR(hipCtxDestroy(hipCtxt));
1890 bool ownNativeHandle,
1895 (void)ownNativeHandle;
1898 "Creation of PI context from native handle not implemented");
1910 assert(ret_mem !=
nullptr);
1911 assert((properties ==
nullptr || *properties == 0) &&
1912 "no mem properties goes to HIP RT yet");
1916 const bool enableUseHostPtr =
false;
1917 const bool performInitialCopy =
1921 pi_mem retMemObj =
nullptr;
1924 ScopedContext active(
context);
1930 retErr = PI_CHECK_ERROR(
1931 hipHostRegister(
host_ptr, size, hipHostRegisterMapped));
1932 retErr = PI_CHECK_ERROR(hipHostGetDevicePointer(&ptr,
host_ptr, 0));
1935 retErr = PI_CHECK_ERROR(hipHostMalloc(&
host_ptr, size));
1936 retErr = PI_CHECK_ERROR(hipHostGetDevicePointer(&ptr,
host_ptr, 0));
1939 retErr = PI_CHECK_ERROR(hipMalloc(&ptr, size));
1945 if (retErr == PI_SUCCESS) {
1946 pi_mem parentBuffer =
nullptr;
1949 reinterpret_cast<_pi_mem::mem_::mem_::buffer_mem_::native_type
>(ptr);
1950 auto piMemObj = std::unique_ptr<_pi_mem>(
new _pi_mem{
1952 if (piMemObj !=
nullptr) {
1953 retMemObj = piMemObj.release();
1954 if (performInitialCopy) {
1956 retErr = PI_CHECK_ERROR(hipMemcpyHtoD(devPtr,
host_ptr, size));
1960 if (retErr == PI_SUCCESS) {
1961 hipStream_t defaultStream = 0;
1962 retErr = PI_CHECK_ERROR(hipStreamSynchronize(defaultStream));
1966 retErr = PI_ERROR_OUT_OF_HOST_MEMORY;
1972 retErr = PI_ERROR_OUT_OF_RESOURCES;
1975 *ret_mem = retMemObj;
1985 assert((memObj !=
nullptr) &&
"PI_ERROR_INVALID_MEM_OBJECTS");
1997 std::unique_ptr<_pi_mem> uniqueMemObj(memObj);
2003 ScopedContext active(uniqueMemObj->get_context());
2006 switch (uniqueMemObj->mem_.buffer_mem_.allocMode_) {
2009 ret = PI_CHECK_ERROR(
2010 hipFree((
void *)uniqueMemObj->mem_.buffer_mem_.ptr_));
2013 ret = PI_CHECK_ERROR(
2014 hipHostUnregister(uniqueMemObj->mem_.buffer_mem_.hostPtr_));
2017 ret = PI_CHECK_ERROR(
2018 hipFreeHost(uniqueMemObj->mem_.buffer_mem_.hostPtr_));
2023 ret = PI_CHECK_ERROR(hipDestroySurfaceObject(
2024 uniqueMemObj->mem_.surface_mem_.get_surface()));
2025 auto array = uniqueMemObj->mem_.surface_mem_.get_array();
2026 ret = PI_CHECK_ERROR(hipFreeArray(array));
2032 ret = PI_ERROR_OUT_OF_RESOURCES;
2035 if (ret != PI_SUCCESS) {
2041 "Unrecoverable program state reached in hip_piMemRelease");
2053 void *buffer_create_info,
pi_mem *memObj) {
2054 assert((parent_buffer !=
nullptr) &&
"PI_ERROR_INVALID_MEM_OBJECT");
2055 assert(parent_buffer->
is_buffer() &&
"PI_ERROR_INVALID_MEM_OBJECTS");
2056 assert(!parent_buffer->
is_sub_buffer() &&
"PI_ERROR_INVALID_MEM_OBJECT");
2065 "PI_ERROR_INVALID_VALUE");
2066 assert((buffer_create_info !=
nullptr) &&
"PI_ERROR_INVALID_VALUE");
2067 assert(memObj !=
nullptr);
2069 const auto bufferRegion =
2071 assert((bufferRegion.size != 0u) &&
"PI_ERROR_INVALID_BUFFER_SIZE");
2073 assert((bufferRegion.origin <= (bufferRegion.origin + bufferRegion.size)) &&
2075 assert(((bufferRegion.origin + bufferRegion.size) <=
2077 "PI_ERROR_INVALID_BUFFER_SIZE");
2088 void *hostPtr =
nullptr;
2091 bufferRegion.origin;
2096 std::unique_ptr<_pi_mem> retMemObj{
nullptr};
2098 ScopedContext active(
context);
2100 retMemObj = std::unique_ptr<_pi_mem>{
new _pi_mem{
2101 context, parent_buffer, allocMode, ptr, hostPtr, bufferRegion.size}};
2107 return PI_ERROR_OUT_OF_HOST_MEMORY;
2111 *memObj = retMemObj.release();
2116 size_t expectedQuerySize,
void *queryOutput,
2117 size_t *writtenQuerySize) {
2120 (void)expectedQuerySize;
2122 (void)writtenQuerySize;
2135 #if defined(__HIP_PLATFORM_NVIDIA__)
2146 return PI_ERROR_INVALID_MEM_OBJECT;
2150 #elif defined(__HIP_PLATFORM_AMD__)
2154 #error("Must define exactly one of __HIP_PLATFORM_AMD__ or __HIP_PLATFORM_NVIDIA__");
2172 bool ownNativeHandle,
2176 (void)ownNativeHandle;
2180 "Creation of PI mem from native handle not implemented");
2195 std::unique_ptr<_pi_queue> queueImpl{
nullptr};
2199 return PI_ERROR_INVALID_DEVICE;
2202 ScopedContext active(
context);
2204 hipStream_t hipStream;
2206 err = PI_CHECK_ERROR(hipStreamCreate(&hipStream));
2207 if (err != PI_SUCCESS) {
2211 queueImpl = std::unique_ptr<_pi_queue>(
2214 *
queue = queueImpl.release();
2223 return PI_ERROR_OUT_OF_RESOURCES;
2228 size_t param_value_size,
void *param_value,
2229 size_t *param_value_size_ret) {
2230 assert(command_queue !=
nullptr);
2232 switch (param_name) {
2234 return getInfo(param_value_size, param_value, param_value_size_ret,
2237 return getInfo(param_value_size, param_value, param_value_size_ret,
2240 return getInfo(param_value_size, param_value, param_value_size_ret,
2243 return getInfo(param_value_size, param_value, param_value_size_ret,
2253 assert(command_queue !=
nullptr);
2261 assert(command_queue !=
nullptr);
2268 std::unique_ptr<_pi_queue> queueImpl(command_queue);
2270 ScopedContext active(command_queue->
get_context());
2272 auto stream = queueImpl->stream_;
2273 PI_CHECK_ERROR(hipStreamSynchronize(stream));
2274 PI_CHECK_ERROR(hipStreamDestroy(stream));
2280 return PI_ERROR_OUT_OF_RESOURCES;
2287 pi_result result = PI_ERROR_OUT_OF_HOST_MEMORY;
2291 assert(command_queue !=
2293 ScopedContext active(command_queue->
get_context());
2294 result = PI_CHECK_ERROR(hipStreamSynchronize(command_queue->
stream_));
2302 result = PI_ERROR_OUT_OF_RESOURCES;
2312 (void)command_queue;
2343 bool ownNativeHandle,
2349 (void)ownNativeHandle;
2351 "Creation of PI queue from native handle not implemented");
2356 pi_bool blocking_write,
size_t offset,
2357 size_t size,
void *ptr,
2362 assert(buffer !=
nullptr);
2363 assert(command_queue !=
nullptr);
2365 hipStream_t hipStream = command_queue->
get();
2366 std::unique_ptr<_pi_event> retImplEv{
nullptr};
2369 ScopedContext active(command_queue->
get_context());
2372 event_wait_list,
nullptr);
2380 retErr = PI_CHECK_ERROR(
2382 ptr, size, hipStream));
2385 retErr = retImplEv->record();
2388 if (blocking_write) {
2389 retErr = PI_CHECK_ERROR(hipStreamSynchronize(hipStream));
2393 *
event = retImplEv.release();
2402 pi_bool blocking_read,
size_t offset,
2403 size_t size,
void *ptr,
2408 assert(buffer !=
nullptr);
2409 assert(command_queue !=
nullptr);
2411 hipStream_t hipStream = command_queue->
get();
2412 std::unique_ptr<_pi_event> retImplEv{
nullptr};
2415 ScopedContext active(command_queue->
get_context());
2418 event_wait_list,
nullptr);
2426 retErr = PI_CHECK_ERROR(hipMemcpyDtoHAsync(
2431 retErr = retImplEv->record();
2434 if (blocking_read) {
2435 retErr = PI_CHECK_ERROR(hipStreamSynchronize(hipStream));
2439 *
event = retImplEv.release();
2451 assert(num_events != 0);
2453 if (num_events == 0) {
2454 return PI_ERROR_INVALID_VALUE;
2458 return PI_ERROR_INVALID_EVENT;
2462 ScopedContext active(
context);
2466 return PI_ERROR_INVALID_EVENT;
2470 return PI_ERROR_INVALID_CONTEXT;
2473 return event->wait();
2475 return forLatestEvents(event_list, num_events, waitFunc);
2479 return PI_ERROR_OUT_OF_RESOURCES;
2485 assert(
kernel !=
nullptr);
2486 assert(program !=
nullptr);
2489 std::unique_ptr<_pi_kernel> retKernel{
nullptr};
2494 hipFunction_t hipFunc;
2495 retErr = PI_CHECK_ERROR(
2496 hipModuleGetFunction(&hipFunc, program->
get(), kernel_name));
2498 std::string kernel_name_woffset = std::string(kernel_name) +
"_with_offset";
2499 hipFunction_t hipFuncWithOffsetParam;
2500 hipError_t offsetRes = hipModuleGetFunction(
2501 &hipFuncWithOffsetParam, program->
get(), kernel_name_woffset.c_str());
2504 if (offsetRes == hipErrorNotFound) {
2505 hipFuncWithOffsetParam =
nullptr;
2507 retErr = PI_CHECK_ERROR(offsetRes);
2510 retKernel = std::unique_ptr<_pi_kernel>(
2511 new _pi_kernel{hipFunc, hipFuncWithOffsetParam, kernel_name, program,
2516 retErr = PI_ERROR_OUT_OF_HOST_MEMORY;
2519 *
kernel = retKernel.release();
2524 size_t arg_size,
const void *arg_value) {
2526 assert(
kernel !=
nullptr);
2530 kernel->set_kernel_arg(arg_index, arg_size, arg_value);
2532 kernel->set_kernel_local_arg(arg_index, arg_size);
2541 const pi_mem *arg_value) {
2543 assert(
kernel !=
nullptr);
2544 assert(arg_value !=
nullptr);
2548 pi_mem arg_mem = *arg_value;
2552 hipArray_Format Format;
2554 getArrayDesc(array, Format, NumChannels);
2555 if (Format != HIP_AD_FORMAT_UNSIGNED_INT32 &&
2556 Format != HIP_AD_FORMAT_SIGNED_INT32 &&
2557 Format != HIP_AD_FORMAT_HALF && Format != HIP_AD_FORMAT_FLOAT) {
2559 "PI HIP kernels only support images with channel types int32, "
2560 "uint32, float, and half.");
2563 kernel->set_kernel_arg(arg_index,
sizeof(hipSurf), (
void *)&hipSurf);
2568 kernel->set_kernel_arg(arg_index,
sizeof(
void *), (
void *)&hipPtr);
2579 assert(
kernel !=
nullptr);
2580 assert(arg_value !=
nullptr);
2584 pi_uint32 samplerProps = (*arg_value)->props_;
2585 kernel->set_kernel_arg(arg_index,
sizeof(
pi_uint32), (
void *)&samplerProps);
2594 const size_t *global_work_offset,
const size_t *global_work_size,
2595 const size_t *local_work_size,
pi_uint32 num_events_in_wait_list,
2599 assert(command_queue !=
nullptr);
2601 assert(
kernel !=
nullptr);
2602 assert(global_work_offset !=
nullptr);
2603 assert(work_dim > 0);
2604 assert(work_dim < 4);
2608 size_t threadsPerBlock[3] = {32u, 1u, 1u};
2609 size_t maxWorkGroupSize = 0u;
2610 size_t maxThreadsPerBlock[3] = {};
2611 bool providedLocalWorkGroupSize = (local_work_size !=
nullptr);
2616 sizeof(maxThreadsPerBlock), maxThreadsPerBlock,
nullptr);
2617 assert(retError == PI_SUCCESS);
2622 sizeof(maxWorkGroupSize), &maxWorkGroupSize,
nullptr);
2623 assert(retError == PI_SUCCESS);
2627 if (providedLocalWorkGroupSize) {
2628 auto isValid = [&](
int dim) {
2629 if (local_work_size[dim] > maxThreadsPerBlock[dim])
2630 return PI_ERROR_INVALID_WORK_ITEM_SIZE;
2634 if (0u == local_work_size[dim])
2635 return PI_ERROR_INVALID_WORK_GROUP_SIZE;
2636 if (0u != (global_work_size[dim] % local_work_size[dim]))
2637 return PI_ERROR_INVALID_WORK_GROUP_SIZE;
2638 threadsPerBlock[dim] = local_work_size[dim];
2642 for (
size_t dim = 0; dim < work_dim; dim++) {
2643 auto err = isValid(dim);
2644 if (err != PI_SUCCESS)
2648 simpleGuessLocalWorkSize(threadsPerBlock, global_work_size,
2649 maxThreadsPerBlock,
kernel);
2653 if (maxWorkGroupSize <
2654 size_t(threadsPerBlock[0] * threadsPerBlock[1] * threadsPerBlock[2])) {
2655 return PI_ERROR_INVALID_WORK_GROUP_SIZE;
2658 size_t blocksPerGrid[3] = {1u, 1u, 1u};
2660 for (
size_t i = 0; i < work_dim; i++) {
2662 (global_work_size[i] + threadsPerBlock[i] - 1) / threadsPerBlock[i];
2666 std::unique_ptr<_pi_event> retImplEv{
nullptr};
2669 ScopedContext active(command_queue->
get_context());
2670 hipStream_t hipStream = command_queue->
get();
2671 hipFunction_t hipFunc =
kernel->get();
2674 event_wait_list,
nullptr);
2677 if (
kernel->get_with_offset_parameter()) {
2678 std::uint32_t hip_implicit_offset[3] = {0, 0, 0};
2679 if (global_work_offset) {
2680 for (
size_t i = 0; i < work_dim; i++) {
2681 hip_implicit_offset[i] =
2682 static_cast<std::uint32_t
>(global_work_offset[i]);
2683 if (global_work_offset[i] != 0) {
2684 hipFunc =
kernel->get_with_offset_parameter();
2688 kernel->set_implicit_offset_arg(
sizeof(hip_implicit_offset),
2689 hip_implicit_offset);
2692 auto argIndices =
kernel->get_arg_indices();
2700 retError = PI_CHECK_ERROR(hipModuleLaunchKernel(
2701 hipFunc, blocksPerGrid[0], blocksPerGrid[1], blocksPerGrid[2],
2702 threadsPerBlock[0], threadsPerBlock[1], threadsPerBlock[2],
2703 kernel->get_local_size(), hipStream, argIndices.data(),
nullptr));
2705 kernel->clear_local_size();
2707 retError = retImplEv->record();
2711 *
event = retImplEv.release();
2722 size_t cb_args,
pi_uint32 num_mem_objects,
2723 const pi_mem *mem_list,
const void **args_mem_loc,
2730 (void)num_mem_objects;
2733 (void)num_events_in_wait_list;
2734 (void)event_wait_list;
2749 assert(ret_mem !=
nullptr);
2759 "hip_piMemImageCreate only supports RGBA channel order");
2765 HIP_ARRAY3D_DESCRIPTOR array_desc;
2766 array_desc.NumChannels = 4;
2767 array_desc.Flags = 0;
2770 array_desc.Height = 0;
2771 array_desc.Depth = 0;
2774 array_desc.Depth = 0;
2781 size_t pixel_type_size_bytes;
2786 array_desc.Format = HIP_AD_FORMAT_UNSIGNED_INT8;
2787 pixel_type_size_bytes = 1;
2790 array_desc.Format = HIP_AD_FORMAT_SIGNED_INT8;
2791 pixel_type_size_bytes = 1;
2795 array_desc.Format = HIP_AD_FORMAT_UNSIGNED_INT16;
2796 pixel_type_size_bytes = 2;
2799 array_desc.Format = HIP_AD_FORMAT_SIGNED_INT16;
2800 pixel_type_size_bytes = 2;
2803 array_desc.Format = HIP_AD_FORMAT_HALF;
2804 pixel_type_size_bytes = 2;
2807 array_desc.Format = HIP_AD_FORMAT_UNSIGNED_INT32;
2808 pixel_type_size_bytes = 4;
2811 array_desc.Format = HIP_AD_FORMAT_SIGNED_INT32;
2812 pixel_type_size_bytes = 4;
2815 array_desc.Format = HIP_AD_FORMAT_FLOAT;
2816 pixel_type_size_bytes = 4;
2820 "hip_piMemImageCreate given unsupported image_channel_data_type");
2824 size_t pixel_size_bytes =
2825 pixel_type_size_bytes * 4;
2826 size_t image_size_bytes = pixel_size_bytes * image_desc->
image_width *
2829 ScopedContext active(
context);
2830 hipArray *image_array;
2831 retErr = PI_CHECK_ERROR(hipArray3DCreate(
2832 reinterpret_cast<hipCUarray *
>(&image_array), &array_desc));
2835 if (performInitialCopy) {
2838 retErr = PI_CHECK_ERROR(
2839 hipMemcpyHtoA(image_array, 0,
host_ptr, image_size_bytes));
2841 hip_Memcpy2D cpy_desc;
2842 memset(&cpy_desc, 0,
sizeof(cpy_desc));
2843 cpy_desc.srcMemoryType = hipMemoryType::hipMemoryTypeHost;
2845 cpy_desc.dstMemoryType = hipMemoryType::hipMemoryTypeArray;
2846 cpy_desc.dstArray =
reinterpret_cast<hipCUarray
>(image_array);
2847 cpy_desc.WidthInBytes = pixel_size_bytes * image_desc->
image_width;
2849 retErr = PI_CHECK_ERROR(hipMemcpyParam2D(&cpy_desc));
2851 HIP_MEMCPY3D cpy_desc;
2852 memset(&cpy_desc, 0,
sizeof(cpy_desc));
2853 cpy_desc.srcMemoryType = hipMemoryType::hipMemoryTypeHost;
2855 cpy_desc.dstMemoryType = hipMemoryType::hipMemoryTypeArray;
2856 cpy_desc.dstArray =
reinterpret_cast<hipCUarray
>(image_array);
2857 cpy_desc.WidthInBytes = pixel_size_bytes * image_desc->
image_width;
2860 retErr = PI_CHECK_ERROR(hipDrvMemcpy3D(&cpy_desc));
2871 hipResourceDesc image_res_desc;
2872 image_res_desc.res.array.array = image_array;
2873 image_res_desc.resType = hipResourceTypeArray;
2875 hipSurfaceObject_t surface;
2876 retErr = PI_CHECK_ERROR(hipCreateSurfaceObject(&surface, &image_res_desc));
2878 auto piMemObj = std::unique_ptr<_pi_mem>(
new _pi_mem{
2881 if (piMemObj ==
nullptr) {
2882 return PI_ERROR_OUT_OF_HOST_MEMORY;
2885 *ret_mem = piMemObj.release();
2887 PI_CHECK_ERROR(hipFreeArray(image_array));
2890 PI_CHECK_ERROR(hipFreeArray(image_array));
2891 return PI_ERROR_UNKNOWN;
2898 size_t param_value_size,
void *param_value,
2899 size_t *param_value_size_ret) {
2902 (void)param_value_size;
2904 (void)param_value_size_ret;
2911 assert(mem !=
nullptr);
2921 const char **strings,
2922 const size_t *lengths,
2931 "hip_piclProgramCreateWithSource not implemented");
2932 return PI_ERROR_INVALID_OPERATION;
2940 const pi_device *device_list,
const char *options,
2945 assert(program !=
nullptr);
2946 assert(num_devices == 1 || num_devices == 0);
2947 assert(device_list !=
nullptr || num_devices == 0);
2949 assert(user_data ==
nullptr);
2983 const size_t *lengths,
const unsigned char **binaries,
2986 (void)num_metadata_entries;
2988 (void)binary_status;
2991 assert(binaries !=
nullptr);
2992 assert(program !=
nullptr);
2993 assert(device_list !=
nullptr);
2994 assert(num_devices == 1 &&
"HIP contexts are for a single device");
2995 assert((
context->get_device()->get() == device_list[0]->
get()) &&
2996 "Mismatch between devices context and passed context when creating "
2997 "program from binary");
3006 const bool has_length = (lengths !=
nullptr);
3007 size_t length = has_length
3009 : strlen(
reinterpret_cast<const char *
>(binaries[0])) + 1;
3013 retProgram->set_binary(
reinterpret_cast<const char *
>(binaries[0]),
length);
3015 *program = retProgram.release();
3021 size_t param_value_size,
void *param_value,
3022 size_t *param_value_size_ret) {
3023 assert(program !=
nullptr);
3025 switch (param_name) {
3027 return getInfo(param_value_size, param_value, param_value_size_ret,
3030 return getInfo(param_value_size, param_value, param_value_size_ret,
3033 return getInfo(param_value_size, param_value, param_value_size_ret, 1u);
3035 return getInfoArray(1, param_value_size, param_value, param_value_size_ret,
3038 return getInfo(param_value_size, param_value, param_value_size_ret,
3041 return getInfoArray(1, param_value_size, param_value, param_value_size_ret,
3044 return getInfoArray(1, param_value_size, param_value, param_value_size_ret,
3047 return getInfo(param_value_size, param_value, param_value_size_ret,
3058 const pi_device *device_list,
const char *options,
3068 (void)num_input_programs;
3069 (void)input_programs;
3074 "hip_piProgramLink: linking not supported with hip backend");
3084 const char *options,
pi_uint32 num_input_headers,
3085 const pi_program *input_headers,
const char **header_include_names,
3087 (void)input_headers;
3088 (void)header_include_names;
3090 assert(program !=
nullptr);
3091 assert(num_devices == 1 || num_devices == 0);
3092 assert(device_list !=
nullptr || num_devices == 0);
3094 assert(user_data ==
nullptr);
3095 assert(num_input_headers == 0);
3111 size_t param_value_size,
void *param_value,
3112 size_t *param_value_size_ret) {
3115 assert(program !=
nullptr);
3117 switch (param_name) {
3119 return getInfo(param_value_size, param_value, param_value_size_ret,
3123 return getInfo(param_value_size, param_value, param_value_size_ret,
3126 return getInfoArray(program->
MAX_LOG_SIZE, param_value_size, param_value,
3127 param_value_size_ret, program->
infoLog_);
3136 assert(program !=
nullptr);
3146 assert(program !=
nullptr);
3151 "Reference count overflow detected in hip_piProgramRelease.");
3156 std::unique_ptr<_pi_program> program_ptr{program};
3158 pi_result result = PI_ERROR_INVALID_PROGRAM;
3162 auto hipModule = program->
get();
3163 result = PI_CHECK_ERROR(hipModuleUnload(hipModule));
3165 result = PI_ERROR_OUT_OF_RESOURCES;
3199 bool ownNativeHandle,
3203 (void)ownNativeHandle;
3207 "Creation of PI program from native handle not implemented");
3212 size_t param_value_size,
void *param_value,
3213 size_t *param_value_size_ret) {
3217 switch (param_name) {
3219 return getInfo(param_value_size, param_value, param_value_size_ret,
3222 return getInfo(param_value_size, param_value, param_value_size_ret,
3225 return getInfo(param_value_size, param_value, param_value_size_ret,
3226 kernel->get_reference_count());
3228 return getInfo(param_value_size, param_value, param_value_size_ret,
3232 return getInfo(param_value_size, param_value, param_value_size_ret,
3236 return getInfo(param_value_size, param_value, param_value_size_ret,
"");
3244 return PI_ERROR_INVALID_KERNEL;
3249 size_t param_value_size,
void *param_value,
3250 size_t *param_value_size_ret) {
3256 switch (param_name) {
3258 int max_threads = 0;
3260 hipFuncGetAttribute(&max_threads,
3261 HIP_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK,
3262 kernel->get()) == hipSuccess);
3263 return getInfo(param_value_size, param_value, param_value_size_ret,
3264 size_t(max_threads));
3273 size_t group_size[3] = {0, 0, 0};
3274 return getInfoArray(3, param_value_size, param_value,
3275 param_value_size_ret, group_size);
3281 hipFuncGetAttribute(&bytes, HIP_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES,
3282 kernel->get()) == hipSuccess);
3283 return getInfo(param_value_size, param_value, param_value_size_ret,
3290 hipDeviceGetAttribute(&warpSize, hipDeviceAttributeWarpSize,
3291 device->get()) == hipSuccess);
3292 return getInfo(param_value_size, param_value, param_value_size_ret,
3293 static_cast<size_t>(warpSize));
3299 hipFuncGetAttribute(&bytes, HIP_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES,
3300 kernel->get()) == hipSuccess);
3301 return getInfo(param_value_size, param_value, param_value_size_ret,
3306 "piKernelGetGroupInfo not implemented\n");
3315 return PI_ERROR_INVALID_KERNEL;
3320 size_t input_value_size,
const void *input_value,
size_t param_value_size,
3321 void *param_value,
size_t *param_value_size_ret) {
3322 (void)input_value_size;
3326 switch (param_name) {
3331 hipDeviceGetAttribute(&warpSize, hipDeviceAttributeWarpSize,
3332 device->get()) == hipSuccess);
3333 return getInfo(param_value_size, param_value, param_value_size_ret,
3334 static_cast<uint32_t
>(warpSize));
3338 int max_threads = 0;
3340 hipFuncGetAttribute(&max_threads,
3341 HIP_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK,
3342 kernel->get()) == hipSuccess);
3345 0,
nullptr,
sizeof(uint32_t), &warpSize,
3347 int maxWarps = (max_threads + warpSize - 1) / warpSize;
3348 return getInfo(param_value_size, param_value, param_value_size_ret,
3349 static_cast<uint32_t
>(maxWarps));
3354 return getInfo(param_value_size, param_value, param_value_size_ret, 0);
3361 return getInfo(param_value_size, param_value, param_value_size_ret, 0);
3367 return PI_ERROR_INVALID_KERNEL;
3371 assert(
kernel !=
nullptr);
3372 assert(
kernel->get_reference_count() > 0u);
3374 kernel->increment_reference_count();
3379 assert(
kernel !=
nullptr);
3383 assert(
kernel->get_reference_count() != 0 &&
3384 "Reference count overflow detected in hip_piKernelRelease.");
3387 if (
kernel->decrement_reference_count() == 0) {
3399 size_t param_value_size,
3400 const void *param_value) {
3403 (void)param_value_size;
3410 size_t,
const void *) {
3414 "Native specialization constants are not supported");
3419 size_t arg_size,
const void *arg_value) {
3420 kernel->set_kernel_arg(arg_index, arg_size, arg_value);
3435 size_t param_value_size,
void *param_value,
3436 size_t *param_value_size_ret) {
3437 assert(
event !=
nullptr);
3439 switch (param_name) {
3441 return getInfo(param_value_size, param_value, param_value_size_ret,
3442 event->get_queue());
3444 return getInfo(param_value_size, param_value, param_value_size_ret,
3445 event->get_command_type());
3447 return getInfo(param_value_size, param_value, param_value_size_ret,
3448 event->get_reference_count());
3450 return getInfo(param_value_size, param_value, param_value_size_ret,
3454 return getInfo(param_value_size, param_value, param_value_size_ret,
3455 event->get_context());
3460 return PI_ERROR_INVALID_EVENT;
3467 size_t param_value_size,
3469 size_t *param_value_size_ret) {
3471 assert(
event !=
nullptr);
3475 return PI_ERROR_PROFILING_INFO_NOT_AVAILABLE;
3478 switch (param_name) {
3481 return getInfo<pi_uint64>(param_value_size, param_value,
3482 param_value_size_ret,
event->get_queued_time());
3484 return getInfo<pi_uint64>(param_value_size, param_value,
3485 param_value_size_ret,
event->get_start_time());
3487 return getInfo<pi_uint64>(param_value_size, param_value,
3488 param_value_size_ret,
event->get_end_time());
3497 pi_int32 command_exec_callback_type,
3500 (void)command_exec_callback_type;
3510 (void)execution_status;
3513 return PI_ERROR_INVALID_VALUE;
3517 assert(
event !=
nullptr);
3519 const auto refCount =
event->increment_reference_count();
3522 refCount != 0,
"Reference count overflow detected in hip_piEventRetain.");
3528 assert(
event !=
nullptr);
3533 event->get_reference_count() != 0,
3534 "Reference count overflow detected in hip_piEventRelease.");
3537 if (
event->decrement_reference_count() == 0) {
3538 std::unique_ptr<_pi_event> event_ptr{
event};
3539 pi_result result = PI_ERROR_INVALID_EVENT;
3541 ScopedContext active(
event->get_context());
3542 result =
event->release();
3544 result = PI_ERROR_OUT_OF_RESOURCES;
3564 command_queue, num_events_in_wait_list, event_wait_list,
event);
3576 if (!command_queue) {
3577 return PI_ERROR_INVALID_QUEUE;
3581 ScopedContext active(command_queue->
get_context());
3583 if (event_wait_list) {
3585 forLatestEvents(event_wait_list, num_events_in_wait_list,
3590 if (result != PI_SUCCESS) {
3605 return PI_ERROR_UNKNOWN;
3631 bool ownNativeHandle,
3635 (void)ownNativeHandle;
3639 "Creation of PI event from native handle not implemented");
3657 bool propSeen[3] = {
false,
false,
false};
3658 for (
size_t i = 0; sampler_properties[i] != 0; i += 2) {
3659 switch (sampler_properties[i]) {
3662 return PI_ERROR_INVALID_VALUE;
3665 retImplSampl->props_ |= sampler_properties[i + 1];
3669 return PI_ERROR_INVALID_VALUE;
3672 retImplSampl->props_ |=
3677 return PI_ERROR_INVALID_VALUE;
3680 retImplSampl->props_ |=
3684 return PI_ERROR_INVALID_VALUE;
3689 retImplSampl->props_ |=
PI_TRUE;
3693 retImplSampl->props_ |=
3698 *result_sampler = retImplSampl.release();
3712 size_t param_value_size,
void *param_value,
3713 size_t *param_value_size_ret) {
3714 assert(sampler !=
nullptr);
3716 switch (param_name) {
3718 return getInfo(param_value_size, param_value, param_value_size_ret,
3721 return getInfo(param_value_size, param_value, param_value_size_ret,
3725 return getInfo(param_value_size, param_value, param_value_size_ret,
3731 return getInfo(param_value_size, param_value, param_value_size_ret,
3738 return getInfo(param_value_size, param_value, param_value_size_ret,
3753 assert(sampler !=
nullptr);
3765 assert(sampler !=
nullptr);
3771 "Reference count overflow detected in hip_piSamplerRelease.");
3789 size_t src_row_pitch,
size_t src_slice_pitch,
void *dst_ptr,
3791 size_t dst_row_pitch,
size_t dst_slice_pitch) {
3793 assert(region !=
nullptr);
3794 assert(src_offset !=
nullptr);
3795 assert(dst_offset !=
nullptr);
3797 assert(src_type == hipMemoryTypeDevice || src_type == hipMemoryTypeHost);
3798 assert(dst_type == hipMemoryTypeDevice || dst_type == hipMemoryTypeHost);
3800 src_row_pitch = (!src_row_pitch) ? region->
width_bytes : src_row_pitch;
3801 src_slice_pitch = (!src_slice_pitch) ? (region->
height_scalar * src_row_pitch)
3803 dst_row_pitch = (!dst_row_pitch) ? region->
width_bytes : dst_row_pitch;
3804 dst_slice_pitch = (!dst_slice_pitch) ? (region->
height_scalar * dst_row_pitch)
3807 HIP_MEMCPY3D params;
3813 params.srcMemoryType = src_type;
3814 params.srcDevice = src_type == hipMemoryTypeDevice
3815 ? *
static_cast<const hipDeviceptr_t *
>(src_ptr)
3817 params.srcHost = src_type == hipMemoryTypeHost ? src_ptr :
nullptr;
3818 params.srcXInBytes = src_offset->
x_bytes;
3819 params.srcY = src_offset->
y_scalar;
3820 params.srcZ = src_offset->
z_scalar;
3821 params.srcPitch = src_row_pitch;
3822 params.srcHeight = src_slice_pitch / src_row_pitch;
3824 params.dstMemoryType = dst_type;
3825 params.dstDevice = dst_type == hipMemoryTypeDevice
3826 ? *
reinterpret_cast<hipDeviceptr_t *
>(dst_ptr)
3828 params.dstHost = dst_type == hipMemoryTypeHost ? dst_ptr :
nullptr;
3829 params.dstXInBytes = dst_offset->
x_bytes;
3830 params.dstY = dst_offset->
y_scalar;
3831 params.dstZ = dst_offset->
z_scalar;
3832 params.dstPitch = dst_row_pitch;
3833 params.dstHeight = dst_slice_pitch / dst_row_pitch;
3835 return PI_CHECK_ERROR(hipDrvMemcpy3DAsync(¶ms, hip_stream));
3844 size_t buffer_slice_pitch,
size_t host_row_pitch,
size_t host_slice_pitch,
3845 void *ptr,
pi_uint32 num_events_in_wait_list,
3848 assert(buffer !=
nullptr);
3849 assert(command_queue !=
nullptr);
3852 hipStream_t hipStream = command_queue->
get();
3854 std::unique_ptr<_pi_event> retImplEv{
nullptr};
3857 ScopedContext active(command_queue->
get_context());
3860 event_wait_list,
nullptr);
3869 hipStream, region, &devPtr, hipMemoryTypeDevice, buffer_offset,
3870 buffer_row_pitch, buffer_slice_pitch, ptr, hipMemoryTypeHost,
3871 host_offset, host_row_pitch, host_slice_pitch);
3874 retErr = retImplEv->record();
3877 if (blocking_read) {
3878 retErr = PI_CHECK_ERROR(hipStreamSynchronize(hipStream));
3882 *
event = retImplEv.release();
3895 size_t buffer_slice_pitch,
size_t host_row_pitch,
size_t host_slice_pitch,
3896 const void *ptr,
pi_uint32 num_events_in_wait_list,
3899 assert(buffer !=
nullptr);
3900 assert(command_queue !=
nullptr);
3903 hipStream_t hipStream = command_queue->
get();
3905 std::unique_ptr<_pi_event> retImplEv{
nullptr};
3908 ScopedContext active(command_queue->
get_context());
3911 event_wait_list,
nullptr);
3920 hipStream, region, ptr, hipMemoryTypeHost, host_offset, host_row_pitch,
3921 host_slice_pitch, &devPtr, hipMemoryTypeDevice, buffer_offset,
3922 buffer_row_pitch, buffer_slice_pitch);
3925 retErr = retImplEv->record();
3928 if (blocking_write) {
3929 retErr = PI_CHECK_ERROR(hipStreamSynchronize(hipStream));
3933 *
event = retImplEv.release();
3943 pi_mem dst_buffer,
size_t src_offset,
3944 size_t dst_offset,
size_t size,
3948 if (!command_queue) {
3949 return PI_ERROR_INVALID_QUEUE;
3952 std::unique_ptr<_pi_event> retImplEv{
nullptr};
3955 ScopedContext active(command_queue->
get_context());
3957 if (event_wait_list) {
3959 event_wait_list,
nullptr);
3967 result = retImplEv->start();
3970 auto stream = command_queue->
get();
3974 result = PI_CHECK_ERROR(hipMemcpyDtoDAsync(dst, src, size, stream));
3977 result = retImplEv->record();
3978 *
event = retImplEv.release();
3985 return PI_ERROR_UNKNOWN;
3993 size_t dst_row_pitch,
size_t dst_slice_pitch,
3997 assert(src_buffer !=
nullptr);
3998 assert(dst_buffer !=
nullptr);
3999 assert(command_queue !=
nullptr);
4002 hipStream_t hipStream = command_queue->
get();
4005 std::unique_ptr<_pi_event> retImplEv{
nullptr};
4008 ScopedContext active(command_queue->
get_context());
4011 event_wait_list,
nullptr);
4020 hipStream, region, &srcPtr, hipMemoryTypeDevice, src_origin,
4021 src_row_pitch, src_slice_pitch, &dstPtr, hipMemoryTypeDevice,
4022 dst_origin, dst_row_pitch, dst_slice_pitch);
4025 retImplEv->record();
4026 *
event = retImplEv.release();
4036 const void *pattern,
size_t pattern_size,
4037 size_t offset,
size_t size,
4041 assert(command_queue !=
nullptr);
4043 auto args_are_multiples_of_pattern_size =
4044 (offset % pattern_size == 0) || (size % pattern_size == 0);
4046 auto pattern_is_valid = (pattern !=
nullptr);
4048 auto pattern_size_is_valid =
4049 ((pattern_size & (pattern_size - 1)) == 0) &&
4050 (pattern_size > 0) && (pattern_size <= 128);
4052 assert(args_are_multiples_of_pattern_size && pattern_is_valid &&
4053 pattern_size_is_valid);
4054 (void)args_are_multiples_of_pattern_size;
4055 (void)pattern_is_valid;
4056 (void)pattern_size_is_valid;
4058 std::unique_ptr<_pi_event> retImplEv{
nullptr};
4061 ScopedContext active(command_queue->
get_context());
4063 if (event_wait_list) {
4065 event_wait_list,
nullptr);
4073 result = retImplEv->start();
4077 auto stream = command_queue->
get();
4078 auto N = size / pattern_size;
4081 switch (pattern_size) {
4083 auto value = *
static_cast<const uint8_t *
>(pattern);
4084 result = PI_CHECK_ERROR(hipMemsetD8Async(dstDevice, value, N, stream));
4088 auto value = *
static_cast<const uint16_t *
>(pattern);
4089 result = PI_CHECK_ERROR(hipMemsetD16Async(dstDevice, value, N, stream));
4093 auto value = *
static_cast<const uint32_t *
>(pattern);
4094 result = PI_CHECK_ERROR(hipMemsetD32Async(dstDevice, value, N, stream));
4110 auto number_of_steps = pattern_size /
sizeof(uint8_t);
4111 auto pitch = number_of_steps *
sizeof(uint8_t);
4112 auto height = size / number_of_steps;
4113 auto count_32 = size /
sizeof(uint32_t);
4116 auto value = *(
static_cast<const uint32_t *
>(pattern));
4118 PI_CHECK_ERROR(hipMemsetD32Async(dstDevice, value, count_32, stream));
4121 value = *(
static_cast<const uint8_t *
>(pattern) +
step);
4124 auto offset_ptr =
reinterpret_cast<void *
>(
4125 reinterpret_cast<uint8_t *
>(dstDevice) + (
step *
sizeof(uint8_t)));
4128 result = PI_CHECK_ERROR(hipMemset2DAsync(
4129 offset_ptr, pitch, value,
sizeof(uint8_t), height, stream));
4136 result = retImplEv->record();
4137 *
event = retImplEv.release();
4144 return PI_ERROR_UNKNOWN;
4149 switch (array_format) {
4150 case HIP_AD_FORMAT_UNSIGNED_INT8:
4151 case HIP_AD_FORMAT_SIGNED_INT8:
4153 case HIP_AD_FORMAT_UNSIGNED_INT16:
4154 case HIP_AD_FORMAT_SIGNED_INT16:
4155 case HIP_AD_FORMAT_HALF:
4157 case HIP_AD_FORMAT_UNSIGNED_INT32:
4158 case HIP_AD_FORMAT_SIGNED_INT32:
4159 case HIP_AD_FORMAT_FLOAT:
4175 hipStream_t hip_stream,
pi_mem_type img_type,
const size_t *region,
4176 const void *src_ptr,
const hipMemoryType src_type,
const size_t *src_offset,
4177 void *dst_ptr,
const hipMemoryType dst_type,
const size_t *dst_offset) {
4178 assert(region !=
nullptr);
4180 assert(src_type == hipMemoryTypeArray || src_type == hipMemoryTypeHost);
4181 assert(dst_type == hipMemoryTypeArray || dst_type == hipMemoryTypeHost);
4184 hip_Memcpy2D cpyDesc;
4185 memset(&cpyDesc, 0,
sizeof(cpyDesc));
4186 cpyDesc.srcMemoryType = src_type;
4187 if (src_type == hipMemoryTypeArray) {
4189 reinterpret_cast<hipCUarray
>(
const_cast<void *
>(src_ptr));
4190 cpyDesc.srcXInBytes = src_offset[0];
4191 cpyDesc.srcY = src_offset[1];
4193 cpyDesc.srcHost = src_ptr;
4195 cpyDesc.dstMemoryType = dst_type;
4196 if (dst_type == hipMemoryTypeArray) {
4198 reinterpret_cast<hipCUarray
>(
const_cast<void *
>(dst_ptr));
4199 cpyDesc.dstXInBytes = dst_offset[0];
4200 cpyDesc.dstY = dst_offset[1];
4202 cpyDesc.dstHost = dst_ptr;
4204 cpyDesc.WidthInBytes = region[0];
4205 cpyDesc.Height = region[1];
4206 return PI_CHECK_ERROR(hipMemcpyParam2DAsync(&cpyDesc, hip_stream));
4211 HIP_MEMCPY3D cpyDesc;
4212 memset(&cpyDesc, 0,
sizeof(cpyDesc));
4213 cpyDesc.srcMemoryType = src_type;
4214 if (src_type == hipMemoryTypeArray) {
4216 reinterpret_cast<hipCUarray
>(
const_cast<void *
>(src_ptr));
4217 cpyDesc.srcXInBytes = src_offset[0];
4218 cpyDesc.srcY = src_offset[1];
4219 cpyDesc.srcZ = src_offset[2];
4221 cpyDesc.srcHost = src_ptr;
4223 cpyDesc.dstMemoryType = dst_type;
4224 if (dst_type == hipMemoryTypeArray) {
4225 cpyDesc.dstArray =
reinterpret_cast<hipCUarray
>(dst_ptr);
4226 cpyDesc.dstXInBytes = dst_offset[0];
4227 cpyDesc.dstY = dst_offset[1];
4228 cpyDesc.dstZ = dst_offset[2];
4230 cpyDesc.dstHost = dst_ptr;
4232 cpyDesc.WidthInBytes = region[0];
4233 cpyDesc.Height = region[1];
4234 cpyDesc.Depth = region[2];
4235 return PI_CHECK_ERROR(hipDrvMemcpy3DAsync(&cpyDesc, hip_stream));
4236 return PI_ERROR_UNKNOWN;
4239 return PI_ERROR_INVALID_VALUE;
4243 pi_bool blocking_read,
const size_t *origin,
4244 const size_t *region,
size_t row_pitch,
4245 size_t slice_pitch,
void *ptr,
4252 assert(command_queue !=
nullptr);
4253 assert(image !=
nullptr);
4257 hipStream_t hipStream = command_queue->
get();
4260 ScopedContext active(command_queue->
get_context());
4262 if (event_wait_list) {
4264 event_wait_list,
nullptr);
4267 hipArray *array = image->mem_.surface_mem_.get_array();
4269 hipArray_Format Format;
4271 getArrayDesc(array, Format, NumChannels);
4275 size_t byteOffsetX = origin[0] * elementByteSize * NumChannels;
4276 size_t bytesToCopy = elementByteSize * NumChannels * region[0];
4278 pi_mem_type imgType = image->mem_.surface_mem_.get_image_type();
4280 size_t adjustedRegion[3] = {bytesToCopy, region[1], region[2]};
4281 size_t srcOffset[3] = {byteOffsetX, origin[1], origin[2]};
4284 array, hipMemoryTypeArray, srcOffset,
4285 ptr, hipMemoryTypeHost,
nullptr);
4287 if (retErr != PI_SUCCESS) {
4294 new_event->record();
4298 if (blocking_read) {
4299 retErr = PI_CHECK_ERROR(hipStreamSynchronize(hipStream));
4304 return PI_ERROR_UNKNOWN;
4312 const size_t *origin,
const size_t *region,
4313 size_t input_row_pitch,
4314 size_t input_slice_pitch,
const void *ptr,
4318 (void)blocking_write;
4319 (void)input_row_pitch;
4320 (void)input_slice_pitch;
4321 assert(command_queue !=
nullptr);
4322 assert(image !=
nullptr);
4326 hipStream_t hipStream = command_queue->
get();
4329 ScopedContext active(command_queue->
get_context());
4331 if (event_wait_list) {
4333 event_wait_list,
nullptr);
4336 hipArray *array = image->mem_.surface_mem_.get_array();
4338 hipArray_Format Format;
4340 getArrayDesc(array, Format, NumChannels);
4344 size_t byteOffsetX = origin[0] * elementByteSize * NumChannels;
4345 size_t bytesToCopy = elementByteSize * NumChannels * region[0];
4347 pi_mem_type imgType = image->mem_.surface_mem_.get_image_type();
4349 size_t adjustedRegion[3] = {bytesToCopy, region[1], region[2]};
4350 size_t dstOffset[3] = {byteOffsetX, origin[1], origin[2]};
4353 ptr, hipMemoryTypeHost,
nullptr, array,
4354 hipMemoryTypeArray, dstOffset);
4356 if (retErr != PI_SUCCESS) {
4363 new_event->record();
4369 return PI_ERROR_UNKNOWN;
4378 pi_mem dst_image,
const size_t *src_origin,
4379 const size_t *dst_origin,
4380 const size_t *region,
4391 hipStream_t hipStream = command_queue->
get();
4394 ScopedContext active(command_queue->
get_context());
4396 if (event_wait_list) {
4398 event_wait_list,
nullptr);
4402 hipArray_Format srcFormat;
4403 size_t srcNumChannels;
4404 getArrayDesc(srcArray, srcFormat, srcNumChannels);
4407 hipArray_Format dstFormat;
4408 size_t dstNumChannels;
4409 getArrayDesc(dstArray, dstFormat, dstNumChannels);
4411 assert(srcFormat == dstFormat);
4412 assert(srcNumChannels == dstNumChannels);
4416 size_t dstByteOffsetX = dst_origin[0] * elementByteSize * srcNumChannels;
4417 size_t srcByteOffsetX = src_origin[0] * elementByteSize * dstNumChannels;
4418 size_t bytesToCopy = elementByteSize * srcNumChannels * region[0];
4422 size_t adjustedRegion[3] = {bytesToCopy, region[1], region[2]};
4423 size_t srcOffset[3] = {srcByteOffsetX, src_origin[1], src_origin[2]};
4424 size_t dstOffset[3] = {dstByteOffsetX, dst_origin[1], dst_origin[2]};
4427 hipStream, imgType, adjustedRegion, srcArray, hipMemoryTypeArray,
4428 srcOffset, dstArray, hipMemoryTypeArray, dstOffset);
4430 if (retErr != PI_SUCCESS) {
4437 new_event->record();
4443 return PI_ERROR_UNKNOWN;
4452 const void *fill_color,
4453 const size_t *origin,
const size_t *region,
4457 (void)command_queue;
4462 (void)num_events_in_wait_list;
4463 (void)event_wait_list;
4482 assert(ret_map !=
nullptr);
4483 assert(command_queue !=
nullptr);
4484 assert(buffer !=
nullptr);
4487 pi_result ret_err = PI_ERROR_INVALID_OPERATION;
4500 ret_err = PI_SUCCESS;
4506 command_queue, buffer, blocking_map, offset, size, hostPtr,
4507 num_events_in_wait_list, event_wait_list,
event);
4509 ScopedContext active(command_queue->
get_context());
4513 event_wait_list,
nullptr);
4542 assert(command_queue !=
nullptr);
4543 assert(mapped_ptr !=
nullptr);
4544 assert(memobj !=
nullptr);
4558 command_queue, memobj,
true,
4561 num_events_in_wait_list, event_wait_list,
event);
4563 ScopedContext active(command_queue->
get_context());
4567 event_wait_list,
nullptr);
4591 assert(result_ptr !=
nullptr);
4593 assert(properties ==
nullptr || *properties == 0);
4596 ScopedContext active(
context);
4597 result = PI_CHECK_ERROR(hipHostMalloc(result_ptr, size));
4602 assert(alignment == 0 ||
4603 (result == PI_SUCCESS &&
4604 reinterpret_cast<std::uintptr_t
>(*result_ptr) % alignment == 0));
4614 assert(result_ptr !=
nullptr);
4616 assert(
device !=
nullptr);
4617 assert(properties ==
nullptr || *properties == 0);
4620 ScopedContext active(
context);
4621 result = PI_CHECK_ERROR(hipMalloc(result_ptr, size));
4626 assert(alignment == 0 ||
4627 (result == PI_SUCCESS &&
4628 reinterpret_cast<std::uintptr_t
>(*result_ptr) % alignment == 0));
4638 assert(result_ptr !=
nullptr);
4640 assert(
device !=
nullptr);
4641 assert(properties ==
nullptr || *properties == 0);
4644 ScopedContext active(
context);
4646 PI_CHECK_ERROR(hipMallocManaged(result_ptr, size, hipMemAttachGlobal));
4651 assert(alignment == 0 ||
4652 (result == PI_SUCCESS &&
4653 reinterpret_cast<std::uintptr_t
>(*result_ptr) % alignment == 0));
4664 ScopedContext active(
context);
4666 hipPointerAttribute_t hipPointerAttributeType;
4668 PI_CHECK_ERROR(hipPointerGetAttributes(&hipPointerAttributeType, ptr));
4669 type = hipPointerAttributeType.memoryType;
4670 assert(type == hipMemoryTypeDevice or type == hipMemoryTypeHost);
4671 if (type == hipMemoryTypeDevice) {
4672 result = PI_CHECK_ERROR(hipFree(ptr));
4674 if (type == hipMemoryTypeHost) {
4675 result = PI_CHECK_ERROR(hipFreeHost(ptr));
4689 assert(
queue !=
nullptr);
4690 assert(ptr !=
nullptr);
4691 hipStream_t hipStream =
queue->get();
4693 std::unique_ptr<_pi_event> event_ptr{
nullptr};
4696 ScopedContext active(
queue->get_context());
4698 events_waitlist,
nullptr);
4700 event_ptr = std::unique_ptr<_pi_event>(
4704 result = PI_CHECK_ERROR(
4705 hipMemsetD8Async(
reinterpret_cast<hipDeviceptr_t
>(ptr),
4706 (
unsigned char)value & 0xFF, count, hipStream));
4708 result = event_ptr->record();
4709 *
event = event_ptr.release();
4719 void *dst_ptr,
const void *src_ptr,
4725 assert(
queue !=
nullptr);
4726 assert(dst_ptr !=
nullptr);
4727 assert(src_ptr !=
nullptr);
4728 hipStream_t hipStream =
queue->get();
4730 std::unique_ptr<_pi_event> event_ptr{
nullptr};
4733 ScopedContext active(
queue->get_context());
4735 events_waitlist,
nullptr);
4737 event_ptr = std::unique_ptr<_pi_event>(
4741 result = PI_CHECK_ERROR(
4742 hipMemcpyAsync(dst_ptr, src_ptr, size, hipMemcpyDefault, hipStream));
4744 result = event_ptr->record();
4747 result = PI_CHECK_ERROR(hipStreamSynchronize(hipStream));
4750 *
event = event_ptr.release();
4767 return PI_ERROR_INVALID_VALUE;
4768 assert(
queue !=
nullptr);
4769 assert(ptr !=
nullptr);
4770 hipStream_t hipStream =
queue->get();
4772 std::unique_ptr<_pi_event> event_ptr{
nullptr};
4775 ScopedContext active(
queue->get_context());
4777 events_waitlist,
nullptr);
4779 event_ptr = std::unique_ptr<_pi_event>(
4783 result = PI_CHECK_ERROR(hipMemPrefetchAsync(
4784 ptr, size,
queue->get_context()->get_device()->get(), hipStream));
4786 result = event_ptr->record();
4787 *
event = event_ptr.release();
4803 assert(
queue !=
nullptr);
4804 assert(ptr !=
nullptr);
4830 size_t param_value_size,
4832 size_t *param_value_size_ret) {
4835 assert(ptr !=
nullptr);
4837 hipPointerAttribute_t hipPointerAttributeType;
4840 ScopedContext active(
context);
4841 switch (param_name) {
4845 hipError_t ret = hipPointerGetAttributes(&hipPointerAttributeType, ptr);
4846 if (ret == hipErrorInvalidValue) {
4848 return getInfo(param_value_size, param_value, param_value_size_ret,
4851 result = check_error(ret, __func__, __LINE__ - 5, __FILE__);
4852 value = hipPointerAttributeType.isManaged;
4855 return getInfo(param_value_size, param_value, param_value_size_ret,
4858 result = PI_CHECK_ERROR(
4859 hipPointerGetAttributes(&hipPointerAttributeType, ptr));
4860 value = hipPointerAttributeType.memoryType;
4861 assert(value == hipMemoryTypeDevice or value == hipMemoryTypeHost);
4862 if (value == hipMemoryTypeDevice) {
4864 return getInfo(param_value_size, param_value, param_value_size_ret,
4867 if (value == hipMemoryTypeHost) {
4869 return getInfo(param_value_size, param_value, param_value_size_ret,
4873 __builtin_unreachable();
4874 return getInfo(param_value_size, param_value, param_value_size_ret,
4878 return PI_ERROR_INVALID_VALUE;
4881 return PI_ERROR_INVALID_VALUE;
4886 result = PI_CHECK_ERROR(
4887 hipPointerGetAttributes(&hipPointerAttributeType, ptr));
4888 int device_idx = hipPointerAttributeType.device;
4892 std::vector<pi_platform> platforms;
4893 platforms.resize(device_idx + 1);
4898 return getInfo(param_value_size, param_value, param_value_size_ret,
4913 (void)PluginParameter;
4924 size_t PluginVersionSize =
sizeof(PluginInit->
PluginVersion);
4926 return PI_ERROR_INVALID_VALUE;
4935 #define _PI_CL(pi_api, hip_api) \
4936 (PluginInit->PiFunctionTable).pi_api = (decltype(&::pi_api))(&hip_api);