22 #include <sycl/feature_test.hpp>
31 inline namespace _V1 {
35 std::vector<info::fp_config> result;
55 inline std::vector<info::partition_affinity_domain>
57 std::vector<info::partition_affinity_domain> result;
73 inline std::vector<info::execution_capability>
75 std::vector<info::execution_capability> result;
85 switch (AffinityDomain) {
86 #define __SYCL_AFFINITY_DOMAIN_STRING_CASE(DOMAIN) \
91 sycl::info::partition_affinity_domain::numa)
93 sycl::info::partition_affinity_domain::L4_cache)
95 sycl::info::partition_affinity_domain::L3_cache)
97 sycl::info::partition_affinity_domain::L2_cache)
99 sycl::info::partition_affinity_domain::L1_cache)
101 sycl::info::partition_affinity_domain::next_partitionable)
102 #undef __SYCL_AFFINITY_DOMAIN_STRING_CASE
104 assert(
false &&
"Missing case for affinity domain.");
127 using type = info::device::native_vector_width_half;
131 using type = info::device::native_vector_width_double;
144 return ReturnT(result);
158 return createSyclObjFromImpl<platform>(
167 size_t resultSize = 0;
169 nullptr, &resultSize);
170 if (resultSize == 0) {
173 std::unique_ptr<char[]> result(
new char[resultSize]);
175 getHandleRef(), InfoCode, resultSize, result.get(),
nullptr);
188 template <
typename ReturnT>
194 template <
typename Param>
203 cl_device_fp_config result;
214 return Dev->get_device_info_string(
222 info::device::single_fp_config> {
227 sizeof(result), &result,
nullptr);
240 sizeof(Properties), &Properties,
nullptr);
248 info::device::atomic_memory_order_capabilities> {
262 info::device::atomic_fence_order_capabilities> {
276 info::device::atomic_memory_scope_capabilities> {
290 info::device::atomic_fence_scope_capabilities> {
304 info::device::ext_oneapi_bfloat16_math_functions> {
312 sizeof(result), &result,
nullptr);
313 if (Err != PI_SUCCESS) {
323 info::device::execution_capabilities> {
337 info::device::built_in_kernel_ids> {
343 std::vector<kernel_id> ids;
344 ids.reserve(names.size());
345 for (
const auto &name : names) {
355 info::device::built_in_kernels> {
366 info::device::extensions> {
389 info::device::partition_properties> {
392 const auto &Plugin = Dev->getPlugin();
396 Dev->getHandleRef(), info_partition, 0,
nullptr, &resultSize);
398 size_t arrayLength = resultSize /
sizeof(cl_device_partition_property);
399 if (arrayLength == 0) {
402 std::unique_ptr<cl_device_partition_property[]> arrayResult(
403 new cl_device_partition_property[arrayLength]);
405 info_partition, resultSize,
406 arrayResult.get(),
nullptr);
408 std::vector<info::partition_property> result;
409 for (
size_t i = 0; i < arrayLength; ++i) {
415 result.push_back(pp);
424 info::device::partition_affinity_domains> {
425 static std::vector<info::partition_affinity_domain>
431 sizeof(result), &result,
nullptr);
440 info::device::partition_type_affinity_domain> {
446 nullptr, &resultSize);
447 if (resultSize != 1) {
450 cl_device_partition_property result;
454 sizeof(result), &result,
nullptr);
470 info::device::partition_type_property> {
479 size_t arrayLength = resultSize /
sizeof(cl_device_partition_property);
481 std::unique_ptr<cl_device_partition_property[]> arrayResult(
482 new cl_device_partition_property[arrayLength]);
485 arrayResult.get(),
nullptr);
494 info::device::sub_group_sizes> {
496 size_t resultSize = 0;
499 0,
nullptr, &resultSize);
501 std::vector<size_t> result(resultSize /
sizeof(
size_t));
504 resultSize, result.data(),
nullptr);
520 if (platform_name ==
"Intel(R) FPGA Emulation Platform for OpenCL(TM)" ||
521 platform_name ==
"Intel(R) FPGA SDK for OpenCL(TM)")
532 template <
int Dimensions>
540 return {values[1], values[0]};
543 return {values[2], values[1], values[0]};
547 template <
int Dimensions>
549 info::device::max_work_item_sizes<Dimensions>> {
555 sizeof(result), &result,
nullptr);
556 return construct_range<Dimensions>(result);
565 {
"5.0", oneapi_exp_arch::nvidia_gpu_sm_50},
566 {
"5.2", oneapi_exp_arch::nvidia_gpu_sm_52},
567 {
"5.3", oneapi_exp_arch::nvidia_gpu_sm_53},
568 {
"6.0", oneapi_exp_arch::nvidia_gpu_sm_60},
569 {
"6.1", oneapi_exp_arch::nvidia_gpu_sm_61},
570 {
"6.2", oneapi_exp_arch::nvidia_gpu_sm_62},
571 {
"7.0", oneapi_exp_arch::nvidia_gpu_sm_70},
572 {
"7.2", oneapi_exp_arch::nvidia_gpu_sm_72},
573 {
"7.5", oneapi_exp_arch::nvidia_gpu_sm_75},
574 {
"8.0", oneapi_exp_arch::nvidia_gpu_sm_80},
575 {
"8.6", oneapi_exp_arch::nvidia_gpu_sm_86},
576 {
"8.7", oneapi_exp_arch::nvidia_gpu_sm_87},
577 {
"8.9", oneapi_exp_arch::nvidia_gpu_sm_89},
578 {
"9.0", oneapi_exp_arch::nvidia_gpu_sm_90},
579 {
"gfx701", oneapi_exp_arch::amd_gpu_gfx701},
580 {
"gfx702", oneapi_exp_arch::amd_gpu_gfx702},
581 {
"gfx801", oneapi_exp_arch::amd_gpu_gfx801},
582 {
"gfx802", oneapi_exp_arch::amd_gpu_gfx802},
583 {
"gfx803", oneapi_exp_arch::amd_gpu_gfx803},
584 {
"gfx805", oneapi_exp_arch::amd_gpu_gfx805},
585 {
"gfx810", oneapi_exp_arch::amd_gpu_gfx810},
586 {
"gfx900", oneapi_exp_arch::amd_gpu_gfx900},
587 {
"gfx902", oneapi_exp_arch::amd_gpu_gfx902},
588 {
"gfx904", oneapi_exp_arch::amd_gpu_gfx904},
589 {
"gfx906", oneapi_exp_arch::amd_gpu_gfx906},
590 {
"gfx908", oneapi_exp_arch::amd_gpu_gfx908},
591 {
"gfx909", oneapi_exp_arch::amd_gpu_gfx909},
592 {
"gfx90a", oneapi_exp_arch::amd_gpu_gfx90a},
593 {
"gfx90c", oneapi_exp_arch::amd_gpu_gfx90c},
594 {
"gfx940", oneapi_exp_arch::amd_gpu_gfx940},
595 {
"gfx941", oneapi_exp_arch::amd_gpu_gfx941},
596 {
"gfx942", oneapi_exp_arch::amd_gpu_gfx942},
597 {
"gfx1010", oneapi_exp_arch::amd_gpu_gfx1010},
598 {
"gfx1011", oneapi_exp_arch::amd_gpu_gfx1011},
599 {
"gfx1012", oneapi_exp_arch::amd_gpu_gfx1012},
600 {
"gfx1013", oneapi_exp_arch::amd_gpu_gfx1013},
601 {
"gfx1030", oneapi_exp_arch::amd_gpu_gfx1030},
602 {
"gfx1031", oneapi_exp_arch::amd_gpu_gfx1031},
603 {
"gfx1032", oneapi_exp_arch::amd_gpu_gfx1032},
604 {
"gfx1033", oneapi_exp_arch::amd_gpu_gfx1033},
605 {
"gfx1034", oneapi_exp_arch::amd_gpu_gfx1034},
606 {
"gfx1035", oneapi_exp_arch::amd_gpu_gfx1035},
607 {
"gfx1036", oneapi_exp_arch::amd_gpu_gfx1036},
608 {
"gfx1100", oneapi_exp_arch::amd_gpu_gfx1100},
609 {
"gfx1101", oneapi_exp_arch::amd_gpu_gfx1101},
610 {
"gfx1102", oneapi_exp_arch::amd_gpu_gfx1102},
611 {
"gfx1103", oneapi_exp_arch::amd_gpu_gfx1103},
612 {
"gfx1150", oneapi_exp_arch::amd_gpu_gfx1150},
613 {
"gfx1151", oneapi_exp_arch::amd_gpu_gfx1151},
614 {
"gfx1200", oneapi_exp_arch::amd_gpu_gfx1200},
615 {
"gfx1201", oneapi_exp_arch::amd_gpu_gfx1201},
620 {0x02000000, oneapi_exp_arch::intel_gpu_bdw},
621 {0x02400009, oneapi_exp_arch::intel_gpu_skl},
622 {0x02404009, oneapi_exp_arch::intel_gpu_kbl},
623 {0x02408009, oneapi_exp_arch::intel_gpu_cfl},
624 {0x0240c000, oneapi_exp_arch::intel_gpu_apl},
625 {0x02410000, oneapi_exp_arch::intel_gpu_glk},
626 {0x02414000, oneapi_exp_arch::intel_gpu_whl},
627 {0x02418000, oneapi_exp_arch::intel_gpu_aml},
628 {0x0241c000, oneapi_exp_arch::intel_gpu_cml},
629 {0x02c00000, oneapi_exp_arch::intel_gpu_icllp},
630 {0x02c08000, oneapi_exp_arch::intel_gpu_ehl},
631 {0x03000000, oneapi_exp_arch::intel_gpu_tgllp},
632 {0x03004000, oneapi_exp_arch::intel_gpu_rkl},
633 {0x03008000, oneapi_exp_arch::intel_gpu_adl_s},
634 {0x0300c000, oneapi_exp_arch::intel_gpu_adl_p},
635 {0x03010000, oneapi_exp_arch::intel_gpu_adl_n},
636 {0x03028000, oneapi_exp_arch::intel_gpu_dg1},
637 {0x030dc008, oneapi_exp_arch::intel_gpu_acm_g10},
638 {0x030e0005, oneapi_exp_arch::intel_gpu_acm_g11},
639 {0x030e4000, oneapi_exp_arch::intel_gpu_acm_g12},
640 {0x030f0007, oneapi_exp_arch::intel_gpu_pvc},
641 {0x030f4007, oneapi_exp_arch::intel_gpu_pvc_vg},
646 {8, oneapi_exp_arch::intel_cpu_spr},
647 {9, oneapi_exp_arch::intel_cpu_gnr},
655 backend CurrentBackend = Dev->getBackend();
658 auto MapArchIDToArchName = [](
const int arch) {
660 if (Item.first == arch)
665 "The current device architecture is not supported by "
666 "sycl_ext_oneapi_device_architecture.");
673 sizeof(DeviceIp), &DeviceIp,
nullptr);
674 return MapArchIDToArchName(DeviceIp);
677 auto MapArchIDToArchName = [](
const char *arch) {
684 "The current device architecture is not supported by "
685 "sycl_ext_oneapi_device_architecture.");
687 size_t ResultSize = 0;
690 nullptr, &ResultSize);
691 std::unique_ptr<char[]> DeviceArch(
new char[ResultSize]);
694 ResultSize, DeviceArch.get(),
nullptr);
697 DeviceArchCopy.substr(0, DeviceArchCopy.find(
":"));
698 return MapArchIDToArchName(DeviceArchSubstr.data());
700 auto MapArchIDToArchName = [](
const int arch) {
702 if (Item.first == arch)
705 return sycl::ext::oneapi::experimental::architecture::x86_64;
712 sizeof(DeviceIp), &DeviceIp,
nullptr);
713 return MapArchIDToArchName(DeviceIp);
720 else if (Dev->is_cpu())
722 else if (Dev->is_accelerator())
723 DeviceStr =
"accelerator";
725 std::stringstream ErrorMessage;
727 <<
"sycl_ext_oneapi_device_architecture feature is not supported on "
728 << DeviceStr <<
" device with sycl::backend::" << CurrentBackend
736 std::vector<ext::oneapi::experimental::matrix::combination>,
737 ext::oneapi::experimental::info::device::matrix_combinations> {
738 static std::vector<ext::oneapi::experimental::matrix::combination>
740 using namespace ext::oneapi::experimental::matrix;
741 using namespace ext::oneapi::experimental;
742 backend CurrentBackend = Dev->getBackend();
743 auto get_current_architecture = [&Dev]() -> std::optional<architecture> {
755 std::rethrow_exception(std::make_exception_ptr(e));
759 std::optional<architecture> DeviceArchOpt = get_current_architecture();
760 if (!DeviceArchOpt.has_value())
763 if (architecture::intel_cpu_spr == DeviceArch)
765 {16, 16, 64, 0, 0, 0, matrix_type::uint8, matrix_type::uint8,
766 matrix_type::sint32, matrix_type::sint32},
767 {16, 16, 64, 0, 0, 0, matrix_type::uint8, matrix_type::sint8,
768 matrix_type::sint32, matrix_type::sint32},
769 {16, 16, 64, 0, 0, 0, matrix_type::sint8, matrix_type::uint8,
770 matrix_type::sint32, matrix_type::sint32},
771 {16, 16, 64, 0, 0, 0, matrix_type::sint8, matrix_type::sint8,
772 matrix_type::sint32, matrix_type::sint32},
773 {16, 16, 32, 0, 0, 0, matrix_type::bf16, matrix_type::bf16,
774 matrix_type::fp32, matrix_type::fp32},
776 else if (architecture::intel_cpu_gnr == DeviceArch)
778 {16, 16, 64, 0, 0, 0, matrix_type::uint8, matrix_type::uint8,
779 matrix_type::sint32, matrix_type::sint32},
780 {16, 16, 64, 0, 0, 0, matrix_type::uint8, matrix_type::sint8,
781 matrix_type::sint32, matrix_type::sint32},
782 {16, 16, 64, 0, 0, 0, matrix_type::sint8, matrix_type::uint8,
783 matrix_type::sint32, matrix_type::sint32},
784 {16, 16, 64, 0, 0, 0, matrix_type::sint8, matrix_type::sint8,
785 matrix_type::sint32, matrix_type::sint32},
786 {16, 16, 32, 0, 0, 0, matrix_type::bf16, matrix_type::bf16,
787 matrix_type::fp32, matrix_type::fp32},
788 {16, 16, 32, 0, 0, 0, matrix_type::fp16, matrix_type::fp16,
789 matrix_type::fp32, matrix_type::fp32},
791 else if (architecture::intel_gpu_pvc == DeviceArch)
793 {8, 0, 0, 0, 16, 32, matrix_type::uint8, matrix_type::uint8,
794 matrix_type::sint32, matrix_type::sint32},
795 {8, 0, 0, 0, 16, 32, matrix_type::uint8, matrix_type::sint8,
796 matrix_type::sint32, matrix_type::sint32},
797 {8, 0, 0, 0, 16, 32, matrix_type::sint8, matrix_type::uint8,
798 matrix_type::sint32, matrix_type::sint32},
799 {8, 0, 0, 0, 16, 32, matrix_type::sint8, matrix_type::sint8,
800 matrix_type::sint32, matrix_type::sint32},
801 {8, 0, 0, 0, 16, 16, matrix_type::fp16, matrix_type::fp16,
802 matrix_type::fp32, matrix_type::fp32},
803 {8, 0, 0, 0, 16, 16, matrix_type::bf16, matrix_type::bf16,
804 matrix_type::fp32, matrix_type::fp32},
805 {0, 0, 0, 16, 16, 16, matrix_type::bf16, matrix_type::bf16,
806 matrix_type::fp32, matrix_type::fp32},
807 {0, 0, 0, 32, 64, 16, matrix_type::bf16, matrix_type::bf16,
808 matrix_type::fp32, matrix_type::fp32},
809 {8, 0, 0, 0, 16, 8, matrix_type::tf32, matrix_type::tf32,
810 matrix_type::fp32, matrix_type::fp32},
812 else if ((architecture::intel_gpu_dg2_g10 == DeviceArch) ||
813 (architecture::intel_gpu_dg2_g11 == DeviceArch) ||
814 (architecture::intel_gpu_dg2_g12 == DeviceArch))
816 {8, 0, 0, 0, 8, 32, matrix_type::uint8, matrix_type::uint8,
817 matrix_type::sint32, matrix_type::sint32},
818 {8, 0, 0, 0, 8, 32, matrix_type::uint8, matrix_type::sint8,
819 matrix_type::sint32, matrix_type::sint32},
820 {8, 0, 0, 0, 8, 32, matrix_type::sint8, matrix_type::uint8,
821 matrix_type::sint32, matrix_type::sint32},
822 {8, 0, 0, 0, 8, 32, matrix_type::sint8, matrix_type::sint8,
823 matrix_type::sint32, matrix_type::sint32},
824 {8, 0, 0, 0, 8, 16, matrix_type::fp16, matrix_type::fp16,
825 matrix_type::fp32, matrix_type::fp32},
826 {8, 0, 0, 0, 8, 16, matrix_type::bf16, matrix_type::bf16,
827 matrix_type::fp32, matrix_type::fp32},
829 else if (architecture::amd_gpu_gfx90a == DeviceArch)
831 {0, 0, 0, 32, 32, 8, matrix_type::fp16, matrix_type::fp16,
832 matrix_type::fp32, matrix_type::fp32},
833 {0, 0, 0, 16, 16, 16, matrix_type::fp16, matrix_type::fp16,
834 matrix_type::fp32, matrix_type::fp32},
835 {0, 0, 0, 32, 32, 8, matrix_type::sint8, matrix_type::sint8,
836 matrix_type::sint32, matrix_type::sint32},
837 {0, 0, 0, 16, 16, 16, matrix_type::sint8, matrix_type::sint8,
838 matrix_type::sint32, matrix_type::sint32},
839 {0, 0, 0, 32, 32, 8, matrix_type::bf16, matrix_type::bf16,
840 matrix_type::fp32, matrix_type::fp32},
841 {0, 0, 0, 16, 16, 16, matrix_type::bf16, matrix_type::bf16,
842 matrix_type::fp32, matrix_type::fp32},
843 {0, 0, 0, 16, 16, 4, matrix_type::fp64, matrix_type::fp64,
844 matrix_type::fp64, matrix_type::fp64},
// Pairs a numeric value with each NVIDIA architecture enumerator; the digits
// of the number mirror the enumerator's sm_XY suffix (e.g. 7.2 <->
// nvidia_gpu_sm_72). The number is stored as `float`, so entries written as
// double literals (5.2, 5.3, 6.1, 7.2, ...) are rounded to the nearest float
// and are not bit-equal to the same double literal.
850 constexpr std::pair<float, oneapi_exp_arch> NvidiaArchNumbs[] = {
851 {5.0, oneapi_exp_arch::nvidia_gpu_sm_50},
852 {5.2, oneapi_exp_arch::nvidia_gpu_sm_52},
853 {5.3, oneapi_exp_arch::nvidia_gpu_sm_53},
854 {6.0, oneapi_exp_arch::nvidia_gpu_sm_60},
855 {6.1, oneapi_exp_arch::nvidia_gpu_sm_61},
856 {6.2, oneapi_exp_arch::nvidia_gpu_sm_62},
857 {7.0, oneapi_exp_arch::nvidia_gpu_sm_70},
858 {7.2, oneapi_exp_arch::nvidia_gpu_sm_72},
859 {7.5, oneapi_exp_arch::nvidia_gpu_sm_75},
860 {8.0, oneapi_exp_arch::nvidia_gpu_sm_80},
861 {8.6, oneapi_exp_arch::nvidia_gpu_sm_86},
862 {8.7, oneapi_exp_arch::nvidia_gpu_sm_87},
863 {8.9, oneapi_exp_arch::nvidia_gpu_sm_89},
864 {9.0, oneapi_exp_arch::nvidia_gpu_sm_90},
867 for (
const auto &Item : NvidiaArchNumbs)
868 if (Item.second == arch)
872 float ComputeCapability = GetArchNum(DeviceArch);
// Three per-tier lists of matrix combinations. Every entry has six integers
// followed by four matrix_type values.
// NOTE(review): presumably the fields are (max_msize, max_nsize, max_ksize,
// msize, nsize, ksize, atype, btype, ctype, dtype) -- confirm against the
// `combination` struct, which is not visible here.
//
// sm_70 list: the first two types are always fp16; the last two range over
// every fp16/fp32 pairing, each for the shapes 16x16x16, 8x32x16 and 32x8x16.
873 std::vector<combination> sm_70_combinations = {
874 {0, 0, 0, 16, 16, 16, matrix_type::fp16, matrix_type::fp16,
875 matrix_type::fp32, matrix_type::fp32},
876 {0, 0, 0, 8, 32, 16, matrix_type::fp16, matrix_type::fp16,
877 matrix_type::fp32, matrix_type::fp32},
878 {0, 0, 0, 32, 8, 16, matrix_type::fp16, matrix_type::fp16,
879 matrix_type::fp32, matrix_type::fp32},
880 {0, 0, 0, 16, 16, 16, matrix_type::fp16, matrix_type::fp16,
881 matrix_type::fp16, matrix_type::fp16},
882 {0, 0, 0, 8, 32, 16, matrix_type::fp16, matrix_type::fp16,
883 matrix_type::fp16, matrix_type::fp16},
884 {0, 0, 0, 32, 8, 16, matrix_type::fp16, matrix_type::fp16,
885 matrix_type::fp16, matrix_type::fp16},
886 {0, 0, 0, 16, 16, 16, matrix_type::fp16, matrix_type::fp16,
887 matrix_type::fp32, matrix_type::fp16},
888 {0, 0, 0, 8, 32, 16, matrix_type::fp16, matrix_type::fp16,
889 matrix_type::fp32, matrix_type::fp16},
890 {0, 0, 0, 32, 8, 16, matrix_type::fp16, matrix_type::fp16,
891 matrix_type::fp32, matrix_type::fp16},
892 {0, 0, 0, 16, 16, 16, matrix_type::fp16, matrix_type::fp16,
893 matrix_type::fp16, matrix_type::fp32},
894 {0, 0, 0, 8, 32, 16, matrix_type::fp16, matrix_type::fp16,
895 matrix_type::fp16, matrix_type::fp32},
896 {0, 0, 0, 32, 8, 16, matrix_type::fp16, matrix_type::fp16,
897 matrix_type::fp16, matrix_type::fp32}};
// sm_72 list: 8-bit integer entries (sint8 x sint8 and uint8 x uint8) with
// sint32 for the last two types, in the same three shapes as above.
898 std::vector<combination> sm_72_combinations = {
899 {0, 0, 0, 16, 16, 16, matrix_type::sint8, matrix_type::sint8,
900 matrix_type::sint32, matrix_type::sint32},
901 {0, 0, 0, 8, 32, 16, matrix_type::sint8, matrix_type::sint8,
902 matrix_type::sint32, matrix_type::sint32},
903 {0, 0, 0, 32, 8, 16, matrix_type::sint8, matrix_type::sint8,
904 matrix_type::sint32, matrix_type::sint32},
905 {0, 0, 0, 16, 16, 16, matrix_type::uint8, matrix_type::uint8,
906 matrix_type::sint32, matrix_type::sint32},
907 {0, 0, 0, 8, 32, 16, matrix_type::uint8, matrix_type::uint8,
908 matrix_type::sint32, matrix_type::sint32},
909 {0, 0, 0, 32, 8, 16, matrix_type::uint8, matrix_type::uint8,
910 matrix_type::sint32, matrix_type::sint32}};
// sm_80 list: one tf32 entry (16x16x8), bf16 entries in the three shapes used
// above, and one fp64 entry (8x8x4). Declared non-const; the tier-selection
// code that follows appends the other lists to it before returning.
911 std::vector<combination> sm_80_combinations = {
912 {0, 0, 0, 16, 16, 8, matrix_type::tf32, matrix_type::tf32,
913 matrix_type::fp32, matrix_type::fp32},
914 {0, 0, 0, 16, 16, 16, matrix_type::bf16, matrix_type::bf16,
915 matrix_type::fp32, matrix_type::fp32},
916 {0, 0, 0, 8, 32, 16, matrix_type::bf16, matrix_type::bf16,
917 matrix_type::fp32, matrix_type::fp32},
918 {0, 0, 0, 32, 8, 16, matrix_type::bf16, matrix_type::bf16,
919 matrix_type::fp32, matrix_type::fp32},
920 {0, 0, 0, 8, 8, 4, matrix_type::fp64, matrix_type::fp64,
921 matrix_type::fp64, matrix_type::fp64}};
922 if (ComputeCapability >= 8.0) {
923 sm_80_combinations.insert(sm_80_combinations.end(),
924 sm_72_combinations.begin(),
925 sm_72_combinations.end());
926 sm_80_combinations.insert(sm_80_combinations.end(),
927 sm_70_combinations.begin(),
928 sm_70_combinations.end());
929 return sm_80_combinations;
930 }
else if (ComputeCapability >= 7.2) {
931 sm_72_combinations.insert(sm_72_combinations.end(),
932 sm_70_combinations.begin(),
933 sm_70_combinations.end());
934 return sm_72_combinations;
935 }
else if (ComputeCapability >= 7.0)
936 return sm_70_combinations;
944 size_t, ext::oneapi::experimental::info::device::max_global_work_groups> {
951 id<1>, ext::oneapi::experimental::info::device::max_work_groups<1>> {
956 max_global_work_groups>
::get(Dev);
961 sizeof(result), &result,
nullptr);
962 return id<1>(std::min(Limit, result[0]));
968 id<2>, ext::oneapi::experimental::info::device::max_work_groups<2>> {
973 max_global_work_groups>
::get(Dev);
978 sizeof(result), &result,
nullptr);
979 return id<2>(std::min(Limit, result[1]), std::min(Limit, result[0]));
985 id<3>, ext::oneapi::experimental::info::device::max_work_groups<3>> {
990 max_global_work_groups>
::get(Dev);
995 sizeof(result), &result,
nullptr);
996 return id<3>(std::min(Limit, result[2]), std::min(Limit, result[1]),
997 std::min(Limit, result[0]));
1005 info::device::ext_oneapi_max_global_work_groups> {
1008 ext::oneapi::experimental::info::device::
1009 max_global_work_groups>
::get(Dev);
1017 info::device::ext_oneapi_max_work_groups_1d> {
1029 info::device::ext_oneapi_max_work_groups_2d> {
1041 info::device::ext_oneapi_max_work_groups_3d> {
1055 sizeof(result), &result,
nullptr);
1056 if (result ==
nullptr)
1057 throw invalid_object_error(
1058 "No parent for device because it is not a subdevice",
1059 PI_ERROR_INVALID_DEVICE);
1061 const auto &Platform = Dev->getPlatformImpl();
1062 return createSyclObjFromImpl<device>(
1063 Platform->getOrMakeDeviceImpl(result, Platform));
1083 Dev->getHandleRef(),
1087 return (Err != PI_SUCCESS) ? false : (caps &
PI_USM_ACCESS);
1097 Dev->getHandleRef(),
1101 return (Err != PI_SUCCESS) ? false : (caps &
PI_USM_ACCESS);
1111 Dev->getHandleRef(),
1114 return (Err != PI_SUCCESS) ? false : (caps &
PI_USM_ACCESS);
1121 info::device::usm_restricted_shared_allocations> {
1125 Dev->getHandleRef(),
1129 return (Err != PI_SUCCESS)
1141 Dev->getHandleRef(),
1144 return (Err != PI_SUCCESS) ? false : (caps &
PI_USM_ACCESS);
1151 bool, ext::codeplay::experimental::info::device::supports_fusion> {
1153 #if SYCL_EXT_CODEPLAY_KERNEL_FUSION
1159 return Dev->is_cpu() || Dev->is_gpu();
1176 ext::codeplay::experimental::info::device::max_registers_per_work_group> {
1178 uint32_t maxRegsPerWG;
1180 Dev->getHandleRef(),
1181 PiInfoCode<ext::codeplay::experimental::info::device::
1182 max_registers_per_work_group>::value,
1183 sizeof(maxRegsPerWG), &maxRegsPerWG,
nullptr);
1184 return maxRegsPerWG;
1191 std::vector<sycl::device>,
1192 ext::oneapi::experimental::info::device::component_devices> {
1196 size_t ResultSize = 0;
1199 Dev->getHandleRef(),
1201 ext::oneapi::experimental::info::device::component_devices>::value,
1202 0,
nullptr, &ResultSize);
1203 size_t DevCount = ResultSize /
sizeof(
pi_device);
1205 std::vector<pi_device> Devs(DevCount);
1207 Dev->getHandleRef(),
1209 ext::oneapi::experimental::info::device::component_devices>::value,
1210 ResultSize, Devs.data(),
nullptr);
1211 std::vector<sycl::device> Result;
1212 const auto &Platform = Dev->getPlatformImpl();
1213 for (
const auto &d : Devs)
1214 Result.push_back(createSyclObjFromImpl<device>(
1215 Platform->getOrMakeDeviceImpl(d, Platform)));
1222 sycl::
device, ext::oneapi::experimental::info::device::composite_device> {
1226 if (!Dev->has(sycl::aspect::ext_oneapi_is_component))
1228 "Only devices with aspect::ext_oneapi_is_component "
1229 "can call this function.");
1233 Dev->getHandleRef(),
1235 ext::oneapi::experimental::info::device::composite_device>::value,
1236 sizeof(Result), &Result,
nullptr);
1239 const auto &Platform = Dev->getPlatformImpl();
1240 return createSyclObjFromImpl<device>(
1241 Platform->getOrMakeDeviceImpl(Result, Platform));
1244 "A component with aspect::ext_oneapi_is_component "
1245 "must have a composite device.");
1249 template <
typename Param>
1252 "Invalid device information descriptor");
1253 if (std::is_same<Param,
1254 sycl::_V1::ext::intel::info::device::free_memory>::value) {
1255 if (!Dev->has(aspect::ext_intel_free_memory))
1256 throw invalid_object_error(
1257 "The device does not have the ext_intel_free_memory aspect",
1258 PI_ERROR_INVALID_DEVICE);
1267 template <
typename Param>
1271 inline std::vector<sycl::aspect> get_device_info_host<info::device::aspects>() {
1272 return std::vector<sycl::aspect>();
1277 get_device_info_host<ext::oneapi::experimental::info::device::architecture>() {
1286 template <>
inline uint32_t get_device_info_host<info::device::vendor_id>() {
1291 inline uint32_t get_device_info_host<info::device::max_compute_units>() {
1292 return std::thread::hardware_concurrency();
1296 inline uint32_t get_device_info_host<info::device::max_work_item_dimensions>() {
1301 inline range<1> get_device_info_host<info::device::max_work_item_sizes<1>>() {
1307 inline range<2> get_device_info_host<info::device::max_work_item_sizes<2>>() {
1313 inline range<3> get_device_info_host<info::device::max_work_item_sizes<3>>() {
1320 ext::oneapi::experimental::info::device::max_global_work_groups>() {
1330 ext::oneapi::experimental::info::device::max_global_work_groups>();
1336 ext::oneapi::experimental::info::device::max_work_groups<2>>() {
1339 ext::oneapi::experimental::info::device::max_global_work_groups>();
1340 return {Limit, Limit};
1345 ext::oneapi::experimental::info::device::max_work_groups<3>>() {
1348 ext::oneapi::experimental::info::device::max_global_work_groups>();
1349 return {Limit, Limit, Limit};
1355 inline constexpr
size_t
1356 get_device_info_host<info::device::ext_oneapi_max_global_work_groups>() {
1358 ext::oneapi::experimental::info::device::max_global_work_groups>();
1365 get_device_info_host<info::device::ext_oneapi_max_work_groups_1d>() {
1375 get_device_info_host<info::device::ext_oneapi_max_work_groups_2d>() {
1384 get_device_info_host<info::device::ext_oneapi_max_work_groups_3d>() {
1386 ext::oneapi::experimental::info::device::max_work_groups<3>>();
1390 inline size_t get_device_info_host<info::device::max_work_group_size>() {
1397 get_device_info_host<info::device::preferred_vector_width_char>() {
1404 get_device_info_host<info::device::preferred_vector_width_short>() {
1411 get_device_info_host<info::device::preferred_vector_width_int>() {
1418 get_device_info_host<info::device::preferred_vector_width_long>() {
1425 get_device_info_host<info::device::preferred_vector_width_float>() {
1432 get_device_info_host<info::device::preferred_vector_width_double>() {
1439 get_device_info_host<info::device::preferred_vector_width_half>() {
1445 inline uint32_t get_device_info_host<info::device::native_vector_width_char>() {
1451 get_device_info_host<info::device::native_vector_width_short>() {
1456 inline uint32_t get_device_info_host<info::device::native_vector_width_int>() {
1461 inline uint32_t get_device_info_host<info::device::native_vector_width_long>() {
1467 get_device_info_host<info::device::native_vector_width_float>() {
1473 get_device_info_host<info::device::native_vector_width_double>() {
1478 inline uint32_t get_device_info_host<info::device::native_vector_width_half>() {
1483 inline uint32_t get_device_info_host<info::device::max_clock_frequency>() {
1487 template <>
inline uint32_t get_device_info_host<info::device::address_bits>() {
1488 return sizeof(
void *) * 8;
1492 inline uint64_t get_device_info_host<info::device::global_mem_size>() {
1497 inline uint64_t get_device_info_host<info::device::max_mem_alloc_size>() {
1499 const uint64_t
a = get_device_info_host<info::device::global_mem_size>() / 4;
1500 const uint64_t
b = 128ul * 1024 * 1024;
1501 return (
a >
b) ?
a :
b;
1504 template <>
inline bool get_device_info_host<info::device::image_support>() {
1508 template <>
inline bool get_device_info_host<info::device::atomic64>() {
1513 inline std::vector<memory_order>
1514 get_device_info_host<info::device::atomic_memory_order_capabilities>() {
1520 inline std::vector<memory_order>
1521 get_device_info_host<info::device::atomic_fence_order_capabilities>() {
1527 inline std::vector<memory_scope>
1528 get_device_info_host<info::device::atomic_memory_scope_capabilities>() {
1534 inline std::vector<memory_scope>
1535 get_device_info_host<info::device::atomic_fence_scope_capabilities>() {
1542 get_device_info_host<info::device::ext_oneapi_bfloat16_math_functions>() {
1547 inline uint32_t get_device_info_host<info::device::max_read_image_args>() {
1553 inline uint32_t get_device_info_host<info::device::max_write_image_args>() {
1559 inline size_t get_device_info_host<info::device::image2d_max_width>() {
1575 inline size_t get_device_info_host<info::device::image2d_max_height>() {
1591 inline size_t get_device_info_host<info::device::image3d_max_width>() {
1607 inline size_t get_device_info_host<info::device::image3d_max_height>() {
1623 inline size_t get_device_info_host<info::device::image3d_max_depth>() {
1639 inline size_t get_device_info_host<info::device::image_max_buffer_size>() {
1645 inline size_t get_device_info_host<info::device::image_max_array_size>() {
1650 template <>
inline uint32_t get_device_info_host<info::device::max_samplers>() {
1656 inline size_t get_device_info_host<info::device::max_parameter_size>() {
1662 inline uint32_t get_device_info_host<info::device::mem_base_addr_align>() {
1667 inline std::vector<info::fp_config>
1668 get_device_info_host<info::device::half_fp_config>() {
1674 inline std::vector<info::fp_config>
1675 get_device_info_host<info::device::single_fp_config>() {
1681 inline std::vector<info::fp_config>
1682 get_device_info_host<info::device::double_fp_config>() {
1691 get_device_info_host<info::device::global_mem_cache_type>() {
1697 get_device_info_host<info::device::global_mem_cache_line_size>() {
1702 inline uint64_t get_device_info_host<info::device::global_mem_cache_size>() {
1707 inline uint64_t get_device_info_host<info::device::max_constant_buffer_size>() {
1713 inline uint32_t get_device_info_host<info::device::max_constant_args>() {
1720 get_device_info_host<info::device::local_mem_type>() {
1725 inline uint64_t get_device_info_host<info::device::local_mem_size>() {
1731 inline bool get_device_info_host<info::device::error_correction_support>() {
1736 inline bool get_device_info_host<info::device::host_unified_memory>() {
1741 inline size_t get_device_info_host<info::device::profiling_timer_resolution>() {
1742 typedef std::ratio_divide<std::chrono::high_resolution_clock::period,
1745 return ns_period::num / ns_period::den;
1748 template <>
inline bool get_device_info_host<info::device::is_endian_little>() {
1757 template <>
inline bool get_device_info_host<info::device::is_available>() {
1762 inline bool get_device_info_host<info::device::is_compiler_available>() {
1767 inline bool get_device_info_host<info::device::is_linker_available>() {
1772 inline std::vector<info::execution_capability>
1773 get_device_info_host<info::device::execution_capabilities>() {
1777 template <>
inline bool get_device_info_host<info::device::queue_profiling>() {
1782 inline std::vector<kernel_id>
1783 get_device_info_host<info::device::built_in_kernel_ids>() {
1788 inline std::vector<std::string>
1789 get_device_info_host<info::device::built_in_kernels>() {
1793 template <>
inline platform get_device_info_host<info::device::platform>() {
1797 template <>
inline std::string get_device_info_host<info::device::name>() {
1798 return "SYCL host device";
1801 template <>
inline std::string get_device_info_host<info::device::vendor>() {
1806 inline std::string get_device_info_host<info::device::driver_version>() {
1810 template <>
inline std::string get_device_info_host<info::device::profile>() {
1811 return "FULL PROFILE";
1814 template <>
inline std::string get_device_info_host<info::device::version>() {
1819 inline std::string get_device_info_host<info::device::opencl_c_version>() {
1820 return "not applicable";
1824 inline std::vector<std::string>
1825 get_device_info_host<info::device::extensions>() {
1831 inline size_t get_device_info_host<info::device::printf_buffer_size>() {
1837 inline bool get_device_info_host<info::device::preferred_interop_user_sync>() {
1841 template <>
inline device get_device_info_host<info::device::parent_device>() {
1842 throw invalid_object_error(
1843 "Partitioning to subdevices of the host device is not implemented",
1844 PI_ERROR_INVALID_DEVICE);
1849 get_device_info_host<info::device::partition_max_sub_devices>() {
1855 inline std::vector<info::partition_property>
1856 get_device_info_host<info::device::partition_properties>() {
1862 inline std::vector<info::partition_affinity_domain>
1863 get_device_info_host<info::device::partition_affinity_domains>() {
1870 get_device_info_host<info::device::partition_type_property>() {
1876 get_device_info_host<info::device::partition_type_affinity_domain>() {
1882 inline uint32_t get_device_info_host<info::device::reference_count>() {
1888 inline uint32_t get_device_info_host<info::device::max_num_sub_groups>() {
1890 throw runtime_error(
"Sub-group feature is not supported on HOST device.",
1891 PI_ERROR_INVALID_DEVICE);
1895 inline std::vector<size_t>
1896 get_device_info_host<info::device::sub_group_sizes>() {
1898 throw runtime_error(
"Sub-group feature is not supported on HOST device.",
1899 PI_ERROR_INVALID_DEVICE);
1904 get_device_info_host<info::device::sub_group_independent_forward_progress>() {
1906 throw runtime_error(
"Sub-group feature is not supported on HOST device.",
1907 PI_ERROR_INVALID_DEVICE);
1911 inline bool get_device_info_host<info::device::kernel_kernel_pipe_support>() {
1916 inline std::string get_device_info_host<info::device::backend_version>() {
1917 throw runtime_error(
1918 "Backend version feature is not supported on HOST device.",
1919 PI_ERROR_INVALID_DEVICE);
1923 inline bool get_device_info_host<info::device::usm_device_allocations>() {
1928 inline bool get_device_info_host<info::device::usm_host_allocations>() {
1933 inline bool get_device_info_host<info::device::usm_shared_allocations>() {
1939 get_device_info_host<info::device::usm_restricted_shared_allocations>() {
1944 inline bool get_device_info_host<info::device::usm_system_allocations>() {
1949 inline bool get_device_info_host<info::device::ext_intel_mem_channel>() {
1956 inline uint32_t get_device_info_host<ext::intel::info::device::device_id>() {
1957 throw runtime_error(
"Obtaining the device ID is not supported on HOST device",
1958 PI_ERROR_INVALID_DEVICE);
1962 get_device_info_host<ext::intel::info::device::pci_address>() {
1963 throw runtime_error(
1964 "Obtaining the PCI address is not supported on HOST device",
1965 PI_ERROR_INVALID_DEVICE);
1968 inline uint32_t get_device_info_host<ext::intel::info::device::gpu_eu_count>() {
1969 throw runtime_error(
"Obtaining the EU count is not supported on HOST device",
1970 PI_ERROR_INVALID_DEVICE);
1974 get_device_info_host<ext::intel::info::device::gpu_eu_simd_width>() {
1975 throw runtime_error(
1976 "Obtaining the EU SIMD width is not supported on HOST device",
1977 PI_ERROR_INVALID_DEVICE);
1980 inline uint32_t get_device_info_host<ext::intel::info::device::gpu_slices>() {
1981 throw runtime_error(
1982 "Obtaining the number of slices is not supported on HOST device",
1983 PI_ERROR_INVALID_DEVICE);
1987 get_device_info_host<ext::intel::info::device::gpu_subslices_per_slice>() {
1988 throw runtime_error(
"Obtaining the number of subslices per slice is not "
1989 "supported on HOST device",
1990 PI_ERROR_INVALID_DEVICE);
1994 get_device_info_host<ext::intel::info::device::gpu_eu_count_per_subslice>() {
1995 throw runtime_error(
1996 "Obtaining the EU count per subslice is not supported on HOST device",
1997 PI_ERROR_INVALID_DEVICE);
2001 get_device_info_host<ext::intel::info::device::gpu_hw_threads_per_eu>() {
2002 throw runtime_error(
2003 "Obtaining the HW threads count per EU is not supported on HOST device",
2004 PI_ERROR_INVALID_DEVICE);
2008 get_device_info_host<ext::intel::info::device::max_mem_bandwidth>() {
2009 throw runtime_error(
2010 "Obtaining the maximum memory bandwidth is not supported on HOST device",
2011 PI_ERROR_INVALID_DEVICE);
2015 get_device_info_host<ext::intel::info::device::uuid>() {
2016 throw runtime_error(
2017 "Obtaining the device uuid is not supported on HOST device",
2018 PI_ERROR_INVALID_DEVICE);
2024 inline std::string get_device_info_host<info::device::ext_intel_pci_address>() {
2025 throw runtime_error(
2026 "Obtaining the PCI address is not supported on HOST device",
2027 PI_ERROR_INVALID_DEVICE);
2032 inline uint32_t get_device_info_host<info::device::ext_intel_gpu_eu_count>() {
2033 throw runtime_error(
"Obtaining the EU count is not supported on HOST device",
2034 PI_ERROR_INVALID_DEVICE);
2040 get_device_info_host<info::device::ext_intel_gpu_eu_simd_width>() {
2041 throw runtime_error(
2042 "Obtaining the EU SIMD width is not supported on HOST device",
2043 PI_ERROR_INVALID_DEVICE);
2048 inline uint32_t get_device_info_host<info::device::ext_intel_gpu_slices>() {
2049 throw runtime_error(
2050 "Obtaining the number of slices is not supported on HOST device",
2051 PI_ERROR_INVALID_DEVICE);
2057 get_device_info_host<info::device::ext_intel_gpu_subslices_per_slice>() {
2058 throw runtime_error(
"Obtaining the number of subslices per slice is not "
2059 "supported on HOST device",
2060 PI_ERROR_INVALID_DEVICE);
2066 get_device_info_host<info::device::ext_intel_gpu_eu_count_per_subslice>() {
2067 throw runtime_error(
2068 "Obtaining the EU count per subslice is not supported on HOST device",
2069 PI_ERROR_INVALID_DEVICE);
2075 get_device_info_host<info::device::ext_intel_gpu_hw_threads_per_eu>() {
2076 throw runtime_error(
2077 "Obtaining the HW threads count per EU is not supported on HOST device",
2078 PI_ERROR_INVALID_DEVICE);
2084 get_device_info_host<info::device::ext_intel_max_mem_bandwidth>() {
2085 throw runtime_error(
2086 "Obtaining the maximum memory bandwidth is not supported on HOST device",
2087 PI_ERROR_INVALID_DEVICE);
2090 template <>
inline bool get_device_info_host<info::device::ext_oneapi_srgb>() {
2098 get_device_info_host<info::device::ext_intel_device_info_uuid>() {
2099 throw runtime_error(
2100 "Obtaining the device uuid is not supported on HOST device",
2101 PI_ERROR_INVALID_DEVICE);
2105 inline uint64_t get_device_info_host<ext::intel::info::device::free_memory>() {
2106 throw runtime_error(
2107 "Obtaining the device free memory is not supported on HOST device",
2108 PI_ERROR_INVALID_DEVICE);
2113 get_device_info_host<ext::intel::info::device::memory_clock_rate>() {
2114 throw runtime_error(
2115 "Obtaining the device memory clock rate is not supported on HOST device",
2116 PI_ERROR_INVALID_DEVICE);
2121 get_device_info_host<ext::intel::info::device::memory_bus_width>() {
2122 throw runtime_error(
2123 "Obtaining the device memory bus width is not supported on HOST device",
2124 PI_ERROR_INVALID_DEVICE);
2129 get_device_info_host<ext::intel::info::device::max_compute_queue_indices>() {
2130 throw runtime_error(
2131 "Obtaining max compute queue indices is not supported on HOST device",
2132 PI_ERROR_INVALID_DEVICE);
2137 ext::codeplay::experimental::info::device::supports_fusion>() {
2144 ext::codeplay::experimental::info::device::max_registers_per_work_group>() {
2145 throw runtime_error(
"Obtaining the maximum number of available registers per "
2146 "work-group is not supported on HOST device",
2147 PI_ERROR_INVALID_DEVICE);
2152 ext::oneapi::experimental::info::device::image_row_pitch_align>() {
2153 throw runtime_error(
"Obtaining image pitch alignment is not "
2154 "supported on HOST device",
2155 PI_ERROR_INVALID_DEVICE);
2160 ext::oneapi::experimental::info::device::max_image_linear_row_pitch>() {
2161 throw runtime_error(
"Obtaining max image linear pitch is not "
2162 "supported on HOST device",
2163 PI_ERROR_INVALID_DEVICE);
2167 inline std::vector<ext::oneapi::experimental::matrix::combination>
2169 ext::oneapi::experimental::info::device::matrix_combinations>() {
2170 throw runtime_error(
"Obtaining matrix combinations is not "
2171 "supported on HOST device",
2172 PI_ERROR_INVALID_DEVICE);
2177 ext::oneapi::experimental::info::device::max_image_linear_width>() {
2178 throw runtime_error(
"Obtaining max image linear width is not "
2179 "supported on HOST device",
2180 PI_ERROR_INVALID_DEVICE);
2185 ext::oneapi::experimental::info::device::max_image_linear_height>() {
2186 throw runtime_error(
"Obtaining max image linear height is not "
2187 "supported on HOST device",
2188 PI_ERROR_INVALID_DEVICE);
2193 ext::oneapi::experimental::info::device::mipmap_max_anisotropy>() {
2194 throw runtime_error(
"Bindless image mipaps are not supported on HOST device",
2195 PI_ERROR_INVALID_DEVICE);
2200 ext::oneapi::experimental::info::device::component_devices>() {
2201 throw runtime_error(
"Host devices cannot be component devices.",
2202 PI_ERROR_INVALID_DEVICE);
2207 ext::oneapi::experimental::info::device::composite_device>() {
2208 throw runtime_error(
"Host devices cannot be composite devices.",
2209 PI_ERROR_INVALID_DEVICE);
static size_t getOSMemSize()
Returns the amount of RAM available for the operating system.
static ProgramManager & getInstance()
std::string get_device_info_string(sycl::detail::pi::PiDeviceInfo InfoCode) const
Get device info string.
const PluginPtr & getPlugin() const
sycl::detail::pi::PiDevice & getHandleRef()
Get reference to PI device.
The SYCL device class encapsulates a single SYCL device on which kernels may be executed.
const std::error_code & code() const noexcept
A unique identifier of an item in an index space.
#define __SYCL_AFFINITY_DOMAIN_STRING_CASE(DOMAIN)
std::vector< memory_scope > readMemoryScopeBitfield(pi_memory_scope_capabilities bits)
range< 2 > construct_range< 2 >(size_t *values)
std::vector< info::fp_config > read_fp_bitfield(pi_device_fp_config bits)
range< Dimensions > construct_range(size_t *values)=delete
constexpr std::pair< const int, oneapi_exp_arch > IntelGPUArchitectures[]
constexpr std::pair< const int, oneapi_exp_arch > IntelCPUArchitectures[]
range< 3 > construct_range< 3 >(size_t *values)
std::string affinityDomainToString(info::partition_affinity_domain AffinityDomain)
static bool is_sycl_partition_property(info::partition_property PP)
Param::return_type get_device_info_host()=delete
std::vector< std::string > split_string(const std::string &str, char delimeter)
std::vector< info::partition_affinity_domain > read_domain_bitfield(pi_device_affinity_domain bits)
std::shared_ptr< device_impl > DeviceImplPtr
Param::return_type get_device_info(const DeviceImplPtr &Dev)
constexpr std::pair< const char *, oneapi_exp_arch > NvidiaAmdGPUArchitectures[]
std::vector< memory_order > readMemoryOrderBitfield(pi_memory_order_capabilities bits)
std::array< unsigned char, 16 > uuid_type
std::vector< info::execution_capability > read_execution_bitfield(pi_device_exec_capabilities bits)
range< 1 > construct_range< 1 >(size_t *values)
partition_affinity_domain
@ partition_by_affinity_domain
@ ext_intel_partition_by_cslice
@ correctly_rounded_divide_sqrt
class __SYCL_EBO __SYCL_SPECIAL_CLASS __SYCL_TYPE(local_accessor) local_accessor class __SYCL_EBO __SYCL_SPECIAL_CLASS Dimensions
T detail::marray_element_t< T > y T T T maxval[i] T T T a
std::error_code make_error_code(sycl::errc E) noexcept
Constructs an error code using e and sycl_category()
static constexpr pi_device_fp_config PI_FP_SOFT_FLOAT
pi_bitfield pi_device_exec_capabilities
static constexpr pi_device_affinity_domain PI_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE
_pi_usm_capabilities pi_usm_capabilities
@ PI_DEVICE_INFO_PARTITION_TYPE
static constexpr pi_device_fp_config PI_FP_DENORM
static constexpr pi_device_affinity_domain PI_DEVICE_AFFINITY_DOMAIN_L2_CACHE
static constexpr pi_device_affinity_domain PI_DEVICE_AFFINITY_DOMAIN_L3_CACHE
pi_result piDeviceGetInfo(pi_device device, pi_device_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
Returns requested info for provided native device Return PI_DEVICE_INFO_EXTENSION_DEVICELIB_ASSERT fo...
@ PI_USM_CONCURRENT_ACCESS
pi_bitfield pi_queue_properties
static constexpr pi_device_affinity_domain PI_DEVICE_AFFINITY_DOMAIN_L4_CACHE
static constexpr pi_device_fp_config PI_FP_ROUND_TO_NEAREST
static constexpr pi_device_fp_config PI_FP_ROUND_TO_INF
constexpr pi_queue_properties PI_QUEUE_FLAG_PROFILING_ENABLE
static constexpr pi_device_fp_config PI_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT
static constexpr pi_device_affinity_domain PI_DEVICE_AFFINITY_DOMAIN_L1_CACHE
static constexpr pi_device_affinity_domain PI_DEVICE_AFFINITY_DOMAIN_NUMA
pi_bitfield pi_memory_scope_capabilities
pi_bitfield pi_device_fp_config
static constexpr pi_device_exec_capabilities PI_EXEC_NATIVE_KERNEL
static constexpr pi_device_fp_config PI_FP_ROUND_TO_ZERO
static constexpr pi_device_fp_config PI_FP_FMA
static constexpr pi_device_exec_capabilities PI_EXEC_KERNEL
static constexpr pi_device_fp_config PI_FP_INF_NAN
pi_bitfield pi_device_affinity_domain
pi_bitfield pi_memory_order_capabilities
C++ wrapper of extern "C" PI interfaces.
info::device::native_vector_width_double type
info::device::native_vector_width_half type
static ReturnT get(const DeviceImplPtr &Dev)
static bool get(const DeviceImplPtr &Dev)
static bool get(const DeviceImplPtr &Dev)
static bool get(const DeviceImplPtr &)
static bool get(const DeviceImplPtr &Dev)
static bool get(const DeviceImplPtr &Dev)
static bool get(const DeviceImplPtr &Dev)
static bool get(const DeviceImplPtr &Dev)
static bool get(const DeviceImplPtr &Dev)
static bool get(const DeviceImplPtr &Dev)
static bool get(const DeviceImplPtr &Dev)
static device get(const DeviceImplPtr &Dev)
static ext::oneapi::experimental::architecture get(const DeviceImplPtr &Dev)
static id< 1 > get(const DeviceImplPtr &Dev)
static id< 1 > get(const DeviceImplPtr &Dev)
static id< 2 > get(const DeviceImplPtr &Dev)
static id< 2 > get(const DeviceImplPtr &Dev)
static id< 3 > get(const DeviceImplPtr &Dev)
static id< 3 > get(const DeviceImplPtr &Dev)
static info::partition_affinity_domain get(const DeviceImplPtr &Dev)
static info::partition_property get(const DeviceImplPtr &Dev)
static range< Dimensions > get(const DeviceImplPtr &Dev)
static size_t get(const DeviceImplPtr)
static size_t get(const DeviceImplPtr &Dev)
static std::string get(const DeviceImplPtr &Dev)
static std::string get(const DeviceImplPtr &Dev)
static std::vector< ext::oneapi::experimental::matrix::combination > get(const DeviceImplPtr &Dev)
static std::vector< info::execution_capability > get(const DeviceImplPtr &Dev)
static std::vector< info::fp_config > get(const DeviceImplPtr &Dev)
static std::vector< info::fp_config > get(const DeviceImplPtr &Dev)
static std::vector< info::partition_affinity_domain > get(const DeviceImplPtr &Dev)
static std::vector< info::partition_property > get(const DeviceImplPtr &Dev)
static std::vector< kernel_id > get(const DeviceImplPtr &Dev)
static std::vector< memory_order > get(const DeviceImplPtr &Dev)
static std::vector< memory_order > get(const DeviceImplPtr &Dev)
static std::vector< memory_scope > get(const DeviceImplPtr &Dev)
static std::vector< memory_scope > get(const DeviceImplPtr &Dev)
static std::vector< size_t > get(const DeviceImplPtr &Dev)
static std::vector< std::string > get(const DeviceImplPtr &Dev)
static std::vector< std::string > get(const DeviceImplPtr &Dev)
static std::vector< sycl::device > get(const DeviceImplPtr &Dev)
static sycl::device get(const DeviceImplPtr &Dev)
static uint32_t get(const DeviceImplPtr &Dev)
static ReturnT get(const DeviceImplPtr &Dev)