22 #include <sycl/feature_test.hpp>
34 inline namespace _V1 {
38 std::vector<info::fp_config> result;
58 inline std::vector<info::partition_affinity_domain>
60 std::vector<info::partition_affinity_domain> result;
76 inline std::vector<info::execution_capability>
78 std::vector<info::execution_capability> result;
88 switch (AffinityDomain) {
89 #define __SYCL_AFFINITY_DOMAIN_STRING_CASE(DOMAIN) \
94 sycl::info::partition_affinity_domain::numa)
96 sycl::info::partition_affinity_domain::L4_cache)
98 sycl::info::partition_affinity_domain::L3_cache)
100 sycl::info::partition_affinity_domain::L2_cache)
102 sycl::info::partition_affinity_domain::L1_cache)
104 sycl::info::partition_affinity_domain::next_partitionable)
105 #undef __SYCL_AFFINITY_DOMAIN_STRING_CASE
107 assert(
false &&
"Missing case for affinity domain.");
130 using type = info::device::native_vector_width_half;
134 using type = info::device::native_vector_width_double;
147 return ReturnT(result);
161 return createSyclObjFromImpl<platform>(
170 size_t resultSize = 0;
172 nullptr, &resultSize);
173 if (resultSize == 0) {
174 return std::string();
176 std::unique_ptr<char[]> result(
new char[resultSize]);
178 getHandleRef(), InfoCode, resultSize, result.get(),
nullptr);
180 return std::string(result.get());
191 template <
typename ReturnT>
197 template <
typename Param>
206 cl_device_fp_config result;
217 return Dev->get_device_info_string(
225 info::device::single_fp_config> {
230 sizeof(result), &result,
nullptr);
243 sizeof(Properties), &Properties,
nullptr);
251 info::device::atomic_memory_order_capabilities> {
265 info::device::atomic_fence_order_capabilities> {
279 info::device::atomic_memory_scope_capabilities> {
293 info::device::atomic_fence_scope_capabilities> {
314 sizeof(result), &result,
nullptr);
315 if (Err != PI_SUCCESS) {
326 info::device::execution_capabilities> {
340 info::device::built_in_kernel_ids> {
342 std::string result = Dev->get_device_info_string(
346 std::vector<kernel_id> ids;
347 ids.reserve(names.size());
348 for (
const auto &name : names) {
358 info::device::built_in_kernels> {
360 std::string result = Dev->get_device_info_string(
369 info::device::extensions> {
392 info::device::partition_properties> {
395 const auto &Plugin = Dev->getPlugin();
399 Dev->getHandleRef(), info_partition, 0,
nullptr, &resultSize);
401 size_t arrayLength = resultSize /
sizeof(cl_device_partition_property);
402 if (arrayLength == 0) {
405 std::unique_ptr<cl_device_partition_property[]> arrayResult(
406 new cl_device_partition_property[arrayLength]);
408 info_partition, resultSize,
409 arrayResult.get(),
nullptr);
411 std::vector<info::partition_property> result;
412 for (
size_t i = 0; i < arrayLength; ++i) {
418 result.push_back(pp);
427 info::device::partition_affinity_domains> {
428 static std::vector<info::partition_affinity_domain>
434 sizeof(result), &result,
nullptr);
443 info::device::partition_type_affinity_domain> {
449 nullptr, &resultSize);
450 if (resultSize != 1) {
453 cl_device_partition_property result;
457 sizeof(result), &result,
nullptr);
473 info::device::partition_type_property> {
482 size_t arrayLength = resultSize /
sizeof(cl_device_partition_property);
484 std::unique_ptr<cl_device_partition_property[]> arrayResult(
485 new cl_device_partition_property[arrayLength]);
488 arrayResult.get(),
nullptr);
497 info::device::sub_group_sizes> {
499 size_t resultSize = 0;
502 0,
nullptr, &resultSize);
504 std::vector<uint32_t> result32(resultSize /
sizeof(uint32_t));
507 resultSize, result32.data(),
nullptr);
509 std::vector<size_t> result;
510 result.reserve(result32.size());
511 for (uint32_t value : result32) {
512 result.push_back(value);
528 std::string platform_name = plt.get_info<info::platform::name>();
529 if (platform_name ==
"Intel(R) FPGA Emulation Platform for OpenCL(TM)" ||
530 platform_name ==
"Intel(R) FPGA SDK for OpenCL(TM)")
541 template <
int Dimensions>
549 return {values[1], values[0]};
552 return {values[2], values[1], values[0]};
556 template <
int Dimensions>
558 info::device::max_work_item_sizes<Dimensions>> {
564 sizeof(result), &result,
nullptr);
565 return construct_range<Dimensions>(result);
574 {
"5.0", oneapi_exp_arch::nvidia_gpu_sm_50},
575 {
"5.2", oneapi_exp_arch::nvidia_gpu_sm_52},
576 {
"5.3", oneapi_exp_arch::nvidia_gpu_sm_53},
577 {
"6.0", oneapi_exp_arch::nvidia_gpu_sm_60},
578 {
"6.1", oneapi_exp_arch::nvidia_gpu_sm_61},
579 {
"6.2", oneapi_exp_arch::nvidia_gpu_sm_62},
580 {
"7.0", oneapi_exp_arch::nvidia_gpu_sm_70},
581 {
"7.2", oneapi_exp_arch::nvidia_gpu_sm_72},
582 {
"7.5", oneapi_exp_arch::nvidia_gpu_sm_75},
583 {
"8.0", oneapi_exp_arch::nvidia_gpu_sm_80},
584 {
"8.6", oneapi_exp_arch::nvidia_gpu_sm_86},
585 {
"8.7", oneapi_exp_arch::nvidia_gpu_sm_87},
586 {
"8.9", oneapi_exp_arch::nvidia_gpu_sm_89},
587 {
"9.0", oneapi_exp_arch::nvidia_gpu_sm_90},
588 {
"gfx701", oneapi_exp_arch::amd_gpu_gfx701},
589 {
"gfx702", oneapi_exp_arch::amd_gpu_gfx702},
590 {
"gfx801", oneapi_exp_arch::amd_gpu_gfx801},
591 {
"gfx802", oneapi_exp_arch::amd_gpu_gfx802},
592 {
"gfx803", oneapi_exp_arch::amd_gpu_gfx803},
593 {
"gfx805", oneapi_exp_arch::amd_gpu_gfx805},
594 {
"gfx810", oneapi_exp_arch::amd_gpu_gfx810},
595 {
"gfx900", oneapi_exp_arch::amd_gpu_gfx900},
596 {
"gfx902", oneapi_exp_arch::amd_gpu_gfx902},
597 {
"gfx904", oneapi_exp_arch::amd_gpu_gfx904},
598 {
"gfx906", oneapi_exp_arch::amd_gpu_gfx906},
599 {
"gfx908", oneapi_exp_arch::amd_gpu_gfx908},
600 {
"gfx909", oneapi_exp_arch::amd_gpu_gfx909},
601 {
"gfx90a", oneapi_exp_arch::amd_gpu_gfx90a},
602 {
"gfx90c", oneapi_exp_arch::amd_gpu_gfx90c},
603 {
"gfx940", oneapi_exp_arch::amd_gpu_gfx940},
604 {
"gfx941", oneapi_exp_arch::amd_gpu_gfx941},
605 {
"gfx942", oneapi_exp_arch::amd_gpu_gfx942},
606 {
"gfx1010", oneapi_exp_arch::amd_gpu_gfx1010},
607 {
"gfx1011", oneapi_exp_arch::amd_gpu_gfx1011},
608 {
"gfx1012", oneapi_exp_arch::amd_gpu_gfx1012},
609 {
"gfx1013", oneapi_exp_arch::amd_gpu_gfx1013},
610 {
"gfx1030", oneapi_exp_arch::amd_gpu_gfx1030},
611 {
"gfx1031", oneapi_exp_arch::amd_gpu_gfx1031},
612 {
"gfx1032", oneapi_exp_arch::amd_gpu_gfx1032},
613 {
"gfx1033", oneapi_exp_arch::amd_gpu_gfx1033},
614 {
"gfx1034", oneapi_exp_arch::amd_gpu_gfx1034},
615 {
"gfx1035", oneapi_exp_arch::amd_gpu_gfx1035},
616 {
"gfx1036", oneapi_exp_arch::amd_gpu_gfx1036},
617 {
"gfx1100", oneapi_exp_arch::amd_gpu_gfx1100},
618 {
"gfx1101", oneapi_exp_arch::amd_gpu_gfx1101},
619 {
"gfx1102", oneapi_exp_arch::amd_gpu_gfx1102},
620 {
"gfx1103", oneapi_exp_arch::amd_gpu_gfx1103},
621 {
"gfx1150", oneapi_exp_arch::amd_gpu_gfx1150},
622 {
"gfx1151", oneapi_exp_arch::amd_gpu_gfx1151},
623 {
"gfx1200", oneapi_exp_arch::amd_gpu_gfx1200},
624 {
"gfx1201", oneapi_exp_arch::amd_gpu_gfx1201},
629 {0x02000000, oneapi_exp_arch::intel_gpu_bdw},
630 {0x02400009, oneapi_exp_arch::intel_gpu_skl},
631 {0x02404009, oneapi_exp_arch::intel_gpu_kbl},
632 {0x02408009, oneapi_exp_arch::intel_gpu_cfl},
633 {0x0240c000, oneapi_exp_arch::intel_gpu_apl},
634 {0x02410000, oneapi_exp_arch::intel_gpu_glk},
635 {0x02414000, oneapi_exp_arch::intel_gpu_whl},
636 {0x02418000, oneapi_exp_arch::intel_gpu_aml},
637 {0x0241c000, oneapi_exp_arch::intel_gpu_cml},
638 {0x02c00000, oneapi_exp_arch::intel_gpu_icllp},
639 {0x02c08000, oneapi_exp_arch::intel_gpu_ehl},
640 {0x03000000, oneapi_exp_arch::intel_gpu_tgllp},
641 {0x03004000, oneapi_exp_arch::intel_gpu_rkl},
642 {0x03008000, oneapi_exp_arch::intel_gpu_adl_s},
643 {0x0300c000, oneapi_exp_arch::intel_gpu_adl_p},
644 {0x03010000, oneapi_exp_arch::intel_gpu_adl_n},
645 {0x03028000, oneapi_exp_arch::intel_gpu_dg1},
646 {0x030dc000, oneapi_exp_arch::intel_gpu_acm_g10},
647 {0x030dc001, oneapi_exp_arch::intel_gpu_acm_g10},
648 {0x030dc004, oneapi_exp_arch::intel_gpu_acm_g10},
649 {0x030dc008, oneapi_exp_arch::intel_gpu_acm_g10},
650 {0x030e0000, oneapi_exp_arch::intel_gpu_acm_g11},
651 {0x030e0004, oneapi_exp_arch::intel_gpu_acm_g11},
652 {0x030e0005, oneapi_exp_arch::intel_gpu_acm_g11},
653 {0x030e4000, oneapi_exp_arch::intel_gpu_acm_g12},
654 {0x030f0000, oneapi_exp_arch::intel_gpu_pvc},
655 {0x030f0001, oneapi_exp_arch::intel_gpu_pvc},
656 {0x030f0003, oneapi_exp_arch::intel_gpu_pvc},
657 {0x030f0005, oneapi_exp_arch::intel_gpu_pvc},
658 {0x030f0006, oneapi_exp_arch::intel_gpu_pvc},
659 {0x030f0007, oneapi_exp_arch::intel_gpu_pvc},
660 {0x030f4007, oneapi_exp_arch::intel_gpu_pvc_vg},
661 {0x03118000, oneapi_exp_arch::intel_gpu_mtl_u},
662 {0x03118004, oneapi_exp_arch::intel_gpu_mtl_u},
663 {0x0311c000, oneapi_exp_arch::intel_gpu_mtl_h},
664 {0x0311c004, oneapi_exp_arch::intel_gpu_mtl_h},
665 {0x03128000, oneapi_exp_arch::intel_gpu_arl_h},
666 {0x03128004, oneapi_exp_arch::intel_gpu_arl_h},
667 {0x05004000, oneapi_exp_arch::intel_gpu_bmg_g21},
668 {0x05004001, oneapi_exp_arch::intel_gpu_bmg_g21},
669 {0x05004004, oneapi_exp_arch::intel_gpu_bmg_g21},
670 {0x05010000, oneapi_exp_arch::intel_gpu_lnl_m},
671 {0x05010001, oneapi_exp_arch::intel_gpu_lnl_m},
672 {0x05010004, oneapi_exp_arch::intel_gpu_lnl_m},
677 {8, oneapi_exp_arch::intel_cpu_spr},
678 {9, oneapi_exp_arch::intel_cpu_gnr},
686 backend CurrentBackend = Dev->getBackend();
689 auto MapArchIDToArchName = [](
const int arch) {
691 if (Item.first == arch)
694 return ext::oneapi::experimental::architecture::unknown;
701 sizeof(DeviceIp), &DeviceIp,
nullptr);
702 return MapArchIDToArchName(DeviceIp);
705 auto MapArchIDToArchName = [](
const char *arch) {
707 if (std::string_view(Item.first) == arch)
710 return ext::oneapi::experimental::architecture::unknown;
712 size_t ResultSize = 0;
715 nullptr, &ResultSize);
716 std::unique_ptr<char[]> DeviceArch(
new char[ResultSize]);
719 ResultSize, DeviceArch.get(),
nullptr);
720 std::string DeviceArchCopy(DeviceArch.get());
721 std::string DeviceArchSubstr =
722 DeviceArchCopy.substr(0, DeviceArchCopy.find(
":"));
723 return MapArchIDToArchName(DeviceArchSubstr.data());
725 auto MapArchIDToArchName = [](
const int arch) {
727 if (Item.first == arch)
730 return sycl::ext::oneapi::experimental::architecture::x86_64;
737 sizeof(DeviceIp), &DeviceIp,
nullptr);
738 return MapArchIDToArchName(DeviceIp);
741 return ext::oneapi::experimental::architecture::unknown;
747 std::vector<ext::oneapi::experimental::matrix::combination>,
748 ext::oneapi::experimental::info::device::matrix_combinations> {
749 static std::vector<ext::oneapi::experimental::matrix::combination>
751 using namespace ext::oneapi::experimental::matrix;
752 using namespace ext::oneapi::experimental;
753 backend CurrentBackend = Dev->getBackend();
754 auto get_current_architecture = [&Dev]() -> std::optional<architecture> {
766 std::rethrow_exception(std::make_exception_ptr(e));
770 std::optional<architecture> DeviceArchOpt = get_current_architecture();
771 if (!DeviceArchOpt.has_value())
774 if (architecture::intel_cpu_spr == DeviceArch)
776 {16, 16, 64, 0, 0, 0, matrix_type::uint8, matrix_type::uint8,
777 matrix_type::sint32, matrix_type::sint32},
778 {16, 16, 64, 0, 0, 0, matrix_type::uint8, matrix_type::sint8,
779 matrix_type::sint32, matrix_type::sint32},
780 {16, 16, 64, 0, 0, 0, matrix_type::sint8, matrix_type::uint8,
781 matrix_type::sint32, matrix_type::sint32},
782 {16, 16, 64, 0, 0, 0, matrix_type::sint8, matrix_type::sint8,
783 matrix_type::sint32, matrix_type::sint32},
784 {16, 16, 32, 0, 0, 0, matrix_type::bf16, matrix_type::bf16,
785 matrix_type::fp32, matrix_type::fp32},
787 else if (architecture::intel_cpu_gnr == DeviceArch)
789 {16, 16, 64, 0, 0, 0, matrix_type::uint8, matrix_type::uint8,
790 matrix_type::sint32, matrix_type::sint32},
791 {16, 16, 64, 0, 0, 0, matrix_type::uint8, matrix_type::sint8,
792 matrix_type::sint32, matrix_type::sint32},
793 {16, 16, 64, 0, 0, 0, matrix_type::sint8, matrix_type::uint8,
794 matrix_type::sint32, matrix_type::sint32},
795 {16, 16, 64, 0, 0, 0, matrix_type::sint8, matrix_type::sint8,
796 matrix_type::sint32, matrix_type::sint32},
797 {16, 16, 32, 0, 0, 0, matrix_type::bf16, matrix_type::bf16,
798 matrix_type::fp32, matrix_type::fp32},
799 {16, 16, 32, 0, 0, 0, matrix_type::fp16, matrix_type::fp16,
800 matrix_type::fp32, matrix_type::fp32},
802 else if (architecture::intel_gpu_pvc == DeviceArch)
804 {8, 0, 0, 0, 16, 32, matrix_type::uint8, matrix_type::uint8,
805 matrix_type::sint32, matrix_type::sint32},
806 {8, 0, 0, 0, 16, 32, matrix_type::uint8, matrix_type::sint8,
807 matrix_type::sint32, matrix_type::sint32},
808 {8, 0, 0, 0, 16, 32, matrix_type::sint8, matrix_type::uint8,
809 matrix_type::sint32, matrix_type::sint32},
810 {8, 0, 0, 0, 16, 32, matrix_type::sint8, matrix_type::sint8,
811 matrix_type::sint32, matrix_type::sint32},
812 {8, 0, 0, 0, 16, 16, matrix_type::fp16, matrix_type::fp16,
813 matrix_type::fp32, matrix_type::fp32},
814 {8, 0, 0, 0, 16, 16, matrix_type::bf16, matrix_type::bf16,
815 matrix_type::fp32, matrix_type::fp32},
816 {0, 0, 0, 16, 16, 16, matrix_type::bf16, matrix_type::bf16,
817 matrix_type::fp32, matrix_type::fp32},
818 {0, 0, 0, 1, 64, 16, matrix_type::bf16, matrix_type::bf16,
819 matrix_type::fp32, matrix_type::fp32},
820 {0, 0, 0, 32, 64, 16, matrix_type::bf16, matrix_type::bf16,
821 matrix_type::fp32, matrix_type::fp32},
822 {8, 0, 0, 0, 16, 8, matrix_type::tf32, matrix_type::tf32,
823 matrix_type::fp32, matrix_type::fp32},
825 else if ((architecture::intel_gpu_dg2_g10 == DeviceArch) ||
826 (architecture::intel_gpu_dg2_g11 == DeviceArch) ||
827 (architecture::intel_gpu_dg2_g12 == DeviceArch))
829 {8, 0, 0, 0, 8, 32, matrix_type::uint8, matrix_type::uint8,
830 matrix_type::sint32, matrix_type::sint32},
831 {8, 0, 0, 0, 8, 32, matrix_type::uint8, matrix_type::sint8,
832 matrix_type::sint32, matrix_type::sint32},
833 {8, 0, 0, 0, 8, 32, matrix_type::sint8, matrix_type::uint8,
834 matrix_type::sint32, matrix_type::sint32},
835 {8, 0, 0, 0, 8, 32, matrix_type::sint8, matrix_type::sint8,
836 matrix_type::sint32, matrix_type::sint32},
837 {8, 0, 0, 0, 8, 16, matrix_type::fp16, matrix_type::fp16,
838 matrix_type::fp32, matrix_type::fp32},
839 {8, 0, 0, 0, 8, 16, matrix_type::bf16, matrix_type::bf16,
840 matrix_type::fp32, matrix_type::fp32},
842 else if (architecture::amd_gpu_gfx90a == DeviceArch)
844 {0, 0, 0, 32, 32, 8, matrix_type::fp16, matrix_type::fp16,
845 matrix_type::fp32, matrix_type::fp32},
846 {0, 0, 0, 16, 16, 16, matrix_type::fp16, matrix_type::fp16,
847 matrix_type::fp32, matrix_type::fp32},
848 {0, 0, 0, 32, 32, 8, matrix_type::sint8, matrix_type::sint8,
849 matrix_type::sint32, matrix_type::sint32},
850 {0, 0, 0, 16, 16, 16, matrix_type::sint8, matrix_type::sint8,
851 matrix_type::sint32, matrix_type::sint32},
852 {0, 0, 0, 32, 32, 8, matrix_type::bf16, matrix_type::bf16,
853 matrix_type::fp32, matrix_type::fp32},
854 {0, 0, 0, 16, 16, 16, matrix_type::bf16, matrix_type::bf16,
855 matrix_type::fp32, matrix_type::fp32},
856 {0, 0, 0, 16, 16, 4, matrix_type::fp64, matrix_type::fp64,
857 matrix_type::fp64, matrix_type::fp64},
863 constexpr std::pair<float, oneapi_exp_arch> NvidiaArchNumbs[] = {
864 {5.0, oneapi_exp_arch::nvidia_gpu_sm_50},
865 {5.2, oneapi_exp_arch::nvidia_gpu_sm_52},
866 {5.3, oneapi_exp_arch::nvidia_gpu_sm_53},
867 {6.0, oneapi_exp_arch::nvidia_gpu_sm_60},
868 {6.1, oneapi_exp_arch::nvidia_gpu_sm_61},
869 {6.2, oneapi_exp_arch::nvidia_gpu_sm_62},
870 {7.0, oneapi_exp_arch::nvidia_gpu_sm_70},
871 {7.2, oneapi_exp_arch::nvidia_gpu_sm_72},
872 {7.5, oneapi_exp_arch::nvidia_gpu_sm_75},
873 {8.0, oneapi_exp_arch::nvidia_gpu_sm_80},
874 {8.6, oneapi_exp_arch::nvidia_gpu_sm_86},
875 {8.7, oneapi_exp_arch::nvidia_gpu_sm_87},
876 {8.9, oneapi_exp_arch::nvidia_gpu_sm_89},
877 {9.0, oneapi_exp_arch::nvidia_gpu_sm_90},
880 for (
const auto &Item : NvidiaArchNumbs)
881 if (Item.second == arch)
885 float ComputeCapability = GetArchNum(DeviceArch);
886 std::vector<combination> sm_70_combinations = {
887 {0, 0, 0, 16, 16, 16, matrix_type::fp16, matrix_type::fp16,
888 matrix_type::fp32, matrix_type::fp32},
889 {0, 0, 0, 8, 32, 16, matrix_type::fp16, matrix_type::fp16,
890 matrix_type::fp32, matrix_type::fp32},
891 {0, 0, 0, 32, 8, 16, matrix_type::fp16, matrix_type::fp16,
892 matrix_type::fp32, matrix_type::fp32},
893 {0, 0, 0, 16, 16, 16, matrix_type::fp16, matrix_type::fp16,
894 matrix_type::fp16, matrix_type::fp16},
895 {0, 0, 0, 8, 32, 16, matrix_type::fp16, matrix_type::fp16,
896 matrix_type::fp16, matrix_type::fp16},
897 {0, 0, 0, 32, 8, 16, matrix_type::fp16, matrix_type::fp16,
898 matrix_type::fp16, matrix_type::fp16},
899 {0, 0, 0, 16, 16, 16, matrix_type::fp16, matrix_type::fp16,
900 matrix_type::fp32, matrix_type::fp16},
901 {0, 0, 0, 8, 32, 16, matrix_type::fp16, matrix_type::fp16,
902 matrix_type::fp32, matrix_type::fp16},
903 {0, 0, 0, 32, 8, 16, matrix_type::fp16, matrix_type::fp16,
904 matrix_type::fp32, matrix_type::fp16},
905 {0, 0, 0, 16, 16, 16, matrix_type::fp16, matrix_type::fp16,
906 matrix_type::fp16, matrix_type::fp32},
907 {0, 0, 0, 8, 32, 16, matrix_type::fp16, matrix_type::fp16,
908 matrix_type::fp16, matrix_type::fp32},
909 {0, 0, 0, 32, 8, 16, matrix_type::fp16, matrix_type::fp16,
910 matrix_type::fp16, matrix_type::fp32}};
911 std::vector<combination> sm_72_combinations = {
912 {0, 0, 0, 16, 16, 16, matrix_type::sint8, matrix_type::sint8,
913 matrix_type::sint32, matrix_type::sint32},
914 {0, 0, 0, 8, 32, 16, matrix_type::sint8, matrix_type::sint8,
915 matrix_type::sint32, matrix_type::sint32},
916 {0, 0, 0, 32, 8, 16, matrix_type::sint8, matrix_type::sint8,
917 matrix_type::sint32, matrix_type::sint32},
918 {0, 0, 0, 16, 16, 16, matrix_type::uint8, matrix_type::uint8,
919 matrix_type::sint32, matrix_type::sint32},
920 {0, 0, 0, 8, 32, 16, matrix_type::uint8, matrix_type::uint8,
921 matrix_type::sint32, matrix_type::sint32},
922 {0, 0, 0, 32, 8, 16, matrix_type::uint8, matrix_type::uint8,
923 matrix_type::sint32, matrix_type::sint32}};
924 std::vector<combination> sm_80_combinations = {
925 {0, 0, 0, 16, 16, 8, matrix_type::tf32, matrix_type::tf32,
926 matrix_type::fp32, matrix_type::fp32},
927 {0, 0, 0, 16, 16, 16, matrix_type::bf16, matrix_type::bf16,
928 matrix_type::fp32, matrix_type::fp32},
929 {0, 0, 0, 8, 32, 16, matrix_type::bf16, matrix_type::bf16,
930 matrix_type::fp32, matrix_type::fp32},
931 {0, 0, 0, 32, 8, 16, matrix_type::bf16, matrix_type::bf16,
932 matrix_type::fp32, matrix_type::fp32},
933 {0, 0, 0, 8, 8, 4, matrix_type::fp64, matrix_type::fp64,
934 matrix_type::fp64, matrix_type::fp64}};
935 if (ComputeCapability >= 8.0) {
936 sm_80_combinations.insert(sm_80_combinations.end(),
937 sm_72_combinations.begin(),
938 sm_72_combinations.end());
939 sm_80_combinations.insert(sm_80_combinations.end(),
940 sm_70_combinations.begin(),
941 sm_70_combinations.end());
942 return sm_80_combinations;
943 }
else if (ComputeCapability >= 7.2) {
944 sm_72_combinations.insert(sm_72_combinations.end(),
945 sm_70_combinations.begin(),
946 sm_70_combinations.end());
947 return sm_72_combinations;
948 }
else if (ComputeCapability >= 7.0)
949 return sm_70_combinations;
957 size_t, ext::oneapi::experimental::info::device::max_global_work_groups> {
964 id<1>, ext::oneapi::experimental::info::device::max_work_groups<1>> {
969 max_global_work_groups>
::get(Dev);
974 sizeof(result), &result,
nullptr);
975 return id<1>(std::min(Limit, result[0]));
981 id<2>, ext::oneapi::experimental::info::device::max_work_groups<2>> {
986 max_global_work_groups>
::get(Dev);
991 sizeof(result), &result,
nullptr);
992 return id<2>(std::min(Limit, result[1]), std::min(Limit, result[0]));
998 id<3>, ext::oneapi::experimental::info::device::max_work_groups<3>> {
1003 max_global_work_groups>
::get(Dev);
1005 Dev->getHandleRef(),
1008 sizeof(result), &result,
nullptr);
1009 return id<3>(std::min(Limit, result[2]), std::min(Limit, result[1]),
1010 std::min(Limit, result[0]));
1018 info::device::ext_oneapi_max_global_work_groups> {
1021 ext::oneapi::experimental::info::device::
1022 max_global_work_groups>
::get(Dev);
1030 info::device::ext_oneapi_max_work_groups_1d> {
1042 info::device::ext_oneapi_max_work_groups_2d> {
1054 info::device::ext_oneapi_max_work_groups_3d> {
1068 sizeof(result), &result,
nullptr);
1069 if (result ==
nullptr)
1071 "No parent for device because it is not a subdevice");
1073 const auto &Platform = Dev->getPlatformImpl();
1074 return createSyclObjFromImpl<device>(
1075 Platform->getOrMakeDeviceImpl(result, Platform));
1095 Dev->getHandleRef(),
1099 return (Err != PI_SUCCESS) ? false : (caps &
PI_USM_ACCESS);
1109 Dev->getHandleRef(),
1113 return (Err != PI_SUCCESS) ? false : (caps &
PI_USM_ACCESS);
1123 Dev->getHandleRef(),
1126 return (Err != PI_SUCCESS) ? false : (caps &
PI_USM_ACCESS);
1133 info::device::usm_restricted_shared_allocations> {
1137 Dev->getHandleRef(),
1141 return (Err != PI_SUCCESS)
1153 Dev->getHandleRef(),
1156 return (Err != PI_SUCCESS) ? false : (caps &
PI_USM_ACCESS);
1163 bool, ext::codeplay::experimental::info::device::supports_fusion> {
1165 #if SYCL_EXT_CODEPLAY_KERNEL_FUSION
1176 return Dev->is_cpu() || Dev->is_gpu();
1193 ext::codeplay::experimental::info::device::max_registers_per_work_group> {
1195 uint32_t maxRegsPerWG;
1197 Dev->getHandleRef(),
1198 PiInfoCode<ext::codeplay::experimental::info::device::
1199 max_registers_per_work_group>::value,
1200 sizeof(maxRegsPerWG), &maxRegsPerWG,
nullptr);
1201 return maxRegsPerWG;
1208 std::vector<sycl::device>,
1209 ext::oneapi::experimental::info::device::component_devices> {
1211 size_t ResultSize = 0;
1214 Dev->getHandleRef(),
1216 ext::oneapi::experimental::info::device::component_devices>::value,
1217 0,
nullptr, &ResultSize);
1221 if (Err == PI_ERROR_INVALID_VALUE || (Err == PI_SUCCESS && ResultSize == 0))
1226 Dev->getPlugin()->checkPiResult(Err);
1228 size_t DevCount = ResultSize /
sizeof(
pi_device);
1230 std::vector<pi_device> Devs(DevCount);
1232 Dev->getHandleRef(),
1234 ext::oneapi::experimental::info::device::component_devices>::value,
1235 ResultSize, Devs.data(),
nullptr);
1236 std::vector<sycl::device> Result;
1237 const auto &Platform = Dev->getPlatformImpl();
1238 for (
const auto &d : Devs)
1239 Result.push_back(createSyclObjFromImpl<device>(
1240 Platform->getOrMakeDeviceImpl(d, Platform)));
1247 sycl::
device, ext::oneapi::experimental::info::device::composite_device> {
1249 if (!Dev->has(sycl::aspect::ext_oneapi_is_component))
1251 "Only devices with aspect::ext_oneapi_is_component "
1252 "can call this function.");
1256 Dev->getHandleRef(),
1258 ext::oneapi::experimental::info::device::composite_device>::value,
1259 sizeof(Result), &Result,
nullptr);
1262 const auto &Platform = Dev->getPlatformImpl();
1263 return createSyclObjFromImpl<device>(
1264 Platform->getOrMakeDeviceImpl(Result, Platform));
1267 "A component with aspect::ext_oneapi_is_component "
1268 "must have a composite device.");
1272 template <
typename Param>
1275 "Invalid device information descriptor");
1276 if (std::is_same<Param,
1277 sycl::_V1::ext::intel::info::device::free_memory>::value) {
1278 if (!Dev->has(aspect::ext_intel_free_memory))
1281 "The device does not have the ext_intel_free_memory aspect");
1293 template <
typename ReturnT>
1296 ext::oneapi::experimental::info::device::work_group_progress_capabilities<
1297 ext::oneapi::experimental::execution_scope::root_group>> {
1300 return device_impl::getProgressGuaranteesUpTo<ReturnT>(
1301 Dev->getProgressGuarantee(execution_scope::work_group,
1302 execution_scope::root_group));
1305 template <
typename ReturnT>
1308 ext::oneapi::experimental::info::device::sub_group_progress_capabilities<
1309 ext::oneapi::experimental::execution_scope::root_group>> {
1312 return device_impl::getProgressGuaranteesUpTo<ReturnT>(
1313 Dev->getProgressGuarantee(execution_scope::sub_group,
1314 execution_scope::root_group));
1318 template <
typename ReturnT>
1321 ext::oneapi::experimental::info::device::sub_group_progress_capabilities<
1322 ext::oneapi::experimental::execution_scope::work_group>> {
1326 return device_impl::getProgressGuaranteesUpTo<ReturnT>(
1327 Dev->getProgressGuarantee(execution_scope::sub_group,
1328 execution_scope::work_group));
1332 template <
typename ReturnT>
1335 ext::oneapi::experimental::info::device::work_item_progress_capabilities<
1336 ext::oneapi::experimental::execution_scope::root_group>> {
1340 return device_impl::getProgressGuaranteesUpTo<ReturnT>(
1341 Dev->getProgressGuarantee(execution_scope::work_item,
1342 execution_scope::root_group));
1345 template <
typename ReturnT>
1348 ext::oneapi::experimental::info::device::work_item_progress_capabilities<
1349 ext::oneapi::experimental::execution_scope::work_group>> {
1353 return device_impl::getProgressGuaranteesUpTo<ReturnT>(
1354 Dev->getProgressGuarantee(execution_scope::work_item,
1355 execution_scope::work_group));
1359 template <
typename ReturnT>
1362 ext::oneapi::experimental::info::device::work_item_progress_capabilities<
1363 ext::oneapi::experimental::execution_scope::sub_group>> {
1367 return device_impl::getProgressGuaranteesUpTo<ReturnT>(
1368 Dev->getProgressGuarantee(execution_scope::work_item,
1369 execution_scope::sub_group));
static ProgramManager & getInstance()
std::string get_device_info_string(sycl::detail::pi::PiDeviceInfo InfoCode) const
Get device info string.
const PluginPtr & getPlugin() const
sycl::detail::pi::PiDevice & getHandleRef()
Get reference to PI device.
static jit_compiler & get_instance()
The SYCL device class encapsulates a single SYCL device on which kernels may be executed.
const std::error_code & code() const noexcept
A unique identifier of an item in an index space.
#define __SYCL_AFFINITY_DOMAIN_STRING_CASE(DOMAIN)
std::vector< memory_scope > readMemoryScopeBitfield(pi_memory_scope_capabilities bits)
range< 2 > construct_range< 2 >(size_t *values)
std::vector< info::fp_config > read_fp_bitfield(pi_device_fp_config bits)
range< Dimensions > construct_range(size_t *values)=delete
constexpr std::pair< const int, oneapi_exp_arch > IntelGPUArchitectures[]
constexpr std::pair< const int, oneapi_exp_arch > IntelCPUArchitectures[]
range< 3 > construct_range< 3 >(size_t *values)
std::string affinityDomainToString(info::partition_affinity_domain AffinityDomain)
static bool is_sycl_partition_property(info::partition_property PP)
std::vector< info::partition_affinity_domain > read_domain_bitfield(pi_device_affinity_domain bits)
std::vector< std::string > split_string(std::string_view str, char delimeter)
std::shared_ptr< device_impl > DeviceImplPtr
Param::return_type get_device_info(const DeviceImplPtr &Dev)
constexpr std::pair< const char *, oneapi_exp_arch > NvidiaAmdGPUArchitectures[]
std::vector< memory_order > readMemoryOrderBitfield(pi_memory_order_capabilities bits)
std::vector< info::execution_capability > read_execution_bitfield(pi_device_exec_capabilities bits)
range< 1 > construct_range< 1 >(size_t *values)
partition_affinity_domain
@ partition_by_affinity_domain
@ ext_intel_partition_by_cslice
@ correctly_rounded_divide_sqrt
class __SYCL_EBO __SYCL_SPECIAL_CLASS Dimensions
std::error_code make_error_code(sycl::errc E) noexcept
Constructs an error code using e and sycl_category()
static constexpr pi_device_fp_config PI_FP_SOFT_FLOAT
pi_bitfield pi_device_exec_capabilities
static constexpr pi_device_affinity_domain PI_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE
_pi_usm_capabilities pi_usm_capabilities
@ PI_DEVICE_INFO_PARTITION_TYPE
static constexpr pi_device_fp_config PI_FP_DENORM
static constexpr pi_device_affinity_domain PI_DEVICE_AFFINITY_DOMAIN_L2_CACHE
static constexpr pi_device_affinity_domain PI_DEVICE_AFFINITY_DOMAIN_L3_CACHE
pi_result piDeviceGetInfo(pi_device device, pi_device_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
Returns requested info for provided native device Return PI_DEVICE_INFO_EXTENSION_DEVICELIB_ASSERT fo...
@ PI_USM_CONCURRENT_ACCESS
pi_bitfield pi_queue_properties
static constexpr pi_device_affinity_domain PI_DEVICE_AFFINITY_DOMAIN_L4_CACHE
static constexpr pi_device_fp_config PI_FP_ROUND_TO_NEAREST
static constexpr pi_device_fp_config PI_FP_ROUND_TO_INF
constexpr pi_queue_properties PI_QUEUE_FLAG_PROFILING_ENABLE
static constexpr pi_device_fp_config PI_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT
static constexpr pi_device_affinity_domain PI_DEVICE_AFFINITY_DOMAIN_L1_CACHE
static constexpr pi_device_affinity_domain PI_DEVICE_AFFINITY_DOMAIN_NUMA
pi_bitfield pi_memory_scope_capabilities
pi_bitfield pi_device_fp_config
static constexpr pi_device_exec_capabilities PI_EXEC_NATIVE_KERNEL
static constexpr pi_device_fp_config PI_FP_ROUND_TO_ZERO
static constexpr pi_device_fp_config PI_FP_FMA
static constexpr pi_device_exec_capabilities PI_EXEC_KERNEL
static constexpr pi_device_fp_config PI_FP_INF_NAN
pi_bitfield pi_device_affinity_domain
pi_bitfield pi_memory_order_capabilities
C++ wrapper of extern "C" PI interfaces.
info::device::native_vector_width_double type
info::device::native_vector_width_half type
static ReturnT get(const DeviceImplPtr &Dev)
static ReturnT get(const DeviceImplPtr &Dev)
static ReturnT get(const DeviceImplPtr &Dev)
static ReturnT get(const DeviceImplPtr &Dev)
static ReturnT get(const DeviceImplPtr &Dev)
static ReturnT get(const DeviceImplPtr &Dev)
static ReturnT get(const DeviceImplPtr &Dev)
static bool get(const DeviceImplPtr &Dev)
static bool get(const DeviceImplPtr &Dev)
static bool get(const DeviceImplPtr &)
static bool get(const DeviceImplPtr &Dev)
static bool get(const DeviceImplPtr &Dev)
static bool get(const DeviceImplPtr &Dev)
static bool get(const DeviceImplPtr &Dev)
static bool get(const DeviceImplPtr &Dev)
static bool get(const DeviceImplPtr &Dev)
static bool get(const DeviceImplPtr &Dev)
static device get(const DeviceImplPtr &Dev)
static ext::oneapi::experimental::architecture get(const DeviceImplPtr &Dev)
static id< 1 > get(const DeviceImplPtr &Dev)
static id< 1 > get(const DeviceImplPtr &Dev)
static id< 2 > get(const DeviceImplPtr &Dev)
static id< 2 > get(const DeviceImplPtr &Dev)
static id< 3 > get(const DeviceImplPtr &Dev)
static id< 3 > get(const DeviceImplPtr &Dev)
static info::partition_affinity_domain get(const DeviceImplPtr &Dev)
static info::partition_property get(const DeviceImplPtr &Dev)
static range< Dimensions > get(const DeviceImplPtr &Dev)
static size_t get(const DeviceImplPtr)
static size_t get(const DeviceImplPtr &Dev)
static std::string get(const DeviceImplPtr &Dev)
static std::string get(const DeviceImplPtr &Dev)
static std::vector< ext::oneapi::experimental::matrix::combination > get(const DeviceImplPtr &Dev)
static std::vector< info::execution_capability > get(const DeviceImplPtr &Dev)
static std::vector< info::fp_config > get(const DeviceImplPtr &Dev)
static std::vector< info::fp_config > get(const DeviceImplPtr &Dev)
static std::vector< info::partition_affinity_domain > get(const DeviceImplPtr &Dev)
static std::vector< info::partition_property > get(const DeviceImplPtr &Dev)
static std::vector< kernel_id > get(const DeviceImplPtr &Dev)
static std::vector< memory_order > get(const DeviceImplPtr &Dev)
static std::vector< memory_order > get(const DeviceImplPtr &Dev)
static std::vector< memory_scope > get(const DeviceImplPtr &Dev)
static std::vector< memory_scope > get(const DeviceImplPtr &Dev)
static std::vector< size_t > get(const DeviceImplPtr &Dev)
static std::vector< std::string > get(const DeviceImplPtr &Dev)
static std::vector< std::string > get(const DeviceImplPtr &Dev)
static std::vector< sycl::device > get(const DeviceImplPtr &Dev)
static sycl::device get(const DeviceImplPtr &Dev)
static uint32_t get(const DeviceImplPtr &Dev)
static ReturnT get(const DeviceImplPtr &Dev)