DPC++ Runtime
Runtime libraries for oneAPI DPC++
platform_util.cpp
Go to the documentation of this file.
1 //===-- platform_util.cpp - Platform utilities implementation --*- C++ -*--===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
10 #include <CL/sycl/exception.hpp>
11 #include <detail/platform_util.hpp>
12 
13 #if defined(__SYCL_RT_OS_LINUX)
14 #include <errno.h>
15 #include <unistd.h>
16 #if defined(__x86_64__) || defined(__i386__)
17 #include <cpuid.h>
18 #endif
19 #elif defined(__SYCL_RT_OS_WINDOWS)
20 #include <intrin.h>
21 #endif
22 
24 namespace sycl {
25 namespace detail {
26 
27 #if defined(__x86_64__) || defined(__i386__)
// Used by methods that duplicate OpenCL behaviour in order to get CPU info.
//
// Executes the CPUID instruction for leaf `Type` / sub-leaf `SubType` through
// the compiler-specific intrinsic and stores the four result registers in
// CPUInfo[0..3] (EAX, EBX, ECX, EDX in that order). `CPUInfo` must point to at
// least four writable uint32_t elements.
static void cpuid(uint32_t *CPUInfo, uint32_t Type, uint32_t SubType = 0) {
#if defined(__SYCL_RT_OS_LINUX)
  __cpuid_count(Type, SubType, CPUInfo[0], CPUInfo[1], CPUInfo[2], CPUInfo[3]);
#elif defined(__SYCL_RT_OS_WINDOWS)
  __cpuidex(reinterpret_cast<int *>(CPUInfo), Type, SubType);
#else
  // No CPUID intrinsic is wired up for this OS. Zero the output instead of
  // leaving it uninitialized so callers read well-defined values.
  (void)Type;
  (void)SubType;
  for (int Reg = 0; Reg < 4; ++Reg)
    CPUInfo[Reg] = 0;
#endif
}
36 #endif
37 
38 uint32_t PlatformUtil::getMaxClockFrequency() {
39  throw runtime_error(
40  "max_clock_frequency parameter is not supported for host device",
41  PI_ERROR_INVALID_DEVICE);
42 #if defined(__x86_64__) || defined(__i386__)
43  uint32_t CPUInfo[4];
44  std::string Buff(sizeof(CPUInfo) * 3 + 1, 0);
45  size_t Offset = 0;
46 
47  for (uint32_t i = 0x80000002; i <= 0x80000004; i++) {
48  cpuid(CPUInfo, i);
49  std::copy(reinterpret_cast<char *>(CPUInfo),
50  reinterpret_cast<char *>(CPUInfo) + sizeof(CPUInfo),
51  Buff.begin() + Offset);
52  Offset += sizeof(CPUInfo);
53  }
54  std::size_t Found = Buff.rfind("Hz");
55  // Bail out if frequency is not found in CPUID string
56  if (Found == std::string::npos)
57  return 0;
58 
59  Buff = Buff.substr(0, Found);
60  uint32_t Freq = 0;
61  switch (Buff[Buff.size() - 1]) {
62  case 'M':
63  Freq = 1;
64  break;
65  case 'G':
66  Freq = 1000;
67  break;
68  }
69  Buff = Buff.substr(Buff.rfind(' '), Buff.length());
70  Freq *= std::stod(Buff);
71  return Freq;
72 #endif
73  return 0;
74 }
75 
76 uint32_t PlatformUtil::getMemCacheLineSize() {
77 #if defined(__x86_64__) || defined(__i386__)
78  uint32_t CPUInfo[4];
79  cpuid(CPUInfo, 0x80000006);
80  return CPUInfo[2] & 0xff;
81 #elif defined(__SYCL_RT_OS_LINUX) && defined(_SC_LEVEL2_DCACHE_LINESIZE)
82  long lineSize = sysconf(_SC_LEVEL2_DCACHE_LINESIZE);
83  if (lineSize > 0) {
84  return lineSize;
85  }
86 #endif
87  return 8;
88 }
89 
90 uint64_t PlatformUtil::getMemCacheSize() {
91 #if defined(__x86_64__) || defined(__i386__)
92  uint32_t CPUInfo[4];
93  cpuid(CPUInfo, 0x80000006);
94  return static_cast<uint64_t>(CPUInfo[2] >> 16) * 1024;
95 #elif defined(__SYCL_RT_OS_LINUX) && defined(_SC_LEVEL2_DCACHE_SIZE)
96  long cacheSize = sysconf(_SC_LEVEL2_DCACHE_SIZE);
97  if (cacheSize > 0) {
98  return cacheSize;
99  }
100 #endif
101  return static_cast<uint64_t>(16 * 1024);
102 }
103 
104 uint32_t PlatformUtil::getNativeVectorWidth(PlatformUtil::TypeIndex TIndex) {
105 
106 #if defined(__x86_64__) || defined(__i386__)
107  uint32_t Index = static_cast<uint32_t>(TIndex);
108 
109  // SSE4.2 has 16 byte (XMM) registers
110  static constexpr uint32_t VECTOR_WIDTH_SSE42[] = {16, 8, 4, 2, 4, 2, 0};
111  // AVX supports 32 byte (YMM) registers only for floats and doubles
112  static constexpr uint32_t VECTOR_WIDTH_AVX[] = {16, 8, 4, 2, 8, 4, 0};
113  // AVX2 has a full set of 32 byte (YMM) registers
114  static constexpr uint32_t VECTOR_WIDTH_AVX2[] = {32, 16, 8, 4, 8, 4, 0};
115  // AVX512 has 64 byte (ZMM) registers
116  static constexpr uint32_t VECTOR_WIDTH_AVX512[] = {64, 32, 16, 8, 16, 8, 0};
117 
118 #if defined(__SYCL_RT_OS_LINUX)
119  if (__builtin_cpu_supports("avx512f"))
120  return VECTOR_WIDTH_AVX512[Index];
121  if (__builtin_cpu_supports("avx2"))
122  return VECTOR_WIDTH_AVX2[Index];
123  if (__builtin_cpu_supports("avx"))
124  return VECTOR_WIDTH_AVX[Index];
125 #elif defined(__SYCL_RT_OS_WINDOWS)
126 
127  uint32_t Info[4];
128 
129  // Check that CPUID func number 7 is available.
130  cpuid(Info, 0);
131  if (Info[0] >= 7) {
132  // avx512f = CPUID.7.EBX[16]
133  cpuid(Info, 7);
134  if (Info[1] & (1 << 16))
135  return VECTOR_WIDTH_AVX512[Index];
136 
137  // avx2 = CPUID.7.EBX[5]
138  if (Info[1] & (1 << 5))
139  return VECTOR_WIDTH_AVX2[Index];
140  }
141  // It is assumed that CPUID func number 1 is always available.
142  // avx = CPUID.1.ECX[28]
143  cpuid(Info, 1);
144  if (Info[2] & (1 << 28))
145  return VECTOR_WIDTH_AVX[Index];
146 #endif
147 
148  return VECTOR_WIDTH_SSE42[Index];
149 
150 #elif defined(__ARM_NEON)
151  uint32_t Index = static_cast<uint32_t>(TIndex);
152 
153  // NEON has 16 byte registers
154  static constexpr uint32_t VECTOR_WIDTH_NEON[] = {16, 8, 4, 2, 4, 2, 0};
155  return VECTOR_WIDTH_NEON[Index];
156 
157 #endif
158  return 0;
159 }
160 
161 void PlatformUtil::prefetch(const char *Ptr, size_t NumBytes) {
162  if (!Ptr)
163  return;
164 
165  const size_t CacheLineSize = PlatformUtil::getMemCacheLineSize();
166  const size_t CacheLineMask = ~(CacheLineSize - 1);
167  const char *PtrEnd = Ptr + NumBytes;
168 
169  // Set the pointer to the beginning of the current cache line.
170  Ptr = reinterpret_cast<const char *>(
171  reinterpret_cast<size_t>(Ptr) & CacheLineMask);
172  for (; Ptr < PtrEnd; Ptr += CacheLineSize) {
173 #if defined(__SYCL_RT_OS_LINUX)
174  __builtin_prefetch(Ptr);
175 #elif defined(__SYCL_RT_OS_WINDOWS)
176  _mm_prefetch(Ptr, _MM_HINT_T0);
177 #endif
178  }
179 }
180 
181 } // namespace detail
182 } // namespace sycl
183 } // __SYCL_INLINE_NAMESPACE(cl)
os_util.hpp
sycl
Definition: invoke_simd.hpp:68
cl::sycl::detail::PlatformUtil::TypeIndex
TypeIndex
Definition: platform_util.hpp:24
cl::sycl::ext::intel::experimental::prefetch
prefetch_impl< _B > prefetch
Definition: fpga_lsu.hpp:47
cl
We provide new interfaces for matrix multiply in this patch:
Definition: access.hpp:13
platform_util.hpp
exception.hpp
__SYCL_INLINE_NAMESPACE
#define __SYCL_INLINE_NAMESPACE(X)
Definition: defines_elementary.hpp:12