DPC++ Runtime
Runtime libraries for oneAPI DPC++
platform_util.cpp
Go to the documentation of this file.
1 //===-- platform_util.cpp - Platform utilities implementation --*- C++ -*--===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
10 #include <sycl/detail/os_util.hpp>
11 #include <sycl/exception.hpp>
12 
13 #if defined(__SYCL_RT_OS_LINUX)
14 #include <errno.h>
15 #include <unistd.h>
16 #if defined(__x86_64__) || defined(__i386__)
17 #include <cpuid.h>
18 #endif
19 #elif defined(__SYCL_RT_OS_WINDOWS)
20 #include <intrin.h>
21 #elif defined(__SYCL_RT_OS_DARWIN)
22 #if defined(__x86_64__) || defined(__i386__)
23 #include <cpuid.h>
24 #endif
25 #endif
26 
27 namespace sycl {
28 inline namespace _V1 {
29 namespace detail {
30 
31 #if defined(__x86_64__) || defined(__i386__)
/// Thin wrapper around the compiler-specific CPUID intrinsic. It is used by
/// the queries below that mirror OpenCL behaviour when collecting CPU info.
///
/// \param CPUInfo array of four 32-bit slots that receives the instruction's
///        output registers (EAX, EBX, ECX, EDX, in that order).
/// \param Type    CPUID leaf (function number) to query.
/// \param SubType CPUID sub-leaf; defaults to 0.
static void cpuid(uint32_t *CPUInfo, uint32_t Type, uint32_t SubType = 0) {
#if defined(__SYCL_RT_OS_LINUX) || defined(__SYCL_RT_OS_DARWIN)
  // <cpuid.h> exposes a macro that writes straight into four lvalues.
  uint32_t &Eax = CPUInfo[0];
  uint32_t &Ebx = CPUInfo[1];
  uint32_t &Ecx = CPUInfo[2];
  uint32_t &Edx = CPUInfo[3];
  __cpuid_count(Type, SubType, Eax, Ebx, Ecx, Edx);
#elif defined(__SYCL_RT_OS_WINDOWS)
  // The MSVC intrinsic takes an int[4] plus signed leaf / sub-leaf values.
  int *const Registers = reinterpret_cast<int *>(CPUInfo);
  __cpuidex(Registers, static_cast<int>(Type), static_cast<int>(SubType));
#endif
}
40 #endif
41 
// NOTE(review): the signature line that opens this body is missing from this
// listing. Presumably this is PlatformUtil::getMaxClockFrequency() (the error
// text below refers to max_clock_frequency) - confirm against the real file.
//
// The body unconditionally throws a runtime_error carrying
// PI_ERROR_INVALID_DEVICE, so no value is ever produced.
43  throw runtime_error(
44  "max_clock_frequency parameter is not supported for host device",
45  PI_ERROR_INVALID_DEVICE);
// Never reached: the throw above always leaves the function first.
46  return 0;
47 }
48 
// NOTE(review): the signature line that opens this body is missing from this
// listing. Presumably this is PlatformUtil::getMemCacheLineSize() (the Linux
// branch asks sysconf for _SC_LEVEL2_DCACHE_LINESIZE) - confirm.
//
// Returns a cache line size: from CPUID on x86, from sysconf on other Linux
// targets, and a fixed fallback of 8 otherwise.
50 #if defined(__x86_64__) || defined(__i386__)
51  uint32_t CPUInfo[4];
// Query CPUID leaf 0x80000006 and keep the low 8 bits of the third output
// register (ECX). Presumably that field is the L2 line size in bytes - verify
// against the CPUID documentation.
// NOTE(review): the masked value is returned without checking that it is
// non-zero.
52  cpuid(CPUInfo, 0x80000006);
53  return CPUInfo[2] & 0xff;
54 #elif defined(__SYCL_RT_OS_LINUX) && defined(_SC_LEVEL2_DCACHE_LINESIZE)
// Only a positive sysconf result is used; anything else falls through to the
// fixed fallback below.
55  long lineSize = sysconf(_SC_LEVEL2_DCACHE_LINESIZE);
56  if (lineSize > 0) {
57  return lineSize;
58  }
59 #endif
// Fallback when no platform query is available or it gave no usable value.
60  return 8;
61 }
62 
// NOTE(review): the signature line that opens this body is missing from this
// listing, and the function name is not visible anywhere in it. Judging by the
// sysconf(_SC_LEVEL2_DCACHE_SIZE) branch this is a cache-size query - confirm
// the name and return type against the real file.
//
// Returns a cache size in bytes: from CPUID on x86, from sysconf on other
// Linux targets, and a fixed fallback of 16 * 1024 otherwise.
64 #if defined(__x86_64__) || defined(__i386__)
65  uint32_t CPUInfo[4];
// Query CPUID leaf 0x80000006; the upper 16 bits of the third output register
// (ECX) are widened to 64 bits and multiplied by 1024. Presumably the field is
// the L2 size in KiB - verify against the CPUID documentation.
66  cpuid(CPUInfo, 0x80000006);
67  return static_cast<uint64_t>(CPUInfo[2] >> 16) * 1024;
68 #elif defined(__SYCL_RT_OS_LINUX) && defined(_SC_LEVEL2_DCACHE_SIZE)
// Only a positive sysconf result is used; anything else falls through to the
// fixed fallback below.
69  long cacheSize = sysconf(_SC_LEVEL2_DCACHE_SIZE);
70  if (cacheSize > 0) {
71  return cacheSize;
72  }
73 #endif
// Fallback when no platform query is available or it gave no usable value.
74  return static_cast<uint64_t>(16 * 1024);
75 }
76 
78 
// NOTE(review): the signature line that opens this body is missing from this
// listing. Presumably this is PlatformUtil::getNativeVectorWidth(TypeIndex)
// with a parameter named TIndex - confirm against the real file.
//
// Picks a per-type vector width from a table chosen by the widest SIMD
// extension the CPU reports. TIndex selects the slot inside the table.
// NOTE(review): every table has 7 entries and Index is used without a range
// check. The meaning of each slot comes from the TypeIndex enum, which is not
// visible here.
79 #if defined(__x86_64__) || defined(__i386__)
80  uint32_t Index = static_cast<uint32_t>(TIndex);
81 
82  // SSE4.2 has 16 byte (XMM) registers
83  static constexpr uint32_t VECTOR_WIDTH_SSE42[] = {16, 8, 4, 2, 4, 2, 0};
84  // AVX supports 32 byte (YMM) registers only for floats and doubles
85  static constexpr uint32_t VECTOR_WIDTH_AVX[] = {16, 8, 4, 2, 8, 4, 0};
86  // AVX2 has a full set of 32 byte (YMM) registers
87  static constexpr uint32_t VECTOR_WIDTH_AVX2[] = {32, 16, 8, 4, 8, 4, 0};
88  // AVX512 has 64 byte (ZMM) registers
89  static constexpr uint32_t VECTOR_WIDTH_AVX512[] = {64, 32, 16, 8, 16, 8, 0};
90 
// Feature checks run from the widest extension to the narrowest, so the first
// match wins.
91 #if defined(__SYCL_RT_OS_LINUX) || defined(__SYCL_RT_OS_DARWIN)
// On Linux and Darwin the compiler builtin does the feature detection.
92  if (__builtin_cpu_supports("avx512f"))
93  return VECTOR_WIDTH_AVX512[Index];
94  if (__builtin_cpu_supports("avx2"))
95  return VECTOR_WIDTH_AVX2[Index];
96  if (__builtin_cpu_supports("avx"))
97  return VECTOR_WIDTH_AVX[Index];
98 #elif defined(__SYCL_RT_OS_WINDOWS)
99 
// On Windows the feature bits are read by hand through the cpuid() helper.
100  uint32_t Info[4];
101 
102  // Check that CPUID func number 7 is available.
103  cpuid(Info, 0);
104  if (Info[0] >= 7) {
105  // avx512f = CPUID.7.EBX[16]
106  cpuid(Info, 7);
107  if (Info[1] & (1 << 16))
108  return VECTOR_WIDTH_AVX512[Index];
109 
110  // avx2 = CPUID.7.EBX[5]
111  if (Info[1] & (1 << 5))
112  return VECTOR_WIDTH_AVX2[Index];
113  }
114  // It is assumed that CPUID func number 1 is always available.
115  // avx = CPUID.1.ECX[28]
116  cpuid(Info, 1);
117  if (Info[2] & (1 << 28))
118  return VECTOR_WIDTH_AVX[Index];
119 #endif
120 
// No AVX-class extension was detected (or the OS is neither of the above):
// use the SSE4.2 table as the x86 baseline.
121  return VECTOR_WIDTH_SSE42[Index];
122 
123 #elif defined(__ARM_NEON)
124  uint32_t Index = static_cast<uint32_t>(TIndex);
125 
126  // NEON has 16 byte registers
127  static constexpr uint32_t VECTOR_WIDTH_NEON[] = {16, 8, 4, 2, 4, 2, 0};
128  return VECTOR_WIDTH_NEON[Index];
129 
130 #endif
// Reached only when the target is neither x86 nor NEON: report width 0.
131  return 0;
132 }
133 
134 void PlatformUtil::prefetch(const char *Ptr, size_t NumBytes) {
135  if (!Ptr)
136  return;
137 
138  const size_t CacheLineSize = PlatformUtil::getMemCacheLineSize();
139  const size_t CacheLineMask = ~(CacheLineSize - 1);
140  const char *PtrEnd = Ptr + NumBytes;
141 
142  // Set the pointer to the beginning of the current cache line.
143  Ptr = reinterpret_cast<const char *>(reinterpret_cast<size_t>(Ptr) &
144  CacheLineMask);
145  for (; Ptr < PtrEnd; Ptr += CacheLineSize) {
146 #if defined(__SYCL_RT_OS_LINUX)
147  __builtin_prefetch(Ptr);
148 #elif defined(__SYCL_RT_OS_WINDOWS)
149  _mm_prefetch(Ptr, _MM_HINT_T0);
150 #endif
151  }
152 }
153 
154 } // namespace detail
155 } // namespace _V1
156 } // namespace sycl
Definition: access.hpp:18
static uint32_t getMemCacheLineSize()
static void prefetch(const char *Ptr, size_t NumBytes)
static uint32_t getNativeVectorWidth(TypeIndex Index)
Returns the maximum vector width counted in elements of the given type.
static uint32_t getMaxClockFrequency()