Skip to content

Commit c209221

Browse files
AshkanAliabadi authored and dreiss committed
Upstream cpuinfo updates in XNNPACK as of XNNPACK:d793f6c2ec145be3ddbffea951e6e5480f4646b8.
1 parent 2b14e44 commit c209221

File tree

23 files changed

+423
-69
lines changed

23 files changed

+423
-69
lines changed

CMakeLists.txt

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ IF(NOT CMAKE_SYSTEM_NAME)
7979
"Target operating system is not specified. "
8080
"cpuinfo will compile, but cpuinfo_initialize() will always fail.")
8181
SET(CPUINFO_SUPPORTED_PLATFORM FALSE)
82-
ELSEIF(NOT CMAKE_SYSTEM_NAME MATCHES "^(Windows|Darwin|Linux|Android)$")
82+
ELSEIF(NOT CMAKE_SYSTEM_NAME MATCHES "^(Windows|CYGWIN|MSYS|Darwin|Linux|Android)$")
8383
IF(${CMAKE_VERSION} VERSION_GREATER_EQUAL "3.14" AND NOT CMAKE_SYSTEM_NAME STREQUAL "iOS")
8484
MESSAGE(WARNING
8585
"Target operating system \"${CMAKE_SYSTEM_NAME}\" is not supported in cpuinfo. "
@@ -125,7 +125,7 @@ SET(CPUINFO_SRCS
125125
src/cache.c)
126126

127127
IF(CPUINFO_SUPPORTED_PLATFORM)
128-
IF(CMAKE_SYSTEM_PROCESSOR MATCHES "^(i[3-6]86|AMD64|x86(_64)?)$" OR IOS_ARCH MATCHES "^(i386|x86_64)$")
128+
IF(NOT CMAKE_SYSTEM_NAME STREQUAL "Emscripten" AND (CMAKE_SYSTEM_PROCESSOR MATCHES "^(i[3-6]86|AMD64|x86(_64)?)$" OR IOS_ARCH MATCHES "^(i386|x86_64)$"))
129129
LIST(APPEND CPUINFO_SRCS
130130
src/x86/init.c
131131
src/x86/info.c
@@ -143,7 +143,7 @@ IF(CPUINFO_SUPPORTED_PLATFORM)
143143
src/x86/linux/cpuinfo.c)
144144
ELSEIF(CMAKE_SYSTEM_NAME STREQUAL "Darwin" OR CMAKE_SYSTEM_NAME STREQUAL "iOS")
145145
LIST(APPEND CPUINFO_SRCS src/x86/mach/init.c)
146-
ELSEIF(CMAKE_SYSTEM_NAME STREQUAL "Windows")
146+
ELSEIF(CMAKE_SYSTEM_NAME MATCHES "^(Windows|CYGWIN|MSYS)$")
147147
LIST(APPEND CPUINFO_SRCS src/x86/windows/init.c)
148148
ENDIF()
149149
ELSEIF(CMAKE_SYSTEM_PROCESSOR MATCHES "^(armv[5-8].*|aarch64)$" OR IOS_ARCH MATCHES "^(armv7.*|arm64.*)$")
@@ -175,6 +175,11 @@ IF(CPUINFO_SUPPORTED_PLATFORM)
175175
ENDIF()
176176
ENDIF()
177177

178+
IF(CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
179+
LIST(APPEND CPUINFO_SRCS
180+
src/emscripten/init.c)
181+
ENDIF()
182+
178183
IF(CMAKE_SYSTEM_NAME STREQUAL "Linux" OR CMAKE_SYSTEM_NAME STREQUAL "Android")
179184
LIST(APPEND CPUINFO_SRCS
180185
src/linux/smallfile.c
@@ -205,6 +210,11 @@ ADD_LIBRARY(cpuinfo_internals STATIC ${CPUINFO_SRCS})
205210
CPUINFO_TARGET_ENABLE_C99(cpuinfo)
206211
CPUINFO_TARGET_ENABLE_C99(cpuinfo_internals)
207212
CPUINFO_TARGET_RUNTIME_LIBRARY(cpuinfo)
213+
IF(CMAKE_SYSTEM_NAME MATCHES "^(Windows|CYGWIN|MSYS)$")
214+
# Target Windows 7+ API
215+
TARGET_COMPILE_DEFINITIONS(cpuinfo PRIVATE _WIN32_WINNT=0x0601)
216+
TARGET_COMPILE_DEFINITIONS(cpuinfo_internals PRIVATE _WIN32_WINNT=0x0601)
217+
ENDIF()
208218
SET_TARGET_PROPERTIES(cpuinfo PROPERTIES PUBLIC_HEADER include/cpuinfo.h)
209219
TARGET_INCLUDE_DIRECTORIES(cpuinfo BEFORE PUBLIC include)
210220
TARGET_INCLUDE_DIRECTORIES(cpuinfo BEFORE PRIVATE src)

README.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ Detect if target is a 32-bit or 64-bit ARM system:
4949
```
5050

5151
Check if the host CPU supports ARM NEON
52+
5253
```c
5354
cpuinfo_initialize();
5455
if (cpuinfo_has_arm_neon()) {
@@ -57,6 +58,7 @@ if (cpuinfo_has_arm_neon()) {
5758
```
5859

5960
Check if the host CPU supports x86 AVX
61+
6062
```c
6163
cpuinfo_initialize();
6264
if (cpuinfo_has_x86_avx()) {
@@ -65,6 +67,7 @@ if (cpuinfo_has_x86_avx()) {
6567
```
6668

6769
Check if the thread runs on a Cortex-A53 core
70+
6871
```c
6972
cpuinfo_initialize();
7073
switch (cpuinfo_get_current_core()->uarch) {
@@ -78,12 +81,14 @@ switch (cpuinfo_get_current_core()->uarch) {
7881
```
7982

8083
Get the size of level 1 data cache on the fastest core in the processor (e.g. big core in big.LITTLE ARM systems):
84+
8185
```c
8286
cpuinfo_initialize();
8387
const size_t l1_size = cpuinfo_get_processor(0)->cache.l1d->size;
8488
```
8589

8690
Pin thread to cores sharing L2 cache with the current core (Linux or Android)
91+
8792
```c
8893
cpuinfo_initialize();
8994
cpu_set_t cpu_set;

bench/get-current.cc

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,4 +30,13 @@ static void cpuinfo_get_current_uarch_index(benchmark::State& state) {
3030
}
3131
BENCHMARK(cpuinfo_get_current_uarch_index)->Unit(benchmark::kNanosecond);
3232

33+
static void cpuinfo_get_current_uarch_index_with_default(benchmark::State& state) {
34+
cpuinfo_initialize();
35+
while (state.KeepRunning()) {
36+
const uint32_t uarch_index = cpuinfo_get_current_uarch_index_with_default(0);
37+
benchmark::DoNotOptimize(uarch_index);
38+
}
39+
}
40+
BENCHMARK(cpuinfo_get_current_uarch_index_with_default)->Unit(benchmark::kNanosecond);
41+
3342
BENCHMARK_MAIN();

configure.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ def main(args):
2323
build.export_cpath("include", ["cpuinfo.h"])
2424

2525
with build.options(source_dir="src", macros=macros, extra_include_dirs="src", deps=build.deps.clog):
26-
sources = ["init.c", "api.c"]
26+
sources = ["api.c", "init.c", "cache.c"]
2727
if build.target.is_x86 or build.target.is_x86_64:
2828
sources += [
2929
"x86/init.c", "x86/info.c", "x86/isa.c", "x86/vendor.c",
@@ -61,7 +61,6 @@ def main(args):
6161
sources += ["mach/topology.c"]
6262
if build.target.is_linux or build.target.is_android:
6363
sources += [
64-
"linux/current.c",
6564
"linux/cpulist.c",
6665
"linux/smallfile.c",
6766
"linux/multiline.c",

include/cpuinfo.h

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -499,11 +499,11 @@ enum cpuinfo_uarch {
499499
/** Applied Micro X-Gene. */
500500
cpuinfo_uarch_xgene = 0x00B00100,
501501

502-
/** Huawei hisilicon Kunpeng Series CPU. */
503-
cpuinfo_uarch_taishanv110 = 0x00C00100,
504-
505502
/* Hygon Dhyana (a modification of AMD Zen for Chinese market). */
506503
cpuinfo_uarch_dhyana = 0x01000100,
504+
505+
/** HiSilicon TaiShan v110 (Huawei Kunpeng 920 series processors). */
506+
cpuinfo_uarch_taishan_v110 = 0x00C00100,
507507
};
508508

509509
struct cpuinfo_processor {
@@ -523,7 +523,7 @@ struct cpuinfo_processor {
523523
*/
524524
int linux_id;
525525
#endif
526-
#if defined(_WIN32)
526+
#if defined(_WIN32) || defined(__CYGWIN__)
527527
/** Windows-specific ID for the group containing the logical processor. */
528528
uint16_t windows_group_id;
529529
/**
@@ -1799,13 +1799,22 @@ const struct cpuinfo_core* CPUINFO_ABI cpuinfo_get_current_core(void);
17991799

18001800
/**
18011801
* Identify the microarchitecture index of the core that executes the current thread.
1802-
* If the system does not support such identification, the function return 0.
1802+
* If the system does not support such identification, the function returns 0.
18031803
*
18041804
* There is no guarantee that the thread will stay on the same type of core for any time.
18051805
* Callers should treat the result as only a hint.
18061806
*/
18071807
uint32_t CPUINFO_ABI cpuinfo_get_current_uarch_index(void);
18081808

1809+
/**
1810+
* Identify the microarchitecture index of the core that executes the current thread.
1811+
* If the system does not support such identification, the function returns the user-specified default value.
1812+
*
1813+
* There is no guarantee that the thread will stay on the same type of core for any time.
1814+
* Callers should treat the result as only a hint.
1815+
*/
1816+
uint32_t CPUINFO_ABI cpuinfo_get_current_uarch_index_with_default(uint32_t default_uarch_index);
1817+
18091818
#ifdef __cplusplus
18101819
} /* extern "C" */
18111820
#endif

src/api.c

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -374,3 +374,33 @@ uint32_t CPUINFO_ABI cpuinfo_get_current_uarch_index(void) {
374374
return 0;
375375
#endif
376376
}
377+
378+
uint32_t CPUINFO_ABI cpuinfo_get_current_uarch_index_with_default(uint32_t default_uarch_index) {
379+
if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) {
380+
cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "current_uarch_index_with_default");
381+
}
382+
#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
383+
#ifdef __linux__
384+
if (cpuinfo_linux_cpu_to_uarch_index_map == NULL) {
385+
/* Special case: avoid syscall on systems with only a single type of cores */
386+
return 0;
387+
}
388+
389+
/* General case */
390+
unsigned cpu;
391+
if CPUINFO_UNLIKELY(syscall(__NR_getcpu, &cpu, NULL, NULL) != 0) {
392+
return default_uarch_index;
393+
}
394+
if CPUINFO_UNLIKELY((uint32_t) cpu >= cpuinfo_linux_cpu_max) {
395+
return default_uarch_index;
396+
}
397+
return cpuinfo_linux_cpu_to_uarch_index_map[cpu];
398+
#else
399+
/* Fallback: no API to query current core, use default uarch index. */
400+
return default_uarch_index;
401+
#endif
402+
#else
403+
/* Only ARM/ARM64 processors may include cores of different types in the same package. */
404+
return 0;
405+
#endif
406+
}

src/arm/cache.c

Lines changed: 22 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1448,23 +1448,24 @@ void cpuinfo_arm_decode_cache(
14481448
.line_size = 64 /* assumption */
14491449
};
14501450
break;
1451-
case cpuinfo_uarch_taishanv110:
1451+
case cpuinfo_uarch_taishan_v110:
14521452
/*
1453-
* Kunpeng920 series CPU designed by Huawei hisilicon for server,
1454-
* L1 and L2 cache is private to each core, L3 is shared with all cores.
1455-
* +--------------------+-------+-----------+-----------+-----------+----------+------------+
1456-
* | Processor model | Cores | L1D cache | L1I cache | L2 cache | L3 cache | Reference |
1457-
* +--------------------+-------+-----------+-----------+-----------+----------+------------+
1458-
* | Kunpeng920-3226 | 32 | 64K | 64K | 512K | 32M | [1] |
1459-
* +--------------------+-------+-----------+-----------+-----------+----------+------------+
1460-
* | Kunpeng920-4826 | 48 | 64K | 64K | 512K | 48M | [2] |
1461-
* +--------------------+-------+-----------+-----------+-----------+----------+------------+
1462-
* | Kunpeng920-6426 | 64 | 64K | 64K | 512K | 64M | [3] |
1463-
* +--------------------+-------+-----------+-----------+-----------+----------+------------+
1464-
*
1465-
* [1] https://en.wikichip.org/wiki/hisilicon/kunpeng/920-3226
1466-
* [2] https://en.wikichip.org/wiki/hisilicon/kunpeng/920-4826
1467-
* [3] https://en.wikichip.org/wiki/hisilicon/kunpeng/920-6426
1453+
* It features private 64 KiB L1 instruction and data caches as well as 512 KiB of private L2. [1]
1454+
*
1455+
* +------------------+-------+-----------+-----------+-----------+----------+-----------+
1456+
* | Processor model | Cores | L1D cache | L1I cache | L2 cache | L3 cache | Reference |
1457+
* +------------------+-------+-----------+-----------+-----------+----------+-----------+
1458+
* | Kunpeng 920-3226 | 32 | 64K | 64K | 512K | 32M | [2] |
1459+
* +------------------+-------+-----------+-----------+-----------+----------+-----------+
1460+
* | Kunpeng 920-4826 | 48 | 64K | 64K | 512K | 48M | [3] |
1461+
* +------------------+-------+-----------+-----------+-----------+----------+-----------+
1462+
* | Kunpeng 920-6426 | 64 | 64K | 64K | 512K | 64M | [4] |
1463+
* +------------------+-------+-----------+-----------+-----------+----------+-----------+
1464+
*
1465+
* [1] https://en.wikichip.org/wiki/hisilicon/microarchitectures/taishan_v110
1466+
* [2] https://en.wikichip.org/wiki/hisilicon/kunpeng/920-3226
1467+
* [3] https://en.wikichip.org/wiki/hisilicon/kunpeng/920-4826
1468+
* [4] https://en.wikichip.org/wiki/hisilicon/kunpeng/920-6426
14681469
*/
14691470
*l1i = (struct cpuinfo_cache) {
14701471
.size = 64 * 1024,
@@ -1482,11 +1483,11 @@ void cpuinfo_arm_decode_cache(
14821483
.line_size = 128 /* assumption */,
14831484
.flags = CPUINFO_CACHE_INCLUSIVE /* assumption */,
14841485
};
1485-
*l3 = (struct cpuinfo_cache) {
1486-
.size = cluster_cores * 1024 * 1024,
1487-
.associativity = 16 /* assumption */,
1488-
.line_size = 128 /* assumption */,
1489-
};
1486+
*l3 = (struct cpuinfo_cache) {
1487+
.size = cluster_cores * 1024 * 1024,
1488+
.associativity = 16 /* assumption */,
1489+
.line_size = 128 /* assumption */,
1490+
};
14901491
break;
14911492
#endif
14921493
case cpuinfo_uarch_cortex_a12:

src/arm/linux/aarch32-isa.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -193,7 +193,7 @@ void cpuinfo_arm_linux_decode_isa_from_proc_cpuinfo(
193193
CPUINFO_ARM_LINUX_FEATURE_VFPD32 | CPUINFO_ARM_LINUX_FEATURE_VFPV4 | CPUINFO_ARM_LINUX_FEATURE_NEON;
194194
if ((architecture_version >= 7) || (features & vfpv3_mask)) {
195195
isa->vfpv3 = true;
196-
196+
197197
const uint32_t d32_mask = CPUINFO_ARM_LINUX_FEATURE_VFPD32 | CPUINFO_ARM_LINUX_FEATURE_NEON;
198198
if (features & d32_mask) {
199199
isa->d32 = true;

src/arm/linux/clusters.c

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ static inline bool bitmask_all(uint32_t bitfield, uint32_t mask) {
4747
*
4848
* @param usable_processors - number of processors in the @p processors array with CPUINFO_LINUX_FLAG_VALID flags.
4949
* @param max_processors - number of elements in the @p processors array.
50-
* @param[in,out] processors - processor descriptors with pre-parsed POSSIBLE and PRESENT flags, minimum/maximum
50+
* @param[in,out] processors - processor descriptors with pre-parsed POSSIBLE and PRESENT flags, minimum/maximum
5151
* frequency, MIDR information, and core cluster (package siblings list) information.
5252
*
5353
* @retval true if the heuristic successfully assigned all processors into clusters of cores.
@@ -308,7 +308,7 @@ bool cpuinfo_arm_linux_detect_core_clusters_by_heuristic(
308308
* @p processors array have cluster information.
309309
*
310310
* @param max_processors - number of elements in the @p processors array.
311-
* @param[in,out] processors - processor descriptors with pre-parsed POSSIBLE and PRESENT flags, minimum/maximum
311+
* @param[in,out] processors - processor descriptors with pre-parsed POSSIBLE and PRESENT flags, minimum/maximum
312312
* frequency, MIDR information, and core cluster (package siblings list) information.
313313
*
314314
* @retval true if the heuristic successfully assigned all processors into clusters of cores.
@@ -466,7 +466,7 @@ void cpuinfo_arm_linux_detect_core_clusters_by_sequential_scan(
466466
* This function should be called after all processors are assigned to core clusters.
467467
*
468468
* @param max_processors - number of elements in the @p processors array.
469-
* @param[in,out] processors - processor descriptors with pre-parsed POSSIBLE and PRESENT flags,
469+
* @param[in,out] processors - processor descriptors with pre-parsed POSSIBLE and PRESENT flags,
470470
* and decoded core cluster (package_leader_id) information.
471471
* The function expects the value of processors[i].package_processor_count to be zero.
472472
* Upon return, processors[i].package_processor_count will contain the number of logical
@@ -482,12 +482,12 @@ void cpuinfo_arm_linux_count_cluster_processors(
482482
const uint32_t package_leader_id = processors[i].package_leader_id;
483483
processors[package_leader_id].package_processor_count += 1;
484484
}
485-
}
485+
}
486486
/* Second pass: copy the package_processor_count from the group leader processor */
487487
for (uint32_t i = 0; i < max_processors; i++) {
488488
if (bitmask_all(processors[i].flags, CPUINFO_LINUX_FLAG_VALID)) {
489489
const uint32_t package_leader_id = processors[i].package_leader_id;
490490
processors[i].package_processor_count = processors[package_leader_id].package_processor_count;
491491
}
492-
}
492+
}
493493
}

src/arm/linux/cpuinfo.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ static uint32_t parse_processor_number(
4444

4545
/*
4646
* Full list of ARM features reported in /proc/cpuinfo:
47-
*
47+
*
4848
* * swp - support for SWP instruction (deprecated in ARMv7, can be removed in future)
4949
* * half - support for half-word loads and stores. These instructions are part of ARMv4,
5050
* so no need to check it on supported CPUs.
@@ -620,7 +620,7 @@ static void parse_cache_number(
620620
break;
621621
default:
622622
cpuinfo_log_warning("invalid %s %.*s is ignored: a value of 16, 32, 64, or 128 expected",
623-
number_name, (int) (number_end - number_start), number_start);
623+
number_name, (int) (number_end - number_start), number_start);
624624
}
625625
}
626626

@@ -670,7 +670,7 @@ static bool parse_line(
670670
if (line_start == line_end) {
671671
return true;
672672
}
673-
673+
674674
/* Search for ':' on the line. */
675675
const char* separator = line_start;
676676
for (; separator != line_end; separator++) {

0 commit comments

Comments
 (0)