Skip to content

Commit d2d83be

Browse files
committed
pahole: Allow tweaking the size of the loader hash tables
To experiment with different sizes as time goes by and the number of symbols in the kernel grows. The current default, 15, is suboptimal for the fedora rawhide kernel, we can do better using 12. Default: 15: $ sudo ~acme/bin/perf stat -d -r5 pahole -j --btf_encode_detached vmlinux-j.btf vmlinux Performance counter stats for 'pahole -j --btf_encode_detached vmlinux-j.btf vmlinux' (5 runs): 8,107.73 msec task-clock # 2.749 CPUs utilized ( +- 0.05% ) 1,723 context-switches # 212.562 /sec ( +- 1.86% ) 5 cpu-migrations # 0.641 /sec ( +- 46.07% ) 68,802 page-faults # 8.486 K/sec ( +- 0.05% ) 29,221,590,880 cycles # 3.604 GHz ( +- 0.04% ) 63,438,138,612 instructions # 2.17 insn per cycle ( +- 0.00% ) 15,125,172,105 branches # 1.866 G/sec ( +- 0.00% ) 119,983,284 branch-misses # 0.79% of all branches ( +- 0.06% ) 13,964,248,638 L1-dcache-loads # 1.722 G/sec ( +- 0.00% ) 375,110,346 L1-dcache-load-misses # 2.69% of all L1-dcache accesses( +- 0.01% ) 91,712,402 LLC-loads # 11.312 M/sec ( +- 0.14% ) 28,025,289 LLC-load-misses # 30.56% of all LL-cache accesses ( +- 0.23% ) 2.94980 +- 0.00193 seconds time elapsed ( +- 0.07% ) $ New default, to be set in an upcoming patch, 12: $ sudo ~acme/bin/perf stat -d -r5 pahole --hashbits=12 -j --btf_encode_detached vmlinux-j.btf vmlinux Performance counter stats for 'pahole --hashbits=12 -j --btf_encode_detached vmlinux-j.btf vmlinux' (5 runs): 7,687.31 msec task-clock # 2.704 CPUs utilized ( +- 0.02% ) 1,677 context-switches # 218.126 /sec ( +- 0.70% ) 4 cpu-migrations # 0.468 /sec ( +- 18.84% ) 67,827 page-faults # 8.823 K/sec ( +- 0.03% ) 27,711,744,058 cycles # 3.605 GHz ( +- 0.02% ) 63,032,539,630 instructions # 2.27 insn per cycle ( +- 0.00% ) 15,062,001,666 branches # 1.959 G/sec ( +- 0.00% ) 127,728,818 branch-misses # 0.85% of all branches ( +- 0.07% ) 13,972,184,314 L1-dcache-loads # 1.818 G/sec ( +- 0.00% ) 364,962,883 L1-dcache-load-misses # 2.61% of all L1-dcache accesses( +- 0.02% ) 83,969,109 LLC-loads # 10.923 M/sec ( +- 
0.13% ) 19,141,055 LLC-load-misses # 22.80% of all LL-cache accesses ( +- 0.25% ) 2.842440 +- 0.000952 seconds time elapsed ( +- 0.03% ) $ sudo ~acme/bin/perf stat -d -r5 pahole --hashbits=11 -j --btf_encode_detached vmlinux-j.btf vmlinux Performance counter stats for 'pahole --hashbits=11 -j --btf_encode_detached vmlinux-j.btf vmlinux' (5 runs): 7,704.29 msec task-clock # 2.702 CPUs utilized ( +- 0.05% ) 1,676 context-switches # 217.515 /sec ( +- 1.04% ) 2 cpu-migrations # 0.286 /sec ( +- 17.01% ) 67,813 page-faults # 8.802 K/sec ( +- 0.05% ) 27,786,710,102 cycles # 3.607 GHz ( +- 0.05% ) 63,027,795,038 instructions # 2.27 insn per cycle ( +- 0.00% ) 15,066,316,987 branches # 1.956 G/sec ( +- 0.00% ) 130,431,772 branch-misses # 0.87% of all branches ( +- 0.20% ) 13,981,516,517 L1-dcache-loads # 1.815 G/sec ( +- 0.00% ) 369,525,466 L1-dcache-load-misses # 2.64% of all L1-dcache accesses( +- 0.03% ) 83,328,524 LLC-loads # 10.816 M/sec ( +- 0.27% ) 18,704,020 LLC-load-misses # 22.45% of all LL-cache accesses ( +- 0.18% ) 2.85109 +- 0.00281 seconds time elapsed ( +- 0.10% ) $ sudo ~acme/bin/perf stat -d -r5 pahole --hashbits=8 -j --btf_encode_detached vmlinux-j.btf vmlinux Performance counter stats for 'pahole --hashbits=8 -j --btf_encode_detached vmlinux-j.btf vmlinux' (5 runs): 8,190.55 msec task-clock # 2.774 CPUs utilized ( +- 0.03% ) 1,607 context-switches # 196.226 /sec ( +- 0.67% ) 3 cpu-migrations # 0.317 /sec ( +- 15.38% ) 67,869 page-faults # 8.286 K/sec ( +- 0.05% ) 29,511,213,192 cycles # 3.603 GHz ( +- 0.02% ) 63,347,196,598 instructions # 2.15 insn per cycle ( +- 0.00% ) 15,198,023,498 branches # 1.856 G/sec ( +- 0.00% ) 131,113,100 branch-misses # 0.86% of all branches ( +- 0.14% ) 14,118,162,884 L1-dcache-loads # 1.724 G/sec ( +- 0.00% ) 422,048,384 L1-dcache-load-misses # 2.99% of all L1-dcache accesses( +- 0.01% ) 105,878,910 LLC-loads # 12.927 M/sec ( +- 0.05% ) 21,022,664 LLC-load-misses # 19.86% of all LL-cache accesses ( +- 0.20% ) 2.952678 +- 
0.000858 seconds time elapsed ( +- 0.03% ) $ sudo ~acme/bin/perf stat -d -r5 pahole --hashbits=13 -j --btf_encode_detached vmlinux-j.btf vmlinux Performance counter stats for 'pahole --hashbits=13 -j --btf_encode_detached vmlinux-j.btf vmlinux' (5 runs): 7,728.71 msec task-clock # 2.707 CPUs utilized ( +- 0.07% ) 1,661 context-switches # 214.887 /sec ( +- 0.70% ) 2 cpu-migrations # 0.259 /sec ( +- 22.36% ) 67,893 page-faults # 8.785 K/sec ( +- 0.04% ) 27,874,322,843 cycles # 3.607 GHz ( +- 0.07% ) 63,079,425,815 instructions # 2.26 insn per cycle ( +- 0.00% ) 15,067,279,408 branches # 1.950 G/sec ( +- 0.00% ) 125,706,874 branch-misses # 0.83% of all branches ( +- 1.00% ) 13,967,177,801 L1-dcache-loads # 1.807 G/sec ( +- 0.00% ) 363,566,754 L1-dcache-load-misses # 2.60% of all L1-dcache accesses( +- 0.02% ) 86,583,482 LLC-loads # 11.203 M/sec ( +- 0.13% ) 20,629,871 LLC-load-misses # 23.83% of all LL-cache accesses ( +- 0.21% ) 2.85551 +- 0.00124 seconds time elapsed ( +- 0.04% ) $ Signed-off-by: Arnaldo Carvalho de Melo <[email protected]>
1 parent ff7bd70 commit d2d83be

File tree

2 files changed

+21
-0
lines changed

2 files changed

+21
-0
lines changed

man-pages/pahole.1

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,13 @@ Allows specifying a list of debugging formats to try, in order. Right now this
127127
includes "ctf" and "dwarf". The default format path used is equivalent to
128128
"-F dwarf,ctf".
129129

130+
.TP
131+
.B \-\-hashbits=BITS
132+
Allows specifying the number of bits used to size the hash tables of the debugging format loader.
133+
The only one affected so far is the "dwarf" one; its default is now 15, and the
134+
maximum for it is now 21 bits. Tweak it to see if it improves performance as
135+
the kernel evolves and more types and functions have to be loaded.
136+
130137
.TP
131138
.B \-\-hex
132139
Print offsets and sizes in hexadecimal.

pahole.c

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -900,6 +900,7 @@ ARGP_PROGRAM_VERSION_HOOK_DEF = dwarves_print_version;
900900
#define ARGP_btf_encode_detached 326
901901
#define ARGP_prettify_input_filename 327
902902
#define ARGP_sort_output 328
903+
#define ARGP_hashbits 329
903904

904905
static const struct argp_option pahole__options[] = {
905906
{
@@ -1259,6 +1260,12 @@ static const struct argp_option pahole__options[] = {
12591260
.arg = "PATH",
12601261
.doc = "Path to the raw data to pretty print",
12611262
},
1263+
{
1264+
.name = "hashbits",
1265+
.key = ARGP_hashbits,
1266+
.arg = "BITS",
1267+
.doc = "Number of bits for the hash table key",
1268+
},
12621269
{
12631270
.name = NULL,
12641271
}
@@ -1399,6 +1406,8 @@ static error_t pahole__options_parser(int key, char *arg,
13991406
prettify_input_filename = arg; break;
14001407
case ARGP_sort_output:
14011408
sort_output = true; break;
1409+
case ARGP_hashbits:
1410+
conf_load.hashtable_bits = atoi(arg); break;
14021411
default:
14031412
return ARGP_ERR_UNKNOWN;
14041413
}
@@ -2872,6 +2881,11 @@ int main(int argc, char *argv[])
28722881
return 0;
28732882
}
28742883

2884+
if (conf_load.hashtable_bits > 31) {
2885+
fprintf(stderr, "Invalid --hashbits value (%d) should be less than 32\n", conf_load.hashtable_bits);
2886+
goto out;
2887+
}
2888+
28752889
if (dwarves__init(cacheline_size)) {
28762890
fputs("pahole: insufficient memory\n", stderr);
28772891
goto out;

0 commit comments

Comments
 (0)