Skip to content

Commit 2e719cc

Browse files
anakryiko authored and acmel committed
btf_encoder: revamp how per-CPU variables are encoded
Right now, to encode per-CPU variables in BTF, pahole iterates over the complete vmlinux symbol table for each CU. There are 2500 CUs for a typical kernel image. Overall, to encode 287 per-CPU variables pahole spends more than 10% of its CPU budget, which is incredibly wasteful. This patch revamps how this is done. Now it pre-processes the symbol table once, before any per-CU processing starts. It remembers each per-CPU variable symbol, including its address, size, and name. Then, while processing each CU, binary search is used to correlate each DWARF variable with the per-CPU symbols and figure out whether the variable belongs to the per-CPU data section. If a match is found, BTF_KIND_VAR is emitted and var_secinfo is recorded, just like before. At the very end, after all CUs are processed, BTF_KIND_DATASEC is emitted with sorted variables. This change makes the overhead of generating per-CPU variables pretty negligible and gives back about 10% of CPU usage. Performance counter stats for './pahole -J /home/andriin/linux-build/default/vmlinux': BEFORE: 19.160149000 seconds user 1.304873000 seconds sys 24,114.05 msec task-clock # 0.999 CPUs utilized 83 context-switches # 0.003 K/sec 0 cpu-migrations # 0.000 K/sec 622,417 page-faults # 0.026 M/sec 72,897,315,125 cycles # 3.023 GHz (25.02%) 127,807,316,959 instructions # 1.75 insn per cycle (25.01%) 29,087,179,117 branches # 1206.234 M/sec (25.01%) 464,105,921 branch-misses # 1.60% of all branches (25.01%) 30,252,119,368 L1-dcache-loads # 1254.543 M/sec (25.01%) 1,156,336,207 L1-dcache-load-misses # 3.82% of all L1-dcache hits (25.05%) 343,373,503 LLC-loads # 14.240 M/sec (25.02%) 12,044,977 LLC-load-misses # 3.51% of all LL-cache hits (25.01%) 24.136198321 seconds time elapsed 22.729693000 seconds user 1.384859000 seconds sys AFTER: 16.781455000 seconds user 1.343956000 seconds sys 23,398.77 msec task-clock # 1.000 CPUs utilized 86 context-switches # 0.004 K/sec 0 cpu-migrations # 0.000 K/sec 622,420 page-faults # 0.027 M/sec 68,395,641,468 cycles # 
2.923 GHz (25.05%) 114,241,327,034 instructions # 1.67 insn per cycle (25.01%) 26,330,711,718 branches # 1125.303 M/sec (25.01%) 465,926,869 branch-misses # 1.77% of all branches (25.00%) 24,662,984,772 L1-dcache-loads # 1054.029 M/sec (25.00%) 1,054,052,064 L1-dcache-load-misses # 4.27% of all L1-dcache hits (25.00%) 340,970,622 LLC-loads # 14.572 M/sec (25.00%) 16,032,297 LLC-load-misses # 4.70% of all LL-cache hits (25.03%) 23.402259654 seconds time elapsed 21.916437000 seconds user 1.482826000 seconds sys Committer testing: $ grep 'model name' -m1 /proc/cpuinfo model name : AMD Ryzen 9 3900X 12-Core Processor $ Before: $ perf stat -r5 pahole -J vmlinux Performance counter stats for 'pahole -J vmlinux' (5 runs): 9,730.28 msec task-clock:u # 0.998 CPUs utilized ( +- 0.54% ) 0 context-switches:u # 0.000 K/sec 0 cpu-migrations:u # 0.000 K/sec 353,854 page-faults:u # 0.036 M/sec ( +- 0.00% ) 39,721,726,459 cycles:u # 4.082 GHz ( +- 0.07% ) (83.33%) 626,010,654 stalled-cycles-frontend:u # 1.58% frontend cycles idle ( +- 0.91% ) (83.33%) 7,518,333,691 stalled-cycles-backend:u # 18.93% backend cycles idle ( +- 0.56% ) (83.33%) 85,477,123,093 instructions:u # 2.15 insn per cycle # 0.09 stalled cycles per insn ( +- 0.02% ) (83.34%) 19,346,085,683 branches:u # 1988.235 M/sec ( +- 0.03% ) (83.34%) 237,291,787 branch-misses:u # 1.23% of all branches ( +- 0.15% ) (83.33%) 9.7465 +- 0.0524 seconds time elapsed ( +- 0.54% ) $ After: $ perf stat -r5 pahole -J vmlinux Performance counter stats for 'pahole -J vmlinux' (5 runs): 8,953.80 msec task-clock:u # 0.998 CPUs utilized ( +- 0.09% ) 0 context-switches:u # 0.000 K/sec 0 cpu-migrations:u # 0.000 K/sec 353,855 page-faults:u # 0.040 M/sec ( +- 0.00% ) 35,775,730,539 cycles:u # 3.996 GHz ( +- 0.07% ) (83.33%) 579,534,836 stalled-cycles-frontend:u # 1.62% frontend cycles idle ( +- 2.21% ) (83.33%) 5,719,840,144 stalled-cycles-backend:u # 15.99% backend cycles idle ( +- 0.93% ) (83.33%) 73,035,744,786 instructions:u # 2.04 insn 
per cycle # 0.08 stalled cycles per insn ( +- 0.02% ) (83.34%) 16,798,017,844 branches:u # 1876.077 M/sec ( +- 0.05% ) (83.33%) 237,777,143 branch-misses:u # 1.42% of all branches ( +- 0.15% ) (83.34%) 8.97077 +- 0.00803 seconds time elapsed ( +- 0.09% ) $ Indeed, about 10% shaved, not bad. Signed-off-by: Andrii Nakryiko <[email protected]> Tested-by: Arnaldo Carvalho de Melo <[email protected]> Cc: Alexei Starovoitov <[email protected]> Cc: Andrii Nakryiko <[email protected]> Cc: Hao Luo <[email protected]> Cc: Oleg Rombakh <[email protected]> Cc: [email protected] Cc: [email protected] Cc: [email protected] Signed-off-by: Arnaldo Carvalho de Melo <[email protected]>
1 parent 0258a47 commit 2e719cc

File tree

3 files changed

+148
-107
lines changed

3 files changed

+148
-107
lines changed

btf_encoder.c

Lines changed: 146 additions & 102 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
#include "btf_encoder.h"
1818

1919
#include <ctype.h> /* for isalpha() and isalnum() */
20+
#include <stdlib.h> /* for qsort() and bsearch() */
2021
#include <inttypes.h>
2122

2223
/*
@@ -53,18 +54,18 @@ static bool btf_name_valid(const char *p)
5354
return !*p;
5455
}
5556

/*
 * Report a symbol that cannot be encoded in BTF.
 *
 * @msg:     human-readable description of the problem
 * @sym:     name of the offending symbol
 * @verbose: when non-zero, print the warning in forced mode
 * @force:   when true the symbol is merely ignored, so only a warning is
 *           printed (and only if @verbose is set); otherwise an error plus
 *           a hint about the force option is printed
 *
 * All output goes to stderr; nothing is returned. The caller decides whether
 * to skip the symbol or abort.
 */
static void dump_invalid_symbol(const char *msg, const char *sym,
				int verbose, bool force)
{
	if (force) {
		if (verbose)
			fprintf(stderr, "PAHOLE: Warning: %s, ignored (sym: '%s').\n",
				msg, sym);
		return;
	}

	fprintf(stderr, "PAHOLE: Error: %s (sym: '%s').\n", msg, sym);
	fprintf(stderr, "PAHOLE: Error: Use '--btf_encode_force' to ignore such symbols and force emit the btf.\n");
}
6970

7071
extern struct debug_fmt_ops *dwarves__active_loader;
@@ -202,45 +203,138 @@ int btf_encoder__encode()
202203
{
203204
int err;
204205

206+
if (gobuffer__size(&btfe->percpu_secinfo) != 0)
207+
btf_elf__add_datasec_type(btfe, PERCPU_SECTION, &btfe->percpu_secinfo);
208+
205209
err = btf_elf__encode(btfe, 0);
206210
btf_elf__delete(btfe);
207211
btfe = NULL;
208212

209213
return err;
210214
}
211215

212-
#define HASHADDR__BITS 8
213-
#define HASHADDR__SIZE (1UL << HASHADDR__BITS)
214-
#define hashaddr__fn(key) hash_64(key, HASHADDR__BITS)
/* Upper bound on the number of per-CPU variable symbols that can be cached. */
#define MAX_PERCPU_VAR_CNT 4096

/* One per-CPU variable symbol, as recorded from the ELF symbol table. */
struct var_info {
	uint64_t addr;		/* symbol value (address), the lookup key */
	uint32_t sz;		/* symbol size as reported by the symbol table */
	const char *name;	/* symbol name; the string is not owned by this entry */
};

/*
 * Table of known per-CPU variables. It must be sorted by address with
 * percpu_var_cmp() before percpu_var_exists() is used, because the lookup
 * is a binary search.
 */
static struct var_info percpu_vars[MAX_PERCPU_VAR_CNT];
/* Number of valid entries at the start of percpu_vars[]. */
static int percpu_var_cnt;

/*
 * qsort()/bsearch() comparator ordering struct var_info entries by ascending
 * address. Only ->addr is compared, so a key with just .addr set is enough.
 * The comparison avoids subtraction, which could truncate a 64-bit difference
 * when narrowed to int.
 */
static int percpu_var_cmp(const void *_a, const void *_b)
{
	const struct var_info *a = _a;
	const struct var_info *b = _b;

	if (a->addr == b->addr)
		return 0;
	return a->addr < b->addr ? -1 : 1;
}

/*
 * Look up a per-CPU variable by its exact address.
 *
 * @addr: address to search for in percpu_vars[]
 * @sz:   out: size of the matching symbol (written only on a match)
 * @name: out: name of the matching symbol (written only on a match)
 *
 * Returns true when an entry with that address exists, false otherwise.
 * On a miss, *sz and *name are left untouched.
 */
static bool percpu_var_exists(uint64_t addr, uint32_t *sz, const char **name)
{
	const struct var_info *p;
	struct var_info key = { .addr = addr };

	p = bsearch(&key, percpu_vars, percpu_var_cnt,
		    sizeof(percpu_vars[0]), percpu_var_cmp);

	if (!p)
		return false;

	*sz = p->sz;
	*name = p->name;
	return true;
}
215252

216-
static struct variable *hashaddr__find_variable(const struct hlist_head hashtable[],
217-
const uint64_t addr)
253+
static int find_all_percpu_vars(struct btf_elf *btfe)
218254
{
219-
struct variable *variable;
220-
struct hlist_node *pos;
221-
uint16_t bucket = hashaddr__fn(addr);
222-
const struct hlist_head *head = &hashtable[bucket];
223-
224-
hlist_for_each_entry(variable, pos, head, tool_hnode) {
225-
if (variable->ip.addr == addr)
226-
return variable;
255+
uint32_t core_id;
256+
GElf_Sym sym;
257+
258+
/* cache variables' addresses, preparing for searching in symtab. */
259+
percpu_var_cnt = 0;
260+
261+
/* search within symtab for percpu variables */
262+
elf_symtab__for_each_symbol(btfe->symtab, core_id, sym) {
263+
const char *sym_name;
264+
uint64_t addr;
265+
uint32_t size;
266+
267+
/* compare a symbol's shndx to determine if it's a percpu variable */
268+
if (elf_sym__section(&sym) != btfe->percpu_shndx)
269+
continue;
270+
if (elf_sym__type(&sym) != STT_OBJECT)
271+
continue;
272+
273+
addr = elf_sym__value(&sym);
274+
/*
275+
* Store only those symbols that have allocated space in the percpu section.
276+
* This excludes the following three types of symbols:
277+
*
278+
* 1. __ADDRESSABLE(sym), which are forcely emitted as symbols.
279+
* 2. __UNIQUE_ID(prefix), which are introduced to generate unique ids.
280+
* 3. __exitcall(fn), functions which are labeled as exit calls.
281+
*
282+
* In addition, the variables defined using DEFINE_PERCPU_FIRST are
283+
* also not included, which currently includes:
284+
*
285+
* 1. fixed_percpu_data
286+
*/
287+
if (!addr)
288+
continue;
289+
290+
sym_name = elf_sym__name(&sym, btfe->symtab);
291+
if (!btf_name_valid(sym_name)) {
292+
dump_invalid_symbol("Found symbol of invalid name when encoding btf",
293+
sym_name, btf_elf__verbose, btf_elf__force);
294+
if (btf_elf__force)
295+
continue;
296+
return -1;
297+
}
298+
size = elf_sym__size(&sym);
299+
if (!size) {
300+
dump_invalid_symbol("Found symbol of zero size when encoding btf",
301+
sym_name, btf_elf__verbose, btf_elf__force);
302+
if (btf_elf__force)
303+
continue;
304+
return -1;
305+
}
306+
307+
if (btf_elf__verbose)
308+
printf("Found per-CPU symbol '%s' at address 0x%lx\n", sym_name, addr);
309+
310+
if (percpu_var_cnt == MAX_PERCPU_VAR_CNT) {
311+
fprintf(stderr, "Reached the limit of per-CPU variables: %d\n",
312+
MAX_PERCPU_VAR_CNT);
313+
return -1;
314+
}
315+
percpu_vars[percpu_var_cnt].addr = addr;
316+
percpu_vars[percpu_var_cnt].sz = size;
317+
percpu_vars[percpu_var_cnt].name = sym_name;
318+
percpu_var_cnt++;
227319
}
228320

229-
return NULL;
321+
if (percpu_var_cnt)
322+
qsort(percpu_vars, percpu_var_cnt, sizeof(percpu_vars[0]), percpu_var_cmp);
323+
324+
if (btf_elf__verbose)
325+
printf("Found %d per-CPU variables!\n", percpu_var_cnt);
326+
return 0;
230327
}
231328

232329
int cu__encode_btf(struct cu *cu, int verbose, bool force,
233330
bool skip_encoding_vars)
234331
{
235332
uint32_t type_id_off;
236333
uint32_t core_id;
334+
struct variable *var;
237335
struct function *fn;
238336
struct tag *pos;
239337
int err = 0;
240-
struct hlist_head hash_addr[HASHADDR__SIZE];
241-
struct variable *var;
242-
bool has_global_var = false;
243-
GElf_Sym sym;
244338

245339
if (btfe && strcmp(btfe->filename, cu->filename)) {
246340
err = btf_encoder__encode();
@@ -257,6 +351,9 @@ int cu__encode_btf(struct cu *cu, int verbose, bool force,
257351
if (!btfe)
258352
return -1;
259353

354+
if (!skip_encoding_vars && find_all_percpu_vars(btfe))
355+
goto out;
356+
260357
has_index_type = false;
261358
need_index_type = false;
262359
array_index_id = 0;
@@ -278,6 +375,7 @@ int cu__encode_btf(struct cu *cu, int verbose, bool force,
278375
}
279376

280377
btf_elf__verbose = verbose;
378+
btf_elf__force = force;
281379
type_id_off = btf__get_nr_types(btfe->btf);
282380

283381
cu__for_each_type(cu, core_id, pos) {
@@ -325,116 +423,62 @@ int cu__encode_btf(struct cu *cu, int verbose, bool force,
325423
if (verbose)
326424
printf("search cu '%s' for percpu global variables.\n", cu->name);
327425

328-
/* cache variables' addresses, preparing for searching in symtab. */
329-
for (core_id = 0; core_id < HASHADDR__SIZE; ++core_id)
330-
INIT_HLIST_HEAD(&hash_addr[core_id]);
331-
332426
cu__for_each_variable(cu, core_id, pos) {
333-
struct hlist_head *head;
427+
uint32_t size, type, linkage, offset;
428+
const char *name;
429+
uint64_t addr;
430+
int id;
334431

335432
var = tag__variable(pos);
336433
if (var->declaration && !var->spec)
337434
continue;
338435
/* percpu variables are allocated in global space */
339436
if (variable__scope(var) != VSCOPE_GLOBAL && !var->spec)
340437
continue;
341-
has_global_var = true;
342-
head = &hash_addr[hashaddr__fn(var->ip.addr)];
343-
hlist_add_head(&var->tool_hnode, head);
344-
}
345-
if (!has_global_var) {
346-
if (verbose)
347-
printf("cu has no global variable defined, skip.\n");
348-
goto out;
349-
}
350-
351-
/* search within symtab for percpu variables */
352-
elf_symtab__for_each_symbol(btfe->symtab, core_id, sym) {
353-
uint32_t linkage, type, size, offset;
354-
int32_t btf_var_id, btf_var_secinfo_id;
355-
uint64_t addr;
356-
const char *sym_name;
357-
358-
/* compare a symbol's shndx to determine if it's a percpu variable */
359-
if (elf_sym__section(&sym) != btfe->percpu_shndx)
360-
continue;
361-
if (elf_sym__type(&sym) != STT_OBJECT)
362-
continue;
363438

364-
addr = elf_sym__value(&sym);
365-
/*
366-
* Store only those symbols that have allocated space in the percpu section.
367-
* This excludes the following three types of symbols:
368-
*
369-
* 1. __ADDRESSABLE(sym), which are forcely emitted as symbols.
370-
* 2. __UNIQUE_ID(prefix), which are introduced to generate unique ids.
371-
* 3. __exitcall(fn), functions which are labeled as exit calls.
372-
*
373-
* In addition, the variables defined using DEFINE_PERCPU_FIRST are
374-
* also not included, which currently includes:
375-
*
376-
* 1. fixed_percpu_data
377-
*/
378-
if (!addr)
379-
continue;
380-
var = hashaddr__find_variable(hash_addr, addr);
381-
if (var == NULL)
382-
continue;
439+
/* addr has to be recorded before we follow spec */
440+
addr = var->ip.addr;
383441
if (var->spec)
384442
var = var->spec;
385443

386-
sym_name = elf_sym__name(&sym, btfe->symtab);
387-
if (!btf_name_valid(sym_name)) {
388-
dump_invalid_symbol("Found symbol of invalid name when encoding btf",
389-
sym_name, cu->name, verbose, force);
390-
if (force)
391-
continue;
392-
err = -1;
393-
break;
394-
}
395444
if (var->ip.tag.type == 0) {
396-
dump_invalid_symbol("Found symbol of void type when encoding btf",
397-
sym_name, cu->name, verbose, force);
398-
if (force)
399-
continue;
400-
err = -1;
401-
break;
402-
}
403-
type = type_id_off + var->ip.tag.type;
404-
size = elf_sym__size(&sym);
405-
if (!size) {
406-
dump_invalid_symbol("Found symbol of zero size when encoding btf",
407-
sym_name, cu->name, verbose, force);
445+
fprintf(stderr, "error: found variable in CU '%s' that has void type\n",
446+
cu->name);
408447
if (force)
409448
continue;
410449
err = -1;
411450
break;
412451
}
413452

414-
if (verbose)
415-
printf("symbol '%s' of address 0x%lx encoded\n",
416-
sym_name, addr);
453+
type = var->ip.tag.type + type_id_off;
454+
linkage = var->external ? BTF_VAR_GLOBAL_ALLOCATED : BTF_VAR_STATIC;
455+
if (!percpu_var_exists(addr, &size, &name))
456+
continue; /* not a per-CPU variable */
457+
458+
if (btf_elf__verbose) {
459+
printf("Variable '%s' from CU '%s' at address 0x%lx encoded\n",
460+
name, cu->name, addr);
461+
}
417462

418463
/* add a BTF_KIND_VAR in btfe->types */
419-
linkage = var->external ? BTF_VAR_GLOBAL_ALLOCATED : BTF_VAR_STATIC;
420-
btf_var_id = btf_elf__add_var_type(btfe, type, sym_name, linkage);
421-
if (btf_var_id < 0) {
464+
id = btf_elf__add_var_type(btfe, type, name, linkage);
465+
if (id < 0) {
422466
err = -1;
423-
printf("error: failed to encode variable '%s'\n", sym_name);
467+
fprintf(stderr, "error: failed to encode variable '%s' at addr 0x%lx\n",
468+
name, addr);
424469
break;
425470
}
426471

427472
/*
428473
* add a BTF_VAR_SECINFO in btfe->percpu_secinfo, which will be added into
429474
* btfe->types later when we add BTF_VAR_DATASEC.
430475
*/
431-
type = btf_var_id;
432476
offset = addr - btfe->percpu_base_addr;
433-
btf_var_secinfo_id = btf_elf__add_var_secinfo(&btfe->percpu_secinfo,
434-
type, offset, size);
435-
if (btf_var_secinfo_id < 0) {
477+
id = btf_elf__add_var_secinfo(&btfe->percpu_secinfo, id, offset, size);
478+
if (id < 0) {
436479
err = -1;
437-
printf("error: failed to encode var secinfo '%s'\n", sym_name);
480+
fprintf(stderr, "error: failed to encode section info for variable '%s' at addr 0x%lx\n",
481+
name, addr);
438482
break;
439483
}
440484
}

libbtf.c

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
#include "elf_symtab.h"
2929

3030
uint8_t btf_elf__verbose;
31+
uint8_t btf_elf__force;
3132

3233
static int btf_var_secinfo_cmp(const void *a, const void *b)
3334
{
@@ -62,7 +63,6 @@ int btf_elf__load(struct btf_elf *btfe)
6263
return 0;
6364
}
6465

65-
6666
struct btf_elf *btf_elf__new(const char *filename, Elf *elf)
6767
{
6868
struct btf_elf *btfe = zalloc(sizeof(*btfe));
@@ -771,10 +771,6 @@ int btf_elf__encode(struct btf_elf *btfe, uint8_t flags)
771771
{
772772
struct btf *btf = btfe->btf;
773773

774-
if (gobuffer__size(&btfe->percpu_secinfo) != 0)
775-
btf_elf__add_datasec_type(btfe, PERCPU_SECTION,
776-
&btfe->percpu_secinfo);
777-
778774
/* Empty file, nothing to do, so... done! */
779775
if (btf__get_nr_types(btf) == 0)
780776
return 0;

libbtf.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ struct btf_elf {
3030
};
3131

3232
extern uint8_t btf_elf__verbose;
33+
extern uint8_t btf_elf__force;
3334
#define btf_elf__verbose_log(fmt, ...) { if (btf_elf__verbose) printf(fmt, __VA_ARGS__); }
3435

3536
#define PERCPU_SECTION ".data..percpu"

0 commit comments

Comments
 (0)