Skip to content

Commit 6778826

Browse files
committed
Add option to analyze duplicates between two symbols
1 parent be2e80e commit 6778826

File tree

1 file changed: 40 additions and 17 deletions.

analytics/duplicates_analyze.py

Lines changed: 40 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -4,17 +4,23 @@
44
import os
55
import sys
66

7-
def get_defined_symbols(fname: str, verbose: bool = False) -> Dict[str, int]:
    """Return a mapping of defined symbol name to size in bytes for ``fname``.

    The symbols are obtained by running ``nm`` on the file.

    * On macOS (``sys.platform == 'darwin'``) ``nm`` is invoked with ``-n`` and
      each symbol's size is computed as the distance to the address on the
      following line; the last symbol is given a placeholder size of 4.
    * On every other platform ``nm --print-size`` is used and the size column
      is read directly; lines without exactly four fields are skipped.

    Args:
        fname: Path of the file to inspect.
        verbose: When true, print a ``Processing <fname>...done`` progress
            message around the work.

    Returns:
        Dict mapping symbol name to size. If a name occurs more than once the
        last occurrence wins.

    Raises:
        subprocess.CalledProcessError: If ``nm`` exits with a non-zero status.
        ValueError: If an address or size field is not valid hexadecimal.
    """
    if verbose:
        print(f"Processing {fname}...", end='', flush=True)

    on_darwin = sys.platform == 'darwin'
    if on_darwin:
        raw = check_output(['nm', '--defined-only', '-n', fname])
    else:
        raw = check_output(['nm', '--print-size', '--defined-only', fname])
    # Symbol names are not guaranteed to be ASCII; decode as UTF-8 and escape
    # undecodable bytes instead of aborting the whole analysis.
    lines = raw.decode('utf-8', errors='backslashreplace').split('\n')

    rc: Dict[str, int] = {}
    if on_darwin:
        # Collect (address, name) pairs first so that blank or malformed lines
        # cannot break the "distance to the next symbol" computation below.
        entries = []
        for line in lines:
            # maxsplit=2 keeps a name that itself contains spaces in one piece.
            fields = line.split(' ', 2)
            if len(fields) != 3:
                continue
            entries.append((int(fields[0], 16), fields[2]))
        last_idx = len(entries) - 1
        for idx, (addr, name) in enumerate(entries):
            # The final symbol has no successor to measure against.
            rc[name] = 4 if idx == last_idx else entries[idx + 1][0] - addr
    else:
        for line in lines:
            fields = line.split()
            # Expected shape: <address> <size> <type> <name>
            if len(fields) == 4:
                rc[fields[3]] = int(fields[1], 16)

    if verbose:
        print("done")
    return rc
1824

1925

2026
def get_deps(fname: str) -> List[str]:
@@ -24,12 +30,13 @@ def get_deps(fname: str) -> List[str]:
2430
for idx, line in enumerate(lines):
2531
if line.strip() != 'cmd LC_LOAD_DYLIB':
2632
continue
27-
path = lines[idx+2].strip()
33+
path = lines[idx + 2].strip()
2834
assert path.startswith('name')
29-
rc.append(os.path.basename(path.split(" ")[1]))
35+
rc.append(os.path.basename(path.split(' ')[1]))
3036
return rc
31-
lines = check_output(['readelf', '--dynamic', fname]).decode('ascii').split("\n")
32-
return [line.split("[")[1][:-1] for line in lines if '(NEEDED)' in line]
37+
lines = check_output(['readelf', '--dynamic', fname]).decode('ascii').split('\n')
38+
return [line.split('[')[1][:-1] for line in lines if '(NEEDED)' in line]
39+
3340

3441
def humansize(size):
3542
if size < 1024:
@@ -40,12 +47,11 @@ def humansize(size):
4047
return f"{size/(1024.0**2):.2f} Mb"
4148
return f"{size/(1024.0**3):.2f} Gb"
4249

50+
4351
def print_sizes(libname, depth: int = 2) -> None:
4452
libs = [libname]
4553
depth = 2
46-
print(f"Processing {libname}...", end='', flush=True)
47-
symbols = {os.path.basename(libname): get_defined_symbols(libname)}
48-
print("done")
54+
symbols = {os.path.basename(libname): get_defined_symbols(libname, verbose=True)}
4955
for _ in range(depth):
5056
for lib in libs:
5157
dirname = os.path.dirname(lib)
@@ -55,9 +61,7 @@ def print_sizes(libname, depth: int = 2) -> None:
5561
continue
5662
if path not in libs:
5763
libs.append(path)
58-
print(f"Processing {path}...", end='', flush=True)
59-
symbols[dep] = get_defined_symbols(path)
60-
print("done")
64+
symbols[dep] = get_defined_symbols(path, verbose=True)
6165

6266
for lib in libs:
6367
lib_symbols = symbols[os.path.basename(lib)]
@@ -72,5 +76,24 @@ def print_sizes(libname, depth: int = 2) -> None:
7276
rc += f" {dep} overlap is {humansize(overlap_size)}"
7377
print(rc)
7478

79+
80+
def print_symbols_overlap(libname1: str, libname2: str) -> None:
    """Print how much of ``libname1``'s symbol size is also defined in ``libname2``.

    Both files are scanned with ``get_defined_symbols``. The overlap is the
    set of symbol names defined in both files, and its size is measured using
    the sizes recorded for ``libname1``. If that size is zero a
    "does not overlap" message is printed; otherwise a summary line with the
    overlap size and its percentage of ``libname1``'s total is printed,
    followed by the overlapping symbol names, one per line.

    Args:
        libname1: Path of the file whose symbols are being measured.
        libname2: Path of the file to compare against.
    """
    sym1 = get_defined_symbols(libname1, verbose=True)
    sym2 = get_defined_symbols(libname2, verbose=True)
    sym1_size = sum(sym1.values())
    # Dict key views support set operations directly.
    sym_overlap = sym1.keys() & sym2.keys()
    overlap_size = sum(sym1[s] for s in sym_overlap)
    if overlap_size == 0:
        print(f"{libname1} symbols size {humansize(sym1_size)} does not overlap with {libname2}")
        return
    print(f"{libname1} symbols size {humansize(sym1_size)} overlap {humansize(overlap_size)} ({100.0 * overlap_size/sym1_size :.2f}%)")
    # Sort so the listing is reproducible between runs (set order is arbitrary).
    for sym in sorted(sym_overlap):
        print(sym)
93+
94+
7595
if __name__ == '__main__':
    # Exactly two arguments: report the symbol overlap between the two files.
    # Anything else: print sizes for the first argument, or for the default
    # library when no argument was given.
    cli_args = sys.argv[1:]
    if len(cli_args) == 2:
        print_symbols_overlap(cli_args[0], cli_args[1])
    else:
        target = cli_args[0] if cli_args else "lib/libtorch_cuda.so"
        print_sizes(target)

0 commit comments

Comments
 (0)