class FetchStyle(Enum):
    """Which external tool is used to read symbol information from the binary."""

    # Sizes come from `nm`; grouping is done on C++ namespaces.
    NM = auto()
    # Sizes come from `objdump --syms`; grouping is done on source file paths.
    OBJDUMP = auto()


# Maps the `--fetch-via` command line choice to its FetchStyle.
#
# The values MUST be FetchStyle members: `main` compares the looked-up value
# against `FetchStyle.NM` to decide which symbol reader to run.
__FETCH_STYLES__ = {
    "nm": FetchStyle.NM,
    "objdump": FetchStyle.OBJDUMP,
}
If it contains the zoom item, keep it, otherwise discard while tree_name and partial != zoom: - partial += "::" + tree_name[0] + partial += separator + tree_name[0] tree_name = tree_name[1:] if not tree_name: continue @@ -277,7 +302,7 @@ def build_treemap( if strip is not None: partial = "" for part_name in tree_name: - partial = "::" + part_name + partial = separator + part_name if partial == strip: break if partial == strip: @@ -285,7 +310,10 @@ def build_treemap( partial = "" for name in tree_name[:-1]: - next_value = partial + "::" + name + if not partial: + next_value = name + else: + next_value = partial + separator + name if next_value not in known_parents: known_parents.add(next_value) data["name"].append(next_value) @@ -329,57 +357,167 @@ def build_treemap( fig.show() -@click.command() -@click.option( - "--log-level", - default="INFO", - show_default=True, - type=click.Choice(list(__LOG_LEVELS__.keys()), case_sensitive=False), - help="Determines the verbosity of script output.", -) -@click.option( - "--display-type", - default="TREEMAP", - show_default=True, - type=click.Choice(list(__CHART_STYLES__.keys()), case_sensitive=False), - help="Style of the chart", -) -@click.option( - "--max-depth", - default=4, - show_default=True, - type=int, - help="Display depth by default", -) -@click.option( - "--zoom", - default=None, - help="Zoom in the graph to ONLY the specified path as root (e.g. ::chip::app)", -) -@click.option( - "--strip", - default=None, - help="Strip out a tree subset (e.g. 
def _source_paths_from_strings(elf_file: str) -> dict[str, list[str]]:
    """Guess source file locations by scanning the printable strings of a binary.

    Runs `strings` on `elf_file` and keeps every string that contains a `/`
    and ends in `.cpp`, `.c` or `.asm`.

    Returns a dict keyed by file base name whose value is the list of path
    components (base name included). When several paths share a base name the
    last one seen wins; there is no way to disambiguate them here.
    """
    source_re = re.compile(r'^(.*third_party/connectedhomeip/)?(?P<path>.*\.(cpp|c|asm)$)')
    sources: dict[str, list[str]] = {}

    for line in subprocess.check_output(["strings", elf_file]).decode("utf8").split('\n'):
        if '/' not in line:
            # want directory paths...
            continue
        m = source_re.match(line)
        if not m:
            continue

        path = m.group('path')

        # heuristics:
        #   - some paths start with relative paths and we remove that
        #   - remove intermediate ../
        while path.startswith('../'):
            path = path[3:]

        parts: list[str] = []
        for item in path.split('/'):
            if item == '..':
                # A path may contain more `..` than preceding components
                # (e.g. `a/../../b.c`); there is nothing left to drop then.
                if parts:
                    parts.pop()
            else:
                parts.append(item)

        # The regex guarantees the last component is the file name itself.
        sources[parts[-1]] = parts

    return sources


def symbols_from_objdump(elf_file: str) -> list[Symbol]:
    """Read sized symbols from `elf_file` using `objdump --syms --demangle`.

    Every returned Symbol has a `tree_path` of the form
    `[section, <source path components...>, symbol name]` when the source file
    of the symbol could be located in the binary strings, or
    `[section, 'UNKNOWN', file name, symbol name]` otherwise.

    Raises subprocess.CalledProcessError if `strings` or `objdump` fail.
    """
    # First try to figure out `source paths`. Do it the "ugly" way and search
    # for all strings that seem to match a 'source'.
    sources = _source_paths_from_strings(elf_file)

    items = subprocess.check_output(
        [
            "objdump",
            "--syms",
            "--demangle",
            elf_file,
        ]
    ).decode("utf8")

    # The format looks like:
    #
    # out/qpg-qpg6105-light/chip-qpg6105-lighting-example.out:     file format elf32-little
    #
    # SYMBOL TABLE:
    # 04000010 l    d  .bl_user_license       00000000 .bl_user_license
    # 04000800 l    d  .datajumptable 00000000 .datajumptable
    # 04000840 l    d  .flashjumptable        00000000 .flashjumptable
    # 04001600 l    d  .m_flashjumptable      00000000 .m_flashjumptable
    # 04001800 l    d  .bootloader    00000000 .bootloader
    # 04003d00 l    d  .rt_flash      00000000 .rt_flash
    # 04007000 l    d  .upgrade_image_user_license    00000000 .upgrade_image_user_license
    # 04008000 l    d  .loaded_user_license   00000000 .loaded_user_license
    # 04008080 l    d  .extended_user_license 00000000 .extended_user_license
    # 04008100 l    d  .isr_vector    00000000 .isr_vector
    # 040081c4 l    d  firmware_datafirmwaredata      00000000 firmware_datafirmwaredata
    # ....
    # 00000000 l    df *ABS*  00000000 gpJumpTables_DataTable.c
    # 04080384 l       .text  00000000 $t
    # 0408038c l       .text  00000000 $d
    # 04000800 l       .datajumptable 00000000 $d
    # 00000000 l    df *ABS*  00000000 gpJumpTables_RomLib_FlashJump_gcc.o
    # ....
    # 00000000 l    df *ABS*  00000000 ember-io-storage.cpp
    # 04012106 l       .text  00000000 $t
    # 04012122 l       .text  00000000 $d
    # 0401212a l       .text  00000000 $t
    # 04012136 l       .text  00000000 $d
    # 2003aa70 l       .data  00000000 $d
    # 200417a0 l       .bss   00000000 $d
    # 04012167 l       .text  00000000 $d
    # 04012168 l       .text  00000000 $t
    # ...
    # 200417a0 g     O .bss   00000103 chip::app::Compatibility::Internal::attributeIOBuffer
    # 04012107 g     F .text  0000008a chip::app::Compatibility::Internal::AttributeBaseType(unsigned char)
    #
    # Documentation at https://sourceware.org/binutils/docs/binutils/objdump.html
    #
    # Format is:
    #    - Address
    #    - set of characters and spaces for flags
    #    - section (or *ABS* or *UND*)
    #    - alignment or size (common symbols: alignment, otherwise size)
    #    - Symbol name
    # Flags are:
    #    - l,g,u,! => local, global, unique global, none (space) or both local and global (!)
    #    - w - weak (space is strong)
    #    - C - constructor
    #    - W - warning
    #    - I/i - indirect reference/evaluated during relocation processing
    #    - D/d - debugging symbol/dynamic debugging symbol
    #    - F/f/O - name of a function (F), of a file (f) or of an object (O)

    # Logic generally is:
    #   - can capture segment (.text, .data, .bss seem interesting)
    #   - file information exists (... df *ABS* of 0 size), however pointers
    #     inside it may be slightly off - we need to track these as .text
    #     seems to potentially be aligned
    #   - symbols have a size
    line_re = re.compile(
        r'^(?P<offset>[0-9a-f]{8})\s(?P<flags>.{7})\s+(?P<section>\S+)\s+(?P<size>\S+)\s*(?P<name>.*)$'
    )
    current_file_name = None

    offset_file_map: dict[int, str] = {}
    symbols: list[Symbol] = []
    unknown_file_names = set()

    for line in items.split("\n"):
        m = line_re.match(line.strip())
        if not m:
            continue

        captures = m.groupdict()
        size = int(captures['size'], 16)
        offset = int(captures['offset'], 16)

        # A zero-sized "df" entry in *ABS* announces the file that the
        # following local symbols belong to.
        if captures['flags'].endswith('df') and captures['section'] == '*ABS*' and size == 0:
            current_file_name = captures['name']
            continue

        if size == 0:
            # Zero-sized markers ($t, $d, ...) only help remember which file
            # an address belongs to.
            if current_file_name:
                offset_file_map[offset] = current_file_name
            continue

        # Find the offset in a file: either exact or a bit above a known
        # marker. Stop at the first (closest) match.
        symbol_file_name = current_file_name
        if not symbol_file_name:
            for delta in range(8):
                if offset - delta in offset_file_map:
                    symbol_file_name = offset_file_map[offset - delta]
                    break

        if symbol_file_name is None:
            # No file marker was seen at all; keep tree_path made of strings only.
            symbol_file_name = ''

        if symbol_file_name not in sources:
            if symbol_file_name not in unknown_file_names:
                logging.warning('Source %r is not known', symbol_file_name)
                unknown_file_names.add(symbol_file_name)
            path = [captures['section'], 'UNKNOWN', symbol_file_name, captures['name']]
        else:
            path = [captures['section']] + sources[symbol_file_name] + [captures['name']]

        symbols.append(
            Symbol(
                name=captures['name'],
                symbol_type=captures['section'],
                size=size,
                tree_path=path,
            )
        )

    return symbols
@click.command()
@click.option(
    "--log-level",
    type=click.Choice(list(__LOG_LEVELS__.keys()), case_sensitive=False),
    default="INFO",
    show_default=True,
    help="Determines the verbosity of script output.",
)
@click.option(
    "--display-type",
    type=click.Choice(list(__CHART_STYLES__.keys()), case_sensitive=False),
    default="TREEMAP",
    show_default=True,
    help="Style of the chart",
)
@click.option(
    "--fetch-via",
    type=click.Choice(list(__FETCH_STYLES__.keys()), case_sensitive=False),
    default="nm",
    show_default=True,
    help="How to read the binary symbols",
)
@click.option("--max-depth", type=int, default=4, show_default=True, help="Display depth by default")
@click.option(
    "--zoom",
    default=None,
    help="Zoom in the graph to ONLY the specified path as root (e.g. ::chip::app)",
)
@click.option("--strip", default=None, help="Strip out a tree subset (e.g. ::C)")
@click.argument("elf-file", type=Path)
def main(
    log_level,
    elf_file: Path,
    display_type: str,
    fetch_via: str,
    max_depth: int,
    zoom: Optional[str],
    strip: Optional[str],
):
    # NOTE: intentionally no docstring here; click would show it as the
    # command description in `--help`.
    #
    # Command line entry point: configure logging, read the symbols of
    # `elf_file` with the tool selected by `--fetch-via`, then render the chart.
    coloredlogs.install(
        level=__LOG_LEVELS__[log_level],
        fmt="%(asctime)s %(levelname)-7s %(message)s",
    )

    # The tree path separator depends on how paths were built:
    # C++ namespaces for nm, source file paths for objdump.
    use_nm = __FETCH_STYLES__[fetch_via] == FetchStyle.NM
    elf_path = elf_file.absolute().as_posix()
    if use_nm:
        symbols, separator = symbols_from_nm(elf_path), "::"
    else:
        symbols, separator = symbols_from_objdump(elf_path), "/"

    build_treemap(
        elf_file.name,
        symbols,
        separator,
        __CHART_STYLES__[display_type],
        max_depth,
        zoom,
        strip,
    )