From ce82cbb0e2caa9bc2d947f6b6cb980eda14bb4fa Mon Sep 17 00:00:00 2001 From: Wilko Nienhaus Date: Mon, 26 Jun 2023 08:27:03 +0300 Subject: [PATCH 01/17] First crude version of a disassembler Pass bytes from a hexdump in as command line arguments, eg: micropython -m tools.disassemble 401f 0040 (If the byte sequence is not quoted, all args are joined together into a single byte sequence. Spaces are allowed and will be ignored) --- tools/disassemble.py | 127 +++++++++++++++++++++++++++++++++++++++++++ tools/esp32_ulp | 1 + 2 files changed, 128 insertions(+) create mode 100644 tools/disassemble.py create mode 120000 tools/esp32_ulp diff --git a/tools/disassemble.py b/tools/disassemble.py new file mode 100644 index 0000000..d67c86f --- /dev/null +++ b/tools/disassemble.py @@ -0,0 +1,127 @@ +import esp32_ulp.opcodes as opcodes +import ubinascii +import sys + + +def decode_instruction(i): + ins = opcodes._end + ins.all = i # abuse a struct to get opcode and sub_opcode + + print(ubinascii.hexlify(i.to_bytes(4, 'little'))) + + if ins.opcode == opcodes.OPCODE_ADC: + print('OPCODE_ADC') + opcodes._adc.all = i + ins = opcodes._adc + elif ins.opcode == opcodes.OPCODE_ALU and ins.sub_opcode == opcodes.SUB_OPCODE_ALU_CNT: + print('OPCODE_ALU / SUB_OPCODE_ALU_CNT') + opcodes._alu_cnt.all = i + ins = opcodes._alu_cnt + elif ins.opcode == opcodes.OPCODE_ALU and ins.sub_opcode == opcodes.SUB_OPCODE_ALU_IMM: + print('OPCODE_ALU / SUB_OPCODE_ALU_IMM') + opcodes._alu_imm.all = i + ins = opcodes._alu_imm + elif ins.opcode == opcodes.OPCODE_ALU and ins.sub_opcode == opcodes.SUB_OPCODE_ALU_REG: + print('OPCODE_ALU / SUB_OPCODE_ALU_REG') + opcodes._alu_reg.all = i + ins = opcodes._alu_reg + elif ins.opcode == opcodes.OPCODE_BRANCH and ins.sub_opcode == opcodes.SUB_OPCODE_BX: + print('JUMP') + opcodes._bx.all = i + ins = opcodes._bx + elif ins.opcode == opcodes.OPCODE_BRANCH and ins.sub_opcode == opcodes.SUB_OPCODE_BR: + print('JUMPR') + opcodes._br.all = i + ins = opcodes._br + elif 
ins.opcode == opcodes.OPCODE_BRANCH and ins.sub_opcode == opcodes.SUB_OPCODE_BS: + print('JUMPS') + opcodes._bs.all = i + ins = opcodes._bs + elif ins.opcode == opcodes.OPCODE_DELAY: + print('OPCODE_DELAY') + opcodes._delay.all = i + ins = opcodes._delay + elif ins.opcode == opcodes.OPCODE_END and ins.sub_opcode == opcodes.SUB_OPCODE_END: + print('OPCODE_END') + opcodes._end.all = i + ins = opcodes._end + elif ins.opcode == opcodes.OPCODE_END and ins.sub_opcode == opcodes.SUB_OPCODE_SLEEP: + print('OPCODE_SLEEP') + opcodes._sleep.all = i + ins = opcodes._sleep + elif ins.opcode == opcodes.OPCODE_HALT: + print('OPCODE_HALT') + opcodes._halt.all = i + ins = opcodes._halt + elif ins.opcode == opcodes.OPCODE_I2C: + print('OPCODE_I2C') + opcodes._i2c.all = i + ins = opcodes._i2c + elif ins.opcode == opcodes.OPCODE_LD: + print('OPCODE_LD') + opcodes._ld.all = i + ins = opcodes._ld + elif ins.opcode == opcodes.OPCODE_RD_REG: + print('OPCODE_RD_REG') + opcodes._rd_reg.all = i + ins = opcodes._rd_reg + elif ins.opcode == opcodes.OPCODE_ST: + print('OPCODE_ST') + opcodes._st.all = i + ins = opcodes._st + elif ins.opcode == opcodes.OPCODE_TSENS: + print('OPCODE_TSENS') + opcodes._tsens.all = i + ins = opcodes._tsens + elif ins.opcode == opcodes.OPCODE_WR_REG: + print('OPCODE_WR_REG') + opcodes._wr_reg.all = i + ins = opcodes._wr_reg + + possible_fields = ( + 'addr', 'cmp', 'cycle_sel', 'cycles', 'data', 'delay', 'dreg', + 'high', 'i2c_sel', 'imm', 'low', 'mux', 'offset', 'opcode', + 'periph_sel', 'reg', 'rw', 'sar_sel', 'sel', 'sign', 'sreg', + 'sub_addr', 'sub_opcode', 'treg', 'type', 'unused', 'unused1', + 'unused2', 'wakeup' + ) + for field in possible_fields: + try: + # eval is ugly but constrained to possible_fields and variable ins + val = eval('i.%s' % field, {}, {'i': ins}) + except KeyError: + continue + extra = '' + if field == 'sel': + if ins.sub_opcode == opcodes.SUB_OPCODE_ALU_CNT: + alu_ops = ('INC', 'DEC', 'RST') + else: + alu_ops = ('ADD', 'SUB', 'AND', 'OR', 
'MOV', 'LSH', 'RSH') + extra = ' (%s)' % alu_ops[val] + elif field == 'cmp': + cmp_ops = ('LT', 'GE', 'LE', 'EQ', 'GT') + extra = ' (%s)' % cmp_ops[val] + print(" {:10} = {:3}{}".format(field, val, extra)) + + +def disassemble_manually(byte_sequence_string): + sequence = byte_sequence_string.strip().replace(' ','') + chars_per_instruction = 8 + list = [ + sequence[i:i+chars_per_instruction] + for i in range(0, len(sequence), chars_per_instruction) + ] + + for instruction in list: + byte_sequence = ubinascii.unhexlify(instruction.replace(' ','')) + i = int.from_bytes(byte_sequence, 'little') + decode_instruction(i) + + +def handle_cmdline(params): + byte_sequence = "".join(params) + disassemble_manually(byte_sequence) + + +if sys.argv: # if run from cmdline + handle_cmdline(sys.argv[1:]) diff --git a/tools/esp32_ulp b/tools/esp32_ulp new file mode 120000 index 0000000..0bc67d4 --- /dev/null +++ b/tools/esp32_ulp @@ -0,0 +1 @@ +../esp32_ulp \ No newline at end of file From 4c834e3dabad5d388a8581947dbc485568969d08 Mon Sep 17 00:00:00 2001 From: Wilko Nienhaus Date: Mon, 26 Jun 2023 08:38:32 +0300 Subject: [PATCH 02/17] Replace crude disassembly with lookup table In this approach, each opcode has its own decoding (using the correct struct for each opcode). Each opcode (or opcode+subopcode) also has its own rendering function. The lookup table is hierarchical so the same structure used for opcodes is also used within opcodes for looking up subopcodes. 
--- tools/disassemble.py | 184 ++++++++++++++++++++++++++----------------- 1 file changed, 111 insertions(+), 73 deletions(-) diff --git a/tools/disassemble.py b/tools/disassemble.py index d67c86f..fa55148 100644 --- a/tools/disassemble.py +++ b/tools/disassemble.py @@ -1,82 +1,122 @@ +from esp32_ulp.opcodes import RD_REG_PERIPH_RTC_CNTL, RD_REG_PERIPH_RTC_IO, RD_REG_PERIPH_RTC_I2C, \ + RD_REG_PERIPH_SENS, DR_REG_MAX_DIRECT import esp32_ulp.opcodes as opcodes +import esp32_ulp.soc as soc import ubinascii import sys +alu_cnt_ops = ('STAGE_INC', 'STAGE_DEC', 'STAGE_RST') +alu_ops = ('ADD', 'SUB', 'AND', 'OR', 'MOVE', 'LSH', 'RSH') +jump_types = ('--', 'EQ', 'OV') +cmp_ops = ('LT', 'GE', 'LE', 'EQ', 'GT') + +lookup = { + opcodes.OPCODE_ADC: ('ADC', opcodes._adc, lambda op: 'ADC r%s, %s, %s' % (op.dreg, op.mux, op.sar_sel)), + opcodes.OPCODE_ALU: ('ALU', opcodes._alu_imm, { + opcodes.SUB_OPCODE_ALU_CNT: ( + 'ALU_CNT', + opcodes._alu_cnt, + lambda op: '%s%s' % (alu_cnt_ops[op.sel], '' if op.sel == opcodes.ALU_SEL_RST else ' %s' % op.imm) + ), + opcodes.SUB_OPCODE_ALU_IMM: ( + 'ALU_IMM', + opcodes._alu_imm, + lambda op: '%s r%s, %s' % (alu_ops[op.sel], op.dreg, op.imm) if op.sel == opcodes.ALU_SEL_MOV + else '%s r%s, r%s, %s' % (alu_ops[op.sel], op.dreg, op.sreg, op.imm) + ), + opcodes.SUB_OPCODE_ALU_REG: ( + 'ALU_REG', + opcodes._alu_reg, + lambda op: '%s r%s, r%s, r%s' % (alu_ops[op.sel], op.dreg, op.sreg, op.treg) + ), + }), + opcodes.OPCODE_BRANCH: ('BRANCH', opcodes._bx, { + opcodes.SUB_OPCODE_BX: ( + 'BX', + opcodes._bx, + lambda op: 'JUMP %s%s' % (op.addr if op.reg == 0 else 'r%s' % op.dreg, ', %s' % jump_types[op.type] + if op.type != 0 else '') + ), + opcodes.SUB_OPCODE_BR: ( + 'BR', + opcodes._br, + lambda op: 'JUMPR %s, %s, %s' % ('%s%s' % ('-' if op.sign == 1 else '', op.offset), op.imm, cmp_ops[op.cmp]) + ), + opcodes.SUB_OPCODE_BS: ( + 'BS', + opcodes._bs, + lambda op: 'JUMPS %s, %s, %s' % ('%s%s' % ('-' if op.sign == 1 else '', op.offset), op.imm, 
cmp_ops[op.cmp]) + ), + }), + opcodes.OPCODE_DELAY: ( + 'DELAY', + opcodes._delay, + lambda op: 'NOP' if op.cycles == 0 else 'WAIT %s' % op.cycles + ), + opcodes.OPCODE_END: ('END', opcodes._end, { + opcodes.SUB_OPCODE_END: ( + 'WAKE', + opcodes._end + ), + opcodes.SUB_OPCODE_SLEEP: ( + 'SLEEP', + opcodes._sleep, + lambda op: 'SLEEP %s' % op.cycle_sel + ), + }), + opcodes.OPCODE_HALT: ('HALT', opcodes._halt), + opcodes.OPCODE_I2C: ( + 'I2C', + opcodes._i2c, + lambda op: 'I2C_%s %s, %s, %s, %s' % ('RD' if op.rw == 0 else 'WR', op.sub_addr, op.high, op.low, op.i2c_sel) + ), + opcodes.OPCODE_LD: ('LD', opcodes._ld, lambda op: 'LD r%s, r%s, %s' % (op.dreg, op.sreg, op.offset)), + opcodes.OPCODE_ST: ('ST', opcodes._st, lambda op: 'ST r%s, r%s, %s' % (op.sreg, op.dreg, op.offset)), + opcodes.OPCODE_RD_REG: ( + 'RD_REG', + opcodes._rd_reg, + lambda op: 'REG_RD 0x%x, %s, %s' % (op.periph_sel << 8 | op.addr, op.high, op.low) + ), + opcodes.OPCODE_WR_REG: ( + 'WR_REG', + opcodes._wr_reg, + lambda op: 'REG_WR 0x%x, %s, %s, %s' % (op.periph_sel << 8 | op.addr, op.high, op.low, op.data) + ), + opcodes.OPCODE_TSENS: ('TSENS', opcodes._tsens, lambda op: 'TSENS r%s, %s' % (op.dreg, op.delay)), +} + + def decode_instruction(i): ins = opcodes._end - ins.all = i # abuse a struct to get opcode and sub_opcode + ins.all = i # abuse a struct to get opcode print(ubinascii.hexlify(i.to_bytes(4, 'little'))) - if ins.opcode == opcodes.OPCODE_ADC: - print('OPCODE_ADC') - opcodes._adc.all = i - ins = opcodes._adc - elif ins.opcode == opcodes.OPCODE_ALU and ins.sub_opcode == opcodes.SUB_OPCODE_ALU_CNT: - print('OPCODE_ALU / SUB_OPCODE_ALU_CNT') - opcodes._alu_cnt.all = i - ins = opcodes._alu_cnt - elif ins.opcode == opcodes.OPCODE_ALU and ins.sub_opcode == opcodes.SUB_OPCODE_ALU_IMM: - print('OPCODE_ALU / SUB_OPCODE_ALU_IMM') - opcodes._alu_imm.all = i - ins = opcodes._alu_imm - elif ins.opcode == opcodes.OPCODE_ALU and ins.sub_opcode == opcodes.SUB_OPCODE_ALU_REG: - print('OPCODE_ALU / 
SUB_OPCODE_ALU_REG') - opcodes._alu_reg.all = i - ins = opcodes._alu_reg - elif ins.opcode == opcodes.OPCODE_BRANCH and ins.sub_opcode == opcodes.SUB_OPCODE_BX: - print('JUMP') - opcodes._bx.all = i - ins = opcodes._bx - elif ins.opcode == opcodes.OPCODE_BRANCH and ins.sub_opcode == opcodes.SUB_OPCODE_BR: - print('JUMPR') - opcodes._br.all = i - ins = opcodes._br - elif ins.opcode == opcodes.OPCODE_BRANCH and ins.sub_opcode == opcodes.SUB_OPCODE_BS: - print('JUMPS') - opcodes._bs.all = i - ins = opcodes._bs - elif ins.opcode == opcodes.OPCODE_DELAY: - print('OPCODE_DELAY') - opcodes._delay.all = i - ins = opcodes._delay - elif ins.opcode == opcodes.OPCODE_END and ins.sub_opcode == opcodes.SUB_OPCODE_END: - print('OPCODE_END') - opcodes._end.all = i - ins = opcodes._end - elif ins.opcode == opcodes.OPCODE_END and ins.sub_opcode == opcodes.SUB_OPCODE_SLEEP: - print('OPCODE_SLEEP') - opcodes._sleep.all = i - ins = opcodes._sleep - elif ins.opcode == opcodes.OPCODE_HALT: - print('OPCODE_HALT') - opcodes._halt.all = i - ins = opcodes._halt - elif ins.opcode == opcodes.OPCODE_I2C: - print('OPCODE_I2C') - opcodes._i2c.all = i - ins = opcodes._i2c - elif ins.opcode == opcodes.OPCODE_LD: - print('OPCODE_LD') - opcodes._ld.all = i - ins = opcodes._ld - elif ins.opcode == opcodes.OPCODE_RD_REG: - print('OPCODE_RD_REG') - opcodes._rd_reg.all = i - ins = opcodes._rd_reg - elif ins.opcode == opcodes.OPCODE_ST: - print('OPCODE_ST') - opcodes._st.all = i - ins = opcodes._st - elif ins.opcode == opcodes.OPCODE_TSENS: - print('OPCODE_TSENS') - opcodes._tsens.all = i - ins = opcodes._tsens - elif ins.opcode == opcodes.OPCODE_WR_REG: - print('OPCODE_WR_REG') - opcodes._wr_reg.all = i - ins = opcodes._wr_reg + params = lookup.get(ins.opcode, None) + + if not params: + print('Unknown instruction') + return + + if len(params) == 3: + name, ins, third = params + ins.all = i + + if callable(third): + params = (third(ins), ins) + else: + params = third.get(ins.sub_opcode, ()) + + if 
len(params) == 3: + name, ins, pretty = params + ins.all = i + name = pretty(ins) + else: + name, ins = params + ins.all = i + + print(name) possible_fields = ( 'addr', 'cmp', 'cycle_sel', 'cycles', 'data', 'delay', 'dreg', @@ -94,12 +134,10 @@ def decode_instruction(i): extra = '' if field == 'sel': if ins.sub_opcode == opcodes.SUB_OPCODE_ALU_CNT: - alu_ops = ('INC', 'DEC', 'RST') + extra = ' (%s)' % alu_cnt_ops[val] else: - alu_ops = ('ADD', 'SUB', 'AND', 'OR', 'MOV', 'LSH', 'RSH') - extra = ' (%s)' % alu_ops[val] + extra = ' (%s)' % alu_ops[val] elif field == 'cmp': - cmp_ops = ('LT', 'GE', 'LE', 'EQ', 'GT') extra = ' (%s)' % cmp_ops[val] print(" {:10} = {:3}{}".format(field, val, extra)) From 8325c2bddc8573715649a5b2f9ca12e9d68d296c Mon Sep 17 00:00:00 2001 From: Wilko Nienhaus Date: Mon, 26 Jun 2023 08:47:16 +0300 Subject: [PATCH 03/17] Add command line handling, implementing help (-h) --- tools/disassemble.py | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/tools/disassemble.py b/tools/disassemble.py index fa55148..fc35bf0 100644 --- a/tools/disassemble.py +++ b/tools/disassemble.py @@ -156,8 +156,27 @@ def disassemble_manually(byte_sequence_string): decode_instruction(i) +def print_help(): + print('Usage: disassemble.py [] ') + print('') + print('Options:') + print(' -h Show this help text') + print(' Sequence of hex bytes (8 per instruction)') + pass + + def handle_cmdline(params): - byte_sequence = "".join(params) + byte_sequence = '' + + while params: + if params[0] == '-h': + print_help() + sys.exit(0) + else: + byte_sequence += params[0] + + params = params[1:] # remove first param from list + disassemble_manually(byte_sequence) From e4b34e2a6ae917e280a3c39525a50fe3cde0ecd5 Mon Sep 17 00:00:00 2001 From: Wilko Nienhaus Date: Mon, 26 Jun 2023 19:58:34 +0300 Subject: [PATCH 04/17] Tease apart decoding of instruction and printing. Add unit tests. 
--- tests/00_unit_tests.sh | 2 +- tests/disassemble.py | 138 +++++++++++++++++++++++++++++++++++++++++ tests/tools | 1 + tools/disassemble.py | 28 ++++++--- 4 files changed, 160 insertions(+), 9 deletions(-) create mode 100644 tests/disassemble.py create mode 120000 tests/tools diff --git a/tests/00_unit_tests.sh b/tests/00_unit_tests.sh index ee1a239..b56fd3c 100755 --- a/tests/00_unit_tests.sh +++ b/tests/00_unit_tests.sh @@ -4,7 +4,7 @@ set -e -for file in opcodes assemble link util preprocess definesdb; do +for file in opcodes assemble link util preprocess definesdb disassemble; do echo testing $file... micropython $file.py done diff --git a/tests/disassemble.py b/tests/disassemble.py new file mode 100644 index 0000000..5cd0fc6 --- /dev/null +++ b/tests/disassemble.py @@ -0,0 +1,138 @@ +from tools.disassemble import decode_instruction +import esp32_ulp.opcodes as opcodes +import ubinascii + +tests = [] + + +def test(param): + tests.append(param) + + +def hex_to_int(sequence): + byte_sequence = ubinascii.unhexlify(sequence) + return int.from_bytes(byte_sequence, 'little') + + +def assert_decode(sequence, expected_struct, expected_name): + i = hex_to_int(sequence) + + ins, name = decode_instruction(i) + + assert ins is expected_struct, 'incorrect instruction struct' + assert name == expected_name, '%s != %s' % (name, expected_name) + + +def assert_decode_exception(sequence, expected_message): + i = hex_to_int(sequence) + + try: + decode_instruction(i) + except Exception as e: + assert str(e) == expected_message, str(e) + raised = True + else: + raised = False + + assert raised, 'Exception not raised' + + +@test +def test_unknown_instruction(): + assert_decode_exception("10000001", 'Unknown instruction') + + +# All hex sequences were generated using our assembler. +# Note: disassembled instructions always show field values according +# to what is actually encoded into the binary instruction, not as per +# original assembly code. 
+# For example in JUMP instructions in the source code one would +# specify jump offsets in bytes (e.g. 4 bytes) but in the actual +# instruction offset encoded in the binary instruction will be in +# words (1 word = 4 bytes). +# The disassembled instructions would therefore show as "JUMP 1" +# for what was originally "JUMP 4" in the source code.@test +@test +def test_all_instructions(): + # OPCODE_WR_REG = 1 + assert_decode("00000010", opcodes._wr_reg, 'REG_WR 0x0, 0, 0, 0') + + # OPCODE_RD_REG = 2 + assert_decode("00000020", opcodes._rd_reg, 'REG_RD 0x0, 0, 0') + + # OPCODE_I2C = 3 + assert_decode("00000030", opcodes._i2c, 'I2C_RD 0, 0, 0, 0') + assert_decode("00000038", opcodes._i2c, 'I2C_WR 0, 0, 0, 0') + + # OPCODE_DELAY = 4 + assert_decode("00000040", opcodes._delay, 'NOP') + assert_decode("01000040", opcodes._delay, 'WAIT 1') + + # OPCODE_ADC = 5 + assert_decode("00000050", opcodes._adc, 'ADC r0, 0, 0') + + # OPCODE_ST = 6 + assert_decode("00000068", opcodes._st, 'ST r0, r0, 0') + + # OPCODE_ALU = 7, SUB_OPCODE_ALU_REG + assert_decode("00000070", opcodes._alu_reg, 'ADD r0, r0, r0') + assert_decode("00002070", opcodes._alu_reg, 'SUB r0, r0, r0') + assert_decode("00004070", opcodes._alu_reg, 'AND r0, r0, r0') + assert_decode("00006070", opcodes._alu_reg, 'OR r0, r0, r0') + assert_decode("00008070", opcodes._alu_reg, "MOVE r0, r0") + assert_decode("0000a070", opcodes._alu_reg, 'LSH r0, r0, r0') + assert_decode("0000c070", opcodes._alu_reg, 'RSH r0, r0, r0') + + # OPCODE_ALU = 7, SUB_OPCODE_ALU_IMM + assert_decode("00000072", opcodes._alu_imm, 'ADD r0, r0, 0') + assert_decode("00002072", opcodes._alu_imm, 'SUB r0, r0, 0') + assert_decode("00004072", opcodes._alu_imm, 'AND r0, r0, 0') + assert_decode("00006072", opcodes._alu_imm, 'OR r0, r0, 0') + assert_decode("00008072", opcodes._alu_imm, "MOVE r0, 0") + assert_decode("0000a072", opcodes._alu_imm, 'LSH r0, r0, 0') + assert_decode("0000c072", opcodes._alu_imm, 'RSH r0, r0, 0') + + # OPCODE_ALU = 7, 
SUB_OPCODE_ALU_CNT + assert_decode("00004074", opcodes._alu_cnt, 'STAGE_RST') + assert_decode("00000074", opcodes._alu_cnt, 'STAGE_INC 0') + assert_decode("00002074", opcodes._alu_cnt, 'STAGE_DEC 0') + + # OPCODE_BRANCH = 8, SUB_OPCODE_BX (IMM) + assert_decode("00000080", opcodes._bx, 'JUMP 0') + assert_decode("00004080", opcodes._bx, 'JUMP 0, EQ') + assert_decode("00008080", opcodes._bx, 'JUMP 0, OV') + + # OPCODE_BRANCH = 8, SUB_OPCODE_BX (REG) + assert_decode("00002080", opcodes._bx, 'JUMP r0') + assert_decode("00006080", opcodes._bx, 'JUMP r0, EQ') + assert_decode("0000a080", opcodes._bx, 'JUMP r0, OV') + + # OPCODE_BRANCH = 8, SUB_OPCODE_BR + assert_decode("00000082", opcodes._br, 'JUMPR 0, 0, LT') + assert_decode("00000182", opcodes._br, 'JUMPR 0, 0, GE') + + # OPCODE_BRANCH = 8, SUB_OPCODE_BX + assert_decode("00000084", opcodes._bs, 'JUMPS 0, 0, LT') + assert_decode("00800084", opcodes._bs, 'JUMPS 0, 0, GE') + assert_decode("00000184", opcodes._bs, 'JUMPS 0, 0, LE') + + # OPCODE_END = 9, SUB_OPCODE_END + assert_decode("01000090", opcodes._end, 'WAKE') + + # OPCODE_END = 9, SUB_OPCODE_SLEEP + assert_decode("00000092", opcodes._sleep, 'SLEEP 0') + + # OPCODE_TSENS = 10 + assert_decode("000000a0", opcodes._tsens, 'TSENS r0, 0') + + # OPCODE_HALT = 11 + assert_decode("000000b0", opcodes._halt, 'HALT') + + # OPCODE_LD = 13 + assert_decode("000000d0", opcodes._ld, 'LD r0, r0, 0') + + +if __name__ == '__main__': + # run all methods marked with @test + for t in tests: + t() diff --git a/tests/tools b/tests/tools new file mode 120000 index 0000000..4887d6e --- /dev/null +++ b/tests/tools @@ -0,0 +1 @@ +../tools \ No newline at end of file diff --git a/tools/disassemble.py b/tools/disassemble.py index fc35bf0..184b128 100644 --- a/tools/disassemble.py +++ b/tools/disassemble.py @@ -28,7 +28,8 @@ opcodes.SUB_OPCODE_ALU_REG: ( 'ALU_REG', opcodes._alu_reg, - lambda op: '%s r%s, r%s, r%s' % (alu_ops[op.sel], op.dreg, op.sreg, op.treg) + lambda op: '%s r%s, r%s' % 
(alu_ops[op.sel], op.dreg, op.sreg) if op.sel == opcodes.ALU_SEL_MOV + else '%s r%s, r%s, r%s' % (alu_ops[op.sel], op.dreg, op.sreg, op.treg) ), }), opcodes.OPCODE_BRANCH: ('BRANCH', opcodes._bx, { @@ -91,13 +92,10 @@ def decode_instruction(i): ins = opcodes._end ins.all = i # abuse a struct to get opcode - print(ubinascii.hexlify(i.to_bytes(4, 'little'))) - params = lookup.get(ins.opcode, None) if not params: - print('Unknown instruction') - return + raise Exception('Unknown instruction') if len(params) == 3: name, ins, third = params @@ -116,6 +114,18 @@ def decode_instruction(i): name, ins = params ins.all = i + return ins, name + + +def decode_instruction_and_print(i): + print(ubinascii.hexlify(i.to_bytes(4, 'little'))) + + try: + ins, name = decode_instruction(i) + except Exception as e: + print(e) + return + print(name) possible_fields = ( @@ -132,12 +142,14 @@ def decode_instruction(i): except KeyError: continue extra = '' - if field == 'sel': + if field == 'sel': # ALU if ins.sub_opcode == opcodes.SUB_OPCODE_ALU_CNT: extra = ' (%s)' % alu_cnt_ops[val] else: extra = ' (%s)' % alu_ops[val] - elif field == 'cmp': + elif field == 'type': # JUMP + extra = ' (%s)' % jump_types[val] + elif field == 'cmp': # JUMPR/JUMPS extra = ' (%s)' % cmp_ops[val] print(" {:10} = {:3}{}".format(field, val, extra)) @@ -153,7 +165,7 @@ def disassemble_manually(byte_sequence_string): for instruction in list: byte_sequence = ubinascii.unhexlify(instruction.replace(' ','')) i = int.from_bytes(byte_sequence, 'little') - decode_instruction(i) + decode_instruction_and_print(i) def print_help(): From 2ebaacc540ecdb128cc6e396d4a1fed39c0434b7 Mon Sep 17 00:00:00 2001 From: Wilko Nienhaus Date: Mon, 26 Jun 2023 20:00:07 +0300 Subject: [PATCH 05/17] Allow choosing which unit tests to run Useful for running just one unit test file instead of all. Now one can pass the name of a unit test (or a list of names) to the 00_unit_tests.sh script. 
Example: cd tests ./00_unit_tests.sh disassemble # run only disassemble.py The default (if nothing is passed to the script) is still to run all tests as before. --- tests/00_unit_tests.sh | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/00_unit_tests.sh b/tests/00_unit_tests.sh index b56fd3c..1dc05e9 100755 --- a/tests/00_unit_tests.sh +++ b/tests/00_unit_tests.sh @@ -4,7 +4,9 @@ set -e -for file in opcodes assemble link util preprocess definesdb disassemble; do +LIST=${1:-opcodes assemble link util preprocess definesdb disassemble} + +for file in $LIST; do echo testing $file... micropython $file.py done From 278bbf016534a11fad3b07b44e97f656c5b64459 Mon Sep 17 00:00:00 2001 From: Wilko Nienhaus Date: Mon, 26 Jun 2023 19:35:07 +0300 Subject: [PATCH 06/17] Add unit tests for field level output --- tests/disassemble.py | 444 ++++++++++++++++++++++++++++++++++++++++++- tools/disassemble.py | 31 +-- 2 files changed, 463 insertions(+), 12 deletions(-) diff --git a/tests/disassemble.py b/tests/disassemble.py index 5cd0fc6..012fc14 100644 --- a/tests/disassemble.py +++ b/tests/disassemble.py @@ -1,4 +1,4 @@ -from tools.disassemble import decode_instruction +from tools.disassemble import decode_instruction, get_instruction_fields import esp32_ulp.opcodes as opcodes import ubinascii @@ -37,6 +37,16 @@ def assert_decode_exception(sequence, expected_message): assert raised, 'Exception not raised' +def assert_decode_fields(sequence, expected_field_details): + i = hex_to_int(sequence) + + ins, _ = decode_instruction(i) + + actual_field_details = get_instruction_fields(ins) + + assert actual_field_details == expected_field_details, '\n- %s \n+ %s' % (actual_field_details, expected_field_details) + + @test def test_unknown_instruction(): assert_decode_exception("10000001", 'Unknown instruction') @@ -132,6 +142,438 @@ def test_all_instructions(): assert_decode("000000d0", opcodes._ld, 'LD r0, r0, 0') +@test +def test_instruction_field_decoding(): + #
OPCODE_WR_REG = 1 + assert_decode_fields("000c8810", [ # REG_WR 0x0, 1, 2, 3 + ('addr' , 0, ''), + ('data' , 3, ''), + ('high' , 1, ''), + ('low' , 2, ''), + ('opcode' , 1, ''), + ('periph_sel', 0, ''), + ]) + + # OPCODE_RD_REG = 2 + assert_decode_fields("03000421", [ # REG_RD 0x3, 2, 1 + ('addr' , 3, ''), + ('high' , 2, ''), + ('low' , 1, ''), + ('opcode' , 2, ''), + ('periph_sel', 0, ''), + ('unused' , 0, ''), + ]) + + # OPCODE_I2C = 3 + assert_decode_fields("03001130", [ # I2C_RD 3, 2, 1, 0 + ('data' , 0, ''), + ('high' , 2, ''), + ('i2c_sel' , 0, ''), + ('low' , 1, ''), + ('opcode' , 3, ''), + ('rw' , 0, ''), + ('sub_addr' , 3, ''), + ('unused' , 0, ''), + ]) + assert_decode_fields("00011339", [ # I2C_WR 0, 2, 3, 4 + ('data' , 1, ''), + ('high' , 2, ''), + ('i2c_sel' , 4, ''), + ('low' , 3, ''), + ('opcode' , 3, ''), + ('rw' , 1, ''), + ('sub_addr' , 0, ''), + ('unused' , 0, ''), + ]) + + # OPCODE_DELAY = 4 + assert_decode_fields("00000040", [ # NOP + ('cycles' , 0, ''), + ('opcode' , 4, ''), + ('unused' , 0, ''), + ]) + assert_decode_fields("07000040", [ # WAIT 7 + ('cycles' , 7, ''), + ('opcode' , 4, ''), + ('unused' , 0, ''), + ]) + + # OPCODE_ADC = 5 + assert_decode_fields("07000050", [ # ADC r3, 1, 0 + ('cycles' , 0, ''), + ('dreg' , 3, ''), + ('mux' , 1, ''), + ('opcode' , 5, ''), + ('sar_sel' , 0, ''), + ('unused1' , 0, ''), + ('unused2' , 0, ''), + ]) + + # OPCODE_ST = 6 + assert_decode_fields("0b000068", [ # ST r3, r2, 0 + ('dreg' , 2, ''), + ('offset' , 0, ''), + ('opcode' , 6, ''), + ('sreg' , 3, ''), + ('sub_opcode', 4, ''), + ('unused1' , 0, ''), + ('unused2' , 0, ''), + ]) + + # OPCODE_ALU = 7, SUB_OPCODE_ALU_REG + assert_decode_fields("06000070", [ # ADD r2, r1, r0 + ('dreg' , 2, ''), + ('opcode' , 7, ''), + ('sel' , 0, ' (ADD)'), + ('sreg' , 1, ''), + ('sub_opcode', 0, ''), + ('treg' , 0, ''), + ('unused' , 0, ''), + ]) + assert_decode_fields("06002070", [ # SUB r2, r1, r0 + ('dreg' , 2, ''), + ('opcode' , 7, ''), + ('sel' , 1, ' (SUB)'), + 
('sreg' , 1, ''), + ('sub_opcode', 0, ''), + ('treg' , 0, ''), + ('unused' , 0, ''), + ]) + assert_decode_fields("06004070", [ # AND r2, r1, r0 + ('dreg' , 2, ''), + ('opcode' , 7, ''), + ('sel' , 2, ' (AND)'), + ('sreg' , 1, ''), + ('sub_opcode', 0, ''), + ('treg' , 0, ''), + ('unused' , 0, ''), + ]) + assert_decode_fields("06006070", [ # OR r2, r1, r0 + ('dreg' , 2, ''), + ('opcode' , 7, ''), + ('sel' , 3, ' (OR)'), + ('sreg' , 1, ''), + ('sub_opcode', 0, ''), + ('treg' , 0, ''), + ('unused' , 0, ''), + ]) + assert_decode_fields("0600a070", [ # LSH r2, r1, r0 + ('dreg' , 2, ''), + ('opcode' , 7, ''), + ('sel' , 5, ' (LSH)'), + ('sreg' , 1, ''), + ('sub_opcode', 0, ''), + ('treg' , 0, ''), + ('unused' , 0, ''), + ]) + assert_decode_fields("0600c070", [ # RSH r2, r1, r0 + ('dreg' , 2, ''), + ('opcode' , 7, ''), + ('sel' , 6, ' (RSH)'), + ('sreg' , 1, ''), + ('sub_opcode', 0, ''), + ('treg' , 0, ''), + ('unused' , 0, ''), + ]) + assert_decode_fields("06000070", [ # ADD r2, r1, r0 + ('dreg' , 2, ''), + ('opcode' , 7, ''), + ('sel' , 0, ' (ADD)'), + ('sreg' , 1, ''), + ('sub_opcode', 0, ''), + ('treg' , 0, ''), + ('unused' , 0, ''), + ]) + assert_decode_fields("06002070", [ # SUB r2, r1, r0 + ('dreg' , 2, ''), + ('opcode' , 7, ''), + ('sel' , 1, ' (SUB)'), + ('sreg' , 1, ''), + ('sub_opcode', 0, ''), + ('treg' , 0, ''), + ('unused' , 0, ''), + ]) + assert_decode_fields("06004070", [ # AND r2, r1, r0 + ('dreg' , 2, ''), + ('opcode' , 7, ''), + ('sel' , 2, ' (AND)'), + ('sreg' , 1, ''), + ('sub_opcode', 0, ''), + ('treg' , 0, ''), + ('unused' , 0, ''), + ]) + assert_decode_fields("06006070", [ # OR r2, r1, r0 + ('dreg' , 2, ''), + ('opcode' , 7, ''), + ('sel' , 3, ' (OR)'), + ('sreg' , 1, ''), + ('sub_opcode', 0, ''), + ('treg' , 0, ''), + ('unused' , 0, ''), + ]) + assert_decode_fields("16008070", [ # MOVE r2, r1 + ('dreg' , 2, ''), + ('opcode' , 7, ''), + ('sel' , 4, ' (MOVE)'), + ('sreg' , 1, ''), + ('sub_opcode', 0, ''), + ('treg' , 1, ''), + ('unused' , 0, ''), + 
]) + assert_decode_fields("0600a070", [ # LSH r2, r1, r0 + ('dreg' , 2, ''), + ('opcode' , 7, ''), + ('sel' , 5, ' (LSH)'), + ('sreg' , 1, ''), + ('sub_opcode', 0, ''), + ('treg' , 0, ''), + ('unused' , 0, ''), + ]) + assert_decode_fields("0600c070", [ # RSH r2, r1, r0 + ('dreg' , 2, ''), + ('opcode' , 7, ''), + ('sel' , 6, ' (RSH)'), + ('sreg' , 1, ''), + ('sub_opcode', 0, ''), + ('treg' , 0, ''), + ('unused' , 0, ''), + ]) + + # OPCODE_ALU = 7, SUB_OPCODE_ALU_IMM + assert_decode_fields("06000072", [ # ADD r2, r1, 0 + ('dreg' , 2, ''), + ('imm' , 0, ''), + ('opcode' , 7, ''), + ('sel' , 0, ' (ADD)'), + ('sreg' , 1, ''), + ('sub_opcode', 1, ''), + ('unused' , 0, ''), + ]) + assert_decode_fields("06002072", [ # SUB r2, r1, 0 + ('dreg' , 2, ''), + ('imm' , 0, ''), + ('opcode' , 7, ''), + ('sel' , 1, ' (SUB)'), + ('sreg' , 1, ''), + ('sub_opcode', 1, ''), + ('unused' , 0, ''), + ]) + assert_decode_fields("06004072", [ # AND r2, r1, 0 + ('dreg' , 2, ''), + ('imm' , 0, ''), + ('opcode' , 7, ''), + ('sel' , 2, ' (AND)'), + ('sreg' , 1, ''), + ('sub_opcode', 1, ''), + ('unused' , 0, ''), + ]) + assert_decode_fields("06006072", [ # OR r2, r1, 0 + ('dreg' , 2, ''), + ('imm' , 0, ''), + ('opcode' , 7, ''), + ('sel' , 3, ' (OR)'), + ('sreg' , 1, ''), + ('sub_opcode', 1, ''), + ('unused' , 0, ''), + ]) + assert_decode_fields("01008072", [ # MOVE r1, 0 + ('dreg' , 1, ''), + ('imm' , 0, ''), + ('opcode' , 7, ''), + ('sel' , 4, ' (MOVE)'), + ('sreg' , 0, ''), + ('sub_opcode', 1, ''), + ('unused' , 0, ''), + ]) + assert_decode_fields("0600a072", [ # LSH r2, r1, 0 + ('dreg' , 2, ''), + ('imm' , 0, ''), + ('opcode' , 7, ''), + ('sel' , 5, ' (LSH)'), + ('sreg' , 1, ''), + ('sub_opcode', 1, ''), + ('unused' , 0, ''), + ]) + assert_decode_fields("0600c072", [ # RSH r2, r1, 0 + ('dreg' , 2, ''), + ('imm' , 0, ''), + ('opcode' , 7, ''), + ('sel' , 6, ' (RSH)'), + ('sreg' , 1, ''), + ('sub_opcode', 1, ''), + ('unused' , 0, ''), + ]) + + # OPCODE_ALU = 7, SUB_OPCODE_ALU_CNT + 
assert_decode_fields("00004074", [ # STAGE_RST + ('imm' , 0, ''), + ('opcode' , 7, ''), + ('sel' , 2, ' (STAGE_RST)'), + ('sub_opcode', 2, ''), + ('unused1' , 0, ''), + ('unused2' , 0, ''), + ]) + assert_decode_fields("70000074", [ # STAGE_INC 7 + ('imm' , 7, ''), + ('opcode' , 7, ''), + ('sel' , 0, ' (STAGE_INC)'), + ('sub_opcode', 2, ''), + ('unused1' , 0, ''), + ('unused2' , 0, ''), + ]) + assert_decode_fields("30002074", [ # STAGE_DEC 3 + ('imm' , 3, ''), + ('opcode' , 7, ''), + ('sel' , 1, ' (STAGE_DEC)'), + ('sub_opcode', 2, ''), + ('unused1' , 0, ''), + ('unused2' , 0, ''), + ]) + + # OPCODE_BRANCH = 8, SUB_OPCODE_BX + assert_decode_fields("00002080", [ # JUMP r0 + ('addr' , 0, ''), + ('dreg' , 0, ''), + ('opcode' , 8, ''), + ('reg' , 1, ''), + ('sub_opcode', 0, ''), + ('type' , 0, ' (--)'), + ('unused' , 0, ''), + ]) + assert_decode_fields("01006080", [ # JUMP r1, EQ + ('addr' , 0, ''), + ('dreg' , 1, ''), + ('opcode' , 8, ''), + ('reg' , 1, ''), + ('sub_opcode', 0, ''), + ('type' , 1, ' (EQ)'), + ('unused' , 0, ''), + ]) + assert_decode_fields("0200a080", [ # JUMP r2, OV + ('addr' , 0, ''), + ('dreg' , 2, ''), + ('opcode' , 8, ''), + ('reg' , 1, ''), + ('sub_opcode', 0, ''), + ('type' , 2, ' (OV)'), + ('unused' , 0, ''), + ]) + assert_decode_fields("00000080", [ # JUMP 0 + ('addr' , 0, ''), + ('dreg' , 0, ''), + ('opcode' , 8, ''), + ('reg' , 0, ''), + ('sub_opcode', 0, ''), + ('type' , 0, ' (--)'), + ('unused' , 0, ''), + ]) + assert_decode_fields("04004080", [ # JUMP 1, EQ + ('addr' , 1, ''), + ('dreg' , 0, ''), + ('opcode' , 8, ''), + ('reg' , 0, ''), + ('sub_opcode', 0, ''), + ('type' , 1, ' (EQ)'), + ('unused' , 0, ''), + ]) + assert_decode_fields("08008080", [ # JUMP 2, OV + ('addr' , 2, ''), + ('dreg' , 0, ''), + ('opcode' , 8, ''), + ('reg' , 0, ''), + ('sub_opcode', 0, ''), + ('type' , 2, ' (OV)'), + ('unused' , 0, ''), + ]) + + # OPCODE_BRANCH = 8, SUB_OPCODE_BR + assert_decode_fields("01000082", [ # JUMPR 0, 1, LT + ('cmp' , 0, ' (LT)'), + 
('imm' , 1, ''), + ('offset' , 0, ''), + ('opcode' , 8, ''), + ('sign' , 0, ''), + ('sub_opcode', 1, ''), + ]) + assert_decode_fields("05000382", [ # JUMPR 1, 5, GE + ('cmp' , 1, ' (GE)'), + ('imm' , 5, ''), + ('offset' , 1, ''), + ('opcode' , 8, ''), + ('sign' , 0, ''), + ('sub_opcode', 1, ''), + ]) + + # OPCODE_BRANCH = 8, SUB_OPCODE_BS + assert_decode_fields("01000084", [ # JUMPS 0, 1, LT + ('cmp' , 0, ' (LT)'), + ('imm' , 1, ''), + ('offset' , 0, ''), + ('opcode' , 8, ''), + ('sign' , 0, ''), + ('sub_opcode', 2, ''), + ('unused' , 0, ''), + ]) + assert_decode_fields("05800284", [ # JUMPS 1, 5, GE + ('cmp' , 1, ' (GE)'), + ('imm' , 5, ''), + ('offset' , 1, ''), + ('opcode' , 8, ''), + ('sign' , 0, ''), + ('sub_opcode', 2, ''), + ('unused' , 0, ''), + ]) + assert_decode_fields("09000584", [ # JUMPS 2, 9, LE + ('cmp' , 2, ' (LE)'), + ('imm' , 9, ''), + ('offset' , 2, ''), + ('opcode' , 8, ''), + ('sign' , 0, ''), + ('sub_opcode', 2, ''), + ('unused' , 0, ''), + ]) + + # OPCODE_END = 9, SUB_OPCODE_END + assert_decode_fields("01000090", [ # WAKE + ('opcode' , 9, ''), + ('sub_opcode', 0, ''), + ('unused' , 0, ''), + ('wakeup' , 1, ''), + ]) + + # OPCODE_END = 9, SUB_OPCODE_SLEEP + assert_decode_fields("07000092", [ # SLEEP 7 + ('cycle_sel' , 7, ''), + ('opcode' , 9, ''), + ('sub_opcode', 1, ''), + ('unused' , 0, ''), + ]) + + # OPCODE_TSENS = 10 + assert_decode_fields("090000a0", [ # TSENS r0, 0 + ('delay' , 2, ''), + ('dreg' , 1, ''), + ('opcode' , 10, ''), + ('unused' , 0, ''), + ]) + + # OPCODE_HALT = 11 + assert_decode_fields("000000b0", [ # HALT + ('opcode' , 11, ''), + ('unused' , 0, ''), + ]) + + # OPCODE_LD = 13 + assert_decode_fields("060000d0", [ # LD r2, r1, 0 + ('dreg' , 2, ''), + ('offset' , 0, ''), + ('opcode' , 13, ''), + ('sreg' , 1, ''), + ('unused1' , 0, ''), + ('unused2' , 0, ''), + ]) + + if __name__ == '__main__': # run all methods marked with @test for t in tests: diff --git a/tools/disassemble.py b/tools/disassemble.py index 184b128..ee992e6 
100644 --- a/tools/disassemble.py +++ b/tools/disassemble.py @@ -117,17 +117,7 @@ def decode_instruction(i): return ins, name -def decode_instruction_and_print(i): - print(ubinascii.hexlify(i.to_bytes(4, 'little'))) - - try: - ins, name = decode_instruction(i) - except Exception as e: - print(e) - return - - print(name) - +def get_instruction_fields(ins): possible_fields = ( 'addr', 'cmp', 'cycle_sel', 'cycles', 'data', 'delay', 'dreg', 'high', 'i2c_sel', 'imm', 'low', 'mux', 'offset', 'opcode', @@ -135,6 +125,7 @@ def decode_instruction_and_print(i): 'sub_addr', 'sub_opcode', 'treg', 'type', 'unused', 'unused1', 'unused2', 'wakeup' ) + field_details = [] for field in possible_fields: try: # eval is ugly but constrained to possible_fields and variable ins @@ -151,6 +142,24 @@ def decode_instruction_and_print(i): extra = ' (%s)' % jump_types[val] elif field == 'cmp': # JUMPR/JUMPS extra = ' (%s)' % cmp_ops[val] + + field_details.append((field, val, extra)) + + return field_details + + +def decode_instruction_and_print(i): + print(ubinascii.hexlify(i.to_bytes(4, 'little'))) + + try: + ins, name = decode_instruction(i) + except Exception as e: + print(e) + return + + print(name) + + for field, val, extra in get_instruction_fields(ins): print(" {:10} = {:3}{}".format(field, val, extra)) From 6720584b3a75232b0bcaba897295024b7eca2571 Mon Sep 17 00:00:00 2001 From: Wilko Nienhaus Date: Mon, 26 Jun 2023 19:58:58 +0300 Subject: [PATCH 07/17] Show empty "instructions" as These are likely memory left empty for storing data. 
--- tests/disassemble.py | 5 +++++ tools/disassemble.py | 3 +++ 2 files changed, 8 insertions(+) diff --git a/tests/disassemble.py b/tests/disassemble.py index 012fc14..4a8ad24 100644 --- a/tests/disassemble.py +++ b/tests/disassemble.py @@ -52,6 +52,11 @@ def test_unknown_instruction(): assert_decode_exception("10000001", 'Unknown instruction') +@test +def test_empty_instruction(): + assert_decode_exception("00000000", '') + + # All hex sequences were generated using our assembler. # Note: disassembled instructions always show field values according # to what is actually encoded into the binary instruction, not as per diff --git a/tools/disassemble.py b/tools/disassemble.py index ee992e6..bdf3c18 100644 --- a/tools/disassemble.py +++ b/tools/disassemble.py @@ -89,6 +89,9 @@ def decode_instruction(i): + if i == 0: + raise Exception('') + ins = opcodes._end ins.all = i # abuse a struct to get opcode From 2a06a54874915cc8d2e1ea614b16564eacc97722 Mon Sep 17 00:00:00 2001 From: Wilko Nienhaus Date: Mon, 26 Jun 2023 23:46:23 +0300 Subject: [PATCH 08/17] Add support for disassembling a provided file The original "manual disassembling" now requires the "-m" option, followed by the sequence of hex digits representing the instructions. The sequence of hex digits does not need to be quoted. All parameters after -m will be joined together into a sequence of hex digits. 
--- tools/disassemble.py | 61 +++++++++++++++++++++++++++++++++++++++----- 1 file changed, 55 insertions(+), 6 deletions(-) diff --git a/tools/disassemble.py b/tools/disassemble.py index bdf3c18..f670f1f 100644 --- a/tools/disassemble.py +++ b/tools/disassemble.py @@ -151,6 +151,17 @@ def get_instruction_fields(ins): return field_details +def chunk_into_words(code, bytes_per_word, byteorder): + chunks = [ + ubinascii.hexlify(code[i:i + bytes_per_word]) + for i in range(0, len(code), bytes_per_word) + ] + + words = [int.from_bytes(ubinascii.unhexlify(i), byteorder) for i in chunks] + + return words + + def decode_instruction_and_print(i): print(ubinascii.hexlify(i.to_bytes(4, 'little'))) @@ -180,28 +191,66 @@ def disassemble_manually(byte_sequence_string): decode_instruction_and_print(i) +def disassemble_file(filename): + with open(filename, 'rb') as f: + data = f.read() + + code = data[12:] # text_offset (where code starts) is always 12 for ULP binaries + words = chunk_into_words(code, bytes_per_word=4, byteorder='little') + + for i in words: + decode_instruction_and_print(i) + + def print_help(): - print('Usage: disassemble.py [] ') + print('Usage: disassemble.py [] [-m | ]') print('') print('Options:') - print(' -h Show this help text') - print(' Sequence of hex bytes (8 per instruction)') + print(' -h Show this help text') + print(' -m Sequence of hex bytes (8 per instruction)') + print(' Path to ULP binary') pass def handle_cmdline(params): - byte_sequence = '' + filename = None + byte_sequence = None while params: if params[0] == '-h': print_help() sys.exit(0) + elif params[0] == '-m': + if len(params) == 1: + print_help() + sys.exit(1) + params = params[1:] # remove -m from list + + sequence_len = len(params) + for i in range(0, len(params)): + if params[i][0] == '-': # start of a next option + sequence_len = i-1 + break + + if sequence_len < 0: + print_help() + sys.exit(1) + + byte_sequence = "".join(params[:sequence_len+1]) + params = params[sequence_len:] 
+ elif params[0][0] == '-': + # ignore unknown options for now + pass else: - byte_sequence += params[0] + if not filename: + filename = params[0] params = params[1:] # remove first param from list - disassemble_manually(byte_sequence) + if byte_sequence: + disassemble_manually(byte_sequence) + elif filename: + disassemble_file(filename) if sys.argv: # if run from cmdline From a4867e8fed7b71a936c1acc8d06f406efc272e66 Mon Sep 17 00:00:00 2001 From: Wilko Nienhaus Date: Tue, 27 Jun 2023 18:18:09 +0300 Subject: [PATCH 09/17] Add verbose option. Hide field level detail when not verbose. --- tools/disassemble.py | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/tools/disassemble.py b/tools/disassemble.py index f670f1f..d4dbdb7 100644 --- a/tools/disassemble.py +++ b/tools/disassemble.py @@ -162,7 +162,7 @@ def chunk_into_words(code, bytes_per_word, byteorder): return words -def decode_instruction_and_print(i): +def decode_instruction_and_print(i, verbose=False): print(ubinascii.hexlify(i.to_bytes(4, 'little'))) try: @@ -173,11 +173,12 @@ def decode_instruction_and_print(i): print(name) - for field, val, extra in get_instruction_fields(ins): - print(" {:10} = {:3}{}".format(field, val, extra)) + if verbose: + for field, val, extra in get_instruction_fields(ins): + print(" {:10} = {:3}{}".format(field, val, extra)) -def disassemble_manually(byte_sequence_string): +def disassemble_manually(byte_sequence_string, verbose=False): sequence = byte_sequence_string.strip().replace(' ','') chars_per_instruction = 8 list = [ @@ -188,10 +189,10 @@ def disassemble_manually(byte_sequence_string): for instruction in list: byte_sequence = ubinascii.unhexlify(instruction.replace(' ','')) i = int.from_bytes(byte_sequence, 'little') - decode_instruction_and_print(i) + decode_instruction_and_print(i, verbose) -def disassemble_file(filename): +def disassemble_file(filename, verbose=False): with open(filename, 'rb') as f: data = f.read() @@ -199,7 
+200,7 @@ def disassemble_file(filename): words = chunk_into_words(code, bytes_per_word=4, byteorder='little') for i in words: - decode_instruction_and_print(i) + decode_instruction_and_print(i, verbose) def print_help(): @@ -208,11 +209,13 @@ def print_help(): print('Options:') print(' -h Show this help text') print(' -m Sequence of hex bytes (8 per instruction)') + print(' -v Verbose mode. Also show instruction fields') print(' Path to ULP binary') pass def handle_cmdline(params): + verbose = False filename = None byte_sequence = None @@ -238,6 +241,8 @@ def handle_cmdline(params): byte_sequence = "".join(params[:sequence_len+1]) params = params[sequence_len:] + elif params[0] == '-v': + verbose = True elif params[0][0] == '-': # ignore unknown options for now pass @@ -248,9 +253,9 @@ def handle_cmdline(params): params = params[1:] # remove first param from list if byte_sequence: - disassemble_manually(byte_sequence) + disassemble_manually(byte_sequence, verbose) elif filename: - disassemble_file(filename) + disassemble_file(filename, verbose) if sys.argv: # if run from cmdline From b51677df56c4f9f3d6f1283d1fcfcc9761848879 Mon Sep 17 00:00:00 2001 From: Wilko Nienhaus Date: Tue, 27 Jun 2023 18:20:59 +0300 Subject: [PATCH 10/17] Improve/clean-up dissambled output format Now the instruction (hex) and disassembled code will appear on one line next to each other and the bytes are no longer printed with Python specific formatting (not wrapped in b''). This results in a much cleaner looking output. 
Example output: 40008072 MOVE r0, 4 010000d0 LD r1, r0, 0 --- tools/disassemble.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/tools/disassemble.py b/tools/disassemble.py index d4dbdb7..d92ef8f 100644 --- a/tools/disassemble.py +++ b/tools/disassemble.py @@ -162,20 +162,24 @@ def chunk_into_words(code, bytes_per_word, byteorder): return words -def decode_instruction_and_print(i, verbose=False): - print(ubinascii.hexlify(i.to_bytes(4, 'little'))) +def print_code_line(i, asm): + lineformat = '{0} {1}' + hex = ubinascii.hexlify(i.to_bytes(4, 'little')) + print(lineformat.format(hex.decode('utf-8'), asm)) + +def decode_instruction_and_print(i, verbose=False): try: ins, name = decode_instruction(i) except Exception as e: - print(e) + print_code_line(i, e) return - print(name) + print_code_line(i, name) if verbose: for field, val, extra in get_instruction_fields(ins): - print(" {:10} = {:3}{}".format(field, val, extra)) + print(" {:10} = {:3}{}".format(field, val, extra)) def disassemble_manually(byte_sequence_string, verbose=False): From 40ea7e93163410b565a31f55f8a9939215d0beb8 Mon Sep 17 00:00:00 2001 From: Wilko Nienhaus Date: Tue, 27 Jun 2023 18:30:38 +0300 Subject: [PATCH 11/17] Add byte offset to output to make seeing offsets easier Offsets are in number of bytes (matches how 'GNU as' outputs listings) --- tools/disassemble.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/tools/disassemble.py b/tools/disassemble.py index d92ef8f..bc083fb 100644 --- a/tools/disassemble.py +++ b/tools/disassemble.py @@ -162,24 +162,24 @@ def chunk_into_words(code, bytes_per_word, byteorder): return words -def print_code_line(i, asm): - lineformat = '{0} {1}' +def print_code_line(byte_offset, i, asm): + lineformat = '{0:04x} {1} {2}' hex = ubinascii.hexlify(i.to_bytes(4, 'little')) - print(lineformat.format(hex.decode('utf-8'), asm)) + print(lineformat.format(byte_offset, hex.decode('utf-8'), asm)) -def 
decode_instruction_and_print(i, verbose=False): +def decode_instruction_and_print(byte_offset, i, verbose=False): try: ins, name = decode_instruction(i) except Exception as e: - print_code_line(i, e) + print_code_line(byte_offset, i, e) return - print_code_line(i, name) + print_code_line(byte_offset, i, name) if verbose: for field, val, extra in get_instruction_fields(ins): - print(" {:10} = {:3}{}".format(field, val, extra)) + print(" {:10} = {:3}{}".format(field, val, extra)) def disassemble_manually(byte_sequence_string, verbose=False): @@ -190,10 +190,10 @@ def disassemble_manually(byte_sequence_string, verbose=False): for i in range(0, len(sequence), chars_per_instruction) ] - for instruction in list: + for idx, instruction in enumerate(list): byte_sequence = ubinascii.unhexlify(instruction.replace(' ','')) i = int.from_bytes(byte_sequence, 'little') - decode_instruction_and_print(i, verbose) + decode_instruction_and_print(idx << 2, i, verbose) def disassemble_file(filename, verbose=False): @@ -203,8 +203,8 @@ def disassemble_file(filename, verbose=False): code = data[12:] # text_offset (where code starts) is always 12 for ULP binaries words = chunk_into_words(code, bytes_per_word=4, byteorder='little') - for i in words: - decode_instruction_and_print(i, verbose) + for idx, i in enumerate(words): + decode_instruction_and_print(idx << 2, i, verbose) def print_help(): From 08bb182199c53865b91094aca765f9554e936adf Mon Sep 17 00:00:00 2001 From: Wilko Nienhaus Date: Tue, 27 Jun 2023 18:35:33 +0300 Subject: [PATCH 12/17] use text_offset from ULP header instead of hardcoded offset --- tools/disassemble.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/tools/disassemble.py b/tools/disassemble.py index bc083fb..dbf5565 100644 --- a/tools/disassemble.py +++ b/tools/disassemble.py @@ -1,3 +1,4 @@ +from uctypes import struct, addressof, LITTLE_ENDIAN, UINT16, UINT32 from esp32_ulp.opcodes import RD_REG_PERIPH_RTC_CNTL, 
RD_REG_PERIPH_RTC_IO, RD_REG_PERIPH_RTC_I2C, \ RD_REG_PERIPH_SENS, DR_REG_MAX_DIRECT import esp32_ulp.opcodes as opcodes @@ -200,7 +201,16 @@ def disassemble_file(filename, verbose=False): with open(filename, 'rb') as f: data = f.read() - code = data[12:] # text_offset (where code starts) is always 12 for ULP binaries + binary_header_struct_def = dict( + magic = 0 | UINT32, + text_offset = 4 | UINT16, + text_size = 6 | UINT16, + data_size = 8 | UINT16, + bss_size = 10 | UINT16, + ) + h = struct(addressof(data), binary_header_struct_def, LITTLE_ENDIAN) + + code = data[h.text_offset:] words = chunk_into_words(code, bytes_per_word=4, byteorder='little') for idx, i in enumerate(words): From 15a631ae7ed837357e2e020b5d3ef332bd54ad74 Mon Sep 17 00:00:00 2001 From: Wilko Nienhaus Date: Tue, 27 Jun 2023 18:38:13 +0300 Subject: [PATCH 13/17] Output header in verbose mode. Also validate ULP header. If the magic bytes in the header are not 'ulp\0' then the file is not a ULP binary or otherwise corrupt. 
--- tools/disassemble.py | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/tools/disassemble.py b/tools/disassemble.py index dbf5565..4bcb958 100644 --- a/tools/disassemble.py +++ b/tools/disassemble.py @@ -163,6 +163,16 @@ def chunk_into_words(code, bytes_per_word, byteorder): return words +def print_ulp_header(h): + print('ULP magic : %s (0x%08x)' % (h.magic.to_bytes(4, 'little'), h.magic)) + print('.text offset : %s (0x%02x)' % (h.text_offset, h.text_offset)) + print('.text size : %s (0x%02x)' % (h.text_size, h.text_size)) + print('.data offset : %s (0x%02x)' % (h.text_offset+h.text_size, h.text_offset+h.text_size)) + print('.data size : %s (0x%02x)' % (h.data_size, h.data_size)) + print('.bss size : %s (0x%02x)' % (h.bss_size, h.bss_size)) + print('----------------------------------------') + + def print_code_line(byte_offset, i, asm): lineformat = '{0:04x} {1} {2}' hex = ubinascii.hexlify(i.to_bytes(4, 'little')) @@ -210,6 +220,13 @@ def disassemble_file(filename, verbose=False): ) h = struct(addressof(data), binary_header_struct_def, LITTLE_ENDIAN) + if (h.magic != 0x00706c75): + print('Invalid signature: 0x%08x (should be: 0x%08x)' % (h.magic, 0x00706c75)) + return + + if verbose: + print_ulp_header(h) + code = data[h.text_offset:] words = chunk_into_words(code, bytes_per_word=4, byteorder='little') @@ -223,7 +240,7 @@ def print_help(): print('Options:') print(' -h Show this help text') print(' -m Sequence of hex bytes (8 per instruction)') - print(' -v Verbose mode. Also show instruction fields') + print(' -v Verbose mode. 
Show ULP header and fields of each instruction') print(' Path to ULP binary') pass From 59766fb979786c81c1f23533875164e7239c8494 Mon Sep 17 00:00:00 2001 From: Wilko Nienhaus Date: Tue, 27 Jun 2023 18:45:34 +0300 Subject: [PATCH 14/17] Print .text and .data section separately --- tools/disassemble.py | 32 ++++++++++++++++++++++++++++---- 1 file changed, 28 insertions(+), 4 deletions(-) diff --git a/tools/disassemble.py b/tools/disassemble.py index 4bcb958..7a92552 100644 --- a/tools/disassemble.py +++ b/tools/disassemble.py @@ -164,6 +164,7 @@ def chunk_into_words(code, bytes_per_word, byteorder): def print_ulp_header(h): + print('header') print('ULP magic : %s (0x%08x)' % (h.magic.to_bytes(4, 'little'), h.magic)) print('.text offset : %s (0x%02x)' % (h.text_offset, h.text_offset)) print('.text size : %s (0x%02x)' % (h.text_size, h.text_size)) @@ -193,6 +194,25 @@ def decode_instruction_and_print(byte_offset, i, verbose=False): print(" {:10} = {:3}{}".format(field, val, extra)) +def print_text_section(code, verbose=False): + print('.text') + + words = chunk_into_words(code, bytes_per_word=4, byteorder='little') + + for idx, i in enumerate(words): + decode_instruction_and_print(idx << 2,i , verbose) + + +def print_data_section(data_offset, code): + print('.data') + + words = chunk_into_words(code, bytes_per_word=4, byteorder='little') + + for idx, i in enumerate(words): + asm = "" if i == 0 else "" + print_code_line(data_offset + (idx << 2), i, asm) + + def disassemble_manually(byte_sequence_string, verbose=False): sequence = byte_sequence_string.strip().replace(' ','') chars_per_instruction = 8 @@ -227,11 +247,15 @@ def disassemble_file(filename, verbose=False): if verbose: print_ulp_header(h) - code = data[h.text_offset:] - words = chunk_into_words(code, bytes_per_word=4, byteorder='little') + code = data[h.text_offset:(h.text_offset+h.text_size)] + print_text_section(code, verbose) - for idx, i in enumerate(words): - decode_instruction_and_print(idx << 2, i, 
verbose) + if verbose: + print('----------------------------------------') + + data_offset = h.text_offset+h.text_size + code = data[data_offset:(data_offset+h.data_size)] + print_data_section(data_offset-h.text_offset, code) def print_help(): From 10a5051b92e5fab6f7ebd9e8829702a1c82c5519 Mon Sep 17 00:00:00 2001 From: Wilko Nienhaus Date: Tue, 27 Jun 2023 20:17:44 +0300 Subject: [PATCH 15/17] Add hex representation for any field value > 9 Some values are easier to read as hex values than as decimal. For example peripheral register addresses like 0x123 where the first digit (1) indicates which peripheral register to address, while the remaining 2 digits (0x23) are the offset within that register in number of 32-bit words. Also absolute JUMP addresses are easier to find via the hex value given that the disassembler includes the byte offset of each instruction in hex format. --- tests/disassemble.py | 18 +++++++++--------- tools/disassemble.py | 5 ++++- 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/tests/disassemble.py b/tests/disassemble.py index 4a8ad24..7e7f6df 100644 --- a/tests/disassemble.py +++ b/tests/disassemble.py @@ -150,22 +150,22 @@ def test_all_instructions(): @test def test_instruction_field_decoding(): # OPCODE_WR_REG = 1 - assert_decode_fields("000c8810", [ # REG_WR 0x0, 1, 2, 3 - ('addr' , 0, ''), + assert_decode_fields("230d8810", [ # REG_WR 0x123, 1, 2, 3 + ('addr' , 35, ' (0x23)'), ('data' , 3, ''), ('high' , 1, ''), ('low' , 2, ''), ('opcode' , 1, ''), - ('periph_sel', 0, ''), + ('periph_sel', 1, ''), ]) # OPCODE_RD_REG = 2 - assert_decode_fields("03000421", [ # REG_RD 0x3, 2, 1 - ('addr' , 3, ''), + assert_decode_fields("21030421", [ # REG_RD 0x321, 2, 1 + ('addr' , 33, ' (0x21)'), ('high' , 2, ''), ('low' , 1, ''), ('opcode' , 2, ''), - ('periph_sel', 0, ''), + ('periph_sel', 3, ''), ('unused' , 0, ''), ]) @@ -558,13 +558,13 @@ def test_instruction_field_decoding(): assert_decode_fields("090000a0", [ # TSENS r0, 0 ('delay' , 
2, ''), ('dreg' , 1, ''), - ('opcode' , 10, ''), + ('opcode' , 10, ' (0x0a)'), ('unused' , 0, ''), ]) # OPCODE_HALT = 11 assert_decode_fields("000000b0", [ # HALT - ('opcode' , 11, ''), + ('opcode' , 11, ' (0x0b)'), ('unused' , 0, ''), ]) @@ -572,7 +572,7 @@ def test_instruction_field_decoding(): assert_decode_fields("060000d0", [ # LD r2, r1, 0 ('dreg' , 2, ''), ('offset' , 0, ''), - ('opcode' , 13, ''), + ('opcode' , 13, ' (0x0d)'), ('sreg' , 1, ''), ('unused1' , 0, ''), ('unused2' , 0, ''), diff --git a/tools/disassemble.py b/tools/disassemble.py index 7a92552..f647576 100644 --- a/tools/disassemble.py +++ b/tools/disassemble.py @@ -131,12 +131,15 @@ def get_instruction_fields(ins): ) field_details = [] for field in possible_fields: + extra = '' try: # eval is ugly but constrained to possible_fields and variable ins val = eval('i.%s' % field, {}, {'i': ins}) + if (val>9): + extra = ' (0x%02x)' % val except KeyError: continue - extra = '' + if field == 'sel': # ALU if ins.sub_opcode == opcodes.SUB_OPCODE_ALU_CNT: extra = ' (%s)' % alu_cnt_ops[val] From eff6f96e50d672835cd344cc6f5008f7c1bb5dc1 Mon Sep 17 00:00:00 2001 From: Wilko Nienhaus Date: Tue, 27 Jun 2023 21:01:48 +0300 Subject: [PATCH 16/17] Add integration tests for disassembler Test both disassembling a file (assembled from source for the test), and disassembling a byte sequence provided on the command line. Source code to be assembled and expected disassembler listings are provided in the tests/fixtures directory. 
--- .github/workflows/run_tests.yaml | 7 + tests/03_disassembler_tests.sh | 67 +++++++ tests/fixtures/all_opcodes-v.lst | 313 ++++++++++++++++++++++++++++++ tests/fixtures/all_opcodes.S | 62 ++++++ tests/fixtures/all_opcodes.lst | 45 +++++ tests/fixtures/manual_bytes-v.lst | 42 ++++ tests/fixtures/manual_bytes.lst | 6 + 7 files changed, 542 insertions(+) create mode 100755 tests/03_disassembler_tests.sh create mode 100644 tests/fixtures/all_opcodes-v.lst create mode 100644 tests/fixtures/all_opcodes.S create mode 100644 tests/fixtures/all_opcodes.lst create mode 100644 tests/fixtures/manual_bytes-v.lst create mode 100644 tests/fixtures/manual_bytes.lst diff --git a/.github/workflows/run_tests.yaml b/.github/workflows/run_tests.yaml index ab8d4bd..40ef419 100644 --- a/.github/workflows/run_tests.yaml +++ b/.github/workflows/run_tests.yaml @@ -91,3 +91,10 @@ jobs: export PATH=$PATH:${{ steps.fetch_binutils.outputs.bin_dir }} cd tests ./02_compat_rtc_tests.sh + + - name: Run disassembler tests + id: disassembler_tests + run: | + export PATH=$PATH:${{ steps.build_micropython.outputs.bin_dir }} + cd tests + ./03_disassembler_tests.sh diff --git a/tests/03_disassembler_tests.sh b/tests/03_disassembler_tests.sh new file mode 100755 index 0000000..7c76f11 --- /dev/null +++ b/tests/03_disassembler_tests.sh @@ -0,0 +1,67 @@ +#!/bin/bash + +set -e + +test_disassembling_a_file() { + local verbose + if [ "$1" == verbose ]; then + verbose=-v + echo -e "Testing disassembling a file in VERBOSE mode" + else + echo -e "Testing disassembling a file in NORMAL mode" + fi + + testname=all_opcodes + fixture=fixtures/${testname}.S + echo -e "\tBuilding $fixture using micropython-esp32-ulp" + + log_file="${testname}.log" + ulp_file="fixtures/${testname}.ulp" + micropython -m esp32_ulp $fixture 1>$log_file # generates $ulp_file + + lst_file="${testname}.lst" + lst_file_fixture=fixtures/${testname}${verbose}.lst + echo -e "\tDisassembling $ulp_file using micropython-esp32-ulp disassembler" + 
micropython tools/disassemble.py $verbose $ulp_file > $lst_file + + if ! diff $lst_file_fixture $lst_file 1>/dev/null; then + echo -e "\tDisassembled output differs from expected output!" + echo "" + echo "Disassembly test failed for $fixture" + echo "micropython-esp32-ulp log:" + cat $log_file + echo "Diff of disassembly: expected vs actual" + diff -u $lst_file_fixture $lst_file + fi +} + +test_disassembling_a_manual_sequence() { + local verbose + if [ "$1" == verbose ]; then + verbose=-v + echo -e "Testing disassembling a manual byte sequence in VERBOSE mode" + else + echo -e "Testing disassembling a manual byte sequence in NORMAL mode" + fi + + sequence="e1af 8c72 0100 0068 2705 cc19 0005 681d 0000 00a0 0000 0074" + + lst_file="manual_bytes.lst" + lst_file_fixture=fixtures/manual_bytes${verbose}.lst + echo -e "\tDisassembling manual byte sequence using micropython-esp32-ulp disassembler" + micropython tools/disassemble.py $verbose -m $sequence > $lst_file + + if ! diff $lst_file_fixture $lst_file 1>/dev/null; then + echo -e "\tDisassembled output differs from expected output!" 
+ echo "" + echo "Disassembly test failed for manual byte sequence" + echo "Diff of disassembly: expected vs actual" + diff -u $lst_file_fixture $lst_file + fi +} + +test_disassembling_a_file +test_disassembling_a_file verbose + +test_disassembling_a_manual_sequence +test_disassembling_a_manual_sequence verbose diff --git a/tests/fixtures/all_opcodes-v.lst b/tests/fixtures/all_opcodes-v.lst new file mode 100644 index 0000000..4bc7975 --- /dev/null +++ b/tests/fixtures/all_opcodes-v.lst @@ -0,0 +1,313 @@ +header +ULP magic : b'ulp\x00' (0x00706c75) +.text offset : 12 (0x0c) +.text size : 164 (0xa4) +.data offset : 176 (0xb0) +.data size : 8 (0x08) +.bss size : 0 (0x00) +---------------------------------------- +.text +0000 230d8810 REG_WR 0x123, 1, 2, 3 + addr = 35 (0x23) + data = 3 + high = 1 + low = 2 + opcode = 1 + periph_sel = 1 +0004 21030421 REG_RD 0x321, 2, 1 + addr = 33 (0x21) + high = 2 + low = 1 + opcode = 2 + periph_sel = 3 + unused = 0 +0008 03001130 I2C_RD 3, 2, 1, 0 + data = 0 + high = 2 + i2c_sel = 0 + low = 1 + opcode = 3 + rw = 0 + sub_addr = 3 + unused = 0 +000c 00011339 I2C_WR 0, 2, 3, 4 + data = 1 + high = 2 + i2c_sel = 4 + low = 3 + opcode = 3 + rw = 1 + sub_addr = 0 + unused = 0 +0010 00000040 NOP + cycles = 0 + opcode = 4 + unused = 0 +0014 07000040 WAIT 7 + cycles = 7 + opcode = 4 + unused = 0 +0018 07000050 ADC r3, 1, 0 + cycles = 0 + dreg = 3 + mux = 1 + opcode = 5 + sar_sel = 0 + unused1 = 0 + unused2 = 0 +001c 0b000068 ST r3, r2, 0 + dreg = 2 + offset = 0 + opcode = 6 + sreg = 3 + sub_opcode = 4 + unused1 = 0 + unused2 = 0 +0020 06000070 ADD r2, r1, r0 + dreg = 2 + opcode = 7 + sel = 0 (ADD) + sreg = 1 + sub_opcode = 0 + treg = 0 + unused = 0 +0024 06002070 SUB r2, r1, r0 + dreg = 2 + opcode = 7 + sel = 1 (SUB) + sreg = 1 + sub_opcode = 0 + treg = 0 + unused = 0 +0028 06004070 AND r2, r1, r0 + dreg = 2 + opcode = 7 + sel = 2 (AND) + sreg = 1 + sub_opcode = 0 + treg = 0 + unused = 0 +002c 06006070 OR r2, r1, r0 + dreg = 2 + opcode = 7 + 
sel = 3 (OR) + sreg = 1 + sub_opcode = 0 + treg = 0 + unused = 0 +0030 16008070 MOVE r2, r1 + dreg = 2 + opcode = 7 + sel = 4 (MOVE) + sreg = 1 + sub_opcode = 0 + treg = 1 + unused = 0 +0034 0600a070 LSH r2, r1, r0 + dreg = 2 + opcode = 7 + sel = 5 (LSH) + sreg = 1 + sub_opcode = 0 + treg = 0 + unused = 0 +0038 0600c070 RSH r2, r1, r0 + dreg = 2 + opcode = 7 + sel = 6 (RSH) + sreg = 1 + sub_opcode = 0 + treg = 0 + unused = 0 +003c 06000072 ADD r2, r1, 0 + dreg = 2 + imm = 0 + opcode = 7 + sel = 0 (ADD) + sreg = 1 + sub_opcode = 1 + unused = 0 +0040 06002072 SUB r2, r1, 0 + dreg = 2 + imm = 0 + opcode = 7 + sel = 1 (SUB) + sreg = 1 + sub_opcode = 1 + unused = 0 +0044 06004072 AND r2, r1, 0 + dreg = 2 + imm = 0 + opcode = 7 + sel = 2 (AND) + sreg = 1 + sub_opcode = 1 + unused = 0 +0048 06006072 OR r2, r1, 0 + dreg = 2 + imm = 0 + opcode = 7 + sel = 3 (OR) + sreg = 1 + sub_opcode = 1 + unused = 0 +004c 01008072 MOVE r1, 0 + dreg = 1 + imm = 0 + opcode = 7 + sel = 4 (MOVE) + sreg = 0 + sub_opcode = 1 + unused = 0 +0050 0600a072 LSH r2, r1, 0 + dreg = 2 + imm = 0 + opcode = 7 + sel = 5 (LSH) + sreg = 1 + sub_opcode = 1 + unused = 0 +0054 0600c072 RSH r2, r1, 0 + dreg = 2 + imm = 0 + opcode = 7 + sel = 6 (RSH) + sreg = 1 + sub_opcode = 1 + unused = 0 +0058 00004074 STAGE_RST + imm = 0 + opcode = 7 + sel = 2 (STAGE_RST) + sub_opcode = 2 + unused1 = 0 + unused2 = 0 +005c 70000074 STAGE_INC 7 + imm = 7 + opcode = 7 + sel = 0 (STAGE_INC) + sub_opcode = 2 + unused1 = 0 + unused2 = 0 +0060 30002074 STAGE_DEC 3 + imm = 3 + opcode = 7 + sel = 1 (STAGE_DEC) + sub_opcode = 2 + unused1 = 0 + unused2 = 0 +0064 00002080 JUMP r0 + addr = 0 + dreg = 0 + opcode = 8 + reg = 1 + sub_opcode = 0 + type = 0 (--) + unused = 0 +0068 01006080 JUMP r1, EQ + addr = 0 + dreg = 1 + opcode = 8 + reg = 1 + sub_opcode = 0 + type = 1 (EQ) + unused = 0 +006c 0200a080 JUMP r2, OV + addr = 0 + dreg = 2 + opcode = 8 + reg = 1 + sub_opcode = 0 + type = 2 (OV) + unused = 0 +0070 00000080 JUMP 0 + addr = 0 + 
dreg = 0 + opcode = 8 + reg = 0 + sub_opcode = 0 + type = 0 (--) + unused = 0 +0074 00004080 JUMP 0, EQ + addr = 0 + dreg = 0 + opcode = 8 + reg = 0 + sub_opcode = 0 + type = 1 (EQ) + unused = 0 +0078 00008080 JUMP 0, OV + addr = 0 + dreg = 0 + opcode = 8 + reg = 0 + sub_opcode = 0 + type = 2 (OV) + unused = 0 +007c 01000082 JUMPR 0, 1, LT + cmp = 0 (LT) + imm = 1 + offset = 0 + opcode = 8 + sign = 0 + sub_opcode = 1 +0080 05000382 JUMPR 1, 5, GE + cmp = 1 (GE) + imm = 5 + offset = 1 + opcode = 8 + sign = 0 + sub_opcode = 1 +0084 01000084 JUMPS 0, 1, LT + cmp = 0 (LT) + imm = 1 + offset = 0 + opcode = 8 + sign = 0 + sub_opcode = 2 + unused = 0 +0088 05800284 JUMPS 1, 5, GE + cmp = 1 (GE) + imm = 5 + offset = 1 + opcode = 8 + sign = 0 + sub_opcode = 2 + unused = 0 +008c 09000584 JUMPS 2, 9, LE + cmp = 2 (LE) + imm = 9 + offset = 2 + opcode = 8 + sign = 0 + sub_opcode = 2 + unused = 0 +0090 01000090 WAKE + opcode = 9 + sub_opcode = 0 + unused = 0 + wakeup = 1 +0094 07000092 SLEEP 7 + cycle_sel = 7 + opcode = 9 + sub_opcode = 1 + unused = 0 +0098 090000a0 TSENS r1, 2 + delay = 2 + dreg = 1 + opcode = 10 (0x0a) + unused = 0 +009c 000000b0 HALT + opcode = 11 (0x0b) + unused = 0 +00a0 060000d0 LD r2, r1, 0 + dreg = 2 + offset = 0 + opcode = 13 (0x0d) + sreg = 1 + unused1 = 0 + unused2 = 0 +---------------------------------------- +.data +00a4 00000000 +00a8 fecadec0 diff --git a/tests/fixtures/all_opcodes.S b/tests/fixtures/all_opcodes.S new file mode 100644 index 0000000..7f8c916 --- /dev/null +++ b/tests/fixtures/all_opcodes.S @@ -0,0 +1,62 @@ +.data +empty: .long 0 +magic: .long 0xc0decafe + +.text +REG_WR 0x123, 1, 2, 3 + +REG_RD 0x321, 2, 1 + +I2C_RD 3, 2, 1, 0 +I2C_WR 0, 1, 2, 3, 4 + +NOP +WAIT 7 + +ADC r3, 2, 1 + +ST r3, r2, 1 + +ADD r2, r1, r0 +SUB r2, r1, r0 +AND r2, r1, r0 +OR r2, r1, r0 +MOVE r2, r1 +LSH r2, r1, r0 +RSH r2, r1, r0 + +ADD r2, r1, 0 +SUB r2, r1, 0 +AND r2, r1, 0 +OR r2, r1, 0 +MOVE r1, 0 +LSH r2, r1, 0 +RSH r2, r1, 0 + +STAGE_RST +STAGE_INC 7 
+STAGE_DEC 3 + +JUMP r0 +JUMP r1, EQ +JUMP r2, OV + +JUMP 0 +JUMP 0, EQ +JUMP 0, OV + +JUMPR 0, 1, LT +JUMPR 4, 5, GE + +JUMPS 0, 1, LT +JUMPS 4, 5, GE +JUMPS 8, 9, LE + +WAKE +SLEEP 7 + +TSENS r1, 2 + +HALT + +LD r2, r1, 0 diff --git a/tests/fixtures/all_opcodes.lst b/tests/fixtures/all_opcodes.lst new file mode 100644 index 0000000..2ef1bd7 --- /dev/null +++ b/tests/fixtures/all_opcodes.lst @@ -0,0 +1,45 @@ +.text +0000 230d8810 REG_WR 0x123, 1, 2, 3 +0004 21030421 REG_RD 0x321, 2, 1 +0008 03001130 I2C_RD 3, 2, 1, 0 +000c 00011339 I2C_WR 0, 2, 3, 4 +0010 00000040 NOP +0014 07000040 WAIT 7 +0018 07000050 ADC r3, 1, 0 +001c 0b000068 ST r3, r2, 0 +0020 06000070 ADD r2, r1, r0 +0024 06002070 SUB r2, r1, r0 +0028 06004070 AND r2, r1, r0 +002c 06006070 OR r2, r1, r0 +0030 16008070 MOVE r2, r1 +0034 0600a070 LSH r2, r1, r0 +0038 0600c070 RSH r2, r1, r0 +003c 06000072 ADD r2, r1, 0 +0040 06002072 SUB r2, r1, 0 +0044 06004072 AND r2, r1, 0 +0048 06006072 OR r2, r1, 0 +004c 01008072 MOVE r1, 0 +0050 0600a072 LSH r2, r1, 0 +0054 0600c072 RSH r2, r1, 0 +0058 00004074 STAGE_RST +005c 70000074 STAGE_INC 7 +0060 30002074 STAGE_DEC 3 +0064 00002080 JUMP r0 +0068 01006080 JUMP r1, EQ +006c 0200a080 JUMP r2, OV +0070 00000080 JUMP 0 +0074 00004080 JUMP 0, EQ +0078 00008080 JUMP 0, OV +007c 01000082 JUMPR 0, 1, LT +0080 05000382 JUMPR 1, 5, GE +0084 01000084 JUMPS 0, 1, LT +0088 05800284 JUMPS 1, 5, GE +008c 09000584 JUMPS 2, 9, LE +0090 01000090 WAKE +0094 07000092 SLEEP 7 +0098 090000a0 TSENS r1, 2 +009c 000000b0 HALT +00a0 060000d0 LD r2, r1, 0 +.data +00a4 00000000 +00a8 fecadec0 diff --git a/tests/fixtures/manual_bytes-v.lst b/tests/fixtures/manual_bytes-v.lst new file mode 100644 index 0000000..7f44ea4 --- /dev/null +++ b/tests/fixtures/manual_bytes-v.lst @@ -0,0 +1,42 @@ +0000 e1af8c72 MOVE r1, 51966 + dreg = 1 + imm = 51966 (0xcafe) + opcode = 7 + sel = 4 (MOVE) + sreg = 0 + sub_opcode = 1 + unused = 0 +0004 01000068 ST r1, r0, 0 + dreg = 0 + offset = 0 + opcode = 6 + sreg 
= 1 + sub_opcode = 4 + unused1 = 0 + unused2 = 0 +0008 2705cc19 REG_WR 0x127, 19, 19, 1 + addr = 39 (0x27) + data = 1 + high = 19 (0x13) + low = 19 (0x13) + opcode = 1 + periph_sel = 1 +000c 0005681d REG_WR 0x100, 26, 26, 1 + addr = 0 + data = 1 + high = 26 (0x1a) + low = 26 (0x1a) + opcode = 1 + periph_sel = 1 +0010 000000a0 TSENS r0, 0 + delay = 0 + dreg = 0 + opcode = 10 (0x0a) + unused = 0 +0014 00000074 STAGE_INC 0 + imm = 0 + opcode = 7 + sel = 0 (STAGE_INC) + sub_opcode = 2 + unused1 = 0 + unused2 = 0 diff --git a/tests/fixtures/manual_bytes.lst b/tests/fixtures/manual_bytes.lst new file mode 100644 index 0000000..beb1b77 --- /dev/null +++ b/tests/fixtures/manual_bytes.lst @@ -0,0 +1,6 @@ +0000 e1af8c72 MOVE r1, 51966 +0004 01000068 ST r1, r0, 0 +0008 2705cc19 REG_WR 0x127, 19, 19, 1 +000c 0005681d REG_WR 0x100, 26, 26, 1 +0010 000000a0 TSENS r0, 0 +0014 00000074 STAGE_INC 0 From 06b277ebac79fbaa4eb928453455c09ca8bf9e3a Mon Sep 17 00:00:00 2001 From: Wilko Nienhaus Date: Tue, 27 Jun 2023 21:24:28 +0300 Subject: [PATCH 17/17] Add documentation for disassembler --- README.rst | 1 + docs/disassembler.rst | 146 ++++++++++++++++++++++++++++++++++++++++++ docs/index.rst | 9 +++ 3 files changed, 156 insertions(+) create mode 100644 docs/disassembler.rst diff --git a/README.rst b/README.rst index 5ffa906..e29d841 100644 --- a/README.rst +++ b/README.rst @@ -35,6 +35,7 @@ The following features are supported: * expressions in assembly code and constant definitions * RTC convenience macros (e.g. 
``WRITE_RTC_REG``) * many ESP32 ULP code examples found on the web will work unmodified +* a simple disassembler is also provided Quick start diff --git a/docs/disassembler.rst b/docs/disassembler.rst new file mode 100644 index 0000000..b92a19e --- /dev/null +++ b/docs/disassembler.rst @@ -0,0 +1,146 @@ +===================== +Disassembler +===================== + +micropython-esp32-ulp contains a disassembler for disassembling code for the +ESP32 ULP (Ultra Low-Power) Co-Processor. + +The main purpose of this tool is to inspect what instructions our assembler +created, what value each field is set to, and to compare this with the output +created by the assembler from Espressif (part of their `binutils-gdb fork <https://github.com/espressif/binutils-gdb>`_), +which we use as our reference implementation. + + +Usage +------------------------ + +To disassemble a ULP binary, simply run: + +.. code-block:: bash + + micropython -m tools.disassemble path/to/binary.ulp + +You can also specify additional options to ``disassemble.py`` as follows: + ++--------------------------+----------------------------------------------------------------+ +| Option | Description | ++==========================+================================================================+ +| ``-h`` | Show help text | ++--------------------------+----------------------------------------------------------------+ +|| ``-m <bytes sequence>`` || Disassemble a provided sequence of hex bytes | +|| || (in this case any filename specified is ignored) | ++--------------------------+----------------------------------------------------------------+ +| ``-v`` | Verbose mode (shows ULP header and fields of each instruction) | ++--------------------------+----------------------------------------------------------------+ + + +Disassembling a file +------------------------ + +The simplest and default mode of the disassembler is to disassemble the +specified file. + +Note that the ULP header is validated and files with unknown magic bytes will be +rejected.
The correct 4 magic bytes at the start of a ULP binary are ``ulp\x00``. + +Example: + +.. code-block:: shell + + $ micropython -m tools.disassemble path/to/binary.ulp + .text + 0000 040000d0 LD r0, r1, 0 + 0004 0e0400d0 LD r2, r3, 1 + 0008 84010068 ST r0, r1, 0 + 000c 8b090068 ST r3, r2, 2 + .data + 0000 00000000 + + +Disassembling a byte sequence +----------------------------- + +The ``-m`` option allows disassembling a sequence of hex letters representing +ULP instructions. + +This option expects the actual instructions directly, without any ULP header. + +The sequence must contain a number of hex letters exactly divisible by 8, i.e. +8, 16, 24, etc, because each 32-bit word is made up of 8 hex letters. Spaces +can be included in the sequence and they are ignored. + +The typical use case for this feature is to copy/paste some instructions from +a hexdump (e.g. xxd output) for analysis. + +Example: + +.. code-block:: shell + + # hexdump binary.ulp + $ xxd path/to/binary.ulp + 00000000: 756c 7000 0c00 2400 0400 0000 9300 8074 ulp...$........t + 00000010: 2a80 0488 2004 8074 1c00 0084 0000 0040 *... ..t.......@ + (...) + + # analyse the last 2 instructions + $ micropython -m tools.disassemble -m "1c00 0084 0000 0040" + 0000 1c000084 JUMPS 0, 28, LT + 0004 00000040 NOP + + +Verbose mode +------------------------ + +In verbose mode the following extra outputs are enabled: + +* ULP header (except when using ``-m``) +* The fields of each instruction and their values + +For example: + +.. code-block:: + + header + ULP magic : b'ulp\x00' (0x00706c75) + .text offset : 12 (0x0c) + .text size : 36 (0x24) + .data offset : 48 (0x30) + .data size : 4 (0x04) + .bss size : 0 (0x00) + ---------------------------------------- + .text + 0000 93008072 MOVE r3, 9 + dreg = 3 + imm = 9 + opcode = 7 + sel = 4 (MOVE) + sreg = 0 + sub_opcode = 1 + unused = 0 + (...detail truncated...)
+ 0020 000000b0 HALT + opcode = 11 (0x0b) + unused = 0 + ---------------------------------------- + .data + 0000 00000000 + + +Disassembling on device +----------------------------- + +The disassembler also works when used on an ESP32. + +To use the disassembler on a real device: + +* ensure ``micropython-esp32-ulp`` is installed on the device (see `docs/index.rst </docs/index.rst>`_). +* upload ``tools/disassemble.py`` to the device (any directory will do) +* run the following: + + .. code-block:: python + + from disassemble import disassemble_file + # then either: + disassemble_file('path/to/file.ulp') # normal mode + # or: + disassemble_file('path/to/file.ulp', True) # verbose mode diff --git a/docs/index.rst b/docs/index.rst index 82712ec..314bc68 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -136,6 +136,15 @@ found as part of Arduino/ESP-IDF projects. The preprocessor and how to use it is documented here: `Preprocessor support `_. +Disassembler +------------ +There is a disassembler for disassembling ULP binary code. This is mainly used to +inspect what instructions our assembler created, however it can be used to analyse +any ULP binaries. + +The disassembler and how to use it is documented here: `Disassembler </docs/disassembler.rst>`_. + + Limitations -----------