From ce82cbb0e2caa9bc2d947f6b6cb980eda14bb4fa Mon Sep 17 00:00:00 2001
From: Wilko Nienhaus <wilko.nienhaus@gmail.com>
Date: Mon, 26 Jun 2023 08:27:03 +0300
Subject: [PATCH 01/17] First crude version of a disassembler

Pass bytes from a hexdump in as command line arguments, e.g.:

micropython -m tools.disassemble 401f 0040

(If the byte sequence is not quoted, all args are joined together
into a single byte sequence. Spaces are allowed and will be ignored)
---
 tools/disassemble.py | 127 +++++++++++++++++++++++++++++++++++++++++++
 tools/esp32_ulp      |   1 +
 2 files changed, 128 insertions(+)
 create mode 100644 tools/disassemble.py
 create mode 120000 tools/esp32_ulp

diff --git a/tools/disassemble.py b/tools/disassemble.py
new file mode 100644
index 0000000..d67c86f
--- /dev/null
+++ b/tools/disassemble.py
@@ -0,0 +1,127 @@
+import esp32_ulp.opcodes as opcodes
+import ubinascii
+import sys
+
+
+def decode_instruction(i):
+    ins = opcodes._end
+    ins.all = i  # abuse a struct to get opcode and sub_opcode
+
+    print(ubinascii.hexlify(i.to_bytes(4, 'little')))
+
+    if ins.opcode == opcodes.OPCODE_ADC:
+        print('OPCODE_ADC')
+        opcodes._adc.all = i
+        ins = opcodes._adc
+    elif ins.opcode == opcodes.OPCODE_ALU and ins.sub_opcode == opcodes.SUB_OPCODE_ALU_CNT:
+        print('OPCODE_ALU / SUB_OPCODE_ALU_CNT')
+        opcodes._alu_cnt.all = i
+        ins = opcodes._alu_cnt
+    elif ins.opcode == opcodes.OPCODE_ALU and ins.sub_opcode == opcodes.SUB_OPCODE_ALU_IMM:
+        print('OPCODE_ALU / SUB_OPCODE_ALU_IMM')
+        opcodes._alu_imm.all = i
+        ins = opcodes._alu_imm
+    elif ins.opcode == opcodes.OPCODE_ALU and ins.sub_opcode == opcodes.SUB_OPCODE_ALU_REG:
+        print('OPCODE_ALU / SUB_OPCODE_ALU_REG')
+        opcodes._alu_reg.all = i
+        ins = opcodes._alu_reg
+    elif ins.opcode == opcodes.OPCODE_BRANCH and ins.sub_opcode == opcodes.SUB_OPCODE_BX:
+        print('JUMP')
+        opcodes._bx.all = i
+        ins = opcodes._bx
+    elif ins.opcode == opcodes.OPCODE_BRANCH and ins.sub_opcode == opcodes.SUB_OPCODE_BR:
+        print('JUMPR')
+        opcodes._br.all = i
+        ins = opcodes._br
+    elif ins.opcode == opcodes.OPCODE_BRANCH and ins.sub_opcode == opcodes.SUB_OPCODE_BS:
+        print('JUMPS')
+        opcodes._bs.all = i
+        ins = opcodes._bs
+    elif ins.opcode == opcodes.OPCODE_DELAY:
+        print('OPCODE_DELAY')
+        opcodes._delay.all = i
+        ins = opcodes._delay
+    elif ins.opcode == opcodes.OPCODE_END and ins.sub_opcode == opcodes.SUB_OPCODE_END:
+        print('OPCODE_END')
+        opcodes._end.all = i
+        ins = opcodes._end
+    elif ins.opcode == opcodes.OPCODE_END and ins.sub_opcode == opcodes.SUB_OPCODE_SLEEP:
+        print('OPCODE_SLEEP')
+        opcodes._sleep.all = i
+        ins = opcodes._sleep
+    elif ins.opcode == opcodes.OPCODE_HALT:
+        print('OPCODE_HALT')
+        opcodes._halt.all = i
+        ins = opcodes._halt
+    elif ins.opcode == opcodes.OPCODE_I2C:
+        print('OPCODE_I2C')
+        opcodes._i2c.all = i
+        ins = opcodes._i2c
+    elif ins.opcode == opcodes.OPCODE_LD:
+        print('OPCODE_LD')
+        opcodes._ld.all = i
+        ins = opcodes._ld
+    elif ins.opcode == opcodes.OPCODE_RD_REG:
+        print('OPCODE_RD_REG')
+        opcodes._rd_reg.all = i
+        ins = opcodes._rd_reg
+    elif ins.opcode == opcodes.OPCODE_ST:
+        print('OPCODE_ST')
+        opcodes._st.all = i
+        ins = opcodes._st
+    elif ins.opcode == opcodes.OPCODE_TSENS:
+        print('OPCODE_TSENS')
+        opcodes._tsens.all = i
+        ins = opcodes._tsens
+    elif ins.opcode == opcodes.OPCODE_WR_REG:
+        print('OPCODE_WR_REG')
+        opcodes._wr_reg.all = i
+        ins = opcodes._wr_reg
+
+    possible_fields = (
+        'addr', 'cmp', 'cycle_sel', 'cycles', 'data', 'delay', 'dreg',
+        'high', 'i2c_sel', 'imm', 'low', 'mux', 'offset', 'opcode',
+        'periph_sel', 'reg', 'rw', 'sar_sel', 'sel', 'sign', 'sreg',
+        'sub_addr', 'sub_opcode', 'treg', 'type', 'unused', 'unused1',
+        'unused2', 'wakeup'
+    )
+    for field in possible_fields:
+        try:
+            # eval is ugly but constrained to possible_fields and variable ins
+            val = eval('i.%s' % field, {}, {'i': ins})
+        except KeyError:
+            continue
+        extra = ''
+        if field == 'sel':
+            if ins.sub_opcode == opcodes.SUB_OPCODE_ALU_CNT:
+                alu_ops = ('INC', 'DEC', 'RST')
+            else:
+                alu_ops = ('ADD', 'SUB', 'AND', 'OR', 'MOV', 'LSH', 'RSH')
+            extra = ' (%s)' % alu_ops[val]
+        elif field == 'cmp':
+            cmp_ops = ('LT', 'GE', 'LE', 'EQ', 'GT')
+            extra = ' (%s)' % cmp_ops[val]
+        print("  {:10} = {:3}{}".format(field, val, extra))
+
+
+def disassemble_manually(byte_sequence_string):
+    sequence = byte_sequence_string.strip().replace(' ','')
+    chars_per_instruction = 8
+    list = [
+        sequence[i:i+chars_per_instruction]
+        for i in range(0, len(sequence), chars_per_instruction)
+    ]
+
+    for instruction in list:
+        byte_sequence = ubinascii.unhexlify(instruction.replace(' ',''))
+        i = int.from_bytes(byte_sequence, 'little')
+        decode_instruction(i)
+
+
+def handle_cmdline(params):
+    byte_sequence = "".join(params)
+    disassemble_manually(byte_sequence)
+
+
+if sys.argv: # if run from cmdline
+    handle_cmdline(sys.argv[1:])
diff --git a/tools/esp32_ulp b/tools/esp32_ulp
new file mode 120000
index 0000000..0bc67d4
--- /dev/null
+++ b/tools/esp32_ulp
@@ -0,0 +1 @@
+../esp32_ulp
\ No newline at end of file

From 4c834e3dabad5d388a8581947dbc485568969d08 Mon Sep 17 00:00:00 2001
From: Wilko Nienhaus <wilko.nienhaus@gmail.com>
Date: Mon, 26 Jun 2023 08:38:32 +0300
Subject: [PATCH 02/17] Replace crude disassembly with lookup table

In this approach, each opcode has its own decoding (using the correct
struct for each opcode). Each opcode (or opcode+subopcode) also has
its own rendering function.

The lookup table is hierarchical so the same structure used for opcodes
is also used within opcodes for looking up subopcodes.
---
 tools/disassemble.py | 184 ++++++++++++++++++++++++++-----------------
 1 file changed, 111 insertions(+), 73 deletions(-)

diff --git a/tools/disassemble.py b/tools/disassemble.py
index d67c86f..fa55148 100644
--- a/tools/disassemble.py
+++ b/tools/disassemble.py
@@ -1,82 +1,122 @@
+from esp32_ulp.opcodes import RD_REG_PERIPH_RTC_CNTL, RD_REG_PERIPH_RTC_IO, RD_REG_PERIPH_RTC_I2C, \
+    RD_REG_PERIPH_SENS, DR_REG_MAX_DIRECT
 import esp32_ulp.opcodes as opcodes
+import esp32_ulp.soc as soc
 import ubinascii
 import sys
 
 
+alu_cnt_ops = ('STAGE_INC', 'STAGE_DEC', 'STAGE_RST')
+alu_ops = ('ADD', 'SUB', 'AND', 'OR', 'MOVE', 'LSH', 'RSH')
+jump_types = ('--', 'EQ', 'OV')
+cmp_ops = ('LT', 'GE', 'LE', 'EQ', 'GT')
+
+lookup = {
+    opcodes.OPCODE_ADC: ('ADC', opcodes._adc, lambda op: 'ADC r%s, %s, %s' % (op.dreg, op.mux, op.sar_sel)),
+    opcodes.OPCODE_ALU: ('ALU', opcodes._alu_imm, {
+        opcodes.SUB_OPCODE_ALU_CNT: (
+            'ALU_CNT',
+            opcodes._alu_cnt,
+            lambda op: '%s%s' % (alu_cnt_ops[op.sel], '' if op.sel == opcodes.ALU_SEL_RST else ' %s' % op.imm)
+        ),
+        opcodes.SUB_OPCODE_ALU_IMM: (
+            'ALU_IMM',
+            opcodes._alu_imm,
+            lambda op: '%s r%s, %s' % (alu_ops[op.sel], op.dreg, op.imm) if op.sel == opcodes.ALU_SEL_MOV
+                else '%s r%s, r%s, %s' % (alu_ops[op.sel], op.dreg, op.sreg, op.imm)
+        ),
+        opcodes.SUB_OPCODE_ALU_REG: (
+            'ALU_REG',
+            opcodes._alu_reg,
+            lambda op: '%s r%s, r%s, r%s' % (alu_ops[op.sel], op.dreg, op.sreg, op.treg)
+        ),
+    }),
+    opcodes.OPCODE_BRANCH: ('BRANCH', opcodes._bx, {
+        opcodes.SUB_OPCODE_BX: (
+            'BX',
+            opcodes._bx,
+            lambda op: 'JUMP %s%s' % (op.addr if op.reg == 0 else 'r%s' % op.dreg, ', %s' % jump_types[op.type]
+                if op.type != 0 else '')
+        ),
+        opcodes.SUB_OPCODE_BR: (
+            'BR',
+            opcodes._br,
+            lambda op: 'JUMPR %s, %s, %s' % ('%s%s' % ('-' if op.sign == 1 else '', op.offset), op.imm, cmp_ops[op.cmp])
+        ),
+        opcodes.SUB_OPCODE_BS: (
+            'BS',
+            opcodes._bs,
+            lambda op: 'JUMPS %s, %s, %s' % ('%s%s' % ('-' if op.sign == 1 else '', op.offset), op.imm, cmp_ops[op.cmp])
+        ),
+    }),
+    opcodes.OPCODE_DELAY: (
+        'DELAY',
+        opcodes._delay,
+        lambda op: 'NOP' if op.cycles == 0 else 'WAIT %s' % op.cycles
+    ),
+    opcodes.OPCODE_END: ('END', opcodes._end, {
+        opcodes.SUB_OPCODE_END: (
+            'WAKE',
+            opcodes._end
+        ),
+        opcodes.SUB_OPCODE_SLEEP: (
+            'SLEEP',
+            opcodes._sleep,
+            lambda op: 'SLEEP %s' % op.cycle_sel
+        ),
+    }),
+    opcodes.OPCODE_HALT: ('HALT', opcodes._halt),
+    opcodes.OPCODE_I2C: (
+        'I2C',
+        opcodes._i2c,
+        lambda op: 'I2C_%s %s, %s, %s, %s' % ('RD' if op.rw == 0 else 'WR', op.sub_addr, op.high, op.low, op.i2c_sel)
+    ),
+    opcodes.OPCODE_LD: ('LD', opcodes._ld, lambda op: 'LD r%s, r%s, %s' % (op.dreg, op.sreg, op.offset)),
+    opcodes.OPCODE_ST: ('ST', opcodes._st, lambda op: 'ST r%s, r%s, %s' % (op.sreg, op.dreg, op.offset)),
+    opcodes.OPCODE_RD_REG: (
+        'RD_REG',
+        opcodes._rd_reg,
+        lambda op: 'REG_RD 0x%x, %s, %s' % (op.periph_sel << 8 | op.addr, op.high, op.low)
+    ),
+    opcodes.OPCODE_WR_REG: (
+        'WR_REG',
+        opcodes._wr_reg,
+        lambda op: 'REG_WR 0x%x, %s, %s, %s' % (op.periph_sel << 8 | op.addr, op.high, op.low, op.data)
+    ),
+    opcodes.OPCODE_TSENS: ('TSENS', opcodes._tsens, lambda op: 'TSENS r%s, %s' % (op.dreg, op.delay)),
+}
+
+
 def decode_instruction(i):
     ins = opcodes._end
-    ins.all = i  # abuse a struct to get opcode and sub_opcode
+    ins.all = i  # abuse a struct to get opcode
 
     print(ubinascii.hexlify(i.to_bytes(4, 'little')))
 
-    if ins.opcode == opcodes.OPCODE_ADC:
-        print('OPCODE_ADC')
-        opcodes._adc.all = i
-        ins = opcodes._adc
-    elif ins.opcode == opcodes.OPCODE_ALU and ins.sub_opcode == opcodes.SUB_OPCODE_ALU_CNT:
-        print('OPCODE_ALU / SUB_OPCODE_ALU_CNT')
-        opcodes._alu_cnt.all = i
-        ins = opcodes._alu_cnt
-    elif ins.opcode == opcodes.OPCODE_ALU and ins.sub_opcode == opcodes.SUB_OPCODE_ALU_IMM:
-        print('OPCODE_ALU / SUB_OPCODE_ALU_IMM')
-        opcodes._alu_imm.all = i
-        ins = opcodes._alu_imm
-    elif ins.opcode == opcodes.OPCODE_ALU and ins.sub_opcode == opcodes.SUB_OPCODE_ALU_REG:
-        print('OPCODE_ALU / SUB_OPCODE_ALU_REG')
-        opcodes._alu_reg.all = i
-        ins = opcodes._alu_reg
-    elif ins.opcode == opcodes.OPCODE_BRANCH and ins.sub_opcode == opcodes.SUB_OPCODE_BX:
-        print('JUMP')
-        opcodes._bx.all = i
-        ins = opcodes._bx
-    elif ins.opcode == opcodes.OPCODE_BRANCH and ins.sub_opcode == opcodes.SUB_OPCODE_BR:
-        print('JUMPR')
-        opcodes._br.all = i
-        ins = opcodes._br
-    elif ins.opcode == opcodes.OPCODE_BRANCH and ins.sub_opcode == opcodes.SUB_OPCODE_BS:
-        print('JUMPS')
-        opcodes._bs.all = i
-        ins = opcodes._bs
-    elif ins.opcode == opcodes.OPCODE_DELAY:
-        print('OPCODE_DELAY')
-        opcodes._delay.all = i
-        ins = opcodes._delay
-    elif ins.opcode == opcodes.OPCODE_END and ins.sub_opcode == opcodes.SUB_OPCODE_END:
-        print('OPCODE_END')
-        opcodes._end.all = i
-        ins = opcodes._end
-    elif ins.opcode == opcodes.OPCODE_END and ins.sub_opcode == opcodes.SUB_OPCODE_SLEEP:
-        print('OPCODE_SLEEP')
-        opcodes._sleep.all = i
-        ins = opcodes._sleep
-    elif ins.opcode == opcodes.OPCODE_HALT:
-        print('OPCODE_HALT')
-        opcodes._halt.all = i
-        ins = opcodes._halt
-    elif ins.opcode == opcodes.OPCODE_I2C:
-        print('OPCODE_I2C')
-        opcodes._i2c.all = i
-        ins = opcodes._i2c
-    elif ins.opcode == opcodes.OPCODE_LD:
-        print('OPCODE_LD')
-        opcodes._ld.all = i
-        ins = opcodes._ld
-    elif ins.opcode == opcodes.OPCODE_RD_REG:
-        print('OPCODE_RD_REG')
-        opcodes._rd_reg.all = i
-        ins = opcodes._rd_reg
-    elif ins.opcode == opcodes.OPCODE_ST:
-        print('OPCODE_ST')
-        opcodes._st.all = i
-        ins = opcodes._st
-    elif ins.opcode == opcodes.OPCODE_TSENS:
-        print('OPCODE_TSENS')
-        opcodes._tsens.all = i
-        ins = opcodes._tsens
-    elif ins.opcode == opcodes.OPCODE_WR_REG:
-        print('OPCODE_WR_REG')
-        opcodes._wr_reg.all = i
-        ins = opcodes._wr_reg
+    params = lookup.get(ins.opcode, None)
+
+    if not params:
+        print('Unknown instruction')
+        return
+
+    if len(params) == 3:
+        name, ins, third = params
+        ins.all = i
+
+        if callable(third):
+            params = (third(ins), ins)
+        else:
+            params = third.get(ins.sub_opcode, ())
+
+    if len(params) == 3:
+        name, ins, pretty = params
+        ins.all = i
+        name = pretty(ins)
+    else:
+        name, ins = params
+        ins.all = i
+
+    print(name)
 
     possible_fields = (
         'addr', 'cmp', 'cycle_sel', 'cycles', 'data', 'delay', 'dreg',
@@ -94,12 +134,10 @@ def decode_instruction(i):
         extra = ''
         if field == 'sel':
             if ins.sub_opcode == opcodes.SUB_OPCODE_ALU_CNT:
-                alu_ops = ('INC', 'DEC', 'RST')
+                extra = ' (%s)' % alu_cnt_ops[val]
             else:
-                alu_ops = ('ADD', 'SUB', 'AND', 'OR', 'MOV', 'LSH', 'RSH')
-            extra = ' (%s)' % alu_ops[val]
+                extra = ' (%s)' % alu_ops[val]
         elif field == 'cmp':
-            cmp_ops = ('LT', 'GE', 'LE', 'EQ', 'GT')
             extra = ' (%s)' % cmp_ops[val]
         print("  {:10} = {:3}{}".format(field, val, extra))
 

From 8325c2bddc8573715649a5b2f9ca12e9d68d296c Mon Sep 17 00:00:00 2001
From: Wilko Nienhaus <wilko.nienhaus@gmail.com>
Date: Mon, 26 Jun 2023 08:47:16 +0300
Subject: [PATCH 03/17] Add command line handling, implementing help (-h)

---
 tools/disassemble.py | 21 ++++++++++++++++++++-
 1 file changed, 20 insertions(+), 1 deletion(-)

diff --git a/tools/disassemble.py b/tools/disassemble.py
index fa55148..fc35bf0 100644
--- a/tools/disassemble.py
+++ b/tools/disassemble.py
@@ -156,8 +156,27 @@ def disassemble_manually(byte_sequence_string):
         decode_instruction(i)
 
 
+def print_help():
+    print('Usage: disassemble.py [<options>] <byte_sequence>')
+    print('')
+    print('Options:')
+    print('  -h               Show this help text')
+    print('  <byte_sequence>  Sequence of hex bytes (8 per instruction)')
+    pass
+
+
 def handle_cmdline(params):
-    byte_sequence = "".join(params)
+    byte_sequence = ''
+
+    while params:
+        if params[0] == '-h':
+            print_help()
+            sys.exit(0)
+        else:
+            byte_sequence += params[0]
+
+        params = params[1:]  # remove first param from list
+
     disassemble_manually(byte_sequence)
 
 

From e4b34e2a6ae917e280a3c39525a50fe3cde0ecd5 Mon Sep 17 00:00:00 2001
From: Wilko Nienhaus <wilko.nienhaus@gmail.com>
Date: Mon, 26 Jun 2023 19:58:34 +0300
Subject: [PATCH 04/17] Tease apart decoding of instruction and printing. Add
 unit tests.

---
 tests/00_unit_tests.sh |   2 +-
 tests/disassemble.py   | 138 +++++++++++++++++++++++++++++++++++++++++
 tests/tools            |   1 +
 tools/disassemble.py   |  28 ++++++---
 4 files changed, 160 insertions(+), 9 deletions(-)
 create mode 100644 tests/disassemble.py
 create mode 120000 tests/tools

diff --git a/tests/00_unit_tests.sh b/tests/00_unit_tests.sh
index ee1a239..b56fd3c 100755
--- a/tests/00_unit_tests.sh
+++ b/tests/00_unit_tests.sh
@@ -4,7 +4,7 @@
 
 set -e
 
-for file in opcodes assemble link util preprocess definesdb; do
+for file in opcodes assemble link util preprocess definesdb disassemble; do
     echo testing $file...
     micropython $file.py
 done
diff --git a/tests/disassemble.py b/tests/disassemble.py
new file mode 100644
index 0000000..5cd0fc6
--- /dev/null
+++ b/tests/disassemble.py
@@ -0,0 +1,138 @@
+from tools.disassemble import decode_instruction
+import esp32_ulp.opcodes as opcodes
+import ubinascii
+
+tests = []
+
+
+def test(param):
+    tests.append(param)
+
+
+def hex_to_int(sequence):
+    byte_sequence = ubinascii.unhexlify(sequence)
+    return int.from_bytes(byte_sequence, 'little')
+
+
+def assert_decode(sequence, expected_struct, expected_name):
+    i = hex_to_int(sequence)
+
+    ins, name = decode_instruction(i)
+
+    assert ins is expected_struct, 'incorrect instruction struct'
+    assert name == expected_name, '%s != %s' % (name, expected_name)
+
+
+def assert_decode_exception(sequence, expected_message):
+    i = hex_to_int(sequence)
+
+    try:
+        decode_instruction(i)
+    except Exception as e:
+        assert str(e) == expected_message, str(e)
+        raised = True
+    else:
+        raised = False
+
+    assert raised, 'Exception not raised'
+
+
+@test
+def test_unknown_instruction():
+    assert_decode_exception("10000001", 'Unknown instruction')
+
+
+# All hex sequences were generated using our assembler.
+# Note: disassembled instructions always show field values according
+# to what is actually encoded into the binary instruction, not as per
+# original assembly code.
+# For example in JUMP instructions in the source code one would
+# specify jump offsets in bytes (e.g. 4 bytes) but in the actual
+# instruction offset encoded in the binary instruction will be in
+# words (1 word = 4 bytes).
+# The disassembled instructions would therefore show as "JUMP 1"
+# for what was originally "JUMP 4" in the source code.
+@test
+def test_all_instructions():
+    # OPCODE_WR_REG = 1
+    assert_decode("00000010", opcodes._wr_reg, 'REG_WR 0x0, 0, 0, 0')
+
+    # OPCODE_RD_REG = 2
+    assert_decode("00000020", opcodes._rd_reg, 'REG_RD 0x0, 0, 0')
+
+    # OPCODE_I2C = 3
+    assert_decode("00000030", opcodes._i2c, 'I2C_RD 0, 0, 0, 0')
+    assert_decode("00000038", opcodes._i2c, 'I2C_WR 0, 0, 0, 0')
+
+    # OPCODE_DELAY = 4
+    assert_decode("00000040", opcodes._delay, 'NOP')
+    assert_decode("01000040", opcodes._delay, 'WAIT 1')
+
+    # OPCODE_ADC = 5
+    assert_decode("00000050", opcodes._adc, 'ADC r0, 0, 0')
+
+    # OPCODE_ST = 6
+    assert_decode("00000068", opcodes._st, 'ST r0, r0, 0')
+
+    # OPCODE_ALU = 7, SUB_OPCODE_ALU_REG
+    assert_decode("00000070", opcodes._alu_reg, 'ADD r0, r0, r0')
+    assert_decode("00002070", opcodes._alu_reg, 'SUB r0, r0, r0')
+    assert_decode("00004070", opcodes._alu_reg, 'AND r0, r0, r0')
+    assert_decode("00006070", opcodes._alu_reg, 'OR r0, r0, r0')
+    assert_decode("00008070", opcodes._alu_reg, "MOVE r0, r0")
+    assert_decode("0000a070", opcodes._alu_reg, 'LSH r0, r0, r0')
+    assert_decode("0000c070", opcodes._alu_reg, 'RSH r0, r0, r0')
+
+    # OPCODE_ALU = 7, SUB_OPCODE_ALU_IMM
+    assert_decode("00000072", opcodes._alu_imm, 'ADD r0, r0, 0')
+    assert_decode("00002072", opcodes._alu_imm, 'SUB r0, r0, 0')
+    assert_decode("00004072", opcodes._alu_imm, 'AND r0, r0, 0')
+    assert_decode("00006072", opcodes._alu_imm, 'OR r0, r0, 0')
+    assert_decode("00008072", opcodes._alu_imm, "MOVE r0, 0")
+    assert_decode("0000a072", opcodes._alu_imm, 'LSH r0, r0, 0')
+    assert_decode("0000c072", opcodes._alu_imm, 'RSH r0, r0, 0')
+
+    # OPCODE_ALU = 7, SUB_OPCODE_ALU_CNT
+    assert_decode("00004074", opcodes._alu_cnt, 'STAGE_RST')
+    assert_decode("00000074", opcodes._alu_cnt, 'STAGE_INC 0')
+    assert_decode("00002074", opcodes._alu_cnt, 'STAGE_DEC 0')
+
+    # OPCODE_BRANCH = 8, SUB_OPCODE_BX (IMM)
+    assert_decode("00000080", opcodes._bx, 'JUMP 0')
+    assert_decode("00004080", opcodes._bx, 'JUMP 0, EQ')
+    assert_decode("00008080", opcodes._bx, 'JUMP 0, OV')
+
+    # OPCODE_BRANCH = 8, SUB_OPCODE_BX (REG)
+    assert_decode("00002080", opcodes._bx, 'JUMP r0')
+    assert_decode("00006080", opcodes._bx, 'JUMP r0, EQ')
+    assert_decode("0000a080", opcodes._bx, 'JUMP r0, OV')
+
+    # OPCODE_BRANCH = 8, SUB_OPCODE_BR
+    assert_decode("00000082", opcodes._br, 'JUMPR 0, 0, LT')
+    assert_decode("00000182", opcodes._br, 'JUMPR 0, 0, GE')
+
+    # OPCODE_BRANCH = 8, SUB_OPCODE_BS
+    assert_decode("00000084", opcodes._bs, 'JUMPS 0, 0, LT')
+    assert_decode("00800084", opcodes._bs, 'JUMPS 0, 0, GE')
+    assert_decode("00000184", opcodes._bs, 'JUMPS 0, 0, LE')
+
+    # OPCODE_END = 9, SUB_OPCODE_END
+    assert_decode("01000090", opcodes._end, 'WAKE')
+
+    # OPCODE_END = 9, SUB_OPCODE_SLEEP
+    assert_decode("00000092", opcodes._sleep, 'SLEEP 0')
+
+    # OPCODE_TSENS = 10
+    assert_decode("000000a0", opcodes._tsens, 'TSENS r0, 0')
+
+    # OPCODE_HALT = 11
+    assert_decode("000000b0", opcodes._halt, 'HALT')
+
+    # OPCODE_LD = 13
+    assert_decode("000000d0", opcodes._ld, 'LD r0, r0, 0')
+
+
+if __name__ == '__main__':
+    # run all methods marked with @test
+    for t in tests:
+        t()
diff --git a/tests/tools b/tests/tools
new file mode 120000
index 0000000..4887d6e
--- /dev/null
+++ b/tests/tools
@@ -0,0 +1 @@
+../tools
\ No newline at end of file
diff --git a/tools/disassemble.py b/tools/disassemble.py
index fc35bf0..184b128 100644
--- a/tools/disassemble.py
+++ b/tools/disassemble.py
@@ -28,7 +28,8 @@
         opcodes.SUB_OPCODE_ALU_REG: (
             'ALU_REG',
             opcodes._alu_reg,
-            lambda op: '%s r%s, r%s, r%s' % (alu_ops[op.sel], op.dreg, op.sreg, op.treg)
+            lambda op: '%s r%s, r%s' % (alu_ops[op.sel], op.dreg, op.sreg) if op.sel == opcodes.ALU_SEL_MOV
+                else '%s r%s, r%s, r%s' % (alu_ops[op.sel], op.dreg, op.sreg, op.treg)
         ),
     }),
     opcodes.OPCODE_BRANCH: ('BRANCH', opcodes._bx, {
@@ -91,13 +92,10 @@ def decode_instruction(i):
     ins = opcodes._end
     ins.all = i  # abuse a struct to get opcode
 
-    print(ubinascii.hexlify(i.to_bytes(4, 'little')))
-
     params = lookup.get(ins.opcode, None)
 
     if not params:
-        print('Unknown instruction')
-        return
+        raise Exception('Unknown instruction')
 
     if len(params) == 3:
         name, ins, third = params
@@ -116,6 +114,18 @@ def decode_instruction(i):
         name, ins = params
         ins.all = i
 
+    return ins, name
+
+
+def decode_instruction_and_print(i):
+    print(ubinascii.hexlify(i.to_bytes(4, 'little')))
+
+    try:
+        ins, name = decode_instruction(i)
+    except Exception as e:
+        print(e)
+        return
+
     print(name)
 
     possible_fields = (
@@ -132,12 +142,14 @@ def decode_instruction(i):
         except KeyError:
             continue
         extra = ''
-        if field == 'sel':
+        if field == 'sel':  # ALU
             if ins.sub_opcode == opcodes.SUB_OPCODE_ALU_CNT:
                 extra = ' (%s)' % alu_cnt_ops[val]
             else:
                 extra = ' (%s)' % alu_ops[val]
-        elif field == 'cmp':
+        elif field == 'type':  # JUMP
+            extra = ' (%s)' % jump_types[val]
+        elif field == 'cmp':  # JUMPR/JUMPS
             extra = ' (%s)' % cmp_ops[val]
         print("  {:10} = {:3}{}".format(field, val, extra))
 
@@ -153,7 +165,7 @@ def disassemble_manually(byte_sequence_string):
     for instruction in list:
         byte_sequence = ubinascii.unhexlify(instruction.replace(' ',''))
         i = int.from_bytes(byte_sequence, 'little')
-        decode_instruction(i)
+        decode_instruction_and_print(i)
 
 
 def print_help():

From 2ebaacc540ecdb128cc6e396d4a1fed39c0434b7 Mon Sep 17 00:00:00 2001
From: Wilko Nienhaus <wilko.nienhaus@gmail.com>
Date: Mon, 26 Jun 2023 20:00:07 +0300
Subject: [PATCH 05/17] Allow choosing which unit tests to run

Useful for running just one unit test file instead of all.

Now one can pass the name of a unit test (or a list of names)
to the 00_unit_tests.sh script.

Example:
  cd tests
  ./00_unit_tests.sh disassemble  # run only disassemble.py

The default (if nothing is passed to the script) is still to run
all tests as before.
---
 tests/00_unit_tests.sh | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tests/00_unit_tests.sh b/tests/00_unit_tests.sh
index b56fd3c..1dc05e9 100755
--- a/tests/00_unit_tests.sh
+++ b/tests/00_unit_tests.sh
@@ -4,7 +4,9 @@
 
 set -e
 
-for file in opcodes assemble link util preprocess definesdb disassemble; do
+LIST=${1:-opcodes assemble link util preprocess definesdb disassemble}
+
+for file in $LIST; do
     echo testing $file...
     micropython $file.py
 done

From 278bbf016534a11fad3b07b44e97f656c5b64459 Mon Sep 17 00:00:00 2001
From: Wilko Nienhaus <wilko.nienhaus@gmail.com>
Date: Mon, 26 Jun 2023 19:35:07 +0300
Subject: [PATCH 06/17] Add unit tests for field level output

---
 tests/disassemble.py | 444 ++++++++++++++++++++++++++++++++++++++++++-
 tools/disassemble.py |  31 +--
 2 files changed, 463 insertions(+), 12 deletions(-)

diff --git a/tests/disassemble.py b/tests/disassemble.py
index 5cd0fc6..012fc14 100644
--- a/tests/disassemble.py
+++ b/tests/disassemble.py
@@ -1,4 +1,4 @@
-from tools.disassemble import decode_instruction
+from tools.disassemble import decode_instruction, get_instruction_fields
 import esp32_ulp.opcodes as opcodes
 import ubinascii
 
@@ -37,6 +37,16 @@ def assert_decode_exception(sequence, expected_message):
     assert raised, 'Exception not raised'
 
 
+def assert_decode_fields(sequence, expected_field_details):
+    i = hex_to_int(sequence)
+
+    ins, _ = decode_instruction(i)
+
+    actual_field_details = get_instruction_fields(ins)
+
+    assert actual_field_details == expected_field_details, '\n- %s \n+ %s' % (actual_field_details, expected_field_details)
+
+
 @test
 def test_unknown_instruction():
     assert_decode_exception("10000001", 'Unknown instruction')
@@ -132,6 +142,438 @@ def test_all_instructions():
     assert_decode("000000d0", opcodes._ld, 'LD r0, r0, 0')
 
 
+@test
+def test_instruction_field_decoding():
+    # OPCODE_WR_REG = 1
+    assert_decode_fields("000c8810", [  # REG_WR 0x0, 1, 2, 3
+        ('addr'      ,  0, ''),
+        ('data'      ,  3, ''),
+        ('high'      ,  1, ''),
+        ('low'       ,  2, ''),
+        ('opcode'    ,  1, ''),
+        ('periph_sel',  0, ''),
+    ])
+
+    # OPCODE_RD_REG = 2
+    assert_decode_fields("03000421", [  # REG_RD 0x3, 2, 1
+        ('addr'      ,  3, ''),
+        ('high'      ,  2, ''),
+        ('low'       ,  1, ''),
+        ('opcode'    ,  2, ''),
+        ('periph_sel',  0, ''),
+        ('unused'    ,  0, ''),
+    ])
+
+    # OPCODE_I2C = 3
+    assert_decode_fields("03001130", [  # I2C_RD 3, 2, 1, 0
+        ('data'      ,  0, ''),
+        ('high'      ,  2, ''),
+        ('i2c_sel'   ,  0, ''),
+        ('low'       ,  1, ''),
+        ('opcode'    ,  3, ''),
+        ('rw'        ,  0, ''),
+        ('sub_addr'  ,  3, ''),
+        ('unused'    ,  0, ''),
+    ])
+    assert_decode_fields("00011339", [  # I2C_WR 0, 2, 3, 4
+        ('data'      ,  1, ''),
+        ('high'      ,  2, ''),
+        ('i2c_sel'   ,  4, ''),
+        ('low'       ,  3, ''),
+        ('opcode'    ,  3, ''),
+        ('rw'        ,  1, ''),
+        ('sub_addr'  ,  0, ''),
+        ('unused'    ,  0, ''),
+    ])
+
+    # OPCODE_DELAY = 4
+    assert_decode_fields("00000040", [  # NOP
+        ('cycles'    ,  0, ''),
+        ('opcode'    ,  4, ''),
+        ('unused'    ,  0, ''),
+    ])
+    assert_decode_fields("07000040", [  # WAIT 7
+        ('cycles'    ,  7, ''),
+        ('opcode'    ,  4, ''),
+        ('unused'    ,  0, ''),
+    ])
+
+    # OPCODE_ADC = 5
+    assert_decode_fields("07000050", [  # ADC r3, 1, 0
+        ('cycles'    ,  0, ''),
+        ('dreg'      ,  3, ''),
+        ('mux'       ,  1, ''),
+        ('opcode'    ,  5, ''),
+        ('sar_sel'   ,  0, ''),
+        ('unused1'   ,  0, ''),
+        ('unused2'   ,  0, ''),
+    ])
+
+    # OPCODE_ST = 6
+    assert_decode_fields("0b000068", [  # ST r3, r2, 0
+        ('dreg'      ,  2, ''),
+        ('offset'    ,  0, ''),
+        ('opcode'    ,  6, ''),
+        ('sreg'      ,  3, ''),
+        ('sub_opcode',  4, ''),
+        ('unused1'   ,  0, ''),
+        ('unused2'   ,  0, ''),
+    ])
+
+    # OPCODE_ALU = 7, SUB_OPCODE_ALU_REG
+    assert_decode_fields("06000070", [  # ADD r2, r1, r0
+        ('dreg'      ,  2, ''),
+        ('opcode'    ,  7, ''),
+        ('sel'       ,  0, ' (ADD)'),
+        ('sreg'      ,  1, ''),
+        ('sub_opcode',  0, ''),
+        ('treg'      ,  0, ''),
+        ('unused'    ,  0, ''),
+    ])
+    assert_decode_fields("06002070", [  # SUB r2, r1, r0
+        ('dreg'      ,  2, ''),
+        ('opcode'    ,  7, ''),
+        ('sel'       ,  1, ' (SUB)'),
+        ('sreg'      ,  1, ''),
+        ('sub_opcode',  0, ''),
+        ('treg'      ,  0, ''),
+        ('unused'    ,  0, ''),
+    ])
+    assert_decode_fields("06004070", [  # AND r2, r1, r0
+        ('dreg'      ,  2, ''),
+        ('opcode'    ,  7, ''),
+        ('sel'       ,  2, ' (AND)'),
+        ('sreg'      ,  1, ''),
+        ('sub_opcode',  0, ''),
+        ('treg'      ,  0, ''),
+        ('unused'    ,  0, ''),
+    ])
+    assert_decode_fields("06006070", [  # OR r2, r1, r0
+        ('dreg'      ,  2, ''),
+        ('opcode'    ,  7, ''),
+        ('sel'       ,  3, ' (OR)'),
+        ('sreg'      ,  1, ''),
+        ('sub_opcode',  0, ''),
+        ('treg'      ,  0, ''),
+        ('unused'    ,  0, ''),
+    ])
+    assert_decode_fields("0600a070", [  # LSH r2, r1, r0
+        ('dreg'      ,  2, ''),
+        ('opcode'    ,  7, ''),
+        ('sel'       ,  5, ' (LSH)'),
+        ('sreg'      ,  1, ''),
+        ('sub_opcode',  0, ''),
+        ('treg'      ,  0, ''),
+        ('unused'    ,  0, ''),
+    ])
+    assert_decode_fields("0600c070", [  # RSH r2, r1, r0
+        ('dreg'      ,  2, ''),
+        ('opcode'    ,  7, ''),
+        ('sel'       ,  6, ' (RSH)'),
+        ('sreg'      ,  1, ''),
+        ('sub_opcode',  0, ''),
+        ('treg'      ,  0, ''),
+        ('unused'    ,  0, ''),
+    ])
+    assert_decode_fields("06000070", [  # ADD r2, r1, r0
+        ('dreg'      ,  2, ''),
+        ('opcode'    ,  7, ''),
+        ('sel'       ,  0, ' (ADD)'),
+        ('sreg'      ,  1, ''),
+        ('sub_opcode',  0, ''),
+        ('treg'      ,  0, ''),
+        ('unused'    ,  0, ''),
+    ])
+    assert_decode_fields("06002070", [  # SUB r2, r1, r0
+        ('dreg'      ,  2, ''),
+        ('opcode'    ,  7, ''),
+        ('sel'       ,  1, ' (SUB)'),
+        ('sreg'      ,  1, ''),
+        ('sub_opcode',  0, ''),
+        ('treg'      ,  0, ''),
+        ('unused'    ,  0, ''),
+    ])
+    assert_decode_fields("06004070", [  # AND r2, r1, r0
+        ('dreg'      ,  2, ''),
+        ('opcode'    ,  7, ''),
+        ('sel'       ,  2, ' (AND)'),
+        ('sreg'      ,  1, ''),
+        ('sub_opcode',  0, ''),
+        ('treg'      ,  0, ''),
+        ('unused'    ,  0, ''),
+    ])
+    assert_decode_fields("06006070", [  # OR r2, r1, r0
+        ('dreg'      ,  2, ''),
+        ('opcode'    ,  7, ''),
+        ('sel'       ,  3, ' (OR)'),
+        ('sreg'      ,  1, ''),
+        ('sub_opcode',  0, ''),
+        ('treg'      ,  0, ''),
+        ('unused'    ,  0, ''),
+    ])
+    assert_decode_fields("16008070", [  # MOVE r2, r1
+        ('dreg'      ,  2, ''),
+        ('opcode'    ,  7, ''),
+        ('sel'       ,  4, ' (MOVE)'),
+        ('sreg'      ,  1, ''),
+        ('sub_opcode',  0, ''),
+        ('treg'      ,  1, ''),
+        ('unused'    ,  0, ''),
+    ])
+    assert_decode_fields("0600a070", [  # LSH r2, r1, r0
+        ('dreg'      ,  2, ''),
+        ('opcode'    ,  7, ''),
+        ('sel'       ,  5, ' (LSH)'),
+        ('sreg'      ,  1, ''),
+        ('sub_opcode',  0, ''),
+        ('treg'      ,  0, ''),
+        ('unused'    ,  0, ''),
+    ])
+    assert_decode_fields("0600c070", [  # RSH r2, r1, r0
+        ('dreg'      ,  2, ''),
+        ('opcode'    ,  7, ''),
+        ('sel'       ,  6, ' (RSH)'),
+        ('sreg'      ,  1, ''),
+        ('sub_opcode',  0, ''),
+        ('treg'      ,  0, ''),
+        ('unused'    ,  0, ''),
+    ])
+
+    # OPCODE_ALU = 7, SUB_OPCODE_ALU_IMM
+    assert_decode_fields("06000072", [  # ADD r2, r1, 0
+        ('dreg'      ,  2, ''),
+        ('imm'       ,  0, ''),
+        ('opcode'    ,  7, ''),
+        ('sel'       ,  0, ' (ADD)'),
+        ('sreg'      ,  1, ''),
+        ('sub_opcode',  1, ''),
+        ('unused'    ,  0, ''),
+    ])
+    assert_decode_fields("06002072", [  # SUB r2, r1, 0
+        ('dreg'      ,  2, ''),
+        ('imm'       ,  0, ''),
+        ('opcode'    ,  7, ''),
+        ('sel'       ,  1, ' (SUB)'),
+        ('sreg'      ,  1, ''),
+        ('sub_opcode',  1, ''),
+        ('unused'    ,  0, ''),
+    ])
+    assert_decode_fields("06004072", [  # AND r2, r1, 0
+        ('dreg'      ,  2, ''),
+        ('imm'       ,  0, ''),
+        ('opcode'    ,  7, ''),
+        ('sel'       ,  2, ' (AND)'),
+        ('sreg'      ,  1, ''),
+        ('sub_opcode',  1, ''),
+        ('unused'    ,  0, ''),
+    ])
+    assert_decode_fields("06006072", [  # OR r2, r1, 0
+        ('dreg'      ,  2, ''),
+        ('imm'       ,  0, ''),
+        ('opcode'    ,  7, ''),
+        ('sel'       ,  3, ' (OR)'),
+        ('sreg'      ,  1, ''),
+        ('sub_opcode',  1, ''),
+        ('unused'    ,  0, ''),
+    ])
+    assert_decode_fields("01008072", [  # MOVE r1, 0
+        ('dreg'      ,  1, ''),
+        ('imm'       ,  0, ''),
+        ('opcode'    ,  7, ''),
+        ('sel'       ,  4, ' (MOVE)'),
+        ('sreg'      ,  0, ''),
+        ('sub_opcode',  1, ''),
+        ('unused'    ,  0, ''),
+    ])
+    assert_decode_fields("0600a072", [  # LSH r2, r1, 0
+        ('dreg'      ,  2, ''),
+        ('imm'       ,  0, ''),
+        ('opcode'    ,  7, ''),
+        ('sel'       ,  5, ' (LSH)'),
+        ('sreg'      ,  1, ''),
+        ('sub_opcode',  1, ''),
+        ('unused'    ,  0, ''),
+    ])
+    assert_decode_fields("0600c072", [  # RSH r2, r1, 0
+        ('dreg'      ,  2, ''),
+        ('imm'       ,  0, ''),
+        ('opcode'    ,  7, ''),
+        ('sel'       ,  6, ' (RSH)'),
+        ('sreg'      ,  1, ''),
+        ('sub_opcode',  1, ''),
+        ('unused'    ,  0, ''),
+    ])
+
+    # OPCODE_ALU = 7, SUB_OPCODE_ALU_CNT
+    assert_decode_fields("00004074", [  # STAGE_RST
+        ('imm'       ,  0, ''),
+        ('opcode'    ,  7, ''),
+        ('sel'       ,  2, ' (STAGE_RST)'),
+        ('sub_opcode',  2, ''),
+        ('unused1'   ,  0, ''),
+        ('unused2'   ,  0, ''),
+    ])
+    assert_decode_fields("70000074", [  # STAGE_INC 7
+        ('imm'       ,  7, ''),
+        ('opcode'    ,  7, ''),
+        ('sel'       ,  0, ' (STAGE_INC)'),
+        ('sub_opcode',  2, ''),
+        ('unused1'   ,  0, ''),
+        ('unused2'   ,  0, ''),
+    ])
+    assert_decode_fields("30002074", [  # STAGE_DEC 3
+        ('imm'       ,  3, ''),
+        ('opcode'    ,  7, ''),
+        ('sel'       ,  1, ' (STAGE_DEC)'),
+        ('sub_opcode',  2, ''),
+        ('unused1'   ,  0, ''),
+        ('unused2'   ,  0, ''),
+    ])
+
+    # OPCODE_BRANCH = 8, SUB_OPCODE_BX
+    assert_decode_fields("00002080", [  # JUMP r0
+        ('addr'      ,  0, ''),
+        ('dreg'      ,  0, ''),
+        ('opcode'    ,  8, ''),
+        ('reg'       ,  1, ''),
+        ('sub_opcode',  0, ''),
+        ('type'      ,  0, ' (--)'),
+        ('unused'    ,  0, ''),
+    ])
+    assert_decode_fields("01006080", [  # JUMP r1, EQ
+        ('addr'      ,  0, ''),
+        ('dreg'      ,  1, ''),
+        ('opcode'    ,  8, ''),
+        ('reg'       ,  1, ''),
+        ('sub_opcode',  0, ''),
+        ('type'      ,  1, ' (EQ)'),
+        ('unused'    ,  0, ''),
+    ])
+    assert_decode_fields("0200a080", [  # JUMP r2, OV
+        ('addr'      ,  0, ''),
+        ('dreg'      ,  2, ''),
+        ('opcode'    ,  8, ''),
+        ('reg'       ,  1, ''),
+        ('sub_opcode',  0, ''),
+        ('type'      ,  2, ' (OV)'),
+        ('unused'    ,  0, ''),
+    ])
+    assert_decode_fields("00000080", [  # JUMP 0
+        ('addr'      ,  0, ''),
+        ('dreg'      ,  0, ''),
+        ('opcode'    ,  8, ''),
+        ('reg'       ,  0, ''),
+        ('sub_opcode',  0, ''),
+        ('type'      ,  0, ' (--)'),
+        ('unused'    ,  0, ''),
+    ])
+    assert_decode_fields("04004080", [  # JUMP 1, EQ
+        ('addr'      ,  1, ''),
+        ('dreg'      ,  0, ''),
+        ('opcode'    ,  8, ''),
+        ('reg'       ,  0, ''),
+        ('sub_opcode',  0, ''),
+        ('type'      ,  1, ' (EQ)'),
+        ('unused'    ,  0, ''),
+    ])
+    assert_decode_fields("08008080", [  # JUMP 2, OV
+        ('addr'      ,  2, ''),
+        ('dreg'      ,  0, ''),
+        ('opcode'    ,  8, ''),
+        ('reg'       ,  0, ''),
+        ('sub_opcode',  0, ''),
+        ('type'      ,  2, ' (OV)'),
+        ('unused'    ,  0, ''),
+    ])
+
+    # OPCODE_BRANCH = 8, SUB_OPCODE_BR
+    assert_decode_fields("01000082", [  # JUMPR 0, 1, LT
+        ('cmp'       ,  0, ' (LT)'),
+        ('imm'       ,  1, ''),
+        ('offset'    ,  0, ''),
+        ('opcode'    ,  8, ''),
+        ('sign'      ,  0, ''),
+        ('sub_opcode',  1, ''),
+    ])
+    assert_decode_fields("05000382", [  # JUMPR 1, 5, GE
+        ('cmp'       ,  1, ' (GE)'),
+        ('imm'       ,  5, ''),
+        ('offset'    ,  1, ''),
+        ('opcode'    ,  8, ''),
+        ('sign'      ,  0, ''),
+        ('sub_opcode',  1, ''),
+    ])
+
+    # OPCODE_BRANCH = 8, SUB_OPCODE_BS
+    assert_decode_fields("01000084", [  # JUMPS 0, 1, LT
+        ('cmp'       ,  0, ' (LT)'),
+        ('imm'       ,  1, ''),
+        ('offset'    ,  0, ''),
+        ('opcode'    ,  8, ''),
+        ('sign'      ,  0, ''),
+        ('sub_opcode',  2, ''),
+        ('unused'    ,  0, ''),
+    ])
+    assert_decode_fields("05800284", [  # JUMPS 1, 5, GE
+        ('cmp'       ,  1, ' (GE)'),
+        ('imm'       ,  5, ''),
+        ('offset'    ,  1, ''),
+        ('opcode'    ,  8, ''),
+        ('sign'      ,  0, ''),
+        ('sub_opcode',  2, ''),
+        ('unused'    ,  0, ''),
+    ])
+    assert_decode_fields("09000584", [  # JUMPS 2, 9, LE
+        ('cmp'       ,  2, ' (LE)'),
+        ('imm'       ,  9, ''),
+        ('offset'    ,  2, ''),
+        ('opcode'    ,  8, ''),
+        ('sign'      ,  0, ''),
+        ('sub_opcode',  2, ''),
+        ('unused'    ,  0, ''),
+    ])
+
+    # OPCODE_END = 9, SUB_OPCODE_END
+    assert_decode_fields("01000090", [  # WAKE
+        ('opcode'    ,  9, ''),
+        ('sub_opcode',  0, ''),
+        ('unused'    ,  0, ''),
+        ('wakeup'    ,  1, ''),
+    ])
+
+    # OPCODE_END = 9, SUB_OPCODE_SLEEP
+    assert_decode_fields("07000092", [  # SLEEP 7
+        ('cycle_sel' ,  7, ''),
+        ('opcode'    ,  9, ''),
+        ('sub_opcode',  1, ''),
+        ('unused'    ,  0, ''),
+    ])
+
+    # OPCODE_TSENS = 10
+    assert_decode_fields("090000a0", [  # TSENS r0, 0
+        ('delay'     ,  2, ''),
+        ('dreg'      ,  1, ''),
+        ('opcode'    , 10, ''),
+        ('unused'    ,  0, ''),
+    ])
+
+    # OPCODE_HALT = 11
+    assert_decode_fields("000000b0", [  # HALT
+        ('opcode'    , 11, ''),
+        ('unused'    ,  0, ''),
+    ])
+
+    # OPCODE_LD = 13
+    assert_decode_fields("060000d0", [  # LD r2, r1, 0
+        ('dreg'      ,  2, ''),
+        ('offset'    ,  0, ''),
+        ('opcode'    , 13, ''),
+        ('sreg'      ,  1, ''),
+        ('unused1'   ,  0, ''),
+        ('unused2'   ,  0, ''),
+    ])
+
+
 if __name__ == '__main__':
     # run all methods marked with @test
     for t in tests:
diff --git a/tools/disassemble.py b/tools/disassemble.py
index 184b128..ee992e6 100644
--- a/tools/disassemble.py
+++ b/tools/disassemble.py
@@ -117,17 +117,7 @@ def decode_instruction(i):
     return ins, name
 
 
-def decode_instruction_and_print(i):
-    print(ubinascii.hexlify(i.to_bytes(4, 'little')))
-
-    try:
-        ins, name = decode_instruction(i)
-    except Exception as e:
-        print(e)
-        return
-
-    print(name)
-
+def get_instruction_fields(ins):
     possible_fields = (
         'addr', 'cmp', 'cycle_sel', 'cycles', 'data', 'delay', 'dreg',
         'high', 'i2c_sel', 'imm', 'low', 'mux', 'offset', 'opcode',
@@ -135,6 +125,7 @@ def decode_instruction_and_print(i):
         'sub_addr', 'sub_opcode', 'treg', 'type', 'unused', 'unused1',
         'unused2', 'wakeup'
     )
+    field_details = []
     for field in possible_fields:
         try:
             # eval is ugly but constrained to possible_fields and variable ins
@@ -151,6 +142,24 @@ def decode_instruction_and_print(i):
             extra = ' (%s)' % jump_types[val]
         elif field == 'cmp':  # JUMPR/JUMPS
             extra = ' (%s)' % cmp_ops[val]
+
+        field_details.append((field, val, extra))
+
+    return field_details
+
+
+def decode_instruction_and_print(i):
+    print(ubinascii.hexlify(i.to_bytes(4, 'little')))
+
+    try:
+        ins, name = decode_instruction(i)
+    except Exception as e:
+        print(e)
+        return
+
+    print(name)
+
+    for field, val, extra in get_instruction_fields(ins):
         print("  {:10} = {:3}{}".format(field, val, extra))
 
 

From 6720584b3a75232b0bcaba897295024b7eca2571 Mon Sep 17 00:00:00 2001
From: Wilko Nienhaus <wilko.nienhaus@gmail.com>
Date: Mon, 26 Jun 2023 19:58:58 +0300
Subject: [PATCH 07/17] Show empty "instructions" as <empty>

These are likely memory left empty for storing data.
---
 tests/disassemble.py | 5 +++++
 tools/disassemble.py | 3 +++
 2 files changed, 8 insertions(+)

diff --git a/tests/disassemble.py b/tests/disassemble.py
index 012fc14..4a8ad24 100644
--- a/tests/disassemble.py
+++ b/tests/disassemble.py
@@ -52,6 +52,11 @@ def test_unknown_instruction():
     assert_decode_exception("10000001", 'Unknown instruction')
 
 
+@test
+def test_empty_instruction():
+    assert_decode_exception("00000000", '<empty>')
+
+
 # All hex sequences were generated using our assembler.
 # Note: disassembled instructions always show field values according
 # to what is actually encoded into the binary instruction, not as per
diff --git a/tools/disassemble.py b/tools/disassemble.py
index ee992e6..bdf3c18 100644
--- a/tools/disassemble.py
+++ b/tools/disassemble.py
@@ -89,6 +89,9 @@
 
 
 def decode_instruction(i):
+    if i == 0:
+        raise Exception('<empty>')
+
     ins = opcodes._end
     ins.all = i  # abuse a struct to get opcode
 

From 2a06a54874915cc8d2e1ea614b16564eacc97722 Mon Sep 17 00:00:00 2001
From: Wilko Nienhaus <wilko.nienhaus@gmail.com>
Date: Mon, 26 Jun 2023 23:46:23 +0300
Subject: [PATCH 08/17] Add support for disassembling a provided file

The original "manual disassembling" now requires the "-m" option,
followed by the sequence of hex digits representing the instructions.

The sequence of hex digits does not need to be quoted. All parameters
after -m will be joined together into a sequence of hex digits.
---
 tools/disassemble.py | 61 +++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 55 insertions(+), 6 deletions(-)

diff --git a/tools/disassemble.py b/tools/disassemble.py
index bdf3c18..f670f1f 100644
--- a/tools/disassemble.py
+++ b/tools/disassemble.py
@@ -151,6 +151,17 @@ def get_instruction_fields(ins):
     return field_details
 
 
+def chunk_into_words(code, bytes_per_word, byteorder):
+    chunks = [
+        ubinascii.hexlify(code[i:i + bytes_per_word])
+        for i in range(0, len(code), bytes_per_word)
+    ]
+
+    words = [int.from_bytes(ubinascii.unhexlify(i), byteorder) for i in chunks]
+
+    return words
+
+
 def decode_instruction_and_print(i):
     print(ubinascii.hexlify(i.to_bytes(4, 'little')))
 
@@ -180,28 +191,66 @@ def disassemble_manually(byte_sequence_string):
         decode_instruction_and_print(i)
 
 
+def disassemble_file(filename):
+    with open(filename, 'rb') as f:
+        data = f.read()
+
+    code = data[12:]  # text_offset (where code starts) is always 12 for ULP binaries
+    words = chunk_into_words(code, bytes_per_word=4, byteorder='little')
+
+    for i in words:
+        decode_instruction_and_print(i)
+
+
 def print_help():
-    print('Usage: disassemble.py [<options>] <byte_sequence>')
+    print('Usage: disassemble.py [<options>] [-m <byte_sequence> | <filename>]')
     print('')
     print('Options:')
-    print('  -h               Show this help text')
-    print('  <byte_sequence>  Sequence of hex bytes (8 per instruction)')
+    print('  -h                  Show this help text')
+    print('  -m <byte_sequence>  Sequence of hex bytes (8 per instruction)')
+    print('  <filename>          Path to ULP binary')
     pass
 
 
 def handle_cmdline(params):
-    byte_sequence = ''
+    filename = None
+    byte_sequence = None
 
     while params:
         if params[0] == '-h':
             print_help()
             sys.exit(0)
+        elif params[0] == '-m':
+            if len(params) == 1:
+                print_help()
+                sys.exit(1)
+            params = params[1:] # remove -m from list
+
+            sequence_len = len(params)
+            for i in range(0, len(params)):
+                if params[i][0] == '-':  # start of a next option
+                    sequence_len = i-1
+                    break
+
+            if sequence_len < 0:
+                print_help()
+                sys.exit(1)
+
+            byte_sequence = "".join(params[:sequence_len+1])
+            params = params[sequence_len:]
+        elif params[0][0] == '-':
+            # ignore unknown options for now
+            pass
         else:
-            byte_sequence += params[0]
+            if not filename:
+                filename = params[0]
 
         params = params[1:]  # remove first param from list
 
-    disassemble_manually(byte_sequence)
+    if byte_sequence:
+        disassemble_manually(byte_sequence)
+    elif filename:
+        disassemble_file(filename)
 
 
 if sys.argv: # if run from cmdline

From a4867e8fed7b71a936c1acc8d06f406efc272e66 Mon Sep 17 00:00:00 2001
From: Wilko Nienhaus <wilko.nienhaus@gmail.com>
Date: Tue, 27 Jun 2023 18:18:09 +0300
Subject: [PATCH 09/17] Add verbose option. Hide field level detail when not
 verbose.

---
 tools/disassemble.py | 23 ++++++++++++++---------
 1 file changed, 14 insertions(+), 9 deletions(-)

diff --git a/tools/disassemble.py b/tools/disassemble.py
index f670f1f..d4dbdb7 100644
--- a/tools/disassemble.py
+++ b/tools/disassemble.py
@@ -162,7 +162,7 @@ def chunk_into_words(code, bytes_per_word, byteorder):
     return words
 
 
-def decode_instruction_and_print(i):
+def decode_instruction_and_print(i, verbose=False):
     print(ubinascii.hexlify(i.to_bytes(4, 'little')))
 
     try:
@@ -173,11 +173,12 @@ def decode_instruction_and_print(i):
 
     print(name)
 
-    for field, val, extra in get_instruction_fields(ins):
-        print("  {:10} = {:3}{}".format(field, val, extra))
+    if verbose:
+        for field, val, extra in get_instruction_fields(ins):
+            print("  {:10} = {:3}{}".format(field, val, extra))
 
 
-def disassemble_manually(byte_sequence_string):
+def disassemble_manually(byte_sequence_string, verbose=False):
     sequence = byte_sequence_string.strip().replace(' ','')
     chars_per_instruction = 8
     list = [
@@ -188,10 +189,10 @@ def disassemble_manually(byte_sequence_string):
     for instruction in list:
         byte_sequence = ubinascii.unhexlify(instruction.replace(' ',''))
         i = int.from_bytes(byte_sequence, 'little')
-        decode_instruction_and_print(i)
+        decode_instruction_and_print(i, verbose)
 
 
-def disassemble_file(filename):
+def disassemble_file(filename, verbose=False):
     with open(filename, 'rb') as f:
         data = f.read()
 
@@ -199,7 +200,7 @@ def disassemble_file(filename):
     words = chunk_into_words(code, bytes_per_word=4, byteorder='little')
 
     for i in words:
-        decode_instruction_and_print(i)
+        decode_instruction_and_print(i, verbose)
 
 
 def print_help():
@@ -208,11 +209,13 @@ def print_help():
     print('Options:')
     print('  -h                  Show this help text')
     print('  -m <byte_sequence>  Sequence of hex bytes (8 per instruction)')
+    print('  -v                  Verbose mode. Also show instruction fields')
     print('  <filename>          Path to ULP binary')
     pass
 
 
 def handle_cmdline(params):
+    verbose = False
     filename = None
     byte_sequence = None
 
@@ -238,6 +241,8 @@ def handle_cmdline(params):
 
             byte_sequence = "".join(params[:sequence_len+1])
             params = params[sequence_len:]
+        elif params[0] == '-v':
+            verbose = True
         elif params[0][0] == '-':
             # ignore unknown options for now
             pass
@@ -248,9 +253,9 @@ def handle_cmdline(params):
         params = params[1:]  # remove first param from list
 
     if byte_sequence:
-        disassemble_manually(byte_sequence)
+        disassemble_manually(byte_sequence, verbose)
     elif filename:
-        disassemble_file(filename)
+        disassemble_file(filename, verbose)
 
 
 if sys.argv: # if run from cmdline

From b51677df56c4f9f3d6f1283d1fcfcc9761848879 Mon Sep 17 00:00:00 2001
From: Wilko Nienhaus <wilko.nienhaus@gmail.com>
Date: Tue, 27 Jun 2023 18:20:59 +0300
Subject: [PATCH 10/17] Improve/clean-up disassembled output format

Now the instruction (hex) and disassembled code will appear on one line
next to each other and the bytes are no longer printed with Python
specific formatting (not wrapped in b''). This results in a much cleaner
looking output.

Example output:

40008072  MOVE r0, 4
010000d0  LD r1, r0, 0
---
 tools/disassemble.py | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/tools/disassemble.py b/tools/disassemble.py
index d4dbdb7..d92ef8f 100644
--- a/tools/disassemble.py
+++ b/tools/disassemble.py
@@ -162,20 +162,24 @@ def chunk_into_words(code, bytes_per_word, byteorder):
     return words
 
 
-def decode_instruction_and_print(i, verbose=False):
-    print(ubinascii.hexlify(i.to_bytes(4, 'little')))
+def print_code_line(i, asm):
+    lineformat = '{0}  {1}'
+    hex = ubinascii.hexlify(i.to_bytes(4, 'little'))
+    print(lineformat.format(hex.decode('utf-8'), asm))
+
 
+def decode_instruction_and_print(i, verbose=False):
     try:
         ins, name = decode_instruction(i)
     except Exception as e:
-        print(e)
+        print_code_line(i, e)
         return
 
-    print(name)
+    print_code_line(i, name)
 
     if verbose:
         for field, val, extra in get_instruction_fields(ins):
-            print("  {:10} = {:3}{}".format(field, val, extra))
+            print("           {:10} = {:3}{}".format(field, val, extra))
 
 
 def disassemble_manually(byte_sequence_string, verbose=False):

From 40ea7e93163410b565a31f55f8a9939215d0beb8 Mon Sep 17 00:00:00 2001
From: Wilko Nienhaus <wilko.nienhaus@gmail.com>
Date: Tue, 27 Jun 2023 18:30:38 +0300
Subject: [PATCH 11/17] Add byte offset to output to make seeing offsets easier

Offsets are in number of bytes (matches how 'GNU as' outputs listings)
---
 tools/disassemble.py | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/tools/disassemble.py b/tools/disassemble.py
index d92ef8f..bc083fb 100644
--- a/tools/disassemble.py
+++ b/tools/disassemble.py
@@ -162,24 +162,24 @@ def chunk_into_words(code, bytes_per_word, byteorder):
     return words
 
 
-def print_code_line(i, asm):
-    lineformat = '{0}  {1}'
+def print_code_line(byte_offset, i, asm):
+    lineformat = '{0:04x}  {1}  {2}'
     hex = ubinascii.hexlify(i.to_bytes(4, 'little'))
-    print(lineformat.format(hex.decode('utf-8'), asm))
+    print(lineformat.format(byte_offset, hex.decode('utf-8'), asm))
 
 
-def decode_instruction_and_print(i, verbose=False):
+def decode_instruction_and_print(byte_offset, i, verbose=False):
     try:
         ins, name = decode_instruction(i)
     except Exception as e:
-        print_code_line(i, e)
+        print_code_line(byte_offset, i, e)
         return
 
-    print_code_line(i, name)
+    print_code_line(byte_offset, i, name)
 
     if verbose:
         for field, val, extra in get_instruction_fields(ins):
-            print("           {:10} = {:3}{}".format(field, val, extra))
+            print("                 {:10} = {:3}{}".format(field, val, extra))
 
 
 def disassemble_manually(byte_sequence_string, verbose=False):
@@ -190,10 +190,10 @@ def disassemble_manually(byte_sequence_string, verbose=False):
         for i in range(0, len(sequence), chars_per_instruction)
     ]
 
-    for instruction in list:
+    for idx, instruction in enumerate(list):
         byte_sequence = ubinascii.unhexlify(instruction.replace(' ',''))
         i = int.from_bytes(byte_sequence, 'little')
-        decode_instruction_and_print(i, verbose)
+        decode_instruction_and_print(idx << 2, i, verbose)
 
 
 def disassemble_file(filename, verbose=False):
@@ -203,8 +203,8 @@ def disassemble_file(filename, verbose=False):
     code = data[12:]  # text_offset (where code starts) is always 12 for ULP binaries
     words = chunk_into_words(code, bytes_per_word=4, byteorder='little')
 
-    for i in words:
-        decode_instruction_and_print(i, verbose)
+    for idx, i in enumerate(words):
+        decode_instruction_and_print(idx << 2, i, verbose)
 
 
 def print_help():

From 08bb182199c53865b91094aca765f9554e936adf Mon Sep 17 00:00:00 2001
From: Wilko Nienhaus <wilko.nienhaus@gmail.com>
Date: Tue, 27 Jun 2023 18:35:33 +0300
Subject: [PATCH 12/17] use text_offset from ULP header instead of hardcoded
 offset

---
 tools/disassemble.py | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/tools/disassemble.py b/tools/disassemble.py
index bc083fb..dbf5565 100644
--- a/tools/disassemble.py
+++ b/tools/disassemble.py
@@ -1,3 +1,4 @@
+from uctypes import struct, addressof, LITTLE_ENDIAN, UINT16, UINT32
 from esp32_ulp.opcodes import RD_REG_PERIPH_RTC_CNTL, RD_REG_PERIPH_RTC_IO, RD_REG_PERIPH_RTC_I2C, \
     RD_REG_PERIPH_SENS, DR_REG_MAX_DIRECT
 import esp32_ulp.opcodes as opcodes
@@ -200,7 +201,16 @@ def disassemble_file(filename, verbose=False):
     with open(filename, 'rb') as f:
         data = f.read()
 
-    code = data[12:]  # text_offset (where code starts) is always 12 for ULP binaries
+    binary_header_struct_def = dict(
+        magic = 0 | UINT32,
+        text_offset = 4 | UINT16,
+        text_size = 6 | UINT16,
+        data_size = 8 | UINT16,
+        bss_size = 10 | UINT16,
+    )
+    h = struct(addressof(data), binary_header_struct_def, LITTLE_ENDIAN)
+
+    code = data[h.text_offset:]
     words = chunk_into_words(code, bytes_per_word=4, byteorder='little')
 
     for idx, i in enumerate(words):

From 15a631ae7ed837357e2e020b5d3ef332bd54ad74 Mon Sep 17 00:00:00 2001
From: Wilko Nienhaus <wilko.nienhaus@gmail.com>
Date: Tue, 27 Jun 2023 18:38:13 +0300
Subject: [PATCH 13/17] Output header in verbose mode. Also validate ULP
 header.

If the magic bytes in the header are not 'ulp\0' then the file
is not a ULP binary or is otherwise corrupt.
---
 tools/disassemble.py | 19 ++++++++++++++++++-
 1 file changed, 18 insertions(+), 1 deletion(-)

diff --git a/tools/disassemble.py b/tools/disassemble.py
index dbf5565..4bcb958 100644
--- a/tools/disassemble.py
+++ b/tools/disassemble.py
@@ -163,6 +163,16 @@ def chunk_into_words(code, bytes_per_word, byteorder):
     return words
 
 
+def print_ulp_header(h):
+    print('ULP magic    : %s (0x%08x)' % (h.magic.to_bytes(4, 'little'), h.magic))
+    print('.text offset : %s (0x%02x)' % (h.text_offset, h.text_offset))
+    print('.text size   : %s (0x%02x)' % (h.text_size, h.text_size))
+    print('.data offset : %s (0x%02x)' % (h.text_offset+h.text_size, h.text_offset+h.text_size))
+    print('.data size   : %s (0x%02x)' % (h.data_size, h.data_size))
+    print('.bss size    : %s (0x%02x)' % (h.bss_size, h.bss_size))
+    print('----------------------------------------')
+
+
 def print_code_line(byte_offset, i, asm):
     lineformat = '{0:04x}  {1}  {2}'
     hex = ubinascii.hexlify(i.to_bytes(4, 'little'))
@@ -210,6 +220,13 @@ def disassemble_file(filename, verbose=False):
     )
     h = struct(addressof(data), binary_header_struct_def, LITTLE_ENDIAN)
 
+    if (h.magic != 0x00706c75):
+        print('Invalid signature: 0x%08x (should be: 0x%08x)' % (h.magic, 0x00706c75))
+        return
+
+    if verbose:
+        print_ulp_header(h)
+
     code = data[h.text_offset:]
     words = chunk_into_words(code, bytes_per_word=4, byteorder='little')
 
@@ -223,7 +240,7 @@ def print_help():
     print('Options:')
     print('  -h                  Show this help text')
     print('  -m <byte_sequence>  Sequence of hex bytes (8 per instruction)')
-    print('  -v                  Verbose mode. Also show instruction fields')
+    print('  -v                  Verbose mode. Show ULP header and fields of each instruction')
     print('  <filename>          Path to ULP binary')
     pass
 

From 59766fb979786c81c1f23533875164e7239c8494 Mon Sep 17 00:00:00 2001
From: Wilko Nienhaus <wilko.nienhaus@gmail.com>
Date: Tue, 27 Jun 2023 18:45:34 +0300
Subject: [PATCH 14/17] Print .text and .data section separately

---
 tools/disassemble.py | 32 ++++++++++++++++++++++++++++----
 1 file changed, 28 insertions(+), 4 deletions(-)

diff --git a/tools/disassemble.py b/tools/disassemble.py
index 4bcb958..7a92552 100644
--- a/tools/disassemble.py
+++ b/tools/disassemble.py
@@ -164,6 +164,7 @@ def chunk_into_words(code, bytes_per_word, byteorder):
 
 
 def print_ulp_header(h):
+    print('header')
     print('ULP magic    : %s (0x%08x)' % (h.magic.to_bytes(4, 'little'), h.magic))
     print('.text offset : %s (0x%02x)' % (h.text_offset, h.text_offset))
     print('.text size   : %s (0x%02x)' % (h.text_size, h.text_size))
@@ -193,6 +194,25 @@ def decode_instruction_and_print(byte_offset, i, verbose=False):
             print("                 {:10} = {:3}{}".format(field, val, extra))
 
 
+def print_text_section(code, verbose=False):
+    print('.text')
+
+    words = chunk_into_words(code, bytes_per_word=4, byteorder='little')
+
+    for idx, i in enumerate(words):
+        decode_instruction_and_print(idx << 2,i , verbose)
+
+
+def print_data_section(data_offset, code):
+    print('.data')
+
+    words = chunk_into_words(code, bytes_per_word=4, byteorder='little')
+
+    for idx, i in enumerate(words):
+        asm = "<empty>" if i == 0 else "<non-empty>"
+        print_code_line(data_offset + (idx << 2), i, asm)
+
+
 def disassemble_manually(byte_sequence_string, verbose=False):
     sequence = byte_sequence_string.strip().replace(' ','')
     chars_per_instruction = 8
@@ -227,11 +247,15 @@ def disassemble_file(filename, verbose=False):
     if verbose:
         print_ulp_header(h)
 
-    code = data[h.text_offset:]
-    words = chunk_into_words(code, bytes_per_word=4, byteorder='little')
+    code = data[h.text_offset:(h.text_offset+h.text_size)]
+    print_text_section(code, verbose)
 
-    for idx, i in enumerate(words):
-        decode_instruction_and_print(idx << 2, i, verbose)
+    if verbose:
+        print('----------------------------------------')
+
+    data_offset = h.text_offset+h.text_size
+    code = data[data_offset:(data_offset+h.data_size)]
+    print_data_section(data_offset-h.text_offset, code)
 
 
 def print_help():

From 10a5051b92e5fab6f7ebd9e8829702a1c82c5519 Mon Sep 17 00:00:00 2001
From: Wilko Nienhaus <wilko.nienhaus@gmail.com>
Date: Tue, 27 Jun 2023 20:17:44 +0300
Subject: [PATCH 15/17] Add hex representation for any field value > 9

Some values are easier to read as hex values than as decimal.
For example peripheral register addresses like 0x123 where the
first digit (1) indicates which peripheral register to address,
while the remaining 2 digits (0x23) are the offset within that
register in number of 32-bit words.

Also absolute JUMP addresses are easier to find via the hex value
given that the disassembler includes the byte offset of each
instruction in hex format.
---
 tests/disassemble.py | 18 +++++++++---------
 tools/disassemble.py |  5 ++++-
 2 files changed, 13 insertions(+), 10 deletions(-)

diff --git a/tests/disassemble.py b/tests/disassemble.py
index 4a8ad24..7e7f6df 100644
--- a/tests/disassemble.py
+++ b/tests/disassemble.py
@@ -150,22 +150,22 @@ def test_all_instructions():
 @test
 def test_instruction_field_decoding():
     # OPCODE_WR_REG = 1
-    assert_decode_fields("000c8810", [  # REG_WR 0x0, 1, 2, 3
-        ('addr'      ,  0, ''),
+    assert_decode_fields("230d8810", [  # REG_WR 0x123, 1, 2, 3
+        ('addr'      , 35, ' (0x23)'),
         ('data'      ,  3, ''),
         ('high'      ,  1, ''),
         ('low'       ,  2, ''),
         ('opcode'    ,  1, ''),
-        ('periph_sel',  0, ''),
+        ('periph_sel',  1, ''),
     ])
 
     # OPCODE_RD_REG = 2
-    assert_decode_fields("03000421", [  # REG_RD 0x3, 2, 1
-        ('addr'      ,  3, ''),
+    assert_decode_fields("21030421", [  # REG_RD 0x321, 2, 1
+        ('addr'      , 33, ' (0x21)'),
         ('high'      ,  2, ''),
         ('low'       ,  1, ''),
         ('opcode'    ,  2, ''),
-        ('periph_sel',  0, ''),
+        ('periph_sel',  3, ''),
         ('unused'    ,  0, ''),
     ])
 
@@ -558,13 +558,13 @@ def test_instruction_field_decoding():
     assert_decode_fields("090000a0", [  # TSENS r0, 0
         ('delay'     ,  2, ''),
         ('dreg'      ,  1, ''),
-        ('opcode'    , 10, ''),
+        ('opcode'    , 10, ' (0x0a)'),
         ('unused'    ,  0, ''),
     ])
 
     # OPCODE_HALT = 11
     assert_decode_fields("000000b0", [  # HALT
-        ('opcode'    , 11, ''),
+        ('opcode'    , 11, ' (0x0b)'),
         ('unused'    ,  0, ''),
     ])
 
@@ -572,7 +572,7 @@ def test_instruction_field_decoding():
     assert_decode_fields("060000d0", [  # LD r2, r1, 0
         ('dreg'      ,  2, ''),
         ('offset'    ,  0, ''),
-        ('opcode'    , 13, ''),
+        ('opcode'    , 13, ' (0x0d)'),
         ('sreg'      ,  1, ''),
         ('unused1'   ,  0, ''),
         ('unused2'   ,  0, ''),
diff --git a/tools/disassemble.py b/tools/disassemble.py
index 7a92552..f647576 100644
--- a/tools/disassemble.py
+++ b/tools/disassemble.py
@@ -131,12 +131,15 @@ def get_instruction_fields(ins):
     )
     field_details = []
     for field in possible_fields:
+        extra = ''
         try:
             # eval is ugly but constrained to possible_fields and variable ins
             val = eval('i.%s' % field, {}, {'i': ins})
+            if (val>9):
+                extra = ' (0x%02x)' % val
         except KeyError:
             continue
-        extra = ''
+
         if field == 'sel':  # ALU
             if ins.sub_opcode == opcodes.SUB_OPCODE_ALU_CNT:
                 extra = ' (%s)' % alu_cnt_ops[val]

From eff6f96e50d672835cd344cc6f5008f7c1bb5dc1 Mon Sep 17 00:00:00 2001
From: Wilko Nienhaus <wilko.nienhaus@gmail.com>
Date: Tue, 27 Jun 2023 21:01:48 +0300
Subject: [PATCH 16/17] Add integration tests for disassembler

Test both disassembling a file (assembled from source for the test),
and disassembling a byte sequence provided on the command line.

Source code to be assembled and expected disassembler listings are
provided in the tests/fixtures directory.
---
 .github/workflows/run_tests.yaml  |   7 +
 tests/03_disassembler_tests.sh    |  67 +++++++
 tests/fixtures/all_opcodes-v.lst  | 313 ++++++++++++++++++++++++++++++
 tests/fixtures/all_opcodes.S      |  62 ++++++
 tests/fixtures/all_opcodes.lst    |  45 +++++
 tests/fixtures/manual_bytes-v.lst |  42 ++++
 tests/fixtures/manual_bytes.lst   |   6 +
 7 files changed, 542 insertions(+)
 create mode 100755 tests/03_disassembler_tests.sh
 create mode 100644 tests/fixtures/all_opcodes-v.lst
 create mode 100644 tests/fixtures/all_opcodes.S
 create mode 100644 tests/fixtures/all_opcodes.lst
 create mode 100644 tests/fixtures/manual_bytes-v.lst
 create mode 100644 tests/fixtures/manual_bytes.lst

diff --git a/.github/workflows/run_tests.yaml b/.github/workflows/run_tests.yaml
index ab8d4bd..40ef419 100644
--- a/.github/workflows/run_tests.yaml
+++ b/.github/workflows/run_tests.yaml
@@ -91,3 +91,10 @@ jobs:
         export PATH=$PATH:${{ steps.fetch_binutils.outputs.bin_dir }}
         cd tests
         ./02_compat_rtc_tests.sh
+
+    - name: Run disassembler tests
+      id: disassembler_tests
+      run: |
+        export PATH=$PATH:${{ steps.build_micropython.outputs.bin_dir }}
+        cd tests
+        ./03_disassembler_tests.sh
diff --git a/tests/03_disassembler_tests.sh b/tests/03_disassembler_tests.sh
new file mode 100755
index 0000000..7c76f11
--- /dev/null
+++ b/tests/03_disassembler_tests.sh
@@ -0,0 +1,67 @@
+#!/bin/bash
+
+set -e
+
+test_disassembling_a_file() {
+    local verbose
+    if [ "$1" == verbose ]; then
+        verbose=-v
+        echo -e "Testing disassembling a file in VERBOSE mode"
+    else
+        echo -e "Testing disassembling a file in NORMAL mode"
+    fi
+
+    testname=all_opcodes
+    fixture=fixtures/${testname}.S
+    echo -e "\tBuilding $fixture using micropython-esp32-ulp"
+
+    log_file="${testname}.log"
+    ulp_file="fixtures/${testname}.ulp"
+    micropython -m esp32_ulp $fixture 1>$log_file   # generates $ulp_file
+
+    lst_file="${testname}.lst"
+    lst_file_fixture=fixtures/${testname}${verbose}.lst
+    echo -e "\tDisassembling $ulp_file using micropython-esp32-ulp disassembler"
+    micropython tools/disassemble.py $verbose $ulp_file > $lst_file
+
+    if ! diff $lst_file_fixture $lst_file 1>/dev/null; then
+        echo -e "\tDisassembled output differs from expected output!"
+        echo ""
+        echo "Disassembly test failed for $fixture"
+        echo "micropython-esp32-ulp log:"
+        cat $log_file
+        echo "Diff of disassembly: expected vs actual"
+        diff -u $lst_file_fixture $lst_file
+    fi
+}
+
+test_disassembling_a_manual_sequence() {
+    local verbose
+    if [ "$1" == verbose ]; then
+        verbose=-v
+        echo -e "Testing disassembling a manual byte sequence in VERBOSE mode"
+    else
+        echo -e "Testing disassembling a manual byte sequence in NORMAL mode"
+    fi
+
+    sequence="e1af 8c72 0100 0068 2705 cc19 0005 681d 0000 00a0 0000 0074"
+
+    lst_file="manual_bytes.lst"
+    lst_file_fixture=fixtures/manual_bytes${verbose}.lst
+    echo -e "\tDisassembling manual byte sequence using micropython-esp32-ulp disassembler"
+    micropython tools/disassemble.py $verbose -m $sequence > $lst_file
+
+    if ! diff $lst_file_fixture $lst_file 1>/dev/null; then
+        echo -e "\tDisassembled output differs from expected output!"
+        echo ""
+        echo "Disassembly test failed for manual byte sequence"
+        echo "Diff of disassembly: expected vs actual"
+        diff -u $lst_file_fixture $lst_file
+    fi
+}
+
+test_disassembling_a_file
+test_disassembling_a_file verbose
+
+test_disassembling_a_manual_sequence
+test_disassembling_a_manual_sequence verbose
diff --git a/tests/fixtures/all_opcodes-v.lst b/tests/fixtures/all_opcodes-v.lst
new file mode 100644
index 0000000..4bc7975
--- /dev/null
+++ b/tests/fixtures/all_opcodes-v.lst
@@ -0,0 +1,313 @@
+header
+ULP magic    : b'ulp\x00' (0x00706c75)
+.text offset : 12 (0x0c)
+.text size   : 164 (0xa4)
+.data offset : 176 (0xb0)
+.data size   : 8 (0x08)
+.bss size    : 0 (0x00)
+----------------------------------------
+.text
+0000  230d8810  REG_WR 0x123, 1, 2, 3
+                 addr       =  35 (0x23)
+                 data       =   3
+                 high       =   1
+                 low        =   2
+                 opcode     =   1
+                 periph_sel =   1
+0004  21030421  REG_RD 0x321, 2, 1
+                 addr       =  33 (0x21)
+                 high       =   2
+                 low        =   1
+                 opcode     =   2
+                 periph_sel =   3
+                 unused     =   0
+0008  03001130  I2C_RD 3, 2, 1, 0
+                 data       =   0
+                 high       =   2
+                 i2c_sel    =   0
+                 low        =   1
+                 opcode     =   3
+                 rw         =   0
+                 sub_addr   =   3
+                 unused     =   0
+000c  00011339  I2C_WR 0, 2, 3, 4
+                 data       =   1
+                 high       =   2
+                 i2c_sel    =   4
+                 low        =   3
+                 opcode     =   3
+                 rw         =   1
+                 sub_addr   =   0
+                 unused     =   0
+0010  00000040  NOP
+                 cycles     =   0
+                 opcode     =   4
+                 unused     =   0
+0014  07000040  WAIT 7
+                 cycles     =   7
+                 opcode     =   4
+                 unused     =   0
+0018  07000050  ADC r3, 1, 0
+                 cycles     =   0
+                 dreg       =   3
+                 mux        =   1
+                 opcode     =   5
+                 sar_sel    =   0
+                 unused1    =   0
+                 unused2    =   0
+001c  0b000068  ST r3, r2, 0
+                 dreg       =   2
+                 offset     =   0
+                 opcode     =   6
+                 sreg       =   3
+                 sub_opcode =   4
+                 unused1    =   0
+                 unused2    =   0
+0020  06000070  ADD r2, r1, r0
+                 dreg       =   2
+                 opcode     =   7
+                 sel        =   0 (ADD)
+                 sreg       =   1
+                 sub_opcode =   0
+                 treg       =   0
+                 unused     =   0
+0024  06002070  SUB r2, r1, r0
+                 dreg       =   2
+                 opcode     =   7
+                 sel        =   1 (SUB)
+                 sreg       =   1
+                 sub_opcode =   0
+                 treg       =   0
+                 unused     =   0
+0028  06004070  AND r2, r1, r0
+                 dreg       =   2
+                 opcode     =   7
+                 sel        =   2 (AND)
+                 sreg       =   1
+                 sub_opcode =   0
+                 treg       =   0
+                 unused     =   0
+002c  06006070  OR r2, r1, r0
+                 dreg       =   2
+                 opcode     =   7
+                 sel        =   3 (OR)
+                 sreg       =   1
+                 sub_opcode =   0
+                 treg       =   0
+                 unused     =   0
+0030  16008070  MOVE r2, r1
+                 dreg       =   2
+                 opcode     =   7
+                 sel        =   4 (MOVE)
+                 sreg       =   1
+                 sub_opcode =   0
+                 treg       =   1
+                 unused     =   0
+0034  0600a070  LSH r2, r1, r0
+                 dreg       =   2
+                 opcode     =   7
+                 sel        =   5 (LSH)
+                 sreg       =   1
+                 sub_opcode =   0
+                 treg       =   0
+                 unused     =   0
+0038  0600c070  RSH r2, r1, r0
+                 dreg       =   2
+                 opcode     =   7
+                 sel        =   6 (RSH)
+                 sreg       =   1
+                 sub_opcode =   0
+                 treg       =   0
+                 unused     =   0
+003c  06000072  ADD r2, r1, 0
+                 dreg       =   2
+                 imm        =   0
+                 opcode     =   7
+                 sel        =   0 (ADD)
+                 sreg       =   1
+                 sub_opcode =   1
+                 unused     =   0
+0040  06002072  SUB r2, r1, 0
+                 dreg       =   2
+                 imm        =   0
+                 opcode     =   7
+                 sel        =   1 (SUB)
+                 sreg       =   1
+                 sub_opcode =   1
+                 unused     =   0
+0044  06004072  AND r2, r1, 0
+                 dreg       =   2
+                 imm        =   0
+                 opcode     =   7
+                 sel        =   2 (AND)
+                 sreg       =   1
+                 sub_opcode =   1
+                 unused     =   0
+0048  06006072  OR r2, r1, 0
+                 dreg       =   2
+                 imm        =   0
+                 opcode     =   7
+                 sel        =   3 (OR)
+                 sreg       =   1
+                 sub_opcode =   1
+                 unused     =   0
+004c  01008072  MOVE r1, 0
+                 dreg       =   1
+                 imm        =   0
+                 opcode     =   7
+                 sel        =   4 (MOVE)
+                 sreg       =   0
+                 sub_opcode =   1
+                 unused     =   0
+0050  0600a072  LSH r2, r1, 0
+                 dreg       =   2
+                 imm        =   0
+                 opcode     =   7
+                 sel        =   5 (LSH)
+                 sreg       =   1
+                 sub_opcode =   1
+                 unused     =   0
+0054  0600c072  RSH r2, r1, 0
+                 dreg       =   2
+                 imm        =   0
+                 opcode     =   7
+                 sel        =   6 (RSH)
+                 sreg       =   1
+                 sub_opcode =   1
+                 unused     =   0
+0058  00004074  STAGE_RST
+                 imm        =   0
+                 opcode     =   7
+                 sel        =   2 (STAGE_RST)
+                 sub_opcode =   2
+                 unused1    =   0
+                 unused2    =   0
+005c  70000074  STAGE_INC 7
+                 imm        =   7
+                 opcode     =   7
+                 sel        =   0 (STAGE_INC)
+                 sub_opcode =   2
+                 unused1    =   0
+                 unused2    =   0
+0060  30002074  STAGE_DEC 3
+                 imm        =   3
+                 opcode     =   7
+                 sel        =   1 (STAGE_DEC)
+                 sub_opcode =   2
+                 unused1    =   0
+                 unused2    =   0
+0064  00002080  JUMP r0
+                 addr       =   0
+                 dreg       =   0
+                 opcode     =   8
+                 reg        =   1
+                 sub_opcode =   0
+                 type       =   0 (--)
+                 unused     =   0
+0068  01006080  JUMP r1, EQ
+                 addr       =   0
+                 dreg       =   1
+                 opcode     =   8
+                 reg        =   1
+                 sub_opcode =   0
+                 type       =   1 (EQ)
+                 unused     =   0
+006c  0200a080  JUMP r2, OV
+                 addr       =   0
+                 dreg       =   2
+                 opcode     =   8
+                 reg        =   1
+                 sub_opcode =   0
+                 type       =   2 (OV)
+                 unused     =   0
+0070  00000080  JUMP 0
+                 addr       =   0
+                 dreg       =   0
+                 opcode     =   8
+                 reg        =   0
+                 sub_opcode =   0
+                 type       =   0 (--)
+                 unused     =   0
+0074  00004080  JUMP 0, EQ
+                 addr       =   0
+                 dreg       =   0
+                 opcode     =   8
+                 reg        =   0
+                 sub_opcode =   0
+                 type       =   1 (EQ)
+                 unused     =   0
+0078  00008080  JUMP 0, OV
+                 addr       =   0
+                 dreg       =   0
+                 opcode     =   8
+                 reg        =   0
+                 sub_opcode =   0
+                 type       =   2 (OV)
+                 unused     =   0
+007c  01000082  JUMPR 0, 1, LT
+                 cmp        =   0 (LT)
+                 imm        =   1
+                 offset     =   0
+                 opcode     =   8
+                 sign       =   0
+                 sub_opcode =   1
+0080  05000382  JUMPR 1, 5, GE
+                 cmp        =   1 (GE)
+                 imm        =   5
+                 offset     =   1
+                 opcode     =   8
+                 sign       =   0
+                 sub_opcode =   1
+0084  01000084  JUMPS 0, 1, LT
+                 cmp        =   0 (LT)
+                 imm        =   1
+                 offset     =   0
+                 opcode     =   8
+                 sign       =   0
+                 sub_opcode =   2
+                 unused     =   0
+0088  05800284  JUMPS 1, 5, GE
+                 cmp        =   1 (GE)
+                 imm        =   5
+                 offset     =   1
+                 opcode     =   8
+                 sign       =   0
+                 sub_opcode =   2
+                 unused     =   0
+008c  09000584  JUMPS 2, 9, LE
+                 cmp        =   2 (LE)
+                 imm        =   9
+                 offset     =   2
+                 opcode     =   8
+                 sign       =   0
+                 sub_opcode =   2
+                 unused     =   0
+0090  01000090  WAKE
+                 opcode     =   9
+                 sub_opcode =   0
+                 unused     =   0
+                 wakeup     =   1
+0094  07000092  SLEEP 7
+                 cycle_sel  =   7
+                 opcode     =   9
+                 sub_opcode =   1
+                 unused     =   0
+0098  090000a0  TSENS r1, 2
+                 delay      =   2
+                 dreg       =   1
+                 opcode     =  10 (0x0a)
+                 unused     =   0
+009c  000000b0  HALT
+                 opcode     =  11 (0x0b)
+                 unused     =   0
+00a0  060000d0  LD r2, r1, 0
+                 dreg       =   2
+                 offset     =   0
+                 opcode     =  13 (0x0d)
+                 sreg       =   1
+                 unused1    =   0
+                 unused2    =   0
+----------------------------------------
+.data
+00a4  00000000  <empty>
+00a8  fecadec0  <non-empty>
diff --git a/tests/fixtures/all_opcodes.S b/tests/fixtures/all_opcodes.S
new file mode 100644
index 0000000..7f8c916
--- /dev/null
+++ b/tests/fixtures/all_opcodes.S
@@ -0,0 +1,62 @@
+.data
+empty: .long 0
+magic: .long 0xc0decafe
+
+.text
+REG_WR 0x123, 1, 2, 3
+
+REG_RD 0x321, 2, 1
+
+I2C_RD 3, 2, 1, 0
+I2C_WR 0, 1, 2, 3, 4
+
+NOP
+WAIT 7
+
+ADC r3, 2, 1
+
+ST r3, r2, 1
+
+ADD r2, r1, r0
+SUB r2, r1, r0
+AND r2, r1, r0
+OR r2, r1, r0
+MOVE r2, r1
+LSH r2, r1, r0
+RSH r2, r1, r0
+
+ADD r2, r1, 0
+SUB r2, r1, 0
+AND r2, r1, 0
+OR r2, r1, 0
+MOVE r1, 0
+LSH r2, r1, 0
+RSH r2, r1, 0
+
+STAGE_RST
+STAGE_INC 7
+STAGE_DEC 3
+
+JUMP r0
+JUMP r1, EQ
+JUMP r2, OV
+
+JUMP 0
+JUMP 0, EQ
+JUMP 0, OV
+
+JUMPR 0, 1, LT
+JUMPR 4, 5, GE
+
+JUMPS 0, 1, LT
+JUMPS 4, 5, GE
+JUMPS 8, 9, LE
+
+WAKE
+SLEEP 7
+
+TSENS r1, 2
+
+HALT
+
+LD r2, r1, 0
diff --git a/tests/fixtures/all_opcodes.lst b/tests/fixtures/all_opcodes.lst
new file mode 100644
index 0000000..2ef1bd7
--- /dev/null
+++ b/tests/fixtures/all_opcodes.lst
@@ -0,0 +1,45 @@
+.text
+0000  230d8810  REG_WR 0x123, 1, 2, 3
+0004  21030421  REG_RD 0x321, 2, 1
+0008  03001130  I2C_RD 3, 2, 1, 0
+000c  00011339  I2C_WR 0, 2, 3, 4
+0010  00000040  NOP
+0014  07000040  WAIT 7
+0018  07000050  ADC r3, 1, 0
+001c  0b000068  ST r3, r2, 0
+0020  06000070  ADD r2, r1, r0
+0024  06002070  SUB r2, r1, r0
+0028  06004070  AND r2, r1, r0
+002c  06006070  OR r2, r1, r0
+0030  16008070  MOVE r2, r1
+0034  0600a070  LSH r2, r1, r0
+0038  0600c070  RSH r2, r1, r0
+003c  06000072  ADD r2, r1, 0
+0040  06002072  SUB r2, r1, 0
+0044  06004072  AND r2, r1, 0
+0048  06006072  OR r2, r1, 0
+004c  01008072  MOVE r1, 0
+0050  0600a072  LSH r2, r1, 0
+0054  0600c072  RSH r2, r1, 0
+0058  00004074  STAGE_RST
+005c  70000074  STAGE_INC 7
+0060  30002074  STAGE_DEC 3
+0064  00002080  JUMP r0
+0068  01006080  JUMP r1, EQ
+006c  0200a080  JUMP r2, OV
+0070  00000080  JUMP 0
+0074  00004080  JUMP 0, EQ
+0078  00008080  JUMP 0, OV
+007c  01000082  JUMPR 0, 1, LT
+0080  05000382  JUMPR 1, 5, GE
+0084  01000084  JUMPS 0, 1, LT
+0088  05800284  JUMPS 1, 5, GE
+008c  09000584  JUMPS 2, 9, LE
+0090  01000090  WAKE
+0094  07000092  SLEEP 7
+0098  090000a0  TSENS r1, 2
+009c  000000b0  HALT
+00a0  060000d0  LD r2, r1, 0
+.data
+00a4  00000000  <empty>
+00a8  fecadec0  <non-empty>
diff --git a/tests/fixtures/manual_bytes-v.lst b/tests/fixtures/manual_bytes-v.lst
new file mode 100644
index 0000000..7f44ea4
--- /dev/null
+++ b/tests/fixtures/manual_bytes-v.lst
@@ -0,0 +1,42 @@
+0000  e1af8c72  MOVE r1, 51966
+                 dreg       =   1
+                 imm        = 51966 (0xcafe)
+                 opcode     =   7
+                 sel        =   4 (MOVE)
+                 sreg       =   0
+                 sub_opcode =   1
+                 unused     =   0
+0004  01000068  ST r1, r0, 0
+                 dreg       =   0
+                 offset     =   0
+                 opcode     =   6
+                 sreg       =   1
+                 sub_opcode =   4
+                 unused1    =   0
+                 unused2    =   0
+0008  2705cc19  REG_WR 0x127, 19, 19, 1
+                 addr       =  39 (0x27)
+                 data       =   1
+                 high       =  19 (0x13)
+                 low        =  19 (0x13)
+                 opcode     =   1
+                 periph_sel =   1
+000c  0005681d  REG_WR 0x100, 26, 26, 1
+                 addr       =   0
+                 data       =   1
+                 high       =  26 (0x1a)
+                 low        =  26 (0x1a)
+                 opcode     =   1
+                 periph_sel =   1
+0010  000000a0  TSENS r0, 0
+                 delay      =   0
+                 dreg       =   0
+                 opcode     =  10 (0x0a)
+                 unused     =   0
+0014  00000074  STAGE_INC 0
+                 imm        =   0
+                 opcode     =   7
+                 sel        =   0 (STAGE_INC)
+                 sub_opcode =   2
+                 unused1    =   0
+                 unused2    =   0
diff --git a/tests/fixtures/manual_bytes.lst b/tests/fixtures/manual_bytes.lst
new file mode 100644
index 0000000..beb1b77
--- /dev/null
+++ b/tests/fixtures/manual_bytes.lst
@@ -0,0 +1,6 @@
+0000  e1af8c72  MOVE r1, 51966
+0004  01000068  ST r1, r0, 0
+0008  2705cc19  REG_WR 0x127, 19, 19, 1
+000c  0005681d  REG_WR 0x100, 26, 26, 1
+0010  000000a0  TSENS r0, 0
+0014  00000074  STAGE_INC 0

From 06b277ebac79fbaa4eb928453455c09ca8bf9e3a Mon Sep 17 00:00:00 2001
From: Wilko Nienhaus <wilko.nienhaus@gmail.com>
Date: Tue, 27 Jun 2023 21:24:28 +0300
Subject: [PATCH 17/17] Add documentation for disassembler

---
 README.rst            |   1 +
 docs/disassembler.rst | 146 ++++++++++++++++++++++++++++++++++++++++++
 docs/index.rst        |   9 +++
 3 files changed, 156 insertions(+)
 create mode 100644 docs/disassembler.rst

diff --git a/README.rst b/README.rst
index 5ffa906..e29d841 100644
--- a/README.rst
+++ b/README.rst
@@ -35,6 +35,7 @@ The following features are supported:
 * expressions in assembly code and constant definitions
 * RTC convenience macros (e.g. ``WRITE_RTC_REG``)
 * many ESP32 ULP code examples found on the web will work unmodified
+* a simple disassembler is also provided
 
 
 Quick start
diff --git a/docs/disassembler.rst b/docs/disassembler.rst
new file mode 100644
index 0000000..b92a19e
--- /dev/null
+++ b/docs/disassembler.rst
@@ -0,0 +1,146 @@
+=====================
+Disassembler
+=====================
+
+micropython-esp32-ulp contains a disassembler for disassembling code for the
+ESP32 ULP (Ultra Low-Power) Co-Processor.
+
+The main purpose of this tool is to inspect what instructions our assembler
+created, what value each field is set to, and to compare this with the output
+created by the assembler from Espressif (part of their `binutils-gdb fork <https://github.com/espressif/binutils-gdb/tree/esp32ulp-elf-2.35>`_),
+which we use as our reference implementation.
+
+
+Usage
+------------------------
+
+To disassemble a ULP binary, simply run:
+
+.. code-block:: bash
+
+   micropython -m tools.disassemble path/to/binary.ulp
+
+You can also specify additional options to ``disassemble.py`` as follows:
+
++--------------------------+----------------------------------------------------------------+
+| Option                   | Description                                                    |
++==========================+================================================================+
+| ``-h``                   | Show help text                                                 |
++--------------------------+----------------------------------------------------------------+
+|| ``-m <bytes sequence>`` || Disassemble a provided sequence of hex bytes                  |
+||                         || (in this case any filename specified is ignored)              |
++--------------------------+----------------------------------------------------------------+
+| ``-v``                   | Verbose mode (shows ULP header and fields of each instruction) |
++--------------------------+----------------------------------------------------------------+
+
+
+Disassembling a file
+------------------------
+
+The simplest and default mode of the disassembler is to disassemble the
+specified file.
+
+Note that the ULP header is validated and files with unknown magic bytes will be
+rejected. The correct 4 magic bytes at the start of a ULP binary are ``ulp\x00``.
+
+Example:
+
+.. code-block:: shell
+
+   $ micropython -m tools.disassemble path/to/binary.ulp
+   .text
+   0000  040000d0  LD r0, r1, 0
+   0004  0e0400d0  LD r2, r3, 1
+   0008  84010068  ST r0, r1, 0
+   000c  8b090068  ST r3, r2, 2
+   .data
+   0010  00000000  <empty>
+
+
+Disassembling a byte sequence
+-----------------------------
+
+The ``-m`` option allows disassembling a sequence of hex letters representing
+ULP instructions.
+
+This option expects the actual instructions directly, without any ULP header.
+
+The sequence must contain a number of hex letters exactly divisible by 8, i.e.
+8, 16, 24, etc, because each 32-bit word is made up of 8 hex letters. Spaces
+can be included in the sequence and they are ignored.
+
+The typical use case for this feature is to copy/paste some instructions from
+a hexdump (e.g. xxd output) for analysis.
+
+Example:
+
+.. code-block:: shell
+
+   # hexdump binary.ulp
+   $ xxd path/to/binary.ulp
+   00000000: 756c 7000 0c00 2400 0400 0000 9300 8074  ulp...$........t
+   00000010: 2a80 0488 2004 8074 1c00 0084 0000 0040  *... ..t.......@
+   (...)
+
+   # analyse the last 2 instructions
+   $ micropython -m tools.disassemble -m "1c00 0084 0000 0040"
+   0000  1c000084  JUMPS 0, 28, LT
+   0004  00000040  NOP
+
+
+Verbose mode
+------------------------
+
+In verbose mode the following extra outputs are enabled:
+
+* ULP header (except when using ``-m``)
+* The fields of each instruction and their values
+
+For example:
+
+.. code-block::
+
+   header
+   ULP magic    : b'ulp\x00' (0x00706c75)
+   .text offset : 12 (0x0c)
+   .text size   : 36 (0x24)
+   .data offset : 48 (0x30)
+   .data size   : 4 (0x04)
+   .bss size    : 0 (0x00)
+   ----------------------------------------
+   .text
+   0000  93008072  MOVE r3, 9
+                    dreg       =   3
+                    imm        =   9
+                    opcode     =   7
+                    sel        =   4 (MOVE)
+                    sreg       =   0
+                    sub_opcode =   1
+                    unused     =   0
+   (...detail truncated...)
+   0020  000000b0  HALT
+                    opcode     =  11 (0x0b)
+                    unused     =   0
+   ----------------------------------------
+   .data
+   0024  00000000  <empty>
+
+
+Disassembling on device
+-----------------------------
+
+The disassembler also works when used on an ESP32.
+
+To use the disassembler on a real device:
+
+* ensure ``micropython-esp32-ulp`` is installed on the device (see `docs/index.rst </docs/index.rst>`_).
+* upload ``tools/disassemble.py`` to the device (any directory will do)
+* run the following:
+
+  .. code-block:: python
+
+     from disassemble import disassemble_file
+     # then either:
+     disassemble_file('path/to/file.ulp')  # normal mode
+     # or:
+     disassemble_file('path/to/file.ulp', True)  # verbose mode
diff --git a/docs/index.rst b/docs/index.rst
index 82712ec..314bc68 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -136,6 +136,15 @@ found as part of Arduino/ESP-IDF projects.
 The preprocessor and how to use it is documented here: `Preprocessor support </docs/preprocess.rst>`_.
 
 
+Disassembler
+------------
+There is a disassembler for disassembling ULP binary code. This is mainly used to
+inspect what instructions our assembler created, however it can be used to analyse
+any ULP binaries.
+
+The disassembler and how to use it is documented here: `Disassembler </docs/disassembler.rst>`_.
+
+
 Limitations
 -----------