From ab928172b45c88c64d6d79ec18ed68a59211a125 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Praszmo?= Date: Mon, 13 Jul 2020 11:12:08 +0200 Subject: [PATCH] Add support for disassembling a number of instructions, convert disassembly to generator (#29) * Add support for disassembling n instructions * Convert size to optional * Add instruction count to disassemble --- malduck/disasm.py | 23 ++++++++++++++--------- malduck/procmem/procmem.py | 22 ++++++++++++++++------ malduck/procmem/procmem.pyi | 8 +++++++- tests/test_disasm.py | 14 +++++++------- tests/test_procmem.py | 2 +- 5 files changed, 45 insertions(+), 24 deletions(-) diff --git a/malduck/disasm.py b/malduck/disasm.py index 23fea14..252d419 100644 --- a/malduck/disasm.py +++ b/malduck/disasm.py @@ -5,7 +5,7 @@ import collections from capstone import CsInsn from capstone.x86 import X86Op -from typing import Any, List, Optional, Dict, Union +from typing import Any, List, Optional, Dict, Union, Iterator __all__ = ["disasm", "insn", "Disassemble", "Instruction", "Operand", "Memory"] @@ -260,11 +260,16 @@ def __init__(self) -> None: Operand.regs[getattr(capstone.x86, reg)] = reg.split("_")[2].lower() def disassemble( - self, data: bytes, addr: int, x64: bool = False - ) -> List[Instruction]: + self, data: bytes, addr: int, x64: bool = False, count: int = 0 + ) -> Iterator[Instruction]: """ Disassembles data from specific address + .. versionchanged :: 4.0.0 + + Returns iterator instead of list of instructions, accepts maximum + number of instructions to disassemble + short: disasm :param data: Block of data to disasseble @@ -273,8 +278,10 @@ def disassemble( :type addr: int :param x64: Disassemble in x86-64 mode? 
:type x64: bool (default=False) - :return: Returns list of instructions - :rtype: List[:class:`Instruction`] + :param count: Number of instructions to disassemble + :type count: int (default=0) + :return: Returns iterator of instructions + :rtype: Iterator[:class:`Instruction`] """ import capstone @@ -282,10 +289,8 @@ def disassemble( capstone.CS_ARCH_X86, capstone.CS_MODE_64 if x64 else capstone.CS_MODE_32 ) cs.detail = True - ret = [] - for insn in cs.disasm(data, addr): - ret.append(Instruction.from_capstone(insn, x64=x64)) - return ret + for insn in cs.disasm(data, addr, count): + yield Instruction.from_capstone(insn, x64=x64) __call__ = disassemble diff --git a/malduck/procmem/procmem.py b/malduck/procmem/procmem.py index 95cabec..43cc75a 100644 --- a/malduck/procmem/procmem.py +++ b/malduck/procmem/procmem.py @@ -396,7 +396,7 @@ def readv_regions(self, addr=None, length=None, contiguous=True): Used internally. - .. versionchanged: 3.0.0 + .. versionchanged:: 3.0.0 Contents of contiguous regions are merged into single string @@ -692,19 +692,29 @@ def regexv(self, query, addr=None, length=None): for entry in re.finditer(query, chunk, re.DOTALL): yield chunk_addr + entry.start() - def disasmv(self, addr, size, x64=False): + def disasmv(self, addr, size=None, x64=False, count=None): """ Disassembles code under specified address + .. 
versionchanged:: 4.0.0 + Returns iterator instead of list of instructions + :param addr: Virtual address :type addr: int :param size: Size of disassembled buffer - :type size: int + :type size: int (optional) + :param count: Number of instructions to disassemble + :type count: int (optional) :param x64: Assembly is 64bit :type x64: bool (optional) :return: :class:`List[Instruction]` """ - return disasm(self.readv(addr, size), addr, x64=x64) + if (not size and not count) or (size and count): + raise ValueError("procmem.disasmv needs either size or count to be set") + if count: + # Get the maximum possible code size assuming maximum instruction size + size = count * 15 + return disasm(data=self.readv(addr, size), addr=addr, x64=x64, count=count or 0) def extract(self, modules=None, extract_manager=None): """ @@ -732,7 +742,7 @@ def yarap(self, ruleset, offset=None, length=None, extended=False): If offset is None, looks for match from the beginning of memory - .. versionchanged:: 4.0.0: + .. versionchanged:: 4.0.0 Added `extended` option which allows to get extended information about matched strings and rules. Default is False for backwards compatibility. @@ -755,7 +765,7 @@ def yarav(self, ruleset, addr=None, length=None, extended=False): If addr is None, looks for match from the beginning of memory - .. versionchanged:: 4.0.0: + .. versionchanged:: 4.0.0 Added `extended` option which allows to get extended information about matched strings and rules. Default is False for backwards compatibility. diff --git a/malduck/procmem/procmem.pyi b/malduck/procmem/procmem.pyi index ef565ef..30b4244 100644 --- a/malduck/procmem/procmem.pyi +++ b/malduck/procmem/procmem.pyi @@ -217,7 +217,13 @@ class ProcessMemory: def regexv( self, query: bytes, addr: Optional[int] = None, length: Optional[int] = None ) -> Iterator[int]: ... - def disasmv(self, addr: int, size: int, x64: bool = False) -> List[Instruction]: ... 
+ def disasmv( + self, + addr: int, + size: Optional[int] = None, + x64: bool = False, + count: Optional[int] = None, + ) -> List[Instruction]: ... def extract( self, modules: ExtractorModules = None, extract_manager: ExtractManager = None, ) -> Optional[List[Dict[str, Any]]]: ... diff --git a/tests/test_disasm.py b/tests/test_disasm.py index 0ef23b0..fdafaa5 100644 --- a/tests/test_disasm.py +++ b/tests/test_disasm.py @@ -61,9 +61,9 @@ def test_insns(self): assert insn7.op2 == (None, None, None, 0x400000) def test_equal(self): - assert disasm(b"hAAAA", 0)[0].mnem == "push" - assert disasm(b"hAAAA", 0)[0].op1.value == 0x41414141 - assert disasm(b"hAAAA", 0) == disasm(b"hAAAA", 0) + assert next(disasm(b"hAAAA", 0)).mnem == "push" + assert next(disasm(b"hAAAA", 0)).op1.value == 0x41414141 + assert list(disasm(b"hAAAA", 0)) == list(disasm(b"hAAAA", 0)) class TestDisasm64bit(object): @@ -149,7 +149,7 @@ def test_insns(self): assert insn10.op2.reg == "rax" def test_equal(self): - assert disasm(b"hAAAA", 0)[0].mnem == "push" - assert disasm(b"hAAAA", 0)[0].op1.value == 0x41414141 - assert disasm(b"hAAAA", 0) == disasm(b"hAAAA", 0) - assert disasm(b"hAAAA", 0) != disasm(b"hAAAB", 0) + assert next(disasm(b"hAAAA", 0)).mnem == "push" + assert next(disasm(b"hAAAA", 0)).op1.value == 0x41414141 + assert list(disasm(b"hAAAA", 0)) == list(disasm(b"hAAAA", 0)) + assert list(disasm(b"hAAAA", 0)) != list(disasm(b"hAAAB", 0)) diff --git a/tests/test_procmem.py b/tests/test_procmem.py index 3671f4d..057e46c 100644 --- a/tests/test_procmem.py +++ b/tests/test_procmem.py @@ -136,7 +136,7 @@ def test_cuckoomem_methods(): assert list(buf.regexv(b" ", 0x401000)) == [0x401007, 0x401014] assert list(buf.regexv(b" ", 0x401000, 0x10)) == [0x401007] assert list(buf.regexv(b"test..h", 0x401000)) == [0x40100f] - assert buf.disasmv(0x401015, 6) == [ + assert list(buf.disasmv(0x401015, 6)) == [ insn("push", 0x41414141, addr=0x401015), insn("ret", addr=0x40101a), ]