Skip to content

Commit

Permalink
Add support for disassembling a number of instructions, convert disassembly to generator (#29)
Browse files Browse the repository at this point in the history

* Add support for disassembling n instructions

* Convert size to optional

* Add instruction count to disassemble
  • Loading branch information
nazywam authored Jul 13, 2020
1 parent caedc99 commit ab92817
Show file tree
Hide file tree
Showing 5 changed files with 45 additions and 24 deletions.
23 changes: 14 additions & 9 deletions malduck/disasm.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import collections
from capstone import CsInsn
from capstone.x86 import X86Op
from typing import Any, List, Optional, Dict, Union
from typing import Any, List, Optional, Dict, Union, Iterator

__all__ = ["disasm", "insn", "Disassemble", "Instruction", "Operand", "Memory"]

Expand Down Expand Up @@ -260,11 +260,16 @@ def __init__(self) -> None:
Operand.regs[getattr(capstone.x86, reg)] = reg.split("_")[2].lower()

def disassemble(
self, data: bytes, addr: int, x64: bool = False
) -> List[Instruction]:
self, data: bytes, addr: int, x64: bool = False, count: int = 0
) -> Iterator[Instruction]:
"""
Disassembles data from specific address
.. versionchanged:: 4.0.0
Returns iterator instead of list of instructions, accepts maximum
number of instructions to disassemble
short: disasm
:param data: Block of data to disassemble
Expand All @@ -273,19 +278,19 @@ def disassemble(
:type addr: int
:param x64: Disassemble in x86-64 mode?
:type x64: bool (default=False)
:return: Returns list of instructions
:rtype: List[:class:`Instruction`]
:param count: Maximum number of instructions to disassemble (0 means no limit)
:type count: int (default=0)
:return: Returns iterator of instructions
:rtype: Iterator[:class:`Instruction`]
"""
import capstone

cs = capstone.Cs(
capstone.CS_ARCH_X86, capstone.CS_MODE_64 if x64 else capstone.CS_MODE_32
)
cs.detail = True
ret = []
for insn in cs.disasm(data, addr):
ret.append(Instruction.from_capstone(insn, x64=x64))
return ret
for insn in cs.disasm(data, addr, count):
yield Instruction.from_capstone(insn, x64=x64)

__call__ = disassemble

Expand Down
22 changes: 16 additions & 6 deletions malduck/procmem/procmem.py
Original file line number Diff line number Diff line change
Expand Up @@ -396,7 +396,7 @@ def readv_regions(self, addr=None, length=None, contiguous=True):
Used internally.
.. versionchanged: 3.0.0
.. versionchanged:: 3.0.0
Contents of contiguous regions are merged into single string
Expand Down Expand Up @@ -692,19 +692,29 @@ def regexv(self, query, addr=None, length=None):
for entry in re.finditer(query, chunk, re.DOTALL):
yield chunk_addr + entry.start()

def disasmv(self, addr, size, x64=False):
def disasmv(self, addr, size=None, x64=False, count=None):
"""
Disassembles code under specified address
.. versionchanged:: 4.0.0
Returns iterator instead of list of instructions
:param addr: Virtual address
:type addr: int
:param size: Size of disassembled buffer
:type size: int
:type size: int (optional)
:param count: Number of instructions to disassemble
:type count: int (optional)
:param x64: Assembly is 64bit
:type x64: bool (optional)
:return: :class:`Iterator[Instruction]`
"""
return disasm(self.readv(addr, size), addr, x64=x64)
if (not size and not count) or (size and count):
raise ValueError("procmem.disasmv needs either size or count to be set")
if count:
# Get the maximum possible code size, assuming every instruction has the maximum x86 length (15 bytes)
size = count * 15
return disasm(data=self.readv(addr, size), addr=addr, x64=x64, count=count or 0)

def extract(self, modules=None, extract_manager=None):
"""
Expand Down Expand Up @@ -732,7 +742,7 @@ def yarap(self, ruleset, offset=None, length=None, extended=False):
If offset is None, looks for match from the beginning of memory
.. versionchanged:: 4.0.0:
.. versionchanged:: 4.0.0
Added `extended` option which allows to get extended information about matched strings and rules.
Default is False for backwards compatibility.
Expand All @@ -755,7 +765,7 @@ def yarav(self, ruleset, addr=None, length=None, extended=False):
If addr is None, looks for match from the beginning of memory
.. versionchanged:: 4.0.0:
.. versionchanged:: 4.0.0
Added `extended` option which allows to get extended information about matched strings and rules.
Default is False for backwards compatibility.
Expand Down
8 changes: 7 additions & 1 deletion malduck/procmem/procmem.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -217,7 +217,13 @@ class ProcessMemory:
def regexv(
self, query: bytes, addr: Optional[int] = None, length: Optional[int] = None
) -> Iterator[int]: ...
def disasmv(self, addr: int, size: int, x64: bool = False) -> List[Instruction]: ...
def disasmv(
self,
addr: int,
size: Optional[int] = None,
x64: bool = False,
count: Optional[int] = None,
) -> List[Instruction]: ...
def extract(
self, modules: ExtractorModules = None, extract_manager: ExtractManager = None,
) -> Optional[List[Dict[str, Any]]]: ...
Expand Down
14 changes: 7 additions & 7 deletions tests/test_disasm.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,9 +61,9 @@ def test_insns(self):
assert insn7.op2 == (None, None, None, 0x400000)

def test_equal(self):
assert disasm(b"hAAAA", 0)[0].mnem == "push"
assert disasm(b"hAAAA", 0)[0].op1.value == 0x41414141
assert disasm(b"hAAAA", 0) == disasm(b"hAAAA", 0)
assert next(disasm(b"hAAAA", 0)).mnem == "push"
assert next(disasm(b"hAAAA", 0)).op1.value == 0x41414141
assert list(disasm(b"hAAAA", 0)) == list(disasm(b"hAAAA", 0))


class TestDisasm64bit(object):
Expand Down Expand Up @@ -149,7 +149,7 @@ def test_insns(self):
assert insn10.op2.reg == "rax"

def test_equal(self):
assert disasm(b"hAAAA", 0)[0].mnem == "push"
assert disasm(b"hAAAA", 0)[0].op1.value == 0x41414141
assert disasm(b"hAAAA", 0) == disasm(b"hAAAA", 0)
assert disasm(b"hAAAA", 0) != disasm(b"hAAAB", 0)
assert next(disasm(b"hAAAA", 0)).mnem == "push"
assert next(disasm(b"hAAAA", 0)).op1.value == 0x41414141
assert list(disasm(b"hAAAA", 0)) == list(disasm(b"hAAAA", 0))
assert list(disasm(b"hAAAA", 0)) != list(disasm(b"hAAAB", 0))
2 changes: 1 addition & 1 deletion tests/test_procmem.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ def test_cuckoomem_methods():
assert list(buf.regexv(b" ", 0x401000)) == [0x401007, 0x401014]
assert list(buf.regexv(b" ", 0x401000, 0x10)) == [0x401007]
assert list(buf.regexv(b"test..h", 0x401000)) == [0x40100f]
assert buf.disasmv(0x401015, 6) == [
assert list(buf.disasmv(0x401015, 6)) == [
insn("push", 0x41414141, addr=0x401015),
insn("ret", addr=0x40101a),
]
Expand Down

0 comments on commit ab92817

Please sign in to comment.