diff --git a/docs/conf.py b/docs/conf.py index 9a64669..47bb629 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -22,7 +22,7 @@ author = 'CERT Polska' # The full version, including alpha/beta/rc tags -release = '2.1.1' +release = '3.0.0' # -- General configuration --------------------------------------------------- diff --git a/docs/procmem.rst b/docs/procmem.rst index 765639d..d81ea7c 100644 --- a/docs/procmem.rst +++ b/docs/procmem.rst @@ -1,4 +1,4 @@ -Memory model objects with PE support (procmem) +Memory model objects (procmem) ============================================== .. automodule:: malduck.procmem @@ -37,3 +37,11 @@ CuckooProcessMemory (cuckoomem) .. autoclass:: malduck.procmem.cuckoomem.CuckooProcessMemory :members: + +IDAProcessMemory (idamem) +--------------------------------- + +.. autoclass:: malduck.idamem + +.. autoclass:: malduck.procmem.idamem.IDAProcessMemory + :members: diff --git a/malduck/__init__.py b/malduck/__init__.py index bb0a0d2..5cff8f4 100644 --- a/malduck/__init__.py +++ b/malduck/__init__.py @@ -8,7 +8,7 @@ from .hash.crc import crc32 from .hash.sha import md5, sha1, sha224, sha384, sha256, sha512 from .string.inet import ipv4 -from .string.ops import asciiz, utf16z, chunks, enhex, unhex, uleb128 +from .string.ops import asciiz, utf16z, chunks, chunks_iter, enhex, unhex, uleb128 from .structure import Structure from .pe import pe2cuckoo @@ -19,7 +19,7 @@ ) from .short import ( - aes, blowfish, des3, rc4, pe, aplib, gzip, procmem, procmempe, procmemelf, cuckoomem, pad, unpad, + aes, blowfish, des3, rc4, pe, aplib, gzip, procmem, procmempe, procmemelf, cuckoomem, idamem, pad, unpad, insn, rsa, verify, base64, rabbit, serpent, lznt1, pkcs7, unpkcs7 ) diff --git a/malduck/compression/aplib.py b/malduck/compression/aplib.py index fdef979..8791666 100644 --- a/malduck/compression/aplib.py +++ b/malduck/compression/aplib.py @@ -1,8 +1,10 @@ from .components.aplib import ap_depack from ..py2compat import binary_type +import logging import 
struct -import warnings + +log = logging.getLogger(__name__) class aPLib(object): @@ -29,7 +31,7 @@ class aPLib(object): """ def decompress(self, buf, length=None, headerless=False): if length is not None: - warnings.warn("Length argument is ignored by aPLib.decompress") + log.warning("Length argument is ignored by aPLib.decompress") try: # Trim header if not headerless and buf.startswith(b"AP32"): diff --git a/malduck/crypto/aes.py b/malduck/crypto/aes.py index 8657d8e..89ba3d5 100644 --- a/malduck/crypto/aes.py +++ b/malduck/crypto/aes.py @@ -57,7 +57,7 @@ def export_key(self): class AES(object): r""" - AES decryption object + AES encryption/decryption object :param key: Encryption key :type key: bytes @@ -82,7 +82,18 @@ def __init__(self, key, iv=None, mode="cbc"): self.aes = Cipher( algorithms.AES(key), self.modes[mode](iv), backend=default_backend() - ).decryptor() + ) + + def encrypt(self, data): + """ + Encrypt provided data + + :param data: Buffer with data + :type data: bytes + :return: Encrypted data + """ + aes_enc = self.aes.encryptor() + return aes_enc.update(data) + aes_enc.finalize() def decrypt(self, data): """ @@ -92,7 +103,8 @@ def decrypt(self, data): :type data: bytes :return: Decrypted data """ - return self.aes.update(data) + self.aes.finalize() + aes_dec = self.aes.decryptor() + return aes_dec.update(data) + aes_dec.finalize() @staticmethod def import_key(data): diff --git a/malduck/extractor/extract_manager.py b/malduck/extractor/extract_manager.py index 2f019dc..12ad423 100644 --- a/malduck/extractor/extract_manager.py +++ b/malduck/extractor/extract_manager.py @@ -1,12 +1,44 @@ +import json +import logging import os -import warnings from .extractor import Extractor from .loaders import load_modules from ..yara import Yara +log = logging.getLogger(__name__) -def merge_configs(config, new_config): + +def is_config_better(base_config, new_config): + """ + Checks whether new config looks more reliable than base. 
+ Currently just checking the amount of non-empty keys. + """ + base = [(k, v) for k, v in base_config.items() if v] + new = [(k, v) for k, v in new_config.items() if v] + return len(new) > len(base) + + +def sanitize_config(config): + """ + Sanitize static configuration by removing empty strings/collections + + :param config: Configuration to sanitize + :return: Sanitized configuration + """ + return {k: v for k, v in config.items() if v in [0, False] or v} + + +def merge_configs(base_config, new_config): + """ + Merge static configurations. + Used internally. Removes "family" key from the result, which is set explicitly by ExtractManager.push_config + + :param base_config: Base configuration + :param new_config: Changes to apply + :return: Merged configuration + """ + config = dict(base_config) for k, v in new_config.items(): if k == "family": continue @@ -23,6 +55,7 @@ def merge_configs(config, new_config): "value of '{key}' with '{new_value}'".format(key=k, old_value=config[k], new_value=v)) + return config class ExtractorModules(object): @@ -54,7 +87,7 @@ def on_error(self, exc, module_name): :param module_name: Name of module which throwed exception :type module_name: str """ - warnings.warn("{} not loaded: {}".format(module_name, exc)) + log.warning("{} not loaded: {}".format(module_name, exc)) class ExtractManager(object): @@ -112,56 +145,88 @@ def on_extractor_error(self, exc, extractor, method_name): :type method_name: str """ import traceback - warnings.warn("{}.{} throwed exception: {}".format( + log.warning("{}.{} throwed exception: {}".format( extractor.__class__.__name__, method_name, traceback.format_exc())) - def push_file(self, filepath, base=0, pe=None, elf=None, image=None): + def push_file(self, filepath, base=0): """ Pushes file for extraction. Config extractor entrypoint. 
- + :param filepath: Path to extracted file :type filepath: str :param base: Memory dump base address :type base: int - :param pe: Determines whether file contains PE (default: detect automatically) - :type pe: bool or None ("detect") - :param elf: Determines whether file contains ELF (default: detect automatically) - :type elf: bool or None ("detect") - :param image: If pe is True, determines whether file contains PE image (default: detect automatically) - :type image: bool or None ("detect") - """ - from ..procmem import ProcessMemory, ProcessMemoryPE, ProcessMemoryELF + :return: Family name if ripped successfully and provided better configuration than previous files. + Returns None otherwise. + """ + from ..procmem import ProcessMemory + log.debug("Started extraction of file {}:{:x}".format(filepath, base)) with ProcessMemory.from_file(filepath, base=base) as p: - if pe is None and p.readp(0, 2) == b"MZ": - pe = True - if elf is None and p.readp(0, 4) == b"\x7fELF": - elf = True - if pe and elf: - raise RuntimeError("A binary can't be both ELF and PE file") - if pe: - p = ProcessMemoryPE.from_memory(p, image=image, detect_image=image is None) - elif elf: - if image is False: - raise RuntimeError("ELF dumps are not supported yet") - p = ProcessMemoryELF.from_memory(p, image=True) - self.push_procmem(p) - - def push_procmem(self, p): + return self.push_procmem(p, rip_binaries=True) + + def push_config(self, family, config): + config["family"] = family + if family not in self.configs: + self.configs[family] = config + else: + base_config = self.configs[family] + if is_config_better(base_config, config): + log.debug("Config looks better") + self.configs[family] = config + return family + else: + log.debug("Config doesn't look better - ignoring.") + + def push_procmem(self, p, rip_binaries=False): """ Pushes ProcessMemory object for extraction :param p: ProcessMemory object :type p: :class:`malduck.procmem.ProcessMemory` + :param rip_binaries: Look for binaries (PE, 
ELF) in provided ProcessMemory and try to perform extraction using + specialized variants (ProcessMemoryPE, ProcessMemoryELF) + :type rip_binaries: bool (default: False) + :return: Family name if ripped successfully and provided better configuration than previous procmems. + Returns None otherwise. """ - extractor = ProcmemExtractManager(self) - extractor.push_procmem(p) - if extractor.config: - if extractor.family not in self.configs: - self.configs[extractor.family] = extractor.config + from ..procmem import ProcessMemoryPE, ProcessMemoryELF + from ..procmem.binmem import ProcessMemoryBinary + + binaries = [p] + if rip_binaries: + binaries += list(ProcessMemoryPE.load_binaries_from_memory(p)) + \ + list(ProcessMemoryELF.load_binaries_from_memory(p)) + matches = p.yarav(self.rules) + + fmt_procmem = lambda p: "{}:{}:{:x}".format(p.__class__.__name__, + "IMG" if getattr(p, "is_image", False) else "DMP", p.imgbase) + + def extract_config(procmem): + log.debug("{} - ripping...".format(fmt_procmem(procmem))) + extractor = ProcmemExtractManager(self) + matches.remap(procmem.p2v) + extractor.push_procmem(procmem, _matches=matches) + if extractor.family: + log.debug("{} - found {}!".format(fmt_procmem(procmem), extractor.family)) + return self.push_config(extractor.family, extractor.config) else: - merge_configs(self.configs[extractor.family], extractor.config) + log.debug("{} - No luck.".format(fmt_procmem(procmem))) + + log.debug("Matched rules: {}".format(matches.keys())) + + ripped_family = None + + for binary in binaries: + found_family = extract_config(binary) + if found_family is not None: + ripped_family = found_family + if isinstance(binary, ProcessMemoryBinary) and binary.image is not None: + found_family = extract_config(binary.image) + if found_family is not None: + ripped_family = found_family + return ripped_family @property def config(self): @@ -194,22 +259,24 @@ def on_extractor_error(self, exc, extractor, method_name): """ 
self.parent.on_extractor_error(exc, extractor, method_name) - def push_procmem(self, p): + def push_procmem(self, p, _matches=None): """ Pushes ProcessMemory object for extraction :param p: ProcessMemory object :type p: :class:`malduck.procmem.ProcessMemory` + :param _matches: YaraMatches object (used internally) + :type _matches: :class:`malduck.yara.YaraMatches` """ - matched = p.yarav(self.parent.rules) + matches = _matches or p.yarav(self.parent.rules) # For each extractor... for ext_class in self.parent.extractors: extractor = ext_class(self) # For each rule identifier in extractor.yara_rules... for rule in extractor.yara_rules: - if rule in matched: + if rule in matches: try: - extractor.handle_yara(p, matched[rule]) + extractor.handle_yara(p, matches[rule]) except Exception as exc: self.parent.on_error(exc, extractor) @@ -226,24 +293,30 @@ def push_config(self, config, extractor): :param extractor: Extractor object reference :type extractor: :class:`malduck.extractor.Extractor` """ - if "family" in config: - if not self.family or ( - self.family != extractor.family and - self.family in extractor.overrides): - self.family = config["family"] + try: + json.dumps(config) + except (TypeError, OverflowError): + raise RuntimeError("Config must be JSON-encodable") + + config = sanitize_config(config) + + if not config: + return - new_config = dict(self.collected_config) + log.debug("%s found the following config parts: %s", extractor.__class__.__name__, sorted(config.keys())) - merge_configs(new_config, config) + self.collected_config = merge_configs(self.collected_config, config) - if self.family: - new_config["family"] = self.family - self.collected_config = new_config + if "family" in config and ( + not self.family or (self.family != extractor.family and self.family in extractor.overrides)): + self.family = config["family"] + log.debug("%s tells it's %s", extractor.__class__.__name__, self.family) @property def config(self): """ - Returns collected config, but if 
family is not matched - returns empty dict + Returns collected config, but if family is not matched - returns empty dict. + Family is not included in config itself, look at :py:attr:`ProcmemExtractManager.family`. """ if self.family is None: return {} diff --git a/malduck/extractor/extractor.py b/malduck/extractor/extractor.py index 941f19b..16d6050 100644 --- a/malduck/extractor/extractor.py +++ b/malduck/extractor/extractor.py @@ -1,11 +1,12 @@ import functools import logging -import warnings -from ..procmem.procmempe import ProcessMemoryPE +from ..procmem import ProcessMemoryPE, ProcessMemoryELF from ..py2compat import add_metaclass +log = logging.getLogger(__name__) + class MetaExtractor(type): """ @@ -43,7 +44,7 @@ class ExtractorMethod(object): def __init__(self, method): self.method = method self.weak = False - self.needs_pe = False + self.needs_exec = None self.final = False self.yara_string = method.__name__ functools.update_wrapper(self, method) @@ -136,9 +137,9 @@ class Extractor(ExtractorBase): Following parameters need to be defined: - * :py:attr:`family` (:py:attr:`extractor.ExtractorBase.family`) + * :py:attr:`family` (see :py:attr:`extractor.ExtractorBase.family`) * :py:attr:`yara_rules` - * :py:attr:`overrides` (optional, :py:attr:`extractor.ExtractorBase.overrides`) + * :py:attr:`overrides` (optional, see :py:attr:`extractor.ExtractorBase.overrides`) Example extractor code for Citadel: @@ -221,6 +222,10 @@ def get_config(self, p): Use this decorator for extractors that need PE instance. (:class:`malduck.procmem.ProcessMemoryPE`) + .. py:decoratormethod:: Extractor.needs_elf + + Use this decorator for extractors that need ELF instance. 
(:class:`malduck.procmem.ProcessMemoryELF`) + """ yara_rules = () #: Names of Yara rules for which handle_yara is called @@ -252,24 +257,37 @@ def handle_yara(self, p, match): method = getattr(self, method_name) for va in match[identifier]: try: - if method.needs_pe: - # If extractor explicitly needs this and p is raw procmem: find PE for specified offset - p_pe = ProcessMemoryPE.from_memory(p, base=p.findmz(va)) \ - if not isinstance(p, ProcessMemoryPE) else p - method(self, p_pe, va) - else: - method(self, p, va) + if method.needs_exec and not isinstance(p, method.needs_exec): + log.debug("Omitting %s.%s for %s@%x - %s is not %s", + self.__class__.__name__, + method_name, + identifier, + va, + p.__class__.__name__, + method.needs_exec.__name__) + continue + log.debug("Trying %s.%s for %s@%x", + self.__class__.__name__, + method_name, + identifier, + va) + method(self, p, va) except Exception as exc: self.on_error(exc, method_name) # Call final extractors for method_name in self.final_methods: method = getattr(self, method_name) - if method.needs_pe and not isinstance(p, ProcessMemoryPE): - warnings.warn("Method {}.{} not called because object is not ProcessMemoryPE".format( - self.__class__.__name__, - method_name)) + if method.needs_exec and not isinstance(p, method.needs_exec): + log.debug("Omitting %s.%s (final) - %s is not %s", + self.__class__.__name__, + method_name, + p.__class__.__name__, + method.needs_exec.__name__) continue + log.debug("Trying %s.%s (final)", + self.__class__.__name__, + method_name) try: method(self, p) except Exception as exc: @@ -280,7 +298,13 @@ def handle_yara(self, p, match): @staticmethod def needs_pe(method): method = Extractor._extractor_method(method) - method.needs_pe = True + method.needs_exec = ProcessMemoryPE + return method + + @staticmethod + def needs_elf(method): + method = Extractor._extractor_method(method) + method.needs_exec = ProcessMemoryELF return method @staticmethod @@ -315,6 +339,7 @@ def final(method): 
@staticmethod def _extractor_method(method): + # Check whether method is already wrapped by ExtractorMethod if isinstance(method, ExtractorMethod): return method else: diff --git a/malduck/extractor/loaders.py b/malduck/extractor/loaders.py index fa196ac..6103195 100644 --- a/malduck/extractor/loaders.py +++ b/malduck/extractor/loaders.py @@ -1,9 +1,11 @@ +import logging import pkgutil -import warnings from ..py2compat import import_module +log = logging.getLogger(__name__) + def load_modules(search_path, onerror=None): """ @@ -23,7 +25,7 @@ def load_modules(search_path, onerror=None): if not is_pkg: continue if module_name in modules: - warnings.warn("Module collision - {} overridden".format(module_name)) + log.warning("Module collision - {} overridden".format(module_name)) try: modules[module_name] = import_module(importer, module_name) except Exception as exc: diff --git a/malduck/hash/crc.py b/malduck/hash/crc.py index a8594ed..5092d86 100644 --- a/malduck/hash/crc.py +++ b/malduck/hash/crc.py @@ -4,4 +4,12 @@ import zlib -crc32 = zlib.crc32 + +def crc32(val): + """ + Computes CRC32 checksum for provided data + + .. 
versionchanged:: 3.0.0 + Guaranteed to be unsigned on both Py2/Py3 + """ + return zlib.crc32(val) & 0xFFFFFFFF diff --git a/malduck/main.py b/malduck/main.py index 1f93b0f..3a02278 100644 --- a/malduck/main.py +++ b/malduck/main.py @@ -2,11 +2,8 @@ import logging import json import os -import warnings from .procmem import ProcessMemoryPE -from pefile import PEFormatError -from elftools.elf.elffile import ELFError @click.group() @@ -47,13 +44,11 @@ def fixpe(mempath, outpath, force): @click.pass_context @click.argument("paths", nargs=-1, type=click.Path(exists=True), required=True) @click.option("--base", "-b", default=None, help="Base address of dump (use '0x' prefix for hexadecimal value)") -@click.option("--pe/--non-pe", default=None, help="Specified files are PE executables/dumps (default: detect)") -@click.option("--elf/--non-elf", default=None, help="Specified files are ELF executables (default: detect)") -@click.option("--single/--multi", default=False, help="Treat files as single analysis " - "(merge all configs from the same family into one)") +@click.option("--analysis/--files", default=False, help="Treat files as dumps from single analysis " + "(merge configs from the same family)") @click.option("--modules", default=None, type=click.Path(exists=True), required=False, help="Specify directory where Yara files and modules are located (default path is ~/.malduck)") -def extract(ctx, paths, base, pe, elf, single, modules): +def extract(ctx, paths, base, analysis, modules): """Extract static configuration from dumps""" from .extractor import ExtractManager, ExtractorModules @@ -90,16 +85,9 @@ def echo_config(extract_manager, file_path=None): files.sort() for file_path in files: - try: - extract_manager.push_file(file_path, base=base, pe=pe, elf=elf) - if not single: - echo_config(extract_manager, file_path) - extract_manager = ExtractManager(extractor_modules) - except PEFormatError as e: - warnings.warn('Error parsing ' + file_path + ' as PE: ' + str(e)) - 
except ELFError as e: - warnings.warn('Error parsing ' + file_path + ' as ELF: ' + str(e)) - #except Exception as e: - # warnings.warn('Unknown exception loading ' + file_path + ': ' + str(e)) - if single: + extract_manager.push_file(file_path, base=base) + if not analysis: + echo_config(extract_manager, file_path) + extract_manager = ExtractManager(extractor_modules) + if analysis: echo_config(extract_manager) diff --git a/malduck/pe.py b/malduck/pe.py index 552c2b1..c8d61b3 100644 --- a/malduck/pe.py +++ b/malduck/pe.py @@ -61,8 +61,7 @@ def map_offset(self, offs): return self.memory.imgbase + (self.pe.get_rva_from_offset(offs) or offs) def __len__(self): - r = self.memory.regions[-1] - return r.addr + r.size + return self.memory.regions[-1].addr - self.memory.regions[0].addr + self.memory.regions[-1].size def __getitem__(self, item): if type(item) is slice: @@ -74,6 +73,8 @@ def __getitem__(self, item): return self.memory.readv(start, stop - start + 1) def find(self, str, beg=0, end=None): + if end and beg >= end: + return -1 try: return next(self.memory.regexv(str, self.memory.imgbase + beg, end and end - beg)) except StopIteration: diff --git a/malduck/procmem/__init__.py b/malduck/procmem/__init__.py index 22a1d65..8cfa2e1 100644 --- a/malduck/procmem/__init__.py +++ b/malduck/procmem/__init__.py @@ -2,6 +2,7 @@ from .procmempe import ProcessMemoryPE from .procmemelf import ProcessMemoryELF from .cuckoomem import CuckooProcessMemory +from .idamem import IDAProcessMemory from .region import ( Region, diff --git a/malduck/procmem/binmem.py b/malduck/procmem/binmem.py new file mode 100644 index 0000000..f3fab0e --- /dev/null +++ b/malduck/procmem/binmem.py @@ -0,0 +1,63 @@ +from .procmem import ProcessMemory + + +class ProcessMemoryBinary(ProcessMemory): + """ + Abstract class for memory-mapped executable binary + """ + __magic__ = None + + def __init__(self, buf, base=0, regions=None, image=False, detect_image=False): + super(ProcessMemoryBinary, 
self).__init__(buf, base=base, regions=regions) + if detect_image: + image = self.is_image_loaded_as_memdump() + self.is_image = image + self._image = None + if image: + self._reload_as_image() + + def _reload_as_image(self): + """ + Load executable file embedded in ProcessMemory like native loader does + """ + raise NotImplementedError() + + @property + def image(self): + """ + Returns ProcessMemory object loaded with image=True or None if can't be loaded or is loaded as image yet + """ + if self.is_image: + return None + try: + if not self._image: + self._image = self.__class__.from_memory(self, image=True) + return self._image + except Exception: + return None + + def is_valid(self): + """ + Checks whether imgbase is pointing at valid binary header + """ + raise NotImplementedError() + + @classmethod + def load_binaries_from_memory(cls, procmem): + """ + Looks for binaries in ProcessMemory object and yields specialized ProcessMemoryBinary objects + :param procmem: ProcessMemory object to search + """ + if cls.__magic__ is None: + raise NotImplementedError() + for binary_va in procmem.findv(cls.__magic__): + binary_procmem = cls.from_memory(procmem, base=binary_va) + if binary_procmem.is_valid(): + yield binary_procmem + + def is_image_loaded_as_memdump(self): + """ + Uses some heuristics to deduce whether contents can be loaded with `image=True`. 
+ Used by `detect_image` + """ + raise NotImplementedError() diff --git a/malduck/procmem/idamem.py b/malduck/procmem/idamem.py new file mode 100644 index 0000000..afe3dbd --- /dev/null +++ b/malduck/procmem/idamem.py @@ -0,0 +1,62 @@ +from ..py2compat import iterbytes_ord +from .procmem import ProcessMemory +from .region import Region + +try: + import idautils + import idc + IDAPYTHON = 1 +except ImportError: + IDAPYTHON = 0 + + +class IDAVM(object): + def __init__(self, idamem): + self.idamem = idamem + + def _get_ea_range(self, item): + if type(item) is slice: + offset = item.start or 0 + length = (item.stop or len(self)) - offset + else: + offset = item + length = 1 + for region in self.idamem.regions: + if region.offset < offset + length and offset < region.end_offset: + ea_start = min(max(region.p2v(offset), region.addr), region.end) + ea_end = min(max(region.p2v(offset + length), region.addr), region.end) + yield (ea_start, ea_end) + + def __setitem__(self, item, value): + value_bytes = iterbytes_ord(value) + for ea_start, ea_end in self._get_ea_range(item): + for ea in range(ea_start, ea_end): + try: + idc.PatchByte(ea, next(value_bytes)) + except StopIteration: + return + + def __getitem__(self, item): + data = [] + for ea_start, ea_end in self._get_ea_range(item): + data.append(idc.GetManyBytes(ea_start, ea_end - ea_start)) + return b''.join(data) + + def __len__(self): + return self.idamem.regions[-1].end_offset + + +class IDAProcessMemory(ProcessMemory): + """ + ProcessMemory representation operating in IDAPython context [BETA] + """ + def __init__(self): + if not IDAPYTHON: + raise RuntimeError("This class is intended to work only in IDAPython context") + regions = [] + for seg in idautils.Segments(): + off = 0 if not regions else regions[-1].end_offset + region = Region(seg, idc.SegEnd(seg) - seg, 0, 0, 0, off) + regions.append(region) + super(IDAProcessMemory, self).__init__(IDAVM(self), regions=regions) + diff --git a/malduck/procmem/procmem.py 
b/malduck/procmem/procmem.py index 3fa2f0b..1d5c1ab 100644 --- a/malduck/procmem/procmem.py +++ b/malduck/procmem/procmem.py @@ -17,7 +17,7 @@ class ProcessMemory(object): :param buf: Object with memory contents :type buf: bytes, mmap, memoryview or bytearray object - :param base: Virtual address of the beginning of buf + :param base: Virtual address of the region of interest (or beginning of buf when no regions provided) :type base: int, optional (default: 0) :param regions: Regions mapping. If set to None (default), buf is mapped into single-region with VA specified in `base` argument @@ -144,7 +144,7 @@ def from_file(cls, filename, **kwargs): return memory @classmethod - def from_memory(cls, memory): + def from_memory(cls, memory, base=None, **kwargs): """ Makes new instance based on another ProcessMemory object. @@ -152,9 +152,11 @@ def from_memory(cls, memory): :param memory: ProcessMemory object to be copied :type memory: :class:`ProcessMemory` + :param base: Virtual address of region of interest (imgbase) + :type base: int :rtype: :class:`ProcessMemory` """ - copied = cls(memory.m, base=memory.imgbase, regions=memory.regions) + copied = cls(memory.m, base=base or memory.imgbase, regions=memory.regions, **kwargs) copied.f = memory.f return copied @@ -169,27 +171,128 @@ def length(self): else: return len(self.m) - def v2p(self, addr): + def iter_regions(self, addr=None, offset=None, length=None, contiguous=False, trim=False): + """ + Iterates over Region objects starting at provided virtual address or offset + + This method is used internally to enumerate regions using provided strategy. + + .. warning:: + If starting point is not provided, iteration will start from the first mapped region. This could + be counter-intuitive when length is set. It literally means "get of mapped bytes". + If you want to look for regions from address 0, you need to explicitly provide this address as an argument. + + .. 
versionadded:: 3.0.0 + + :param addr: Virtual address of starting point + :type addr: int (default: None) + :param offset: Offset of starting point, which will be translated to virtual address + :type offset: int (default: None) + :param length: Length of queried range in VM mapping context + :type length: int (default: None, unlimited) + :param contiguous: If True, break after first gap. Starting point must be inside mapped region. + :type contiguous: bool (default: False) + :param trim: Trim Region objects to range boundaries (addr, addr+length) + :type trim: bool (default: False) + :rtype: Iterator[:class:`Region`] + """ + if addr is not None and offset is not None: + raise ValueError("'addr' and 'offset' arguments should be provided exclusively") + if addr is None and offset is None and contiguous: + raise ValueError("Starting point (addr or offset) must be provided for contiguous regions") + if length and length < 0: + raise ValueError("Length can't be less than 0") + # No length, no problem + if length == 0: + return + # If we don't have starting point provided: first region is the starting point + if addr is None and offset is None: + addr = self.regions[0].addr + # Skipping regions before starting point + for region_idx, region in enumerate(self.regions): + if (addr is not None and addr < region.end) or \ + (offset is not None and offset < region.end_offset): + break + else: + return + # If starting region is placed after starting point + if (addr is not None and addr < region.addr) or \ + (offset is not None and offset < region.offset): + # If expect only contiguous regions: we can't return anything + if contiguous: + return + # If not, we just need to adjust our starting point + if addr is not None: + if length is not None: + length -= region.addr - addr + addr = region.addr + else: + if length is not None: + raise ValueError("Don't know how to retrieve length-limited regions with offset from unmapped area") + offset = region.offset + # If we're out of 
length after adjustment: time to stop + if length is not None and length <= 0: + return + # Now, our starting "addr"/"offset" is placed inside starting "region" + # Let's translate our offset to addr if necessary + if addr is None: + addr = region.p2v(offset) + # Continue enumeration + prev_region = None + for region in self.regions[region_idx:]: + intersection = region.trim_range(addr, length) + # If we've got empty intersection: time to break + if not intersection: + break + # Is it still contiguous to previous? + if contiguous and prev_region and prev_region.end != region.addr: + break + yield intersection if trim else region + prev_region = region + + def v2p(self, addr, length=None): """ Virtual address to buffer (physical) offset translation + .. versionchanged:: 3.0.0 + + Added optional mapping length check + :param addr: Virtual address + :param length: Expected minimal length of mapping (optional) :return: Buffer offset or None if virtual address is not mapped """ - for region in self.regions: - if region.addr <= addr < region.end: - return region.offset + addr - region.addr + if addr is None: + return None + mapping_length = 0 + for region in self.iter_regions(addr=addr, length=length, contiguous=True, trim=True): + if length is None: + return region.v2p(addr) + mapping_length += region.size + if mapping_length >= length: + return region.v2p(addr) - def p2v(self, off): + def p2v(self, off, length=None): """ Buffer (physical) offset to virtual address translation + .. 
versionchanged:: 3.0.0 + + Added optional mapping length check + :param off: Buffer offset + :param length: Expected minimal length of mapping (optional) :return: Virtual address or None if offset is not mapped """ - for region in self.regions: - if region.offset <= off < region.offset + region.size: - return region.addr + off - region.offset + if off is None: + return None + mapping_length = 0 + for region in self.iter_regions(offset=off, length=length, contiguous=True, trim=True): + if length is None: + return region.p2v(off) + mapping_length += region.size + if mapping_length >= length: + return region.p2v(off) def is_addr(self, addr): """ @@ -206,23 +309,7 @@ def addr_region(self, addr): :param addr: Virtual address :rtype: :class:`Region` """ - for region in self.regions: - if region.addr <= addr < region.end: - return region - - def iter_region(self, addr=None): - """ - Returns generator of Region objects starting at virtual address - - :param addr: Virtual address - :rtype: Iterator[:class:`Region`] - """ - start = False - for region in self.regions: - if addr is None or region.addr <= addr < region.end: - start = True - if start: - yield region + return next(self.iter_regions(addr=addr, contiguous=True), None) def readp(self, offset, length=None): """ @@ -230,9 +317,9 @@ def readp(self, offset, length=None): .. warning:: - Family of *p methods doesn't care about continuity of regions. + Family of *p methods doesn't care about contiguity of regions. 
- Use :py:meth:`p2v` and :py:meth:`readv` if you want to operate on continuous regions only + Use :py:meth:`p2v` and :py:meth:`readv` if you want to operate on contiguous regions only :param offset: Buffer offset :param length: Length of chunk (optional) @@ -244,48 +331,35 @@ def readp(self, offset, length=None): else: return binary_type(self.m[offset:offset+length]) - def readv_regions(self, addr=None, length=None, continuous_wise=True): + def readv_regions(self, addr=None, length=None, contiguous=True): """ - Generate chunks of memory from next continuous regions, starting from the specified virtual address, + Generate chunks of memory from next contiguous regions, starting from the specified virtual address, until specified length of read data is reached. Used internally. + .. versionchanged: 3.0.0 + + Contents of contiguous regions are merged into single string + :param addr: Virtual address :param length: Size of memory to read (optional) - :param continuous_wise: If True, readv_regions breaks after first gap + :param contiguous: If True, readv_regions breaks after first gap :rtype: Iterator[Tuple[int, bytes]] """ - regions = self.iter_region(addr) + current_addr = None + current_strings = [] prev_region = None - while length or length is None: - try: - region = next(regions) - except StopIteration: - return - if prev_region: - chunk_addr = region.addr - if continuous_wise and prev_region.end != region.addr: - # Gap between regions - break - break - # If continuous-wise: no-op - # Otherwise: skip gap - if length is not None: - length -= region.addr - prev_region.end - else: - chunk_addr = addr or region.addr - # Get starting region offset - rel_offs = chunk_addr - region.addr - # ... 
and how many bytes we need to read - rel_length = region.size - rel_offs - if length is not None and length < rel_length: - rel_length = length - # Yield read chunk - yield chunk_addr, self.readp(region.offset + rel_offs, rel_length) - # Go to next region - if length is not None: - length -= rel_length + for region in self.iter_regions(addr=addr, length=length, contiguous=contiguous, trim=True): + if not prev_region or prev_region.end != region.addr: + if current_strings: + yield current_addr, b"".join(current_strings) + current_addr = region.addr + current_strings = [] + current_strings.append(self.readp(region.offset, region.size)) prev_region = region + if current_strings: + yield current_addr, b"".join(current_strings) def readv(self, addr, length=None): """ @@ -298,7 +372,10 @@ def readv(self, addr, length=None): :return: Chunk from specified location :rtype: bytes """ - return b''.join(map(operator.itemgetter(1), self.readv_regions(addr, length))) + if length is not None and length <= 0: + return b'' + _, chunk = next(self.readv_regions(addr, length), (0, b'')) + return chunk def readv_until(self, addr, s=None): """ @@ -310,13 +387,10 @@ def readv_until(self, addr, s=None): :type s: bytes :rtype: bytes """ - ret = [] - for _, chunk in self.readv_regions(addr): - if s in chunk: - ret.append(chunk[:chunk.index(s)]) - break - ret.append(chunk) - return b"".join(ret) + # readv_regions is merging contiguous regions now + _, chunk = next(self.readv_regions(addr), (0, b'')) + idx = chunk.find(s) + return chunk[:idx] if idx >= 0 else chunk def patchp(self, offset, buf): """ @@ -324,9 +398,9 @@ def patchp(self, offset, buf): .. warning:: - Family of *p methods doesn't care about continuity of regions. + Family of *p methods doesn't care about contiguity of regions. 
- Use :py:meth:`p2v` and :py:meth:`patchv` if you want to operate on continuous regions only + Use :py:meth:`p2v` and :py:meth:`patchv` if you want to operate on contiguous regions only :param offset: Buffer offset :type offset: int @@ -368,10 +442,10 @@ def patchv(self, addr, buf): :type buf: bytes """ region = self.addr_region(addr) - # Bound check + # Boundary check if region is None or region.end < (addr + len(buf)): raise ValueError("Cross-region patching is not supported") - return self.patchp(region.offset + addr - region.addr, buf) + return self.patchp(region.v2p(addr), buf) def uint8p(self, offset, fixed=False): """Read unsigned 8-bit value at offset.""" @@ -480,8 +554,7 @@ def findv(self, query, addr=None, length=None): :return: Generates offsets where regex was matched :rtype: Iterator[int] """ - for chunk_addr, chunk in self.readv_regions(addr, length, continuous_wise=False): - print(len(chunk)) + for chunk_addr, chunk in self.readv_regions(addr, length, contiguous=False): for idx in self._find(chunk, query): yield idx + chunk_addr @@ -521,7 +594,7 @@ def regexv(self, query, addr=None, length=None): Method doesn't match bytes overlapping the border between regions """ query = ensure_bytes(query) - for chunk_addr, chunk in self.readv_regions(addr, length, continuous_wise=False): + for chunk_addr, chunk in self.readv_regions(addr, length, contiguous=False): for entry in re.finditer(query, chunk, re.DOTALL): yield chunk_addr + entry.start() @@ -590,14 +663,9 @@ def yarav(self, ruleset, addr=None, length=None): length = self.regions[-1].end - addr def map_offset(off, len): - # TODO: This could be better, but works in most cases - va = self.p2v(off) - if (va is not None and - addr <= va < addr + length and - self.is_addr(va + len - 1) and - addr <= va + len - 1 < addr + length): - return va - + ptr = self.p2v(off, len) + if ptr is not None and addr <= ptr < addr + length: + return ptr return ruleset.match(data=self.readp(0), offset_mapper=map_offset) def 
_findbytes(self, yara_fn, query, addr, length): @@ -612,7 +680,7 @@ def _findbytes(self, yara_fn, query, addr, length): def findbytesp(self, query, offset=0, length=None): """ - Search for byte sequences (e.g., `4? AA BB ?? DD`). Uses :py:meth:`regexp` internally + Search for byte sequences (e.g., `4? AA BB ?? DD`). Uses :py:meth:`yarap` internally .. versionadded:: 1.4.0 Query is passed to yarap as single hexadecimal string rule. Use Yara-compatible strings only @@ -630,7 +698,7 @@ def findbytesp(self, query, offset=0, length=None): def findbytesv(self, query, addr=None, length=None): """ - Search for byte sequences (e.g., `4? AA BB ?? DD`). Uses :py:meth:`regexv` internally + Search for byte sequences (e.g., `4? AA BB ?? DD`). Uses :py:meth:`yarav` internally .. versionadded:: 1.4.0 Query is passed to yarav as single hexadecimal string rule. Use Yara-compatible strings only diff --git a/malduck/procmem/procmemelf.py b/malduck/procmem/procmemelf.py index 0006e02..4c2bb95 100644 --- a/malduck/procmem/procmemelf.py +++ b/malduck/procmem/procmemelf.py @@ -1,12 +1,12 @@ from .region import Region -from .procmem import ProcessMemory +from .binmem import ProcessMemoryBinary import elftools import elftools.elf.elffile import io -class ProcessMemoryELF(ProcessMemory): +class ProcessMemoryELF(ProcessMemoryBinary): """ Representation of memory-mapped ELF file @@ -15,42 +15,32 @@ class ProcessMemoryELF(ProcessMemory): ELF files can be read directly using inherited :py:meth:`ProcessMemory.from_file` with `image` argument set (look at :py:meth:`from_memory` method). """ - def __init__(self, buf, base=0, regions=None, image=False): - super(ProcessMemoryELF, self).__init__(buf, base=base, regions=regions) - self._elf = None - self._imgend = None - if image: - self._load_image(page_size=0x1000) - - @classmethod - def from_memory(cls, memory, base=None, image=False): - """ - Creates ProcessMemoryELF instance from ProcessMemory object. 
+ __magic__ = b"\x7fELF" - :param memory: ProcessMemory object containing ELF image - :type memory: :class:`ProcessMemory` - :param base: Virtual address where ELF image is located (default: beginning of procmem) - :param image: True if memory contains ELF executable file instead of memory-mapped ELF (default: False) - :param detect_image: ProcessMemoryELF automatically detect whether image or memory-mapped ELF is loaded - (default: False) - :rtype: :class:`ProcessMemory` - - When image is True - ELF file will be loaded under location specified in program header - (elf.get_segment(0).header['p_vaddr']). :class:`ProcessMemoryELF` object created that way contains only memory regions - created during load (all other data will be wiped out). - """ - copied = cls(memory.m, base=base or memory.imgbase, regions=memory.regions, image=image) - copied.f = memory.f - return copied + def __init__(self, buf, base=0, regions=None, image=False, detect_image=False): + self._elf = None + super(ProcessMemoryELF, self).__init__(buf, base=base, regions=regions, image=image, detect_image=detect_image) - def _elf_direct_load(self, fast_load=True): + def _elf_direct_load(self): offset = self.v2p(self.imgbase) # Stream required for ELFFile() stream = io.BytesIO(self.readp(offset)) elf = elftools.elf.elffile.ELFFile(stream) + # Try to iter_segments to check whether ELFFile is really correct + list(elf.iter_segments()) return elf - def _load_image(self, page_size=None): + def is_valid(self): + if self.readv(self.imgbase, 4) != self.__magic__: + return False + try: + self._elf_direct_load() + return True + except Exception: + return False + + def _reload_as_image(self): + page_size = 0x1000 # Reset regions self.imgbase = None self.regions = [] @@ -80,14 +70,14 @@ def _load_image(self, page_size=None): def elf(self): """Related :class:`ELFFile` object""" if not self._elf: - self._elf = self._elf_direct_load(fast_load=False) + self._elf = self._elf_direct_load() return self._elf + def 
is_image_loaded_as_memdump(self): + raise NotImplementedError() + @property def imgend(self): """Address where ELF image ends""" - if not self._imgend: - lastSegment = self.elf.get_segment(self.elf.num_segment()-1) - self._imgend = lastSegment.header['p_vaddr'] + lastSegment.header['p_memsz'] - return self._imgend - + lastSegment = self.elf.get_segment(self.elf.num_segment()-1) + return lastSegment.header['p_vaddr'] + lastSegment.header['p_memsz'] diff --git a/malduck/procmem/procmempe.py b/malduck/procmem/procmempe.py index 5bb41ab..aa1c51c 100644 --- a/malduck/procmem/procmempe.py +++ b/malduck/procmem/procmempe.py @@ -1,11 +1,11 @@ from .region import Region -from .procmem import ProcessMemory +from .binmem import ProcessMemoryBinary from ..bits import align from ..pe import PE -class ProcessMemoryPE(ProcessMemory): +class ProcessMemoryPE(ProcessMemoryBinary): """ Representation of memory-mapped PE file @@ -14,37 +14,11 @@ class ProcessMemoryPE(ProcessMemory): PE files can be read directly using inherited :py:meth:`ProcessMemory.from_file` with `image` argument set (look at :py:meth:`from_memory` method). """ + __magic__ = b"MZ" + def __init__(self, buf, base=0, regions=None, image=False, detect_image=False): - super(ProcessMemoryPE, self).__init__(buf, base=base, regions=regions) self._pe = None - self._imgend = None - if detect_image: - image = self.is_image_loaded_as_memdump() - if image: - self._load_image() - - @classmethod - def from_memory(cls, memory, base=None, image=False, detect_image=False): - """ - Creates ProcessMemoryPE instance from ProcessMemory object. 
- - :param memory: ProcessMemory object containing PE image - :type memory: :class:`ProcessMemory` - :param base: Virtual address where PE image is located (default: beginning of procmem) - :param image: True if memory contains EXE file instead of memory-mapped PE (default: False) - :param detect_image: ProcessMemoryPE automatically detect whether image or memory-mapped PE is loaded - (default: False) - :rtype: :class:`ProcessMemoryPE` - - When image is True - PE file will be loaded under location specified in PE header - (pe.optional_header.ImageBase). :class:`ProcessMemoryPE` object created that way contains only memory regions - created during load (all other data will be wiped out). If image contains relocation info, relocations will be - applied using :py:meth:`pefile.relocate_image` method. - """ - copied = cls(memory.m, base=base or memory.imgbase, regions=memory.regions, - image=image, detect_image=detect_image) - copied.f = memory.f - return copied + super(ProcessMemoryPE, self).__init__(buf, base=base, regions=regions, image=image, detect_image=detect_image) def _pe_direct_load(self, fast_load=True): offset = self.v2p(self.imgbase) @@ -53,7 +27,7 @@ def _pe_direct_load(self, fast_load=True): pe = PE(data=m, fast_load=fast_load) return pe - def _load_image(self): + def _reload_as_image(self): # Load PE data from imgbase offset pe = self._pe_direct_load(fast_load=False) # Reset regions @@ -73,6 +47,20 @@ def _load_image(self): section.PointerToRawData )) + def is_valid(self): + if self.readv(self.imgbase, 2) != self.__magic__: + return False + pe_offs = self.uint32v(self.imgbase + 0x3C) + if pe_offs is None: + return False + if self.readv(self.imgbase + pe_offs, 2) != b"PE": + return False + try: + PE(self) + return True + except Exception: + return False + def is_image_loaded_as_memdump(self): """ Checks whether memory region contains image incorrectly loaded as memory-mapped PE dump (image=False). 
@@ -107,13 +95,8 @@ def pe(self): @property def imgend(self): """Address where PE image ends""" - if not self._imgend: - section = self.pe.sections[-1] - self._imgend = ( - self.imgbase + - section.VirtualAddress + section.Misc_VirtualSize - ) - return self._imgend + section = self.pe.sections[-1] + return self.imgbase + section.VirtualAddress + section.Misc_VirtualSize def store(self): """ diff --git a/malduck/procmem/region.py b/malduck/procmem/region.py index 67f8d65..29376c8 100644 --- a/malduck/procmem/region.py +++ b/malduck/procmem/region.py @@ -27,13 +27,15 @@ class Region(object): def __init__(self, addr, size, state, type_, protect, offset): self.addr = addr self.size = size - self.end = addr + size self.state = state self.type_ = type_ self.protect = protect self.offset = offset def to_json(self): + """ + Returns JSON-like dict representation + """ return { "addr": "0x%08x" % self.addr, "end": "0x%08x" % (self.addr + self.size), @@ -44,6 +46,82 @@ def to_json(self): "offset": self.offset, } + @property + def end(self): + """ + Virtual address of region end (first unmapped byte) + """ + return self.addr + self.size + + @property + def end_offset(self): + """ + Offset of region end (first unmapped byte) + """ + return self.offset + self.size + + @property + def last(self): + """ + Virtual address of last region byte + """ + return self.addr + self.size - 1 + + @property + def last_offset(self): + """ + Offset of last region byte + """ + return self.offset + self.size - 1 + + def v2p(self, addr): + """ + Virtual address to physical offset translation. Assumes that address is valid within Region. + :param addr: Virtual address + :return: Physical offset + """ + return self.offset + addr - self.addr + + def p2v(self, off): + """ + Physical offset to translation. Assumes that offset is valid within Region. 
+ :param off: Physical offset + :return: Virtual address + """ + return self.addr + off - self.offset + + def contains_offset(self, offset): + """ + Checks whether region contains provided physical offset + """ + return self.offset <= offset < self.offset + self.size + + def contains_addr(self, addr): + """ + Checks whether region contains provided virtual address + """ + return self.addr <= addr < self.end + + def intersects_range(self, addr, length): + """ + Checks whether region mapping intersects with provided range + """ + return self.addr < addr + length and addr < self.end + + def trim_range(self, addr, length=None): + """ + Returns region intersection with provided range + :param addr: Virtual address of starting point + :param length: Length of range (optional) + :rtype: :class:`Region` + """ + new_addr = max(self.addr, addr) + new_end = min(self.end, addr + length) if length is not None else self.end + if new_end <= new_addr: + return None + new_offset = self.v2p(new_addr) + return Region(new_addr, new_end - new_addr, self.state, self.type_, self.protect, new_offset) + def __eq__(self, other): if not isinstance(other, Region): raise ValueError("Not a region object!") @@ -51,5 +129,5 @@ return ( self.addr == other.addr and self.size == other.size and self.state == other.state and self.type_ == other.type_ and - self.protect == other.protect + self.protect == other.protect and self.offset == other.offset ) diff --git a/malduck/short.py b/malduck/short.py index 77f9a95..65ec69a 100644 --- a/malduck/short.py +++ b/malduck/short.py @@ -14,7 +14,7 @@ from .crypto.rsa import RSA from .disasm import Instruction from .pe import PE -from .procmem import ProcessMemory, ProcessMemoryPE, ProcessMemoryELF, CuckooProcessMemory +from .procmem import ProcessMemory, ProcessMemoryPE, ProcessMemoryELF, CuckooProcessMemory, IDAProcessMemory from .string.ops import Padding, Unpadding, Base64 from .verify import Verify @@ -24,6 +24,9 @@ class 
aes(object): def __init__(self, mode): self.mode = mode + def encrypt(self, key=None, iv=None, data=None): + return AES(key, iv, self.mode).encrypt(data) + def decrypt(self, key=None, iv=None, data=None): return AES(key, iv, self.mode).decrypt(data) @@ -37,8 +40,12 @@ class _cbc_(object): aes.cbc(key=b'aes128cipher_key', iv=b"iv"*8, - data=pkcs7(b"data_to_be_encrypted", 16)) + data=pkcs7(b"data_to_be_decrypted", 16)) """ + @staticmethod + def encrypt(key=None, iv=None, data=None): + return aes("cbc").encrypt(key, iv, data) + @staticmethod def decrypt(key=None, iv=None, data=None): return aes("cbc").decrypt(key, iv, data) @@ -56,8 +63,12 @@ class _ecb_(object): from malduck import aes, pkcs7 aes.ecb(key=b'aes128cipher_key', - data=pkcs7(b"data_to_be_encrypted", 16)) + data=pkcs7(b"data_to_be_decrypted", 16)) """ + @staticmethod + def encrypt(key=None, data=None): + return aes("ecb").encrypt(key, None, data) + @staticmethod def decrypt(key=None, data=None): return aes("ecb").decrypt(key, None, data) @@ -76,8 +87,12 @@ class _ctr_(object): aes.ctr(key=b'aes128cipher_key', nonce=b"iv"*8 - data=pkcs7(b"data_to_be_encrypted", 16)) + data=pkcs7(b"data_to_be_decrypted", 16)) """ + @staticmethod + def encrypt(key=None, nonce=None, data=None): + return aes("ctr").encrypt(key, nonce, data) + @staticmethod def decrypt(key=None, nonce=None, data=None): return aes("ctr").decrypt(key, nonce, data) @@ -223,6 +238,7 @@ def serpent(key, data, iv=None): procmempe = ProcessMemoryPE procmemelf = ProcessMemoryELF cuckoomem = CuckooProcessMemory +idamem = IDAProcessMemory base64 = Base64() pad = Padding("pkcs7") pkcs7 = Padding("pkcs7") diff --git a/malduck/yara.py b/malduck/yara.py index af6e3ed..93fd6f7 100644 --- a/malduck/yara.py +++ b/malduck/yara.py @@ -1,11 +1,13 @@ from __future__ import absolute_import import json +import logging import os import re -import warnings import yara +log = logging.getLogger(__name__) + _YARA_RULE_FORMAT = """ rule {name} {{ strings: @@ -108,7 +110,7 
@@ def from_dir(path, recursive=True, followlinks=True): ruleset_name = os.path.splitext(os.path.basename(fname))[0] ruleset_path = os.path.join(root, fname) if ruleset_name in rule_paths: - warnings.warn("Yara file name collision - {} overridden by {}".format( + log.warning("Yara file name collision - {} overridden by {}".format( rule_paths[ruleset_name], ruleset_path)) rule_paths[ruleset_name] = ruleset_path @@ -171,8 +173,13 @@ class YaraMatches(object): Rules can be referenced by both attribute and index. """ def __init__(self, match_results, offset_mapper=None): + self.match_results = match_results + self.matched_rules = {} + self.remap(offset_mapper) + + def remap(self, offset_mapper=None): self.matched_rules = {} - for match in match_results: + for match in self.match_results: yara_match = YaraMatch(match, offset_mapper=offset_mapper) if yara_match: self.matched_rules[match.rule] = yara_match diff --git a/setup.py b/setup.py index f0a4d12..5f59b49 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ setup( name="malduck", - version="2.1.1", + version="3.0.0", description="Malduck is your ducky companion in malware analysis journeys", author="CERT Polska", author_email="info@cert.pl", diff --git a/tests/files/modules/olly/olly.py b/tests/files/modules/olly/olly.py index de67618..7c76aa3 100644 --- a/tests/files/modules/olly/olly.py +++ b/tests/files/modules/olly/olly.py @@ -7,8 +7,8 @@ class Ollydbg(Extractor): @Extractor.extractor def olly_is(self, p, hit): - return {"olly": [p.asciiz(hit)]} + return {"olly": [p.asciiz(hit).decode("utf8")]} @Extractor.extractor("olly_is_not") def olly_isnt(self, p, hit): - return {"olly": [p.asciiz(hit)]} + return {"olly": [p.asciiz(hit).decode("utf8")]} diff --git a/tests/test_crypto.py b/tests/test_crypto.py index 65a1054..747d9c3 100644 --- a/tests/test_crypto.py +++ b/tests/test_crypto.py @@ -10,6 +10,7 @@ def test_aes(): b"I\x96Z\xe4\xb5\xffX\xbdT]\x93\x03\x96\xfcw\xd9" b"I\x96Z\xe4\xb5\xffX\xbdT]\x93\x03\x96\xfcw\xd9" 
) + assert aes.ecb.decrypt(b"A"*16, data=b"C"*32) == ( b"I\x96Z\xe4\xb5\xffX\xbdT]\x93\x03\x96\xfcw\xd9" b"I\x96Z\xe4\xb5\xffX\xbdT]\x93\x03\x96\xfcw\xd9" @@ -20,11 +21,25 @@ def test_aes(): b"\n\xd5\x19\xa7\xf6\xbc\x1b\xfe\x17\x1e\xd0@\xd5\xbf4\x9a" ) - assert aes.ctr( + assert aes.ctr.decrypt( b"hello world12345", b"A"*16, b"\x803\xe3J#\xf4;\x13\x11+h\xf5\xba-\x9b\x05" ) == b"B"*16 + assert aes.ecb.encrypt(b"A"*16, b"C"*32) == ( + b"\xbf\x1ej>.\xc2\xdb_\x9a1&\x17\xee\xfc\x95S" + b"\xbf\x1ej>.\xc2\xdb_\x9a1&\x17\xee\xfc\x95S" + ) + + assert aes.cbc.encrypt(b"A"*16, b"B"*16, b"C"*32) == ( + b"\xaa\x1a\x18\xffUa_a\"\xf2\x87He\xc8\x1b\xfc" + b"\xf9\xcb@\xed\xf6N\xd0-\x9d1rB\xd1\xf2Z\x00" + ) + + assert aes.ctr.encrypt(b"hello world12345", b"A"*16, b"B"*16) == ( + b'\x803\xe3J#\xf4;\x13\x11+h\xf5\xba-\x9b\x05' + ) + assert aes.import_key( b"\x08\x02\x00\x00\x0ef\x00\x00\x10\x00\x00\x00" + b"A"*16 ) == ("AES-128", b"A"*16) diff --git a/tests/test_extractor.py b/tests/test_extractor.py index 84595b9..2c52649 100644 --- a/tests/test_extractor.py +++ b/tests/test_extractor.py @@ -30,9 +30,9 @@ def test_scan_ollydbg(): assert cfg["family"] == "ollydbg" assert sorted(cfg["olly"]) == [ - b' - OllyDbg is a JIT debugger\n', - b" - OllyDbg is in Explorer's menu\n", - b" - OllyDbg is not in Explorer's menu" + " - OllyDbg is a JIT debugger\n", + " - OllyDbg is in Explorer's menu\n", + " - OllyDbg is not in Explorer's menu" ] diff --git a/tests/test_hash.py b/tests/test_hash.py index f91d30b..bf9b2eb 100644 --- a/tests/test_hash.py +++ b/tests/test_hash.py @@ -24,3 +24,8 @@ def test_hash(): b"9b71d224bd62f3785d96d46ad3ea3d73319bfbc2890caadae2dff72519673ca72" b"323c3d99ba5c11d7c7acc6e14b8c5da0c4663475c2e5c3adef46f73bcdec043" ) + + +def test_crc32_unsigned(): + # Py2: zlib.crc32(b"aaaa") == -1382488763 + assert crc32(b"aaaa") == 2912478533 diff --git a/tests/test_pmem_regions.py b/tests/test_pmem_regions.py new file mode 100644 index 0000000..5141792 --- /dev/null +++ 
b/tests/test_pmem_regions.py @@ -0,0 +1,133 @@ +import pytest +from malduck import procmem +from malduck.procmem import Region + + +def test_single_region(): + payload = b"0123456789" + regions = [ + Region(0x10000, 8, 0, 0, 0, 1) + ] + mem = procmem(payload, regions=regions) + assert list(mem.iter_regions()) == mem.regions + + assert list(mem.iter_regions(addr=0xffff)) == mem.regions + assert list(mem.iter_regions(addr=0x10000)) == mem.regions + assert list(mem.iter_regions(addr=0x10007)) == mem.regions + assert list(mem.iter_regions(addr=0x10008)) == [] + + assert list(mem.iter_regions(offset=0)) == mem.regions + assert list(mem.iter_regions(offset=1)) == mem.regions + assert list(mem.iter_regions(offset=8)) == mem.regions + assert list(mem.iter_regions(offset=9)) == [] + + assert list(mem.iter_regions(length=0)) == [] + assert list(mem.iter_regions(length=1)) == mem.regions + + assert list(mem.iter_regions(addr=0xffff, length=1)) == [] + assert list(mem.iter_regions(addr=0xffff, length=2)) == mem.regions + assert list(mem.iter_regions(addr=0xffff, length=0x10)) == mem.regions + assert list(mem.iter_regions(addr=0x10007, length=0x10)) == mem.regions + assert list(mem.iter_regions(addr=0x10008, length=0x10)) == [] + + with pytest.raises(ValueError): + # ValueError("Don't know how to retrieve length-limited regions with offset from unmapped area") + list(mem.iter_regions(offset=0, length=1)) + assert list(mem.iter_regions(offset=1, length=1)) == mem.regions + + +def test_single_region_trim(): + payload = b"0123456789" + regions = [ + Region(0x10000, 8, 0, 0, 0, 1) + ] + mem = procmem(payload, regions=regions) + assert list(mem.iter_regions(trim=True)) == mem.regions + + assert list(mem.iter_regions(addr=0xffff, trim=True)) == mem.regions + assert list(mem.iter_regions(addr=0x10000, trim=True)) == mem.regions + assert list(mem.iter_regions(addr=0x10007, trim=True)) == [Region(0x10007, 1, 0, 0, 0, 8)] + assert list(mem.iter_regions(addr=0x10008, trim=True)) == [] + + 
assert list(mem.iter_regions(offset=0, trim=True)) == mem.regions + assert list(mem.iter_regions(offset=1, trim=True)) == mem.regions + assert list(mem.iter_regions(offset=8, trim=True)) == [Region(0x10007, 1, 0, 0, 0, 8)] + assert list(mem.iter_regions(offset=9, trim=True)) == [] + + assert list(mem.iter_regions(length=0, trim=True)) == [] + assert list(mem.iter_regions(length=1, trim=True)) == [Region(0x10000, 1, 0, 0, 0, 1)] + + assert list(mem.iter_regions(addr=0xffff, length=1, trim=True)) == [] + assert list(mem.iter_regions(addr=0xffff, length=2, trim=True)) == [Region(0x10000, 1, 0, 0, 0, 1)] + assert list(mem.iter_regions(addr=0xffff, length=8, trim=True)) == [Region(0x10000, 7, 0, 0, 0, 1)] + assert list(mem.iter_regions(addr=0x10001, length=4, trim=True)) == [Region(0x10001, 4, 0, 0, 0, 2)] + assert list(mem.iter_regions(addr=0x10007, length=0x10, trim=True)) == [Region(0x10007, 1, 0, 0, 0, 8)] + assert list(mem.iter_regions(addr=0x10008, length=0x10, trim=True)) == [] + + with pytest.raises(ValueError): + # ValueError("Don't know how to retrieve length-limited regions with offset from unmapped area") + list(mem.iter_regions(offset=0, length=1, trim=True)) + assert list(mem.iter_regions(offset=1, length=1, trim=True)) == [Region(0x10000, 1, 0, 0, 0, 1)] + assert list(mem.iter_regions(offset=4, length=2, trim=True)) == [Region(0x10003, 2, 0, 0, 0, 4)] + + +@pytest.fixture +def mem(): + # aaaaaaa bbbbbbccccccccdddd eeee + payload = b"0123456789abcdefghijklmnopqrstuvwxyz" + regions = [ + Region(0x10000, 7, 0, 0, 0, 1), + Region(0x10007, 6, 0, 0, 0, 10), + Region(0x10100, 8, 0, 0, 0, 16), + Region(0x10108, 4, 0, 0, 0, 24), + Region(0x10200, 4, 0, 0, 0, 31) + ] + # v---0x10000 v---- 0x10100 v-- 0x10200 + # VM: .....1234567abcdef..........ghijklmnopqr ..........vwxy....... 
+ return procmem(payload, base=0x10000, regions=regions) + + +def test_regions_multi(mem): + # Test simple enum from specified address + assert list(mem.iter_regions()) == mem.regions + assert list(mem.iter_regions(0x1000)) == mem.regions + assert list(mem.iter_regions(0x10000)) == mem.regions + assert list(mem.iter_regions(0x10104)) == [Region(0x10100, 8, 0, 0, 0, 16), + Region(0x10108, 4, 0, 0, 0, 24), + Region(0x10200, 4, 0, 0, 0, 31)] + assert list(mem.iter_regions(0x10203)) == [Region(0x10200, 4, 0, 0, 0, 31)] + assert list(mem.iter_regions(0x10204)) == [] + + # Test simple enum from specified offset + assert list(mem.iter_regions(offset=0)) == mem.regions + assert list(mem.iter_regions(offset=10)) == mem.regions[1:] + assert list(mem.iter_regions(offset=20)) == mem.regions[2:] + assert list(mem.iter_regions(offset=30)) == [Region(0x10200, 4, 0, 0, 0, 31)] + assert list(mem.iter_regions(offset=40)) == [] + + assert list(mem.iter_regions(0xffff, contiguous=True)) == [] + assert list(mem.iter_regions(0x10000, contiguous=True)) == [Region(0x10000, 7, 0, 0, 0, 1), + Region(0x10007, 6, 0, 0, 0, 10)] + assert list(mem.iter_regions(0x10002, length=8, contiguous=True, trim=True)) == [ + Region(0x10002, 5, 0, 0, 0, 3), + Region(0x10007, 3, 0, 0, 0, 10) + ] + assert list(mem.iter_regions(offset=1, contiguous=True)) == [ + Region(0x10000, 7, 0, 0, 0, 1), + Region(0x10007, 6, 0, 0, 0, 10) + ] + assert list(mem.iter_regions(offset=2, length=0x9f)) == [ + Region(0x10000, 7, 0, 0, 0, 1), + Region(0x10007, 6, 0, 0, 0, 10) + ] + assert list(mem.iter_regions(offset=2, length=0x100)) == [ + Region(0x10000, 7, 0, 0, 0, 1), + Region(0x10007, 6, 0, 0, 0, 10), + Region(0x10100, 8, 0, 0, 0, 16) + ] + assert list(mem.iter_regions(offset=2, length=0x100, trim=True)) == [ + Region(0x10001, 6, 0, 0, 0, 2), + Region(0x10007, 6, 0, 0, 0, 10), + Region(0x10100, 1, 0, 0, 0, 16) + ] + diff --git a/tests/test_procmem.py b/tests/test_procmem.py index a09043f..f400fc8 100644 --- 
a/tests/test_procmem.py +++ b/tests/test_procmem.py @@ -161,6 +161,10 @@ def test_findbytes(): assert not list(buf.findbytesv(enhex(b"test hAAAA"))) assert list(buf.findbytesv(enhex(b"test\n hAAAA"))) + assert list(buf.findbytesv(enhex(b"is"), length=0x100b)) == [0x40100a] + assert list(buf.findbytesv(enhex(b"is"), length=0x100d)) == [0x40100a, 0x40100c] + assert list(buf.findbytesv(enhex(b"is"), addr=0x40100b, length=0x100d)) == [0x40100c] + payload = b"".join([ b"a" * 0x1000, b"b" * 0x1000, @@ -178,6 +182,25 @@ def test_findbytes(): assert next(p.findbytesv(enhex(b"dddd"))) == 0x410000 +def test_simple_findv(): + payload = b"12ab34cd45ef" + regions = [ + Region(0x10000, 2, 0, 0, 0, 2), + Region(0x10002, 2, 0, 0, 0, 6), + Region(0x10010, 2, 0, 0, 0, 10) + ] + p = procmem(payload, regions=regions) + + assert list(p.findv(b"12")) == [] + assert list(p.findv(b"ab")) == [0x10000] + assert list(p.findv(b"ab", addr=0x10002)) == [] + assert list(p.findv(b"ab34")) == [] + assert list(p.findv(b"abcd")) == [0x10000] + assert list(p.findv(b"abcdef")) == [] + assert list(p.findv(b"cdef")) == [] + assert list(p.findv(b"ef")) == [0x10010] + + def test_findv(): payload = b"".join([ pad.null(pad.null(b"a" * 0x200 + b"pattern", 0x500) + b"pattern2", 0x1000), @@ -197,6 +220,8 @@ def test_findv(): assert list(p.findv(b"pattern", 0x401100, 0x405)) == [0x401200] assert list(p.findv(b"pattern", length=0x10300)) == [0x400200, 0x400500, 0x401200, 0x401500, 0x410200] assert list(p.findv(b"pattern", 0x401508)) == [0x410200, 0x410500] + assert list(p.findv(b"pattern", 0x403508)) == [0x410200, 0x410500] + def test_patchv(): payload = b"".join([