Skip to content

Commit

Permalink
General updates and bugfixes
Browse files Browse the repository at this point in the history
  • Loading branch information
dc3-tsd committed Feb 3, 2023
1 parent e6f05d2 commit 181131f
Show file tree
Hide file tree
Showing 15 changed files with 475 additions and 179 deletions.
15 changes: 15 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,8 +1,23 @@
# Changelog
All notable changes to this project will be documented in this file.


## [Unreleased]
- Added `movsq` opcode support (@ddash-ct)
- Added utility functions for analyzing strings:
    - `find_user_strings()`
    - `find_api_resolve_strings()`
    - `is_code_string()`
    - `is_library_string()`
    - `detect_encoding()`
    - `force_to_string()`
- Added better support for operands with segment registers (fs/gs)
- Fixed the default data type to be an 8-byte qword when forcing extra arguments on a 64-bit sample.
- Added `default_data_type` argument on `get_function_args()`/`get_function_arg_values()`/`get_function_signatures()` to change the data type used when forcing extra arguments. (Data type given should be valid for the underlying disassembler.)
- Added support for the `wsprintfW` call hook.
- Added `FunctionArgument.location` property, which provides the location of the argument. (stack offset, register, etc.)
- Added `disable_all()` and `enable()` to emulator instances, which together simulate a whitelist for opcode/function hooks.
- Fixed bug in `idiv` opcode emulation.


## [0.6.1] - 2022-12-20
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ The following utilities are included with Rugosa:
- [Emulation](./docs/CPUEmulation.md)
- [Extra Disassembly Interfaces](./rugosa/disassembly.py)
- [Regex](./docs/Regex.md)
- String Management - *TODO*
- [Strings](./rugosa/strings.py)
- [YARA](./docs/YARA.md)


Expand Down
87 changes: 45 additions & 42 deletions rugosa/emulation/call_hooks/stdlib/libc.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,17 @@
Common standard C library builtin functions.
"""

from __future__ import annotations
import logging
import re
from typing import TYPE_CHECKING

from ... import constants
from ...call_hooks import builtin_func

if TYPE_CHECKING:
from rugosa.emulation.cpu_context import ProcessorContext

logger = logging.getLogger(__name__)


Expand Down Expand Up @@ -440,18 +445,22 @@ def strstr(cpu_context, func_name, func_args):
return str1_ptr + offset


def _format_string(ctx, fmt, required_args, string_type=constants.STRING):
def _format_string(ctx: ProcessorContext, fmt: str, required_args: int, wide: bool = False) -> str:
"""
Handles formatting the string with the function arguments based on the format.
:param ctx: cpu_context object
:param fmt: format string
:param required_args: num of required arguments for the particular format function to skip
:param wide: Whether the strings are wide
:return: The formatted string
"""
# TODO: parse string instead of bytes
# Format using best attempt here. Basically, locate all the format specifiers, and convert them to a python
# supported format string. For each format string, extract the appropriate data from the context, and append it to
# the values list.
fmt_val_re = re.compile(br"""
fmt_val_re = re.compile(r"""
% # start with percent character
[-+ #0]{0,1} # optional flag character
(\*|[0-9]{1,}){0,} # optional width specifier, though mutually exclusive (either a number or *, not both)
Expand All @@ -464,68 +473,62 @@ def _format_string(ctx, fmt, required_args, string_type=constants.STRING):
logger.debug("Format vals: %r", fmt_vals)

# Re-pull function arguments with correct number of arguments.
func_sig = ctx.get_function_signature()
for _ in range(len(func_sig.arguments) - required_args):
func_sig.remove_argument(-1)
# For an unknown reason, int is not always being read as a QWORD on 64-bit, so this line
# forces the issue to ensure pointer addresses aren't being truncated to 32 bits
data_type = "qword" if ctx.bitness == 64 else "dword"
for _ in range(len(fmt_vals)):
func_sig.add_argument(data_type)
func_args = [arg.value for arg in func_sig.arguments]
func_args = ctx.get_function_arg_values(num_args=required_args + len(fmt_vals))

format_vals = []
arg_pos = required_args # skip destination and format string
for match in fmt_vals:
if b"*" in match:
value = func_args[arg_pos]

if "*" in match:
# Indicates that one of the parameters is a width, which must be pulled and added to the list first
format_vals.append(func_args[arg_pos])
arg_pos += 1

if match.endswith(b"c"): # character (will this be the value or a read from the context???
arg_val = func_args[arg_pos]
if arg_val <= 0xFF: # assume that the argument contains the character
format_vals.append(arg_val)
if match.endswith("c"): # character (will this be the value or a read from the context???
if value <= 0xFF: # assume that the argument contains the character
value = chr(value)
else: # assume it's a pointer that must be dereferenced
format_vals.append(ctx.memory.read_data(arg_val, size=1))
value = chr(ctx.memory.read_data(value, size=1))

elif match.endswith(b"s"): # string value, should be a pointer
_arg = ctx.memory.read_data(func_args[arg_pos], data_type=string_type)
if not len(_arg): # If the argument isn't set during parsing, preserve the formatting
elif match.endswith("s"): # string value, should be a pointer
value = ctx.memory.read_string(value, wide=wide)
if not value: # If the argument isn't set during parsing, preserve the formatting
logger.debug("Pulled 0 byte format string, reverting")
_arg = b"%s"
format_vals.append(_arg)
value = "%s"

else: # all other numerical types???
format_vals.append(func_args[arg_pos])
# all other numerical types???

format_vals.append(value)
arg_pos += 1

result = fmt % tuple(format_vals)
format_vals = tuple(format_vals)
result = fmt % format_vals
logger.debug(f"Formatted string: {fmt!r} % {format_vals!r} -> {result!r}")
return result


@builtin_func
@builtin_func("sprintf")
@builtin_func("wsprintfW") # TODO: technically from winuser.h
def sprintf(ctx, func_name, func_args):
"""
Format a string based on provided format string and parameters.
int sprintf (char *s, const char *format, ...);
For sprintf, there's no way to know up front how many args are needed, but there should always be at least
2 (destination and format). We can use the format string to determine how many arguments we need by
counting the format specifiers.
"""
# Almost guaranteed to get the incorrect number of args. So obtain the format string and count the number of
# format specifiers to determine how many args we need, not including the first 2
wide = func_name.endswith("W")
if len(func_args) < 2: # Ensure that there are at least 2 arguments, dest and format
# Need to try to get at least 2 arguments...
func_args = ctx.get_function_arg_values(num_args=2)

dest = func_args[0]
fmt = ctx.memory.read_data(func_args[1])
dest, fmt_ptr, *_ = func_args
fmt = ctx.memory.read_string(fmt_ptr, wide=wide)
logger.debug("Format string: %s", fmt)
result = _format_string(ctx, fmt, 2)
result = _format_string(ctx, fmt, 2, wide)
logger.debug("Writing formatted value %s to 0x%X", result, dest)
ctx.memory.write(dest, result + b"\0")
ctx.memory.write_string(dest, result + "\0", wide=wide)
return len(result)


Expand All @@ -539,16 +542,15 @@ def snprintf(ctx, func_name, func_args):
Format a string using the provided format string and values, truncated if necessary to length n.
"""
wide = func_name.startswith("sw")
string_type = constants.WIDE_STRING if wide else constants.STRING
if len(func_args) < 3:
func_args = ctx.get_function_arg_values(num_args=3)

dest, n = func_args[:2]
fmt = ctx.memory.read_data(func_args[2], data_type=string_type)
dest, n, fmt_ptr, *_ = func_args
fmt = ctx.memory.read_string(fmt_ptr, wide=wide)
logger.debug("Format string: %s", fmt)
result = _format_string(ctx, fmt, 3, string_type)
logger.debug("Writing formatted value %s to 0x%X", result[:n - 1], dest)
ctx.memory.write(dest, result[:n - 1] + b"\0")
result = _format_string(ctx, fmt, 3, wide)[:n - 1]
logger.debug("Writing formatted value %s to 0x%X", result, dest)
ctx.memory.write_string(dest, result + "\0", wide=wide)
return len(result)


Expand All @@ -565,9 +567,10 @@ def printf(ctx, func_name, func_args):
if len(func_args) < 1:
func_args = ctx.get_function_arg_values(num_args=1)

fmt = ctx.memory.read_data(func_args[0])
fmt_ptr, *_ = func_args
fmt = ctx.memory.read_string(fmt_ptr)
logger.debug("Format string: %s", fmt)
result = _format_string(ctx, fmt, 1)
logger.debug("Writing formatted value %s to stdout", result)
ctx.stdout += result.decode()
ctx.stdout += result
return len(result)
31 changes: 20 additions & 11 deletions rugosa/emulation/cpu_context.py
Original file line number Diff line number Diff line change
Expand Up @@ -484,7 +484,9 @@ def get_original_location(self, addr):
else:
return ip, None

def get_function_signature(self, func_ea=None, num_args=None) -> Optional[FunctionSignature]:
def get_function_signature(
self, func_ea: int = None, num_args: int = None, default_data_type: str = None
) -> Optional[FunctionSignature]:
"""
Returns the function signature of the given func_ea with argument values pulled
from this context.
Expand All @@ -493,13 +495,14 @@ def get_function_signature(self, func_ea=None, num_args=None) -> Optional[Functi
The first operand is used if not provided. (helpful for a "call" instruction)
:param int num_args: Force a specific number of arguments in the signature.
If not provided, number of arguments is determined by the disassembler.
Extra arguments not defined by the disassembler are assumed to be 'int' type.
Extra arguments not defined by the disassembler are assumed to be the default_data_type.
Avoid using num_args and adjust the returned FunctionSignature manually
if more customization is needed.
(NOTE: The function signature will be forced on failure if this is set.)
WARNING: Setting the number of arguments will permanently change the
signature on the backend disassembler.
:param str default_data_type: The default data type to use when forcing extra arguments.
(Defaults to "dword" for 32-bit or "qword" for 64-bit)
:return: FunctionSignature object or None if not applicable
Expand All @@ -521,21 +524,23 @@ def get_function_signature(self, func_ea=None, num_args=None) -> Optional[Functi
if num_args is not None:
if num_args < 0:
raise ValueError("num_args is negative")

arguments = signature.arguments
if len(arguments) > num_args:
# TODO: Instead of removing arguments, can we just not pull them all?
for _ in range(len(arguments) - num_args):
signature.remove_argument(-1)

elif len(arguments) < num_args:
# TODO: Is there a way to just see what the argument location would be
# without having to modify the function signature?
if not default_data_type:
default_data_type = "qword" if self.bitness == 64 else "dword"
for _ in range(num_args - len(arguments)):
signature.add_argument("int")
signature.add_argument(default_data_type)

return signature

def get_function_args(self, func_ea=None, num_args=None) -> List[FunctionArgument]:
def get_function_args(
self, func_ea: int = None, num_args: int = None, default_data_type: str = None
) -> List[FunctionArgument]:
"""
Returns the FunctionArg objects for this context based on the
given function.
Expand All @@ -550,20 +555,24 @@ def get_function_args(self, func_ea=None, num_args=None) -> List[FunctionArgumen
Use get_function_signature() and adjust the FunctionSignature manually
if more customization is needed.
(NOTE: The function signature will be forced on failure if this is set.)
:param str default_data_type: The default data type to use when forcing extra arguments.
(Defaults to "dword" for 32-bit or "qword" for 64-bit)
:returns: list of FunctionArg objects
"""
func_sig = self.get_function_signature(func_ea, num_args=num_args)
func_sig = self.get_function_signature(func_ea, num_args=num_args, default_data_type=default_data_type)
if not func_sig:
return []

return func_sig.arguments

def get_function_arg_values(self, func_ea=None, num_args=None) -> List[int]:
def get_function_arg_values(
self, func_ea: int = None, num_args: int = None, default_data_type: str = None
) -> List[int]:
"""
Returns the FunctionArg values for this context based on the given function.
"""
return [arg.value for arg in self.get_function_args(func_ea=func_ea, num_args=num_args)]
return [arg.value for arg in self.get_function_args(func_ea=func_ea, num_args=num_args, default_data_type=default_data_type)]

@property
def function_args(self) -> List[FunctionArgument]:
Expand Down
50 changes: 48 additions & 2 deletions rugosa/emulation/emulator.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,28 @@ def clear_cache(self):
self._flowchart_cache.clear()
self._memory_cache.clear()

def enable(self, *names: str):
"""
Enables the use of a specific opcode or function hook.
The hooks enabled are pulled from the default implementation
of opcodes/functions and will overwrite any custom hook currently in place.
:param names: Name(s) of opcode/function hook.
NOTE: All the "rep*" opcodes will be enabled if the name is "rep".
"""
for name in names:
name = name.lower()

if name in self._context_class.OPCODES:
self._opcode_hooks[name] = self._context_class.OPCODES[name]
elif name in call_hooks.BUILTINS:
self._call_hooks[name] = call_hooks.BUILTINS[name]
elif name.startswith("rep"):
self.disabled_rep = False
else:
raise ValueError(f'Opcode/function hook named "{name}" not found.')

def disable(self, name: str):
"""
Disables the use of a specific opcode or function hook.
Expand All @@ -125,6 +147,30 @@ def disable(self, name: str):
elif name.startswith("rep"):
self.disabled_rep = True

def disable_all(self, disable_function_hooks: bool = False):
"""
Disables all opcode hooks for the current emulator instance.
This is meant to be used when only a small number of opcodes need to
be emulated and can greatly help speed up emulation.
This removes all opcode hooks currently in place, only function hooks
will be emulated.
Function hooks can be disabled as well if desired.
Enabling specific hooks can be done with :func:`~emulator.Emulator.enable`.
The simplest way to have all the disabled hooks enabled again is to
create a new emulator instance.
"""
self._opcode_hooks = {}
self._instruction_hooks = collections.defaultdict(list)
self.disabled_rep = True
logger.debug("All opcode/instruction hooks disabled")

if disable_function_hooks:
self._call_hooks = {}
logger.debug("All function hooks disabled")

def new_context(self) -> ProcessorContext:
return self._context_class(self)

Expand Down Expand Up @@ -204,7 +250,7 @@ def emulate_call(self, name_or_start_ea, call_depth: int = 0):
if isinstance(name_or_start_ea, str):
name = name_or_start_ea
# NOTE: Using from_name because we need to be sure there is actual instructions to emulate.
func = func_utils.from_name(self.disassembler, name)
func = self.disassembler.get_function_by_name(name)
func_address = func.start
else:
func_address = name_or_start_ea
Expand Down Expand Up @@ -309,7 +355,7 @@ def execute_function(self, address_or_name: Union[int, str], call_depth: int = 0
"""
if isinstance(address_or_name, str):
name = address_or_name
func = func_utils.from_name(self.disassembler, name, ignore_underscore=True)
func = self.disassembler.get_function_by_name(name)
else:
address = address_or_name
func = self.disassembler.get_function(address)
Expand Down
Loading

0 comments on commit 181131f

Please sign in to comment.