Skip to content

Commit

Permalink
Merge pull request #35 from CybercentreCanada/upstream_fixes
Browse files Browse the repository at this point in the history
Leverage multiprocessing to handle importing of extractors and their …
  • Loading branch information
cccs-rs authored Nov 27, 2024
2 parents 43c86e9 + 0e34751 commit 27f29d2
Show file tree
Hide file tree
Showing 5 changed files with 121 additions and 75 deletions.
64 changes: 54 additions & 10 deletions configextractor/frameworks/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,39 +5,81 @@


class Extractor:
    """Plain record of the attributes describing one extractor.

    The record holds only the values passed to the constructor; it does not
    keep a reference to the extractor's module object.
    """

    def __init__(self, id, author, description, sharing, framework, module_path, yara_rule, venv=None) -> None:
        """Store the supplied values as attributes.

        Note that ``yara_rule`` is exposed as ``self.rule``; every other
        argument is stored under its own name. ``venv`` defaults to None.
        """
        # `id` shadows the builtin, but the parameter name is part of the
        # keyword interface, so it is kept as-is.
        self.id = id
        self.author = author
        self.description = description
        self.framework = framework
        self.module_path = module_path
        self.rule = yara_rule
        self.sharing = sharing
        self.venv = venv


class Framework:
def __init__(
    self,
    logger: Logger,
    author_attr_name=None,
    description_attr_name=None,
    sharing_attr_name=None,
    yara_attr_name=None,
):
    """Record the logger and the attribute names used to read extractor metadata.

    Each ``*_attr_name`` argument is the name of the attribute to look up on
    an extractor object; it is stored unchanged and defaults to None.
    ``venv_script`` and ``yara_rule`` start out as empty strings.
    """
    self.log = logger
    self.author_attr_name = author_attr_name
    self.description_attr_name = description_attr_name
    self.sharing_attr_name = sharing_attr_name
    self.yara_attr_name = yara_attr_name
    self.venv_script = ""
    self.yara_rule = ""

@staticmethod
# Get classification of module
def get_classification(extractor: Extractor) -> str:
    """Return the ``sharing`` value recorded on the given extractor."""
    return extractor.sharing

@staticmethod
# Get name of module
def get_name(extractor: Extractor):
    """Return the last dot-separated component of the extractor's id."""
    # Only the final component is needed, so split once from the right.
    return extractor.id.rsplit(".", 1)[-1]

# Define a template for results from this Extractor
def result_template(self, extractor: Extractor, yara_matches: List[yara.Match]) -> Dict[str, str]:
    """Build the base result dict for one extractor.

    The dict carries the extractor's author, description and id, plus
    ``yara_hits``: the ``rule`` attribute of every entry in ``yara_matches``.
    """
    return {
        "author": extractor.author,
        "description": extractor.description,
        "id": extractor.id,
        "yara_hits": [match.rule for match in yara_matches],
    }

def extract_metadata_from_module(self, decoder: object) -> Dict[str, str]:
    """Gather the metadata fields read from ``decoder`` into one dict.

    The result has the keys "author", "description", "sharing" and
    "yara_rule", each holding whatever the matching ``extract_*`` method
    returned for ``decoder``.
    """
    metadata = {}
    metadata["author"] = self.extract_author(decoder)
    metadata["description"] = self.extract_description(decoder)
    metadata["sharing"] = self.extract_sharing(decoder)
    metadata["yara_rule"] = self.extract_yara(decoder)
    return metadata

# Extract author from module
def extract_author(self, decoder: object) -> str:
    """Return the decoder's author attribute, or None when unavailable.

    None is returned when no author attribute name is configured, or when
    ``decoder`` has no attribute of that name.
    """
    attr_name = self.author_attr_name
    if not attr_name:
        # No author attribute configured for this framework
        return None
    # Single lookup; falls back to None when the attribute is missing
    return getattr(decoder, attr_name, None)

# Extract description from module
def extract_description(self, decoder: object) -> str:
    """Return the decoder's description attribute, or None when unavailable.

    None is returned when no description attribute name is configured, or
    when ``decoder`` has no attribute of that name.
    """
    attr_name = self.description_attr_name
    if not attr_name:
        # No description attribute configured for this framework
        return None
    # Single lookup; falls back to None when the attribute is missing
    return getattr(decoder, attr_name, None)

# Extract sharing from module
def extract_sharing(self, decoder: object) -> str:
    """Return the decoder's sharing attribute, or None when unavailable.

    None is returned when no sharing attribute name is configured, or when
    ``decoder`` has no attribute of that name.
    """
    attr_name = self.sharing_attr_name
    if not attr_name:
        # No sharing attribute configured for this framework
        return None
    # Single lookup; falls back to None when the attribute is missing
    return getattr(decoder, attr_name, None)

# Extract YARA rules from module
def extract_yara_from_module(self, decoder: object) -> str:
def extract_yara(self, decoder: object) -> str:
if self.yara_attr_name and hasattr(decoder, self.yara_attr_name):
# YARA rule found
return getattr(decoder, self.yara_attr_name)
Expand All @@ -52,9 +94,11 @@ def run(self, sample_path: str, parsers: Dict[Extractor, List[yara.Match]]) -> L

def run_in_venv(self, sample_path: str, extractor: Extractor) -> Dict[str, dict]:
# Run the extractor on the sample in a virtual environment using the MACO utility
return utils.run_in_venv(
module_name, extractor_class = extractor.id.rsplit(".", 1)
return utils.run_extractor(
sample_path,
extractor.module,
module_name,
extractor_class,
extractor.module_path,
extractor.venv,
self.venv_script,
Expand Down
28 changes: 4 additions & 24 deletions configextractor/frameworks/maco.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,49 +3,29 @@
from typing import Any, Dict, List, Union

from maco.model import ExtractorModel
from maco.utils import VENV_SCRIPT as MACO_VENV_SCRIPT, maco_extractor_validation, Base64Decoder, MACO_YARA_RULE
from maco.utils import MACO_YARA_RULE, Base64Decoder, maco_extractor_validation
from maco.utils import VENV_SCRIPT as MACO_VENV_SCRIPT

from configextractor.frameworks.base import Extractor, Framework


class MACO(Framework):
def __init__(self, logger: Logger):
super().__init__(logger, "yara_rule")
super().__init__(logger, "author", "__doc__", "sharing", "yara_rule")
self.venv_script = MACO_VENV_SCRIPT
self.yara_rule = MACO_YARA_RULE

@staticmethod
def get_classification(extractor: Extractor):
if hasattr(extractor.module, "sharing"):
return extractor.module.sharing

def validate(self, module: Any) -> bool:
return maco_extractor_validation(module)

def result_template(self, extractor: Extractor, yara_matches: List) -> Dict[str, str]:
template = super().result_template(extractor, yara_matches)
template.update(
{
"author": extractor.module.author,
"description": extractor.module.__doc__,
}
)
return template

def run(self, sample_path: str, parsers: Dict[Extractor, List[str]]) -> List[dict]:
results = list()
for extractor, yara_matches in parsers.items():
try:
result = self.result_template(extractor, yara_matches)

# Run MaCo parser with YARA matches
r: ExtractorModel = None
if extractor.venv:
# Run in special mode using the virtual environment detected
r = self.run_in_venv(sample_path, extractor)
else:
with open(sample_path, "rb") as f:
r = extractor.module().run(f, matches=yara_matches)
r: ExtractorModel = self.run_in_venv(sample_path, extractor)

if not (r or yara_matches):
# Nothing to report
Expand Down
19 changes: 4 additions & 15 deletions configextractor/frameworks/mwcp.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,14 @@
# MWCP framework

import inspect
import re as regex
from logging import Logger
from typing import Any, Dict, List
from typing import Any

import mwcp
import re as regex
from maco.model import ConnUsageEnum, Encryption, ExtractorModel
from mwcp import Parser

from configextractor.frameworks.base import Extractor, Framework
from configextractor.frameworks.base import Framework

IP_REGEX_ONLY = r"^(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$"

Expand Down Expand Up @@ -200,7 +199,7 @@ def handle_encryption(meta: dict) -> dict:

class MWCP(Framework):
def __init__(self, logger: Logger):
super().__init__(logger, "yara_rule")
super().__init__(logger, "AUTHOR", "DESCRIPTION", None, "yara_rule")
self.venv_script = """
import importlib
import os
Expand All @@ -223,16 +222,6 @@ def validate(self, module: Any) -> bool:
# 'DESCRIPTION' has to be implemented otherwise will raise an exception according to MWCP
return hasattr(module, "DESCRIPTION") and module.DESCRIPTION

def result_template(self, extractor: Extractor, yara_matches: List) -> Dict[str, str]:
template = super().result_template(extractor, yara_matches)
template.update(
{
"author": extractor.module.AUTHOR,
"description": extractor.module.DESCRIPTION,
}
)
return template

def run(self, sample_path, parsers):
results = list()

Expand Down
83 changes: 58 additions & 25 deletions configextractor/main.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,37 @@
# Main module for ConfigExtractor library
import cart
import inspect
import re as regex
import tempfile

from collections import defaultdict
from logging import Logger, getLogger
from maco import utils, yara
from typing import Dict, List
from multiprocessing import Manager, Process
from multiprocessing.managers import ListProxy
from traceback import format_exc
from types import ModuleType
from typing import Callable, Dict, List
from urllib.parse import urlparse

import cart
from maco import utils, yara

from configextractor.frameworks import MACO, MWCP
from configextractor.frameworks.base import Extractor, Framework


def import_extractors(
    root_directory: str,
    scanner: yara.Rules,
    extractor_module_callback: Callable[[ModuleType, str], None],
    logger: Logger,
    create_venv: bool,
    exceptions: ListProxy,
):
    """Run ``utils.import_extractors`` and record any failure instead of raising.

    The first five arguments are forwarded positionally and unchanged. If the
    call raises an ``Exception``, the formatted traceback is appended to
    ``exceptions`` and the error does not propagate to the caller.
    """
    try:
        utils.import_extractors(root_directory, scanner, extractor_module_callback, logger, create_venv)
    except Exception:
        # Keep the traceback text so whoever reads `exceptions` can report it
        failure_trace = format_exc()
        exceptions.append(failure_trace)


class ConfigExtractor:
def __init__(
self,
Expand All @@ -31,10 +49,11 @@ def __init__(
}

self.parsers: Dict[str, Extractor] = dict()
namespaced_yara_rules: Dict[str, List[str]] = dict()
block_regex = regex.compile("|".join(parser_blocklist)) if parser_blocklist else None
scanner = yara.compile("\n".join([fw_class.yara_rule for fw_class in self.FRAMEWORK_LIBRARY_MAPPING.values()]))
for parsers_dir in parsers_dirs:
with Manager() as manager:
parsers = manager.dict()
exceptions = manager.list()

def extractor_module_callback(module, venv):
# Check to see if we're blocking this potential extractor
Expand All @@ -59,23 +78,37 @@ def extractor_module_callback(module, venv):
if block_regex and block_regex.match(module_id):
return

rules = fw_class.extract_yara_from_module(member)
if rules:
namespaced_yara_rules[module_id] = rules

self.parsers[module_id] = Extractor(
module_id,
fw_name,
member,
module.__file__,
parsers_dir,
rules,
venv,
parsers[module_id] = dict(
id=module_id,
framework=fw_name,
module_path=module.__file__,
venv=venv,
**fw_class.extract_metadata_from_module(member),
)

utils.import_extractors(parsers_dir, scanner, extractor_module_callback, logger, create_venv)

self.yara = yara.compile(sources={ns: rules for ns, rules in namespaced_yara_rules.items()})
# Launch importing extractors as separate processes
processes = []
for parsers_dir in parsers_dirs:
p = Process(
target=import_extractors,
args=(parsers_dir, scanner, extractor_module_callback, logger, create_venv, exceptions),
)
processes.append(p)
p.start()

# Wait for all the processes to terminate
for p in processes:
p.join()

exceptions = list(exceptions)
if exceptions:
raise Exception(f"Exception occurred while importing extractors: {exceptions}")

self.parsers = {id: Extractor(**extractor_kwargs) for id, extractor_kwargs in dict(parsers).items()}

self.yara = yara.compile(
sources={name: extractor.rule for name, extractor in self.parsers.items() if extractor.rule}
)
for fw_name in self.FRAMEWORK_LIBRARY_MAPPING:
self.log.debug(
f"# of YARA-dependent parsers under {fw_name}: "
Expand Down Expand Up @@ -141,16 +174,16 @@ def run_parsers(self, sample, parser_blocklist=[]):
for yara_match in self.yara.match(sample_copy.name):
# Retrieve relevant parser information
extractor = self.parsers[yara_match.namespace]
if block_regex and block_regex.match(extractor.module.__name__):
self.log.info(f"Blocking {extractor.module.__name__} based on passed blocklist regex list")
if block_regex and block_regex.match(extractor.id):
self.log.info(f"Blocking {extractor.id} based on passed blocklist regex list")
continue
# Pass in yara.Match objects since some framework can leverage it
parsers_to_run[extractor.framework][extractor].append(yara_match)

# Add standalone parsers that should run on any file
for parser in [p for p in self.parsers.values() if not p.rule]:
if block_regex and block_regex.match(parser.module.__name__):
self.log.info(f"Blocking {parser.module.__name__} based on passed blocklist regex list")
if block_regex and block_regex.match(parser.id):
self.log.info(f"Blocking {parser.id} based on passed blocklist regex list")
continue
parsers_to_run[parser.framework][parser].extend([])

Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
cart
click
maco >= 1.2.0
maco >= 1.2.1
mwcfg
mwcp
setuptools

0 comments on commit 27f29d2

Please sign in to comment.