Skip to content

Commit bcbf49e

Browse files
authored
Merge pull request #695 from onekey-sec/skip-extension
Add ability to skip files based on extension.
2 parents e736358 + d234764 commit bcbf49e

File tree

3 files changed

51 additions and 1 deletion

tests/test_cli.py

Lines changed: 31 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -302,3 +302,34 @@ def test_keep_extracted_chunks(
302302
process_file_mock.call_args.args[0].keep_extracted_chunks
303303
== keep_extracted_chunks
304304
), fail_message
305+
306+
307+
@pytest.mark.parametrize(
    "skip_extension, extracted_files_count",
    [
        pytest.param([], 5, id="skip-extension-empty"),
        pytest.param([""], 5, id="skip-zip-extension-empty-suffix"),
        pytest.param([".zip"], 1, id="skip-extension-zip"),
        pytest.param([".rlib"], 5, id="skip-extension-rlib"),
    ],
)
def test_skip_extension(
    skip_extension: List[str], extracted_files_count: int, tmp_path: Path
):
    """Check that ``--skip-extension`` controls what the CLI extracts.

    The CLI is run on the ``apple.zip`` integration sample with one
    ``--skip-extension`` option per entry of ``skip_extension`` and with
    ``tmp_path`` as the extraction directory.  The run must exit with code 0
    and the number of entries found recursively under ``tmp_path`` must equal
    ``extracted_files_count``.
    """
    runner = CliRunner()
    in_path = (
        Path(__file__).parent
        / "integration"
        / "archive"
        / "zip"
        / "regular"
        / "__input__"
        / "apple.zip"
    )

    # The option is repeatable: pass it once per suffix under test.
    args = []
    for suffix in skip_extension:
        args += ["--skip-extension", suffix]
    params = [*args, "--extract-dir", str(tmp_path), str(in_path)]

    result = runner.invoke(unblob.cli.cli, params)

    # Check the exit code first so that a CLI failure is reported as such
    # (with its output) instead of as a confusing entry-count mismatch.
    assert result.exit_code == 0, result.output

    extracted = list(tmp_path.rglob("*"))
    assert extracted_files_count == len(extracted), extracted

unblob/cli.py

Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -22,6 +22,7 @@
2222
from .processing import (
2323
DEFAULT_DEPTH,
2424
DEFAULT_PROCESS_NUM,
25+
DEFAULT_SKIP_EXTENSION,
2526
DEFAULT_SKIP_MAGIC,
2627
ExtractionConfig,
2728
process_file,
@@ -166,6 +167,15 @@ def __init__(
166167
show_default=True,
167168
multiple=True,
168169
)
170+
@click.option(
171+
"--skip-extension",
172+
"skip_extension",
173+
type=click.STRING,
174+
default=DEFAULT_SKIP_EXTENSION,
175+
help="Skip processing files with given extension",
176+
show_default=True,
177+
multiple=True,
178+
)
169179
@click.option(
170180
"-p",
171181
"--process-num",
@@ -229,6 +239,7 @@ def cli(
229239
depth: int,
230240
entropy_depth: int,
231241
skip_magic: Iterable[str],
242+
skip_extension: Iterable[str],
232243
skip_extraction: bool, # noqa: FBT001
233244
keep_extracted_chunks: bool, # noqa: FBT001
234245
handlers: Handlers,
@@ -254,6 +265,7 @@ def cli(
254265
entropy_plot=bool(verbose >= 3),
255266
skip_extraction=skip_extraction,
256267
skip_magic=skip_magic,
268+
skip_extension=skip_extension,
257269
process_num=process_num,
258270
handlers=handlers,
259271
dir_handlers=dir_handlers,

unblob/processing.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,7 @@
7777
"Windows Embedded CE binary image",
7878
"Intel serial flash for PCH ROM",
7979
)
80+
DEFAULT_SKIP_EXTENSION = (".rlib",)
8081

8182

8283
@attr.define(kw_only=True)
@@ -87,6 +88,7 @@ class ExtractionConfig:
8788
entropy_plot: bool = False
8889
max_depth: int = DEFAULT_DEPTH
8990
skip_magic: Iterable[str] = DEFAULT_SKIP_MAGIC
91+
skip_extension: Iterable[str] = DEFAULT_SKIP_EXTENSION
9092
skip_extraction: bool = False
9193
process_num: int = DEFAULT_PROCESS_NUM
9294
keep_extracted_chunks: bool = False
@@ -292,9 +294,14 @@ def _process_task(self, result: TaskResult, task: Task):
292294
should_skip_file = any(
293295
magic.startswith(pattern) for pattern in self._config.skip_magic
294296
)
297+
should_skip_file |= task.path.suffix in self._config.skip_extension
295298

296299
if should_skip_file:
297-
log.debug("Ignoring file based on magic", magic=magic)
300+
log.debug(
301+
"Ignoring file based on magic or extension.",
302+
magic=magic,
303+
extension=task.path.suffix,
304+
)
298305
return
299306

300307
_FileTask(self._config, task, stat_report.size, result).process()

0 commit comments

Comments
 (0)