def encode_video(src_path: Path, dst_path: Path, preset: str) -> bool:
    """Re-encode a video file using one of the video presets

    The preset is looked up by name as an attribute of the `presets` module,
    instantiated, and its ffmpeg arguments are passed to `reencode`.

    Arguments:
        src_path - Path to the video to encode
        dst_path - Path where the re-encoded video is stored
        preset - Name of the preset to use (attribute name in `presets`)

    Returns True when `reencode` reports success, False otherwise; on failure
    `process.stdout` is logged as an error so the caller can both see why and
    react to it (e.g. with a non-zero exit status).

    Raises ValueError when `src_path` does not exist, and AttributeError when
    `presets` has no attribute named `preset`."""
    if not src_path.exists():
        raise ValueError(f"{src_path} does not exists")
    try:
        preset_cls = getattr(presets, preset)
    except AttributeError:
        logger.error(f"{preset} preset not found")
        raise
    logger.info(f"Encoding video {src_path} with {preset} version {preset_cls.VERSION}")
    success, process = reencode(
        src_path=src_path,
        dst_path=dst_path,
        ffmpeg_args=preset_cls().to_ffmpeg_args(),
        with_process=True,
    )  # pyright: ignore[reportGeneralTypeIssues] (returned type is variable, depending on `with_process` value)
    if not success:
        logger.error(f"conversion failed:\n{process.stdout}")
    # Previously the outcome was only logged; hand it back to the caller too.
    return bool(success)
def run(args: List[str] = sys.argv):
    """Command-line entry point: encode one video with a named preset

    Arguments:
        args - Command line: program name first, then the source path, the
               destination path and the preset name

    Returns the exit status: 1 when fewer than three arguments follow the
    program name (usage is printed instead of encoding) or when `encode_video`
    explicitly returns False, 0 otherwise."""
    if len(args) < 4:  # noqa: PLR2004
        # An empty list has no program name to show; fall back to a fixed one.
        prog = args[0] if args else "encode_video.py"
        print(f"Usage: {prog} SRC_PATH DST_PATH PRESET")  # noqa: T201
        print(  # noqa: T201
            "\tSRC_PATH\tpath to the video to encode.\n"
            "\tDST_PATH\tpath to store the reencoded video.\n"
            "\tPRESET\t\tname of preset to use."
        )
        return 1
    outcome = encode_video(Path(args[1]), Path(args[2]), args[3])
    # Only an explicit False counts as a failure, so an `encode_video` that
    # returns nothing still yields the exit status 0.
    return 1 if outcome is False else 0


if __name__ == "__main__":
    sys.exit(run())


def _build_ffmpeg_args(
    src_path: pathlib.Path,
    tmp_path: pathlib.Path,
    ffmpeg_args: List[str],
    threads: Optional[int],
) -> List[str]:
    """Assemble the complete ffmpeg command line for one encoding run

    Arguments:
        src_path - Path to the source file (passed to ffmpeg as `file:<path>`)
        tmp_path - Path to the output file (passed to ffmpeg as `file:<path>`)
        ffmpeg_args - A list of ffmpeg arguments, placed between input and
                      output; this list is never modified
        threads - Number of encoding threads; when truthy, `-threads <n>` is
                  appended after `ffmpeg_args`, when None or 0 nothing is added

    Returns the full argument list, starting with `/usr/bin/env ffmpeg -y -i`.

    Raises AttributeError when `threads` is truthy but `ffmpeg_args` already
    contains `-threads`."""
    if threads:
        if "-threads" in ffmpeg_args:
            raise AttributeError("Cannot set the number of threads, already set")
        # Build a new list instead of `+=`: extending in place would leak
        # "-threads N" into the caller's list and make a second call with the
        # same list raise.
        ffmpeg_args = [*ffmpeg_args, "-threads", str(threads)]
    return [
        "/usr/bin/env",
        "ffmpeg",
        "-y",
        "-i",
        f"file:{src_path}",
        *ffmpeg_args,
        f"file:{tmp_path}",
    ]
reencode( src_path - Path to source file dst_path - Path to destination file ffmpeg_args - A list of ffmpeg arguments + threads - Number of encoding threads used by ffmpeg delete_src - Delete source file after convertion with_process - Optionally return the output from ffmpeg (stderr and stdout) failsafe - Run in failsafe mode @@ -32,15 +58,12 @@ def reencode( with tempfile.TemporaryDirectory() as tmp_dir: tmp_path = pathlib.Path(tmp_dir).joinpath(f"video.tmp{dst_path.suffix}") - args = [ - "/usr/bin/env", - "ffmpeg", - "-y", - "-i", - f"file:{src_path}", - *ffmpeg_args, - f"file:{tmp_path}", - ] + args = _build_ffmpeg_args( + src_path=src_path, + tmp_path=tmp_path, + ffmpeg_args=ffmpeg_args, + threads=threads, + ) logger.debug( f"Encode {src_path} -> {dst_path} video format = {dst_path.suffix}" ) diff --git a/src/zimscraperlib/video/presets.py b/src/zimscraperlib/video/presets.py index da55da2d..37b7bbba 100644 --- a/src/zimscraperlib/video/presets.py +++ b/src/zimscraperlib/video/presets.py @@ -32,11 +32,10 @@ class VideoWebmLow(Config): """Low Quality webm video 480:h format with height adjusted to keep aspect ratio - 300k video bitrate - 48k audio bitrate - highly degraded quality (30, 42)""" + 128k target video bitrate but stay within quality boundaries. 
@pytest.mark.parametrize(
    "src_path,tmp_path,ffmpeg_args,threads,expected",
    [
        # no thread count requested: arguments are passed through untouched
        (
            Path("path1/file1.mp4"),
            Path("path1/fileout.mp4"),
            ["-codec:v", "libx265"],
            None,
            [
                "/usr/bin/env",
                "ffmpeg",
                "-y",
                "-i",
                "file:path1/file1.mp4",
                "-codec:v",
                "libx265",
                "file:path1/fileout.mp4",
            ],
        ),
        # thread count requested: "-threads 1" lands right before the output
        (
            Path("path2/file2.mp4"),
            Path("path12/tmpfile.mp4"),
            ["-b:v", "300k"],
            1,
            [
                "/usr/bin/env",
                "ffmpeg",
                "-y",
                "-i",
                "file:path2/file2.mp4",
                "-b:v",
                "300k",
                "-threads",
                "1",
                "file:path12/tmpfile.mp4",
            ],
        ),
        # thread count requested twice: expected=None stands for "must raise"
        (
            Path("path2/file2.mp4"),
            Path("path12/tmpfile.mp4"),
            ["-b:v", "300k", "-threads", "1"],
            1,
            None,
        ),
    ],
)
def test_build_ffmpeg_args(
    src_path: Path,
    tmp_path: Path,
    ffmpeg_args: List[str],
    threads: Optional[int],
    expected: Optional[List[str]],
):
    """Check the command line built by `_build_ffmpeg_args`

    A falsy `expected` means the call must raise AttributeError because the
    thread count is already present in `ffmpeg_args`; otherwise the returned
    list must equal `expected`."""

    def build() -> List[str]:
        return _build_ffmpeg_args(
            src_path=src_path,
            tmp_path=tmp_path,
            ffmpeg_args=ffmpeg_args,
            threads=threads,
        )

    if not expected:
        with pytest.raises(
            AttributeError,
            match=re.escape("Cannot set the number of threads, already set"),
        ):
            build()
        return

    assert build() == expected