diff --git a/.github/workflows/Publish.yaml b/.github/workflows/Publish.yaml new file mode 100644 index 00000000..b7e1859b --- /dev/null +++ b/.github/workflows/Publish.yaml @@ -0,0 +1,28 @@ +name: Build and upload to PyPI + +on: + release: + types: [published] + +jobs: + publish: + runs-on: ubuntu-22.04 + permissions: + id-token: write # mandatory for PyPI trusted publishing + + steps: + - uses: actions/checkout@v3 + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version-file: pyproject.toml + architecture: x64 + + - name: Build packages + run: | + pip install -U pip build + python -m build --sdist --wheel + + - name: Upload to PyPI + uses: pypa/gh-action-pypi-publish@release/v1.8 diff --git a/.github/workflows/QA.yaml b/.github/workflows/QA.yaml new file mode 100644 index 00000000..48ccee5a --- /dev/null +++ b/.github/workflows/QA.yaml @@ -0,0 +1,34 @@ +name: QA + +on: + pull_request: + push: + branches: + - main + +jobs: + check-qa: + runs-on: ubuntu-22.04 + + steps: + - uses: actions/checkout@v3 + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version-file: pyproject.toml + architecture: x64 + + - name: Install dependencies (and project) + run: | + pip install -U pip + pip install -e .[lint,scripts,test,check] + + - name: Check black formatting + run: inv lint-black + + - name: Check ruff + run: inv lint-ruff + + - name: Check pyright + run: inv check-pyright diff --git a/.github/workflows/Tests.yaml b/.github/workflows/Tests.yaml new file mode 100644 index 00000000..4b5d00c6 --- /dev/null +++ b/.github/workflows/Tests.yaml @@ -0,0 +1,60 @@ +name: Tests + +on: + pull_request: + push: + branches: + - main + +jobs: + run-tests: + strategy: + matrix: + os: [ubuntu-22.04] + python: ["3.8", "3.9", "3.10", "3.11", "3.12"] + runs-on: ${{ matrix.os }} + + steps: + - uses: actions/checkout@v3 + + - name: install ffmpeg and gifsicle + run: sudo apt update && sudo apt install ffmpeg gifsicle + + - name: add required 
locales for tests + run: sudo locale-gen fr_FR.UTF-8 pt_BR.UTF-8 && sudo update-locale + + - name: Set up Python ${{ matrix.python }} + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python }} + architecture: x64 + + - name: Install dependencies (and project) + run: | + pip install -U pip + pip install -e .[test,scripts] + + - name: Run the tests + run: inv coverage --args "--runslow -vvv" + + - name: Upload coverage report to codecov + if: matrix.python == '3.12' + uses: codecov/codecov-action@v3 + with: + token: ${{ secrets.CODECOV_TOKEN }} + + build_python: + runs-on: ubuntu-22.04 + steps: + - uses: actions/checkout@v3 + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version-file: pyproject.toml + architecture: x64 + + - name: Ensure we can build Python targets + run: | + pip install -U pip build + python3 -m build --sdist --wheel diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml deleted file mode 100644 index 953a6867..00000000 --- a/.github/workflows/ci.yml +++ /dev/null @@ -1,54 +0,0 @@ -name: CI - -on: [push, pull_request] - -jobs: - unit-tests: - runs-on: ubuntu-22.04 - strategy: - max-parallel: 3 - matrix: - python-version: ["3.8", "3.9", "3.10", "3.11"] - steps: - - name: install ffmpeg and gifsicle - run: sudo apt update && sudo apt install ffmpeg gifsicle - - name: add required locales for tests - run: sudo locale-gen fr_FR.UTF-8 pt_BR.UTF-8 && sudo update-locale - - uses: actions/checkout@v1 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v1 - with: - python-version: ${{ matrix.python-version }} - - name: pyvar - run: echo "pyversion=${{ matrix.python-version }}" >> $GITHUB_ENV - - name: set image name - run: echo "toxpyv=py${pyversion//\./}" >> $GITHUB_ENV - - name: verify envname - run : echo $toxpyv - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install tox - - name: Test with tox - run: tox -e $toxpyv - - qa: - 
runs-on: ubuntu-22.04 - steps: - - name: install ffmpeg and gifsicle - run: sudo apt update && sudo apt install ffmpeg gifsicle - - name: add required locales for tests - run: sudo locale-gen fr_FR.UTF-8 pt_BR.UTF-8 && sudo update-locale - - uses: actions/checkout@v1 - - name: Set up Python 3.8 - uses: actions/setup-python@v1 - with: - python-version: "3.8" - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install -U tox - - name: Run tox - run: tox -e py38,black,isort,flake8,coverage - env: - CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml deleted file mode 100644 index d72c5d8d..00000000 --- a/.github/workflows/release.yml +++ /dev/null @@ -1,34 +0,0 @@ -name: release -on: - release: - types: [published] - tags: - - v* - -env: - TWINE_USERNAME: __token__ - TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }} - -jobs: - release: - environment: release - runs-on: ubuntu-22.04 - steps: - - uses: actions/checkout@v3 - - name: Set up Python 3.11 - uses: actions/setup-python@v4 - with: - python-version: "3.11" - architecture: x64 - - - name: Build sdist and wheel - run: | - pip install --upgrade pip build wheel - python3 -m build - - - name: Push release to PyPI - if: github.event_name == 'release' - run: | - pip install --upgrade twine - twine check dist/* - twine upload dist/* diff --git a/.gitignore b/.gitignore index 9e773ced..288bff6b 100644 --- a/.gitignore +++ b/.gitignore @@ -248,3 +248,7 @@ $RECYCLE.BIN/ *.lnk # End of https://www.toptal.com/developers/gitignore/api/python,macos,windows,linux + +# ignore all vscode, this is not standard configuration in this place +.vscode +src/libzim-stubs diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index bef63f03..45c2335e 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,22 +1,27 @@ # See https://pre-commit.com for more information # See https://pre-commit.com/hooks.html for more hooks 
repos: -- repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.4.0 - hooks: - - id: trailing-whitespace - - id: end-of-file-fixer -- repo: https://github.com/pycqa/isort - rev: "5.12.0" - hooks: - - id: isort - args: ["--profile", "black", "--filter-files"] -- repo: https://github.com/psf/black - rev: "23.1.0" - hooks: - - id: black -- repo: https://github.com/pycqa/flake8 - rev: "6.0.0" - hooks: - - id: flake8 - args: ["--max-line-length", "88", "--extend-ignore=E203"] +- repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.5.0 + hooks: + - id: trailing-whitespace + - id: end-of-file-fixer +- repo: https://github.com/psf/black + rev: "24.1.1" + hooks: + - id: black +- repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.2.1 + hooks: + - id: ruff +- repo: https://github.com/RobertCraigie/pyright-python + rev: v1.1.350 + hooks: + - id: pyright + name: pyright (system) + description: 'pyright static type checker' + entry: pyright + language: system + 'types_or': [python, pyi] + require_serial: true + minimum_pre_commit_version: '2.9.2' diff --git a/CHANGELOG.md b/CHANGELOG.md index a9bfade3..13f3a4db 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,14 @@ All notable changes to this project are documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html) (as of version 1.5.0). 
+## [Unreleased] + +### Changed + +- Using openZIM Python bootstrap conventions (including hatch-openzim plugin) #120 +- Support for Python 3.12, drop Python 3.7 #118 +- Replace "iso-639" by "iso639-lang" library + ## [3.2.0] - 2023-12-16 ### Added diff --git a/MANIFEST.in b/MANIFEST.in deleted file mode 100644 index 68276b73..00000000 --- a/MANIFEST.in +++ /dev/null @@ -1,4 +0,0 @@ -graft src -include *.md -include requirements.txt -include LICENSE diff --git a/README.md b/README.md index ab3478a9..1d8870e7 100644 --- a/README.md +++ b/README.md @@ -47,27 +47,29 @@ sudo apt install libmagic1 wget ffmpeg \ # Contribution +This project adheres to openZIM's [Contribution Guidelines](https://github.com/openzim/overview/wiki/Contributing) + ```shell -pip -r requirements.txt -pip install tox pre-commit +pip install hatch +pip install ".[dev]" pre-commit install # For tests -tox +invoke coverage ``` # Users Non-exhaustive list of scrapers using it (check status when updating API): -* [openzim/youtube](https://github.com/openzim/youtube) +* [openzim/freecodecamp](https://github.com/openzim/freecodecamp) +* [openzim/gutenberg](https://github.com/openzim/gutenberg) +* [openzim/ifixit](https://github.com/openzim/ifixit) +* [openzim/kolibri](https://github.com/openzim/kolibri) * [openzim/nautilus](https://github.com/openzim/nautilus) - -# releasing - -* Update your dependencies: `pip install -U setuptools wheel twine` -* Make sure CHANGELOG.md is up-to-date -* Bump version on `src/zimscraperlib/VERSION` -* Build packages `python ./setup.py sdist bdist_wheel` -* Upload to PyPI `twine upload dist/zimscraperlib-2.0.0*`. 
-* Commit your Changelog + version bump changes -* Tag version on git `git tag -a v2.0.0` +* [openzim/nautilus](https://github.com/openzim/nautilus) +* [openzim/openedx](https://github.com/openzim/openedx) +* [openzim/sotoki](https://github.com/openzim/sotoki) +* [openzim/ted](https://github.com/openzim/ted) +* [openzim/warc2zim](https://github.com/openzim/warc2zim) +* [openzim/wikihow](https://github.com/openzim/wikihow) +* [openzim/youtube](https://github.com/openzim/youtube) diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 00000000..779791ea --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,242 @@ +[build-system] +requires = ["hatchling", "hatch-openzim"] +build-backend = "hatchling.build" + +[project] +name = "zimscraperlib" +requires-python = ">=3.8,<3.13" +description = "Collection of python tools to re-use common code across scrapers" +readme = "README.md" +dependencies = [ + "iso639-lang>=2.2.3,<3.0", + "requests>=2.25.1,<3.0", + "colorthief==0.2.1", + "python-resize-image>=1.1.19,<1.2", + "Babel>=2.9,<3.0", + "file-magic>=0.4.0,<0.5", + "libzim>=3.4.0,<4.0", + "beautifulsoup4>=4.9.3,<4.10", # upgrade to 4.10 and later to be done + "lxml>=4.6.3,<4.10", # upgrade to 4.10 and later to be done + "optimize-images>=1.3.6,<1.6", + # youtube-dl should be updated as frequently as possible + "yt-dlp" +] +dynamic = ["authors", "classifiers", "keywords", "license", "version", "urls"] + +[tool.hatch.metadata.hooks.openzim-metadata] +kind = "scraper" +# not yet supported in hatch-openzim 0.1 +# additional-classifiers = [ +# "Development Status :: 5 - Production/Stable", +# "Intended Audience :: Developers", +# ] + +[project.optional-dependencies] +scripts = [ + "invoke==2.2.0", +] +lint = [ + "black==24.1.1", + "ruff==0.2.1", +] +check = [ + "pyright==1.1.350", +] +test = [ + "pytest==8.0.0", + "pytest-mock==3.12.0", + "coverage==7.4.1", +] +dev = [ + "pre-commit==3.5.0", + "debugpy==1.8.0", + "zimscraperlib[scripts]", + "zimscraperlib[lint]", + 
"zimscraperlib[test]", + "zimscraperlib[check]", +] + +[project.scripts] +fix_ogvjs_dist = "zimscraperlib.fix_ogvjs_dist:run" + +[tool.hatch.version] +path = "src/zimscraperlib/__about__.py" + +[tool.hatch.build] +exclude = [ + "/.github", +] + +[tool.hatch.build.targets.wheel] +packages = ["src/zimscraperlib"] + +[tool.hatch.envs.default] +features = ["dev"] + +[tool.hatch.envs.test] +features = ["scripts", "test"] + +[[tool.hatch.envs.test.matrix]] +python = ["3.8", "3.9", "3.10", "3.11"] + +[tool.hatch.envs.test.scripts] +run = "inv test --args '{args}'" +run-cov = "inv test-cov --args '{args}'" +report-cov = "inv report-cov" +coverage = "inv coverage --args '{args}'" +html = "inv coverage --html --args '{args}'" + +[tool.hatch.envs.lint] +template = "lint" +skip-install = false +features = ["scripts", "lint"] + +[tool.hatch.envs.lint.scripts] +black = "inv lint-black --args '{args}'" +ruff = "inv lint-ruff --args '{args}'" +all = "inv lintall --args '{args}'" +fix-black = "inv fix-black --args '{args}'" +fix-ruff = "inv fix-ruff --args '{args}'" +fixall = "inv fixall --args '{args}'" + +[tool.hatch.envs.check] +features = ["scripts", "check"] + +[tool.hatch.envs.check.scripts] +pyright = "inv check-pyright --args '{args}'" +all = "inv checkall --args '{args}'" + +[tool.black] +line-length = 88 +target-version = ['py38'] + +[tool.ruff] +target-version = "py38" +line-length = 88 +src = ["src"] + +[tool.ruff.lint] +select = [ + "A", # flake8-builtins + # "ANN", # flake8-annotations + "ARG", # flake8-unused-arguments + # "ASYNC", # flake8-async + "B", # flake8-bugbear + # "BLE", # flake8-blind-except + "C4", # flake8-comprehensions + "C90", # mccabe + # "COM", # flake8-commas + # "D", # pydocstyle + # "DJ", # flake8-django + "DTZ", # flake8-datetimez + "E", # pycodestyle (default) + "EM", # flake8-errmsg + # "ERA", # eradicate + # "EXE", # flake8-executable + "F", # Pyflakes (default) + # "FA", # flake8-future-annotations + "FBT", # flake8-boolean-trap + # "FLY", # 
flynt + # "G", # flake8-logging-format + "I", # isort + "ICN", # flake8-import-conventions + # "INP", # flake8-no-pep420 + # "INT", # flake8-gettext + "ISC", # flake8-implicit-str-concat + "N", # pep8-naming + # "NPY", # NumPy-specific rules + # "PD", # pandas-vet + # "PGH", # pygrep-hooks + # "PIE", # flake8-pie + # "PL", # Pylint + "PLC", # Pylint: Convention + "PLE", # Pylint: Error + "PLR", # Pylint: Refactor + "PLW", # Pylint: Warning + # "PT", # flake8-pytest-style + # "PTH", # flake8-use-pathlib + # "PYI", # flake8-pyi + "Q", # flake8-quotes + # "RET", # flake8-return + # "RSE", # flake8-raise + "RUF", # Ruff-specific rules + "S", # flake8-bandit + # "SIM", # flake8-simplify + # "SLF", # flake8-self + "T10", # flake8-debugger + "T20", # flake8-print + # "TCH", # flake8-type-checking + # "TD", # flake8-todos + "TID", # flake8-tidy-imports + # "TRY", # tryceratops + "UP", # pyupgrade + "W", # pycodestyle + "YTT", # flake8-2020 +] +ignore = [ + # Allow non-abstract empty methods in abstract base classes + "B027", + # Remove flake8-errmsg since we consider they bloat the code and provide limited value + "EM", + # Allow boolean positional values in function calls, like `dict.get(... True)` + "FBT003", + # Ignore checks for possible passwords + "S105", "S106", "S107", + # Ignore warnings on subprocess.run / popen + "S603", + # Ignore complexity + "C901", "PLR0911", "PLR0912", "PLR0913", "PLR0915", + # To be fixed once more recent Python versions are available + "UP006", "UP007" +] +unfixable = [ + # Don't touch unused imports + "F401", +] + +[tool.ruff.lint.isort] +known-first-party = ["zimscraperlib"] + +[tool.ruff.lint.flake8-bugbear] +# add exceptions to B008 for fastapi. 
+extend-immutable-calls = ["fastapi.Depends", "fastapi.Query"] + +[tool.ruff.lint.flake8-tidy-imports] +ban-relative-imports = "all" + +[tool.ruff.lint.per-file-ignores] +# Tests can use magic values, assertions, and relative imports +"tests/**/*" = ["PLR2004", "S101", "TID252"] +# _libkiwix mimics libkiwix C++ code, names obey C++ conventions +"src/zimscraperlib/zim/_libkiwix.py" = ["N802", "N803", "N806"] + +[tool.pytest.ini_options] +minversion = "7.3" +testpaths = ["tests"] +pythonpath = [".", "src"] + +[tool.coverage.paths] +zimscraperlib = ["src/zimscraperlib"] +tests = ["tests"] + +[tool.coverage.run] +source_pkgs = ["zimscraperlib"] +branch = true +parallel = true +omit = [ + "src/zimscraperlib/__about__.py", +] + +[tool.coverage.report] +exclude_lines = [ + "no cov", + "if __name__ == .__main__.:", + "if TYPE_CHECKING:", +] + +[tool.pyright] +include = ["src", "tests", "tasks.py"] +exclude = [".env/**", ".venv/**"] +extraPaths = ["src"] +pythonVersion = "3.8" +typeCheckingMode="basic" diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index c7fc799c..00000000 --- a/requirements.txt +++ /dev/null @@ -1,12 +0,0 @@ -iso-639==0.4.5 -requests>=2.25.1,<3.0 -colorthief==0.2.1 -python-resize-image>=1.1.19,<1.2 -Babel>=2.9,<3.0 -file-magic>=0.4.0,<0.5 -libzim>=3.4.0,<3.5 -beautifulsoup4>=4.9.3,<4.10 -lxml>=4.6.3,<4.10 -optimize-images>=1.3.6,<1.6 -# youtube-dl should be updated as frequently as possible -yt-dlp diff --git a/setup.py b/setup.py deleted file mode 100644 index 9a0d0a65..00000000 --- a/setup.py +++ /dev/null @@ -1,53 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -# vim: ai ts=4 sts=4 et sw=4 nu - -import pathlib - -from setuptools import find_packages, setup - -root_dir = pathlib.Path(__file__).parent - - -def read(*names, **kwargs): - with open(root_dir.joinpath(*names), "r") as fh: - return fh.read() - - -setup( - name="zimscraperlib", - version=read("src", "zimscraperlib", "VERSION").strip(), - 
description="Collection of python tools to re-use common code across scrapers", - long_description=read("README.md"), - long_description_content_type="text/markdown", - author="kiwix", - author_email="reg@kiwix.org", - url="https://github.com/openzim/python_scraperlib", - keywords="kiwix zim offline", - license="GPLv3+", - packages=find_packages("src"), - package_dir={"": "src"}, - install_requires=[ - line.strip() - for line in read("requirements.txt").splitlines() - if not line.strip().startswith("#") - ], - setup_requires=["pytest-runner"], - zip_safe=False, - include_package_data=True, - entry_points={ - "console_scripts": ["fix_ogvjs_dist=zimscraperlib.fix_ogvjs_dist:run"] - }, - classifiers=[ - "Development Status :: 5 - Production/Stable", - "Intended Audience :: Developers", - "Programming Language :: Python", - "Programming Language :: Python :: 3.7", - "Programming Language :: Python :: 3.8", - "Programming Language :: Python :: 3.9", - "Programming Language :: Python :: 3.10", - "Programming Language :: Python :: 3.11", - "License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)", - ], - python_requires=">=3.7", -) diff --git a/src/zimscraperlib/VERSION b/src/zimscraperlib/VERSION deleted file mode 100644 index 944880fa..00000000 --- a/src/zimscraperlib/VERSION +++ /dev/null @@ -1 +0,0 @@ -3.2.0 diff --git a/src/zimscraperlib/__about__.py b/src/zimscraperlib/__about__.py new file mode 100644 index 00000000..c178c960 --- /dev/null +++ b/src/zimscraperlib/__about__.py @@ -0,0 +1 @@ +__version__ = "3.2.1-dev0" diff --git a/src/zimscraperlib/__init__.py b/src/zimscraperlib/__init__.py index d568bb0b..1ff4f461 100644 --- a/src/zimscraperlib/__init__.py +++ b/src/zimscraperlib/__init__.py @@ -1,12 +1,11 @@ #!/usr/bin/env python -# -*- coding: utf-8 -*- # vim: ai ts=4 sts=4 et sw=4 nu import logging as stdlogging import os -from .constants import NAME -from .logging import getLogger +from zimscraperlib.constants import NAME +from 
zimscraperlib.logging import getLogger debug = os.getenv("ZIMSCRAPERLIB_DEBUG") logger = getLogger(NAME, level=stdlogging.DEBUG if debug else stdlogging.INFO) diff --git a/src/zimscraperlib/constants.py b/src/zimscraperlib/constants.py index a42c16af..ca2ec163 100644 --- a/src/zimscraperlib/constants.py +++ b/src/zimscraperlib/constants.py @@ -1,16 +1,15 @@ #!/usr/bin/env python3 -# -*- coding: utf-8 -*- # vim: ai ts=4 sts=4 et sw=4 nu import base64 import pathlib import re +from zimscraperlib.__about__ import __version__ + ROOT_DIR = pathlib.Path(__file__).parent NAME = pathlib.Path(__file__).parent.name -with open(ROOT_DIR.joinpath("VERSION"), "r") as fh: - VERSION = fh.read().strip() -SCRAPER = f"{NAME} {VERSION}" +SCRAPER = f"{NAME} {__version__}" UTF8 = "UTF-8" diff --git a/src/zimscraperlib/download.py b/src/zimscraperlib/download.py index 913f0990..2b8268f6 100644 --- a/src/zimscraperlib/download.py +++ b/src/zimscraperlib/download.py @@ -1,5 +1,4 @@ #!/usr/bin/env python3 -# -*- coding: utf-8 -*- # vim: ai ts=4 sts=4 et sw=4 nu from __future__ import annotations @@ -8,12 +7,12 @@ import pathlib import subprocess from concurrent.futures import Future, ThreadPoolExecutor -from typing import Dict, Optional, Union +from typing import ClassVar, Dict, Optional, Union import requests import yt_dlp as youtube_dl -from . import logger +from zimscraperlib import logger class YoutubeDownloader: @@ -43,7 +42,7 @@ def download( self, url: str, options: Optional[Dict], - wait: Optional[bool] = True, + wait: Optional[bool] = True, # noqa: FBT002 ) -> Union[bool, Future]: """Downloads video using initialized executor. 
@@ -53,19 +52,21 @@ def download( Returns download result of future (wait=False)""" - future = self.executor.submit(self._run_youtube_dl, url, options) + future = self.executor.submit( + self._run_youtube_dl, url, options # pyright: ignore + ) if not wait: return future if not future.exception(): # return the result - return future.result() + return future.result() # pyright: ignore # raise the exception - raise future.exception() + raise future.exception() # pyright: ignore class YoutubeConfig(dict): - options = {} - defaults = { + options: ClassVar[Dict[str, Optional[Union[str, bool, int]]]] = {} + defaults: ClassVar[Dict[str, Optional[Union[str, bool, int]]]] = { "writethumbnail": True, "write_all_thumbnails": True, "writesubtitles": True, @@ -93,6 +94,8 @@ def get_options( ): if "outtmpl" not in options: outtmpl = cls.options.get("outtmpl", cls.defaults["outtmpl"]) + if not isinstance(outtmpl, str): + raise ValueError(f"outtmpl must be a a str, {type(outtmpl)} found") if filepath: outtmpl = str(filepath) # send output to target_dir @@ -106,14 +109,14 @@ def get_options( class BestWebm(YoutubeConfig): - options = { + options: ClassVar[Dict[str, Optional[Union[str, bool, int]]]] = { "preferredcodec": "webm", "format": "best[ext=webm]/bestvideo[ext=webm]+bestaudio[ext=webm]/best", } class BestMp4(YoutubeConfig): - options = { + options: ClassVar[Dict[str, Optional[Union[str, bool, int]]]] = { "preferredcodec": "mp4", "format": "best[ext=mp4]/bestvideo[ext=mp4]+bestaudio[ext=m4a]/best", } @@ -138,8 +141,10 @@ def save_large_file(url: str, fpath: pathlib.Path) -> None: ) -def _get_retry_adapter(max_retries: Optional[int] = 5) -> requests.adapters.BaseAdapter: - retries = requests.packages.urllib3.util.retry.Retry( +def _get_retry_adapter( + max_retries: Optional[int] = 5, +) -> requests.adapters.BaseAdapter: # pyright: ignore + retries = requests.packages.urllib3.util.retry.Retry( # pyright: ignore total=max_retries, # total number of retries connect=max_retries, # 
connection errors read=max_retries, # read errors @@ -156,7 +161,7 @@ def _get_retry_adapter(max_retries: Optional[int] = 5) -> requests.adapters.Base ], # force retry on the following codes ) - return requests.adapters.HTTPAdapter(max_retries=retries) + return requests.adapters.HTTPAdapter(max_retries=retries) # pyright: ignore def get_session(max_retries: Optional[int] = 5) -> requests.Session: @@ -172,11 +177,11 @@ def stream_file( byte_stream: Optional[io.BytesIO] = None, block_size: Optional[int] = 1024, proxies: Optional[dict] = None, - only_first_block: Optional[bool] = False, + only_first_block: Optional[bool] = False, # noqa: FBT002 max_retries: Optional[int] = 5, headers: Optional[Dict[str, str]] = None, session: Optional[requests.Session] = None, -) -> tuple[int, requests.structures.CaseInsensitiveDict]: +) -> tuple[int, requests.structures.CaseInsensitiveDict]: # pyright: ignore """Stream data from a URL to either a BytesIO object or a file Arguments - fpath - Path of the file where data is sent @@ -212,7 +217,7 @@ def stream_file( for data in resp.iter_content(block_size): total_downloaded += len(data) - fp.write(data) + fp.write(data) # pyright: ignore # stop downloading/reading if we're just testing first block if only_first_block: @@ -221,7 +226,7 @@ def stream_file( logger.debug(f"Downloaded {total_downloaded} bytes from {url}") if fpath: - fp.close() + fp.close() # pyright: ignore else: - fp.seek(0) + fp.seek(0) # pyright: ignore return total_downloaded, resp.headers diff --git a/src/zimscraperlib/filesystem.py b/src/zimscraperlib/filesystem.py index fb5b804d..509675eb 100644 --- a/src/zimscraperlib/filesystem.py +++ b/src/zimscraperlib/filesystem.py @@ -1,5 +1,4 @@ #!/usr/bin/env python -# -*- coding: utf-8 -*- # vim: ai ts=4 sts=4 et sw=4 nu """ Files manipulation tools @@ -41,7 +40,7 @@ def get_content_mimetype(content: bytes) -> str: def delete_callback( fpath: Union[str, pathlib.Path], callback: Optional[Callable] = None, - *callback_args: 
Any + *callback_args: Any, ): """helper deleting passed filepath, optionnaly calling an additional callback""" diff --git a/src/zimscraperlib/fix_ogvjs_dist.py b/src/zimscraperlib/fix_ogvjs_dist.py index 2dca4625..72cb8972 100755 --- a/src/zimscraperlib/fix_ogvjs_dist.py +++ b/src/zimscraperlib/fix_ogvjs_dist.py @@ -1,5 +1,4 @@ #!/usr/bin/env python3 -# -*- coding: utf-8 -*- # vim: ai ts=4 sts=4 et sw=4 nu @@ -8,7 +7,7 @@ import logging import pathlib import sys -from typing import Union +from typing import List, Union logging.basicConfig(format="%(levelname)s:%(message)s", level=logging.DEBUG) logger = logging.getLogger(__name__) @@ -19,7 +18,7 @@ def fix_source_dir(source_vendors_path: Union[pathlib.Path, str]): root = pathlib.Path(source_vendors_path) logger.info("fixing videosjs-ogvjs.js") plugin_path = root.joinpath("videojs-ogvjs.js") - with open(plugin_path, "r") as fp: + with open(plugin_path) as fp: content = fp.read() content = content.replace( @@ -34,16 +33,17 @@ def fix_source_dir(source_vendors_path: Union[pathlib.Path, str]): logger.info("all done.") -def run(): - if len(sys.argv) < 2: - print(f"Usage: {sys.argv[0]} ") - print( +def run(args: List[str] = sys.argv): + if len(args) < 2: # noqa: PLR2004 + print(f"Usage: {args[0]} ") # noqa: T201 + print( # noqa: T201 "\t\tpath to your folder containing " "ogvjs/videojs/videojs-ogvjs." 
) - sys.exit(1) - return sys.exit(fix_source_dir(sys.argv[1])) + return 1 + fix_source_dir(args[1]) + return 0 if __name__ == "__main__": - run() + sys.exit(run()) diff --git a/src/zimscraperlib/html.py b/src/zimscraperlib/html.py index 9823e5d0..c87dcf32 100644 --- a/src/zimscraperlib/html.py +++ b/src/zimscraperlib/html.py @@ -1,5 +1,4 @@ #!/usr/bin/env python -# -*- coding: utf-8 -*- # vim: ai ts=4 sts=4 et sw=4 nu """ Tools to work with HTML contents """ @@ -9,7 +8,7 @@ from bs4 import BeautifulSoup -from .types import ARTICLE_MIME +from zimscraperlib.types import ARTICLE_MIME def find_title_in(content: Union[str, BinaryIO, TextIO], mime_type: str) -> str: @@ -19,7 +18,7 @@ def find_title_in(content: Union[str, BinaryIO, TextIO], mime_type: str) -> str: if mime_type != ARTICLE_MIME: return "" try: - return BeautifulSoup(content, "lxml").find("title").text + return BeautifulSoup(content, "lxml").find("title").text # pyright: ignore except Exception: return "" @@ -27,7 +26,7 @@ def find_title_in(content: Union[str, BinaryIO, TextIO], mime_type: str) -> str: def find_title_in_file(fpath: pathlib.Path, mime_type: str) -> str: """Extracted title from an HTML file""" try: - with open(fpath, "r") as fh: + with open(fpath) as fh: return find_title_in(fh, mime_type) except Exception: return "" @@ -45,22 +44,22 @@ def find_language_in(content: Union[str, BinaryIO, TextIO], mime_type: str) -> s for key in keylist: node = soup.find(nodename) if node: - if not node.has_attr(key): + if not node.has_attr(key): # pyright: ignore continue if ( nodename == "meta" - and not node.attrs.get("http-equiv", "").lower() + and not node.attrs.get("http-equiv", "").lower() # pyright: ignore == "content-language" ): continue - return node.attrs[key] + return node.attrs[key] # pyright: ignore return "" def find_language_in_file(fpath: pathlib.Path, mime_type: str) -> str: """Extracted language from an HTML file""" try: - with open(fpath, "r") as fh: + with open(fpath) as fh: return 
find_language_in(fh, mime_type) except Exception: return "" diff --git a/src/zimscraperlib/i18n.py b/src/zimscraperlib/i18n.py index 0e721680..0c9e1948 100644 --- a/src/zimscraperlib/i18n.py +++ b/src/zimscraperlib/i18n.py @@ -1,5 +1,4 @@ #!/usr/bin/env python3 -# -*- coding: utf-8 -*- # vim: ai ts=4 sts=4 et sw=4 nu import gettext @@ -9,12 +8,13 @@ from typing import Dict, Optional, Tuple, Union import babel -from iso639 import languages as iso639_languages +import iso639 +import iso639.exceptions ISO_LEVELS = ["1", "2b", "2t", "3", "5"] -class NotFound(ValueError): +class NotFound(ValueError): # noqa: N818 pass @@ -68,27 +68,45 @@ def get_iso_lang_data(lang: str) -> Tuple[Dict, Union[Dict, None]]: iso_types = [] - for code_type in [f"part{lang_}" for lang_ in ISO_LEVELS] + ["name"]: - try: - iso639_languages.get(**{code_type: lang}) - iso_types.append(code_type) - except KeyError: - pass - - if not iso_types: - raise NotFound("Not a valid iso language name/code") - - language = iso639_languages.get(**{iso_types[0]: lang}) + try: + isolang = iso639.Lang(lang) + except ( + iso639.exceptions.InvalidLanguageValue, + iso639.exceptions.DeprecatedLanguageValue, + ) as exc: + raise NotFound("Not a valid iso language name/code") from exc + + def replace_types(new_type: str) -> str: + # convert new iso_types from iso639-lang Pypi package to old iso_types from + # iso-639 package, since we were returning these values for a long time + if new_type == "pt1": + return "part1" + elif new_type == "pt2b": + return "part2b" + elif new_type == "pt2t": + return "part2t" + elif new_type == "pt3": + return "part3" + elif new_type == "pt5": + return "part5" + return new_type + + for code_type in [f"pt{lang_}" for lang_ in ISO_LEVELS] + ["name"]: + # the `if` condition below is a bit hackish but it is the only way to know + # if the passed value is matching a code type or not with new python-i639 + # library and we do not expect weird things to happen here + if str(getattr(isolang, 
code_type)).lower() == lang.lower(): + iso_types.append(replace_types(code_type)) lang_data = { - f"iso-639-{lang_}": getattr(language, f"part{lang_}") for lang_ in ISO_LEVELS + f"iso-639-{lang_}": getattr(isolang, f"pt{lang_}") for lang_ in ISO_LEVELS } - lang_data.update({"english": language.name, "iso_types": iso_types}) + lang_data.update({"english": isolang.name, "iso_types": iso_types}) - if language.macro: + if isolang.macro(): return ( lang_data, - get_iso_lang_data(language.macro)[0], + get_iso_lang_data(isolang.macro().name)[0], ) # first item in the returned tuple return lang_data, None @@ -103,7 +121,9 @@ def find_language_names( lang_data = get_language_details(query, failsafe=True) or {} try: query_locale = babel.Locale.parse(query) - return query_locale.get_display_name(), query_locale.get_display_name("en") + return query_locale.get_display_name(), query_locale.get_display_name( + "en" + ) # pyright: ignore except (babel.UnknownLocaleError, TypeError, ValueError, AttributeError): pass @@ -111,7 +131,9 @@ def find_language_names( for iso_level in [f"iso-639-{lang_}" for lang_ in reversed(ISO_LEVELS)]: try: query_locale = babel.Locale.parse(lang_data.get(iso_level)) - return query_locale.get_display_name(), query_locale.get_display_name("en") + return query_locale.get_display_name(), query_locale.get_display_name( + "en" + ) # pyright: ignore except (babel.UnknownLocaleError, TypeError, ValueError, AttributeError): pass default = lang_data.get("english", query) @@ -127,7 +149,9 @@ def update_with_macro(lang_data: Dict, macro_data: Dict): return lang_data -def get_language_details(query: str, failsafe: Optional[bool] = False) -> Dict: +def get_language_details( + query: str, failsafe: Optional[bool] = False # noqa: FBT002 +) -> Dict: """language details dict from query. 
Raises NotFound or return `und` language details if failsafe @@ -143,7 +167,7 @@ def get_language_details(query: str, failsafe: Optional[bool] = False) -> Dict: """ - if query.isalpha() and (2 <= len(query) <= 3): + if query.isalpha() and (2 <= len(query) <= 3): # noqa: PLR2004 # possibility of iso-639 code adjusted_query = query native_query = query @@ -165,10 +189,10 @@ def get_language_details(query: str, failsafe: Optional[bool] = False) -> Dict: lang_data, macro_data = get_iso_lang_data(adjusted_query) except NotFound as exc: if failsafe: - return None + return None # pyright: ignore raise exc - iso_data = update_with_macro(lang_data, macro_data) + iso_data = update_with_macro(lang_data, macro_data) # pyright: ignore native_name, english_name = find_language_names(native_query, iso_data) iso_data.update( { diff --git a/src/zimscraperlib/image/convertion.py b/src/zimscraperlib/image/convertion.py index 029742c0..31674236 100644 --- a/src/zimscraperlib/image/convertion.py +++ b/src/zimscraperlib/image/convertion.py @@ -1,5 +1,4 @@ #!/usr/bin/env python3 -# -*- coding: utf-8 -*- # vim: ai ts=4 sts=4 et sw=4 nu import pathlib @@ -7,10 +6,10 @@ import PIL -from ..constants import ALPHA_NOT_SUPPORTED -from .probing import format_for -from .transformation import resize_image -from .utils import save_image +from zimscraperlib.constants import ALPHA_NOT_SUPPORTED +from zimscraperlib.image.probing import format_for +from zimscraperlib.image.transformation import resize_image +from zimscraperlib.image.utils import save_image def convert_image( @@ -26,12 +25,14 @@ def convert_image( to RGB. 
ex: RGB, ARGB, CMYK (and other PIL colorspaces)""" colorspace = params.get("colorspace") # requested colorspace - fmt = params.pop("fmt").upper() if "fmt" in params else None # requested format + fmt = ( + params.pop("fmt").upper() if "fmt" in params else None # pyright: ignore + ) # requested format if not fmt: fmt = format_for(dst) - with PIL.Image.open(src) as image: + with PIL.Image.open(src) as image: # pyright: ignore if image.mode == "RGBA" and fmt in ALPHA_NOT_SUPPORTED or colorspace: - image = image.convert(colorspace or "RGB") + image = image.convert(colorspace or "RGB") # noqa: PLW2901 save_image(image, dst, fmt, **params) @@ -40,13 +41,13 @@ def create_favicon(src: pathlib.Path, dst: pathlib.Path) -> None: if dst.suffix != ".ico": raise ValueError("favicon extension must be ICO") - img = PIL.Image.open(src) + img = PIL.Image.open(src) # pyright: ignore w, h = img.size # resize image to square first if w != h: size = min([w, h]) resized = dst.parent.joinpath(f"{src.stem}.tmp.{src.suffix}") resize_image(src, size, size, resized, "contain") - img = PIL.Image.open(resized) + img = PIL.Image.open(resized) # pyright: ignore # now convert to ICO save_image(img, dst, "ICO") diff --git a/src/zimscraperlib/image/optimization.py b/src/zimscraperlib/image/optimization.py index 0a21e922..bed4c8a5 100644 --- a/src/zimscraperlib/image/optimization.py +++ b/src/zimscraperlib/image/optimization.py @@ -1,5 +1,4 @@ #!/usr/bin/env python3 -# -*- coding: utf-8 -*- # vim: ai ts=4 sts=4 et sw=4 nu @@ -36,9 +35,9 @@ from optimize_images.img_dynamic_quality import jpeg_dynamic_quality from PIL import Image -from .convertion import convert_image -from .probing import format_for -from .utils import save_image +from zimscraperlib.image.convertion import convert_image +from zimscraperlib.image.probing import format_for +from zimscraperlib.image.utils import save_image def ensure_matches( @@ -54,12 +53,12 @@ def ensure_matches( def optimize_png( src: Union[pathlib.Path, io.BytesIO], 
dst: Optional[pathlib.Path] = None, - reduce_colors: Optional[bool] = False, + reduce_colors: Optional[bool] = False, # noqa: FBT002 max_colors: Optional[int] = 256, - fast_mode: Optional[bool] = True, - remove_transparency: Optional[bool] = False, + fast_mode: Optional[bool] = True, # noqa: FBT002 + remove_transparency: Optional[bool] = False, # noqa: FBT002 background_color: Optional[Tuple[int, int, int]] = (255, 255, 255), - **options, + **options, # noqa: ARG001 ) -> Union[pathlib.Path, io.BytesIO]: """method to optimize PNG files using a pure python external optimizer @@ -77,34 +76,34 @@ def optimize_png( if remove_transparency is True (tuple containing RGB values) values: (255, 255, 255) | (221, 121, 108) | (XX, YY, ZZ)""" - ensure_matches(src, "PNG") + ensure_matches(src, "PNG") # pyright: ignore img = Image.open(src) if remove_transparency: - img = remove_alpha(img, background_color) + img = remove_alpha(img, background_color) # pyright: ignore if reduce_colors: - img, _, _ = do_reduce_colors(img, max_colors) + img, _, _ = do_reduce_colors(img, max_colors) # pyright: ignore if not fast_mode and img.mode == "P": img, _ = rebuild_palette(img) if dst is None: - dst = io.BytesIO() - img.save(dst, optimize=True, format="PNG") + dst = io.BytesIO() # pyright: ignore + img.save(dst, optimize=True, format="PNG") # pyright: ignore if isinstance(dst, io.BytesIO): dst.seek(0) - return dst + return dst # pyright: ignore def optimize_jpeg( src: Union[pathlib.Path, io.BytesIO], dst: Optional[pathlib.Path] = None, quality: Optional[int] = 85, - fast_mode: Optional[bool] = True, - keep_exif: Optional[bool] = True, - **options, + fast_mode: Optional[bool] = True, # noqa: FBT002 + keep_exif: Optional[bool] = True, # noqa: FBT002 + **options, # noqa: ARG001 ) -> Union[pathlib.Path, io.BytesIO]: """method to optimize JPEG files using a pure python external optimizer quality: JPEG quality (integer between 1 and 100) @@ -115,7 +114,7 @@ def optimize_jpeg( get dynamic quality 
value to ensure better compression values: True | False""" - ensure_matches(src, "JPEG") + ensure_matches(src, "JPEG") # pyright: ignore img = Image.open(src) orig_size = ( @@ -131,7 +130,7 @@ def optimize_jpeg( had_exif = True # only use progressive if file size is bigger - use_progressive_jpg = orig_size > 10240 # 10KiB + use_progressive_jpg = orig_size > 10240 # 10KiB # noqa: PLR2004 if fast_mode: quality_setting = quality @@ -139,10 +138,10 @@ def optimize_jpeg( quality_setting, _ = jpeg_dynamic_quality(img) if dst is None: - dst = io.BytesIO() + dst = io.BytesIO() # pyright: ignore img.save( - dst, + dst, # pyright: ignore quality=quality_setting, optimize=True, progressive=use_progressive_jpg, @@ -154,25 +153,27 @@ def optimize_jpeg( if keep_exif and had_exif: piexif.transplant( - exif_src=str(src.resolve()) - if isinstance(src, pathlib.Path) - else src.getvalue(), - image=str(dst.resolve()) - if isinstance(dst, pathlib.Path) - else dst.getvalue(), + exif_src=( + str(src.resolve()) if isinstance(src, pathlib.Path) else src.getvalue() + ), + image=( + str(dst.resolve()) + if isinstance(dst, pathlib.Path) + else dst.getvalue() # pyright: ignore + ), new_file=dst, ) - return dst + return dst # pyright: ignore def optimize_webp( src: Union[pathlib.Path, io.BytesIO], dst: Optional[pathlib.Path] = None, - lossless: Optional[bool] = False, + lossless: Optional[bool] = False, # noqa: FBT002 quality: Optional[int] = 60, method: Optional[int] = 6, - **options, + **options, # noqa: ARG001 ) -> Union[pathlib.Path, io.BytesIO]: """method to optimize WebP using Pillow options lossless: Whether to use lossless compression (boolean) @@ -187,7 +188,7 @@ def optimize_webp( refer to the link for more details https://pillow.readthedocs.io/en/stable/handbook/image-file-formats.html#webp""" - ensure_matches(src, "WEBP") + ensure_matches(src, "WEBP") # pyright: ignore params = { "lossless": lossless, "quality": quality, @@ -196,17 +197,17 @@ def optimize_webp( webp_image = 
Image.open(src) if dst is None: - dst = io.BytesIO() - webp_image.save(dst, format="WEBP", **params) - dst.seek(0) + dst = io.BytesIO() # pyright: ignore + webp_image.save(dst, format="WEBP", **params) # pyright: ignore + dst.seek(0) # pyright: ignore else: try: - save_image(webp_image, dst, fmt="WEBP", **params) + save_image(webp_image, dst, fmt="WEBP", **params) # pyright: ignore except Exception as exc: - if src.resolve() != dst.resolve() and dst.exists(): - dst.unlink() # pragma: nocover + if src.resolve() != dst.resolve() and dst.exists(): # pyright: ignore + dst.unlink() # pragma: no cover raise exc - return dst + return dst # pyright: ignore def optimize_gif( @@ -214,10 +215,10 @@ def optimize_gif( dst: pathlib.Path, optimize_level: Optional[int] = 1, lossiness: Optional[int] = None, - interlace: Optional[bool] = True, - no_extensions: Optional[bool] = True, + interlace: Optional[bool] = True, # noqa: FBT002 + no_extensions: Optional[bool] = True, # noqa: FBT002 max_colors: Optional[int] = None, - **options, + **options, # noqa: ARG001 ) -> pathlib.Path: """method to optimize GIFs using gifsicle >= 1.92 optimize_level: Optimization level; @@ -238,7 +239,7 @@ def optimize_gif( ensure_matches(src, "GIF") # use gifsicle - args = ["gifsicle"] + args = ["/usr/bin/env", "gifsicle"] if optimize_level: args += [f"-O{optimize_level}"] if max_colors: @@ -251,11 +252,11 @@ def optimize_gif( args += ["--interlace"] args += [str(src)] with open(dst, "w") as out_file: - gifsicle = subprocess.run(args, stdout=out_file) + gifsicle = subprocess.run(args, stdout=out_file, check=False) # remove dst if gifsicle failed and src is different from dst if gifsicle.returncode != 0 and src.resolve() != dst.resolve() and dst.exists(): - dst.unlink() # pragma: nocover + dst.unlink() # pragma: no cover # raise error if unsuccessful gifsicle.check_returncode() @@ -265,10 +266,10 @@ def optimize_gif( def optimize_image( src: pathlib.Path, dst: pathlib.Path, - delete_src: Optional[bool] = 
False, - convert: Optional[Union[bool, str]] = False, + delete_src: Optional[bool] = False, # noqa: FBT002 + convert: Optional[Union[bool, str]] = False, # noqa: FBT002 **options, -) -> bool: +) -> bool: # pyright: ignore """Optimize image, automatically selecting correct optimizer delete_src: whether to remove src file upon success (boolean) @@ -282,12 +283,12 @@ def optimize_image( # if requested, convert src to requested format into dst path if convert and src_format != dst_format: src_format = dst_format = convert if isinstance(convert, str) else dst_format - convert_image(src, dst, fmt=src_format) + convert_image(src, dst, fmt=src_format) # pyright: ignore src_img = pathlib.Path(dst) else: src_img = pathlib.Path(src) - { + { # pyright: ignore "JPEG": optimize_jpeg, "PNG": optimize_png, "GIF": optimize_gif, diff --git a/src/zimscraperlib/image/presets.py b/src/zimscraperlib/image/presets.py index b79294b2..8eab6991 100644 --- a/src/zimscraperlib/image/presets.py +++ b/src/zimscraperlib/image/presets.py @@ -1,7 +1,8 @@ #!/usr/bin/env python3 -# -*- coding: utf-8 -*- # vim: ai ts=4 sts=4 et sw=4 nu +from typing import ClassVar, Dict, Optional, Union + """ presets for ImageOptimizer in zimscraperlib.image.optimization module """ preset_type = "image" @@ -19,7 +20,7 @@ class WebpLow: ext = "webp" mimetype = f"{preset_type}/webp" - options = { + options: ClassVar[Dict[str, Optional[Union[str, bool, int]]]] = { "lossless": False, "quality": 40, "method": 6, @@ -38,7 +39,7 @@ class WebpMedium: ext = "webp" mimetype = f"{preset_type}/webp" - options = { + options: ClassVar[Dict[str, Optional[Union[str, bool, int]]]] = { "lossless": False, "quality": 50, "method": 6, @@ -57,7 +58,7 @@ class WebpHigh: ext = "webp" mimetype = f"{preset_type}/webp" - options = { + options: ClassVar[Dict[str, Optional[Union[str, bool, int]]]] = { "lossless": False, "quality": 90, "method": 6, @@ -78,7 +79,7 @@ class GifLow: ext = "gif" mimetype = f"{preset_type}/gif" - options = { + 
options: ClassVar[Dict[str, Optional[Union[str, bool, int]]]] = { "optimize_level": 3, "max_colors": 256, "lossiness": 80, @@ -101,7 +102,7 @@ class GifMedium: ext = "gif" mimetype = f"{preset_type}/gif" - options = { + options: ClassVar[Dict[str, Optional[Union[str, bool, int]]]] = { "optimize_level": 3, "lossiness": 20, "no_extensions": True, @@ -123,7 +124,7 @@ class GifHigh: ext = "gif" mimetype = f"{preset_type}/gif" - options = { + options: ClassVar[Dict[str, Optional[Union[str, bool, int]]]] = { "optimize_level": 2, "lossiness": None, "no_extensions": True, @@ -142,7 +143,7 @@ class PngLow: ext = "png" mimetype = f"{preset_type}/png" - options = { + options: ClassVar[Dict[str, Optional[Union[str, bool, int]]]] = { "reduce_colors": True, "remove_transparency": False, "max_colors": 256, @@ -161,7 +162,7 @@ class PngMedium: ext = "png" mimetype = f"{preset_type}/png" - options = { + options: ClassVar[Dict[str, Optional[Union[str, bool, int]]]] = { "reduce_colors": False, "remove_transparency": False, "fast_mode": False, @@ -179,7 +180,7 @@ class PngHigh: ext = "png" mimetype = f"{preset_type}/png" - options = { + options: ClassVar[Dict[str, Optional[Union[str, bool, int]]]] = { "reduce_colors": False, "remove_transparency": False, "fast_mode": True, @@ -198,7 +199,7 @@ class JpegLow: ext = "png" mimetype = f"{preset_type}/png" - options = { + options: ClassVar[Dict[str, Optional[Union[str, bool, int]]]] = { "quality": 45, "keep_exif": False, "fast_mode": True, @@ -217,7 +218,7 @@ class JpegMedium: ext = "jpg" mimetype = f"{preset_type}/jpeg" - options = { + options: ClassVar[Dict[str, Optional[Union[str, bool, int]]]] = { "quality": 65, "keep_exif": False, "fast_mode": True, @@ -236,7 +237,7 @@ class JpegHigh: ext = "jpg" mimetype = f"{preset_type}/jpeg" - options = { + options: ClassVar[Dict[str, Optional[Union[str, bool, int]]]] = { "quality": 80, "keep_exif": True, "fast_mode": True, diff --git a/src/zimscraperlib/image/probing.py 
b/src/zimscraperlib/image/probing.py index 173aca63..ac3badca 100644 --- a/src/zimscraperlib/image/probing.py +++ b/src/zimscraperlib/image/probing.py @@ -1,5 +1,4 @@ #!/usr/bin/env python3 -# -*- coding: utf-8 -*- # vim: ai ts=4 sts=4 et sw=4 nu import colorsys @@ -13,7 +12,7 @@ def get_colors( - src: pathlib.Path, use_palette: Optional[bool] = True + src: pathlib.Path, use_palette: Optional[bool] = True # noqa: FBT002 ) -> Tuple[str, str]: """(main, secondary) HTML color codes from an image path""" @@ -23,8 +22,10 @@ def rgb_to_hex(r: int, g: int, b: int) -> str: def solarize(r: int, g: int, b: int) -> Tuple[int, int, int]: # calculate solarized color for main - h, l, s = colorsys.rgb_to_hls(float(r) / 256, float(g) / 256, float(b) / 256) - r2, g2, b2 = [int(x * 256) for x in colorsys.hls_to_rgb(h, 0.95, s)] + h, l, s = colorsys.rgb_to_hls( # noqa: E741 + float(r) / 256, float(g) / 256, float(b) / 256 + ) + r2, g2, b2 = (int(x * 256) for x in colorsys.hls_to_rgb(h, 0.95, s)) return r2, g2, b2 ct = colorthief.ColorThief(src) @@ -46,20 +47,25 @@ def solarize(r: int, g: int, b: int) -> Tuple[int, int, int]: def is_hex_color(text: str) -> bool: """whether supplied text is a valid hex-formated color code""" - return re.search(r"^#(?:[0-9a-fA-F]{3}){1,2}$", text) + return re.search(r"^#(?:[0-9a-fA-F]{3}){1,2}$", text) # pyright: ignore -def format_for(src: Union[pathlib.Path, io.BytesIO], from_suffix: bool = True) -> str: +def format_for( + src: Union[pathlib.Path, io.BytesIO], + from_suffix: bool = True, # noqa: FBT001, FBT002 +) -> str: """Pillow format of a given filename, either Pillow-detected or from suffix""" if not from_suffix: with PIL.Image.open(src) as img: - return img.format + return img.format # pyright: ignore - from PIL.Image import EXTENSION as ext_fmt_map + from PIL.Image import EXTENSION as ext_fmt_map # noqa: N811 from PIL.Image import init as init_pil init_pil() - return ext_fmt_map[src.suffix] # might raise KeyError on unknown extension + return 
ext_fmt_map[ + src.suffix # pyright: ignore + ] # might raise KeyError on unknown extension def is_valid_image( diff --git a/src/zimscraperlib/image/transformation.py b/src/zimscraperlib/image/transformation.py index 372f29ba..4db56cc6 100644 --- a/src/zimscraperlib/image/transformation.py +++ b/src/zimscraperlib/image/transformation.py @@ -1,5 +1,4 @@ #!/usr/bin/env python3 -# -*- coding: utf-8 -*- # vim: ai ts=4 sts=4 et sw=4 nu import io @@ -9,8 +8,8 @@ import PIL from resizeimage import resizeimage -from ..constants import ALPHA_NOT_SUPPORTED -from .utils import save_image +from zimscraperlib.constants import ALPHA_NOT_SUPPORTED +from zimscraperlib.image.utils import save_image def resize_image( @@ -19,14 +18,14 @@ def resize_image( height: Optional[int] = None, dst: Optional[Union[pathlib.Path, io.BytesIO]] = None, method: Optional[str] = "width", - allow_upscaling: Optional[bool] = True, + allow_upscaling: Optional[bool] = True, # noqa: FBT002 **params: Optional[dict], ) -> None: """resize an image to requested dimensions methods: width, height, cover, thumbnail allow upscaling: upscale image first, preserving aspect ratio if required""" - with PIL.Image.open(src) as image: + with PIL.Image.open(src) as image: # pyright: ignore # preserve image format as resize() does not transmit it into new object image_format = image.format image_mode = image.mode @@ -35,9 +34,13 @@ def resize_image( if allow_upscaling: height_width_ratio = float(image.size[1]) / float(image.size[0]) if image.size[0] < width: - image = image.resize((width, int(width * height_width_ratio))) + image = image.resize( # noqa: PLW2901 + (width, int(width * height_width_ratio)) + ) if height and image.size[1] < height: - image = image.resize((int(height / height_width_ratio), height)) + image = image.resize( # noqa: PLW2901 + (int(height / height_width_ratio), height) + ) # resize using the requested method if method == "width": @@ -55,4 +58,9 @@ def resize_image( if dst is None and 
isinstance(src, io.BytesIO): src.seek(0) - save_image(resized, dst if dst is not None else src, image_format, **params) + save_image( + resized, + dst if dst is not None else src, # pyright: ignore + image_format, + **params, + ) diff --git a/src/zimscraperlib/image/utils.py b/src/zimscraperlib/image/utils.py index 712705f0..2568492d 100644 --- a/src/zimscraperlib/image/utils.py +++ b/src/zimscraperlib/image/utils.py @@ -1,5 +1,4 @@ #!/usr/bin/env python -# -*- coding: utf-8 -*- # vim: ai ts=4 sts=4 et sw=4 nu import pathlib @@ -9,9 +8,12 @@ def save_image( - src: Image, dst: pathlib.Path, fmt: Optional[str] = None, **params: Optional[dict] + src: Image, # pyright: ignore + dst: pathlib.Path, + fmt: Optional[str] = None, + **params: Optional[dict], ) -> None: """PIL.Image.save() wrapper setting default parameters""" - args = {"JPEG": {"quality": 100}, "PNG": {}}.get(fmt, {}) + args = {"JPEG": {"quality": 100}, "PNG": {}}.get(fmt, {}) # pyright: ignore args.update(params or {}) - src.save(dst, fmt, **args) + src.save(dst, fmt, **args) # pyright: ignore diff --git a/src/zimscraperlib/inputs.py b/src/zimscraperlib/inputs.py index ad6878c8..4ab9e6ef 100644 --- a/src/zimscraperlib/inputs.py +++ b/src/zimscraperlib/inputs.py @@ -1,5 +1,4 @@ #!/usr/bin/env python3 -# -*- coding: utf-8 -*- # vim: ai ts=4 sts=4 et sw=4 nu import pathlib @@ -7,17 +6,21 @@ import tempfile from typing import Optional, Tuple, Union -from . 
import logger -from .constants import MAXIMUM_DESCRIPTION_METADATA_LENGTH as MAX_DESC_LENGTH -from .constants import MAXIMUM_LONG_DESCRIPTION_METADATA_LENGTH as MAX_LONG_DESC_LENGTH -from .download import stream_file +from zimscraperlib import logger +from zimscraperlib.constants import ( + MAXIMUM_DESCRIPTION_METADATA_LENGTH as MAX_DESC_LENGTH, +) +from zimscraperlib.constants import ( + MAXIMUM_LONG_DESCRIPTION_METADATA_LENGTH as MAX_LONG_DESC_LENGTH, +) +from zimscraperlib.download import stream_file def handle_user_provided_file( source: Optional[Union[pathlib.Path, str]] = None, dest: Optional[pathlib.Path] = None, in_dir: Optional[pathlib.Path] = None, - nocopy: bool = False, + nocopy: bool = False, # noqa: FBT001, FBT002 ) -> Union[pathlib.Path, None]: """path to downloaded or copied a user provided file (URL or path) @@ -43,7 +46,7 @@ def handle_user_provided_file( else: source = pathlib.Path(source).expanduser().resolve() if not source.exists(): - raise IOError(f"{source} could not be found.") + raise OSError(f"{source} could not be found.") if nocopy: return source diff --git a/src/zimscraperlib/logging.py b/src/zimscraperlib/logging.py index b2f2996e..4a0f1881 100644 --- a/src/zimscraperlib/logging.py +++ b/src/zimscraperlib/logging.py @@ -1,5 +1,4 @@ #!/usr/bin/env python3 -# -*- coding: utf-8 -*- # vim: ai ts=4 sts=4 et sw=4 nu import io @@ -9,23 +8,23 @@ from logging.handlers import RotatingFileHandler from typing import Iterable, Optional -from .constants import NAME +from zimscraperlib.constants import NAME DEFAULT_FORMAT = "[%(name)s::%(asctime)s] %(levelname)s:%(message)s" VERBOSE_DEPENDENCIES = ["urllib3", "PIL", "boto3", "botocore", "s3transfer"] -def getLogger( +def getLogger( # noqa: N802 name: str, level: Optional[int] = logging.INFO, - console: Optional[io.TextIOBase] = sys.stdout, + console: Optional[io.TextIOBase] = sys.stdout, # pyright: ignore log_format: Optional[str] = DEFAULT_FORMAT, - file: Optional[pathlib.Path] = False, + file: 
Optional[pathlib.Path] = False, # noqa: FBT002 # pyright: ignore file_level: Optional[int] = None, file_format: Optional[str] = None, file_max: Optional[int] = 2**20, file_nb_backup: Optional[int] = 1, - deps_level: Optional[int] = logging.WARNING, + deps_level: Optional[int] = logging.WARNING, # noqa: ARG001 additional_deps: Optional[Iterable] = None, ): """configured logger for most usages @@ -44,11 +43,11 @@ def getLogger( additional_deps = [] # align zimscraperlib logging level to that of scraper - logging.Logger(NAME).setLevel(level) + logging.Logger(NAME).setLevel(level) # pyright: ignore # set arbitrary level for some known verbose dependencies # prevents them from polluting logs - for logger_name in set(VERBOSE_DEPENDENCIES + additional_deps): + for logger_name in set(VERBOSE_DEPENDENCIES + additional_deps): # pyright: ignore logging.getLogger(logger_name).setLevel(logging.WARNING) logger = logging.Logger(name) @@ -58,15 +57,17 @@ def getLogger( if console: console_handler = logging.StreamHandler(console) console_handler.setFormatter(logging.Formatter(log_format)) - console_handler.setLevel(level) + console_handler.setLevel(level) # pyright: ignore logger.addHandler(console_handler) if file: - file_handler = RotatingFileHandler( - file, maxBytes=file_max, backupCount=file_nb_backup + file_handler = RotatingFileHandler( # pyright: ignore + file, + maxBytes=file_max, # pyright: ignore + backupCount=file_nb_backup, # pyright: ignore ) file_handler.setFormatter(logging.Formatter(file_format or log_format)) - file_handler.setLevel(file_level or level) + file_handler.setLevel(file_level or level) # pyright: ignore logger.addHandler(file_handler) return logger @@ -74,7 +75,7 @@ def getLogger( def nicer_args_join(args: Iterable) -> str: """slightly better concateated list of subprocess args for display""" - nargs = args[0:1] - for arg in args[1:]: - nargs.append(arg if arg.startswith("-") else '"{}"'.format(arg)) + nargs = args[0:1] # pyright: ignore + for arg in 
args[1:]: # pyright: ignore + nargs.append(arg if arg.startswith("-") else f'"{arg}"') return " ".join(nargs) diff --git a/src/zimscraperlib/misc.py b/src/zimscraperlib/misc.py index 55f32d8c..6cba114d 100644 --- a/src/zimscraperlib/misc.py +++ b/src/zimscraperlib/misc.py @@ -1,8 +1,8 @@ """ Miscelaneous utils""" -from typing import Iterable +from typing import Optional -def first(*args: Iterable[object]) -> object: +def first(*args: Optional[object]) -> object: """first non-None value from *args ; fallback to empty string""" return next((item for item in args if item is not None), "") diff --git a/src/zimscraperlib/types.py b/src/zimscraperlib/types.py index d1aabd41..de142111 100644 --- a/src/zimscraperlib/types.py +++ b/src/zimscraperlib/types.py @@ -1,5 +1,4 @@ #!/usr/bin/env python -# -*- coding: utf-8 -*- # vim: ai ts=4 sts=4 et sw=4 nu """ File extensions to MIME-Type mapping @@ -51,9 +50,11 @@ def get_mime_for_name( filename = pathlib.Path(filename) if not filename.suffix: return no_ext_to - return mimetypes.guess_type(f"{filename.stem}{filename.suffix}")[0] or fallback + return ( + mimetypes.guess_type(f"{filename.stem}{filename.suffix}")[0] or fallback + ) # pyright: ignore except Exception: - return fallback + return fallback # pyright: ignore def init_types(): diff --git a/src/zimscraperlib/uri.py b/src/zimscraperlib/uri.py index c0fdb60d..b4906473 100644 --- a/src/zimscraperlib/uri.py +++ b/src/zimscraperlib/uri.py @@ -1,39 +1,39 @@ """ URI handling module""" import urllib.parse -from typing import Union +from typing import Optional, Union -from . 
import logger -from .misc import first +from zimscraperlib import logger +from zimscraperlib.misc import first def rebuild_uri( uri: urllib.parse.ParseResult, - scheme: str = None, - username: str = None, - password: str = None, - hostname: str = None, - port: Union[str, int] = None, - path: str = None, - params: str = None, - query: str = None, - fragment: str = None, - failsafe: bool = False, + scheme: Optional[str] = None, + username: Optional[str] = None, + password: Optional[str] = None, + hostname: Optional[str] = None, + port: Optional[Union[str, int]] = None, + path: Optional[str] = None, + params: Optional[str] = None, + query: Optional[str] = None, + fragment: Optional[str] = None, + failsafe: bool = False, # noqa: FBT001, FBT002 ) -> urllib.parse.ParseResult: """new ParseResult named tuple from uri with requested part updated""" try: - username = first(username, uri.username, "") - password = first(password, uri.password, "") - hostname = first(hostname, uri.hostname, "") - port = first(port, uri.port, "") + username = first(username, uri.username, "") # pyright: ignore + password = first(password, uri.password, "") # pyright: ignore + hostname = first(hostname, uri.hostname, "") # pyright: ignore + port = first(port, uri.port, "") # pyright: ignore netloc = ( f"{username}{':' if password else ''}{password}" f"{'@' if username or password else ''}{hostname}" f"{':' if port else ''}{port}" ) - return urllib.parse.urlparse( - urllib.parse.urlunparse( - ( + return urllib.parse.urlparse( # pyright: ignore + urllib.parse.urlunparse( # pyright: ignore + ( # pyright: ignore first(scheme, uri.scheme), netloc, first(path, uri.path), diff --git a/src/zimscraperlib/video/config.py b/src/zimscraperlib/video/config.py index d15553ab..82d8ed54 100644 --- a/src/zimscraperlib/video/config.py +++ b/src/zimscraperlib/video/config.py @@ -1,15 +1,18 @@ #!/usr/bin/env python3 -# -*- coding: utf-8 -*- # vim: ai ts=4 sts=4 et sw=4 nu +from typing import ClassVar, Dict, 
Optional, Union + class Config(dict): VERSION = 1 ext = "dat" mimetype = "application/data" - options = {} - defaults = {"-max_muxing_queue_size": "9999"} - mapping = { + options: ClassVar[Dict[str, Optional[Union[str, bool, int]]]] = {} + defaults: ClassVar[Dict[str, Optional[Union[str, bool, int]]]] = { + "-max_muxing_queue_size": "9999" + } + mapping: ClassVar[Dict[str, Optional[Union[str, bool, int]]]] = { "video_codec": "-codec:v", "audio_codec": "-codec:a", "max_video_bitrate": "-maxrate", @@ -111,7 +114,7 @@ def buffersize(self, value): @property def video_scale(self): # remove "scale='" and "'" and return the value in between - return self.get("-vf")[7:-1] if self.get("-vf") else None + return self.get("-vf")[7:-1] if self.get("-vf") else None # pyright: ignore @video_scale.setter def video_scale(self, value): @@ -129,8 +132,8 @@ def quantizer_scale_range(self, value): if ( isinstance(qmin, int) and isinstance(qmax, int) - and -1 <= qmin <= 69 - and -1 <= qmax <= 1024 + and -1 <= qmin <= 69 # noqa: PLR2004 + and -1 <= qmax <= 1024 # noqa: PLR2004 ): self["-qmin"] = str(qmin) self["-qmax"] = str(qmax) diff --git a/src/zimscraperlib/video/encoding.py b/src/zimscraperlib/video/encoding.py index ec6726d1..20509e7a 100644 --- a/src/zimscraperlib/video/encoding.py +++ b/src/zimscraperlib/video/encoding.py @@ -1,5 +1,4 @@ #!/usr/bin/env python3 -# -*- coding: utf-8 -*- # vim: ai ts=4 sts=4 et sw=4 nu @@ -8,12 +7,17 @@ import subprocess import tempfile -from .. 
import logger -from ..logging import nicer_args_join +from zimscraperlib import logger +from zimscraperlib.logging import nicer_args_join def reencode( - src_path, dst_path, ffmpeg_args, delete_src=False, with_process=False, failsafe=True + src_path, + dst_path, + ffmpeg_args, + delete_src=False, # noqa: FBT002 + with_process=False, # noqa: FBT002 + failsafe=True, # noqa: FBT002 ): """Runs ffmpeg with given ffmpeg_args @@ -28,20 +32,25 @@ def reencode( with tempfile.TemporaryDirectory() as tmp_dir: tmp_path = pathlib.Path(tmp_dir).joinpath(f"video.tmp{dst_path.suffix}") - args = ( - ["ffmpeg", "-y", "-i", f"file:{src_path}"] - + ffmpeg_args - + [f"file:{tmp_path}"] - ) + args = [ + "/usr/bin/env", + "ffmpeg", + "-y", + "-i", + f"file:{src_path}", + *ffmpeg_args, + f"file:{tmp_path}", + ] logger.debug( - f"Encode {src_path} -> {dst_path} " f"video format = {dst_path.suffix}" + f"Encode {src_path} -> {dst_path} video format = {dst_path.suffix}" ) logger.debug(nicer_args_join(args)) ffmpeg = subprocess.run( args, stderr=subprocess.STDOUT, stdout=subprocess.PIPE, - universal_newlines=True, + text=True, + check=False, ) if not failsafe: ffmpeg.check_returncode() diff --git a/src/zimscraperlib/video/presets.py b/src/zimscraperlib/video/presets.py index 4fb7328d..da55da2d 100644 --- a/src/zimscraperlib/video/presets.py +++ b/src/zimscraperlib/video/presets.py @@ -1,8 +1,9 @@ #!/usr/bin/env python3 -# -*- coding: utf-8 -*- # vim: ai ts=4 sts=4 et sw=4 nu -from .config import Config +from typing import ClassVar, Dict, Optional, Union + +from zimscraperlib.video.config import Config preset_type = "video" @@ -19,7 +20,7 @@ class VoiceMp3Low(Config): ext = "mp3" mimetype = "audio/mp3" - options = { + options: ClassVar[Dict[str, Optional[Union[str, bool, int]]]] = { "-vn": "", # remove video stream "-codec:a": "mp3", # audio codec "-ar": "44100", # audio sampling rate @@ -40,7 +41,7 @@ class VideoWebmLow(Config): ext = "webm" mimetype = f"{preset_type}/webm" - options = { + 
options: ClassVar[Dict[str, Optional[Union[str, bool, int]]]] = { "-codec:v": "libvpx", # video codec "-quality": "best", # codec preset "-b:v": "300k", # target video bitrate @@ -68,7 +69,7 @@ class VideoMp4Low(Config): ext = "mp4" mimetype = f"{preset_type}/mp4" - options = { + options: ClassVar[Dict[str, Optional[Union[str, bool, int]]]] = { "-codec:v": "h264", # video codec "-b:v": "300k", # target video bitrate "-maxrate": "300k", # max video bitrate @@ -93,7 +94,7 @@ class VideoWebmHigh(Config): ext = "webm" mimetype = f"{preset_type}/webm" - options = { + options: ClassVar[Dict[str, Optional[Union[str, bool, int]]]] = { "-codec:v": "libvpx", # video codec "-codec:a": "libvorbis", # audio codec "-crf": "25", # constant quality, lower value gives better qual and larger size @@ -111,7 +112,7 @@ class VideoMp4High(Config): ext = "mp4" mimetype = f"{preset_type}/mp4" - options = { + options: ClassVar[Dict[str, Optional[Union[str, bool, int]]]] = { "-codec:v": "h264", # video codec "-codec:a": "aac", # audio codec "-crf": "20", # constant quality, lower value gives better qual and larger size diff --git a/src/zimscraperlib/video/probing.py b/src/zimscraperlib/video/probing.py index 15b98a50..5f6b217f 100644 --- a/src/zimscraperlib/video/probing.py +++ b/src/zimscraperlib/video/probing.py @@ -1,5 +1,4 @@ #!/usr/bin/env python3 -# -*- coding: utf-8 -*- # vim: ai ts=4 sts=4 et sw=4 nu @@ -14,6 +13,7 @@ def get_media_info(src_path): bitrate: file's main bitrate""" args = [ + "/usr/bin/env", "ffprobe", "-i", f"file:{src_path}", @@ -28,9 +28,8 @@ def get_media_info(src_path): ] ffprobe = subprocess.run( args, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - universal_newlines=True, + capture_output=True, + text=True, check=False, ) result = ffprobe.stdout.strip().split("\n") diff --git a/src/zimscraperlib/zim/__init__.py b/src/zimscraperlib/zim/__init__.py index 44a9a601..df282f3a 100644 --- a/src/zimscraperlib/zim/__init__.py +++ 
b/src/zimscraperlib/zim/__init__.py @@ -1,5 +1,4 @@ #!/usr/bin/env python -# -*- coding: utf-8 -*- # vim: ai ts=4 sts=4 et sw=4 nu """ ZIM file creation tools @@ -10,13 +9,18 @@ zim.items: item to add to creator zim.archive: read ZIM files, accessing or searching its content""" -from libzim.writer import Blob +from libzim.writer import Blob # pyright: ignore -from .archive import Archive -from .creator import Creator -from .filesystem import make_zim_file -from .items import Item, StaticItem, URLItem -from .providers import FileLikeProvider, FileProvider, StringProvider, URLProvider +from zimscraperlib.zim.archive import Archive +from zimscraperlib.zim.creator import Creator +from zimscraperlib.zim.filesystem import make_zim_file +from zimscraperlib.zim.items import Item, StaticItem, URLItem +from zimscraperlib.zim.providers import ( + FileLikeProvider, + FileProvider, + StringProvider, + URLProvider, +) __all__ = [ "Archive", diff --git a/src/zimscraperlib/zim/_libkiwix.py b/src/zimscraperlib/zim/_libkiwix.py index d71db702..117b5b20 100644 --- a/src/zimscraperlib/zim/_libkiwix.py +++ b/src/zimscraperlib/zim/_libkiwix.py @@ -17,7 +17,9 @@ from typing import Dict, List, Optional, Tuple MimetypeAndCounter = namedtuple("MimetypeAndCounter", ["mimetype", "value"]) -CounterMap = Dict[type(MimetypeAndCounter.mimetype), type(MimetypeAndCounter.value)] +CounterMap = Dict[ + type(MimetypeAndCounter.mimetype), type(MimetypeAndCounter.value) # pyright: ignore +] def getline(src: io.StringIO, delim: Optional[bool] = None) -> Tuple[bool, str]: @@ -38,16 +40,18 @@ def getline(src: io.StringIO, delim: Optional[bool] = None) -> Tuple[bool, str]: return char == "", output -def readFullMimetypeAndCounterString(src: io.StringIO) -> Tuple[bool, str]: +def readFullMimetypeAndCounterString( + src: io.StringIO, +) -> Tuple[bool, str]: """read a single mimetype-and-counter string from source Returns whether the source is EOF and the extracted string (or empty one)""" params = "" - eof, 
mtcStr = getline(src, ";") + eof, mtcStr = getline(src, ";") # pyright: ignore if mtcStr.find("=") == -1: - while params.count("=") != 2: - eof, params = getline(src, ";") - if params.count("=") == 2: + while params.count("=") != 2: # noqa: PLR2004 + eof, params = getline(src, ";") # pyright: ignore + if params.count("=") == 2: # noqa: PLR2004 mtcStr += ";" + params if eof: break @@ -60,11 +64,10 @@ def parseASingleMimetypeCounter(string: str) -> MimetypeAndCounter: if k != len(string) - 1: mimeType = string[:k] counter = string[k + 1 :] - if counter: - try: - return MimetypeAndCounter(mimeType, int(counter)) - except ValueError: - pass # value is not castable to int + try: + return MimetypeAndCounter(mimeType, int(counter)) + except ValueError: + pass # value is not castable to int return MimetypeAndCounter("", 0) @@ -72,7 +75,7 @@ def parseMimetypeCounter( counterData: str, ) -> CounterMap: """Mapping of MIME types with count for each from ZIM Counter metadata string""" - counters = dict() + counters = {} ss = io.StringIO(counterData) eof = False while not eof: diff --git a/src/zimscraperlib/zim/archive.py b/src/zimscraperlib/zim/archive.py index 01c317a1..7d69ee9a 100644 --- a/src/zimscraperlib/zim/archive.py +++ b/src/zimscraperlib/zim/archive.py @@ -1,5 +1,4 @@ #!/usr/bin/env python -# -*- coding: utf-8 -*- # vim: ai ts=4 sts=4 et sw=4 nu """ ZIM Archive helper @@ -12,12 +11,12 @@ from typing import Dict, Iterable, List, Optional -import libzim.reader -import libzim.search # Query, Searcher -import libzim.suggestion # SuggestionSearcher +import libzim.reader # pyright: ignore +import libzim.search # Query, Searcher # pyright: ignore +import libzim.suggestion # SuggestionSearcher # pyright: ignore -from ._libkiwix import convertTags, parseMimetypeCounter -from .items import Item +from zimscraperlib.zim._libkiwix import convertTags, parseMimetypeCounter +from zimscraperlib.zim.items import Item class Archive(libzim.reader.Archive): @@ -40,11 +39,11 @@ def 
metadata(self) -> Dict[str, str]: def tags(self): return self.get_tags() - def get_tags(self, libkiwix: bool = False) -> List[str]: + def get_tags(self, libkiwix: bool = False) -> List[str]: # noqa: FBT001, FBT002 """List of ZIM tags, optionnaly expanded with libkiwix's hints""" try: tags_meta = self.get_text_metadata("Tags") - except RuntimeError: # pragma: nocover + except RuntimeError: # pragma: no cover tags_meta = "" if libkiwix: diff --git a/src/zimscraperlib/zim/creator.py b/src/zimscraperlib/zim/creator.py index a87a24e6..6ab905db 100644 --- a/src/zimscraperlib/zim/creator.py +++ b/src/zimscraperlib/zim/creator.py @@ -1,5 +1,4 @@ #!/usr/bin/env python -# -*- coding: utf-8 -*- # vim: ai ts=4 sts=4 et sw=4 nu """ ZIM Creator helper @@ -24,18 +23,22 @@ import weakref from typing import Any, Callable, Iterable, Optional, Tuple, Union -import libzim.writer +import libzim.writer # pyright: ignore -from ..constants import ( +from zimscraperlib.constants import ( DEFAULT_DEV_ZIM_METADATA, FRONT_ARTICLE_MIMETYPES, MANDATORY_ZIM_METADATA_KEYS, ) -from ..filesystem import delete_callback, get_content_mimetype, get_file_mimetype -from ..i18n import is_valid_iso_639_3 -from ..types import get_mime_for_name -from .items import StaticItem -from .metadata import ( +from zimscraperlib.filesystem import ( + delete_callback, + get_content_mimetype, + get_file_mimetype, +) +from zimscraperlib.i18n import is_valid_iso_639_3 +from zimscraperlib.types import get_mime_for_name +from zimscraperlib.zim.items import StaticItem +from zimscraperlib.zim.metadata import ( validate_counter, validate_date, validate_description, @@ -65,7 +68,9 @@ def mimetype_for( """mimetype as provided or guessed from fpath, path or content""" if not mimetype: mimetype = ( - get_file_mimetype(fpath) if fpath else get_content_mimetype(content[:2048]) + get_file_mimetype(fpath) + if fpath + else get_content_mimetype(content[:2048]) # pyright: ignore ) # try to guess more-defined mime if it's text if ( @@ 
-78,7 +83,6 @@ def mimetype_for( class Creator(libzim.writer.Creator): - """libzim.writer.Creator subclass Note: due to the lack of a cancel() method in the libzim itself, it is not possible @@ -99,11 +103,11 @@ def __init__( filename: pathlib.Path, main_path: str, compression: Optional[str] = None, - workaround_nocancel: Optional[bool] = True, - ignore_duplicates: Optional[bool] = False, + workaround_nocancel: Optional[bool] = True, # noqa: FBT002 + ignore_duplicates: Optional[bool] = False, # noqa: FBT002 ): super().__init__(filename=filename) - self._metadata = dict() + self._metadata = {} self.__indexing_configured = False self.can_finish = True @@ -119,7 +123,9 @@ def __init__( self.workaround_nocancel = workaround_nocancel self.ignore_duplicates = ignore_duplicates - def config_indexing(self, indexing: bool, language: Optional[str] = None): + def config_indexing( + self, indexing: bool, language: Optional[str] = None # noqa: FBT001 + ): """Toggle full-text and title indexing of entries Uses Language metadata's value (or "") if not set""" @@ -131,7 +137,7 @@ def config_indexing(self, indexing: bool, language: Optional[str] = None): return self def start(self): - if not all([self._metadata.get(key) for key in MANDATORY_ZIM_METADATA_KEYS]): + if not all(self._metadata.get(key) for key in MANDATORY_ZIM_METADATA_KEYS): raise ValueError("Mandatory metadata are not all set.") for name, value in self._metadata.items(): @@ -163,16 +169,16 @@ def validate_metadata( See https://wiki.openzim.org/wiki/Metadata""" validate_required_values(name, value) - validate_standard_str_types(name, value) + validate_standard_str_types(name, value) # pyright: ignore - validate_title(name, value) - validate_date(name, value) - validate_language(name, value) - validate_counter(name, value) - validate_description(name, value) - validate_longdescription(name, value) - validate_tags(name, value) - validate_illustrations(name, value) + validate_title(name, value) # pyright: ignore + 
validate_date(name, value) # pyright: ignore + validate_language(name, value) # pyright: ignore + validate_counter(name, value) # pyright: ignore + validate_description(name, value) # pyright: ignore + validate_longdescription(name, value) # pyright: ignore + validate_tags(name, value) # pyright: ignore + validate_illustrations(name, value) # pyright: ignore def add_metadata( self, @@ -186,21 +192,21 @@ def add_metadata( def config_metadata( self, *, - Name: str, - Language: str, - Title: str, - Description: str, - LongDescription: Optional[str] = None, - Creator: str, - Publisher: str, - Date: Union[datetime.datetime, datetime.date, str], - Illustration_48x48_at_1: bytes, - Tags: Optional[Union[Iterable[str], str]] = None, - Scraper: Optional[str] = None, - Flavour: Optional[str] = None, - Source: Optional[str] = None, - License: Optional[str] = None, - Relation: Optional[str] = None, + Name: str, # noqa: N803 + Language: str, # noqa: N803 + Title: str, # noqa: N803 + Description: str, # noqa: N803 + LongDescription: Optional[str] = None, # noqa: N803 + Creator: str, # noqa: N803 + Publisher: str, # noqa: N803 + Date: Union[datetime.datetime, datetime.date, str], # noqa: N803 + Illustration_48x48_at_1: bytes, # noqa: N803 + Tags: Optional[Union[Iterable[str], str]] = None, # noqa: N803 + Scraper: Optional[str] = None, # noqa: N803 + Flavour: Optional[str] = None, # noqa: N803 + Source: Optional[str] = None, # noqa: N803 + License: Optional[str] = None, # noqa: N803 + Relation: Optional[str] = None, # noqa: N803 **extras: str, ): """Sets all mandatory Metadata as well as standard and any other text ones""" @@ -241,9 +247,11 @@ def add_item_for( mimetype: Optional[str] = None, is_front: Optional[bool] = None, should_compress: Optional[bool] = None, - delete_fpath: Optional[bool] = False, + delete_fpath: Optional[bool] = False, # noqa: FBT002 duplicate_ok: Optional[bool] = None, - callback: Optional[Union[callable, Tuple[callable, Any]]] = None, + callback: Optional[ 
+ Union[callable, Tuple[callable, Any]] # pyright: ignore + ] = None, ): """Add a File or content at a specified path and get its path @@ -355,7 +363,7 @@ def add_redirect( self.can_finish = False # pragma: no cover raise - def finish(self, exc_type=None, exc_val=None, exc_tb=None): + def finish(self, exc_type=None, exc_val=None, exc_tb=None): # noqa: ARG002 """Triggers finalization of ZIM creation and create final ZIM file.""" if not getattr(self, "can_finish", False): return diff --git a/src/zimscraperlib/zim/filesystem.py b/src/zimscraperlib/zim/filesystem.py index f047a8c6..e0f3d6a2 100644 --- a/src/zimscraperlib/zim/filesystem.py +++ b/src/zimscraperlib/zim/filesystem.py @@ -1,5 +1,4 @@ #!/usr/bin/env python -# -*- coding: utf-8 -*- # vim: ai ts=4 sts=4 et sw=4 nu """ zimwriterfs-like tools to convert a build folder into a ZIM @@ -32,12 +31,12 @@ import re from typing import Optional, Sequence, Tuple -from .. import logger -from ..filesystem import get_file_mimetype -from ..html import find_title_in_file -from ..types import get_mime_for_name -from .creator import Creator -from .items import StaticItem +from zimscraperlib import logger +from zimscraperlib.filesystem import get_file_mimetype +from zimscraperlib.html import find_title_in_file +from zimscraperlib.types import get_mime_for_name +from zimscraperlib.zim.creator import Creator +from zimscraperlib.zim.items import StaticItem class FileItem(StaticItem): @@ -47,8 +46,8 @@ def __init__( self, root: pathlib.Path, filepath: pathlib.Path, - ): - super().__init__(root=root, filepath=filepath) + ): # pyright: ignore + super().__init__(root=root, filepath=filepath) # pyright: ignore # first look inside the file's magic headers self.mimetype = get_file_mimetype(self.filepath) # most web-specific files are plain text. 
In this case, use extension @@ -99,11 +98,11 @@ def add_redirects_to_zim( zim_file.add_redirect(source_url, target_url, title) if redirects_file: - with open(redirects_file, "r") as fh: + with open(redirects_file) as fh: for line in fh.readlines(): namespace, path, title, target_url = re.match( r"^(.)\t(.+)\t(.*)\t(.+)$", line - ).groups() + ).groups() # pyright: ignore if namespace.strip(): path = f"{namespace.strip()}/{path}" zim_file.add_redirect(path, target_url, title) @@ -117,20 +116,20 @@ def make_zim_file( illustration: str, title: str, description: str, - date: datetime.date = None, + date: datetime.date = None, # noqa: RUF013 # pyright: ignore language: str = "eng", creator: str = "-", publisher="-", - tags: Sequence[str] = None, - source: str = None, - flavour: str = None, - scraper: str = None, - long_description: str = None, - without_fulltext_index: bool = False, - redirects: Sequence[Tuple[str, str, str]] = None, - redirects_file: pathlib.Path = None, - rewrite_links: bool = True, - workaround_nocancel: bool = True, + tags: Sequence[str] = None, # noqa: RUF013 # pyright: ignore + source: str = None, # noqa: RUF013 # pyright: ignore + flavour: str = None, # noqa: RUF013 # pyright: ignore + scraper: str = None, # noqa: RUF013 # pyright: ignore + long_description: str = None, # noqa: RUF013 # pyright: ignore + without_fulltext_index: bool = False, # noqa: FBT001, FBT002, ARG001 + redirects: Sequence[Tuple[str, str, str]] = None, # noqa: RUF013 # pyright: ignore + redirects_file: pathlib.Path = None, # noqa: RUF013 # pyright: ignore + rewrite_links: bool = True, # noqa: FBT001, FBT002, ARG001 + workaround_nocancel: bool = True, # noqa: FBT001, FBT002 ): """Creates a zimwriterfs-like ZIM file at {fpath} from {build_dir} @@ -144,11 +143,11 @@ def make_zim_file( # sanity checks if not build_dir.exists() or not build_dir.is_dir(): - raise IOError(f"Incorrect build_dir: {build_dir}") + raise OSError(f"Incorrect build_dir: {build_dir}") illustration_path = 
build_dir / illustration if not illustration_path.exists() or not illustration_path.is_file(): - raise IOError(f"Incorrect illustration: {illustration} ({illustration_path})") + raise OSError(f"Incorrect illustration: {illustration} ({illustration_path})") with open(illustration_path, "rb") as fh: illustration_data = fh.read() @@ -161,7 +160,7 @@ def make_zim_file( "Name": name, "Title": title, "Description": description, - "Date": date or datetime.date.today(), + "Date": date or datetime.date.today(), # noqa: DTZ011 "Language": language, "Creator": creator, "Publisher": publisher, diff --git a/src/zimscraperlib/zim/items.py b/src/zimscraperlib/zim/items.py index 20eff63f..a3d20d74 100644 --- a/src/zimscraperlib/zim/items.py +++ b/src/zimscraperlib/zim/items.py @@ -1,5 +1,4 @@ #!/usr/bin/env python -# -*- coding: utf-8 -*- # vim: ai ts=4 sts=4 et sw=4 nu @@ -12,10 +11,15 @@ import urllib.parse from typing import Dict, Union -import libzim.writer +import libzim.writer # pyright: ignore -from ..download import stream_file -from .providers import FileLikeProvider, FileProvider, StringProvider, URLProvider +from zimscraperlib.download import stream_file +from zimscraperlib.zim.providers import ( + FileLikeProvider, + FileProvider, + StringProvider, + URLProvider, +) class Item(libzim.writer.Item): @@ -42,7 +46,7 @@ def get_mimetype(self) -> str: return getattr(self, "mimetype", "") def get_hints(self) -> dict: - return getattr(self, "hints", dict()) + return getattr(self, "hints", {}) class StaticItem(Item): @@ -113,7 +117,7 @@ def __init__(self, url: str, **kwargs): url, byte_stream=io.BytesIO(), only_first_block=True ) except Exception as exc: - raise IOError(f"Unable to access URL at {url}: {exc}") + raise OSError(f"Unable to access URL at {url}: {exc}") from None # HTML content will be indexed. 
# we proxy the content in the Item to prevent double-download of the resource diff --git a/src/zimscraperlib/zim/metadata.py b/src/zimscraperlib/zim/metadata.py index bedcddd6..66e92f13 100644 --- a/src/zimscraperlib/zim/metadata.py +++ b/src/zimscraperlib/zim/metadata.py @@ -3,15 +3,15 @@ from collections.abc import Iterable as IterableT from typing import Any, Iterable, Union -from ..constants import ( +from zimscraperlib.constants import ( ILLUSTRATIONS_METADATA_RE, MANDATORY_ZIM_METADATA_KEYS, MAXIMUM_DESCRIPTION_METADATA_LENGTH, MAXIMUM_LONG_DESCRIPTION_METADATA_LENGTH, RECOMMENDED_MAX_TITLE_LENGTH, ) -from ..i18n import is_valid_iso_639_3 -from ..image.probing import is_valid_image +from zimscraperlib.i18n import is_valid_iso_639_3 +from zimscraperlib.image.probing import is_valid_image def validate_required_values(name: str, value: Any): @@ -53,9 +53,14 @@ def validate_date(name: str, value: Union[datetime.datetime, datetime.date, str] elif isinstance(value, str): match = re.match(r"(?P\d{4})-(?P\d{2})-(?P\d{2})", value) try: - datetime.date(**{k: int(v) for k, v in match.groupdict().items()}) + datetime.date( + **{ + k: int(v) + for k, v in match.groupdict().items() # pyright: ignore + } + ) except Exception as exc: - raise ValueError(f"Invalid {name} format: {exc}") + raise ValueError(f"Invalid {name} format: {exc}") from None def validate_language(name: str, value: Union[Iterable[str], str]): @@ -68,7 +73,7 @@ def validate_language(name: str, value: Union[Iterable[str], str]): raise ValueError(f"{code} is not ISO-639-3.") -def validate_counter(name: str, value: str): +def validate_counter(name: str, value: str): # noqa: ARG001 """ensures Counter metadata is not manually set""" if name == "Counter": raise ValueError(f"{name} cannot be set. 
libzim sets it.") @@ -93,7 +98,7 @@ def validate_tags(name: str, value: Union[Iterable[str], str]): """ensures Tags metadata is either one or a list of strings""" if name == "Tags" and ( not isinstance(value, IterableT) - or not all([isinstance(tag, str) for tag in value]) + or not all(isinstance(tag, str) for tag in value) ): raise ValueError(f"Invalid type(s) for {name}") diff --git a/src/zimscraperlib/zim/providers.py b/src/zimscraperlib/zim/providers.py index 6fa74e0f..a698b4f5 100644 --- a/src/zimscraperlib/zim/providers.py +++ b/src/zimscraperlib/zim/providers.py @@ -1,5 +1,4 @@ #!/usr/bin/env python -# -*- coding: utf-8 -*- # vim: ai ts=4 sts=4 et sw=4 nu """ libzim Providers accepting a `ref` arg to keep it away from garbage collection @@ -14,17 +13,17 @@ import pathlib from typing import Optional, Union -import libzim.writer +import libzim.writer # pyright: ignore import requests -from ..download import _get_retry_adapter, stream_file +from zimscraperlib.download import _get_retry_adapter, stream_file class FileProvider(libzim.writer.FileProvider): def __init__( self, filepath: pathlib.Path, - size: Optional[int] = None, + size: Optional[int] = None, # noqa: ARG002 ref: Optional[object] = None, ): super().__init__(filepath) @@ -59,10 +58,12 @@ def __init__( self.fileobj.seek(0, io.SEEK_SET) def get_size(self) -> int: - return self.size + return self.size # pyright: ignore def gen_blob(self) -> libzim.writer.Blob: - yield libzim.writer.Blob(self.fileobj.getvalue()) # pragma: nocover + yield libzim.writer.Blob( # pragma: no cover + self.fileobj.getvalue() # pyright: ignore + ) class URLProvider(libzim.writer.ContentProvider): @@ -92,9 +93,9 @@ def get_size_of(url) -> Union[int, None]: return None def get_size(self) -> int: - return self.size + return self.size # pyright: ignore - def gen_blob(self) -> libzim.writer.Blob: # pragma: nocover + def gen_blob(self) -> libzim.writer.Blob: # pragma: no cover for chunk in self.resp.iter_content(10 * 1024): if chunk: 
yield libzim.writer.Blob(chunk) diff --git a/tasks.py b/tasks.py new file mode 100644 index 00000000..90854e86 --- /dev/null +++ b/tasks.py @@ -0,0 +1,109 @@ +# pyright: strict, reportUntypedFunctionDecorator=false +import os + +from invoke.context import Context +from invoke.tasks import task # pyright: ignore [reportUnknownVariableType] + +use_pty = not os.getenv("CI", "") + + +@task(optional=["args"], help={"args": "pytest additional arguments"}) +def test(ctx: Context, args: str = ""): + """run tests (without coverage)""" + ctx.run(f"pytest {args}", pty=use_pty) + + +@task(optional=["args"], help={"args": "pytest additional arguments"}) +def test_cov(ctx: Context, args: str = ""): + """run test vith coverage""" + ctx.run(f"coverage run -m pytest {args}", pty=use_pty) + + +@task(optional=["html"], help={"html": "flag to export html report"}) +def report_cov(ctx: Context, *, html: bool = False): + """report coverage""" + ctx.run("coverage combine", warn=True, pty=use_pty) + ctx.run("coverage report --show-missing", pty=use_pty) + if html: + ctx.run("coverage html", pty=use_pty) + + +@task( + optional=["args", "html"], + help={ + "args": "pytest additional arguments", + "html": "flag to export html report", + }, +) +def coverage(ctx: Context, args: str = "", *, html: bool = False): + """run tests and report coverage""" + test_cov(ctx, args=args) + report_cov(ctx, html=html) + + +@task(optional=["args"], help={"args": "black additional arguments"}) +def lint_black(ctx: Context, args: str = "."): + args = args or "." # needed for hatch script + ctx.run("black --version", pty=use_pty) + ctx.run(f"black --check --diff {args}", pty=use_pty) + + +@task(optional=["args"], help={"args": "ruff additional arguments"}) +def lint_ruff(ctx: Context, args: str = "."): + args = args or "." 
# needed for hatch script + ctx.run("ruff --version", pty=use_pty) + ctx.run(f"ruff check {args}", pty=use_pty) + + +@task( + optional=["args"], + help={ + "args": "linting tools (black, ruff) additional arguments, typically a path", + }, +) +def lintall(ctx: Context, args: str = "."): + """Check linting""" + args = args or "." # needed for hatch script + lint_black(ctx, args) + lint_ruff(ctx, args) + + +@task(optional=["args"], help={"args": "check tools (pyright) additional arguments"}) +def check_pyright(ctx: Context, args: str = ""): + """check static types with pyright""" + ctx.run("pyright --version") + ctx.run(f"pyright {args}", pty=use_pty) + + +@task(optional=["args"], help={"args": "check tools (pyright) additional arguments"}) +def checkall(ctx: Context, args: str = ""): + """check static types""" + check_pyright(ctx, args) + + +@task(optional=["args"], help={"args": "black additional arguments"}) +def fix_black(ctx: Context, args: str = "."): + """fix black formatting""" + args = args or "." # needed for hatch script + ctx.run(f"black {args}", pty=use_pty) + + +@task(optional=["args"], help={"args": "ruff additional arguments"}) +def fix_ruff(ctx: Context, args: str = "."): + """fix all ruff rules""" + args = args or "." # needed for hatch script + ctx.run(f"ruff --fix {args}", pty=use_pty) + + +@task( + optional=["args"], + help={ + "args": "linting tools (black, ruff) additional arguments, typically a path", + }, +) +def fixall(ctx: Context, args: str = "."): + """Fix everything automatically""" + args = args or "." 
# needed for hatch script + fix_black(ctx, args) + fix_ruff(ctx, args) + lintall(ctx, args) diff --git a/test b/test deleted file mode 100755 index b45a31d2..00000000 --- a/test +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/sh -pip install pytest coverage pytest-cov - -PYTHONPATH=$(pwd)/src/ pytest "$@" tests/ diff --git a/tests/conftest.py b/tests/conftest.py index cece10ec..bee8e890 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,5 +1,4 @@ #!/usr/bin/env python3 -# -*- coding: utf-8 -*- # vim: ai ts=4 sts=4 et sw=4 nu import pathlib @@ -11,28 +10,16 @@ def pytest_addoption(parser): parser.addoption( "--runslow", action="store_true", default=False, help="run slow tests" ) - parser.addoption( - "--runinstalled", - action="store_true", - default=False, - help="run tests checking for installed features", - ) def pytest_configure(config): config.addinivalue_line("markers", "slow: mark test as slow to run") - config.addinivalue_line( - "markers", "installed: mark test as testing installed features" - ) def pytest_collection_modifyitems(config, items): skip_slow = pytest.mark.skip(reason="need --runslow option to run") - skip_installed = pytest.mark.skip(reason="need --runinstalled option to run") for item in items: - if "installed" in item.keywords and not config.getoption("--runinstalled"): - item.add_marker(skip_installed) if "slow" in item.keywords and not config.getoption("--runslow"): item.add_marker(skip_slow) @@ -139,7 +126,7 @@ def small_zim_file(tmpdir_factory): dst = tmpdir_factory.mktemp("data").join("small.zim") stream_file( - "https://github.com/openzim/zim-testing-suite/raw/v0.3/data/nons/" "small.zim", + "https://github.com/openzim/zim-testing-suite/raw/v0.3/data/nons/small.zim", dst, ) return dst @@ -158,7 +145,6 @@ def ns_zim_file(tmpdir_factory): return dst -@pytest.mark.slow @pytest.fixture(scope="session") def real_zim_file(tmpdir_factory): from zimscraperlib.download import stream_file diff --git a/tests/download/test_download.py 
b/tests/download/test_download.py index a2c123d9..2c10093f 100644 --- a/tests/download/test_download.py +++ b/tests/download/test_download.py @@ -1,13 +1,15 @@ #!/usr/bin/env python3 -# -*- coding: utf-8 -*- # vim: ai ts=4 sts=4 et sw=4 nu import concurrent.futures import io import pathlib +import re +from typing import ClassVar, Dict, Optional, Union import pytest import requests +from yt_dlp import DownloadError from zimscraperlib.download import ( BestMp4, @@ -17,17 +19,21 @@ stream_file, ) +DEFAULT_REQUEST_TIMEOUT = 60 + def assert_downloaded_file(url, file): assert file.exists() # our google test urls dont support HEAD - req = requests.get(url) + req = requests.get(url, timeout=DEFAULT_REQUEST_TIMEOUT) # we test against binary response: Content-Length not accurate as gzip-encoded assert file.stat().st_size == len(req.content) def assert_headers(returned_headers): - assert isinstance(returned_headers, requests.structures.CaseInsensitiveDict) + assert isinstance( + returned_headers, requests.structures.CaseInsensitiveDict # pyright: ignore + ) assert returned_headers["Content-Type"] == "image/x-icon" @@ -35,7 +41,7 @@ def get_dest_file(tmp_path): return tmp_path.joinpath("favicon.ico") -def test_missing_dest(tmp_path): +def test_missing_dest(): with pytest.raises(requests.exceptions.ConnectionError): stream_file(url="http://some_url", byte_stream=io.BytesIO()) @@ -53,7 +59,7 @@ def test_no_output_supplied(valid_http_url): stream_file(url=valid_http_url) -def test_first_block_download(valid_http_url): +def test_first_block_download_default_session(valid_http_url): byte_stream = io.BytesIO() size, ret = stream_file( url=valid_http_url, byte_stream=byte_stream, only_first_block=True @@ -65,6 +71,29 @@ def test_first_block_download(valid_http_url): assert len(byte_stream.read()) <= expected +def test_first_block_download_custom_session(mocker, valid_http_url): + byte_stream = io.BytesIO() + custom_session = mocker.Mock(spec=requests.Session) + + expected_response = 
requests.Response() + expected_response.status_code = 200 + expected_response.raw = io.BytesIO(b"Whatever\n") + custom_session.get.return_value = expected_response + + mocker.patch("requests.Session") + stream_file( + url=valid_http_url, + byte_stream=byte_stream, + only_first_block=True, + session=custom_session, + ) + # check that custom session has been used + custom_session.get.assert_called_once_with( + valid_http_url, stream=True, proxies=None, headers=None + ) + requests.Session.assert_not_called() # pyright: ignore + + @pytest.mark.slow def test_user_agent(): ua = "zimscraperlib-test" @@ -98,7 +127,10 @@ def test_stream_to_bytes(valid_https_url): byte_stream = io.BytesIO() size, ret = stream_file(url=valid_https_url, byte_stream=byte_stream) assert_headers(ret) - assert byte_stream.read() == requests.get(valid_https_url).content + assert ( + byte_stream.read() + == requests.get(valid_https_url, timeout=DEFAULT_REQUEST_TIMEOUT).content + ) @pytest.mark.slow @@ -156,20 +188,20 @@ def test_youtube_download_nowait(tmp_path): BestMp4.get_options(target_dir=tmp_path), wait=False, ) - assert future.running() + assert future.running() # pyright: ignore assert not yt_downloader.executor._shutdown done, not_done = concurrent.futures.wait( - [future], return_when=concurrent.futures.ALL_COMPLETED + [future], return_when=concurrent.futures.ALL_COMPLETED # pyright: ignore ) - assert future.exception() is None + assert future.exception() is None # pyright: ignore assert len(done) == 1 assert len(not_done) == 0 @pytest.mark.slow -def test_youtube_download_error(tmp_path): +def test_youtube_download_error(): yt_downloader = YoutubeDownloader(threads=1) - with pytest.raises(Exception): + with pytest.raises(DownloadError, match=re.escape("is not a valid URL")): yt_downloader.download("11", BestMp4.get_options()) yt_downloader.shutdown() @@ -182,3 +214,70 @@ def test_youtube_download_contextmanager(tmp_path): ) assert yt_downloader.executor._shutdown assert 
tmp_path.joinpath("video.mp4").exists() # videmo doesn't offer webm + + +@pytest.fixture +def target_dir() -> pathlib.Path: + return pathlib.Path("adir1") + + +@pytest.fixture +def filepath() -> pathlib.Path: + return pathlib.Path("adir2/afile") + + +@pytest.fixture +def custom_outtmpl() -> str: + return "custom.%(ext)s" + + +class WrongOuttmplType(BestWebm): + options: ClassVar[Dict[str, Optional[Union[str, bool, int]]]] = {"outtmpl": 123} + + +def test_get_options_wrong_outtmpl_type(): + with pytest.raises(ValueError): + WrongOuttmplType.get_options() + + +def test_get_options_target_dir(target_dir): + options = BestWebm.get_options(target_dir=target_dir) + assert options["outtmpl"] == "adir1/video.%(ext)s" + + +def test_get_options_filepath(filepath): + options = BestWebm.get_options(filepath=filepath) + assert options["outtmpl"] == "adir2/afile" + + +def test_get_options_target_dir_filepath(target_dir, filepath): + options = BestWebm.get_options(target_dir=target_dir, filepath=filepath) + assert options["outtmpl"] == "adir1/adir2/afile" + + +def test_get_options_override_outtmpl_no_other_vars(custom_outtmpl): + original = BestWebm.get_options() + overriden = BestWebm.get_options(outtmpl=custom_outtmpl) + assert "outtmpl" in original + assert "outtmpl" in overriden + for key, value in original.items(): + if key != "outtmpl": + assert overriden[key] == value + else: + assert overriden[key] == custom_outtmpl + + +def test_get_options_override_outtmpl_other_vars(target_dir, filepath, custom_outtmpl): + original = BestWebm.get_options(target_dir=target_dir, filepath=filepath) + overriden = BestWebm.get_options( + target_dir=target_dir, + filepath=filepath, + outtmpl=custom_outtmpl, + ) + assert "outtmpl" in original + assert "outtmpl" in overriden + for key, value in original.items(): + if key != "outtmpl": + assert overriden[key] == value + else: + assert overriden[key] == custom_outtmpl diff --git a/tests/filesystem/test_filesystem.py 
b/tests/filesystem/test_filesystem.py index 76524f42..71daba63 100644 --- a/tests/filesystem/test_filesystem.py +++ b/tests/filesystem/test_filesystem.py @@ -1,5 +1,4 @@ #!/usr/bin/env python -# -*- coding: utf-8 -*- # vim: ai ts=4 sts=4 et sw=4 nu import magic @@ -29,7 +28,7 @@ def test_content_mimetype_fallback(monkeypatch, undecodable_byte_stream): assert get_content_mimetype(undecodable_byte_stream) == "application/octet-stream" # mock then so we keep coverage on systems where magic works - def raising_magic(*args): + def raising_magic(*args): # noqa: ARG001 raise UnicodeDecodeError("nocodec", b"", 0, 1, "noreason") monkeypatch.setattr(magic, "detect_from_content", raising_magic) @@ -44,11 +43,11 @@ def test_mime_overrides(svg_image): assert get_content_mimetype(fh.read(64)) == expected_mime -def test_delete_callback(tmp_path): +def test_delete_callback_with_cb(tmp_path): class Store: called = 0 - def cb(*args): + def cb(*args): # noqa: ARG001 Store.called += 1 fpath = tmp_path.joinpath("my-file") @@ -60,3 +59,13 @@ def cb(*args): assert not fpath.exists() assert Store.called assert Store.called == 1 + + +def test_delete_callback_without_cb(tmp_path): + fpath = tmp_path.joinpath("my-file") + with open(fpath, "w") as fh: + fh.write("content") + + delete_callback(fpath) + + assert not fpath.exists() diff --git a/tests/html/conftest.py b/tests/html/conftest.py index f0f7fb22..f31cbcef 100644 --- a/tests/html/conftest.py +++ b/tests/html/conftest.py @@ -1,5 +1,4 @@ #!/usr/bin/env python3 -# -*- coding: utf-8 -*- # vim: ai ts=4 sts=4 et sw=4 nu import pytest @@ -15,7 +14,7 @@ def html_page(): - Kiwix lets you access free knowledge – even offline + Kiwix lets you access free knowledge - even offline 0 else: diff --git a/tests/zim/conftest.py b/tests/zim/conftest.py index d73b37f4..f5a7f59b 100644 --- a/tests/zim/conftest.py +++ b/tests/zim/conftest.py @@ -1,5 +1,4 @@ #!/usr/bin/env python3 -# -*- coding: utf-8 -*- # vim: ai ts=4 sts=4 et sw=4 nu import pytest diff 
--git a/tests/zim/test_archive.py b/tests/zim/test_archive.py index 580e9004..9ec27309 100644 --- a/tests/zim/test_archive.py +++ b/tests/zim/test_archive.py @@ -1,5 +1,4 @@ #!/usr/bin/env python -# -*- coding: utf-8 -*- # vim: ai ts=4 sts=4 et sw=4 nu import pytest @@ -40,7 +39,14 @@ def test_get_item(small_zim_file): def test_suggestions(small_zim_file): with Archive(small_zim_file) as zim: assert zim.get_suggestions_count("test") == 1 - assert "main.html" in list(zim.get_suggestions("test")) + assert list(zim.get_suggestions("test")) == ["main.html"] + + +def test_suggestions_end_index(small_zim_file): + with Archive(small_zim_file) as zim: + assert zim.get_suggestions_count("test") == 1 + assert len(list(zim.get_suggestions("test", end=0))) == 0 + assert list(zim.get_suggestions("test", end=1)) == ["main.html"] def test_search_no_fti(small_zim_file): @@ -62,6 +68,15 @@ def test_search(real_zim_file): assert "A/Diesel_emissions_scandal" in list(zim.get_search_results("test")) +@pytest.mark.slow +def test_search_end_index(real_zim_file): + with Archive(real_zim_file) as zim: + assert list(zim.get_search_results("test", end=0)) == [] + assert "A/Diesel_emissions_scandal" in list( + zim.get_search_results("test", end=1) + ) + + def test_counters(small_zim_file): with Archive(small_zim_file) as zim: assert zim.counters == {"image/png": 1, "text/html": 1} @@ -98,7 +113,7 @@ def test_get_tags(small_zim_file, real_zim_file): assert zim.tags == zim.get_tags() -def test_libkiwix_convertTags(): +def test_libkiwix_convert_tags(): assert convertTags("") == [ "_ftindex:no", "_pictures:yes", diff --git a/tests/zim/test_fs.py b/tests/zim/test_fs.py index 41be535e..348a68ad 100644 --- a/tests/zim/test_fs.py +++ b/tests/zim/test_fs.py @@ -1,5 +1,4 @@ #!/usr/bin/env python -# -*- coding: utf-8 -*- # vim: ai ts=4 sts=4 et sw=4 nu import shutil @@ -59,7 +58,13 @@ def test_make_zim_file_fail_noillustration(build_data): assert not build_data["fpath"].exists() -def 
test_make_zim_file_working(build_data, png_image): +@pytest.mark.parametrize( + "with_redirects, with_redirects_file", + [(True, True), (True, False), (False, True), (False, False)], +) +def test_make_zim_file_working( + build_data, png_image, with_redirects, with_redirects_file +): build_data["build_dir"].mkdir() # add an image @@ -74,11 +79,20 @@ def test_make_zim_file_working(build_data, png_image): with open(build_data["build_dir"] / "app.js", "w") as fh: fh.write("console.log(window);") + if not with_redirects: + build_data.pop("redirects") + if not with_redirects_file: + build_data.pop("redirects_file") make_zim_file(**build_data) assert build_data["fpath"].exists() reader = Archive(build_data["fpath"]) - # welcome (actual) and two redirs - assert reader.entry_count == 8 # includes redirect + expected_entry_count = 4 + if with_redirects: + expected_entry_count += 1 + if with_redirects_file: + expected_entry_count += 3 + + assert reader.entry_count == expected_entry_count assert reader.get_item("style.css").mimetype == "text/css" assert reader.get_item("app.js").mimetype in ( @@ -123,7 +137,7 @@ def test_make_zim_file_no_file_on_error(tmp_path, png_image, build_data): print("Program exiting") """ - py = subprocess.run([sys.executable, "-c", pycode]) + py = subprocess.run([sys.executable, "-c", pycode], check=False) # returncode will be either 0 or -11, depending on garbage collection # in scrapers, we want to be able to fail on errors and absolutely don't want to # create a ZIM file, so SEGFAULT on exit it (somewhat) OK diff --git a/tests/zim/test_libkiwix.py b/tests/zim/test_libkiwix.py index 1e5a2e24..02fa218c 100644 --- a/tests/zim/test_libkiwix.py +++ b/tests/zim/test_libkiwix.py @@ -1,5 +1,4 @@ #!/usr/bin/env python -# -*- coding: utf-8 -*- # vim: ai ts=4 sts=4 et sw=4 nu import io @@ -7,7 +6,7 @@ import pytest from zimscraperlib.zim._libkiwix import getline -from zimscraperlib.zim._libkiwix import parseMimetypeCounter as parse +from 
zimscraperlib.zim._libkiwix import parseMimetypeCounter as parse # noqa: N813 empty = {} @@ -20,14 +19,17 @@ def test_geline_nodelim(): def test_getline(): ins = io.StringIO("text/javascript=8;text/html=3;application/warc-headers=28364;") - assert getline(ins, ";") == (False, "text/javascript=8") - assert getline(ins, ";") == (False, "text/html=3") - assert getline(ins, ";") == (False, "application/warc-headers=28364") - assert getline(ins, ";") == (True, "") + assert getline(ins, ";") == (False, "text/javascript=8") # pyright: ignore + assert getline(ins, ";") == (False, "text/html=3") # pyright: ignore + assert getline(ins, ";") == ( # pyright: ignore + False, + "application/warc-headers=28364", + ) + assert getline(ins, ";") == (True, "") # pyright: ignore @pytest.mark.parametrize( - "counterStr, counterMap", + "counter_str, counter_map", [ ("", empty), ("foo=1", {"foo": 1}), @@ -67,6 +69,7 @@ def test_getline(): ), ("text/html", empty), ("text/html=", empty), + ("text/html=0", {"text/html": 0}), ("text/html=foo", empty), ("text/html=123foo", empty), ("text/html=50;foo", {"text/html": 50}), @@ -75,6 +78,6 @@ def test_getline(): ("text/html=50;;foo", {"text/html": 50}), ], ) -def test_counter_parsing(counterStr, counterMap): +def test_counter_parsing(counter_str, counter_map): # https://github.com/kiwix/libkiwix/blob/master/test/counterParsing.cpp - assert parse(counterStr) == counterMap + assert parse(counter_str) == counter_map diff --git a/tests/zim/test_metadata.py b/tests/zim/test_metadata.py new file mode 100644 index 00000000..c4d96255 --- /dev/null +++ b/tests/zim/test_metadata.py @@ -0,0 +1,32 @@ +import re +from typing import Iterable, Union + +import pytest + +from zimscraperlib.zim import metadata + + +@pytest.mark.parametrize( + "name, value", + [ + ("Language", "fra"), + ("Language", "fra,eng"), + ("Language", ["fra", "eng"]), + ("Other", "not_an_iso_639_3_code"), + ], +) +def test_validate_language_valid(name: str, value: Union[Iterable[str], 
str]): + metadata.validate_language(name, value) + + +@pytest.mark.parametrize( + "name, value", + [ + ("Language", "fr"), + ("Language", "fra;eng"), + ("Language", "fra, eng"), + ], +) +def test_validate_language_invalid(name: str, value: Union[Iterable[str], str]): + with pytest.raises(ValueError, match=re.escape("is not ISO-639-3")): + metadata.validate_language(name, value) diff --git a/tests/zim/test_zim_creator.py b/tests/zim/test_zim_creator.py index 151a906f..9c27685c 100644 --- a/tests/zim/test_zim_creator.py +++ b/tests/zim/test_zim_creator.py @@ -1,5 +1,4 @@ #!/usr/bin/env python -# -*- coding: utf-8 -*- # vim: ai ts=4 sts=4 et sw=4 nu import base64 @@ -15,7 +14,7 @@ import time import pytest -from libzim.writer import Compression +from libzim.writer import Compression # pyright: ignore from zimscraperlib.constants import ( DEFAULT_DEV_ZIM_METADATA, @@ -52,7 +51,7 @@ def test_zim_creator(tmp_path, png_image, html_file, html_str): with open(png_image, "rb") as fh: png_data = fh.read() with Creator(fpath, main_path).config_dev_metadata( - Tags=tags, Illustration_48x48_at_1=png_data + Tags=tags, Illustration_48x48_at_1=png_data # pyright: ignore ) as creator: # verbatim HTML from string creator.add_item_for("welcome", "wel", content=html_str, is_front=True) @@ -120,7 +119,7 @@ def test_noindexlanguage(tmp_path): creator = Creator(fpath, "welcome").config_dev_metadata(Language="bam") creator.config_indexing(False) with creator as creator: - creator.add_item(StaticItem(path="welcome", content="hello")) + creator.add_item(StaticItem(path="welcome", content="hello")) # pyright: ignore creator.add_item_for("index", "Index", content="-", mimetype="text/html") reader = Archive(fpath) @@ -167,12 +166,15 @@ def test_add_item_for_delete_fail(tmp_path, png_image): shutil.copyfile(png_image, local_path) def remove_source(item): - print("##########", "remove_source") os.remove(item.filepath) with Creator(fpath, "welcome").config_dev_metadata() as creator: 
creator.add_item( - StaticItem(filepath=local_path, path="index", callback=remove_source), + StaticItem( + filepath=local_path, # pyright: ignore + path="index", # pyright: ignore + callback=remove_source, # pyright: ignore + ), callback=(delete_callback, local_path), ) assert not local_path.exists() @@ -186,18 +188,18 @@ def test_compression(tmp_path): with Creator( tmp_path / "test.zim", "welcome", compression="zstd" ).config_dev_metadata() as creator: - creator.add_item(StaticItem(path="welcome", content="hello")) + creator.add_item(StaticItem(path="welcome", content="hello")) # pyright: ignore with Creator( - fpath, "welcome", compression=Compression.zstd + fpath, "welcome", compression=Compression.zstd # pyright: ignore ).config_dev_metadata() as creator: - creator.add_item(StaticItem(path="welcome", content="hello")) + creator.add_item(StaticItem(path="welcome", content="hello")) # pyright: ignore def test_double_finish(tmp_path): fpath = tmp_path / "test.zim" with Creator(fpath, "welcome").config_dev_metadata() as creator: - creator.add_item(StaticItem(path="welcome", content="hello")) + creator.add_item(StaticItem(path="welcome", content="hello")) # pyright: ignore # ensure we can finish an already finished creator creator.finish() @@ -217,7 +219,11 @@ def test_sourcefile_removal(tmp_path, html_file): # copy html to folder src_path = pathlib.Path(tmpdir.name, "source.html") shutil.copyfile(html_file, src_path) - creator.add_item(StaticItem(filepath=src_path, path=src_path.name, ref=tmpdir)) + creator.add_item( + StaticItem( + filepath=src_path, path=src_path.name, ref=tmpdir # pyright: ignore + ) + ) del tmpdir assert not src_path.exists() @@ -235,7 +241,7 @@ def test_sourcefile_removal_std(tmp_path, html_file): StaticItem( filepath=paths[-1], path=paths[-1].name, - mimetype="text/html", + mimetype="text/html", # pyright: ignore ), callback=(delete_callback, paths[-1]), ) @@ -324,7 +330,9 @@ def test_filelikeprovider_nosize(tmp_path, png_image_url): fpath = 
tmp_path / "test.zim" with Creator(fpath, "").config_dev_metadata() as creator: - creator.add_item(FileLikeProviderItem(fileobj=fileobj, path="one.png")) + creator.add_item( + FileLikeProviderItem(fileobj=fileobj, path="one.png") # pyright: ignore + ) zim = Archive(fpath) assert bytes(zim.get_item("one.png").content) == fileobj.getvalue() @@ -338,7 +346,9 @@ def test_urlprovider(tmp_path, png_image_url): fpath = tmp_path / "test.zim" with Creator(fpath, "").config_dev_metadata() as creator: - creator.add_item(SpecialURLProviderItem(url=png_image_url, path="one.png")) + creator.add_item( + SpecialURLProviderItem(url=png_image_url, path="one.png") # pyright: ignore + ) zim = Archive(fpath) assert bytes(zim.get_item("one.png").content) == file_bytes @@ -353,7 +363,7 @@ def test_urlprovider_nolength(tmp_path, png_image_url, png_image): # create and start an http server without Content-Length support server_fpath = tmp_path / "httpd.py" - port = random.randint(10000, 20000) + port = random.randint(10000, 20000) # noqa: S311 server_code = """ from http.server import BaseHTTPRequestHandler, HTTPServer @@ -386,7 +396,7 @@ def do_GET(self): with tempfile.TemporaryDirectory() as tmp_dir, Creator( fpath, "" ).config_dev_metadata() as creator: - tmp_dir = pathlib.Path(tmp_dir) + tmp_dir = pathlib.Path(tmp_dir) # noqa: PLW2901 creator.add_item( URLItem( url=f"http://localhost:{port}/hoho.png", @@ -401,7 +411,8 @@ def do_GET(self): creator.add_item( SpecialURLProviderItem( - url=f"http://localhost:{port}/home.png", mimetype="image/png" + url=f"http://localhost:{port}/home.png", # pyright: ignore + mimetype="image/png", # pyright: ignore ) ) finally: @@ -443,7 +454,7 @@ def test_callback_and_remove(tmp_path, html_file): class Store: called = 0 - def cb(*args): + def cb(*args): # noqa: ARG001 Store.called += 1 # duplicate test file as we'll want to remove twice @@ -494,7 +505,7 @@ def test_without_metadata(tmp_path): def test_check_metadata(tmp_path): with 
pytest.raises(ValueError, match="Counter cannot be set"): - Creator(tmp_path, "").config_dev_metadata(Counter=1).start() + Creator(tmp_path, "").config_dev_metadata(Counter=1).start() # pyright: ignore with pytest.raises(ValueError, match="Description is too long."): Creator(tmp_path, "").config_dev_metadata(Description="T" * 90).start() @@ -580,10 +591,10 @@ def test_config_metadata(tmp_path, png_image): ("Title", "X" * 30, True), ("Title", "X" * 31, False), ("Date", 4, False), - ("Date", datetime.datetime.now(), True), - ("Date", datetime.datetime(1969, 12, 31, 23, 59), True), + ("Date", datetime.datetime.now(), True), # noqa: DTZ005 + ("Date", datetime.datetime(1969, 12, 31, 23, 59), True), # noqa: DTZ001 ("Date", datetime.date(1969, 12, 31), True), - ("Date", datetime.date.today(), True), + ("Date", datetime.date.today(), True), # noqa: DTZ011 ("Date", "1969-12-31", True), ("Date", "1969-13-31", False), ("Date", "2023/02/29", False), diff --git a/tox.ini b/tox.ini deleted file mode 100644 index d9176d58..00000000 --- a/tox.ini +++ /dev/null @@ -1,37 +0,0 @@ -[tox] -envlist = py38, py39, py310, py311, black, isort, flake8, coverage - -[testenv] -passenv = - WGET_BINARY - ZIMWRITERFS_BINARY -deps = - pytest - coverage - pytest-cov - -r{toxinidir}/requirements.txt -commands = - pytest --runslow --runinstalled --cov=zimscraperlib --cov-report=term --cov-report term-missing {posargs} - -[testenv:black] -deps = - black>=23.1.0,<24 -commands = black --check . - -[testenv:isort] -base_python=py38 -deps = - isort>=5.12.0,<5.13 -commands = isort --profile black --check src tests - -[testenv:flake8] -deps = - flake8>=6.0.0,<7.0 -commands = flake8 src --count --max-line-length=88 --statistics --extend-ignore=E203 - -[testenv:coverage] -passenv = - CODECOV_TOKEN -deps = - codecov -commands = codecov