diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 32c28dbb..ec9e5486 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -8,21 +8,41 @@ repos: - id: end-of-file-fixer - id: check-yaml - id: check-added-large-files - # TODO: enable - # - repo: https://github.com/pre-commit/mirrors-mypy - # rev: v1.10.0 - # hooks: - # - id: mypy - # args: [--strict] - # TODO: enable - # - repo: https://github.com/RobertCraigie/pyright-python - # rev: v1.1.363 - # hooks: - # - id: pyright - # Disabled because it doesn't work on NixOS - # - repo: https://github.com/astral-sh/ruff-pre-commit - # rev: v0.4.4 - # hooks: - # - id: ruff # linter - # args: [--fix] - # - id: ruff-format + - repo: https://github.com/python-jsonschema/check-jsonschema + rev: 0.28.4 + hooks: + - id: check-taskfile + - repo: https://github.com/rhysd/actionlint + rev: v1.7.0 + hooks: + - id: actionlint + - repo: https://github.com/IamTheFij/docker-pre-commit + rev: v3.0.1 + hooks: + - id: docker-compose-check + - repo: https://github.com/hadolint/hadolint + rev: v2.12.0 + hooks: + - id: hadolint + - repo: https://github.com/shellcheck-py/shellcheck-py + rev: v0.10.0.1 + hooks: + - id: shellcheck + # NOTE: not using https://github.com/RobertCraigie/pyright-python because it doesn't work with poetry virtual environments + # NOTE: not using github.com/astral-sh/ruff-pre-commit because it doesn't work on NixOS + - repo: local + hooks: + - id: pyright + name: pyright + entry: ./pre-commit-scripts/pyright.sh + language: script + pass_filenames: false + - id: ruff-lint + name: ruff-lint + entry: ./pre-commit-scripts/ruff-lint.sh + pass_filenames: false + language: script + - id: ruff-format + name: ruff-format + entry: ./pre-commit-scripts/ruff-format.sh + language: script diff --git a/Dockerfile.cpu b/Dockerfile.cpu index 7afe7120..5ed07ce5 100644 --- a/Dockerfile.cpu +++ b/Dockerfile.cpu @@ -1,9 +1,12 @@ FROM ubuntu:22.04 +# hadolint ignore=DL3008,DL4006 RUN apt-get update && \ - apt-get install -y curl software-properties-common && \ + apt-get install -y --no-install-recommends curl software-properties-common && \ add-apt-repository ppa:deadsnakes/ppa && \ apt-get update && \ - DEBIAN_FRONTEND=noninteractive apt-get -y install python3.11 python3.11-distutils && \ + DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends python3.11 python3.11-distutils && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* && \ curl -sS https://bootstrap.pypa.io/get-pip.py | python3.11 RUN pip install --no-cache-dir poetry==1.8.2 WORKDIR /root/speaches diff --git a/Dockerfile.cuda b/Dockerfile.cuda index c9da09d3..84a4c9d2 100644 --- a/Dockerfile.cuda +++ b/Dockerfile.cuda @@ -1,9 +1,12 @@ FROM nvidia/cuda:12.2.2-cudnn8-runtime-ubuntu22.04 +# hadolint ignore=DL3008,DL4006 RUN apt-get update && \ - apt-get install -y curl software-properties-common && \ + apt-get install -y --no-install-recommends curl software-properties-common && \ add-apt-repository ppa:deadsnakes/ppa && \ apt-get update && \ - DEBIAN_FRONTEND=noninteractive apt-get -y install python3.11 python3.11-distutils && \ + DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends python3.11 python3.11-distutils && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* && \ curl -sS https://bootstrap.pypa.io/get-pip.py | python3.11 RUN pip install --no-cache-dir poetry==1.8.2 WORKDIR /root/speaches diff --git a/pre-commit-scripts/pyright.sh b/pre-commit-scripts/pyright.sh new file mode 100755 index 00000000..19402c82 --- /dev/null +++ b/pre-commit-scripts/pyright.sh @@ -0,0 +1,4 @@ +#!/usr/bin/env bash +# shellcheck disable=SC1091 +source "$(poetry env info --path)"/bin/activate +pyright diff --git a/pre-commit-scripts/ruff-format.sh b/pre-commit-scripts/ruff-format.sh new file mode 100755 index 00000000..3bb13f75 --- /dev/null +++ b/pre-commit-scripts/ruff-format.sh @@ -0,0 +1,2 @@ +#!/usr/bin/env bash +ruff format diff --git a/pre-commit-scripts/ruff-lint.sh b/pre-commit-scripts/ruff-lint.sh new file mode 100755 index 00000000..35b33254 --- /dev/null +++ b/pre-commit-scripts/ruff-lint.sh @@ -0,0 +1,2 @@ +#!/usr/bin/env bash +ruff check --fix diff --git a/pyproject.toml b/pyproject.toml index 8b1933f9..235713aa 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -22,6 +22,11 @@ youtube-dl = {git = "https://github.com/ytdl-org/youtube-dl.git"} [tool.ruff] target-version = "py311" +[tool.pyright] +# typeCheckingMode = "strict" +pythonVersion = "3.11" +pythonPlatform = "Linux" + [build-system] requires = ["poetry-core"] build-backend = "poetry.core.masonry.api" diff --git a/speaches/main.py b/speaches/main.py index bbd4e5b8..ea1d0768 100644 --- a/speaches/main.py +++ b/speaches/main.py @@ -6,8 +6,15 @@ from io import BytesIO from typing import Annotated, Literal, OrderedDict -from fastapi import (FastAPI, Form, Query, Response, UploadFile, WebSocket, - WebSocketDisconnect) +from fastapi import ( + FastAPI, + Form, + Query, + Response, + UploadFile, + WebSocket, + WebSocketDisconnect, +) from fastapi.responses import StreamingResponse from fastapi.websockets import WebSocketState from faster_whisper import WhisperModel @@ -16,11 +23,12 @@ from speaches import utils from speaches.asr import FasterWhisperASR from speaches.audio import AudioStream, audio_samples_from_file -from speaches.config import (SAMPLES_PER_SECOND, Language, Model, - ResponseFormat, config) +from speaches.config import SAMPLES_PER_SECOND, Language, Model, ResponseFormat, config from speaches.logger import logger -from speaches.server_models import (TranscriptionJsonResponse, - TranscriptionVerboseJsonResponse) +from speaches.server_models import ( + TranscriptionJsonResponse, + TranscriptionVerboseJsonResponse, +) from speaches.transcriber import audio_transcriber models: OrderedDict[Model, WhisperModel] = OrderedDict() diff --git a/speaches/server_models.py b/speaches/server_models.py index 52fd9916..6ec3efd4 100644 --- a/speaches/server_models.py +++ b/speaches/server_models.py @@ -85,7 +85,7 @@ def from_segment( text=segment.text, words=( [WordObject.from_word(word) for word in segment.words] - if type(segment.words) == list + if isinstance(segment.words, list) else [] ), segments=[SegmentObject.from_segment(segment)], diff --git a/tests/app_test.py b/tests/app_test.py index 8f03fc8a..5c3b7b42 100644 --- a/tests/app_test.py +++ b/tests/app_test.py @@ -12,7 +12,7 @@ from speaches.config import BYTES_PER_SECOND from speaches.main import app -from speaches.server_models import TranscriptionVerboseResponse +from speaches.server_models import TranscriptionVerboseJsonResponse SIMILARITY_THRESHOLD = 0.97 AUDIO_FILES_LIMIT = 5 @@ -54,13 +54,13 @@ def stream_audio_data( def transcribe_audio_data( client: TestClient, data: bytes -) -> TranscriptionVerboseResponse: +) -> TranscriptionVerboseJsonResponse: response = client.post( TRANSCRIBE_ENDPOINT, files={"file": ("audio.raw", data, "audio/raw")}, ) data = json.loads(response.json()) # TODO: figure this out - return TranscriptionVerboseResponse(**data) # type: ignore + return TranscriptionVerboseJsonResponse(**data) # type: ignore @pytest.mark.parametrize("file_path", file_paths) @@ -70,14 +70,16 @@ def test_ws_audio_transcriptions( with open(file_path, "rb") as file: data = file.read() - streaming_transcription: TranscriptionVerboseResponse = None # type: ignore + streaming_transcription: TranscriptionVerboseJsonResponse = None # type: ignore thread = threading.Thread( target=stream_audio_data, args=(ws, data), kwargs={"speed": 4.0} ) thread.start() while True: try: - streaming_transcription = TranscriptionVerboseResponse(**ws.receive_json()) + streaming_transcription = TranscriptionVerboseJsonResponse( + **ws.receive_json() + ) except WebSocketDisconnect: break file_transcription = transcribe_audio_data(client, data)