Skip to content
This repository was archived by the owner on Mar 22, 2025. It is now read-only.

Commit 80e41d8

Browse files
authored
fix: Improve failure handling and logging (#189)
1 parent 8e2fb0f commit 80e41d8

File tree

10 files changed

+100
-115
lines changed

10 files changed

+100
-115
lines changed

podcast_archiver/base.py

Lines changed: 1 addition & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -3,9 +3,7 @@
33
import xml.etree.ElementTree as etree
44
from typing import TYPE_CHECKING
55

6-
from rich import print as rprint
7-
8-
from podcast_archiver.logging import logger
6+
from podcast_archiver.logging import logger, rprint
97
from podcast_archiver.processor import FeedProcessor
108

119
if TYPE_CHECKING:

podcast_archiver/cli.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -283,8 +283,8 @@ def generate_default_config(ctx: click.Context, param: click.Parameter, value: b
283283
)
284284
@click.pass_context
285285
def main(ctx: click.RichContext, /, **kwargs: Any) -> int:
286+
get_console().quiet = kwargs["quiet"]
286287
configure_logging(kwargs["verbose"])
287-
get_console().quiet = kwargs["quiet"] or kwargs["verbose"] > 1
288288
try:
289289
settings = Settings.load_from_dict(kwargs)
290290

podcast_archiver/download.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -59,7 +59,8 @@ def __call__(self) -> EpisodeResult:
5959
try:
6060
return self.run()
6161
except Exception as exc:
62-
logger.error("Download failed", exc_info=exc)
62+
logger.error(f"Download failed: {exc}")
63+
logger.debug("Exception while downloading", exc_info=exc)
6364
return EpisodeResult(self.episode, DownloadResult.FAILED)
6465

6566
def run(self) -> EpisodeResult:
@@ -73,7 +74,6 @@ def run(self) -> EpisodeResult:
7374
self.episode.enclosure.href,
7475
stream=True,
7576
allow_redirects=True,
76-
timeout=constants.REQUESTS_TIMEOUT,
7777
)
7878
response.raise_for_status()
7979
total_size = int(response.headers.get("content-length", "0"))

podcast_archiver/logging.py

Lines changed: 11 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -2,18 +2,25 @@
22

33
import logging
44
import logging.config
5+
from typing import Any
56

67
from rich import get_console
8+
from rich import print as _print
79
from rich.logging import RichHandler
810

911
logger = logging.getLogger("podcast_archiver")
1012

1113

14+
def rprint(*objects: Any, sep: str = " ", end: str = "\n", **kwargs: Any) -> None:
15+
if logger.level == logging.NOTSET or logger.level >= logging.WARNING:
16+
_print(*objects, sep=sep, end=end, **kwargs)
17+
return
18+
logger.info(objects[0].strip(), *objects[1:])
19+
20+
1221
def configure_logging(verbosity: int) -> None:
13-
if verbosity > 2:
22+
if verbosity > 1:
1423
level = logging.DEBUG
15-
elif verbosity == 2:
16-
level = logging.INFO
1724
elif verbosity == 1:
1825
level = logging.WARNING
1926
else:
@@ -35,4 +42,5 @@ def configure_logging(verbosity: int) -> None:
3542
)
3643
],
3744
)
45+
logger.setLevel(level)
3846
logger.debug("Running in debug mode.")

podcast_archiver/processor.py

Lines changed: 11 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -5,16 +5,12 @@
55
from threading import Event
66
from typing import TYPE_CHECKING
77

8-
from pydantic import ValidationError
9-
from requests import HTTPError
10-
from rich import print as rprint
11-
128
from podcast_archiver.download import DownloadJob
139
from podcast_archiver.enums import DownloadResult, QueueCompletionType
14-
from podcast_archiver.logging import logger
10+
from podcast_archiver.logging import logger, rprint
1511
from podcast_archiver.models import Episode, Feed, FeedInfo
1612
from podcast_archiver.types import EpisodeResult, EpisodeResultsList
17-
from podcast_archiver.utils import FilenameFormatter
13+
from podcast_archiver.utils import FilenameFormatter, handle_feed_request
1814

1915
if TYPE_CHECKING:
2016
from pathlib import Path
@@ -48,25 +44,15 @@ def __init__(self, settings: Settings) -> None:
4844

4945
def process(self, url: str) -> ProcessingResult:
5046
result = ProcessingResult()
51-
try:
52-
feed = Feed.from_url(url)
53-
except HTTPError as exc:
54-
if exc.response is not None:
55-
rprint(f"[error]Received status code {exc.response.status_code} from {url}[/]")
56-
logger.debug("Failed to request feed url %s", url, exc_info=exc)
57-
return result
58-
except ValidationError as exc:
59-
logger.debug("Invalid feed", exc_info=exc)
60-
rprint(f"[error]Received invalid feed from {url}[/]")
61-
return result
62-
63-
result.feed = feed
64-
rprint(f"\n[bold bright_magenta]Downloading archive for: {feed.info.title}[/]\n")
65-
66-
episode_results, completion_msg = self._process_episodes(feed=feed)
67-
self._handle_results(episode_results, result=result)
68-
69-
rprint(f"\n[bar.finished]✔ {completion_msg}[/]")
47+
with handle_feed_request(url):
48+
result.feed = Feed.from_url(url)
49+
50+
if result.feed:
51+
rprint(f"\n[bold bright_magenta]Downloading archive for: {result.feed.info.title}[/]\n")
52+
episode_results, completion_msg = self._process_episodes(feed=result.feed)
53+
self._handle_results(episode_results, result=result)
54+
55+
rprint(f"\n[bar.finished]✔ {completion_msg}[/]")
7056
return result
7157

7258
def _preflight_check(self, episode: Episode, target: Path) -> DownloadResult | None:

podcast_archiver/session.py

Lines changed: 29 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,33 @@
1-
from requests import Session
1+
from typing import Any
22

3-
from podcast_archiver.constants import USER_AGENT
3+
from requests import PreparedRequest, Session
4+
from requests.adapters import HTTPAdapter
5+
from requests.models import Response as Response
6+
from urllib3.util import Retry
7+
8+
from podcast_archiver.constants import REQUESTS_TIMEOUT, USER_AGENT
9+
10+
11+
class DefaultTimeoutHTTPAdapter(HTTPAdapter):
12+
def send(
13+
self,
14+
request: PreparedRequest,
15+
timeout: None | float | tuple[float, float] | tuple[float, None] = None,
16+
**kwargs: Any,
17+
) -> Response:
18+
return super().send(request, timeout=timeout or REQUESTS_TIMEOUT, **kwargs)
19+
20+
21+
_retries = Retry(
22+
total=3,
23+
connect=1,
24+
backoff_factor=0.5,
25+
status_forcelist=[500, 501, 502, 503, 504],
26+
)
27+
28+
_adapter = DefaultTimeoutHTTPAdapter(max_retries=_retries)
429

530
session = Session()
31+
session.mount("http://", _adapter)
32+
session.mount("https://", _adapter)
633
session.headers.update({"user-agent": USER_AGENT})

podcast_archiver/utils.py

Lines changed: 25 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -4,11 +4,13 @@
44
import re
55
from contextlib import contextmanager
66
from string import Formatter
7-
from typing import IO, TYPE_CHECKING, Any, Iterable, Iterator, TypedDict
7+
from typing import IO, TYPE_CHECKING, Any, Generator, Iterable, Iterator, TypedDict
88

9+
from pydantic import ValidationError
10+
from requests import HTTPError
911
from slugify import slugify as _slugify
1012

11-
from podcast_archiver.logging import logger
13+
from podcast_archiver.logging import logger, rprint
1214

1315
if TYPE_CHECKING:
1416
from pathlib import Path
@@ -119,3 +121,24 @@ def atomic_write(target: Path, mode: str = "w") -> Iterator[IO[Any]]:
119121
os.rename(tempfile, target)
120122
finally:
121123
tempfile.unlink(missing_ok=True)
124+
125+
126+
@contextmanager
127+
def handle_feed_request(url: str) -> Generator[None, Any, None]:
128+
try:
129+
yield
130+
except HTTPError as exc:
131+
logger.debug("Failed to request feed url %s", url, exc_info=exc)
132+
if (response := getattr(exc, "response", None)) is None:
133+
rprint(f"[error]Failed to retrieve feed {url}: {exc}[/]")
134+
return
135+
136+
rprint(f"[error]Received status code {response.status_code} from {url}[/]")
137+
138+
except ValidationError as exc:
139+
logger.debug("Feed validation failed for %s", url, exc_info=exc)
140+
rprint(f"[error]Received invalid feed from {url}[/]")
141+
142+
except Exception as exc:
143+
logger.debug("Unexpected error for url %s", url, exc_info=exc)
144+
rprint(f"[error]Failed to retrieve feed {url}: {exc}[/]")

tests/test_download.py

Lines changed: 11 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -77,13 +77,22 @@ def test_download_failed(
7777
responses.add(responses.GET, MEDIA_URL, b"BLOB")
7878

7979
job = download.DownloadJob(episode=episode, target=Path("file.mp3"))
80-
with failure_mode(side_effect=side_effect), caplog.at_level(logging.ERROR):
80+
with failure_mode(side_effect=side_effect), caplog.at_level(logging.DEBUG):
8181
result = job()
8282

8383
assert result == (episode, DownloadResult.FAILED)
8484
failure_rec = None
8585
for record in caplog.records:
86-
if record.message == "Download failed":
86+
if record.message.startswith("Download failed: "):
87+
failure_rec = record
88+
break
89+
90+
assert failure_rec
91+
assert not failure_rec.exc_info
92+
93+
failure_rec = None
94+
for record in caplog.records:
95+
if record.message == "Exception while downloading":
8796
failure_rec = record
8897
break
8998

tests/test_filenames.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,7 @@
55
from podcast_archiver.utils import FilenameFormatter
66

77
FEED_INFO = FeedInfo(
8-
title="That\Show",
8+
title="That\\Show",
99
subtitle="The one that never comes/came to be",
1010
author="TheJanwShow",
1111
language="de-DE",

tests/test_processor.py

Lines changed: 8 additions & 74 deletions
Original file line number | Diff line number | Diff line change
@@ -9,10 +9,11 @@
99
from podcast_archiver.config import Settings
1010
from podcast_archiver.enums import DownloadResult
1111
from podcast_archiver.models import FeedPage
12-
from podcast_archiver.processor import FeedProcessor
12+
from podcast_archiver.processor import FeedProcessor, ProcessingResult
1313

1414
if TYPE_CHECKING:
1515
from pydantic_core import Url
16+
from responses import RequestsMock
1617

1718

1819
@pytest.mark.parametrize(
@@ -43,78 +44,11 @@ def test_preflight_check(
4344
assert result == expected_result
4445

4546

46-
# def test_download_already_exists(tmp_path_cd: Path, feedobj_lautsprecher_notconsumed: dict[str, Any]) -> None:
47-
# feed = FeedPage.model_validate(feedobj_lautsprecher_notconsumed)
48-
# episode = feed.episodes[0]
49-
50-
# job = download.DownloadJob(episode=episode, target=Path("file.mp3"))
51-
# job.target.parent.mkdir(exist_ok=True)
52-
# job.target.touch()
53-
# result = job()
54-
55-
# assert result == (episode, DownloadResult.ALREADY_EXISTS)
56-
57-
58-
# def test_download_aborted(tmp_path_cd: Path, feedobj_lautsprecher: dict[str, Any]) -> None:
59-
# feed = FeedPage.model_validate(feedobj_lautsprecher)
60-
# episode = feed.episodes[0]
61-
62-
# job = download.DownloadJob(episode=episode, target=Path("file.mp3"))
63-
# job.stop_event.set()
64-
# result = job()
65-
66-
# assert result == (episode, DownloadResult.ABORTED)
67-
68-
69-
# class PartialObjectMock(Protocol):
70-
# def __call__(self, side_effect: type[Exception]) -> mock.Mock: ...
71-
72-
73-
# # mypy: disable-error-code="attr-defined"
74-
# @pytest.mark.parametrize(
75-
# "failure_mode, side_effect, should_download",
76-
# [
77-
# (partial(mock.patch.object, download.session, "get"), HTTPError, False),
78-
# (partial(mock.patch.object, utils.os, "fsync"), IOError, True),
79-
# ],
80-
# )
81-
# def test_download_failed(
82-
# tmp_path_cd: Path,
83-
# feedobj_lautsprecher_notconsumed: dict[str, Any],
84-
# failure_mode: PartialObjectMock,
85-
# side_effect: type[Exception],
86-
# caplog: pytest.LogCaptureFixture,
87-
# should_download: bool,
88-
# responses: RequestsMock,
89-
# ) -> None:
90-
# feed = FeedPage.model_validate(feedobj_lautsprecher_notconsumed)
91-
# episode = feed.episodes[0]
92-
# if should_download:
93-
# responses.add(responses.GET, MEDIA_URL, b"BLOB")
94-
95-
# job = download.DownloadJob(episode=episode, target=Path("file.mp3"))
96-
# with failure_mode(side_effect=side_effect), caplog.at_level(logging.ERROR):
97-
# result = job()
98-
99-
# assert result == (episode, DownloadResult.FAILED)
100-
# failure_rec = None
101-
# for record in caplog.records:
102-
# if record.message == "Download failed":
103-
# failure_rec = record
104-
# break
105-
106-
# assert failure_rec
107-
# assert failure_rec.exc_info
108-
# exc_type, _, _ = failure_rec.exc_info
109-
# assert exc_type == side_effect, failure_rec.exc_info
110-
47+
def test_retrieve_failure(responses: RequestsMock) -> None:
48+
settings = Settings()
49+
proc = FeedProcessor(settings)
11150

112-
# @pytest.mark.parametrize("write_info_json", [False, True])
113-
# def test_download_info_json(tmp_path_cd: Path, feedobj_lautsprecher: dict[str, Any], write_info_json: bool) -> None:
114-
# feed = FeedPage.model_validate(feedobj_lautsprecher)
115-
# episode = feed.episodes[0]
116-
# job = download.DownloadJob(episode=episode, target=tmp_path_cd / "file.mp3", write_info_json=write_info_json)
117-
# result = job()
51+
result = proc.process("https://broken.url.invalid")
11852

119-
# assert result == (episode, DownloadResult.COMPLETED_SUCCESSFULLY)
120-
# assert job.infojsonfile.exists() == write_info_json
53+
assert result == ProcessingResult()
54+
assert result.feed is None

0 commit comments

Comments
 (0)