Skip to content

Commit

Permalink
feat: Add auto-discovery from popular services
Browse files Browse the repository at this point in the history
  • Loading branch information
janw committed Jan 2, 2025
1 parent 7fe8f14 commit 265546e
Show file tree
Hide file tree
Showing 21 changed files with 434 additions and 55 deletions.
64 changes: 32 additions & 32 deletions .assets/podcast-archiver-dry-run.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
14 changes: 7 additions & 7 deletions .assets/podcast-archiver-help.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
1 change: 1 addition & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -62,5 +62,6 @@ repos:
language: system
require_serial: true
pass_filenames: false
always_run: true
files: ^podcast_archiver/config\.py$
types: [python]
22 changes: 22 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -62,12 +62,34 @@ podcast-archiver --dir ~/Podcasts --feed https://feeds.feedburner.com/TheAnthrop

Podcast Archiver expects values to its `--feed/-f` argument to be URLs pointing to an [RSS feed of a podcast](https://archive.is/jYk3E).

If you are not certain whether the link you have for a show you like points to its RSS feed, you can try passing it to Podcast Archiver directly. The archiver supports a variety of links from popular podcast players and platforms, including [Apple Podcasts](https://podcasts.apple.com/us/browse), [Overcast.fm](https://overcast.fm/), [Castro](https://castro.fm/), and [Pocket Casts](https://pocketcasts.com/):

```sh
# Archive from Apple Podcasts URL
podcast-archiver -f https://podcasts.apple.com/us/podcast/black-girl-gone-a-true-crime-podcast/id1556267741
# ... or just the ID
podcast-archiver -f 1556267741

# From Overcast podcast URL
podcast-archiver -f https://overcast.fm/itunes394775318/99-invisible
# ... or episode sharing links (will resolve to all episodes)
podcast-archiver -f https://overcast.fm/+AAyIOzrEy1g
```

#### Supported services

TBD

#### Local files

Feeds can also be "fetched" from a local file:

```bash
podcast-archiver -f file:/Users/janw/downloaded_feed.xml
```

#### Testing without downloading

To find out if you have the right feed, you may want to use the `--dry-run` option to output the discovered feed information and found episodes. It will prevent all downloads.

```sh
Expand Down
2 changes: 1 addition & 1 deletion config.yaml.example
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
## Podcast-Archiver configuration
## Generated using podcast-archiver v2.0.2
## Generated using podcast-archiver v2.1.0

# Field 'feeds': Feed URLs to archive.
#
Expand Down
3 changes: 2 additions & 1 deletion hack/rich-codex.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
TMPDIR=$(mktemp -d 2>/dev/null || mktemp -d -t 'tmpdir')

export FORCE_COLOR="1"
export TERM="xterm-16color"
export COLUMNS="120"
export CREATED_FILES="created.txt"
export DELETED_FILES="deleted.txt"
Expand All @@ -17,4 +18,4 @@ export PODCAST_ARCHIVER_IGNORE_DATABASE=true
# shellcheck disable=SC2064
trap "rm -rf '$TMPDIR'" EXIT

exec poetry run rich-codex --terminal-width $COLUMNS --notrim
poetry run rich-codex --terminal-width $COLUMNS --notrim --terminal-theme DIMMED_MONOKAI
3 changes: 2 additions & 1 deletion podcast_archiver/console.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@

_theme = Theme(
{
"error": "bold dark_red",
"error": "dark_red bold",
"errorhint": "dark_red dim",
"warning": "orange1 bold",
"warning_hint": "orange1 dim",
"completed": "dark_cyan bold",
Expand Down
2 changes: 1 addition & 1 deletion podcast_archiver/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
USER_AGENT = f"{PROG_NAME}/{__version__} (https://github.com/janw/podcast-archiver)"
ENVVAR_PREFIX = "PODCAST_ARCHIVER"

REQUESTS_TIMEOUT = 30
REQUESTS_TIMEOUT = (5, 30)

SUPPORTED_LINK_TYPES_RE = re.compile(r"^(audio|video)/")
DOWNLOAD_CHUNK_SIZE = 256 * 1024
Expand Down
4 changes: 4 additions & 0 deletions podcast_archiver/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,3 +45,7 @@ class NotModified(PodcastArchiverException):
def __init__(self, info: FeedInfo, *args: object) -> None:
super().__init__(*args)
self.info = info


class NotSupported(PodcastArchiverException):
    """Raised when the content found at a URL is not supported."""
29 changes: 22 additions & 7 deletions podcast_archiver/models/feed.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from pydantic import AliasChoices, BaseModel, ConfigDict, Field, field_validator

from podcast_archiver.constants import MAX_TITLE_LENGTH
from podcast_archiver.exceptions import NotModified
from podcast_archiver.exceptions import NotModified, NotSupported
from podcast_archiver.logging import logger, rprint
from podcast_archiver.models.episode import EpisodeOrFallback
from podcast_archiver.models.field_types import LenientDatetime
Expand Down Expand Up @@ -90,6 +90,13 @@ def truncate_title(cls, value: str) -> str:
def field_titles(cls) -> list[str]:
return [field.title for field in cls.model_fields.values() if field.title]

@property
def alternate_rss(self) -> str | None:
    """Return the href of the first RSS alternate link, or None if there is none.

    A link qualifies when its ``rel`` is ``"alternate"`` and its
    ``link_type`` is ``"application/rss+xml"``.
    """
    rss_hrefs = (
        entry.href
        for entry in self.links
        if entry.rel == "alternate" and entry.link_type == "application/rss+xml"
    )
    return next(rss_hrefs, None)


class FeedPage(BaseModel):
model_config = ConfigDict(arbitrary_types_allowed=True)
Expand All @@ -103,17 +110,25 @@ class FeedPage(BaseModel):
episodes: list[EpisodeOrFallback] = Field(default_factory=list, validation_alias=AliasChoices("entries", "items"))

@classmethod
def parse_feed(cls, source: str | bytes, alt_url: str | None) -> FeedPage:
def parse_feed(cls, source: str | bytes, alt_url: str | None, retry: bool = False) -> FeedPage:
feedobj = feedparser.parse(source)
obj = cls.model_validate(feedobj)
if obj.bozo and (exc := obj.bozo_exception) and isinstance(exc, SAXParseException):
url = source if isinstance(source, str) and not alt_url else alt_url
if not obj.bozo:
return obj

if (fallback_url := obj.feed.alternate_rss) and not retry:
logger.info("Attempting to fetch alternate feed at %s", fallback_url)
return cls.from_url(fallback_url, retry=True)

url = source if isinstance(source, str) and not alt_url else alt_url
if (exc := obj.bozo_exception) and isinstance(exc, SAXParseException):
rprint(f"Feed content is not well-formed for {url}", style="warning")
rprint(f"Continuing processing but here be dragons ({exc.getMessage()})", style="warning_hint")
return obj
rprint(f"Attemping processing but here be dragons ({exc.getMessage()})", style="warninghint")

raise NotSupported(f"Content at {url} is not supported")

@classmethod
def from_url(cls, url: str, *, known_info: FeedInfo | None = None) -> FeedPage:
def from_url(cls, url: str, *, known_info: FeedInfo | None = None, retry: bool = False) -> FeedPage:
parsed = urlparse(url)
if parsed.scheme == "file":
return cls.parse_feed(parsed.path, None)
Expand Down
8 changes: 5 additions & 3 deletions podcast_archiver/processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
FutureEpisodeResult,
ProcessingResult,
)
from podcast_archiver.urls import registry
from podcast_archiver.utils import FilenameFormatter, handle_feed_request
from podcast_archiver.utils.pretty_printing import PrettyPrintEpisodeRange

Expand Down Expand Up @@ -59,8 +60,9 @@ def process(self, url: str, dry_run: bool = False) -> ProcessingResult:
return result

def load_feed(self, url: str, known_feeds: dict[str, FeedInfo]) -> Feed | None:
with handle_feed_request(url):
feed = Feed(url=url, known_info=known_feeds.get(url))
resolved_url = registry.get_feed(url) or url
with handle_feed_request(resolved_url):
feed = Feed(url=resolved_url, known_info=known_feeds.get(url))
known_feeds[url] = feed.info
return feed

Expand Down Expand Up @@ -116,7 +118,7 @@ def process_feed(self, feed: Feed, dry_run: bool) -> ProcessingResult:
exists = isinstance(enqueued, EpisodeResult) and enqueued.result == DownloadResult.ALREADY_EXISTS
pretty_range.update(exists, episode)

if not dry_run or self.settings.verbose > 0:
if not dry_run:
results.append(enqueued)

if (max_count := self.settings.maximum_episode_count) and idx == max_count:
Expand Down
16 changes: 16 additions & 0 deletions podcast_archiver/session.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,22 @@ def get_and_raise(
response.raise_for_status()
return response

def request(self, method: str, url: str, **kwargs: Any) -> Response:  # type: ignore[override]
    """Send a request, trying an HTTPS variant of the URL first.

    URLs that already start with ``https://`` are sent unchanged. For any
    other URL an ``https`` variant is attempted first: ``http://`` is swapped
    for ``https://``, anything else gets ``https://`` prepended. If that
    attempt raises, the exception is discarded and the URL is requested
    exactly as given.
    """
    if not url.startswith("https://"):
        if url.startswith("http://"):
            upgraded = "https" + url[4:]
        else:
            upgraded = "https://" + url

        try:
            return super().request(method, upgraded, **kwargs)
        except Exception:
            # Best effort only: fall through and retry with the original URL.
            pass

    return super().request(method, url, **kwargs)


session = ArchiverSession()
session.mount("http://", _adapter)
Expand Down
23 changes: 23 additions & 0 deletions podcast_archiver/urls/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
from podcast_archiver.urls.base import UrlSourceRegistry
from podcast_archiver.urls.base64 import Base64EncodedUrlSource
from podcast_archiver.urls.fireside import FiresideSource
from podcast_archiver.urls.prefixed import UrlPrefixSource
from podcast_archiver.urls.soundcloud import SoundCloudSource
from podcast_archiver.urls.via_apple import (
ApplePodcastsByIdSource,
ApplePodcastsSource,
ContainingApplePodcastsUrlSource,
)

registry = UrlSourceRegistry()

# Sources are registered in exactly this order.
# NOTE(review): presumably the registry consults sources in registration
# order, so keep the sequence stable — confirm against UrlSourceRegistry.
for _source_cls in (
    ApplePodcastsSource,
    ApplePodcastsByIdSource,
    ContainingApplePodcastsUrlSource,
    UrlPrefixSource,
    Base64EncodedUrlSource,
    # Known website sources that define feeds as alternate+application/rss+xml
    # or use a deterministic URL pattern to find the feed URL from the website URL.
    FiresideSource,
    SoundCloudSource,
):
    registry.register(_source_cls)
del _source_cls  # keep the loop variable out of the module namespace
Loading

0 comments on commit 265546e

Please sign in to comment.