Skip to content
This repository was archived by the owner on Mar 22, 2025. It is now read-only.

Commit 265546e

Browse files
committed
feat: Add auto-discovery from popular services
1 parent 7fe8f14 commit 265546e

21 files changed

+434
-55
lines changed

.assets/podcast-archiver-dry-run.svg

Lines changed: 32 additions & 32 deletions
Loading

.assets/podcast-archiver-help.svg

Lines changed: 7 additions & 7 deletions
Loading

.pre-commit-config.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,5 +62,6 @@ repos:
6262
language: system
6363
require_serial: true
6464
pass_filenames: false
65+
always_run: true
6566
files: ^podcast_archiver/config\.py$
6667
types: [python]

README.md

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,12 +62,34 @@ podcast-archiver --dir ~/Podcasts --feed https://feeds.feedburner.com/TheAnthrop
6262

6363
Podcast Archiver expects values to its `--feed/-f` argument to be URLs pointing to an [RSS feed of a podcast](https://archive.is/jYk3E).
6464

65+
If you are not certain whether the link you have for a show that you like is an actual RSS feed URL, you can try passing it to Podcast Archiver directly. The archiver supports a variety of links from popular podcast players and platforms, including [Apple Podcasts](https://podcasts.apple.com/us/browse), [Overcast.fm](https://overcast.fm/), [Castro](https://castro.fm/), and [Pocket Casts](https://pocketcasts.com/):
66+
67+
```sh
68+
# Archive from Apple Podcasts URL
69+
podcast-archiver -f https://podcasts.apple.com/us/podcast/black-girl-gone-a-true-crime-podcast/id1556267741
70+
# ... or just the ID
71+
podcast-archiver -f 1556267741
72+
73+
# From Overcast podcast URL
74+
podcast-archiver -f https://overcast.fm/itunes394775318/99-invisible
75+
# ... or episode sharing links (will resolve to all episodes)
76+
podcast-archiver -f https://overcast.fm/+AAyIOzrEy1g
77+
```
78+
79+
#### Supported services
80+
81+
TBD
82+
83+
#### Local files
84+
6585
Feeds can also be "fetched" from a local file:
6686

6787
```bash
6888
podcast-archiver -f file:/Users/janw/downloaded_feed.xml
6989
```
7090

91+
#### Testing without downloading
92+
7193
To find out if you have the right feed, you may want to use the `--dry-run` option to output the discovered feed information and found episodes. It will prevent all downloads.
7294

7395
```sh

config.yaml.example

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
## Podcast-Archiver configuration
2-
## Generated using podcast-archiver v2.0.2
2+
## Generated using podcast-archiver v2.1.0
33

44
# Field 'feeds': Feed URLs to archive.
55
#

hack/rich-codex.sh

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
TMPDIR=$(mktemp -d 2>/dev/null || mktemp -d -t 'tmpdir')
44

55
export FORCE_COLOR="1"
6+
export TERM="xterm-16color"
67
export COLUMNS="120"
78
export CREATED_FILES="created.txt"
89
export DELETED_FILES="deleted.txt"
@@ -17,4 +18,4 @@ export PODCAST_ARCHIVER_IGNORE_DATABASE=true
1718
# shellcheck disable=SC2064
1819
trap "rm -rf '$TMPDIR'" EXIT
1920

20-
exec poetry run rich-codex --terminal-width $COLUMNS --notrim
21+
poetry run rich-codex --terminal-width $COLUMNS --notrim --terminal-theme DIMMED_MONOKAI

podcast_archiver/console.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,8 @@
55

66
_theme = Theme(
77
{
8-
"error": "bold dark_red",
8+
"error": "dark_red bold",
9+
"errorhint": "dark_red dim",
910
"warning": "orange1 bold",
1011
"warning_hint": "orange1 dim",
1112
"completed": "dark_cyan bold",

podcast_archiver/constants.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
USER_AGENT = f"{PROG_NAME}/{__version__} (https://github.com/janw/podcast-archiver)"
88
ENVVAR_PREFIX = "PODCAST_ARCHIVER"
99

10-
REQUESTS_TIMEOUT = 30
10+
REQUESTS_TIMEOUT = (5, 30)
1111

1212
SUPPORTED_LINK_TYPES_RE = re.compile(r"^(audio|video)/")
1313
DOWNLOAD_CHUNK_SIZE = 256 * 1024

podcast_archiver/exceptions.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,3 +45,7 @@ class NotModified(PodcastArchiverException):
4545
def __init__(self, info: FeedInfo, *args: object) -> None:
4646
super().__init__(*args)
4747
self.info = info
48+
49+
50+
class NotSupported(PodcastArchiverException):
51+
pass

podcast_archiver/models/feed.py

Lines changed: 22 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
from pydantic import AliasChoices, BaseModel, ConfigDict, Field, field_validator
1111

1212
from podcast_archiver.constants import MAX_TITLE_LENGTH
13-
from podcast_archiver.exceptions import NotModified
13+
from podcast_archiver.exceptions import NotModified, NotSupported
1414
from podcast_archiver.logging import logger, rprint
1515
from podcast_archiver.models.episode import EpisodeOrFallback
1616
from podcast_archiver.models.field_types import LenientDatetime
@@ -90,6 +90,13 @@ def truncate_title(cls, value: str) -> str:
9090
def field_titles(cls) -> list[str]:
9191
return [field.title for field in cls.model_fields.values() if field.title]
9292

93+
@property
94+
def alternate_rss(self) -> str | None:
95+
for link in self.links:
96+
if link.rel == "alternate" and link.link_type == "application/rss+xml":
97+
return link.href
98+
return None
99+
93100

94101
class FeedPage(BaseModel):
95102
model_config = ConfigDict(arbitrary_types_allowed=True)
@@ -103,17 +110,25 @@ class FeedPage(BaseModel):
103110
episodes: list[EpisodeOrFallback] = Field(default_factory=list, validation_alias=AliasChoices("entries", "items"))
104111

105112
@classmethod
106-
def parse_feed(cls, source: str | bytes, alt_url: str | None) -> FeedPage:
113+
def parse_feed(cls, source: str | bytes, alt_url: str | None, retry: bool = False) -> FeedPage:
107114
feedobj = feedparser.parse(source)
108115
obj = cls.model_validate(feedobj)
109-
if obj.bozo and (exc := obj.bozo_exception) and isinstance(exc, SAXParseException):
110-
url = source if isinstance(source, str) and not alt_url else alt_url
116+
if not obj.bozo:
117+
return obj
118+
119+
if (fallback_url := obj.feed.alternate_rss) and not retry:
120+
logger.info("Attempting to fetch alternate feed at %s", fallback_url)
121+
return cls.from_url(fallback_url, retry=True)
122+
123+
url = source if isinstance(source, str) and not alt_url else alt_url
124+
if (exc := obj.bozo_exception) and isinstance(exc, SAXParseException):
111125
rprint(f"Feed content is not well-formed for {url}", style="warning")
112-
rprint(f"Continuing processing but here be dragons ({exc.getMessage()})", style="warning_hint")
113-
return obj
126+
rprint(f"Attemping processing but here be dragons ({exc.getMessage()})", style="warninghint")
127+
128+
raise NotSupported(f"Content at {url} is not supported")
114129

115130
@classmethod
116-
def from_url(cls, url: str, *, known_info: FeedInfo | None = None) -> FeedPage:
131+
def from_url(cls, url: str, *, known_info: FeedInfo | None = None, retry: bool = False) -> FeedPage:
117132
parsed = urlparse(url)
118133
if parsed.scheme == "file":
119134
return cls.parse_feed(parsed.path, None)

podcast_archiver/processor.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
FutureEpisodeResult,
2020
ProcessingResult,
2121
)
22+
from podcast_archiver.urls import registry
2223
from podcast_archiver.utils import FilenameFormatter, handle_feed_request
2324
from podcast_archiver.utils.pretty_printing import PrettyPrintEpisodeRange
2425

@@ -59,8 +60,9 @@ def process(self, url: str, dry_run: bool = False) -> ProcessingResult:
5960
return result
6061

6162
def load_feed(self, url: str, known_feeds: dict[str, FeedInfo]) -> Feed | None:
62-
with handle_feed_request(url):
63-
feed = Feed(url=url, known_info=known_feeds.get(url))
63+
resolved_url = registry.get_feed(url) or url
64+
with handle_feed_request(resolved_url):
65+
feed = Feed(url=resolved_url, known_info=known_feeds.get(url))
6466
known_feeds[url] = feed.info
6567
return feed
6668

@@ -116,7 +118,7 @@ def process_feed(self, feed: Feed, dry_run: bool) -> ProcessingResult:
116118
exists = isinstance(enqueued, EpisodeResult) and enqueued.result == DownloadResult.ALREADY_EXISTS
117119
pretty_range.update(exists, episode)
118120

119-
if not dry_run or self.settings.verbose > 0:
121+
if not dry_run:
120122
results.append(enqueued)
121123

122124
if (max_count := self.settings.maximum_episode_count) and idx == max_count:

podcast_archiver/session.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,22 @@ def get_and_raise(
4040
response.raise_for_status()
4141
return response
4242

43+
def request(self, method: str, url: str, **kwargs: Any) -> Response: # type: ignore[override]
44+
if url.startswith("https://"):
45+
return super().request(method, url, **kwargs)
46+
47+
if url.startswith("http://"):
48+
securl = "https" + url[4:]
49+
else:
50+
securl = "https://" + url
51+
52+
try:
53+
return super().request(method, securl, **kwargs)
54+
except Exception:
55+
pass
56+
57+
return super().request(method, url, **kwargs)
58+
4359

4460
session = ArchiverSession()
4561
session.mount("http://", _adapter)

podcast_archiver/urls/__init__.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
from podcast_archiver.urls.base import UrlSourceRegistry
2+
from podcast_archiver.urls.base64 import Base64EncodedUrlSource
3+
from podcast_archiver.urls.fireside import FiresideSource
4+
from podcast_archiver.urls.prefixed import UrlPrefixSource
5+
from podcast_archiver.urls.soundcloud import SoundCloudSource
6+
from podcast_archiver.urls.via_apple import (
7+
ApplePodcastsByIdSource,
8+
ApplePodcastsSource,
9+
ContainingApplePodcastsUrlSource,
10+
)
11+
12+
registry = UrlSourceRegistry()
13+
14+
registry.register(ApplePodcastsSource)
15+
registry.register(ApplePodcastsByIdSource)
16+
registry.register(ContainingApplePodcastsUrlSource)
17+
registry.register(UrlPrefixSource)
18+
registry.register(Base64EncodedUrlSource)
19+
20+
# Known website sources that define feeds as alternate+application/rss+xml
21+
# or use a deterministic URL pattern to find the feed URL from the website URL.
22+
registry.register(FiresideSource)
23+
registry.register(SoundCloudSource)

0 commit comments

Comments
 (0)