Skip to content
This repository was archived by the owner on Oct 30, 2025. It is now read-only.

Commit 265546e

Browse files
committed
feat: Add auto-discovery from popular services
1 parent 7fe8f14 commit 265546e

21 files changed

+434
-55
lines changed

.assets/podcast-archiver-dry-run.svg

Lines changed: 32 additions & 32 deletions
Loading

.assets/podcast-archiver-help.svg

Lines changed: 7 additions & 7 deletions
Loading

.pre-commit-config.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,5 +62,6 @@ repos:
6262
language: system
6363
require_serial: true
6464
pass_filenames: false
65+
always_run: true
6566
files: ^podcast_archiver/config\.py$
6667
types: [python]

README.md

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,12 +62,34 @@ podcast-archiver --dir ~/Podcasts --feed https://feeds.feedburner.com/TheAnthrop
6262

6363
Podcast Archiver expects values to its `--feed/-f` argument to be URLs pointing to an [RSS feed of a podcast](https://archive.is/jYk3E).
6464

65+
If you are not certain whether the link you have for a show you like points to an RSS feed, you can try passing it to Podcast Archiver directly. The archiver supports a variety of links from popular podcast players and platforms, including [Apple Podcasts](https://podcasts.apple.com/us/browse), [Overcast.fm](https://overcast.fm/), [Castro](https://castro.fm/), and [Pocket Casts](https://pocketcasts.com/):
66+
67+
```sh
68+
# Archive from Apple Podcasts URL
69+
podcast-archiver -f https://podcasts.apple.com/us/podcast/black-girl-gone-a-true-crime-podcast/id1556267741
70+
# ... or just the ID
71+
podcast-archiver -f 1556267741
72+
73+
# From Overcast podcast URL
74+
podcast-archiver -f https://overcast.fm/itunes394775318/99-invisible
75+
# ... or episode sharing links (will resolve to all episodes)
76+
podcast-archiver -f https://overcast.fm/+AAyIOzrEy1g
77+
```
78+
79+
#### Supported services
80+
81+
TBD
82+
83+
#### Local files
84+
6585
Feeds can also be "fetched" from a local file:
6686

6787
```bash
6888
podcast-archiver -f file:/Users/janw/downloaded_feed.xml
6989
```
7090

91+
#### Testing without downloading
92+
7193
To find out if you have the right feed, you may want to use the `--dry-run` option to output the discovered feed information and found episodes. It will prevent all downloads.
7294

7395
```sh

config.yaml.example

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
## Podcast-Archiver configuration
2-
## Generated using podcast-archiver v2.0.2
2+
## Generated using podcast-archiver v2.1.0
33

44
# Field 'feeds': Feed URLs to archive.
55
#

hack/rich-codex.sh

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
TMPDIR=$(mktemp -d 2>/dev/null || mktemp -d -t 'tmpdir')
44

55
export FORCE_COLOR="1"
6+
export TERM="xterm-16color"
67
export COLUMNS="120"
78
export CREATED_FILES="created.txt"
89
export DELETED_FILES="deleted.txt"
@@ -17,4 +18,4 @@ export PODCAST_ARCHIVER_IGNORE_DATABASE=true
1718
# shellcheck disable=SC2064
1819
trap "rm -rf '$TMPDIR'" EXIT
1920

20-
exec poetry run rich-codex --terminal-width $COLUMNS --notrim
21+
poetry run rich-codex --terminal-width $COLUMNS --notrim --terminal-theme DIMMED_MONOKAI

podcast_archiver/console.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,8 @@
55

66
_theme = Theme(
77
{
8-
"error": "bold dark_red",
8+
"error": "dark_red bold",
9+
"errorhint": "dark_red dim",
910
"warning": "orange1 bold",
1011
"warning_hint": "orange1 dim",
1112
"completed": "dark_cyan bold",

podcast_archiver/constants.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
USER_AGENT = f"{PROG_NAME}/{__version__} (https://github.com/janw/podcast-archiver)"
88
ENVVAR_PREFIX = "PODCAST_ARCHIVER"
99

10-
REQUESTS_TIMEOUT = 30
10+
REQUESTS_TIMEOUT = (5, 30)
1111

1212
SUPPORTED_LINK_TYPES_RE = re.compile(r"^(audio|video)/")
1313
DOWNLOAD_CHUNK_SIZE = 256 * 1024

podcast_archiver/exceptions.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,3 +45,7 @@ class NotModified(PodcastArchiverException):
4545
def __init__(self, info: FeedInfo, *args: object) -> None:
4646
super().__init__(*args)
4747
self.info = info
48+
49+
50+
class NotSupported(PodcastArchiverException):
51+
pass

podcast_archiver/models/feed.py

Lines changed: 22 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
from pydantic import AliasChoices, BaseModel, ConfigDict, Field, field_validator
1111

1212
from podcast_archiver.constants import MAX_TITLE_LENGTH
13-
from podcast_archiver.exceptions import NotModified
13+
from podcast_archiver.exceptions import NotModified, NotSupported
1414
from podcast_archiver.logging import logger, rprint
1515
from podcast_archiver.models.episode import EpisodeOrFallback
1616
from podcast_archiver.models.field_types import LenientDatetime
@@ -90,6 +90,13 @@ def truncate_title(cls, value: str) -> str:
9090
def field_titles(cls) -> list[str]:
9191
return [field.title for field in cls.model_fields.values() if field.title]
9292

93+
@property
94+
def alternate_rss(self) -> str | None:
95+
for link in self.links:
96+
if link.rel == "alternate" and link.link_type == "application/rss+xml":
97+
return link.href
98+
return None
99+
93100

94101
class FeedPage(BaseModel):
95102
model_config = ConfigDict(arbitrary_types_allowed=True)
@@ -103,17 +110,25 @@ class FeedPage(BaseModel):
103110
episodes: list[EpisodeOrFallback] = Field(default_factory=list, validation_alias=AliasChoices("entries", "items"))
104111

105112
@classmethod
106-
def parse_feed(cls, source: str | bytes, alt_url: str | None) -> FeedPage:
113+
def parse_feed(cls, source: str | bytes, alt_url: str | None, retry: bool = False) -> FeedPage:
107114
feedobj = feedparser.parse(source)
108115
obj = cls.model_validate(feedobj)
109-
if obj.bozo and (exc := obj.bozo_exception) and isinstance(exc, SAXParseException):
110-
url = source if isinstance(source, str) and not alt_url else alt_url
116+
if not obj.bozo:
117+
return obj
118+
119+
if (fallback_url := obj.feed.alternate_rss) and not retry:
120+
logger.info("Attempting to fetch alternate feed at %s", fallback_url)
121+
return cls.from_url(fallback_url, retry=True)
122+
123+
url = source if isinstance(source, str) and not alt_url else alt_url
124+
if (exc := obj.bozo_exception) and isinstance(exc, SAXParseException):
111125
rprint(f"Feed content is not well-formed for {url}", style="warning")
112-
rprint(f"Continuing processing but here be dragons ({exc.getMessage()})", style="warning_hint")
113-
return obj
126+
rprint(f"Attemping processing but here be dragons ({exc.getMessage()})", style="warninghint")
127+
128+
raise NotSupported(f"Content at {url} is not supported")
114129

115130
@classmethod
116-
def from_url(cls, url: str, *, known_info: FeedInfo | None = None) -> FeedPage:
131+
def from_url(cls, url: str, *, known_info: FeedInfo | None = None, retry: bool = False) -> FeedPage:
117132
parsed = urlparse(url)
118133
if parsed.scheme == "file":
119134
return cls.parse_feed(parsed.path, None)

0 commit comments

Comments
 (0)