-
Notifications
You must be signed in to change notification settings - Fork 144
/
Copy pathmain.py
136 lines (111 loc) · 5.15 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
# Podcast Downloader/Uploader
from pathlib import Path
from urllib.request import urlretrieve
import feedparser
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
import config
# podcast homepages
# not needed anymore, getting links from the rss feeds instead, but keeping for reference
# URL_BEGINNERS = "http://nihongoconteppei.com"
# URL_INTERMED = "http://teppeisensei.com"
# URL_TEPPNORI = "http://teppeinorikojapanese.com"
# URL_NORIKO = "https://www.japanesewithnoriko.com"
# RSS feeds
RSS_BEGINNERS = "https://nihongoconteppei.com/feed/podcast/"
RSS_INTERMED = "http://teppeisensei.com/index20.rdf"
RSS_TEPPNORI = "https://teppeinorikojapanese.com/feed/podcast/"
RSS_NORIKO = "https://anchor.fm/s/1380f800/podcast/rss"
def find_last_ep(folder):
"""Takes folder as STR and returns the episode number of the most recent file as INT."""
# TODO: Needs updating once episode number reaches 1000+
path = Path(folder)
# convert the generator object in to a list of .mp3 files
file_list = [p for p in path.glob("*.mp3")]
last_file = str(file_list[len(file_list) - 1])
# return just the last three characters before ".mp3"
return int(last_file[-7:-4])
def get_episodes_list(rss_url, last_ep, is_intermed=False):
"""Takes RSS URL and the number of the last episode and returns a LIST of all more recent ones."""
list_out = []
feed = feedparser.parse(rss_url)
# gets the number of entries
newest_link = feed.entries[0].enclosures[0].get("href")
if is_intermed:
# deal with the occasional case of extra characters at the end of intermed files
# i.e. Nihongo20con20Teppei23526-8cceb.mp3
newest_ep = int(newest_link[63:66])
else:
# just get the episode number from the file name e.g. "...284.mp3"
# fix for "Teppeinoriko152-1.mp3", just in case it happens again, also fix in the file names below
newest_link = newest_link.replace("-1.mp3", ".mp3")
newest_ep = int(newest_link[-7:-4])
# add all newer eps into a list
# deal with the case there are no newer eps
if newest_ep > last_ep:
for i in range(0, newest_ep - last_ep):
list_out.append(feed.entries[i].enclosures[0].get("href"))
# sort the list (older to newer), doesn't matter if empty
list_out.sort()
# then return list of new eps, or just an empty list if there are none
return list_out
def download_episodes(ep_list, is_intermed=False, is_teppnori=False):
"""Takes a LIST of episodes, downloads and uploads them."""
# check if the upload directories exist, and create them
for file in ep_list:
# intermed/teppnori files have a different file name format, deal with the possible extra characters
if is_intermed:
filename = f"{str(file)[41:66]}.mp3"
target = config.FOLDER_INTERMED + filename
elif is_teppnori:
# the fix for "Teppeinoriko152-1.mp3"
file = file.replace("-1.mp3", ".mp3")
filename = f"{str(file)[-19:-4]}.mp3"
target = config.FOLDER_TEPPNORI + filename
else:
filename = f"{str(file)[-27:-4]}.mp3"
target = config.FOLDER_BEGINNERS + filename
urlretrieve(file, target)
print(f"Downloaded {filename}")
# copy the file to the upload folder too
upload_episode(filename, is_intermed, is_teppnori)
print(f"Uploaded {filename}")
def upload_episode(file_name, is_intermed, is_teppnori):
"""Takes a file name as STR and uploads in to the appropriate Google Drive folder."""
if is_intermed:
source_file = config.FOLDER_INTERMED + file_name
folder_id = config.UPLOAD_INTERMED
elif is_teppnori:
source_file = config.FOLDER_TEPPNORI + file_name
folder_id = config.UPLOAD_TEPPNORI
else:
source_file = config.FOLDER_BEGINNERS + file_name
folder_id = config.UPLOAD_BEGINNERS
# upload the file using PyDrive
file = drive.CreateFile({"title": file_name, "mimeType": "audio/mpeg", "parents": [{"id": folder_id}]})
file.SetContentFile(source_file)
file.Upload()
def main():
# arguments: rss url and the number of the last downloaded episode
beginners_list = get_episodes_list(RSS_BEGINNERS, find_last_ep(config.FOLDER_BEGINNERS))
if len(beginners_list) > 0:
download_episodes(beginners_list)
else:
print("No new beginners episodes found.")
intermed_list = get_episodes_list(RSS_INTERMED, find_last_ep(config.FOLDER_INTERMED), is_intermed=True)
if len(intermed_list) > 0:
download_episodes(intermed_list, is_intermed=True)
else:
print("No new intermed episodes found.")
teppnori_list = get_episodes_list(RSS_TEPPNORI, find_last_ep(config.FOLDER_TEPPNORI))
if len(teppnori_list) > 0:
download_episodes(teppnori_list, is_teppnori=True)
else:
print("No new Teppei & Noriko episodes found.")
# Google Auth and Drive stuff, opens a link in the web browser for authentication
# requires client_secrets.json (downloaded from the Google API Console) in the same folder
gauth = GoogleAuth()
gauth.LocalWebserverAuth()
drive = GoogleDrive(gauth)
if __name__ == "__main__":
main()