Skip to content

Commit

Permalink
Merge branch 'skadogg/issue101'
Browse files Browse the repository at this point in the history
  • Loading branch information
skadogg committed Feb 24, 2024
2 parents d9dfa3b + eed2678 commit 0d7333c
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 37 deletions.
41 changes: 8 additions & 33 deletions modules/justwatch.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,14 @@
from selenium.webdriver.support.ui import WebDriverWait
import logging
import modules.auto_sign_in
import modules.data_bin_convert
import modules.html
import modules.ld_json
import modules.runtime
import os
import random
import time


def get_titles_count(driver):
try:
# Reads number of titles from top of page, e.g. "887 titles," and converts to int
Expand Down Expand Up @@ -81,12 +81,13 @@ def balance_movie_and_tv_lists(movie_list, tv_list, good_ratio=0.8):
return bigger_list + smaller_list


def scrape_justwatch(media):
def scrape_justwatch(url):
# Scrape your data from JustWatch.
# media is inferred from the url and is either 'tv' or 'movies'
import modules.auto_sign_in # TODO this probably doesn't belong here

media = media.lower()
if '/tv-show/' in url or 'content_type=show' in url or '/tv-show-tracking' in url:
media = 'tv'
else:
media = 'movies'

logging.debug(f'{media=}')
load_dotenv(dotenv_path='./.env')

Expand All @@ -105,10 +106,7 @@ def scrape_justwatch(media):
driver = webdriver.Chrome(options=options)
driver.set_page_load_timeout(60)

if media == 'movies':
driver.get('https://www.justwatch.com/us/lists/my-lists?content_type=movie&sort_by=popular_30_day')
else:
driver.get('https://www.justwatch.com/us/lists/tv-show-tracking?inner_tab=continue_watching')
driver.get(url)

driver.maximize_window()
# driver.implicitly_wait(1.0)
Expand All @@ -125,30 +123,13 @@ def scrape_justwatch(media):
logging.debug('Scrolling to bottom of page')
scroll_down(driver)

# # Get name, episode number/title, left in season, main show link from main watchlist
# logging.debug('Getting all show cards from main page')
# if media == 'movies':
# show_cards = driver.find_elements(By.XPATH, '//div[@class="title-card-basic title-card-basic"]')
# else:
# show_cards = driver.find_elements(By.XPATH, '//div[@class="title-card-basic title-card-show-episode"]')

show_card_data = get_show_card_data(driver, media)

if dev_mode:
dev_items = 5
logging.debug('Dev mode: only looking at first dev_items items in list')
show_card_data = show_card_data[0:dev_items]

# logging.debug('Getting all show links from each card')
# # show_card_all_links = []
# # show_card_full_text = []
# show_card_data = []
# for i in range(len(show_cards)):
# show_card_main_link = show_cards[i].find_elements(By.TAG_NAME, 'a')[0].get_dom_attribute('href')
# show_card_full_text = show_cards[i].text
# show_card_data.append([show_card_main_link, show_card_full_text])


'''
show_card_data = [['/us/movie/oppenheimer', "Oppenheimer (2023)\nThe story of J. Robert Oppenheimer's role in the development of the atomic bomb during World War II.\n8.4\n29 offers available"], ['/us/movie/killers-of-the-flower-moon', 'Killers of the Flower Moon (2023)\nWhen oil is discovered in 1920s Oklahoma under Osage Nation land, the Osage people are murdered one by one—until the FBI steps in to unravel the mystery.\n7.7\nWatch now'], ['/us/movie/everything-everywhere-all-at-once', "Everything Everywhere All at Once (2022)\nAn aging Chinese immigrant is swept up in an insane adventure, where she alone can save what's important to her by connecting with the lives she could have led in other universes.\n7.8\nWatch now"], ['/us/movie/asteroid-city', 'Asteroid City (2023)\nIn an American desert town circa 1955, the itinerary of a Junior Stargazer/Space Cadet convention is spectacularly disrupted by world-changing events.\n6.5\nWatch now'], ['/us/movie/dumb-money', "Dumb Money (2023)\nDavid vs. Goliath tale about everyday people who flipped the script on Wall Street and got rich by turning GameStop (the video game store) into the world's hottest company.\n6.9\nWatch now"]]
show_card_data = [['/us/tv-show/scrubs', 'TV\nScrubs\nS6 E5\n+17\nMy Friend with Money\nWatch now'], ['/us/tv-show/love-on-the-spectrum-u-s', 'TV\nLove on the Spectrum U.S.\nS2 E2\n+5\nSeason 2\nWatch now']]
Expand Down Expand Up @@ -275,12 +256,6 @@ def scrape_justwatch(media):
show_db += activity_list

# Save my work
import modules.data_bin_convert # TODO this probably doesn't belong here
# if media == 'movies':
# modules.data_bin_convert.data_to_bin(activity_list, './my_data/saved_data_movies.bin')
# else:
# modules.data_bin_convert.data_to_bin(activity_list, './my_data/saved_data_tv.bin')
# data_list_everything = modules.data_bin_convert.bin_to_data()
modules.data_bin_convert.data_to_bin(show_db, './my_data/saved_data.bin')


Expand Down
13 changes: 11 additions & 2 deletions modules/runtime.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,15 +55,24 @@ def percent_complete(minutes_left, minutes_total):
# minutes_left = modules.runtime.time_left_in_tv_series(season_data,50,3,21)
# minutes_total = modules.runtime.time_left_in_tv_series(season_data,50)
# pct_done = modules.runtime.percent_complete(minutes_left, minutes_total)
return (minutes_total - minutes_left) / minutes_total
try:
return (minutes_total - minutes_left) / minutes_total
except:
return 0


def time_left_in_tv_series_report(show_list):
time_info = []
for i in range(len(show_list)):
if isinstance(show_list[i], Tvshow):
show_title = modules.shield.generate_shield_text(show_list[i])
current_season, current_episode = show_list[i].next_episode.replace('S', '').replace('E', '').split(' ')

# print(show_list[i].next_episode)
try:
current_season, current_episode = show_list[i].next_episode.replace('S', '').replace('E', '').split(' ')
except:
current_season, current_episode = (1, 1)

season_data = show_list[i].season_data
runtime = show_list[i].duration
# episodes_left
Expand Down
9 changes: 7 additions & 2 deletions run.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,13 @@
# Scrape your data from JustWatch and store in .bin files for later
# These can take a while, so it can sometimes be useful to store your data, especially while developing
logging.info('Scraping data from JustWatch')
modules.justwatch.scrape_justwatch('TV')
modules.justwatch.scrape_justwatch('Movies')

# TV in progress
modules.justwatch.scrape_justwatch('https://www.justwatch.com/us/lists/tv-show-tracking?inner_tab=continue_watching')
# TV not started
modules.justwatch.scrape_justwatch('https://www.justwatch.com/us/lists/tv-show-tracking?inner_tab=havent_started')
# Movies
modules.justwatch.scrape_justwatch('https://www.justwatch.com/us/lists/my-lists?content_type=movie&sort_by=popular_30_day')

# Read all genres from scraped data and store in .bin file for later
logging.info('Reading genres from scraped data')
Expand Down

0 comments on commit 0d7333c

Please sign in to comment.