Merge branch 'skadogg/issue101'

skadogg · Feb 24, 2024 · 0d7333c · 0d7333c
2 parents d9dfa3b + eed2678
commit 0d7333c
Show file tree

Hide file tree

Showing 3 changed files with 26 additions and 37 deletions.
diff --git a/modules/justwatch.py b/modules/justwatch.py
@@ -7,14 +7,14 @@
 from selenium.webdriver.support.ui import WebDriverWait
 import logging
 import modules.auto_sign_in
+import modules.data_bin_convert
 import modules.html
 import modules.ld_json
 import modules.runtime
 import os
 import random
 import time
 
-
 def get_titles_count(driver):
     try:
         # Reads number of titles from top of page, e.g. "887 titles," and converts to int
@@ -81,12 +81,13 @@ def balance_movie_and_tv_lists(movie_list, tv_list, good_ratio=0.8):
     return bigger_list + smaller_list
 
 
-def scrape_justwatch(media):
+def scrape_justwatch(url):
     # Scrape your data from JustWatch.
-    # media should be either 'tv' or 'movies'
-    import modules.auto_sign_in  # TODO this probably doesn't belong here
-
-    media = media.lower()
+    if '/tv-show/' in url or 'content_type=show' in url or '/tv-show-tracking' in url:
+        media = 'tv'
+    else:
+        media = 'movies'
+
     logging.debug(f'{media=}')
     load_dotenv(dotenv_path='./.env')
 
@@ -105,10 +106,7 @@ def scrape_justwatch(media):
     driver = webdriver.Chrome(options=options)
     driver.set_page_load_timeout(60)
 
-    if media == 'movies':
-        driver.get('https://www.justwatch.com/us/lists/my-lists?content_type=movie&sort_by=popular_30_day')
-    else:
-        driver.get('https://www.justwatch.com/us/lists/tv-show-tracking?inner_tab=continue_watching')
+    driver.get(url)
 
     driver.maximize_window()
     # driver.implicitly_wait(1.0)
@@ -125,30 +123,13 @@ def scrape_justwatch(media):
     logging.debug('Scrolling to bottom of page')
     scroll_down(driver)
 
-    # # Get name, episode number/title, left in season, main show link from main watchlist
-    # logging.debug('Getting all show cards from main page')
-    # if media == 'movies':
-    #     show_cards = driver.find_elements(By.XPATH, '//div[@class="title-card-basic title-card-basic"]')
-    # else:
-    #     show_cards = driver.find_elements(By.XPATH, '//div[@class="title-card-basic title-card-show-episode"]')
-
     show_card_data = get_show_card_data(driver, media)
 
     if dev_mode:
         dev_items = 5
         logging.debug('Dev mode: only looking at first dev_items items in list')
         show_card_data = show_card_data[0:dev_items]
 
-    # logging.debug('Getting all show links from each card')
-    # # show_card_all_links = []
-    # # show_card_full_text = []
-    # show_card_data = []
-    # for i in range(len(show_cards)):
-    #     show_card_main_link = show_cards[i].find_elements(By.TAG_NAME, 'a')[0].get_dom_attribute('href')
-    #     show_card_full_text = show_cards[i].text
-    #     show_card_data.append([show_card_main_link, show_card_full_text])
-
-
     '''
     show_card_data = [['/us/movie/oppenheimer', "Oppenheimer (2023)\nThe story of J. Robert Oppenheimer's role in the development of the atomic bomb during World War II.\n8.4\n29 offers available"], ['/us/movie/killers-of-the-flower-moon', 'Killers of the Flower Moon (2023)\nWhen oil is discovered in 1920s Oklahoma under Osage Nation land, the Osage people are murdered one by one—until the FBI steps in to unravel the mystery.\n7.7\nWatch now'], ['/us/movie/everything-everywhere-all-at-once', "Everything Everywhere All at Once (2022)\nAn aging Chinese immigrant is swept up in an insane adventure, where she alone can save what's important to her by connecting with the lives she could have led in other universes.\n7.8\nWatch now"], ['/us/movie/asteroid-city', 'Asteroid City (2023)\nIn an American desert town circa 1955, the itinerary of a Junior Stargazer/Space Cadet convention is spectacularly disrupted by world-changing events.\n6.5\nWatch now'], ['/us/movie/dumb-money', "Dumb Money (2023)\nDavid vs. Goliath tale about everyday people who flipped the script on Wall Street and got rich by turning GameStop (the video game store) into the world's hottest company.\n6.9\nWatch now"]]
     show_card_data = [['/us/tv-show/scrubs', 'TV\nScrubs\nS6 E5\n+17\nMy Friend with Money\nWatch now'], ['/us/tv-show/love-on-the-spectrum-u-s', 'TV\nLove on the Spectrum U.S.\nS2 E2\n+5\nSeason 2\nWatch now']]
@@ -275,12 +256,6 @@ def scrape_justwatch(media):
     show_db += activity_list
 
     # Save my work
-    import modules.data_bin_convert  # TODO this probably doesn't belong here
-    # if media == 'movies':
-    #     modules.data_bin_convert.data_to_bin(activity_list, './my_data/saved_data_movies.bin')
-    # else:
-    #     modules.data_bin_convert.data_to_bin(activity_list, './my_data/saved_data_tv.bin')
-    # data_list_everything = modules.data_bin_convert.bin_to_data()
     modules.data_bin_convert.data_to_bin(show_db, './my_data/saved_data.bin')
 
 

diff --git a/modules/runtime.py b/modules/runtime.py
@@ -55,15 +55,24 @@ def percent_complete(minutes_left, minutes_total):
     #     minutes_left = modules.runtime.time_left_in_tv_series(season_data,50,3,21)
     #     minutes_total = modules.runtime.time_left_in_tv_series(season_data,50)
     #     pct_done = modules.runtime.percent_complete(minutes_left, minutes_total)
-    return (minutes_total - minutes_left) / minutes_total
+    try:
+        return (minutes_total - minutes_left) / minutes_total
+    except:
+        return 0
 
 
 def time_left_in_tv_series_report(show_list):
     time_info = []
     for i in range(len(show_list)):
         if isinstance(show_list[i], Tvshow):
             show_title = modules.shield.generate_shield_text(show_list[i])
-            current_season, current_episode = show_list[i].next_episode.replace('S', '').replace('E', '').split(' ')
+
+            # print(show_list[i].next_episode)
+            try:
+                current_season, current_episode = show_list[i].next_episode.replace('S', '').replace('E', '').split(' ')
+            except:
+                current_season, current_episode = (1, 1)
+
             season_data = show_list[i].season_data
             runtime = show_list[i].duration
             # episodes_left

diff --git a/run.py b/run.py
@@ -28,8 +28,13 @@
 # Scrape your data from JustWatch and store in .bin files for later
 # These can take a while, so it can sometimes be useful to store your data, especially while developing
 logging.info('Scraping data from JustWatch')
-modules.justwatch.scrape_justwatch('TV')
-modules.justwatch.scrape_justwatch('Movies')
+
+# TV in progress
+modules.justwatch.scrape_justwatch('https://www.justwatch.com/us/lists/tv-show-tracking?inner_tab=continue_watching')
+# TV not started
+modules.justwatch.scrape_justwatch('https://www.justwatch.com/us/lists/tv-show-tracking?inner_tab=havent_started')
+# Movies
+modules.justwatch.scrape_justwatch('https://www.justwatch.com/us/lists/my-lists?content_type=movie&sort_by=popular_30_day')
 
 # Read all genres from scraped data and store in .bin file for later
 logging.info('Reading genres from scraped data')