additional features #4

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account


Open · wants to merge 3 commits into base: main

Changes from all commits
17 changes: 12 additions & 5 deletions utils/comments.py
@@ -1,10 +1,11 @@
 import csv
 from datetime import datetime as dt
 
-comments = []
 today = dt.today().strftime('%d-%m-%Y')
+PATH = 'commentsFolder/'
 
 def process_comments(response_items, csv_output=False):
+    comments = []
 
     for res in response_items:

@@ -29,13 +30,19 @@ def process_comments(response_items, csv_output=False):
     return comments
 
 
-def make_csv(comments, channelID=None):
+def make_csv(comments, channelID=None, videoID=None):
+    # Handle 0 comments issue
+    if len(comments) == 0:
+        return
+
     header = comments[0].keys()
 
-    if channelID:
-        filename = f'comments_{channelID}_{today}.csv'
+    if channelID and videoID:
+        filename = f'{PATH}comments_{channelID}_{videoID}_{today}.csv'
+    elif channelID:
+        filename = f'{PATH}comments_{channelID}_{today}.csv'
     else:
-        filename = f'comments_{today}.csv'
+        filename = f'{PATH}comments_{today}.csv'
 
     with open(filename, 'w', encoding='utf8', newline='') as f:
         writer = csv.DictWriter(f, fieldnames=header)
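
For reference, a minimal usage sketch of the updated make_csv (not part of the diff). The dict keys and IDs below are placeholders, since the fields produced by process_comments are not shown here, and the commentsFolder/ directory is created up front because make_csv prepends PATH to every filename without creating the folder:

import os
from utils.comments import make_csv

# make_csv now writes into PATH ('commentsFolder/') but does not create the folder itself
os.makedirs('commentsFolder', exist_ok=True)

# placeholder rows standing in for the output of process_comments
comments = [
    {'author': 'someUser', 'comment': 'great video', 'likes': 3},
]

# channelID and videoID -> commentsFolder/comments_<channelID>_<videoID>_<DD-MM-YYYY>.csv
make_csv(comments, channelID='UC_exampleChannel', videoID='exampleVideoId')

# channelID only -> commentsFolder/comments_<channelID>_<DD-MM-YYYY>.csv
make_csv(comments, channelID='UC_exampleChannel')

# neither -> commentsFolder/comments_<DD-MM-YYYY>.csv
make_csv(comments)
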
207 changes: 150 additions & 57 deletions yt_public.py
@@ -1,15 +1,28 @@
 import os
+import csv
 from datetime import datetime as dt
 from urllib import response
 from dotenv import load_dotenv
 from googleapiclient.discovery import build
 
 from utils.comments import process_comments, make_csv
 
 load_dotenv()
-API_KEY = os.getenv("API_KEY")
+API_KEY_1 = os.getenv("API_KEY_1")
+API_KEY_2 = os.getenv("API_KEY_2")
+API_KEY_3 = os.getenv("API_KEY_3")
+API_KEY_4 = os.getenv("API_KEY_4")
+API_KEY_5 = os.getenv("API_KEY_5")
 
-youtube = build("youtube", "v3", developerKey=API_KEY)
+youtube_1 = build("youtube", "v3", developerKey=API_KEY_1)
+youtube_2 = build("youtube", "v3", developerKey=API_KEY_2)
+youtube_3 = build("youtube", "v3", developerKey=API_KEY_3)
+youtube_4 = build("youtube", "v3", developerKey=API_KEY_4)
+youtube_5 = build("youtube", "v3", developerKey=API_KEY_5)
 
-def search_result(query):
+scraped_videos = {}
+
+def search_result(youtube, query):
     """
     Refer to the documentation: https://googleapis.github.io/google-api-python-client/docs/dyn/youtube_v3.search.html
     """
@@ -21,87 +34,167 @@ def search_result(query):

     return request.execute()
 
-def channel_stats(channelID):
+def get_video_ids(youtube, channelId):
     """
-    Refer to the documentation: https://googleapis.github.io/google-api-python-client/docs/dyn/youtube_v3.channels.html
+    Refer to the documentation: https://googleapis.github.io/google-api-python-client/docs/dyn/youtube_v3.search.html
     """
+    videoIds = []
 
     request = youtube.channels().list(
-        part="statistics",
-        id=channelID
+        part="contentDetails",
+        id=channelId
     )
-    return request.execute()
-
-def comment_threads(channelID, to_csv=False):
-
-    comments_list = []
-
-    request = youtube.commentThreads().list(
-        part='id,replies,snippet',
-        videoId=channelID,
+    response = request.execute()
+
+    playlistId = response['items'][0]['contentDetails']['relatedPlaylists']['uploads']
+
+    request = youtube.playlistItems().list(
+        part="contentDetails",
+        playlistId=playlistId,
+        maxResults=50
     )
+
     response = request.execute()
-    comments_list.extend(process_comments(response['items']))
+    responseItems = response['items']
+
+    videoIds.extend([item['contentDetails']['videoId'] for item in responseItems])
 
     # if there is nextPageToken, then keep calling the API
     while response.get('nextPageToken', None):
-        request = youtube.commentThreads().list(
-            part='id,replies,snippet',
-            videoId=channelID,
+        print(f'Fetching next page of videos for {channelId}_{playlistId}')
+        request = youtube.playlistItems().list(
+            part="contentDetails",
+            playlistId=playlistId,
+            maxResults=50,
            pageToken=response['nextPageToken']
         )
         response = request.execute()
-        comments_list.extend(process_comments(response['items']))
-
-    print(f"Finished fetching comments for {channelID}. {len(comments_list)} comments found.")
-
-    if to_csv:
-        make_csv(comments_list, channelID)
+        responseItems = response['items']
+
+        videoIds.extend([item['contentDetails']['videoId'] for item in responseItems])
 
-    return comments_list
+    print(f"Finished fetching videoIds for {channelId}. {len(videoIds)} videos found.")
+
+    return videoIds

-def get_video_ids(channelId):
+def channel_stats(youtube, channelIDs, to_csv=False):
     """
-    Refer to the documentation: https://googleapis.github.io/google-api-python-client/docs/dyn/youtube_v3.search.html
+    Refer to the documentation: https://googleapis.github.io/google-api-python-client/docs/dyn/youtube_v3.channels.html
     """
-    videoIds = []
 
-    request = youtube.search().list(
-        part="snippet",
-        channelId=channelId,
-        type="video",
-        maxResults=50,
-        order="date"
-    )
+    if type(channelIDs) == str:
+        channelIDs = [channelIDs]
 
-    response = request.execute()
-    responseItems = response['items']
-
-    videoIds.extend([item['id']['videoId'] for item in responseItems if item['id'].get('videoId', None) != None])
+    stats_list = []
 
-    # if there is nextPageToken, then keep calling the API
-    while response.get('nextPageToken', None):
-        request = youtube.search().list(
-            part="snippet",
-            channelId=channelId,
+    for channelId in channelIDs:
+        request = youtube.channels().list(
+            part="statistics",
+            id=channelId
         )
         response = request.execute()
-        responseItems = response['items']
+        response = response['items'][0]['statistics']
+        response['channelId'] = channelId
 
+        stats_list.append(response)
+
+    if to_csv:
+        header = stats_list[0].keys()
+        with open(f'channelStats.csv', 'w') as f:
+            writer = csv.DictWriter(f, fieldnames=header)
+            writer.writeheader()
+            writer.writerows(stats_list)
+
+    return stats_list
 
-        videoIds.extend([item['id']['videoId'] for item in responseItems if item['id'].get('videoId', None) != None])
+def video_stats(youtube, videoIDs, channelID, to_csv=False):
+    if type(videoIDs) == str:
+        videoIDs = [videoIDs]
 
-    print(f"Finished fetching videoIds for {channelId}. {len(videoIds)} videos found.")
+    stats_list = []
 
-    return videoIds
+    for videoId in videoIDs:
+        request = youtube.videos().list(
+            part="snippet, statistics, contentDetails",
+            id=videoId
+        )
+        response = request.execute()
+        statistics = response['items'][0]['statistics']
+        snippet = response['items'][0]['snippet']
+        statistics['videoId'] = videoId
+        statistics['title'] = snippet['title']
+        statistics['description'] = snippet['description']
+        statistics['publishedAt'] = snippet['publishedAt']
+        statistics['duration'] = response['items'][0]['contentDetails']['duration']
+        statistics['thumbnail'] = snippet['thumbnails']['high']['url']
+        statistics['channelId'] = channelID
+        statistics['likeCount'] = statistics.get('likeCount', 0)
+
+        print(f"Fetched stats for {videoId}")
+        stats_list.append(statistics)
+
+    if to_csv:
+        header = stats_list[0].keys()
+        with open(f'videosFolder/videoStats_{channelID}.csv', 'w', encoding='utf8', newline='') as f:
+            writer = csv.DictWriter(f, fieldnames=header)
+            writer.writeheader()
+            writer.writerows(stats_list)
+
+    print(f'Success in fetching video stats for {channelID}')
+
+    return stats_list
 
 
-if __name__ == '__main__':
-    pyscriptVidId = 'Qo8dXyKXyME'
-    channelId = 'UCzIxc8Vg53_ewaRIk3shBug'
+def comment_threads(youtube, videoID, channelID=None, to_csv=False):
+
+    comments_list = []
+
+    try:
+        request = youtube.commentThreads().list(
+            part='id,replies,snippet',
+            videoId=videoID,
+        )
+        response = request.execute()
+    except Exception as e:
+        print(f'Error fetching comments for {videoID} - error: {e}')
+        if scraped_videos.get('error_ids', None):
+            scraped_videos['error_ids'].append(videoID)
+        else:
+            scraped_videos['error_ids'] = [videoID]
+        return
 
-    # response = search_result("pyscript")
-    response = channel_stats(channelId)
-    # response = comment_threads(pyscriptVidId, to_csv=True)
+    comments_list.extend(process_comments(response['items']))
 
-    print(response)
+    # if there is nextPageToken, then keep calling the API
+    while response.get('nextPageToken', None):
+        request = youtube.commentThreads().list(
+            part='id,replies,snippet',
+            videoId=videoID,
+            pageToken=response['nextPageToken']
+        )
+        response = request.execute()
+        comments_list.extend(process_comments(response['items']))
+
+    print(f"Finished fetching comments for {videoID}. {len(comments_list)} comments found.")
+
+    if to_csv:
+        try:
+            make_csv(comments_list, channelID, videoID)
+        except Exception as e:
+            print(f'Error writing comments to csv for {videoID} - error: {e}')
+            if scraped_videos.get('error_csv_ids', None):
+                scraped_videos['error_csv_ids'].append(videoID)
+            else:
+                scraped_videos['error_csv_ids'] = [videoID]
+            return
+
+    if scraped_videos.get(channelID, None):
+        scraped_videos[channelID].append(videoID)
+    else:
+        scraped_videos[channelID] = [videoID]
+
+    return comments_list
+
+if __name__ == '__main__':
+    pyscriptVidId = 'Qo8dXyKXyME'
+    channelId = 'UCzIxc8Vg53_ewaRIk3shBug'
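
The new __main__ block is truncated in this view. Purely as an illustration (not part of the PR), here is one way the refactored functions could be wired together, assuming the API keys are set in .env and that the commentsFolder/ and videosFolder/ directories exist:

channel_ids = ['UCzIxc8Vg53_ewaRIk3shBug']  # placeholder list of channels to scrape

# one-off channel statistics, optionally written to channelStats.csv
channel_stats(youtube_1, channel_ids, to_csv=True)

for channel_id in channel_ids:
    # every upload on the channel, fetched via its 'uploads' playlist
    video_ids = get_video_ids(youtube_1, channel_id)

    # per-video statistics, written to videosFolder/videoStats_<channelId>.csv
    video_stats(youtube_2, video_ids, channel_id, to_csv=True)

    # comments per video, written to commentsFolder/ by make_csv;
    # failures are collected in the global scraped_videos dict
    for video_id in video_ids:
        comment_threads(youtube_3, video_id, channelID=channel_id, to_csv=True)

print(scraped_videos)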