"""Scrape coreyms.com articles into ``cms_scrape.csv``.

For every ``<article>`` element on the page one CSV row is written with the
article headline, the first paragraph of its summary, and a YouTube watch
link derived from the embedded player (empty when there is no usable embed).
"""

import csv
from typing import Optional

SOURCE_URL = 'http://coreyms.com'
CSV_PATH = 'cms_scrape.csv'
CSV_HEADER = ['headline', 'summary', 'video_link']


def youtube_link(embed_src: str) -> Optional[str]:
    """Turn a YouTube embed URL into a regular watch URL.

    The video id is the fifth ``/``-separated component of *embed_src*
    (index 4) with any ``?query`` suffix removed.

    Returns:
        The ``https://youtube.com/watch?v=<id>`` URL, or ``None`` when
        *embed_src* has fewer than five ``/``-separated components.
    """
    parts = embed_src.split('/')
    if len(parts) < 5:
        return None
    video_id = parts[4].split('?')[0]
    return f'https://youtube.com/watch?v={video_id}'


def article_row(article) -> list:
    """Build the ``[headline, summary, video_link]`` row for one article.

    *article* is expected to be the parsed ``<article>`` element; the
    headline is read from ``article.h2.a`` and the summary from the first
    ``<p>`` inside the ``div.entry-content`` element.
    """
    headline = article.h2.a.text
    summary = article.find('div', class_='entry-content').p.text

    iframe = article.find('iframe', class_='youtube-player')
    try:
        yt_link = youtube_link(iframe['src'])
    except (TypeError, KeyError):
        # TypeError: no player iframe was found (``find`` returned None).
        # KeyError: the iframe has no ``src`` attribute.
        yt_link = None

    return [headline, summary, yt_link]


def main() -> None:
    """Fetch the page and write one CSV row per article."""
    # Imported here rather than at module level so the pure helpers above
    # stay importable without the scraping dependencies installed.
    import requests
    from bs4 import BeautifulSoup

    # Fetch and parse before opening the output file, so a failed request
    # does not truncate an existing CSV.
    source = requests.get(SOURCE_URL).text
    soup = BeautifulSoup(source, 'lxml')

    # newline='' lets the csv module control line endings itself; an explicit
    # encoding keeps non-ASCII headlines from failing on platforms whose
    # default encoding cannot represent them.
    with open(CSV_PATH, 'w', newline='', encoding='utf-8') as csv_file:
        csv_writer = csv.writer(csv_file)
        csv_writer.writerow(CSV_HEADER)
        # Rows are written sequentially while the file is still open.
        csv_writer.writerows(
            article_row(article) for article in soup.find_all('article')
        )


if __name__ == '__main__':
    main()