-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathnews_scraper.py
executable file
·74 lines (61 loc) · 2.28 KB
/
news_scraper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
import os
import newspaper
import africastalking as at
import bitlyshortener as bts
from dotenv import load_dotenv
load_dotenv()
# Read the API credentials and the recipient number from the environment
# (populated from the .env file by load_dotenv() above).
# NOTE(review): `username` is a very generic variable name; on Windows it may
# resolve to the OS-provided USERNAME variable instead of the .env value —
# confirm, and consider a more specific name.
api_key = os.getenv('api_key')
username = os.getenv('username')
mobile_number = os.getenv('mobile_number')
# Initialize the Africa's Talking client using the username and API key.
at.initialize(username, api_key)
# Keep a reference to the SMS service of the Africa's Talking client.
sms = at.SMS
# Read the Bitly token used by the URL shortener.
token = os.getenv('bitly_token')
# Build the shortener object from the token.
shortener = bts.Shortener(tokens=[token], max_cache_size=256)
# URLs of the news sources that can be scraped.
business_daily = "https://www.businessdailyafrica.com/bd/corporate/technology"
standard_daily = "https://www.standardmedia.co.ke/"
nation_daily = "https://www.nation.co.ke/"
# Shared list of scraped headlines; top_news() appends to it.
message = []
# Scrape the top headlines from a news source.
def top_news(url: str, *, count: int = 3, include_urls: bool = False) -> list:
    """Collect the titles of the first articles found at a news source.

    Builds a ``newspaper`` source for *url*, downloads and parses up to
    *count* of its articles, prints each title, and appends the titles to
    the module-level ``message`` list.

    Args:
        url: Home or section page of the news site to scrape.
        count: Maximum number of articles to take (default 3).
        include_urls: When true, also append a Bitly-shortened link after
            each title. Off by default, so the list holds titles only and
            no shortening quota is spent on links that are never sent.

    Returns:
        The shared module-level ``message`` list, which also contains any
        entries appended by earlier calls.
    """
    news_source = newspaper.build(url)

    top_articles = []
    # Slice instead of indexing: a source may expose fewer than `count`
    # articles, and indexing past the end would raise IndexError.
    for article in news_source.articles[:max(count, 0)]:
        article.download()
        article.parse()
        # NOTE: `article.keywords` is a list attribute, not a method, so it
        # must not be called; the title only needs download() + parse().
        top_articles.append(article)
        print(article.title)

    for article in top_articles:
        message.append(article.title)
        if include_urls:
            # shorten_urls() takes and returns a list; extend() unpacks it.
            message.extend(shortener.shorten_urls([article.url]))
    return message
# Scrape each enabled news source; every call appends to the shared `message`
# list. The commented-out calls are sources that are currently disabled.
top_news("https://cnn.com/")
top_news(business_daily)
#top_news(standard_daily)
# top_news(nation_daily)
print(message)
# Report the current usage of the URL-shortening quota.
# NOTE(review): usage() is understood to return a 0-1 fraction, so it is
# formatted as a percentage directly (the previous `* 1000` overstated it
# tenfold) — confirm against the bitlyshortener documentation.
usage = shortener.usage()
print(f"Current url quota usage: {usage:.2%}")
# Send a message containing the scraped news headlines.
def send_message(news: "str | list", number: "str | int"):
    """Send the scraped headlines as one SMS through the module's SMS client.

    Args:
        news: Text to send. A list or tuple of headlines is joined into a
            single newline-separated string; a string is sent unchanged.
        number: Recipient phone number. It is converted to ``str`` before
            sending. Presumably expected in international format
            (e.g. ``+2547...``) — confirm against the SMS provider.

    Returns:
        None. This is best-effort: the provider response is printed on
        success and any failure is printed rather than raised.
    """
    if isinstance(news, (list, tuple)):
        text = "\n".join(str(item) for item in news)
    else:
        text = str(news)

    if not number:
        # A missing recipient (e.g. unset environment variable) can never be
        # delivered; report it the same way as any other send failure.
        print(" Houston we have a problem: no recipient number given")
        return

    try:
        response = sms.send(text, [str(number)])
    except Exception as e:
        # Script-level boundary: report the failure instead of crashing.
        print(f" Houston we have a problem: {e}")
    else:
        print(response)
# Send the collected headlines, one per line, to the configured mobile number.
# Joining the entries avoids sending the Python list repr (brackets and
# quotes) as the SMS text.
send_message("\n".join(message), mobile_number)