-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathclimatechange1.py
129 lines (72 loc) · 2.55 KB
/
climatechange1.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
# -*- coding: utf-8 -*-
"""
Created on Fri Jun 11 21:35:52 2021
@author: peace
"""
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import itertools
import collections
import tweepy as tw
import nltk
from nltk import bigrams
from nltk.corpus import stopwords
import re
import networkx as nx
from ..credentials import consumer_key, consumer_secret, \
access_token, access_token_secret
import warnings
warnings.filterwarnings("ignore")
sns.set(font_scale=1.5)
sns.set_style("whitegrid")
# --- Twitter API session ---------------------------------------------------
# Build the OAuth handler from the consumer key/secret and attach the access
# token pair; all four values come from the credentials import above.
auth = tw.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tw.API(auth, wait_on_rate_limit=True)

# --- Search query ----------------------------------------------------------
# The query string excludes retweets via the "-filter:retweets" operator.
# NOTE(review): `api.search` and the `since` keyword depend on the installed
# tweepy version -- confirm they exist in the version this script runs on.
search_term = "#climate+change -filter:retweets"

# Lazy cursor limited to 1000 items; nothing is consumed until `tweets` is
# iterated by later code.
tweets = tw.Cursor(
    api.search,
    q=search_term,
    lang="en",
    since='2018-11-01',
).items(1000)
def remove_url(txt: str) -> str:
    """Return *txt* with every URL removed.

    A URL is matched as a run of non-whitespace characters that starts with
    ``http://``, ``https://`` or ``www.``. Only the URL itself is deleted;
    whitespace around it is left untouched.

    Args:
        txt: The text to clean.

    Returns:
        A copy of ``txt`` with each matched URL replaced by the empty string.
    """
    url_pattern = re.compile(r'https?://\S+|www\.\S+')
    return url_pattern.sub('', txt)
# Take the text of each fetched tweet and strip URLs from it.
tweets_no_urls = [remove_url(status.text) for status in tweets]

# Lower-case each tweet and split it on whitespace into a list of tokens.
words_in_tweet = [text.lower().split() for text in tweets_no_urls]

# Make sure the NLTK stop-word corpus is available, then keep the English
# stop words in a set for membership tests.
nltk.download('stopwords')
stop_words = set(stopwords.words('english'))

# Per tweet: keep only the tokens that are not English stop words.
tweets_nsw = [
    [token for token in tokens if token not in stop_words]
    for tokens in words_in_tweet
]

# Per tweet: also drop the tokens listed in `collection_words`.
collection_words = ['climatechange', 'climate', 'change']
tweets_nsw_nc = [
    [token for token in tokens if token not in collection_words]
    for tokens in tweets_nsw
]
# Build the list of adjacent-word pairs (bigrams) for every cleaned tweet.
terms_bigram = [list(bigrams(tweet)) for tweet in tweets_nsw_nc]

# Flatten the per-tweet lists into one list of bigrams. The result is bound
# to `all_bigrams` so that the `bigrams` function imported from nltk is not
# overwritten by a list.
all_bigrams = list(itertools.chain.from_iterable(terms_bigram))

# Count how often each bigram occurs across all tweets.
bigram_counts = collections.Counter(all_bigrams)

# Table of the 20 most frequent bigrams with their counts.
bigram_df = pd.DataFrame(bigram_counts.most_common(20),
                         columns=['bigram', 'count'])
# Build an undirected graph from the 20 most frequent bigrams: each bigram
# becomes an edge between its two words, weighted by 10x its count.
# Iterating the counter's top entries directly also copes with an empty
# result (no edges are added) instead of failing on a missing first record.
G = nx.Graph()
for (first_word, second_word), count in bigram_counts.most_common(20):
    G.add_edge(first_word, second_word, weight=(count * 10))

# Extra hard-coded node that is added regardless of the bigram data.
# NOTE(review): purpose is not evident from this script -- confirm it is
# intentional.
G.add_node("china", weight=100)
# Draw the bigram network on a single 10x8 figure.
fig, ax = plt.subplots(figsize=(10, 8))

# Compute a position for every node; `pos` maps node -> (x, y) coordinates.
pos = nx.spring_layout(G, k=2)

# Draw nodes and edges only; labels are switched off here because they are
# placed manually below with an offset.
nx.draw_networkx(G, pos,
                 font_size=16,
                 width=3,
                 edge_color='grey',
                 node_color='purple',
                 with_labels=False,
                 ax=ax)

# Place each node's label in a translucent red box, shifted slightly right
# of and above the node position.
for key, value in pos.items():
    x, y = value[0] + .135, value[1] + .045
    ax.text(x, y,
            s=key,
            bbox=dict(facecolor='red', alpha=0.25),
            horizontalalignment='center', fontsize=13)

plt.show()