-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathtrump_approval.py
35 lines (29 loc) · 1.46 KB
/
trump_approval.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
import pandas as pd
# Pollsters whose individual series are kept next to the FiveThirtyEight topline.
_TOP_POLLSTERS = [
    'Gallup',
    'Ipsos',
    'Morning Consult',
    'Rasmussen Reports/Pulse Opinion Research',
    'YouGov',
]

# Joined column name -> column name written to the output CSV.
_OUTPUT_COLUMNS = {
    'modeldate': 'ordinal_date',
    'approve_estimate': 'five_thirty_eight',
    'Rasmussen Reports/Pulse Opinion Research': 'rasmussen',
    'Gallup': 'gallup',
    'Ipsos': 'ipsos',
    'Morning Consult': 'morning_consult',
    'YouGov': 'you_gov',
}


def _load_pollster_series(path: str) -> pd.DataFrame:
    """Load the poll list and return one approval column per top pollster.

    Only rows for president "Donald Trump", subgroup "All polls" and a
    pollster in ``_TOP_POLLSTERS`` are kept. The result is indexed by
    ``createddate`` with one ``adjusted_approve`` column per pollster.
    """
    polls = pd.read_csv(path, parse_dates=['createddate'])
    keep = (
        (polls['president'] == 'Donald Trump')
        & (polls['subgroup'] == 'All polls')
        & polls['pollster'].isin(_TOP_POLLSTERS)
    )
    polls = polls.loc[keep, ['pollster', 'createddate', 'adjusted_approve']]
    # pivot_table aggregates with its default (mean) when a pollster has
    # several rows for the same createddate.
    return polls.pivot_table(
        values='adjusted_approve', index='createddate', columns='pollster'
    )


def _load_topline(path: str) -> pd.DataFrame:
    """Load the topline (the approval estimate from FiveThirtyEight).

    Returns the ``modeldate`` and ``approve_estimate`` columns for president
    "Donald Trump", subgroup "All polls", sorted by ``modeldate``.
    """
    top_line = pd.read_csv(path, parse_dates=['modeldate'])
    keep = (
        (top_line['president'] == 'Donald Trump')
        & (top_line['subgroup'] == 'All polls')
    )
    top_line = top_line.loc[keep].sort_values('modeldate')
    return top_line[['modeldate', 'approve_estimate']]


def _build_history(top_line: pd.DataFrame, polls: pd.DataFrame) -> pd.DataFrame:
    """Join the topline with the pollster series into one gap-free table."""
    # Left join: every topline date is kept; pollster values are matched on
    # the poll's createddate (the index of ``polls``).
    hist = top_line.join(polls, on='modeldate')
    hist['modeldate'] = hist['modeldate'].apply(lambda day: day.toordinal())
    hist = hist.rename(columns=_OUTPUT_COLUMNS)
    # Carry the last known value of each column forward over dates without a
    # new value, then drop the rows that still contain a missing value.
    hist = hist.ffill()
    return hist.dropna()


def main(
    polls_path: str = 'approval_polllist.csv',
    topline_path: str = 'approval_topline.csv',
    output_path: str = 'trump_approval.csv',
) -> None:
    """Build the combined approval history and save it as CSV.

    Args:
        polls_path: CSV of individual polls; must provide the columns
            ``president``, ``subgroup``, ``pollster``, ``createddate`` and
            ``adjusted_approve``.
        topline_path: CSV of the topline estimate; must provide the columns
            ``president``, ``subgroup``, ``modeldate`` and
            ``approve_estimate``.
        output_path: Destination CSV, written without the index column.
    """
    # Load the polling data
    polls = _load_pollster_series(polls_path)
    # Extract the topline, which is the approval score from FiveThirtyEight
    top_line = _load_topline(topline_path)
    # Join the two together
    hist = _build_history(top_line, polls)
    # Save
    hist.to_csv(output_path, index=False)


if __name__ == '__main__':
    main()