-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathspeeches.py
executable file
·112 lines (82 loc) · 2.97 KB
/
speeches.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
#!/usr/bin/env python
from datetime import datetime
from functools import lru_cache
import requests
import click
import csv
BABEL_URL = 'https://babel.labhackercd.leg.br/api/v1/manifestations'
def get_data(url=None, **kwargs):
click.echo('.', nl=False)
if url is None:
response = requests.get(BABEL_URL, params=kwargs)
else:
response = requests.get(url)
response_data = response.json()
return_data = response_data['results']
if response_data['next']:
return_data += get_data(response_data['next'])
return return_data
def filter_by_stage(stage, data):
filtered = []
for speech in data:
for attribute in speech['attrs']:
if attribute['field'] == 'fase' and attribute['value'] == stage:
filtered.append(speech)
break
return filtered
@lru_cache()
def get_profile_author_data(url):
response = requests.get(url)
return response.json()
def format_data(speech_data):
meta = {}
meta['id'] = speech_data['id_in_channel']
author = get_profile_author_data(speech_data['profile'])
meta['author_name'] = author['id_in_channel']
for attr in author['attrs']:
if attr['field'] == 'partido':
meta['author_party'] = attr['value']
if attr['field'] == 'UF':
meta['author_region'] = attr['value']
for attr in speech_data['attrs']:
if attr['field'] == 'dtDiscurso':
meta['date'] = attr['value']
if attr['field'] == 'dtAtualizacao':
meta['updated_at'] = attr['value']
if attr['field'] == 'fase':
meta['stage'] = attr['value']
if attr['field'] == 'original':
meta['speech'] = attr['value']
return meta
def write_csv(speeches):
csv_file = open('speeches.csv', mode='w')
fieldnames = ['speech', 'id', 'author_name', 'author_party',
'author_region', 'date', 'updated_at', 'stage']
writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
writer.writeheader()
for speech in speeches:
writer.writerow(speech)
csv_file.close()
@click.command()
@click.argument('initial_date')
@click.argument('end_date')
@click.option('--stage', '-s', help='Initials from speech stage. For example, PE to \'Pequeno Expediente\'')
def main(initial_date, end_date, stage):
datetime.strptime(initial_date, '%Y-%m-%d')
datetime.strptime(end_date, '%Y-%m-%d')
click.echo('Fetching speeches from {} to {}'.format(initial_date, end_date))
data = get_data(
manifestation_type__id=1,
timestamp__gte=initial_date,
timestamp__lte=end_date,
)
if stage:
click.echo('\nFiltering only {} speeches'.format(stage))
data = filter_by_stage(stage, data)
speeches = []
with click.progressbar(data, label='Formatting data') as bar:
speeches = [format_data(speech) for speech in bar]
click.echo('Writing csv')
write_csv(speeches)
if __name__ == '__main__':
main()