Skip to content

Commit 650b76c

Browse files
committed
added county_parser
1 parent 75c41f5 commit 650b76c

File tree

1 file changed

+77
-0
lines changed

1 file changed

+77
-0
lines changed

county_parser.py

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
import string
2+
import requests
3+
from BeautifulSoup import BeautifulSoup
4+
import unicodecsv
5+
6+
# Contest names that parse_statewide_url compares page headings against.
# Statewide contests are spelled out; the numbered U.S. House contests
# (districts 1 through 13) are generated below.
OFFICES = [
    'United States Senator, Isakson',
    'Governor',
    'Lieutenant Governor',
    'Secretary Of State',
    'Attorney General',
    'State School Superintendent',
    'Commissioner Of Insurance',
    'Commissioner Of Agriculture',
    'Commissioner Of Labor',
]
OFFICES.extend(
    'U.S. Representative, District %d' % number for number in range(1, 14)
)
12+
13+
def parse_statewide_url(url):
    """Fetch a statewide results page and scan its headings for known offices.

    Downloads ``url``, parses the HTML, and walks every ``<strong>`` element
    after the first one, checking whether its text is one of the names listed
    in ``OFFICES``.

    NOTE(review): nothing is collected or returned -- ``contests`` stays empty
    and the function implicitly returns ``None``. This looks unfinished;
    confirm the intended output before relying on it.
    """
    contests = []
    response = requests.get(url)
    page = BeautifulSoup(response.text)
    # The first <strong> is skipped -- presumably a page title; verify
    # against the live markup.
    for heading in page.findAll('strong')[1:]:
        label = heading.text
        if label not in OFFICES:
            continue
        matched_office = label
21+
22+
def get_candidates(candidates_row):
    """Build one record per candidate from a row of candidate header cells.

    Every ``<td>`` in ``candidates_row`` is examined. Cells whose text is a
    single blank and the cell labelled ``Totals`` are skipped. For the rest,
    the fields are read relative to the cell's ``<br>`` tags:

    * name        -- text just before the first ``<br>``
    * party       -- text just after the second ``<br>``, parentheses removed
    * total_votes -- text just before the fourth ``<br>``, commas removed

    Returns a list of dicts with the keys ``name``, ``party``,
    ``total_votes`` and ``counties`` (an empty list for the caller to fill).
    Raises ``IndexError`` if a non-blank cell has fewer ``<br>`` tags than
    the lookups above require.
    """
    parsed = []
    for cell in candidates_row.findAll('td'):
        if cell.text == ' ':
            continue

        breaks = cell.findAll('br')
        name = breaks[0].previous.strip()
        if name == 'Totals':
            continue

        party = breaks[1].next.strip().replace('(', '').replace(')', '')
        total_votes = breaks[3].previous.strip().replace(',', '')
        parsed.append({
            'name': name,
            'party': party,
            'total_votes': total_votes,
            'counties': [],
        })
    return parsed
33+
34+
def parse_county_results(url):
    """Download a county-level results page and attach votes to candidates.

    The third ``<table>`` on the page is used. Its first ``<tr>`` holds a
    nested table whose first row is handed to ``get_candidates``. Every
    ``<tr>`` from the fifth onward is treated as a county row: the first cell
    gives the county name, and the cells from the third up to (but excluding)
    the last are matched to candidates by position.

    Returns the candidate list from ``get_candidates`` with each candidate's
    ``counties`` list filled with ``{"county": ..., "votes": ...}`` dicts
    (votes are strings with commas removed). Raises ``IndexError`` when a
    county row has more vote cells than there are candidates.
    """
    response = requests.get(url)
    page = BeautifulSoup(response.text)
    results_table = page.findAll('table')[2]
    all_rows = results_table.findAll('tr')

    header_row = all_rows[0].find('table').find('tr')
    candidates = get_candidates(header_row)

    for county_row in all_rows[4:]:
        county = county_row.find('td').text.strip()
        vote_cells = county_row.findAll('td')[2:-1]
        for position, vote_cell in enumerate(vote_cells):
            per_county = candidates[position]['counties']
            per_county.append({"county": county, 'votes': vote_cell.text.replace(',', '')})
    return candidates
46+
47+
def get_county_results(url, file_name, office, district):
    """Scrape one contest's county results and save them as a CSV file.

    ``url`` is parsed with ``parse_county_results``; ``file_name`` is then
    opened in binary write mode and receives a header row followed by one row
    per candidate per county. ``office`` and ``district`` are written
    unchanged into every data row. Returns ``None``.
    """
    header = ['county', 'office', 'district', 'party', 'candidate', 'votes']
    candidates = parse_county_results(url)
    with open(file_name, 'wb') as out_file:
        writer = unicodecsv.writer(out_file, encoding='utf-8')
        writer.writerow(header)
        for candidate in candidates:
            party = candidate['party']
            name = candidate['name']
            for tally in candidate['counties']:
                writer.writerow([tally['county'], office, district, party, name, tally['votes']])
55+
56+
def get_state_senate(base_url, districts):
    """Scrape State Senate county results into ``state_senate.csv``.

    For each district number from 1 up to, but not including, ``districts``
    the page at ``base_url`` + the number zero-padded to two digits + ``.htm``
    is parsed and one row per candidate per county is written. No header row
    is written. Each URL is printed before it is fetched.

    NOTE(review): ``districts`` is an exclusive upper bound, so callers must
    pass the last district number plus one -- confirm at the call site.
    """
    with open('state_senate.csv', 'wb') as out_file:
        writer = unicodecsv.writer(out_file, encoding='utf-8')
        for district in range(1, districts):
            page_name = str(district).zfill(2)
            url = base_url + page_name + '.htm'
            print(url)
            for candidate in parse_county_results(url):
                party = candidate['party']
                name = candidate['name']
                for tally in candidate['counties']:
                    writer.writerow([tally['county'], 'State Senate', district, party, name, tally['votes']])
66+
67+
def get_state_house(base_url, districts):
    """Scrape State House county results into ``state_house.csv``.

    Page numbers run from 501 up to, but not including, ``districts``; each
    page lives at ``base_url`` + the page number + ``.htm``. The district
    written to the CSV is the page number minus 500. One row per candidate
    per county is written, with no header row. Each URL is printed before it
    is fetched.

    NOTE(review): ``districts`` is an exclusive upper bound expressed as a
    page number (district + 500) -- confirm at the call site.
    """
    with open('state_house.csv', 'wb') as out_file:
        writer = unicodecsv.writer(out_file, encoding='utf-8')
        for page_number in range(501, districts):
            url = base_url + str(page_number) + '.htm'
            print(url)
            district = page_number - 500
            for candidate in parse_county_results(url):
                party = candidate['party']
                name = candidate['name']
                for tally in candidate['counties']:
                    writer.writerow([tally['county'], 'State House', district, party, name, tally['votes']])

0 commit comments

Comments
 (0)