import requests
from BeautifulSoup import BeautifulSoup
import unicodecsv

# Contest names to capture, matched against the <strong> labels on the
# statewide results page.
OFFICES = ['United States Senator, Isakson', 'Governor', 'Lieutenant Governor', 'Secretary Of State', 'Attorney General',
           'State School Superintendent', 'Commissioner Of Insurance', 'Commissioner Of Agriculture', 'Commissioner Of Labor',
           'U.S. Representative, District 1', 'U.S. Representative, District 2', 'U.S. Representative, District 3', 'U.S. Representative, District 4',
           'U.S. Representative, District 5', 'U.S. Representative, District 6', 'U.S. Representative, District 7', 'U.S. Representative, District 8',
           'U.S. Representative, District 9', 'U.S. Representative, District 10', 'U.S. Representative, District 11',
           'U.S. Representative, District 12', 'U.S. Representative, District 13']

def parse_statewide_url(url):
    # Collect the recognized contest names from a statewide results page.
    contests = []
    r = requests.get(url)
    soup = BeautifulSoup(r.text)
    # The first <strong> on the page is a heading, not a contest label.
    offices = soup.findAll('strong')[1:]
    for office in offices:
        if office.text in OFFICES:
            contests.append(office.text)
    return contests
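
# The candidate header cells parsed by get_candidates are assumed to look
# roughly like this (inferred from the <br> indexing below, not a documented
# format):
#   <td>CANDIDATE NAME<br>(PARTY)<br> ... <br>12,345<br></td>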

def get_candidates(candidates_row):
    # Pull candidate name, party and total votes from the header row of the
    # results table; skip spacer cells and the trailing Totals column.
    candidates = []
    for cell in candidates_row.findAll('td'):
        if not cell.text.strip():
            continue
        breaks = cell.findAll('br')
        name = breaks[0].previous.strip()
        if name == 'Totals':
            continue
        candidates.append({'name': name,
                           'party': breaks[1].next.strip().replace('(', '').replace(')', ''),
                           'total_votes': breaks[3].previous.strip().replace(',', ''),
                           'counties': []})
    return candidates

def parse_county_results(url):
    r = requests.get(url)
    soup = BeautifulSoup(r.text)
    # The third table on the page holds the county-by-county results.
    table = soup.findAll('table')[2]
    rows = table.findAll('tr')
    candidates = get_candidates(rows[0].find('table').find('tr'))
    # The first four rows are headers; each remaining row is one county.
    for row in rows[4:]:
        county_name = row.find('td').text.strip()
        # Vote cells start at index 2; the last cell is the Totals column.
        # Use `cell`, not `r`, to avoid shadowing the HTTP response above.
        for idx, cell in enumerate(row.findAll('td')[2:-1]):
            candidates[idx]['counties'].append({'county': county_name, 'votes': cell.text.replace(',', '')})
    return candidates
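
# Illustrative shape of parse_county_results' return value (made-up values):
# [{'name': 'SMITH, JOHN', 'party': 'DEM', 'total_votes': '12345',
#   'counties': [{'county': 'Appling', 'votes': '123'}, ...]}, ...]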

def get_county_results(url, file_name, office, district):
    results = parse_county_results(url)
    # 'wb' plus unicodecsv's own encoding handling is the Python 2 idiom here.
    with open(file_name, 'wb') as csvfile:
        w = unicodecsv.writer(csvfile, encoding='utf-8')
        w.writerow(['county', 'office', 'district', 'party', 'candidate', 'votes'])
        for result in results:
            for county in result['counties']:
                w.writerow([county['county'], office, district, result['party'], result['name'], county['votes']])

def get_state_senate(base_url, districts):
    # `districts` is an exclusive upper bound: pass 57 to fetch districts 1-56.
    with open('state_senate.csv', 'wb') as csvfile:
        w = unicodecsv.writer(csvfile, encoding='utf-8')
        w.writerow(['county', 'office', 'district', 'party', 'candidate', 'votes'])
        for district in range(1, districts):
            # District pages are zero-padded to two digits, e.g. .../01.htm.
            url = base_url + str(district).zfill(2) + '.htm'
            print url
            results = parse_county_results(url)
            for result in results:
                for county in result['counties']:
                    w.writerow([county['county'], 'State Senate', district, result['party'], result['name'], county['votes']])

def get_state_house(base_url, districts):
    # House result pages are numbered from 501, offset by 500 from the actual
    # district number; `districts` is an exclusive upper bound in that numbering.
    with open('state_house.csv', 'wb') as csvfile:
        w = unicodecsv.writer(csvfile, encoding='utf-8')
        w.writerow(['county', 'office', 'district', 'party', 'candidate', 'votes'])
        for district in range(501, districts):
            url = base_url + str(district) + '.htm'
            print url
            d = district - 500
            results = parse_county_results(url)
            for result in results:
                for county in result['counties']:
                    w.writerow([county['county'], 'State House', d, result['party'], result['name'], county['votes']])
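
# Minimal usage sketch; the URLs below are hypothetical placeholders, not
# confirmed result-page endpoints.
if __name__ == '__main__':
    get_county_results('http://example.com/results/governor.htm', 'governor.csv', 'Governor', '')
    get_state_senate('http://example.com/results/senate_', 57)   # districts 1-56
    get_state_house('http://example.com/results/', 681)          # pages 501-680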