forked from ptwobrussell/Recipes-for-Mining-Twitter
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrecipe__geocode_profile_locations.py
102 lines (70 loc) · 3.45 KB
/
recipe__geocode_profile_locations.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
# -*- coding: utf-8 -*-
import sys
from urllib2 import HTTPError
import geopy
from recipe__oauth_login import oauth_login
from recipe__analyze_users_in_search_results import analyze_users_in_search_results
def geocode_locations(geocoder, locations, max_http_errors=None):
    """Geocode free-form location strings, keeping the first match for each.

    Arguments:
    geocoder -- object exposing geocode(query, exactly_one=False) that
        returns an iterable of ("Description", (X, Y)) results.
    locations -- iterable of location strings. Blank values are skipped and
        each distinct value is looked up at most once.
    max_http_errors -- number of HTTPError failures tolerated for a single
        location before giving up. When None, the module-level
        MAX_HTTP_ERRORS is used if it is defined, otherwise 100.

    Returns a (location_to_coords, location_to_description) pair of dicts,
    both keyed by the original (untransformed) location string. Locations
    that could not be geocoded are absent from both dicts.

    Raises SystemExit when a single location fails with HTTPError
    max_http_errors times.
    """

    if max_http_errors is None:
        # Older callers configured the limit through a module global; keep
        # honoring it, but do not require it to exist.
        max_http_errors = globals().get('MAX_HTTP_ERRORS', 100)

    # Some basic replacement transforms may be necessary for geocoding
    # services to function properly. You will probably need to add your own
    # as you encounter rough edges in the data or with the geocoding service
    # you settle on.
    replacement_transforms = [('San Francisco Bay', 'San Francisco')]

    location_to_coords = {}
    location_to_description = {}

    # Remember every location already tried (not only the successful ones)
    # so that failed lookups are not repeated for duplicate values.
    attempted = set()

    for location in locations:

        # Avoid unnecessary I/O with a simple cache; blank locations cannot
        # be geocoded at all.
        if not location or location in attempted:
            continue
        attempted.add(location)

        # Apply every transform first, then geocode the final string once.
        xformed_location = location
        for transform in replacement_transforms:
            xformed_location = xformed_location.replace(*transform)

        # Reset per location: a failed lookup must not reuse the previous
        # location's results, and the error count must survive retries.
        results = ()
        num_errors = 0

        while True:
            try:
                # This call returns a generator
                results = geocoder.geocode(xformed_location,
                                           exactly_one=False)
                break
            except HTTPError as e:
                num_errors += 1
                sys.stderr.write('%s\n' % (e,))
                if num_errors >= max_http_errors:
                    # Exit with a message and a non-zero status so the
                    # failure is visible to the caller.
                    sys.exit('Giving up after %d HTTP errors while geocoding %r'
                             % (num_errors, xformed_location))
                sys.stderr.write('A urllib2 error. Retrying...\n')
            except UnicodeEncodeError as e:
                # Not retryable: skip this location.
                sys.stderr.write('A UnicodeEncodeError... %s\n' % (e,))
                break
            except geopy.geocoders.google.GQueryError as e:
                # The service rejected the query: skip this location.
                sys.stderr.write('A GQueryError %s\n' % (e,))
                break

        # Each result is of the form ("Description", (X,Y)).
        # Unless you have some special logic for picking the best of many
        # possible results, choose the first one returned and move along.
        # "or ()" tolerates a geocoder that returns None for no match.
        for result in results or ():
            location_to_coords[location] = result[1]
            location_to_description[location] = result[0]
            break

    # Use location_to_coords and other information of interest to populate a
    # visualization. Depending on your particular needs, it is highly likely
    # that you'll want to further post-process the geocoded locations to
    # filter out locations such as "U.S.A.", which will plot a placemarker in
    # the geographic center of the United States yet make the visualization
    # look skewed in favor of places like Oklahoma, for example.

    return location_to_coords, location_to_description
if __name__ == '__main__':

    # Expected invocation: <script> GEOCODING_API_KEY QUERY [QUERY ...]
    # Fail with a usage message instead of a bare IndexError when the API
    # key or the search query is missing.
    if len(sys.argv) < 3:
        sys.stderr.write('Usage: %s GEOCODING_API_KEY QUERY [QUERY ...]\n'
                         % sys.argv[0])
        sys.exit(1)

    # Use your own API key here if you use a geocoding service
    # such as Google or Yahoo!
    GEOCODING_API_KEY = sys.argv[1]

    # All remaining arguments are joined into a single search query.
    Q = ' '.join(sys.argv[2:])

    # Module-level retry limit read by geocode_locations when it hits
    # HTTP errors.
    MAX_HTTP_ERRORS = 100

    g = geopy.geocoders.Google(GEOCODING_API_KEY)

    # Don't forget to pass in keyword parameters if you don't have
    # a token file stored to disk
    t = oauth_login()

    # This function returns a few useful maps. Let's use the
    # screen_name => location map and geocode the locations
    _, screen_name_to_location, _ = analyze_users_in_search_results(t, Q, 2)

    locations = screen_name_to_location.values()
    location2coords, location2description = geocode_locations(g, locations)