14
14
15
15
16
16
# get data from warehouse
17
- def get_ntd_agencies () :
17
+ def get_ntd_agencies (min_year : str ) -> pd . DataFrame :
18
18
"""
19
19
reads in ntd data from warehouse, filters for CA agencies from `min_year` onward.
20
20
groups data by agency and sum their UPT.
@@ -23,7 +23,7 @@ def get_ntd_agencies():
23
23
tbls .mart_ntd_funding_and_expenses .fct_service_data_and_operating_expenses_time_series_by_mode_upt ()
24
24
>> filter (_ .state .str .contains ("CA" ) |
25
25
_ .state .str .contains ("NV" ), # to get lake Tahoe Transportation back
26
- _ .year >= "2018" ,
26
+ _ .year >= min_year ,
27
27
_ .city != None ,
28
28
_ .primary_uza_name .str .contains (", CA" ) |
29
29
_ .primary_uza_name .str .contains ("CA-NV" ) |
@@ -69,25 +69,25 @@ def get_ntd_agencies():
69
69
return ntd_time_series
70
70
71
71
72
- def get_cdp_to_rtpa_map () :
72
+ def get_cdp_to_rtpa_map (rtpa_url : str , cdp_url : str ) -> pd . DataFrame :
73
73
"""
74
74
reads in map of CA census designated places (CDPs)(polygon) and CA RTPA (polygon).
75
75
Get centroid of CDPs, then sjoin to RTPA map.
76
76
Do some manual cleaning.
77
77
"""
78
78
# RTPA map
79
- rtpa_url = "https://cecgis-caenergy.opendata.arcgis.com/api/download/v1/items/3a83743378be4e7f84c8230889c01dea/geojson?layers=0"
80
- rtpa_map = gpd .read_file (rtpa_url )[
79
+ rtpa_path = rtpa_url
80
+ rtpa_map = gpd .read_file (rtpa_path )[
81
81
["RTPA" , "LABEL_RTPA" , "geometry" ]
82
82
]
83
83
84
84
rtpa_map = rtpa_map .to_crs ("ESRI:102600" ) # for sjoin later
85
85
86
86
# California Census Designated Places (2010), includes cities and CDPs
87
- cdp_url = "https://services6.arcgis.com/YBp5dUuxCMd8W1EI/arcgis/rest/services/California_Census_Designated_Places_2010/FeatureServer/0/query?outFields=*&where=1%3D1&f=geojson"
87
+ cdp_path = cdp_url
88
88
keep_cdp_col = ["FID" , "NAME10" , "NAMELSAD10" , "geometry" ]
89
89
90
- cdp_map = gpd .read_file (cdp_url )[keep_cdp_col ].rename (
90
+ cdp_map = gpd .read_file (cdp_path )[keep_cdp_col ].rename (
91
91
columns = {"NAME10" : "cdp_name" , "NAMELSAD10" : "name_lsad" }
92
92
)
93
93
@@ -120,14 +120,14 @@ def get_cdp_to_rtpa_map():
120
120
return city_to_rtpa
121
121
122
122
123
- def merge_agencies_to_rtpa_map () :
123
+ def merge_agencies_to_rtpa_map (ntd_df : pd . DataFrame , city_rtpa_df : pd . DataFrame ) -> pd . DataFrame :
124
124
"""
125
125
merges the ntd data and rtpa data from `get_ntd_agencies` and `get_cdp_to_rtpa_map`.
126
126
does some manual updating.
127
127
"""
128
128
# merge
129
- alt_ntd_to_rtpa = ntd_time_series .merge (
130
- city_to_rtpa [["cdp_name" , "RTPA" ]],
129
+ alt_ntd_to_rtpa = ntd_df .merge (
130
+ city_rtpa_df [["cdp_name" , "RTPA" ]],
131
131
left_on = ("city" ),
132
132
right_on = ("cdp_name" ),
133
133
how = "left" ,
@@ -167,7 +167,7 @@ def merge_agencies_to_rtpa_map():
167
167
return alt_ntd_to_rtpa
168
168
169
169
170
- def make_export_clean_crosswalk () :
170
+ def make_export_clean_crosswalk (df : pd . DataFrame ) -> pd . DataFrame :
171
171
# final crosswalk
172
172
ntd_data_to_rtpa_cleaned = alt_ntd_to_rtpa [
173
173
["ntd_id" ,"agency_name" ,"reporter_type" ,"agency_status" ,"city" ,"state" ,"RTPA" ]
@@ -179,15 +179,23 @@ def make_export_clean_crosswalk():
179
179
180
180
if __name__ == "__main__" :
181
181
print ("get list of ntd agencies" )
182
- ntd_time_series = get_ntd_agencies ()
182
+ ntd_time_series = get_ntd_agencies (min_year = "2018" )
183
183
184
184
print ("get list census designated places to rtpa map" )
185
- city_to_rtpa = get_cdp_to_rtpa_map ()
185
+ city_to_rtpa = get_cdp_to_rtpa_map (
186
+ rtpa_url = "https://cecgis-caenergy.opendata.arcgis.com/api/download/v1/items/3a83743378be4e7f84c8230889c01dea/geojson?layers=0" ,
187
+ cdp_url = "https://services6.arcgis.com/YBp5dUuxCMd8W1EI/arcgis/rest/services/California_Census_Designated_Places_2010/FeatureServer/0/query?outFields=*&where=1%3D1&f=geojson"
188
+ )
186
189
187
190
print ("merge ntd agencies to cdp/rtpa map" )
188
- alt_ntd_to_rtpa = merge_agencies_to_rtpa_map ()
191
+ alt_ntd_to_rtpa = merge_agencies_to_rtpa_map (
192
+ ntd_df = ntd_time_series ,
193
+ city_rtpa_df = city_to_rtpa
194
+ )
189
195
190
196
print ("make clean crosswalk, export to GCS" )
191
- make_export_clean_crosswalk ()
197
+ make_export_clean_crosswalk (
198
+ df = alt_ntd_to_rtpa
199
+ )
192
200
193
201
print ("end script" )
0 commit comments