Skip to content

Commit a6c2378

Browse files
authored
Merge pull request #1456 from cal-itp/fix_scripts
Module/Script Refactor
2 parents c01b104 + 69cc299 commit a6c2378

File tree

3 files changed

+27
-18
lines changed

3 files changed

+27
-18
lines changed

ntd/annual_ridership_report/annual_ridership_module.py

+4-3
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,7 @@ def sum_by_group(df: pd.DataFrame, group_cols: list) -> pd.DataFrame:
8989

9090
return grouped_df
9191

92-
def produce_annual_ntd_ridership_data_by_rtpa():
92+
def produce_annual_ntd_ridership_data_by_rtpa(min_year:str):
9393
"""
9494
Function that ingests ridership data from `dim_annual_service_agencies` and filters for CA agencies.
9595
Merges in ntd_id_to_RTPA_crosswalk. Aggregates by agency, mode and TOS. Calculates change in UPT.
@@ -102,7 +102,7 @@ def produce_annual_ntd_ridership_data_by_rtpa():
102102
tbls.mart_ntd_funding_and_expenses.fct_service_data_and_operating_expenses_time_series_by_mode_upt()
103103
>> filter(_.state.str.contains("CA") |
104104
_.state.str.contains("NV"), # to get lake Tahoe Transportation back
105-
_.year >= "2018",
105+
_.year >= min_year,
106106
_.city != None,
107107
_.primary_uza_name.str.contains(", CA") |
108108
_.primary_uza_name.str.contains("CA-NV") |
@@ -319,8 +319,9 @@ def remove_local_outputs(
319319

320320

321321
if __name__ == "__main__":
322+
min_year="2018"
322323

323-
df = produce_annual_ntd_ridership_data_by_rtpa()
324+
df = produce_annual_ntd_ridership_data_by_rtpa(min_year)
324325
print("saving parqut to private GCS")
325326

326327
df.to_parquet(f"{GCS_FILE_PATH}annual_ridership_report_data.parquet")

ntd/annual_ridership_report/ntd_rtpa_crosswalk.py

+23-15
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414

1515

1616
# get data from warehouse
17-
def get_ntd_agencies():
17+
def get_ntd_agencies(min_year:str) -> pd.DataFrame:
1818
"""
1919
reads in ntd data from warehouse, filters for CA agencies from `min_year` onward (the script passes "2018").
2020
groups data by agency and sum their UPT.
@@ -23,7 +23,7 @@ def get_ntd_agencies():
2323
tbls.mart_ntd_funding_and_expenses.fct_service_data_and_operating_expenses_time_series_by_mode_upt()
2424
>> filter(_.state.str.contains("CA") |
2525
_.state.str.contains("NV"), # to get lake Tahoe Transportation back
26-
_.year >= "2018",
26+
_.year >= min_year,
2727
_.city != None,
2828
_.primary_uza_name.str.contains(", CA") |
2929
_.primary_uza_name.str.contains("CA-NV") |
@@ -69,25 +69,25 @@ def get_ntd_agencies():
6969
return ntd_time_series
7070

7171

72-
def get_cdp_to_rtpa_map():
72+
def get_cdp_to_rtpa_map(rtpa_url:str, cdp_url:str) -> pd.DataFrame:
7373
"""
7474
reads in map of CA census designated places (CDPs)(polygon) and CA RTPA (polygon).
7575
Get centroid of CDPs, then sjoin to RTPA map.
7676
Do some manual cleaning.
7777
"""
7878
# RTPA map
79-
rtpa_url = "https://cecgis-caenergy.opendata.arcgis.com/api/download/v1/items/3a83743378be4e7f84c8230889c01dea/geojson?layers=0"
80-
rtpa_map = gpd.read_file(rtpa_url)[
79+
rtpa_path = rtpa_url
80+
rtpa_map = gpd.read_file(rtpa_path)[
8181
["RTPA", "LABEL_RTPA", "geometry"]
8282
]
8383

8484
rtpa_map = rtpa_map.to_crs("ESRI:102600") # for sjoin later
8585

8686
# California Census Designated Places (2010), includes cities and CDPs
87-
cdp_url = "https://services6.arcgis.com/YBp5dUuxCMd8W1EI/arcgis/rest/services/California_Census_Designated_Places_2010/FeatureServer/0/query?outFields=*&where=1%3D1&f=geojson"
87+
cdp_path = cdp_url
8888
keep_cdp_col = ["FID", "NAME10", "NAMELSAD10", "geometry"]
8989

90-
cdp_map = gpd.read_file(cdp_url)[keep_cdp_col].rename(
90+
cdp_map = gpd.read_file(cdp_path)[keep_cdp_col].rename(
9191
columns={"NAME10": "cdp_name", "NAMELSAD10": "name_lsad"}
9292
)
9393

@@ -120,14 +120,14 @@ def get_cdp_to_rtpa_map():
120120
return city_to_rtpa
121121

122122

123-
def merge_agencies_to_rtpa_map():
123+
def merge_agencies_to_rtpa_map(ntd_df:pd.DataFrame, city_rtpa_df:pd.DataFrame) -> pd.DataFrame:
124124
"""
125125
merges the ntd data and rtpa data from `get_ntd_agencies` and `get_cdp_to_rtpa_map`.
126126
does some manual updating.
127127
"""
128128
# merge
129-
alt_ntd_to_rtpa = ntd_time_series.merge(
130-
city_to_rtpa[["cdp_name", "RTPA"]],
129+
alt_ntd_to_rtpa = ntd_df.merge(
130+
city_rtpa_df[["cdp_name", "RTPA"]],
131131
left_on=("city"),
132132
right_on=("cdp_name"),
133133
how="left",
@@ -167,7 +167,7 @@ def merge_agencies_to_rtpa_map():
167167
return alt_ntd_to_rtpa
168168

169169

170-
def make_export_clean_crosswalk():
170+
def make_export_clean_crosswalk(df:pd.DataFrame) -> pd.DataFrame:
171171
# final crosswalk
172172
ntd_data_to_rtpa_cleaned = alt_ntd_to_rtpa[
173173
["ntd_id","agency_name","reporter_type","agency_status","city","state","RTPA"]
@@ -179,15 +179,23 @@ def make_export_clean_crosswalk():
179179

180180
if __name__ == "__main__":
181181
print("get list of ntd agencies")
182-
ntd_time_series = get_ntd_agencies()
182+
ntd_time_series = get_ntd_agencies(min_year="2018")
183183

184184
print("get list census designated places to rtpa map")
185-
city_to_rtpa = get_cdp_to_rtpa_map()
185+
city_to_rtpa = get_cdp_to_rtpa_map(
186+
rtpa_url="https://cecgis-caenergy.opendata.arcgis.com/api/download/v1/items/3a83743378be4e7f84c8230889c01dea/geojson?layers=0",
187+
cdp_url="https://services6.arcgis.com/YBp5dUuxCMd8W1EI/arcgis/rest/services/California_Census_Designated_Places_2010/FeatureServer/0/query?outFields=*&where=1%3D1&f=geojson"
188+
)
186189

187190
print("merge ntd agencies to cdp/rtpa map")
188-
alt_ntd_to_rtpa = merge_agencies_to_rtpa_map()
191+
alt_ntd_to_rtpa = merge_agencies_to_rtpa_map(
192+
ntd_df=ntd_time_series,
193+
city_rtpa_df=city_to_rtpa
194+
)
189195

190196
print("make clean crosswalk, export to GCS")
191-
make_export_clean_crosswalk()
197+
make_export_clean_crosswalk(
198+
df=alt_ntd_to_rtpa
199+
)
192200

193201
print("end script")

0 commit comments

Comments
 (0)