Skip to content

Commit 34830ba

Browse files
authored
Merge pull request #1370 from cal-itp/ah_gtfs
Adding back rail routes
2 parents 27991ba + cb5314b commit 34830ba

File tree

11 files changed

+30391
-192
lines changed

11 files changed

+30391
-192
lines changed

gtfs_digest/03_report.ipynb

+717-62
Large diffs are not rendered by default.

gtfs_digest/45_missing_routes2.ipynb

+26,840-118
Large diffs are not rendered by default.

gtfs_digest/46_rail_routes.ipynb

+2,028
Large diffs are not rendered by default.

gtfs_digest/47_check_operators.ipynb

+727
Large diffs are not rendered by default.

gtfs_funnel/route_typologies.py

+55-6
Original file line numberDiff line numberDiff line change
@@ -222,10 +222,15 @@ def overlay_shapes_to_roads(
222222
buffer_meters: int
223223
) -> gpd.GeoDataFrame:
224224

225+
# AH: removed pipe b/c it erases routes from Amtrak
226+
#common_shape = gtfs_schedule_wrangling.most_common_shape_by_route_direction(
227+
# analysis_date
228+
#).pipe(helpers.remove_shapes_outside_ca)
229+
225230
common_shape = gtfs_schedule_wrangling.most_common_shape_by_route_direction(
226231
analysis_date
227-
).pipe(helpers.remove_shapes_outside_ca)
228-
232+
)
233+
229234
common_shape = common_shape.assign(
230235
route_meters = common_shape.geometry.length,
231236
)
@@ -352,6 +357,34 @@ def reconcile_route_and_nacto_typologies(
352357

353358
return df2
354359

360+
def add_rail_back(
361+
categorize_routes_df: pd.DataFrame, overlay_shapes_to_roads_df: pd.DataFrame
362+
) -> pd.DataFrame:
363+
"""
364+
categorize_routes_df: df created by categorize_routes_by_name()
365+
overlay_shapes_to_roads_df: df created by overlay_shapes_to_roads()
366+
"""
367+
# Filter out for only rail routes and drop duplicates.
368+
rail_routes = categorize_routes_df.loc[categorize_routes_df.is_rail == 1][
369+
["route_id", "schedule_gtfs_dataset_key"]
370+
].drop_duplicates()
371+
372+
# Merge with route_typologies_df to retain the details for
373+
# columns such as typology, freq_category, etc
374+
m1 = pd.merge(gdf, rail_routes, how="inner")
375+
376+
# Retain only one row for each route-direction-operator
377+
# keeping the row with the highest pct_typology
378+
m1 = m1.sort_values(
379+
by=["route_id", "direction_id", "schedule_gtfs_dataset_key", "pct_typology"],
380+
ascending=[True, True, True, False],
381+
).drop_duplicates(subset=["route_id", "direction_id", "schedule_gtfs_dataset_key"])
382+
383+
# Apply primary_secondary_typology() function which adds
384+
# columns like is_nacto_rapid, is_nacto_coverage
385+
m1 = primary_secondary_typology(m1)
386+
387+
return m1
355388

356389
if __name__ == "__main__":
357390

@@ -363,7 +396,7 @@ def reconcile_route_and_nacto_typologies(
363396

364397
roads = delayed(prep_roads)(GTFS_DATA_DICT)
365398
ROAD_BUFFER_METERS = 20
366-
TYPOLOGY_THRESHOLD = 0.10
399+
TYPOLOGY_THRESHOLD = 0.1
367400

368401
for analysis_date in analysis_date_list:
369402

@@ -383,17 +416,33 @@ def reconcile_route_and_nacto_typologies(
383416
# Aggregate to route-dir-typology
384417
route_typology_df2 = primary_secondary_typology(route_typology_df)
385418

419+
# Tag if the route is express, rapid, or rail
386420
route_tagged = categorize_routes_by_name(analysis_date)
387421

422+
# Incorporate back rail routes that disappear if the routes
423+
# don't meet the minimum set in typology_threshold.
424+
rail_routes_df = add_rail_back(route_tagged, gdf)
425+
all_routes = pd.concat([route_typology_df2, rail_routes_df])
426+
427+
428+
# Merge
388429
df3 = pd.merge(
389430
route_tagged,
390-
route_typology_df2,
431+
all_routes,
391432
on = ["schedule_gtfs_dataset_key", "route_id"],
392433
).pipe(reconcile_route_and_nacto_typologies)
393434

394-
df3.to_parquet(
395-
f"{SCHED_GCS}{EXPORT}_{analysis_date}.parquet")
396435

436+
# Drop duplicates because some rail routes are found in both
437+
# route_typology_df2 and rail_routes_df
438+
df3 = (df3.drop_duplicates(
439+
subset = ["schedule_gtfs_dataset_key",
440+
"route_id",
441+
"route_long_name",
442+
"direction_id"])
443+
)
444+
df3.to_parquet(
445+
f"{SCHED_GCS}{EXPORT}_AH_TEST_{analysis_date}.parquet")
397446

398447
time1 = datetime.datetime.now()
399448
print(f"route typologies {analysis_date}: {time1 - time0}")

gtfs_funnel/update_vars.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,8 @@
1212
)
1313

1414

15-
# analysis_date_list = [rt_dates.DATES["dec2024"]] + [rt_dates.DATES['nov2024']]
16-
analysis_date_list = all_dates
15+
analysis_date_list = [rt_dates.DATES["dec2024"]] + [rt_dates.DATES['nov2024']]
16+
# analysis_date_list = all_dates
1717
GTFS_DATA_DICT = catalog_utils.get_catalog("gtfs_analytics_data")
1818

1919
COMPILED_CACHED_VIEWS = GTFS_DATA_DICT.gcs_paths.COMPILED_CACHED_VIEWS

portfolio/sites/gtfs_digest.yml

+12
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ parts:
1515
- organization_name: Mendocino Transit Authority
1616
- organization_name: POINT
1717
- organization_name: Redwood Coast Transit Authority
18+
- organization_name: Yurok Tribe
1819
- caption: District 02 - Redding
1920
params:
2021
district: 02 - Redding
@@ -94,6 +95,8 @@ parts:
9495
district: 05 - San Luis Obispo
9596
sections:
9697
- organization_name: City of Guadalupe
98+
- organization_name: City of Lompoc
99+
- organization_name: City of Morro Bay
97100
- organization_name: City of San Luis Obispo
98101
- organization_name: City of Santa Cruz
99102
- organization_name: City of Santa Maria
@@ -154,6 +157,7 @@ parts:
154157
- organization_name: City of Los Angeles
155158
- organization_name: City of Lynwood
156159
- organization_name: City of Maywood
160+
- organization_name: City of Montebello
157161
- organization_name: City of Monterey Park
158162
- organization_name: City of Moorpark
159163
- organization_name: City of Norwalk
@@ -186,6 +190,7 @@ parts:
186190
district: 08 - San Bernardino
187191
sections:
188192
- organization_name: Basin Transit
193+
- organization_name: Chemehuevi Indian Tribe
189194
- organization_name: City of Banning
190195
- organization_name: City of Beaumont
191196
- organization_name: City of Corona
@@ -194,6 +199,7 @@ parts:
194199
- organization_name: OmniTrans
195200
- organization_name: Palo Verde Valley Transit Agency
196201
- organization_name: Riverside Transit Agency
202+
- organization_name: SunLine Transit Agency
197203
- organization_name: Victor Valley Transit Authority
198204
- caption: District 09 - Bishop
199205
params:
@@ -234,7 +240,13 @@ parts:
234240
params:
235241
district: 12 - Irvine
236242
sections:
243+
- organization_name: Anaheim Transportation Network
244+
- organization_name: City of Dana Point
245+
- organization_name: City of Irvine
237246
- organization_name: City of Laguna Beach
247+
- organization_name: City of Mission Viejo
248+
- organization_name: City of San Clemente
249+
- organization_name: City of San Juan Capistrano
238250
- organization_name: Orange County Transportation Authority
239251
- organization_name: University of California, Irvine
240252
readme: ./gtfs_digest/README.md

rt_scheduled_v_ran/logs/rt_v_scheduled_route_metrics.log

+1
Original file line numberDiff line numberDiff line change
@@ -103,3 +103,4 @@
103103
2025-01-16 15:49:56.361 | INFO | __main__:route_metrics:88 - route aggregation 2023-10-11: 0:00:01.825395
104104
2025-01-16 15:49:58.178 | INFO | __main__:route_metrics:88 - route aggregation 2023-11-15: 0:00:01.812722
105105
2025-01-16 15:50:00.055 | INFO | __main__:route_metrics:88 - route aggregation 2023-12-13: 0:00:01.873527
106+
2025-01-21 16:29:01.096 | INFO | __main__:route_metrics:88 - route aggregation 2024-11-13: 0:00:03.148850

rt_scheduled_v_ran/scripts/update_vars.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,8 @@
77
oct2024_week = rt_dates.get_week("oct2024", exclude_wed=True)
88

99

10-
# analysis_date_list = [rt_dates.DATES["nov2024"]]
11-
analysis_date_list = rt_dates.y2024_dates + rt_dates.y2023_dates
10+
analysis_date_list = [rt_dates.DATES["nov2024"]]
11+
# analysis_date_list = rt_dates.y2024_dates + rt_dates.y2023_dates
1212

1313
GTFS_DATA_DICT = catalog_utils.get_catalog("gtfs_analytics_data")
1414

rt_segment_speeds/logs/avg_speeds.log

+4
Original file line numberDiff line numberDiff line change
@@ -678,3 +678,7 @@
678678
2025-01-16 16:28:31.739 | INFO | __main__:summary_average_speeds:154 - rt_stop_times summary speed averaging for ['2023-04-15'] execution time: 0:00:17.603240
679679
2025-01-16 16:28:40.309 | INFO | __main__:summary_average_speeds:120 - trip avg 0:00:08.508140
680680
2025-01-16 16:28:49.151 | INFO | __main__:summary_average_speeds:154 - rt_stop_times summary speed averaging for ['2023-04-16'] execution time: 0:00:17.350031
681+
2025-01-21 16:31:27.163 | INFO | __main__:summary_average_speeds:120 - trip avg 0:00:17.106380
682+
2025-01-21 16:31:41.624 | INFO | __main__:summary_average_speeds:154 - rt_stop_times summary speed averaging for ['2024-12-11'] execution time: 0:00:31.567351
683+
2025-01-21 16:32:04.052 | INFO | __main__:summary_average_speeds:120 - trip avg 0:00:22.327037
684+
2025-01-21 16:32:17.670 | INFO | __main__:summary_average_speeds:154 - rt_stop_times summary speed averaging for ['2024-11-13'] execution time: 0:00:35.944913

rt_segment_speeds/segment_speed_utils/project_vars.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -29,8 +29,9 @@
2929
rt_dates.oct2023_week, rt_dates.apr2023_week,
3030
]
3131

32-
analysis_date_list = apr2024_week + oct2023_week + apr2023_week
33-
32+
33+
analysis_date_list = apr2024_week + oct2023_week + apr2023_week
34+
analysis_date_list = [rt_dates.DATES["dec2024"]] + [rt_dates.DATES['nov2024']]
3435

3536
PROJECT_CRS = "EPSG:3310"
3637
ROAD_SEGMENT_METERS = 1_000

0 commit comments

Comments
 (0)