Skip to content

Commit 4544281

Browse files
committed
use helper in hqta
1 parent 3b9a3fa commit 4544281

File tree

9 files changed

+361
-202
lines changed

9 files changed

+361
-202
lines changed

high_quality_transit_areas/22_debug_2026.ipynb

Lines changed: 79 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
},
1414
{
1515
"cell_type": "code",
16-
"execution_count": 1,
16+
"execution_count": 3,
1717
"id": "92bed811-566b-4bd5-925f-b58e755166ad",
1818
"metadata": {},
1919
"outputs": [],
@@ -33,12 +33,90 @@
3333
" PROJECT_CRS,\n",
3434
" SEGMENT_BUFFER_METERS,\n",
3535
" analysis_date,\n",
36+
" MPO_DATA_PATH\n",
3637
")\n",
3738
"\n",
3839
"from calitp_data_analysis.gcs_geopandas import GCSGeoPandas\n",
3940
"gcsgp = GCSGeoPandas()"
4041
]
4142
},
43+
{
44+
"cell_type": "code",
45+
"execution_count": 4,
46+
"id": "c5054234-5a23-48fe-8bd3-18531ed7194a",
47+
"metadata": {},
48+
"outputs": [],
49+
"source": [
50+
"from calitp_data_analysis import geography_utils, get_fs, utils\n"
51+
]
52+
},
53+
{
54+
"cell_type": "code",
55+
"execution_count": 5,
56+
"id": "2aed4677-6473-412b-b940-8e00c7465d4b",
57+
"metadata": {},
58+
"outputs": [],
59+
"source": [
60+
"fs = get_fs()"
61+
]
62+
},
63+
{
64+
"cell_type": "code",
65+
"execution_count": 6,
66+
"id": "1a09fe2c-fb17-4c0d-83db-82e54a7f5b37",
67+
"metadata": {},
68+
"outputs": [],
69+
"source": [
70+
"def read_standardize_mpo_input(mpo_data_path=MPO_DATA_PATH, fs=fs) -> gpd.GeoDataFrame:\n",
71+
" \"\"\"\n",
72+
" Read in mpo-provided planned major transit stops and enforce schema.\n",
73+
" \"\"\"\n",
74+
" mpo_names = [x.split(\"/\")[-1].split(\".\")[0] for x in fs.ls(MPO_DATA_PATH) if x.split(\"/\")[-1] != \"mpo_input\"]\n",
75+
"\n",
76+
" mpo_gdfs = []\n",
77+
" for mpo_name in mpo_names:\n",
78+
" mpo_gdf = gcs_geopandas().read_file(f\"{MPO_DATA_PATH}{mpo_name}.geojson\")\n",
79+
" required_cols = [\"mpo\", \"hqta_type\", \"plan_name\"]\n",
80+
" optional_cols = [\"stop_id\", \"avg_trips_per_peak_hr\", \"agency_primary\"]\n",
81+
" all_cols = required_cols + optional_cols + [\"geometry\"]\n",
82+
" assert set(required_cols).issubset(mpo_gdf.columns)\n",
83+
" filter_cols = [col for col in all_cols if col in mpo_gdf.columns]\n",
84+
" mpo_gdf = mpo_gdf[filter_cols]\n",
85+
" mpo_gdfs += [mpo_gdf]\n",
86+
" return pd.concat(mpo_gdfs)"
87+
]
88+
},
89+
{
90+
"cell_type": "code",
91+
"execution_count": 7,
92+
"id": "5ab13f33-e089-4846-a42c-dfcbf19cc934",
93+
"metadata": {},
94+
"outputs": [],
95+
"source": [
96+
"mpo_names = [x.split(\"/\")[-1].split(\".\")[0] for x in fs.ls(MPO_DATA_PATH) if x.split(\"/\")[-1] != \"mpo_input\"]"
97+
]
98+
},
99+
{
100+
"cell_type": "code",
101+
"execution_count": 8,
102+
"id": "eb335365-8a7c-465c-9af2-2fcf748d6722",
103+
"metadata": {},
104+
"outputs": [
105+
{
106+
"data": {
107+
"text/plain": [
108+
"['', 'mtc', 'sacog', 'sandag', 'scag']"
109+
]
110+
},
111+
"execution_count": 8,
112+
"metadata": {},
113+
"output_type": "execute_result"
114+
}
115+
],
116+
"source": [
117+
"mpo_names"
118+
]
119+
},
42120
{
43121
"cell_type": "markdown",
44122
"id": "e23cf134-ea3d-47c1-93e9-0c729a417d61",

high_quality_transit_areas/Makefile

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
hqta_data:
2-
# python rail_ferry_brt_stops.py
3-
# python create_hqta_segments.py
4-
# python create_aggregate_stop_frequencies.py
2+
python rail_ferry_brt_stops.py
3+
python create_hqta_segments.py
4+
python create_aggregate_stop_frequencies.py
55
python sjoin_stops_to_segments.py
66
python prep_pairwise_intersections.py
77
python get_intersections.py

high_quality_transit_areas/assemble_hqta_points.py

Lines changed: 28 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111

1212
import datetime
1313
import sys
14+
from functools import cache
1415

1516
import _utils
1617
import geopandas as gpd
@@ -19,6 +20,7 @@
1920
import pandas as pd
2021
from calitp_data_analysis import geography_utils, get_fs, utils
2122
from calitp_data_analysis.gcs_geopandas import GCSGeoPandas
23+
from calitp_data_analysis.gcs_pandas import GCSPandas
2224
from calitp_data_analysis.sql import query_sql
2325
from loguru import logger
2426
from shared_utils import gtfs_utils_v2
@@ -30,7 +32,17 @@
3032
analysis_date,
3133
)
3234

33-
gcsgp = GCSGeoPandas()
35+
36+
@cache
37+
def gcs_pandas():
38+
return GCSPandas()
39+
40+
41+
@cache
42+
def gcs_geopandas():
43+
return GCSGeoPandas()
44+
45+
3446
fs = get_fs()
3547
catalog = intake.open_catalog("*.yml")
3648

@@ -48,10 +60,14 @@ def combine_stops_by_hq_types(crs: str) -> gpd.GeoDataFrame:
4860

4961
trip_count_cols = ["am_max_trips_hr", "pm_max_trips_hr"]
5062

51-
max_arrivals = pd.read_parquet(
52-
f"{GCS_FILE_PATH}max_arrivals_by_stop.parquet",
53-
columns=["schedule_gtfs_dataset_key", "stop_id"] + trip_count_cols,
54-
).pipe(_utils.primary_rename)
63+
max_arrivals = (
64+
gcs_pandas()
65+
.read_parquet(
66+
f"{GCS_FILE_PATH}max_arrivals_by_stop.parquet",
67+
columns=["schedule_gtfs_dataset_key", "stop_id"] + trip_count_cols,
68+
)
69+
.pipe(_utils.primary_rename)
70+
)
5571

5672
# Combine AM max and PM max into 1 column
5773
# if am_max_trips = 4 and pm_max_trips = 5, we'll choose 4.
@@ -188,15 +204,19 @@ def final_processing_gtfs(gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
188204
return gdf3
189205

190206

191-
def read_standardize_mpo_input(mpo_data_path=MPO_DATA_PATH, gcsgp=gcsgp, fs=fs) -> gpd.GeoDataFrame:
207+
def read_standardize_mpo_input(mpo_data_path=MPO_DATA_PATH, fs=fs) -> gpd.GeoDataFrame:
192208
"""
193209
Read in mpo-provided planned major transit stops and enforce schema.
194210
"""
195-
mpo_names = [x.split("/")[-1].split(".")[0] for x in fs.ls(MPO_DATA_PATH) if x.split("/")[-1] != "mpo_input"]
211+
mpo_names = [
212+
x.split("/")[-1].split(".")[0]
213+
for x in fs.ls(MPO_DATA_PATH)
214+
if x.split("/")[-1] and x.split("/")[-1] != "mpo_input"
215+
]
196216

197217
mpo_gdfs = []
198218
for mpo_name in mpo_names:
199-
mpo_gdf = gcsgp.read_file(f"{MPO_DATA_PATH}{mpo_name}.geojson")
219+
mpo_gdf = gcs_geopandas().read_file(f"{MPO_DATA_PATH}{mpo_name}.geojson")
200220
required_cols = ["mpo", "hqta_type", "plan_name"]
201221
optional_cols = ["stop_id", "avg_trips_per_peak_hr", "agency_primary"]
202222
all_cols = required_cols + optional_cols + ["geometry"]

high_quality_transit_areas/branching_derived_intersections.py

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,13 @@
1+
from functools import cache
2+
13
import create_aggregate_stop_frequencies
24
import geopandas as gpd
35
import lookback_wrappers
46
import numpy as np
57
import pandas as pd
68
from _utils import append_analysis_name
79
from calitp_data_analysis.gcs_geopandas import GCSGeoPandas
10+
from calitp_data_analysis.gcs_pandas import GCSPandas
811
from calitp_data_analysis.geography_utils import CA_NAD83Albers_m
912
from IPython.display import Markdown, display
1013
from segment_speed_utils import gtfs_schedule_wrangling, helpers
@@ -18,9 +21,18 @@
1821
analysis_date,
1922
)
2023

21-
tqdm.pandas()
2224

23-
gcsgp = GCSGeoPandas()
25+
@cache
26+
def gcs_pandas():
27+
return GCSPandas()
28+
29+
30+
@cache
31+
def gcs_geopandas():
32+
return GCSGeoPandas()
33+
34+
35+
tqdm.pandas()
2436

2537

2638
def get_filter_singles(single_route_aggregation: pd.DataFrame, ms_precursor_threshold: int | float) -> pd.DataFrame:
@@ -210,7 +222,7 @@ def match_spatial_format(branching_stops_gdf: gpd.GeoDataFrame) -> gpd.GeoDataFr
210222

211223
shapes = get_shapes_with_lookback(analysis_date, published_operators_dict, lookback_trips_ix)
212224

213-
max_arrivals_by_stop_single = pd.read_parquet(f"{GCS_FILE_PATH}max_arrivals_by_stop_single_route.parquet")
225+
max_arrivals_by_stop_single = gcs_pandas().read_parquet(f"{GCS_FILE_PATH}max_arrivals_by_stop_single_route.parquet")
214226
single_qualify = get_filter_singles(max_arrivals_by_stop_single, MS_TRANSIT_THRESHOLD)
215227

216228
share_counts = {}
@@ -231,4 +243,4 @@ def match_spatial_format(branching_stops_gdf: gpd.GeoDataFrame) -> gpd.GeoDataFr
231243
this_feed_stops = find_stops_this_feed(gtfs_dataset_key, max_arrivals_by_stop_single, unique_qualify_pairs)
232244
hcd_branching_stops += [this_feed_stops]
233245
hcd_branching_stops = pd.concat(hcd_branching_stops).pipe(match_spatial_format)
234-
gcsgp.geo_data_frame_to_parquet(hcd_branching_stops, f"{GCS_FILE_PATH}branching_major_stops.parquet")
246+
gcs_geopandas().geo_data_frame_to_parquet(hcd_branching_stops, f"{GCS_FILE_PATH}branching_major_stops.parquet")

high_quality_transit_areas/create_bus_hqta_types.py

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212

1313
import datetime
1414
import sys
15+
from functools import cache
1516

1617
import _utils
1718
import geopandas as gpd
@@ -20,6 +21,7 @@
2021
from _utils import append_analysis_name
2122
from calitp_data_analysis import utils
2223
from calitp_data_analysis.gcs_geopandas import GCSGeoPandas
24+
from calitp_data_analysis.gcs_pandas import GCSPandas
2325
from loguru import logger
2426
from prep_pairwise_intersections import prep_bus_corridors
2527
from segment_speed_utils import helpers
@@ -30,15 +32,23 @@
3032
analysis_date,
3133
)
3234

33-
gcsgp = GCSGeoPandas()
35+
36+
@cache
37+
def gcs_pandas():
38+
return GCSPandas()
39+
40+
41+
@cache
42+
def gcs_geopandas():
43+
return GCSGeoPandas()
3444

3545

3646
def buffer_around_intersections(buffer_size: int) -> gpd.GeoDataFrame:
3747
"""
3848
Draw 500 ft buffers around intersections to better catch stops
3949
that might fall within it.
4050
"""
41-
gdf = gcsgp.read_parquet(f"{GCS_FILE_PATH}all_intersections.parquet")
51+
gdf = gcs_geopandas().read_parquet(f"{GCS_FILE_PATH}all_intersections.parquet")
4252

4353
gdf = gdf.assign(geometry=gdf.geometry.buffer(buffer_size))
4454

@@ -150,13 +160,13 @@ def create_stops_along_corridors(all_stops: gpd.GeoDataFrame) -> gpd.GeoDataFram
150160
print(all_stops.head(3))
151161

152162
# add geometry to branching major stops
153-
major_stop_bus_branching = pd.read_parquet(f"{GCS_FILE_PATH}branching_major_stops.parquet")
163+
major_stop_bus_branching = gcs_pandas().read_parquet(f"{GCS_FILE_PATH}branching_major_stops.parquet")
154164
major_stop_bus_branching = all_stops.merge(
155165
major_stop_bus_branching,
156166
left_on=["schedule_gtfs_dataset_key", "stop_id"],
157167
right_on=["schedule_gtfs_dataset_key_primary", "stop_id"],
158168
).drop(columns=["schedule_gtfs_dataset_key", "analysis_date"])
159-
gcsgp.geo_data_frame_to_parquet(major_stop_bus_branching, f"{GCS_FILE_PATH}branching_major_stops.parquet")
169+
gcs_geopandas().geo_data_frame_to_parquet(major_stop_bus_branching, f"{GCS_FILE_PATH}branching_major_stops.parquet")
160170

161171
# Create hqta_type == major_stop_bus
162172
major_stop_bus = create_major_stop_bus(all_stops, bus_intersections)

high_quality_transit_areas/logs/hqta_processing.log

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -400,3 +400,35 @@
400400
2026-01-08 16:08:33.699 | INFO | __main__:<module>:273 - D1_assemble_hqta_points 2025-11-05 execution time: 0:00:23.992076
401401
2026-01-08 16:13:53.756 | INFO | __main__:<module>:285 - D1_assemble_hqta_points 2025-11-05 execution time: 0:00:19.840264
402402
2026-01-09 18:01:29.081 | INFO | __main__:<module>:155 - D2_assemble_hqta_polygons 2025-11-05 execution time: 0:00:19.152265
403+
2026-02-05 13:37:46.826 | INFO | __main__:<module>:276 - A1_rail_ferry_brt_stops 2026-01-14
404+
2026-02-05 13:38:33.980 | INFO | __main__:<module>:302 - A1_rail_ferry_brt_stops 2026-01-14 execution time: 0:00:47.153847
405+
2026-02-05 13:49:07.069 | INFO | __main__:<module>:228 - B1_create_hqta_segments execution time: 0:10:07.248304
406+
2026-02-05 13:51:39.982 | INFO | __main__:<module>:443 - B2_create_aggregate_stop_frequencies 2026-01-14 execution time: 0:02:06.559366
407+
2026-02-05 13:55:57.222 | INFO | __main__:<module>:262 - B3_sjoin_stops_to_segments 2026-01-14 execution time: 0:00:25.097832
408+
2026-02-05 21:56:24.489 | INFO | __main__:<module>:179 - C1_prep_pairwise_intersections 2026-01-14 execution time: 0:00:18.614221
409+
2026-02-05 21:56:39.801 | INFO | __main__:<module>:121 - C2_find_intersections 2026-01-14 execution time: 0:00:08.823675
410+
2026-02-05 15:16:20.240 | INFO | __main__:<module>:191 - C3_create_bus_hqta_types 2026-01-14 execution time: 0:00:22.901378
411+
2026-02-05 23:33:42.610 | INFO | __main__:<module>:258 - D1_assemble_hqta_points 2026-01-14 execution time: 0:00:22.953447
412+
2026-02-05 23:34:07.316 | INFO | __main__:<module>:155 - D2_assemble_hqta_polygons 2026-01-14 execution time: 0:00:12.848109
413+
2026-02-05 16:26:57.552 | INFO | __main__:<module>:276 - A1_rail_ferry_brt_stops 2025-12-17
414+
2026-02-05 16:27:39.474 | INFO | __main__:<module>:302 - A1_rail_ferry_brt_stops 2025-12-17 execution time: 0:00:41.922086
415+
2026-02-05 16:31:42.029 | INFO | __main__:<module>:276 - A1_rail_ferry_brt_stops 2025-12-17
416+
2026-02-05 16:32:22.407 | INFO | __main__:<module>:302 - A1_rail_ferry_brt_stops 2025-12-17 execution time: 0:00:40.378720
417+
2026-02-05 16:42:35.536 | INFO | __main__:<module>:228 - B1_create_hqta_segments execution time: 0:10:00.052423
418+
2026-02-05 16:45:02.080 | INFO | __main__:<module>:443 - B2_create_aggregate_stop_frequencies 2025-12-17 execution time: 0:02:00.151462
419+
2026-02-05 16:45:40.313 | INFO | __main__:<module>:262 - B3_sjoin_stops_to_segments 2025-12-17 execution time: 0:00:24.504071
420+
2026-02-06 00:46:06.501 | INFO | __main__:<module>:179 - C1_prep_pairwise_intersections 2025-12-17 execution time: 0:00:17.352202
421+
2026-02-06 00:46:19.973 | INFO | __main__:<module>:121 - C2_find_intersections 2025-12-17 execution time: 0:00:06.640792
422+
2026-02-05 16:48:20.891 | INFO | __main__:<module>:191 - C3_create_bus_hqta_types 2025-12-17 execution time: 0:00:21.139318
423+
2026-02-06 00:48:50.522 | INFO | __main__:<module>:258 - D1_assemble_hqta_points 2025-12-17 execution time: 0:00:18.876628
424+
2026-02-06 00:49:15.393 | INFO | __main__:<module>:155 - D2_assemble_hqta_polygons 2025-12-17 execution time: 0:00:12.884344
425+
2026-02-05 16:50:26.805 | INFO | __main__:<module>:276 - A1_rail_ferry_brt_stops 2026-01-14
426+
2026-02-05 16:51:03.955 | INFO | __main__:<module>:302 - A1_rail_ferry_brt_stops 2026-01-14 execution time: 0:00:37.150331
427+
2026-02-05 17:01:39.440 | INFO | __main__:<module>:228 - B1_create_hqta_segments execution time: 0:10:21.503977
428+
2026-02-05 17:04:16.680 | INFO | __main__:<module>:443 - B2_create_aggregate_stop_frequencies 2026-01-14 execution time: 0:02:08.766302
429+
2026-02-05 17:05:06.806 | INFO | __main__:<module>:262 - B3_sjoin_stops_to_segments 2026-01-14 execution time: 0:00:27.600135
430+
2026-02-06 01:05:35.375 | INFO | __main__:<module>:179 - C1_prep_pairwise_intersections 2026-01-14 execution time: 0:00:19.387152
431+
2026-02-06 01:05:49.204 | INFO | __main__:<module>:121 - C2_find_intersections 2026-01-14 execution time: 0:00:06.962931
432+
2026-02-05 17:07:56.124 | INFO | __main__:<module>:191 - C3_create_bus_hqta_types 2026-01-14 execution time: 0:00:20.951075
433+
2026-02-06 01:08:26.222 | INFO | __main__:<module>:258 - D1_assemble_hqta_points 2026-01-14 execution time: 0:00:19.607352
434+
2026-02-06 01:08:51.252 | INFO | __main__:<module>:155 - D2_assemble_hqta_polygons 2026-01-14 execution time: 0:00:12.909884

0 commit comments

Comments
 (0)