Use cached GCSPandas/GCSGeoPandas helpers in HQTA scripts

edasmalchi · edasmalchi · commit 45442811a95c · 2026-02-06T01:25:38.000Z
diff --git a/high_quality_transit_areas/22_debug_2026.ipynb b/high_quality_transit_areas/22_debug_2026.ipynb
@@ -13,7 +13,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 3,
    "id": "92bed811-566b-4bd5-925f-b58e755166ad",
    "metadata": {},
    "outputs": [],
@@ -33,12 +33,90 @@
     "    PROJECT_CRS,\n",
     "    SEGMENT_BUFFER_METERS,\n",
     "    analysis_date,\n",
+    "    MPO_DATA_PATH\n",
     ")\n",
     "\n",
     "from calitp_data_analysis.gcs_geopandas import GCSGeoPandas\n",
     "gcsgp = GCSGeoPandas()"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "c5054234-5a23-48fe-8bd3-18531ed7194a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from calitp_data_analysis import geography_utils, get_fs, utils\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "2aed4677-6473-412b-b940-8e00c7465d4b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "fs = get_fs()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "1a09fe2c-fb17-4c0d-83db-82e54a7f5b37",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def read_standardize_mpo_input(mpo_data_path=MPO_DATA_PATH, fs=fs) -> gpd.GeoDataFrame:\n",
+    "    \"\"\"Read MPO-provided planned major transit stops and enforce schema.\n",
+    "    Lists mpo_data_path with fs, reads each <name>.geojson, asserts the required columns, keeps known columns, concatenates.\n",
+    "    \"\"\"\n",
+    "    mpo_names = [x.split(\"/\")[-1].split(\".\")[0] for x in fs.ls(mpo_data_path) if x.split(\"/\")[-1] not in (\"\", \"mpo_input\")]  # '' = listing entry ending in '/'\n",
+    "    required_cols = [\"mpo\", \"hqta_type\", \"plan_name\"]\n",
+    "    optional_cols = [\"stop_id\", \"avg_trips_per_peak_hr\", \"agency_primary\"]\n",
+    "    all_cols = required_cols + optional_cols + [\"geometry\"]\n",
+    "\n",
+    "    mpo_gdfs = []\n",
+    "    for mpo_name in mpo_names:\n",
+    "        mpo_gdf = gcs_geopandas().read_file(f\"{mpo_data_path}{mpo_name}.geojson\")  # honor the argument, not the global\n",
+    "        assert set(required_cols).issubset(mpo_gdf.columns)\n",
+    "        filter_cols = [col for col in all_cols if col in mpo_gdf.columns]\n",
+    "        mpo_gdf = mpo_gdf[filter_cols]\n",
+    "        mpo_gdfs += [mpo_gdf]\n",
+    "    return pd.concat(mpo_gdfs)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "5ab13f33-e089-4846-a42c-dfcbf19cc934",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "mpo_names = [x.split(\"/\")[-1].split(\".\")[0] for x in fs.ls(MPO_DATA_PATH) if x.split(\"/\")[-1] != \"mpo_input\"]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "eb335365-8a7c-465c-9af2-2fcf748d6722",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "['', 'mtc', 'sacog', 'sandag', 'scag']"
+      ]
+     },
+     "execution_count": 8,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "mpo_names"
+   ]
+  },
   {
    "cell_type": "markdown",
    "id": "e23cf134-ea3d-47c1-93e9-0c729a417d61",
diff --git a/high_quality_transit_areas/Makefile b/high_quality_transit_areas/Makefile
@@ -1,7 +1,7 @@
 hqta_data:
-#	python rail_ferry_brt_stops.py
-#	python create_hqta_segments.py
-#	python create_aggregate_stop_frequencies.py
+	python rail_ferry_brt_stops.py
+	python create_hqta_segments.py
+	python create_aggregate_stop_frequencies.py
 	python sjoin_stops_to_segments.py
 	python prep_pairwise_intersections.py
 	python get_intersections.py
diff --git a/high_quality_transit_areas/assemble_hqta_points.py b/high_quality_transit_areas/assemble_hqta_points.py
@@ -11,6 +11,7 @@
 
 import datetime
 import sys
+from functools import cache
 
 import _utils
 import geopandas as gpd
@@ -19,6 +20,7 @@
 import pandas as pd
 from calitp_data_analysis import geography_utils, get_fs, utils
 from calitp_data_analysis.gcs_geopandas import GCSGeoPandas
+from calitp_data_analysis.gcs_pandas import GCSPandas
 from calitp_data_analysis.sql import query_sql
 from loguru import logger
 from shared_utils import gtfs_utils_v2
@@ -30,7 +32,17 @@
     analysis_date,
 )
 
-gcsgp = GCSGeoPandas()
+
+@cache
+def gcs_pandas() -> GCSPandas:
+    return GCSPandas()  # @cache memoizes this zero-arg call: one client, built on first use rather than at import
+
+
+@cache
+def gcs_geopandas() -> GCSGeoPandas:
+    return GCSGeoPandas()  # @cache memoizes this zero-arg call: one client, built on first use rather than at import
+
+
 fs = get_fs()
 catalog = intake.open_catalog("*.yml")
 
@@ -48,10 +60,14 @@ def combine_stops_by_hq_types(crs: str) -> gpd.GeoDataFrame:
 
     trip_count_cols = ["am_max_trips_hr", "pm_max_trips_hr"]
 
-    max_arrivals = pd.read_parquet(
-        f"{GCS_FILE_PATH}max_arrivals_by_stop.parquet",
-        columns=["schedule_gtfs_dataset_key", "stop_id"] + trip_count_cols,
-    ).pipe(_utils.primary_rename)
+    max_arrivals = (
+        gcs_pandas()
+        .read_parquet(
+            f"{GCS_FILE_PATH}max_arrivals_by_stop.parquet",
+            columns=["schedule_gtfs_dataset_key", "stop_id"] + trip_count_cols,
+        )
+        .pipe(_utils.primary_rename)
+    )
 
     # Combine AM max and PM max into 1 column
     # if am_max_trips = 4 and pm_max_trips = 5, we'll choose 4.
@@ -188,15 +204,19 @@ def final_processing_gtfs(gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
     return gdf3
 
 
-def read_standardize_mpo_input(mpo_data_path=MPO_DATA_PATH, gcsgp=gcsgp, fs=fs) -> gpd.GeoDataFrame:
+def read_standardize_mpo_input(mpo_data_path=MPO_DATA_PATH, fs=fs) -> gpd.GeoDataFrame:
     """
     Read in mpo-provided planned major transit stops and enforce schema.
     """
-    mpo_names = [x.split("/")[-1].split(".")[0] for x in fs.ls(MPO_DATA_PATH) if x.split("/")[-1] != "mpo_input"]
+    mpo_names = [
+        x.split("/")[-1].split(".")[0]
+        for x in fs.ls(MPO_DATA_PATH)
+        if x.split("/")[-1] and x.split("/")[-1] != "mpo_input"
+    ]
 
     mpo_gdfs = []
     for mpo_name in mpo_names:
-        mpo_gdf = gcsgp.read_file(f"{MPO_DATA_PATH}{mpo_name}.geojson")
+        mpo_gdf = gcs_geopandas().read_file(f"{MPO_DATA_PATH}{mpo_name}.geojson")
         required_cols = ["mpo", "hqta_type", "plan_name"]
         optional_cols = ["stop_id", "avg_trips_per_peak_hr", "agency_primary"]
         all_cols = required_cols + optional_cols + ["geometry"]
diff --git a/high_quality_transit_areas/branching_derived_intersections.py b/high_quality_transit_areas/branching_derived_intersections.py
@@ -1,10 +1,13 @@
+from functools import cache
+
 import create_aggregate_stop_frequencies
 import geopandas as gpd
 import lookback_wrappers
 import numpy as np
 import pandas as pd
 from _utils import append_analysis_name
 from calitp_data_analysis.gcs_geopandas import GCSGeoPandas
+from calitp_data_analysis.gcs_pandas import GCSPandas
 from calitp_data_analysis.geography_utils import CA_NAD83Albers_m
 from IPython.display import Markdown, display
 from segment_speed_utils import gtfs_schedule_wrangling, helpers
@@ -18,9 +21,18 @@
     analysis_date,
 )
 
-tqdm.pandas()
 
-gcsgp = GCSGeoPandas()
+@cache
+def gcs_pandas() -> GCSPandas:
+    return GCSPandas()  # @cache memoizes this zero-arg call: one client, built on first use rather than at import
+
+
+@cache
+def gcs_geopandas() -> GCSGeoPandas:
+    return GCSGeoPandas()  # @cache memoizes this zero-arg call: one client, built on first use rather than at import
+
+
+tqdm.pandas()
 
 
 def get_filter_singles(single_route_aggregation: pd.DataFrame, ms_precursor_threshold: int | float) -> pd.DataFrame:
@@ -210,7 +222,7 @@ def match_spatial_format(branching_stops_gdf: gpd.GeoDataFrame) -> gpd.GeoDataFr
 
     shapes = get_shapes_with_lookback(analysis_date, published_operators_dict, lookback_trips_ix)
 
-    max_arrivals_by_stop_single = pd.read_parquet(f"{GCS_FILE_PATH}max_arrivals_by_stop_single_route.parquet")
+    max_arrivals_by_stop_single = gcs_pandas().read_parquet(f"{GCS_FILE_PATH}max_arrivals_by_stop_single_route.parquet")
     single_qualify = get_filter_singles(max_arrivals_by_stop_single, MS_TRANSIT_THRESHOLD)
 
     share_counts = {}
@@ -231,4 +243,4 @@ def match_spatial_format(branching_stops_gdf: gpd.GeoDataFrame) -> gpd.GeoDataFr
         this_feed_stops = find_stops_this_feed(gtfs_dataset_key, max_arrivals_by_stop_single, unique_qualify_pairs)
         hcd_branching_stops += [this_feed_stops]
     hcd_branching_stops = pd.concat(hcd_branching_stops).pipe(match_spatial_format)
-    gcsgp.geo_data_frame_to_parquet(hcd_branching_stops, f"{GCS_FILE_PATH}branching_major_stops.parquet")
+    gcs_geopandas().geo_data_frame_to_parquet(hcd_branching_stops, f"{GCS_FILE_PATH}branching_major_stops.parquet")
diff --git a/high_quality_transit_areas/create_bus_hqta_types.py b/high_quality_transit_areas/create_bus_hqta_types.py
@@ -12,6 +12,7 @@
 
 import datetime
 import sys
+from functools import cache
 
 import _utils
 import geopandas as gpd
@@ -20,6 +21,7 @@
 from _utils import append_analysis_name
 from calitp_data_analysis import utils
 from calitp_data_analysis.gcs_geopandas import GCSGeoPandas
+from calitp_data_analysis.gcs_pandas import GCSPandas
 from loguru import logger
 from prep_pairwise_intersections import prep_bus_corridors
 from segment_speed_utils import helpers
@@ -30,15 +32,23 @@
     analysis_date,
 )
 
-gcsgp = GCSGeoPandas()
+
+@cache
+def gcs_pandas() -> GCSPandas:
+    return GCSPandas()  # @cache memoizes this zero-arg call: one client, built on first use rather than at import
+
+
+@cache
+def gcs_geopandas() -> GCSGeoPandas:
+    return GCSGeoPandas()  # @cache memoizes this zero-arg call: one client, built on first use rather than at import
 
 
 def buffer_around_intersections(buffer_size: int) -> gpd.GeoDataFrame:
     """
     Draw 500 ft buffers around intersections to better catch stops
     that might fall within it.
     """
-    gdf = gcsgp.read_parquet(f"{GCS_FILE_PATH}all_intersections.parquet")
+    gdf = gcs_geopandas().read_parquet(f"{GCS_FILE_PATH}all_intersections.parquet")
 
     gdf = gdf.assign(geometry=gdf.geometry.buffer(buffer_size))
 
@@ -150,13 +160,13 @@ def create_stops_along_corridors(all_stops: gpd.GeoDataFrame) -> gpd.GeoDataFram
     print(all_stops.head(3))
 
     # add geometry to branching major stops
-    major_stop_bus_branching = pd.read_parquet(f"{GCS_FILE_PATH}branching_major_stops.parquet")
+    major_stop_bus_branching = gcs_pandas().read_parquet(f"{GCS_FILE_PATH}branching_major_stops.parquet")
     major_stop_bus_branching = all_stops.merge(
         major_stop_bus_branching,
         left_on=["schedule_gtfs_dataset_key", "stop_id"],
         right_on=["schedule_gtfs_dataset_key_primary", "stop_id"],
     ).drop(columns=["schedule_gtfs_dataset_key", "analysis_date"])
-    gcsgp.geo_data_frame_to_parquet(major_stop_bus_branching, f"{GCS_FILE_PATH}branching_major_stops.parquet")
+    gcs_geopandas().geo_data_frame_to_parquet(major_stop_bus_branching, f"{GCS_FILE_PATH}branching_major_stops.parquet")
 
     # Create hqta_type == major_stop_bus
     major_stop_bus = create_major_stop_bus(all_stops, bus_intersections)
diff --git a/high_quality_transit_areas/logs/hqta_processing.log b/high_quality_transit_areas/logs/hqta_processing.log
@@ -400,3 +400,35 @@
 2026-01-08 16:08:33.699 | INFO     | __main__:<module>:273 - D1_assemble_hqta_points 2025-11-05 execution time: 0:00:23.992076
 2026-01-08 16:13:53.756 | INFO     | __main__:<module>:285 - D1_assemble_hqta_points 2025-11-05 execution time: 0:00:19.840264
 2026-01-09 18:01:29.081 | INFO     | __main__:<module>:155 - D2_assemble_hqta_polygons 2025-11-05 execution time: 0:00:19.152265
+2026-02-05 13:37:46.826 | INFO     | __main__:<module>:276 - A1_rail_ferry_brt_stops 2026-01-14
+2026-02-05 13:38:33.980 | INFO     | __main__:<module>:302 - A1_rail_ferry_brt_stops 2026-01-14 execution time: 0:00:47.153847
+2026-02-05 13:49:07.069 | INFO     | __main__:<module>:228 - B1_create_hqta_segments execution time: 0:10:07.248304
+2026-02-05 13:51:39.982 | INFO     | __main__:<module>:443 - B2_create_aggregate_stop_frequencies 2026-01-14 execution time: 0:02:06.559366
+2026-02-05 13:55:57.222 | INFO     | __main__:<module>:262 - B3_sjoin_stops_to_segments 2026-01-14 execution time: 0:00:25.097832
+2026-02-05 21:56:24.489 | INFO     | __main__:<module>:179 - C1_prep_pairwise_intersections 2026-01-14 execution time: 0:00:18.614221
+2026-02-05 21:56:39.801 | INFO     | __main__:<module>:121 - C2_find_intersections 2026-01-14 execution time: 0:00:08.823675
+2026-02-05 15:16:20.240 | INFO     | __main__:<module>:191 - C3_create_bus_hqta_types 2026-01-14 execution time: 0:00:22.901378
+2026-02-05 23:33:42.610 | INFO     | __main__:<module>:258 - D1_assemble_hqta_points 2026-01-14 execution time: 0:00:22.953447
+2026-02-05 23:34:07.316 | INFO     | __main__:<module>:155 - D2_assemble_hqta_polygons 2026-01-14 execution time: 0:00:12.848109
+2026-02-05 16:26:57.552 | INFO     | __main__:<module>:276 - A1_rail_ferry_brt_stops 2025-12-17
+2026-02-05 16:27:39.474 | INFO     | __main__:<module>:302 - A1_rail_ferry_brt_stops 2025-12-17 execution time: 0:00:41.922086
+2026-02-05 16:31:42.029 | INFO     | __main__:<module>:276 - A1_rail_ferry_brt_stops 2025-12-17
+2026-02-05 16:32:22.407 | INFO     | __main__:<module>:302 - A1_rail_ferry_brt_stops 2025-12-17 execution time: 0:00:40.378720
+2026-02-05 16:42:35.536 | INFO     | __main__:<module>:228 - B1_create_hqta_segments execution time: 0:10:00.052423
+2026-02-05 16:45:02.080 | INFO     | __main__:<module>:443 - B2_create_aggregate_stop_frequencies 2025-12-17 execution time: 0:02:00.151462
+2026-02-05 16:45:40.313 | INFO     | __main__:<module>:262 - B3_sjoin_stops_to_segments 2025-12-17 execution time: 0:00:24.504071
+2026-02-06 00:46:06.501 | INFO     | __main__:<module>:179 - C1_prep_pairwise_intersections 2025-12-17 execution time: 0:00:17.352202
+2026-02-06 00:46:19.973 | INFO     | __main__:<module>:121 - C2_find_intersections 2025-12-17 execution time: 0:00:06.640792
+2026-02-05 16:48:20.891 | INFO     | __main__:<module>:191 - C3_create_bus_hqta_types 2025-12-17 execution time: 0:00:21.139318
+2026-02-06 00:48:50.522 | INFO     | __main__:<module>:258 - D1_assemble_hqta_points 2025-12-17 execution time: 0:00:18.876628
+2026-02-06 00:49:15.393 | INFO     | __main__:<module>:155 - D2_assemble_hqta_polygons 2025-12-17 execution time: 0:00:12.884344
+2026-02-05 16:50:26.805 | INFO     | __main__:<module>:276 - A1_rail_ferry_brt_stops 2026-01-14
+2026-02-05 16:51:03.955 | INFO     | __main__:<module>:302 - A1_rail_ferry_brt_stops 2026-01-14 execution time: 0:00:37.150331
+2026-02-05 17:01:39.440 | INFO     | __main__:<module>:228 - B1_create_hqta_segments execution time: 0:10:21.503977
+2026-02-05 17:04:16.680 | INFO     | __main__:<module>:443 - B2_create_aggregate_stop_frequencies 2026-01-14 execution time: 0:02:08.766302
+2026-02-05 17:05:06.806 | INFO     | __main__:<module>:262 - B3_sjoin_stops_to_segments 2026-01-14 execution time: 0:00:27.600135
+2026-02-06 01:05:35.375 | INFO     | __main__:<module>:179 - C1_prep_pairwise_intersections 2026-01-14 execution time: 0:00:19.387152
+2026-02-06 01:05:49.204 | INFO     | __main__:<module>:121 - C2_find_intersections 2026-01-14 execution time: 0:00:06.962931
+2026-02-05 17:07:56.124 | INFO     | __main__:<module>:191 - C3_create_bus_hqta_types 2026-01-14 execution time: 0:00:20.951075
+2026-02-06 01:08:26.222 | INFO     | __main__:<module>:258 - D1_assemble_hqta_points 2026-01-14 execution time: 0:00:19.607352
+2026-02-06 01:08:51.252 | INFO     | __main__:<module>:155 - D2_assemble_hqta_polygons 2026-01-14 execution time: 0:00:12.909884
diff --git a/high_quality_transit_areas/rail_ferry_brt_stops.py b/high_quality_transit_areas/rail_ferry_brt_stops.py
diff --git a/high_quality_transit_areas/sjoin_stops_to_segments.py b/high_quality_transit_areas/sjoin_stops_to_segments.py
diff --git a/high_quality_transit_areas/update_vars.py b/high_quality_transit_areas/update_vars.py