Skip to content

Commit c9abe05

Browse files
committed
investigate refactor, run vta legacy
1 parent e2768e6 commit c9abe05

File tree

3 files changed

+1722
-88
lines changed

3 files changed

+1722
-88
lines changed

sccp/0_sccp_refactor_scratchpad.ipynb

+335
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,335 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "code",
5+
"execution_count": 1,
6+
"id": "d528f399-cf33-459e-b5dc-92e0b4ae389a",
7+
"metadata": {},
8+
"outputs": [],
9+
"source": [
10+
"import pandas as pd\n",
11+
"import geopandas as gpd\n",
12+
"from siuba import *\n",
13+
"import shared_utils\n",
14+
"\n",
15+
"from calitp_data_analysis import get_fs\n",
16+
"from segment_speed_utils import helpers, time_series_utils, gtfs_schedule_wrangling\n",
17+
"from segment_speed_utils.project_vars import SCHED_GCS, SEGMENT_GCS, GTFS_DATA_DICT, analysis_date"
18+
]
19+
},
20+
{
21+
"cell_type": "code",
22+
"execution_count": 2,
23+
"id": "6cd318e5-5c4e-4249-b158-232f4d940770",
24+
"metadata": {},
25+
"outputs": [],
26+
"source": [
27+
"catalog = shared_utils.catalog_utils.get_catalog('gtfs_analytics_data')"
28+
]
29+
},
30+
{
31+
"cell_type": "code",
32+
"execution_count": 3,
33+
"id": "6e86c1a8-d3de-4fc6-9478-c03133b6745c",
34+
"metadata": {},
35+
"outputs": [
36+
{
37+
"data": {
38+
"text/plain": [
39+
"{'dir': '${gcs_paths.SEGMENT_GCS}', 'stage1': '${speeds_tables.vp_dwell}', 'stage2': 'nearest/nearest_vp_shape_segments', 'stage2b': 'nearest/nearest2_vp_shape_segments', 'stage3': 'stop_arrivals', 'stage4': 'speeds_stop_segments', 'trip_stop_cols': ['trip_instance_key', 'stop_sequence'], 'shape_stop_cols': ['shape_array_key', 'shape_id', 'stop_sequence'], 'stop_pair_cols': ['stop_pair', 'stop_pair_name'], 'route_dir_cols': ['route_id', 'direction_id'], 'shape_stop_single_segment': 'rollup_singleday/speeds_shape_stop_segments', 'route_dir_single_segment': 'rollup_singleday/speeds_route_dir_segments', 'route_dir_multi_segment': 'rollup_multiday/speeds_route_dir_segments', 'segments_file': 'segment_options/shape_stop_segments', 'max_speed': '${speed_vars.max_speed}'}"
40+
]
41+
},
42+
"execution_count": 3,
43+
"metadata": {},
44+
"output_type": "execute_result"
45+
}
46+
],
47+
"source": [
48+
"catalog.stop_segments"
49+
]
50+
},
51+
{
52+
"cell_type": "code",
53+
"execution_count": 4,
54+
"id": "f766b50a-c70d-4681-992c-d7f8979ada69",
55+
"metadata": {},
56+
"outputs": [
57+
{
58+
"data": {
59+
"text/plain": [
60+
"'2024-10-16'"
61+
]
62+
},
63+
"execution_count": 4,
64+
"metadata": {},
65+
"output_type": "execute_result"
66+
}
67+
],
68+
"source": [
69+
"analysis_date"
70+
]
71+
},
72+
{
73+
"cell_type": "markdown",
74+
"id": "a5dbe9fc-5997-4232-ae4b-980dd5dff349",
75+
"metadata": {},
76+
"source": [
77+
"## Is 'rt_stop_times' the dataset I need?\n",
78+
"\n",
79+
"* I need essentially the same data Amanda is using for the bunching analysis, except that I also need the schedule data"
80+
]
81+
},
82+
{
83+
"cell_type": "code",
84+
"execution_count": 5,
85+
"id": "74a301dc-4455-430a-8209-79ed5d431026",
86+
"metadata": {},
87+
"outputs": [
88+
{
89+
"data": {
90+
"text/plain": [
91+
"{'dir': '${gcs_paths.SEGMENT_GCS}', 'stage1': '${speeds_tables.vp_dwell}', 'stage2': 'nearest/nearest_vp_rt_stop_times', 'stage2b': 'nearest/nearest2_vp_rt_stop_times', 'stage3': 'rt_stop_times/stop_arrivals', 'stage4': 'rt_stop_times/speeds', 'trip_stop_cols': ['trip_instance_key', 'stop_sequence'], 'shape_stop_cols': ['shape_array_key', 'shape_id', 'stop_sequence'], 'stop_pair_cols': ['stop_pair', 'stop_pair_name'], 'route_dir_cols': ['route_id', 'direction_id'], 'segments_file': 'segment_options/stop_segments', 'trip_speeds_single_summary': 'rollup_singleday/speeds_trip', 'route_dir_single_summary': 'rollup_singleday/speeds_route_dir', 'route_dir_multi_summary': 'rollup_multiday/speeds_route_dir', 'min_trip_minutes': '${speed_vars.time_min_cutoff}', 'max_trip_minutes': 180, 'max_speed': '${speed_vars.max_speed}', 'min_meters_elapsed': 1609}"
92+
]
93+
},
94+
"execution_count": 5,
95+
"metadata": {},
96+
"output_type": "execute_result"
97+
}
98+
],
99+
"source": [
100+
"catalog.rt_stop_times"
101+
]
102+
},
103+
{
104+
"cell_type": "code",
105+
"execution_count": 6,
106+
"id": "4c8f882f-a60f-4007-9062-6c998088d808",
107+
"metadata": {},
108+
"outputs": [
109+
{
110+
"data": {
111+
"text/plain": [
112+
"{'dir': '${gcs_paths.RT_SCHED_GCS}', 'stop_times_direction': 'stop_times_direction', 'sched_trip_metrics': 'schedule_trip/schedule_trip_metrics', 'sched_route_direction_metrics': 'schedule_route_dir/schedule_route_direction_metrics', 'vp_trip_metrics': 'vp_trip/trip_metrics', 'vp_route_direction_metrics': 'vp_route_dir/route_direction_metrics', 'vp_operator_metrics': 'vp_operator/operator_metrics', 'sched_stop_metrics': 'schedule_stop/schedule_stop_metrics', 'schedule_rt_stop_times': 'schedule_rt_stop_times', 'early_trip_minutes': -5, 'late_trip_minutes': 5}"
113+
]
114+
},
115+
"execution_count": 6,
116+
"metadata": {},
117+
"output_type": "execute_result"
118+
}
119+
],
120+
"source": [
121+
"catalog.rt_vs_schedule_tables"
122+
]
123+
},
124+
{
125+
"cell_type": "code",
126+
"execution_count": 7,
127+
"id": "294b8f67-1486-48e9-8fe9-015b009d8e53",
128+
"metadata": {},
129+
"outputs": [],
130+
"source": [
131+
"path = f'{catalog.schedule_downloads.dir}{catalog.schedule_downloads.trips}_{analysis_date}.parquet'"
132+
]
133+
},
134+
{
135+
"cell_type": "code",
136+
"execution_count": 8,
137+
"id": "508d38d4-086c-43e6-8044-6ded0882e105",
138+
"metadata": {},
139+
"outputs": [
140+
{
141+
"data": {
142+
"text/plain": [
143+
"'gs://calitp-analytics-data/data-analyses/rt_delay/compiled_cached_views/trips_2024-10-16.parquet'"
144+
]
145+
},
146+
"execution_count": 8,
147+
"metadata": {},
148+
"output_type": "execute_result"
149+
}
150+
],
151+
"source": [
152+
"path"
153+
]
154+
},
155+
{
156+
"cell_type": "code",
157+
"execution_count": 9,
158+
"id": "dc3543ea-2544-4917-887e-9de4441fb460",
159+
"metadata": {},
160+
"outputs": [],
161+
"source": [
162+
"trips = pd.read_parquet(path)"
163+
]
164+
},
165+
{
166+
"cell_type": "code",
167+
"execution_count": 12,
168+
"id": "7c8ec6df-fd29-4a16-a063-7e03af3dd9d7",
169+
"metadata": {},
170+
"outputs": [],
171+
"source": [
172+
"path = f'{catalog.rt_vs_schedule_tables.dir}{catalog.rt_vs_schedule_tables.schedule_rt_stop_times}_{analysis_date}.parquet'"
173+
]
174+
},
175+
{
176+
"cell_type": "code",
177+
"execution_count": 14,
178+
"id": "9d95c8be-a1d7-4b9f-814f-57f295d3d1df",
179+
"metadata": {},
180+
"outputs": [],
181+
"source": [
182+
"schedule_rt = pd.read_parquet(path)"
183+
]
184+
},
185+
{
186+
"cell_type": "code",
187+
"execution_count": 15,
188+
"id": "a5a84ce0-3ae4-41c7-a8fb-82caea7aa268",
189+
"metadata": {},
190+
"outputs": [
191+
{
192+
"data": {
193+
"text/html": [
194+
"<div>\n",
195+
"<style scoped>\n",
196+
" .dataframe tbody tr th:only-of-type {\n",
197+
" vertical-align: middle;\n",
198+
" }\n",
199+
"\n",
200+
" .dataframe tbody tr th {\n",
201+
" vertical-align: top;\n",
202+
" }\n",
203+
"\n",
204+
" .dataframe thead th {\n",
205+
" text-align: right;\n",
206+
" }\n",
207+
"</style>\n",
208+
"<table border=\"1\" class=\"dataframe\">\n",
209+
" <thead>\n",
210+
" <tr style=\"text-align: right;\">\n",
211+
" <th></th>\n",
212+
" <th>trip_id</th>\n",
213+
" <th>stop_id</th>\n",
214+
" <th>stop_sequence</th>\n",
215+
" <th>scheduled_arrival_sec</th>\n",
216+
" <th>schedule_gtfs_dataset_key</th>\n",
217+
" <th>trip_instance_key</th>\n",
218+
" <th>rt_arrival_sec</th>\n",
219+
" </tr>\n",
220+
" </thead>\n",
221+
" <tbody>\n",
222+
" <tr>\n",
223+
" <th>0</th>\n",
224+
" <td>t_5912118_b_83127_tn_0</td>\n",
225+
" <td>7728100</td>\n",
226+
" <td>5</td>\n",
227+
" <td>25426.0</td>\n",
228+
" <td>09e16227fc42c4fe90204a9d11581034</td>\n",
229+
" <td>893cb960f2ca78f823bd67adee30eaa3</td>\n",
230+
" <td>25434</td>\n",
231+
" </tr>\n",
232+
" <tr>\n",
233+
" <th>1</th>\n",
234+
" <td>t_5912118_b_83127_tn_0</td>\n",
235+
" <td>7728400</td>\n",
236+
" <td>8</td>\n",
237+
" <td>25559.0</td>\n",
238+
" <td>09e16227fc42c4fe90204a9d11581034</td>\n",
239+
" <td>893cb960f2ca78f823bd67adee30eaa3</td>\n",
240+
" <td>25594</td>\n",
241+
" </tr>\n",
242+
" <tr>\n",
243+
" <th>2</th>\n",
244+
" <td>t_5912118_b_83127_tn_0</td>\n",
245+
" <td>7728900</td>\n",
246+
" <td>12</td>\n",
247+
" <td>25768.0</td>\n",
248+
" <td>09e16227fc42c4fe90204a9d11581034</td>\n",
249+
" <td>893cb960f2ca78f823bd67adee30eaa3</td>\n",
250+
" <td>25751</td>\n",
251+
" </tr>\n",
252+
" </tbody>\n",
253+
"</table>\n",
254+
"</div>"
255+
],
256+
"text/plain": [
257+
" trip_id stop_id stop_sequence scheduled_arrival_sec \\\n",
258+
"0 t_5912118_b_83127_tn_0 7728100 5 25426.0 \n",
259+
"1 t_5912118_b_83127_tn_0 7728400 8 25559.0 \n",
260+
"2 t_5912118_b_83127_tn_0 7728900 12 25768.0 \n",
261+
"\n",
262+
" schedule_gtfs_dataset_key trip_instance_key \\\n",
263+
"0 09e16227fc42c4fe90204a9d11581034 893cb960f2ca78f823bd67adee30eaa3 \n",
264+
"1 09e16227fc42c4fe90204a9d11581034 893cb960f2ca78f823bd67adee30eaa3 \n",
265+
"2 09e16227fc42c4fe90204a9d11581034 893cb960f2ca78f823bd67adee30eaa3 \n",
266+
"\n",
267+
" rt_arrival_sec \n",
268+
"0 25434 \n",
269+
"1 25594 \n",
270+
"2 25751 "
271+
]
272+
},
273+
"execution_count": 15,
274+
"metadata": {},
275+
"output_type": "execute_result"
276+
}
277+
],
278+
"source": [
279+
"schedule_rt >> head(3)"
280+
]
281+
},
282+
{
283+
"cell_type": "markdown",
284+
"id": "158d8fe4-4640-4ae9-84aa-e4b8391f6bdf",
285+
"metadata": {},
286+
"source": [
287+
"Good: this table has both scheduled and real-time arrival times per stop. However, it has no distance column, so speeds can't be computed from it alone."
288+
]
289+
},
290+
{
291+
"cell_type": "markdown",
292+
"id": "0ef4a641-283b-4b68-bce7-8e3d9beb51c9",
293+
"metadata": {},
294+
"source": [
295+
"TODO: could distance be obtained from the cut segments?"
296+
]
297+
},
298+
{
299+
"cell_type": "code",
300+
"execution_count": null,
301+
"id": "ceba93c9-accd-4da7-b77c-6c0a7786cb01",
302+
"metadata": {},
303+
"outputs": [],
304+
"source": []
305+
}
306+
],
307+
"metadata": {
308+
"kernelspec": {
309+
"display_name": "Python 3 (ipykernel)",
310+
"language": "python",
311+
"name": "python3"
312+
},
313+
"language_info": {
314+
"codemirror_mode": {
315+
"name": "ipython",
316+
"version": 3
317+
},
318+
"file_extension": ".py",
319+
"mimetype": "text/x-python",
320+
"name": "python",
321+
"nbconvert_exporter": "python",
322+
"pygments_lexer": "ipython3",
323+
"version": "3.9.13"
324+
},
325+
"widgets": {
326+
"application/vnd.jupyter.widget-state+json": {
327+
"state": {},
328+
"version_major": 2,
329+
"version_minor": 0
330+
}
331+
}
332+
},
333+
"nbformat": 4,
334+
"nbformat_minor": 5
335+
}

sccp/corridors_sccp_lpp/2024/BSVII_BoundingBox_SCVTA.geojson

+8
Large diffs are not rendered by default.

0 commit comments

Comments
 (0)