Skip to content

Commit 5ec3538

Browse files
author
tiffanychu90
committed
High level stat: GTFS sched and RT operators year to year
1 parent cbac0f1 commit 5ec3538

File tree

1 file changed

+265
-0
lines changed

1 file changed

+265
-0
lines changed
+265
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,265 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "markdown",
5+
"id": "54d7c5a7-d698-4bee-8fde-a5c62e4b5ded",
6+
"metadata": {},
7+
"source": [
8+
"# GTFS Schedule and RT compliant operators\n",
9+
"\n",
10+
"High-level metric: how many distinct ITP IDs we track with GTFS Schedule and GTFS RT data, compared year to year.\n",
11+
"\n",
12+
"* [Slack request](https://cal-itp.slack.com/archives/C014Q6G3VCJ/p1657141675073339)\n",
13+
"* GTFS Schedule fact daily feeds: https://dbt-docs.calitp.org/#!/model/model.calitp_warehouse.gtfs_schedule_fact_daily\n",
14+
" * this table is pre-aggregated by date, so just grab the `n_distinct_itp_ids` column from here\n",
15+
"* GTFS RT fact daily feeds: https://dbt-docs.calitp.org/#!/model/model.calitp_warehouse.gtfs_rt_fact_daily_feeds\n",
16+
" * no pre-aggregated count here, so count distinct ITP IDs per date ourselves, modeled after how the GTFS Schedule table does it"
17+
]
18+
},
19+
{
20+
"cell_type": "code",
21+
"execution_count": 1,
22+
"id": "60c34ea5-0e77-4f63-8c2a-a0ef1b30f418",
23+
"metadata": {},
24+
"outputs": [
25+
{
26+
"name": "stderr",
27+
"output_type": "stream",
28+
"text": [
29+
"/opt/conda/lib/python3.10/site-packages/geopandas/_compat.py:111: UserWarning: The Shapely GEOS version (3.10.2-CAPI-1.16.0) is incompatible with the GEOS version PyGEOS was compiled with (3.10.1-CAPI-1.16.0). Conversions between both will be slow.\n",
30+
" warnings.warn(\n"
31+
]
32+
}
33+
],
34+
"source": [
35+
"import os\n",
36+
"import pandas as pd\n",
37+
"\n",
38+
"from calitp.tables import tbl\n",
39+
"from siuba import *"
40+
]
41+
},
42+
{
43+
"cell_type": "code",
44+
"execution_count": 2,
45+
"id": "70c563bd-5993-441d-b74b-b63754a4eace",
46+
"metadata": {},
47+
"outputs": [],
48+
"source": [
49+
"gtfs_sched_operators = (\n",
50+
" tbl.views.gtfs_schedule_fact_daily()\n",
51+
" >> select(_.date, _.n_distinct_itp_ids)\n",
52+
" >> collect()\n",
53+
")\n",
54+
"\n",
55+
"gtfs_rt_operators = (\n",
56+
" tbl.views.gtfs_rt_fact_daily_feeds()\n",
57+
" >> select(_.calitp_itp_id, _.date)\n",
58+
" >> distinct()\n",
59+
" >> group_by(_.date)\n",
60+
" >> summarize(n_distinct_itp_ids = _.calitp_itp_id.nunique())\n",
61+
" >> collect() \n",
62+
")"
63+
]
64+
},
65+
{
66+
"cell_type": "code",
67+
"execution_count": 3,
68+
"id": "89bc4015-881b-4c7d-8c4e-b5d1dc64a6fb",
69+
"metadata": {},
70+
"outputs": [],
71+
"source": [
72+
"def parse_date(df):\n",
73+
" df = df.assign(\n",
74+
" date = pd.to_datetime(df.date)\n",
75+
" ).sort_values(\"date\").reset_index(drop=True)\n",
76+
" \n",
77+
" return df\n",
78+
"\n",
79+
"def select_start_end(df, start, end):\n",
80+
" df2 = parse_date(df)\n",
81+
" \n",
82+
" df3 = df2[(df2.date==start) | \n",
83+
" (df2.date==end)].reset_index(drop=True)\n",
84+
" \n",
85+
" return df3"
86+
]
87+
},
88+
{
89+
"cell_type": "markdown",
90+
"id": "945e61c8-2181-44ff-9905-8b43756be43a",
91+
"metadata": {},
92+
"source": [
93+
"## GTFS Schedule - unique ITP IDs year to year"
94+
]
95+
},
96+
{
97+
"cell_type": "code",
98+
"execution_count": 4,
99+
"id": "8833bace-1229-4aec-8cb3-69a13cd4e650",
100+
"metadata": {},
101+
"outputs": [
102+
{
103+
"data": {
104+
"text/html": [
105+
"<div>\n",
106+
"<style scoped>\n",
107+
" .dataframe tbody tr th:only-of-type {\n",
108+
" vertical-align: middle;\n",
109+
" }\n",
110+
"\n",
111+
" .dataframe tbody tr th {\n",
112+
" vertical-align: top;\n",
113+
" }\n",
114+
"\n",
115+
" .dataframe thead th {\n",
116+
" text-align: right;\n",
117+
" }\n",
118+
"</style>\n",
119+
"<table border=\"1\" class=\"dataframe\">\n",
120+
" <thead>\n",
121+
" <tr style=\"text-align: right;\">\n",
122+
" <th></th>\n",
123+
" <th>date</th>\n",
124+
" <th>n_distinct_itp_ids</th>\n",
125+
" </tr>\n",
126+
" </thead>\n",
127+
" <tbody>\n",
128+
" <tr>\n",
129+
" <th>0</th>\n",
130+
" <td>2021-07-01</td>\n",
131+
" <td>181</td>\n",
132+
" </tr>\n",
133+
" <tr>\n",
134+
" <th>1</th>\n",
135+
" <td>2022-06-30</td>\n",
136+
" <td>195</td>\n",
137+
" </tr>\n",
138+
" </tbody>\n",
139+
"</table>\n",
140+
"</div>"
141+
],
142+
"text/plain": [
143+
" date n_distinct_itp_ids\n",
144+
"0 2021-07-01 181\n",
145+
"1 2022-06-30 195"
146+
]
147+
},
148+
"execution_count": 4,
149+
"metadata": {},
150+
"output_type": "execute_result"
151+
}
152+
],
153+
"source": [
154+
"start_date = \"2021-07-01\"\n",
155+
"end_date = \"2022-06-30\"\n",
156+
"\n",
157+
"gtfs_sched = select_start_end(gtfs_sched_operators, start_date, end_date)\n",
158+
"gtfs_sched"
159+
]
160+
},
161+
{
162+
"cell_type": "markdown",
163+
"id": "d1769281-5dd9-4ce8-b772-a8a5f47e5527",
164+
"metadata": {},
165+
"source": [
166+
"## GTFS RT - unique ITP IDs year to year\n",
167+
"\n",
168+
"* Earliest RT date is 7/7/21 (pretty close to 7/1/21!), so it is used as the start date in place of 7/1/21"
169+
]
170+
},
171+
{
172+
"cell_type": "code",
173+
"execution_count": 5,
174+
"id": "912ad337-fbe1-4b44-99ab-09aa7acf946e",
175+
"metadata": {},
176+
"outputs": [
177+
{
178+
"data": {
179+
"text/html": [
180+
"<div>\n",
181+
"<style scoped>\n",
182+
" .dataframe tbody tr th:only-of-type {\n",
183+
" vertical-align: middle;\n",
184+
" }\n",
185+
"\n",
186+
" .dataframe tbody tr th {\n",
187+
" vertical-align: top;\n",
188+
" }\n",
189+
"\n",
190+
" .dataframe thead th {\n",
191+
" text-align: right;\n",
192+
" }\n",
193+
"</style>\n",
194+
"<table border=\"1\" class=\"dataframe\">\n",
195+
" <thead>\n",
196+
" <tr style=\"text-align: right;\">\n",
197+
" <th></th>\n",
198+
" <th>date</th>\n",
199+
" <th>n_distinct_itp_ids</th>\n",
200+
" </tr>\n",
201+
" </thead>\n",
202+
" <tbody>\n",
203+
" <tr>\n",
204+
" <th>0</th>\n",
205+
" <td>2021-07-07</td>\n",
206+
" <td>29</td>\n",
207+
" </tr>\n",
208+
" <tr>\n",
209+
" <th>1</th>\n",
210+
" <td>2022-06-30</td>\n",
211+
" <td>79</td>\n",
212+
" </tr>\n",
213+
" </tbody>\n",
214+
"</table>\n",
215+
"</div>"
216+
],
217+
"text/plain": [
218+
" date n_distinct_itp_ids\n",
219+
"0 2021-07-07 29\n",
220+
"1 2022-06-30 79"
221+
]
222+
},
223+
"execution_count": 5,
224+
"metadata": {},
225+
"output_type": "execute_result"
226+
}
227+
],
228+
"source": [
229+
"earliest_rt = pd.to_datetime(gtfs_rt_operators.date.min())\n",
230+
"\n",
231+
"gtfs_rt = select_start_end(gtfs_rt_operators, earliest_rt, end_date)\n",
232+
"gtfs_rt"
233+
]
234+
},
235+
{
236+
"cell_type": "code",
237+
"execution_count": null,
238+
"id": "3977ef63-fe57-4d24-a05c-c792c5e6d065",
239+
"metadata": {},
240+
"outputs": [],
241+
"source": []
242+
}
243+
],
244+
"metadata": {
245+
"kernelspec": {
246+
"display_name": "Python 3 (ipykernel)",
247+
"language": "python",
248+
"name": "python3"
249+
},
250+
"language_info": {
251+
"codemirror_mode": {
252+
"name": "ipython",
253+
"version": 3
254+
},
255+
"file_extension": ".py",
256+
"mimetype": "text/x-python",
257+
"name": "python",
258+
"nbconvert_exporter": "python",
259+
"pygments_lexer": "ipython3",
260+
"version": "3.10.4"
261+
}
262+
},
263+
"nbformat": 4,
264+
"nbformat_minor": 5
265+
}

0 commit comments

Comments
 (0)