Skip to content

Commit 68d3462

Browse files
committed
schedule id change analyses
1 parent f2132c2 commit 68d3462

File tree

7 files changed

+767
-1
lines changed

7 files changed

+767
-1
lines changed

_template.tpl

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
{% extends 'index.md.j2' %}
2+
3+
{% block input_group %}
4+
<details>
5+
6+
<summary>show code</summary>
7+
8+
{{ super() }}
9+
10+
</details>
11+
{% endblock %}

gtfs_report_template/example.ipynb

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,8 @@
2323
"**Generating this example:**\n",
2424
"\n",
2525
"```\n",
26-
"jupyter nbconvert --template_file _template.tpl"
26+
"jupyter nbconvert --template_file _template.tpl\n",
27+
"```"
2728
]
2829
},
2930
{

gtfs_schedule_id_changes/gtfs_schedule_id_changes.ipynb

Lines changed: 404 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 350 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,350 @@
1+
<details>
2+
3+
<summary>show code</summary>
4+
5+
6+
7+
```python
8+
from siuba import *
9+
from siuba.sql import LazyTbl
10+
from siuba.dply import vector as vec
11+
from siuba.dply.vector import n
12+
13+
from plotnine import *
14+
15+
from sqlalchemy import create_engine
16+
17+
# TODO: once calitp package is up, should be able to use
18+
# that to get the relevant tables
19+
%run ../_gtfs_schedule_views/ipynb/_setup.ipynb
20+
21+
# Table accessor used by every query below (tbl.<name>() returns a table).
# NOTE(review): AutoTable and engine are not defined in this cell; presumably
# they come from the _setup.ipynb notebook run above — confirm.
tbl = AutoTable(
22+
engine,
23+
# Name rewriter: "." becomes "_" and any "test_" substring is dropped.
lambda s: s.replace(".", "_").replace("test_", ""),
24+
# Name predicate: true only for names without "test_" or "__staging".
lambda s: "test_" not in s and "__staging" not in s
25+
)
26+
```
27+
28+
29+
</details>
30+
31+
<details>
32+
33+
<summary>show code</summary>
34+
35+
36+
37+
```python
38+
# Dates of the two snapshots being compared; also shown in the plot titles.
DATE_START="2021-04-16"
39+
DATE_END="2021-05-16"
40+
41+
# Stand-in used to fill a missing calitp_deleted_at when filtering snapshots.
THE_FUTURE="2099-01-01"
42+
43+
44+
# Agency names the feed list is restricted to (via agency_name.isin below).
EXAMPLE_AGENCY_NAMES = [
45+
"Tahoe Truckee Area Regional Transportation",
46+
"Metro",
47+
"Monterey-Salinas Transit",
48+
"Fairfield and Suisun Transit",
49+
"AC Transit",
50+
"Big Blue Bus"
51+
]
52+
```
53+
54+
55+
</details>
56+
57+
<details>
58+
59+
<summary>show code</summary>
60+
61+
62+
63+
```python
64+
# Feeds for the example agencies, keyed by calitp_itp_id / calitp_url_number.
tbl_feeds = (
65+
tbl.gtfs_schedule_calitp_status()
66+
# The `new == old` form inside select presumably renames itp_id and
# url_number to their calitp_-prefixed names — confirm against siuba docs.
>> select(
67+
_.calitp_itp_id == _.itp_id, _.calitp_url_number == _.url_number, _.agency_name
68+
)
69+
>> filter(_.agency_name.isin(EXAMPLE_AGENCY_NAMES))
70+
71+
# Append the url number in parentheses to the agency name, e.g. "AC Transit (0)".
>> mutate(agency_name = _.agency_name + " (" + _.calitp_url_number.astype(str) + ")")
72+
)
73+
74+
# Reusable pipe step: inner-joining onto tbl_feeds limits results to the
# feeds listed there and attaches agency_name.
75+
join_feeds = inner_join(_, tbl_feeds, ["calitp_itp_id", "calitp_url_number"])
76+
77+
tbl_feeds
78+
```
79+
80+
81+
</details>
82+
83+
84+
85+
86+
<div><pre># Source: lazy query
87+
# DB Conn: Engine(bigquery://cal-itp-data-infra/?maximum_bytes_billed=1000000000)
88+
# Preview:
89+
</pre><table border="0" class="dataframe">
90+
<thead>
91+
<tr style="text-align: right;">
92+
<th></th>
93+
<th>calitp_itp_id</th>
94+
<th>calitp_url_number</th>
95+
<th>agency_name</th>
96+
</tr>
97+
</thead>
98+
<tbody>
99+
<tr>
100+
<th>0</th>
101+
<td>0</td>
102+
<td>0</td>
103+
<td>Big Blue Bus (0)</td>
104+
</tr>
105+
<tr>
106+
<th>1</th>
107+
<td>1</td>
108+
<td>1</td>
109+
<td>Fairfield and Suisun Transit (1)</td>
110+
</tr>
111+
<tr>
112+
<th>2</th>
113+
<td>1</td>
114+
<td>2</td>
115+
<td>Fairfield and Suisun Transit (2)</td>
116+
</tr>
117+
<tr>
118+
<th>3</th>
119+
<td>4</td>
120+
<td>0</td>
121+
<td>AC Transit (0)</td>
122+
</tr>
123+
<tr>
124+
<th>4</th>
125+
<td>8</td>
126+
<td>1</td>
127+
<td>Monterey-Salinas Transit (1)</td>
128+
</tr>
129+
</tbody>
130+
</table>
131+
<p>5 rows × 3 columns</p><p># .. may have more rows</p></div>
132+
133+
134+
135+
<details>
136+
137+
<summary>show code</summary>
138+
139+
140+
141+
```python
142+
def query_id_changes(start_table, end_table, id_vars, agg=False):
    """Label each id combination as Added, Removed, or Unchanged.

    Both tables are reduced to the ``id_vars`` columns, tagged with an
    ``is_in_start`` / ``is_in_end`` flag, and full-joined on ``id_vars``.
    A combination whose ``is_in_end`` flag is missing is "Removed", one
    whose ``is_in_start`` flag is missing is "Added", and anything else is
    "Unchanged".

    Args:
        start_table: Table for the start of the comparison.
        end_table: Table for the end of the comparison.
        id_vars: Names of the identifying columns. When ``agg`` is true the
            last name is left out of the final grouping.
        agg: If true, return counts per ``status`` grouped by every id
            column except the last; otherwise return the per-id tallies.

    Returns:
        The result of the siuba pipeline: per-id tallies with a ``status``
        column, or their aggregated counts when ``agg`` is true.
    """
    id_columns = [_[name] for name in id_vars]

    # Flag the rows of each side so that, after the full join, a missing
    # flag shows which side an id combination was absent from.
    flagged_start = start_table >> select(*id_vars) >> mutate(is_in_start=True)
    flagged_end = end_table >> select(*id_vars) >> mutate(is_in_end=True)

    # Order matters: case_when takes the first matching condition.
    status_rules = {
        _.is_in_end.isna(): "Removed",
        _.is_in_start.isna(): "Added",
        True: "Unchanged",
    }

    joined = flagged_start >> full_join(_, flagged_end, id_vars)
    counted = joined >> count(*id_columns, _.is_in_start, _.is_in_end)
    per_id = counted >> mutate(status=case_when(_, status_rules))

    if not agg:
        return per_id

    return per_id >> count(*id_columns[:-1], _.status)
168+
169+
def fetch_date(table, date, future_date=THE_FUTURE):
    """Filter ``table`` down to the rows that apply on ``date``.

    A row is kept when ``calitp_extracted_at <= date`` and
    ``calitp_deleted_at > date``; a missing ``calitp_deleted_at`` is filled
    with ``future_date`` before the comparison.

    Args:
        table: Table with ``calitp_extracted_at`` and ``calitp_deleted_at``
            columns.
        date: Date to compare both columns against.
        future_date: Fill value for a missing ``calitp_deleted_at``.

    Returns:
        ``table`` piped through the two-condition filter.
    """
    extracted_by_date = _.calitp_extracted_at <= date
    deleted_after_date = _.calitp_deleted_at.fillna(future_date) > date
    return table >> filter(extracted_by_date, deleted_after_date)
171+
```
172+
173+
174+
</details>
175+
176+
## Route ID changes
177+
178+
<details>
179+
180+
<summary>show code</summary>
181+
182+
183+
184+
```python
185+
# Snapshots of the routes table at each end of the comparison window.
# fetch_date applies the same extracted-at / deleted-at filter that was
# previously written out inline here, and is what the trip and stop
# sections already use.
routes_start = fetch_date(tbl.gtfs_schedule_type2_routes(), DATE_START)
routes_end = fetch_date(tbl.gtfs_schedule_type2_routes(), DATE_END)
192+
```
193+
194+
195+
</details>
196+
197+
<details>
198+
199+
<summary>show code</summary>
200+
201+
202+
203+
```python
204+
# Columns that identify a route within a feed; also the join keys.
keep_keys = ("calitp_itp_id", "calitp_url_number", "route_id")

route_id_changes = (
    # Full-join the flagged start and end snapshots on the route keys, so a
    # missing flag marks the side a route was absent from.
    full_join(
        routes_start >> select(*keep_keys) >> mutate(is_in_start=True),
        routes_end >> select(*keep_keys) >> mutate(is_in_end=True),
        keep_keys,
    )
    # Tally rows per feed and flag pair (route_id is not a grouping column).
    >> count(_.calitp_itp_id, _.calitp_url_number, _.is_in_start, _.is_in_end)
    >> mutate(
        status=case_when(
            _,
            {
                _.is_in_end.isna(): "Removed",
                _.is_in_start.isna(): "Added",
                True: "Unchanged",
            },
        )
    )
)
225+
```
226+
227+
228+
</details>
229+
230+
<details>
231+
232+
<summary>show code</summary>
233+
234+
235+
236+
```python
237+
# Bar chart of route counts per agency, filled by change status.
(
238+
route_id_changes
239+
# Restrict to the example feeds and attach agency_name.
>> join_feeds
240+
# Presumably executes the lazy query and returns local data — confirm.
>> collect()
241+
>> ggplot(aes("agency_name", "n", fill="status"))
242+
+ geom_col()
243+
# Rotate the x-axis labels by 45 degrees.
+ theme(axis_text_x=element_text(angle=45, hjust=1))
244+
+ labs(title = "Route ID Changes Between %s - %s"% (DATE_START, DATE_END))
245+
)
246+
```
247+
248+
249+
</details>
250+
251+
252+
253+
![png](gtfs_schedule_id_changes_files/gtfs_schedule_id_changes_7_0.png)
254+
255+
256+
257+
258+
259+
260+
<ggplot: (-9223372036540360753)>
261+
262+
263+
264+
## Trip ID changes
265+
266+
<details>
267+
268+
<summary>show code</summary>
269+
270+
271+
272+
```python
273+
# Trip snapshots at each end of the comparison window.
trips_start = fetch_date(tbl.gtfs_schedule_type2_trips(), DATE_START)
274+
trips_end = fetch_date(tbl.gtfs_schedule_type2_trips(), DATE_END)
275+
276+
# Bar chart of trip id counts per agency, filled by change status.
(
277+
query_id_changes(
278+
trips_start,
279+
trips_end,
280+
# trip_id is last, so agg=True groups the final counts by feed only.
["calitp_itp_id", "calitp_url_number", "trip_id"],
281+
agg=True,
282+
)
283+
# Restrict to the example feeds and attach agency_name.
>> join_feeds
284+
>> collect()
285+
>> ggplot(aes("agency_name", "n", fill="status"))
286+
+ geom_col()
287+
# Rotate the x-axis labels by 45 degrees.
+ theme(axis_text_x=element_text(angle=45, hjust=1))
288+
+ labs(title = "Trip ID Changes Between %s - %s"% (DATE_START, DATE_END))
289+
290+
)
291+
```
292+
293+
294+
</details>
295+
296+
297+
298+
![png](gtfs_schedule_id_changes_files/gtfs_schedule_id_changes_9_0.png)
299+
300+
301+
302+
303+
304+
305+
<ggplot: (314465747)>
306+
307+
308+
309+
<details>
310+
311+
<summary>show code</summary>
312+
313+
314+
315+
```python
316+
# Stop snapshots at each end of the comparison window.
stops_start = fetch_date(tbl.gtfs_schedule_type2_stops(), DATE_START)
317+
stops_end = fetch_date(tbl.gtfs_schedule_type2_stops(), DATE_END)
318+
319+
# Bar chart of stop id counts per agency, filled by change status.
(
320+
query_id_changes(
321+
stops_start,
322+
stops_end,
323+
# stop_id is last, so agg=True groups the final counts by feed only.
["calitp_itp_id", "calitp_url_number", "stop_id"],
324+
agg=True,
325+
)
326+
# Restrict to the example feeds and attach agency_name.
>> join_feeds
327+
>> collect()
328+
>> ggplot(aes("agency_name", "n", fill="status"))
329+
+ geom_col()
330+
# Rotate the x-axis labels by 45 degrees.
+ theme(axis_text_x=element_text(angle=45, hjust=1))
331+
+ labs(title = "Stop ID Changes Between %s - %s"% (DATE_START, DATE_END))
332+
333+
)
334+
```
335+
336+
337+
</details>
338+
339+
340+
341+
![png](gtfs_schedule_id_changes_files/gtfs_schedule_id_changes_10_0.png)
342+
343+
344+
345+
346+
347+
348+
<ggplot: (314438706)>
349+
350+
49.7 KB
Loading
49.8 KB
Loading
50.8 KB
Loading

0 commit comments

Comments
 (0)