Skip to content

Commit 80e7c27

Browse files
committed
ran open data for jan
1 parent dc7cfbe commit 80e7c27

File tree

209 files changed

+269
-596
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

209 files changed

+269
-596
lines changed

.bashrc

+2
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
source .profile
2+
source .profile

_shared_utils/shared_utils/rt_dates.py

+1
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,7 @@
7575
"nov2024": "2024-11-13",
7676
"dec2024": "2024-12-11",
7777
"jan2025": "2025-01-15",
78+
"feb2025": "2025-02-12"
7879
}
7980

8081
y2023_dates = [

gtfs_digest/.bash_profile

+2
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
source .profile
2+
source .profile

gtfs_digest/03_report.ipynb

+5-11
Original file line numberDiff line numberDiff line change
@@ -52,8 +52,8 @@
5252
"outputs": [],
5353
"source": [
5454
"# Comment out and leave this cell right below pandas\n",
55-
"organization_name = \"Marin County Transit District\"\n",
56-
"# organization_name = \"Monterey-Salinas Transit\"\n",
55+
"# organization_name = \"Capitol Corridor Joint Powers Authority\"\n",
56+
"organization_name = \"Monterey-Salinas Transit\"\n",
5757
"# organization_name = \"City of Santa Maria\"\n",
5858
"# organization_name = \"City of Montebello\""
5959
]
@@ -63,7 +63,9 @@
6363
"execution_count": null,
6464
"id": "d870c492-ef2c-45f6-ab47-8d46eda7f344",
6565
"metadata": {
66-
"tags": []
66+
"tags": [
67+
"parameters"
68+
]
6769
},
6870
"outputs": [],
6971
"source": [
@@ -540,14 +542,6 @@
540542
"except:\n",
541543
" display(Markdown(f\"\"\"{organization_name} only has schedule data.\"\"\"))"
542544
]
543-
},
544-
{
545-
"cell_type": "code",
546-
"execution_count": null,
547-
"id": "f8f9f2cb-5464-4080-9fda-f342c040d1b9",
548-
"metadata": {},
549-
"outputs": [],
550-
"source": []
551545
}
552546
],
553547
"metadata": {

gtfs_digest/52_feb_run.ipynb

+182
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,182 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "markdown",
5+
"id": "cbb6e02b-aa46-4143-96fc-1d3c9df1000d",
6+
"metadata": {},
7+
"source": [
8+
"## Error when running `gtfs_digest/merge_segment_data.py`\n",
9+
"`Traceback (most recent call last):\n",
10+
" File \"/opt/conda/lib/python3.11/site-packages/dask/dataframe/utils.py\", line 195, in raise_on_meta_error\n",
11+
" yield\n",
12+
" File \"/opt/conda/lib/python3.11/site-packages/dask/dataframe/core.py\", line 7175, in _emulate\n",
13+
" return func(*_extract_meta(args, True), **_extract_meta(kwargs, True))\n",
14+
" ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
15+
" File \"/home/jovyan/data-analyses/_shared_utils/shared_utils/dask_utils.py\", line 134, in import_df_func\n",
16+
" df = pd.read_parquet(\n",
17+
" ^^^^^^^^^^^^^^^^\n",
18+
" File \"/opt/conda/lib/python3.11/site-packages/pandas/io/parquet.py\", line 503, in read_parquet\n",
19+
" return impl.read(\n",
20+
" ^^^^^^^^^^\n",
21+
" File \"/opt/conda/lib/python3.11/site-packages/pandas/io/parquet.py\", line 251, in read\n",
22+
" result = self.api.parquet.read_table(\n",
23+
" ^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
24+
" File \"/opt/conda/lib/python3.11/site-packages/pyarrow/parquet/core.py\", line 1793, in read_table\n",
25+
" dataset = ParquetDataset(\n",
26+
" ^^^^^^^^^^^^^^^\n",
27+
" File \"/opt/conda/lib/python3.11/site-packages/pyarrow/parquet/core.py\", line 1371, in __init__\n",
28+
" self._dataset = ds.dataset(path_or_paths, filesystem=filesystem,\n",
29+
" ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
30+
" File \"/opt/conda/lib/python3.11/site-packages/pyarrow/dataset.py\", line 794, in dataset\n",
31+
" return _filesystem_dataset(source, **kwargs)\n",
32+
" ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
33+
" File \"/opt/conda/lib/python3.11/site-packages/pyarrow/dataset.py\", line 476, in _filesystem_dataset\n",
34+
" fs, paths_or_selector = _ensure_single_source(source, filesystem)\n",
35+
" ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
36+
" File \"/opt/conda/lib/python3.11/site-packages/pyarrow/dataset.py\", line 441, in _ensure_single_source\n",
37+
" raise FileNotFoundError(path)\n",
38+
"FileNotFoundError: calitp-analytics-data/data-analyses/rt_segment_speeds/rollup_singleday/speeds_route_dir_segments_2024-01-17.parquet\n",
39+
"\n",
40+
"The above exception was the direct cause of the following exception:\n",
41+
"\n",
42+
"Traceback (most recent call last):\n",
43+
" File \"/home/jovyan/data-analyses/gtfs_digest/merge_segment_data.py\", line 93, in <module>\n",
44+
" segment_speeds = concatenate_segment_speeds_by_route_direction(\n",
45+
" ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
46+
" File \"/home/jovyan/data-analyses/gtfs_digest/merge_segment_data.py\", line 40, in concatenate_segment_speeds_by_route_direction\n",
47+
" df = time_series_utils.concatenate_datasets_across_dates(\n",
48+
" ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
49+
" File \"/home/jovyan/data-analyses/rt_segment_speeds/segment_speed_utils/time_series_utils.py\", line 30, in concatenate_datasets_across_dates\n",
50+
" df = dask_utils.get_ddf(\n",
51+
" ^^^^^^^^^^^^^^^^^^^\n",
52+
" File \"/home/jovyan/data-analyses/_shared_utils/shared_utils/dask_utils.py\", line 183, in get_ddf\n",
53+
" ddf = dd.from_map(import_df_func, paths, date_list, data_type=data_type, **kwargs).drop_duplicates()\n",
54+
" ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
55+
" File \"/opt/conda/lib/python3.11/site-packages/dask/dataframe/io/io.py\", line 1028, in from_map\n",
56+
" meta = _emulate(\n",
57+
" ^^^^^^^^^\n",
58+
" File \"/opt/conda/lib/python3.11/site-packages/dask/dataframe/core.py\", line 7174, in _emulate\n",
59+
" with raise_on_meta_error(funcname(func), udf=udf), check_numeric_only_deprecation():\n",
60+
" File \"/opt/conda/lib/python3.11/contextlib.py\", line 155, in __exit__\n",
61+
" self.gen.throw(typ, value, traceback)\n",
62+
" File \"/opt/conda/lib/python3.11/site-packages/dask/dataframe/utils.py\", line 216, in raise_on_meta_error\n",
63+
" raise ValueError(msg) from e\n",
64+
"ValueError: Metadata inference failed in `import_df_func`.\n",
65+
"\n",
66+
"You have supplied a custom function and Dask is unable to \n",
67+
"determine the type of output that that function returns. \n",
68+
"\n",
69+
"To resolve this please provide a meta= keyword.\n",
70+
"The docstring of the Dask function you ran should have more information.\n",
71+
"\n",
72+
"Original error is below:\n",
73+
"------------------------\n",
74+
"FileNotFoundError('calitp-analytics-data/data-analyses/rt_segment_speeds/rollup_singleday/speeds_route_dir_segments_2024-01-17.parquet')\n",
75+
"\n",
76+
"Traceback:\n",
77+
"---------\n",
78+
" File \"/opt/conda/lib/python3.11/site-packages/dask/dataframe/utils.py\", line 195, in raise_on_meta_error\n",
79+
" yield\n",
80+
" File \"/opt/conda/lib/python3.11/site-packages/dask/dataframe/core.py\", line 7175, in _emulate\n",
81+
" return func(*_extract_meta(args, True), **_extract_meta(kwargs, True))\n",
82+
" ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
83+
" File \"/home/jovyan/data-analyses/_shared_utils/shared_utils/dask_utils.py\", line 134, in import_df_func\n",
84+
" df = pd.read_parquet(\n",
85+
" ^^^^^^^^^^^^^^^^\n",
86+
" File \"/opt/conda/lib/python3.11/site-packages/pandas/io/parquet.py\", line 503, in read_parquet\n",
87+
" return impl.read(\n",
88+
" ^^^^^^^^^^\n",
89+
" File \"/opt/conda/lib/python3.11/site-packages/pandas/io/parquet.py\", line 251, in read\n",
90+
" result = self.api.parquet.read_table(\n",
91+
" ^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
92+
" File \"/opt/conda/lib/python3.11/site-packages/pyarrow/parquet/core.py\", line 1793, in read_table\n",
93+
" dataset = ParquetDataset(\n",
94+
" ^^^^^^^^^^^^^^^\n",
95+
" File \"/opt/conda/lib/python3.11/site-packages/pyarrow/parquet/core.py\", line 1371, in __init__\n",
96+
" self._dataset = ds.dataset(path_or_paths, filesystem=filesystem,\n",
97+
" ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
98+
" File \"/opt/conda/lib/python3.11/site-packages/pyarrow/dataset.py\", line 794, in dataset\n",
99+
" return _filesystem_dataset(source, **kwargs)\n",
100+
" ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
101+
" File \"/opt/conda/lib/python3.11/site-packages/pyarrow/dataset.py\", line 476, in _filesystem_dataset\n",
102+
" fs, paths_or_selector = _ensure_single_source(source, filesystem)\n",
103+
" ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
104+
" File \"/opt/conda/lib/python3.11/site-packages/pyarrow/dataset.py\", line 441, in _ensure_single_source\n",
105+
" raise FileNotFoundError(path)\n",
106+
"\n",
107+
"WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
108+
"E0000 00:00:1739983694.449132 625 init.cc:232] grpc_wait_for_shutdown_with_timeout() timed out.`"
109+
]
110+
},
111+
{
112+
"cell_type": "code",
113+
"execution_count": 1,
114+
"id": "3d70f4c3-3572-4d2e-b588-226709d48e25",
115+
"metadata": {
116+
"tags": []
117+
},
118+
"outputs": [],
119+
"source": [
120+
"import _section1_utils as section1\n",
121+
"import geopandas as gpd\n",
122+
"import merge_data\n",
123+
"import merge_operator_data\n",
124+
"import numpy as np\n",
125+
"import pandas as pd\n",
126+
"from segment_speed_utils import gtfs_schedule_wrangling, helpers\n",
127+
"from shared_utils import portfolio_utils\n",
128+
"from update_vars import GTFS_DATA_DICT, RT_SCHED_GCS, SCHED_GCS, SEGMENT_GCS"
129+
]
130+
},
131+
{
132+
"cell_type": "code",
133+
"execution_count": 2,
134+
"id": "adfc13e3-098e-41e8-883a-497f85afbea5",
135+
"metadata": {
136+
"tags": []
137+
},
138+
"outputs": [],
139+
"source": [
140+
"pd.options.display.max_columns = 100\n",
141+
"pd.options.display.float_format = \"{:.2f}\".format\n",
142+
"pd.set_option(\"display.max_rows\", None)\n",
143+
"pd.set_option(\"display.max_colwidth\", None)"
144+
]
145+
},
146+
{
147+
"cell_type": "code",
148+
"execution_count": 3,
149+
"id": "58095cec-ca10-40e4-9605-51626170f32a",
150+
"metadata": {
151+
"tags": []
152+
},
153+
"outputs": [],
154+
"source": [
155+
"# FileNotFoundError: \n",
156+
"# calitp-analytics-data/data-analyses/rt_segment_speeds/rollup_singleday/speeds_route_dir_segments_2024-01-17.parquet\n",
157+
"# gs://calitp-analytics-data/data-analyses/rt_segment_speeds/rollup_singleday/speeds_route_dir_2024-01-17.parquet"
158+
]
159+
}
160+
],
161+
"metadata": {
162+
"kernelspec": {
163+
"display_name": "Python 3 (ipykernel)",
164+
"language": "python",
165+
"name": "python3"
166+
},
167+
"language_info": {
168+
"codemirror_mode": {
169+
"name": "ipython",
170+
"version": 3
171+
},
172+
"file_extension": ".py",
173+
"mimetype": "text/x-python",
174+
"name": "python",
175+
"nbconvert_exporter": "python",
176+
"pygments_lexer": "ipython3",
177+
"version": "3.11.6"
178+
}
179+
},
180+
"nbformat": 4,
181+
"nbformat_minor": 5
182+
}

gtfs_digest/Makefile

-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@ assemble_data:
22
python merge_data.py
33
python merge_operator_data.py
44
python merge_operator_service.py
5-
python merge_segment_data.py
65
python publish_public_data.py
76

87

gtfs_funnel/update_vars.py

+1-4
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,7 @@
1111
apr2024_week + rt_dates.y2025_dates
1212
)
1313

14-
all_dates2 = (
15-
rt_dates.y2023_dates +
16-
oct2023_week + apr2023_week +
17-
apr2024_week + rt_dates.y2025_dates
14+
all_dates2 = (rt_dates.y2025_dates
1815
)
1916

2017
# analysis_date_list = [rt_dates.DATES["dec2024"]] + [rt_dates.DATES['nov2024']]

portfolio/gtfs_digest/README.md

+5-3
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
11
# General Transit Feed Specification (GTFS) Digest
2-
The goal of this website is to give you an overview of transit operators that produce GTFS schedule and/or real-time data. We use data from the [National Transit Database](https://www.transit.dot.gov/ntd), [National Association of City Transportation Officials's Transit Route Types](https://nacto.org/publication/transit-street-design-guide/introduction/service-context/transit-route-types/), and [GTFS feeds](https://gtfs.org/) to deliver key insights. You can find details such as the types of routes and the total scheduled hours of public transit service for which an operator runs.
2+
The goal of this website is to give you an overview of transit operators that produce GTFS schedule and/or real-time data, summarized at the individual operator, Caltrans district, or legislative district level.
33

4-
For operators who produce real-time data, we also calculate additional performance metrics for all of their routes. Examples include displaying the number of on-time, early, and late trips, the average speed, and the headway for a route.
4+
We use data from the [National Transit Database](https://www.transit.dot.gov/ntd), [National Association of City Transportation Officials’ Transit Route Types](https://nacto.org/publication/transit-street-design-guide/introduction/service-context/transit-route-types/), and [GTFS feeds](https://gtfs.org/) to deliver key insights. You can find details such as the types of routes and the total scheduled hours of public transit service that an operator runs.
55

6-
GTFS Digest will continue to evolve as we dive into our own data warehouse!
6+
For operators who produce real-time data, we also calculate additional performance metrics for all their routes. Examples include displaying the number of on-time, early, and late trips, the average speed, and the headway for a route.
7+
8+
GTFS Digest will continue to evolve as we dive into our own data warehouse!
79

810
## Definitions and Methodology
911
To read about the methodology behind and the definitions of terms used throughout our work, please visit [here](https://github.com/cal-itp/data-analyses/blob/main/gtfs_digest/methodology.md).

portfolio/gtfs_digest/_toc.yml

+3
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,9 @@ parts:
2323
- file: district_07-los-angeles.md
2424
sections:
2525
- glob: district_07-los-angeles/*
26+
- file: district_07-los-angeles-ventura.md
27+
sections:
28+
- glob: district_07-los-angeles-ventura/*
2629
- file: district_08-san-bernardino.md
2730
sections:
2831
- glob: district_08-san-bernardino/*
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
version https://git-lfs.github.com/spec/v1
2-
oid sha256:a33fd48f931c6c72e9059e48144c049a82e86177c97685597eb2133796fcd3ed
3-
size 2902492
2+
oid sha256:18b3526815a4b77006b0c004e24aff15bd255596f731f9f31d37d7a45d7bb418
3+
size 2898415
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
version https://git-lfs.github.com/spec/v1
2-
oid sha256:93f951fb01f3da032a30cde48cde15d390ff8ffa42ecc89447f3d9c8816582bc
3-
size 5331630
2+
oid sha256:f87afbfc904e9261eee89ca20805b6233fb1ced9ddfe7699b41c72a238a8e417
3+
size 79690

portfolio/gtfs_digest/district_01-eureka/02__03_report__district_01-eureka__organization_name_city-of-eureka.ipynb

-3
This file was deleted.

portfolio/gtfs_digest/district_01-eureka/03__03_report__district_01-eureka__organization_name_curry-public-transit.ipynb

-3
This file was deleted.

portfolio/gtfs_digest/district_01-eureka/04__03_report__district_01-eureka__organization_name_humboldt-transit-authority.ipynb

-3
This file was deleted.

portfolio/gtfs_digest/district_01-eureka/05__03_report__district_01-eureka__organization_name_lake-transit-authority.ipynb

-3
This file was deleted.

portfolio/gtfs_digest/district_01-eureka/06__03_report__district_01-eureka__organization_name_mendocino-transit-authority.ipynb

-3
This file was deleted.

portfolio/gtfs_digest/district_01-eureka/07__03_report__district_01-eureka__organization_name_point.ipynb

-3
This file was deleted.

portfolio/gtfs_digest/district_01-eureka/08__03_report__district_01-eureka__organization_name_redwood-coast-transit-authority.ipynb

-3
This file was deleted.

portfolio/gtfs_digest/district_02-redding.md

-1
This file was deleted.

portfolio/gtfs_digest/district_02-redding/00__03_report__district_02-redding__organization_name_lassen-transit-service-agency.ipynb

-3
This file was deleted.

portfolio/gtfs_digest/district_02-redding/01__03_report__district_02-redding__organization_name_modoc-transportation-agency.ipynb

-3
This file was deleted.

portfolio/gtfs_digest/district_02-redding/02__03_report__district_02-redding__organization_name_plumas-transit-systems.ipynb

-3
This file was deleted.

portfolio/gtfs_digest/district_02-redding/03__03_report__district_02-redding__organization_name_redding-area-bus-authority.ipynb

-3
This file was deleted.

portfolio/gtfs_digest/district_02-redding/04__03_report__district_02-redding__organization_name_shasta-county.ipynb

-3
This file was deleted.

portfolio/gtfs_digest/district_02-redding/05__03_report__district_02-redding__organization_name_siskiyou-county.ipynb

-3
This file was deleted.

portfolio/gtfs_digest/district_02-redding/06__03_report__district_02-redding__organization_name_susanville-indian-rancheria.ipynb

-3
This file was deleted.

portfolio/gtfs_digest/district_02-redding/07__03_report__district_02-redding__organization_name_tehama-county.ipynb

-3
This file was deleted.

portfolio/gtfs_digest/district_02-redding/08__03_report__district_02-redding__organization_name_trinity-county.ipynb

-3
This file was deleted.

portfolio/gtfs_digest/district_03-marysville.md

-1
This file was deleted.

portfolio/gtfs_digest/district_03-marysville/00__03_report__district_03-marysville__organization_name_amtrak.ipynb

-3
This file was deleted.

portfolio/gtfs_digest/district_03-marysville/01__03_report__district_03-marysville__organization_name_butte-county-association-of-governments.ipynb

-3
This file was deleted.

portfolio/gtfs_digest/district_03-marysville/02__03_report__district_03-marysville__organization_name_city-of-auburn.ipynb

-3
This file was deleted.

portfolio/gtfs_digest/district_03-marysville/03__03_report__district_03-marysville__organization_name_city-of-elk-grove.ipynb

-3
This file was deleted.

portfolio/gtfs_digest/district_03-marysville/04__03_report__district_03-marysville__organization_name_city-of-rancho-cordova.ipynb

-3
This file was deleted.

portfolio/gtfs_digest/district_03-marysville/05__03_report__district_03-marysville__organization_name_city-of-roseville.ipynb

-3
This file was deleted.

portfolio/gtfs_digest/district_03-marysville/06__03_report__district_03-marysville__organization_name_el-dorado-county-transit-authority.ipynb

-3
This file was deleted.

portfolio/gtfs_digest/district_03-marysville/07__03_report__district_03-marysville__organization_name_glenn-county.ipynb

-3
This file was deleted.

portfolio/gtfs_digest/district_03-marysville/08__03_report__district_03-marysville__organization_name_nevada-county.ipynb

-3
This file was deleted.

0 commit comments

Comments
 (0)