Skip to content

Commit cedce52

Browse files
authored
Merge pull request #1418 from cal-itp/jan_ntd
jan 2025 monthly ntd ridership report publish
2 parents 91fb528 + 902c28c commit cedce52

File tree

30 files changed

+77
-80
lines changed

30 files changed

+77
-80
lines changed

_shared_utils/shared_utils/schedule_gtfs_keys_multi_orgs.py

Lines changed: 21 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,25 @@
11
"""
2-
Create the GTFS Digest yaml that
2+
Create the GTFS Digest yaml that
33
sets the parameterization for the analysis site.
44
"""
5+
import sys
6+
57
import pandas as pd
6-
import yaml
78

9+
# import yaml
810
from shared_utils import portfolio_utils
11+
from update_vars import GTFS_DATA_DICT
912

10-
import sys
1113
sys.path.append("../../gtfs_digest/")
12-
from update_vars import GTFS_DATA_DICT
14+
1315

1416
def count_orgs(df: pd.DataFrame) -> list:
1517
"""
1618
Count the number of unique organization_names
1719
to schedule_gtfs_dataset_keys. Filter out any
1820
schedule_gtfs_dataset_keys with less than 2 unique
1921
organization_names. Return these schedule_gtfs_dataset_keys
20-
in a list.
22+
in a list.
2123
"""
2224
agg1 = (
2325
df.groupby(["caltrans_district", "schedule_gtfs_dataset_key"])
@@ -31,12 +33,13 @@ def count_orgs(df: pd.DataFrame) -> list:
3133
multi_org_list = list(agg1.schedule_gtfs_dataset_key.unique())
3234
return multi_org_list
3335

36+
3437
def find_schd_keys_multi_ops() -> pd.DataFrame:
3538
"""
3639
Return a dataframe with all the schedule_gtfs_dataset_keys
37-
that have more than one organization_name that corresponds to it.
40+
that have more than one organization_name that corresponds to it.
3841
This way, we won't include duplicate organizations when publishing
39-
our GTFS products.
42+
our GTFS products.
4043
"""
4144
schd_vp_url = f"{GTFS_DATA_DICT.digest_tables.dir}{GTFS_DATA_DICT.digest_tables.route_schedule_vp}.parquet"
4245

@@ -60,10 +63,8 @@ def find_schd_keys_multi_ops() -> pd.DataFrame:
6063
)
6164

6265
# Sort dataframe to keep the row for district/gtfs_key for the most
63-
# current date
64-
schd_vp_df2 = schd_vp_df.dropna(subset="caltrans_district").sort_values(
65-
by=sort_cols, ascending=[True, False, True]
66-
)
66+
# current date
67+
schd_vp_df2 = schd_vp_df.dropna(subset="caltrans_district").sort_values(by=sort_cols, ascending=[True, False, True])
6768
schd_vp_df3 = schd_vp_df2.drop_duplicates(
6869
subset=[
6970
"organization_name",
@@ -77,30 +78,25 @@ def find_schd_keys_multi_ops() -> pd.DataFrame:
7778
multi_orgs_list = count_orgs(schd_vp_df3)
7879

7980
# Filter out the dataframe to only include schedule_gtfs_keys with multiple orgs
80-
schd_vp_df4 = schd_vp_df3.loc[
81-
schd_vp_df3.schedule_gtfs_dataset_key.isin(multi_orgs_list)
82-
].reset_index(drop=True)
81+
schd_vp_df4 = schd_vp_df3.loc[schd_vp_df3.schedule_gtfs_dataset_key.isin(multi_orgs_list)].reset_index(drop=True)
8382

8483
# Drop duplicates for organization_name
85-
schd_vp_df5 = schd_vp_df4.drop_duplicates(
86-
subset=["caltrans_district", "organization_name"]
87-
).reset_index(drop=True)
84+
schd_vp_df5 = schd_vp_df4.drop_duplicates(subset=["caltrans_district", "organization_name"]).reset_index(drop=True)
8885

8986
# Aggregate the dataframe to find schedule_gtfs_dataset_keys
9087
# with multiple organization_names once more.
9188
multi_orgs_list2 = count_orgs(schd_vp_df5)
9289

9390
# Filter one last time to only include schedule_gtfs_keys with multiple orgs
94-
schd_vp_df6 = schd_vp_df5.loc[
95-
schd_vp_df5.schedule_gtfs_dataset_key.isin(multi_orgs_list2)
96-
].reset_index(drop=True)
91+
schd_vp_df6 = schd_vp_df5.loc[schd_vp_df5.schedule_gtfs_dataset_key.isin(multi_orgs_list2)].reset_index(drop=True)
9792

9893
# Clean
99-
schd_vp_df6= schd_vp_df6.drop(columns = ["service_date"])
100-
schd_vp_df6["combo"] = schd_vp_df6.caltrans_district + " (" + schd_vp_df6.schedule_gtfs_dataset_key + ")"
101-
94+
schd_vp_df6 = schd_vp_df6.drop(columns=["service_date"])
95+
schd_vp_df6["combo"] = schd_vp_df6.caltrans_district + " (" + schd_vp_df6.schedule_gtfs_dataset_key + ")"
96+
10297
return schd_vp_df6
10398

99+
104100
SITE_YML = "./schedule_gtfs_dataset_key_multi_operator.yml"
105101

106102
if __name__ == "__main__":
@@ -109,13 +105,13 @@ def find_schd_keys_multi_ops() -> pd.DataFrame:
109105
portfolio_utils.create_portfolio_yaml_chapters_with_sections(
110106
SITE_YML,
111107
df,
112-
chapter_info = {
108+
chapter_info={
113109
"column": "combo",
114110
"name": "District/Key",
115111
"caption_prefix": "",
116112
"caption_suffix": "",
117113
},
118-
section_info = {
114+
section_info={
119115
"column": "organization_name",
120116
"name": "organization_name",
121117
},

ntd/ntd_dates.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,4 +42,5 @@ def get_public_filename(monthyear_string: str) -> str:
4242
"oct2024": "2024-12",
4343
"nov2024": "2025-01",
4444
"dec2024": "2025-02",
45+
"jan2025": "2025-03"
4546
}

ntd/update_vars.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,15 +4,15 @@
44

55
GCS_FILE_PATH = "gs://calitp-analytics-data/data-analyses/ntd/"
66

7-
current_month = "dec2024"
7+
current_month = "jan2025"
88
YEAR, MONTH = ntd_dates.parse_monthyear_string(current_month)
99
PUBLIC_FILENAME = ntd_dates.get_public_filename(current_month)
1010
MONTH_CREATED = ntd_dates.DATES[current_month]
1111

1212
# Check this url each month
1313
# https://www.transit.dot.gov/ntd/data-product/monthly-module-adjusted-data-release
1414
# Depending on if they fixed the Excel, there may be an additional suffix
15-
suffix = "_250203"
15+
suffix = "_250303"
1616
FULL_URL = (
1717
"https://www.transit.dot.gov/sites/fta.dot.gov/files/"
1818
f"{MONTH_CREATED}/{MONTH}%20{YEAR}%20"

portfolio/ntd_monthly_ridership/_config.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33

44
title: NTD Monthly Ridership by RTPA
55
author: Cal-ITP
6-
copyright: "2024"
6+
copyright: "2025"
77
#logo: calitp_logo_MAIN.png
88

99
# Force re-execution of notebooks on each build.
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
version https://git-lfs.github.com/spec/v1
2-
oid sha256:96c46c7e9b155e39a6f7941c458db111062a3f7b3558812af0cf8e240e74aa2f
3-
size 70902
2+
oid sha256:3a209cd427c0e2a589a8f0bf34efd5f5e20ae9ecdbcab18465242eb720bb93d0
3+
size 70899
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
version https://git-lfs.github.com/spec/v1
2-
oid sha256:14f2aacbd7025e20ae6a19dbea3e81d81465c2053714d63be203fb0bd8bc723d
3-
size 263773
2+
oid sha256:8a9b81a778a757b4054e9d6bedc4aa20dc0284fa9cad23bf0866ec9cb462b122
3+
size 266069
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
version https://git-lfs.github.com/spec/v1
2-
oid sha256:13900bc44322798730de072277ef19239fff51956da9156b0817e2510008ba53
3-
size 190099
2+
oid sha256:3d698aaee9ae4ca22b1337b147fef912edc5a90142f04b6380655b36e51a65d9
3+
size 191149
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
version https://git-lfs.github.com/spec/v1
2-
oid sha256:0fb9ab2a366354c8789df20d1b7db2899a9c2ebb8cfdfb11960477f21c921a5c
3-
size 314600
2+
oid sha256:7c16556f27ccbbfc85e6ea24ad7a386b2eacea5804c116e25606da7af0658ffb
3+
size 317382
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
version https://git-lfs.github.com/spec/v1
2-
oid sha256:ba1193f214668b6cc090a6bdb7d0be4feac3a7b651fae41e5506f6b559274aa4
3-
size 265589
2+
oid sha256:dcaaa6298dc650d5a529ddd02a79e751bf4c6a7029d19fe16d4d04491ddb6ffc
3+
size 266396
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
version https://git-lfs.github.com/spec/v1
2-
oid sha256:b2b772ea5fc20153373c4ad53bc2f6eaac2e80dce355c8f28dec121e4fd54f6b
3-
size 290229
2+
oid sha256:a8a3c797608f0be432199f632e1420ffc50d421f0eda7b5a29c5cff237daaf64
3+
size 293275

0 commit comments

Comments
 (0)