Skip to content

Commit c587f91

Browse files
committed
save out individual year
1 parent 231e677 commit c587f91

File tree

3 files changed

+83
-31
lines changed

3 files changed

+83
-31
lines changed

_shared_utils/shared_utils/gtfs_analytics_data.yml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,8 @@ rt_stop_times:
8383
trip_speeds_single_summary: "rollup_singleday/speeds_trip"
8484
route_dir_timeofday: "rollup_singleday/speeds_route_dir"
8585
segment_peakoffpeak_weekday_month: "rollup_singleday/month_speeds_route_dir_peak_segments" # TODO: new dataset; final name not yet decided
86-
segment_timeofday_weekday_year: "rollup_multiday/year_weekday_speeds_route_dir_segments"
86+
segment_timeofday_weekday_year: "rollup_multiday/weekday_speeds_route_dir_segments"
87+
segments_year_file: "rollup_multiday/stop_segments"
8788
min_trip_minutes: ${speed_vars.time_min_cutoff}
8889
max_trip_minutes: 180
8990
max_speed: ${speed_vars.max_speed}

rt_segment_speeds/logs/avg_speeds.log

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,4 +18,6 @@
1818
2025-03-03 13:43:54.606 | INFO | __main__:summary_speeds_by_peak_offpeak:158 - rt_stop_times summary speed averaging by peak/offpeak for 2025-02-12 execution time: 0:00:13.043791
1919
2025-03-03 13:44:02.598 | INFO | __main__:trip_summary_speeds_by_time_of_day:92 - rt_stop_times summary speed averaging by time-of-day 2025-01-15 execution time: 0:00:07.986208
2020
2025-03-03 13:44:15.279 | INFO | __main__:summary_speeds_by_peak_offpeak:158 - rt_stop_times summary speed averaging by peak/offpeak for 2025-01-15 execution time: 0:00:12.679323
21-
2025-03-13 10:05:19.008 | INFO | __main__:annual_time_of_day_averages:140 - rt_stop_times: weekday/time-of-day averages for ['2023-03-15', '2023-04-10', '2023-04-11', '2023-04-12', '2023-04-13', '2023-04-14', '2023-04-15', '2023-04-16', '2023-05-17', '2023-06-14', '2023-07-12', '2023-08-15', '2023-09-13', '2023-10-09', '2023-10-10', '2023-10-11', '2023-10-12', '2023-10-13', '2023-10-14', '2023-10-15', '2023-11-15', '2023-12-13', '2024-01-17', '2024-02-14', '2024-03-13', '2024-04-15', '2024-04-16', '2024-04-17', '2024-04-18', '2024-04-19', '2024-04-20', '2024-04-21', '2024-05-22', '2024-06-12', '2024-07-17', '2024-08-14', '2024-09-18', '2024-10-14', '2024-10-15', '2024-10-16', '2024-10-17', '2024-10-18', '2024-10-19', '2024-10-20', '2024-11-13', '2024-12-11', '2025-01-15', '2025-02-12'] execution time: 0:04:05.322820
21+
2025-03-14 13:34:30.155 | INFO | __main__:annual_time_of_day_averages:171 - rt_stop_times: weekday/time-of-day averages for 2023 execution time: 0:02:23.782640
22+
2025-03-14 13:36:37.069 | INFO | __main__:annual_time_of_day_averages:171 - rt_stop_times: weekday/time-of-day averages for 2024 execution time: 0:02:06.806331
23+
2025-03-14 13:36:50.516 | INFO | __main__:annual_time_of_day_averages:171 - rt_stop_times: weekday/time-of-day averages for 2025 execution time: 0:00:13.404898

rt_segment_speeds/scripts/average_segment_speeds.py

Lines changed: 78 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
from typing import Literal, Optional
1515

1616
from calitp_data_analysis import utils
17+
from calitp_data_analysis.geography_utils import WGS84
1718

1819
from segment_speed_utils import gtfs_schedule_wrangling, segment_calcs, time_series_utils
1920
from shared_utils import publish_utils, time_helpers
@@ -43,8 +44,60 @@ def import_singleday_segment_speeds(
4344
return df
4445

4546

47+
def export_segment_geometry(
    year: str,
):
    """
    Export one deduplicated set of stop segment geometries for `year`.

    Segments from every date in `rt_dates.all_dates` that matches `year`
    are concatenated, then deduplicated on the identifying columns
    (operator, route, direction, stop pair) rather than on geometry,
    because geometries for the same segment may differ slightly between
    dates. Visual inspection showed the start and endpoints can shift a
    little while still describing the same corridor.

    Example: Big Blue Bus, stop_pair = "1115__187". In 2024 there are
    4 rows that are basically the same, so only one is kept. Rows are
    sorted by `service_date` descending before deduplicating, with the
    intent of keeping the most recent one.

    The result is reprojected to WGS84 and written to
    `{SEGMENT_GCS}{segments_year_file}_{year}.parquet`.

    Args:
        year: year to export, as a string (e.g. "2025").

    Returns:
        None. The output is the parquet file written as a side effect.
    """
    SEGMENTS_FILE = GTFS_DATA_DICT.rt_stop_times.segments_file
    EXPORT_FILE = GTFS_DATA_DICT.rt_stop_times.segments_year_file

    # Columns that identify a segment. Geometry is deliberately excluded
    # so that near-identical shapes collapse into a single row.
    segment_id_cols = [
        "schedule_gtfs_dataset_key",
        "route_id",
        "direction_id",
        "stop_pair",
    ]

    # NOTE(review): this is a substring match, not a prefix match --
    # presumably dates are "YYYY-MM-DD" strings and `year` is 4 digits;
    # confirm against rt_dates.
    dates_in_year = [d for d in rt_dates.all_dates if year in d]

    all_segments = time_series_utils.concatenate_datasets_across_dates(
        SEGMENT_GCS,
        SEGMENTS_FILE,
        dates_in_year,
        columns=[*segment_id_cols, "geometry"],
        data_type="gdf",
        get_pandas=False,
    )

    # Newest service_date first, so deduplicating favors the latest row.
    newest_first = all_segments.sort_values("service_date", ascending=False)

    deduped = (
        newest_first
        .drop(columns="service_date")
        .drop_duplicates(subset=segment_id_cols)
        .reset_index(drop=True)
    )

    # Reproject, then materialize the result before writing it out.
    segments_gdf = deduped.to_crs(WGS84).compute()

    segments_gdf.to_parquet(f"{SEGMENT_GCS}{EXPORT_FILE}_{year}.parquet")

    print(f"exported stop segments for year {year}")
97+
98+
4699
def annual_time_of_day_averages(
47-
analysis_date_list: list,
100+
year: str,
48101
segment_type: Literal[SEGMENT_TYPES],
49102
config_path: Optional = GTFS_DATA_DICT
50103
):
@@ -63,6 +116,7 @@ def annual_time_of_day_averages(
63116
dict_inputs = config_path[segment_type]
64117

65118
SPEED_FILE = dict_inputs["segment_timeofday"]
119+
SEGMENTS_YEAR_FILE = dict_inputs["segments_year_file"]
66120
EXPORT_FILE = dict_inputs["segment_timeofday_weekday_year"]
67121

68122
SEGMENT_COLS = [*dict_inputs["segment_cols"]]
@@ -71,6 +125,10 @@ def annual_time_of_day_averages(
71125
OPERATOR_COLS = ["schedule_gtfs_dataset_key"]
72126
CROSSWALK_COLS = [*dict_inputs.crosswalk_cols]
73127

128+
analysis_date_list = [
129+
date for date in rt_dates.all_dates if year in date
130+
]
131+
74132
df = import_singleday_segment_speeds(
75133
SEGMENT_GCS,
76134
SPEED_FILE,
@@ -98,44 +156,26 @@ def annual_time_of_day_averages(
98156
**orig_dtypes,
99157
},
100158
align_dataframes = False
101-
).compute()
102-
103-
104-
publish_utils.if_exists_then_delete(
105-
f"{SEGMENT_GCS}{EXPORT_FILE}"
106-
)
107-
108-
avg_speeds.to_parquet(
109-
f"{SEGMENT_GCS}{EXPORT_FILE}.parquet",
110-
)
111-
'''
112-
speeds_gdf = segment_calcs.merge_in_segment_geometry(
113-
avg_speeds,
114-
analysis_date_list,
115-
segment_type,
116-
SEGMENT_COLS
117-
).pipe(
118-
gtfs_schedule_wrangling.merge_operator_identifiers,
159+
).compute().pipe(
160+
gtfs_schedule_wrangling.merge_operator_identifiers,
119161
analysis_date_list,
120162
columns = CROSSWALK_COLS
121163
)
122164

123-
utils.geoparquet_gcs_export(
124-
speeds_gdf,
125-
SEGMENT_GCS,
126-
EXPORT_FILE
165+
avg_speeds.to_parquet(
166+
f"{SEGMENT_GCS}{EXPORT_FILE}_{year}.parquet"
127167
)
128-
'''
129168

130169
end = datetime.datetime.now()
131170

132171
logger.info(
133-
f"{segment_type}: weekday/time-of-day averages for {analysis_date_list} "
172+
f"{segment_type}: weekday/time-of-day averages for {year} "
134173
f"execution time: {end - start}"
135174
)
136175

137176
return
138177

178+
139179
if __name__ == "__main__":
140180

141181
from shared_utils import rt_dates
@@ -147,9 +187,18 @@ def annual_time_of_day_averages(
147187
format="{time:YYYY-MM-DD at HH:mm:ss} | {level} | {message}",
148188
level="INFO")
149189

190+
# isolate segments per year to allow for export
191+
# rerun previous years when necessary
192+
for year in ["2025"]:
193+
194+
export_segment_geometry(year)
150195

151-
annual_time_of_day_averages(
152-
rt_dates.all_dates,
153-
segment_type = "rt_stop_times",
154-
)
196+
annual_time_of_day_averages(
197+
year,
198+
segment_type = "rt_stop_times",
199+
)
200+
201+
202+
203+
155204

0 commit comments

Comments
 (0)