Commit dad53ac

test year averages with segment geom

Parent: db5be6f

2 files changed: +1 line, -51 lines

_shared_utils/shared_utils/gtfs_analytics_data.yml

Lines changed: 1 addition & 0 deletions

@@ -81,6 +81,7 @@ stop_segments:
   shape_stop_cols: ["shape_array_key", "shape_id", "stop_sequence"]
   stop_pair_cols: ["stop_pair", "stop_pair_name"]
   route_dir_cols: ["route_id", "direction_id"]
+  segment_cols: ["schedule_gtfs_dataset_key", "route_id", "direction_id", "stop_pair", "geometry"]
   shape_stop_single_segment: "rollup_singleday/speeds_shape_stop_segments" #-- stop after Oct 2024
   route_dir_single_segment: "rollup_singleday/speeds_route_dir_segments"
   route_dir_multi_segment: "rollup_multiday/speeds_route_dir_segments"
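The new segment_cols key names the columns that identify a stop segment, geometry included. A minimal sketch of how it might be used, assuming GTFS_DATA_DICT is simply the parsed yml (the deleted helper below reads it the same way): de-duplicate segment geometries that repeat when many service dates are concatenated, which is the problem that helper's docstring describes. The unique_segments function and the service_date handling are illustrative assumptions, not code from this commit.

import yaml
import geopandas as gpd

# Parse the shared analytics config shown above.
with open("_shared_utils/shared_utils/gtfs_analytics_data.yml") as f:
    GTFS_DATA_DICT = yaml.safe_load(f)

SEGMENT_COLS = GTFS_DATA_DICT["stop_segments"]["segment_cols"]
# ["schedule_gtfs_dataset_key", "route_id", "direction_id", "stop_pair", "geometry"]

def unique_segments(segments: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
    # Keep one geometry per segment identifier, preferring the most recent
    # service_date (assumed to be a column tagging each date's segments).
    id_cols = [c for c in SEGMENT_COLS if c != "geometry"]
    return (
        segments.sort_values("service_date", ascending=False)
        .drop_duplicates(subset=id_cols)[SEGMENT_COLS]
        .reset_index(drop=True)
    )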

rt_segment_speeds/scripts/quarter_year_averages.py

Lines changed: 0 additions & 51 deletions
@@ -58,57 +58,6 @@ def segment_speeds_one_day(
     return df
 
 
-def segment_geom_time_series(
-    segment_type: str,
-    analysis_date_list: list
-):
-    """
-    One challenge with pulling segment geometry
-    over a longer period is that we can get duplicates.
-    Segment geom uses schedule_gtfs_dataset_key,
-    which over a long enough period, can also change.
-
-    We should come up with a way to get rid of dupes,
-    while also coming up with a way to merge this back onto
-    segment speed averages.
-    """
-    speed_file = GTFS_DATA_DICT[segment_type]["route_dir_single_segment"]
-    segment_file = GTFS_DATA_DICT[segment_type]["segments_file"]
-
-    operator_df = time_series_utils.concatenate_datasets_across_dates(
-        SEGMENT_GCS,
-        speed_file,
-        analysis_date_list,
-        data_type = "df",
-        columns = ["schedule_gtfs_dataset_key", "name", "organization_name"],
-        get_pandas = True
-    ).drop_duplicates()
-
-    segment_gdf = time_series_utils.concatenate_datasets_across_dates(
-        SEGMENT_GCS,
-        segment_file,
-        analysis_date_list,
-        data_type = "gdf",
-        get_pandas = False
-    )
-
-    gdf = delayed(pd.merge)(
-        segment_gdf,
-        operator_df,
-        on = ["schedule_gtfs_dataset_key", "service_date"],
-        how = "inner"
-    ).sort_values(
-        by=["name", "service_date"], ascending=[True, False]
-    ).drop(
-        columns = ["schedule_gtfs_dataset_key", "service_date"]
-    ).drop_duplicates().reset_index(drop=True)
-    # this is dropping dupes with gtfs_dataset_name and organization_name
-
-    gdf = compute(gdf)[0]
-
-    return gdf
-
-
 def get_aggregation(df: pd.DataFrame, group_cols: list):
     """
     Aggregating across days, take the (mean) p20/p50/p80 speed
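For context, get_aggregation's docstring (cut off by the hunk above) describes taking the mean of the daily p20/p50/p80 speeds across days. A minimal sketch of that kind of rollup, assuming the percentile columns are named p20_mph/p50_mph/p80_mph (names not confirmed by this diff) and that group_cols would be something like the route_dir_cols or segment_cols from the yml above:

import pandas as pd

def aggregate_across_days(df: pd.DataFrame, group_cols: list) -> pd.DataFrame:
    # Average each day's p20/p50/p80 speed within every group,
    # e.g. one row per route_id + direction_id + stop_pair.
    speed_cols = ["p20_mph", "p50_mph", "p80_mph"]  # assumed column names
    return df.groupby(group_cols)[speed_cols].mean().reset_index()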
