@@ -58,57 +58,6 @@ def segment_speeds_one_day(
     return df


-def segment_geom_time_series(
-    segment_type: str,
-    analysis_date_list: list
-):
65- """
66- One challenge with pulling segment geometry
67- over a longer period is that we can get duplicates.
68- Segment geom uses schedule_gtfs_dataset_key,
69- which over a long enough period, can also change.
70-
71- We should come up with a way to get rid of dupes,
72- while also coming up with a way to merge this back onto
73- segment speed averages.
74- """
-    speed_file = GTFS_DATA_DICT[segment_type]["route_dir_single_segment"]
-    segment_file = GTFS_DATA_DICT[segment_type]["segments_file"]
-
-    operator_df = time_series_utils.concatenate_datasets_across_dates(
-        SEGMENT_GCS,
-        speed_file,
-        analysis_date_list,
-        data_type="df",
-        columns=["schedule_gtfs_dataset_key", "name", "organization_name"],
-        get_pandas=True
-    ).drop_duplicates()
-
-    segment_gdf = time_series_utils.concatenate_datasets_across_dates(
-        SEGMENT_GCS,
-        segment_file,
-        analysis_date_list,
-        data_type="gdf",
-        get_pandas=False
-    )
-
-    gdf = delayed(pd.merge)(
-        segment_gdf,
-        operator_df,
-        on=["schedule_gtfs_dataset_key", "service_date"],
-        how="inner"
-    ).sort_values(
-        by=["name", "service_date"], ascending=[True, False]
-    ).drop(
-        columns=["schedule_gtfs_dataset_key", "service_date"]
-    ).drop_duplicates().reset_index(drop=True)
-    # this drops dupes on name and organization_name
-
-    gdf = compute(gdf)[0]
-
-    return gdf
-
-
 def get_aggregation(df: pd.DataFrame, group_cols: list):
     """
     Aggregating across days, take the (mean) p20/p50/p80 speed
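
A note on the duplicate problem the deleted docstring describes: one possible approach (a minimal sketch; `dedupe_segment_geom` and the contents of `segment_cols` are assumptions, not functions or columns confirmed by this repo) is to sort the concatenated geometry by `service_date` and keep only the most recent row per segment. That pins each segment to a single `schedule_gtfs_dataset_key`, which can then be used to merge the geometry back onto the segment speed averages.

```python
import geopandas as gpd


def dedupe_segment_geom(
    segment_gdf: gpd.GeoDataFrame,
    segment_cols: list,
) -> gpd.GeoDataFrame:
    """
    Keep one geometry per segment across all analysis dates.

    segment_cols: columns that identify a segment, e.g.
    ["name", "route_id", "direction_id", "stop_pair"]
    (hypothetical -- use whatever uniquely identifies a row
    in the real segments file).

    Sorting by service_date descending and keeping the first
    occurrence retains the most recent schedule_gtfs_dataset_key
    for each segment, so the result can still be merged back
    onto the segment speed averages on that key.
    """
    return (
        segment_gdf
        .sort_values("service_date", ascending=False)
        .drop_duplicates(subset=segment_cols)
        .reset_index(drop=True)
    )
```

Deduping the geometry this way before a merge like the `delayed(pd.merge)` step above would also keep the join from fanning out when an operator's key changed mid-period.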