1
+ import datetime
2
+ import numpy as np
3
+ import pandas as pd
4
+ import geopandas as gpd
5
+
6
+ from segment_speed_utils import helpers , vp_transform
7
+ from segment_speed_utils .project_vars import SEGMENT_GCS , GTFS_DATA_DICT , PROJECT_CRS
8
+
9
+ from shared_utils import rt_dates
10
+ from shared_utils .rt_utils import MPH_PER_MPS
11
+ from project_condense_resample import project_point_onto_shape
12
+
13
# Value stored under the "oct2024" key of rt_dates.DATES; it is interpolated
# into the parquet paths read and written in the __main__ block of this file.
+ analysis_date = rt_dates .DATES ["oct2024" ]
14
+
15
def grab_arrays_by_trip(df, meters_interval: int):
    """
    Attach equal-interval distance cutoffs and the speeds between them.

    For each row of `df`, the `interpolated_distances` array is floored /
    ceiled to integer bounds, cutoffs are generated every `meters_interval`
    from the lower bound up to (but not including) the upper bound, and
    `get_speeds_every_interval` is called with those cutoffs.

    Args:
        df: DataFrame with `trip_instance_key`, `interpolated_distances`
            and `resampled_timestamps` columns.
        meters_interval: step passed to `range` when building the cutoffs.

    Returns:
        DataFrame with columns `trip_instance_key`, `intervaled_meters`
        (the cutoffs array per row) and `speeds` (the list returned by
        `get_speeds_every_interval` per row).
    """
    cutoffs_per_trip = []
    speeds_per_trip = []

    for trip in df.itertuples():
        distances = trip.interpolated_distances
        timestamps = trip.resampled_timestamps

        # Integer bounds that enclose every observed distance for this row.
        first_meter = int(np.floor(distances).min())
        last_meter = int(np.ceil(distances).max())

        cutoffs = np.array(range(first_meter, last_meter, meters_interval))
        trip_speeds = get_speeds_every_interval(distances, timestamps, cutoffs)

        cutoffs_per_trip.append(cutoffs)
        speeds_per_trip.append(trip_speeds)

    with_speeds = df.assign(
        intervaled_meters=cutoffs_per_trip,
        speeds=speeds_per_trip,
    )

    return with_speeds[["trip_instance_key", "intervaled_meters", "speeds"]]
45
+
46
+
47
def get_speeds_every_interval(
    one_trip_distance_arr,
    one_trip_timestamp_arr,
    intervaled_distance_cutoffs,
):
    """
    Compute one speed per pair of consecutive distance cutoffs.

    For every window [cutoffs[i], cutoffs[i + 1]] (both ends inclusive) the
    points whose distance falls inside the window are selected, and the speed
    is (max distance - min distance) / (max timestamp - min timestamp),
    multiplied by MPH_PER_MPS.

    Args:
        one_trip_distance_arr: array-like of distances for one trip
            (presumably meters along the shape; confirm with the caller).
        one_trip_timestamp_arr: array-like of timestamps aligned
            element-by-element with the distances. Assumed numeric
            (presumably seconds; confirm with the caller).
        intervaled_distance_cutoffs: ordered sequence of cutoff distances.

    Returns:
        A list with `len(intervaled_distance_cutoffs) - 1` entries (empty
        when fewer than two cutoffs are given). An entry is `np.nan` when no
        point falls inside the window, or when the selected points span zero
        elapsed time (e.g. a single point), since no speed can be derived.
    """
    distances = np.asarray(one_trip_distance_arr)
    timestamps = np.asarray(one_trip_timestamp_arr)

    one_trip_speed_series = []

    # Walk consecutive (lower, upper) cutoff pairs.
    for cut1, cut2 in zip(
        intervaled_distance_cutoffs[:-1], intervaled_distance_cutoffs[1:]
    ):
        in_window = (distances >= cut1) & (distances <= cut2)
        subset_distances = distances[in_window]
        subset_times = timestamps[in_window]

        if subset_distances.size == 0:
            one_trip_speed_series.append(np.nan)
            continue

        elapsed = subset_times.max() - subset_times.min()
        if elapsed == 0:
            # Dividing by zero elapsed time would give nan/inf plus a
            # RuntimeWarning; report the speed as missing instead.
            one_trip_speed_series.append(np.nan)
            continue

        traveled = subset_distances.max() - subset_distances.min()
        one_trip_speed_series.append(traveled / elapsed * MPH_PER_MPS)

    return one_trip_speed_series
75
+
76
+
77
def grab_arrays_by_trip2(
    df,
    distance_type="",
    intervaled_distance_column_or_meters="",
):
    """
    Compute per-row speeds using either equal-distance or per-row cutoffs.

    Args:
        df: DataFrame with `trip_instance_key`, `interpolated_distances`
            and `resampled_timestamps` columns. In "stop_to_stop" mode it
            must also hold `stop_sequence` and the column named by
            `intervaled_distance_column_or_meters`.
        distance_type: "equal_intervals" or "stop_to_stop".
        intervaled_distance_column_or_meters:
            - "equal_intervals": integer step passed to `range` to build
              cutoffs between the floored minimum and ceiled maximum of
              each row's distances.
            - "stop_to_stop": name of the column whose per-row value is
              used directly as the cutoffs.

    Returns:
        DataFrame with `trip_instance_key` plus
        - `intervaled_meters`, `speeds` for "equal_intervals";
        - `speeds`, `stop_sequence` for "stop_to_stop".

    Raises:
        ValueError: if `distance_type` is not one of the two supported
            values (previously this surfaced as an UnboundLocalError).
    """
    # Resolve the output columns up front so an empty `df` still returns a
    # well-formed frame and an unsupported mode fails with a clear message.
    if distance_type == "equal_intervals":
        keep_cols = ["intervaled_meters", "speeds"]
    elif distance_type == "stop_to_stop":
        keep_cols = ["speeds", "stop_sequence"]
    else:
        raise ValueError(
            "distance_type must be 'equal_intervals' or 'stop_to_stop', "
            f"got {distance_type!r}"
        )

    intervaled_cutoffs = []
    speed_series = []

    for row in df.itertuples():
        one_trip_distance_arr = getattr(row, "interpolated_distances")
        one_trip_timestamp_arr = getattr(row, "resampled_timestamps")

        if distance_type == "equal_intervals":
            start_dist = int(np.floor(one_trip_distance_arr).min())
            end_dist = int(np.ceil(one_trip_distance_arr).max())
            intervaled_distance_cutoffs = np.array(
                range(start_dist, end_dist, intervaled_distance_column_or_meters)
            )
            intervaled_cutoffs.append(intervaled_distance_cutoffs)
        else:
            intervaled_distance_cutoffs = getattr(
                row, intervaled_distance_column_or_meters
            )
            # NOTE: speeds are returned for every cutoff pair; nothing is
            # masked out based on the `stop_meters_increasing` column.

        speeds_for_trip = get_speeds_every_interval(
            one_trip_distance_arr,
            one_trip_timestamp_arr,
            intervaled_distance_cutoffs,
        )
        speed_series.append(speeds_for_trip)

    if distance_type == "equal_intervals":
        df2 = df.assign(
            intervaled_meters=intervaled_cutoffs,
            speeds=speed_series,
        )
    else:
        df2 = df.assign(speeds=speed_series)

    return df2[["trip_instance_key"] + keep_cols]
134
+
135
+
136
+
137
+
138
if __name__ == "__main__":
    # The string below is a disabled earlier run (equal-interval speeds) and
    # the timings recorded for the different variants; it is not executed.
    '''
    for b in ["batch0", "batch1"]:
        start = datetime.datetime.now()

        meters_interval = 250
        df = pd.read_parquet(
            f"{SEGMENT_GCS}vp_condensed/vp_resampled_{b}_{analysis_date}.parquet",
        )

        results = grab_arrays_by_trip(df, meters_interval)
        results.to_parquet(
            f"{SEGMENT_GCS}rough_speeds_{meters_interval}m_{b}_{analysis_date}.parquet"
        )

        end = datetime.datetime.now()
        print(f"{b} speeds every {meters_interval}m: {end - start}")


    #batch0 speeds every 100m: 0:03:00.469936
    #batch1 speeds every 100m: 0:02:50.197037
    #batch0 speeds every 250m: 0:01:32.080767
    #batch1 speeds every 250m: 0:01:38.365538
    #batch0 speeds every stop: 0:01:05.459700
    #batch1 speeds every stop: 0:00:46.450538
    '''

    # Stop-to-stop speeds: for each batch, join the resampled vehicle
    # positions to the projected stop times and compute speeds between stops.
    for b in ["batch0", "batch1"]:
        start = datetime.datetime.now()

        df = pd.read_parquet(
            f"{SEGMENT_GCS}vp_condensed/vp_resampled_{b}_{analysis_date}.parquet",
        )

        # Only read stop-time rows for trips present in this batch.
        subset_trips = df.trip_instance_key.unique().tolist()

        stop_time_cutoffs = pd.read_parquet(
            f"{SEGMENT_GCS}stop_times_projected_{analysis_date}.parquet",
            filters=[[("trip_instance_key", "in", subset_trips)]],
            columns=[
                "trip_instance_key",
                "stop_sequence",
                "stop_meters",
                "stop_meters_increasing",
            ],
        )

        # Inner merge: rows without a match on trip_instance_key are dropped.
        gdf = pd.merge(
            df,
            stop_time_cutoffs,
            on="trip_instance_key",
            how="inner",
        )

        results = grab_arrays_by_trip2(
            gdf,
            distance_type="stop_to_stop",
            intervaled_distance_column_or_meters="stop_meters",
        )

        results.to_parquet(
            f"{SEGMENT_GCS}rough_speeds_stop_to_stop_{b}_{analysis_date}.parquet"
        )

        end = datetime.datetime.now()
        print(f"{b} speeds every stop: {end - start}")
0 commit comments