-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpreprocess.py
63 lines (56 loc) · 2.32 KB
/
preprocess.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
import gpxpy
import numpy as np
# Loads GPX data
def load_gpx_data(gpx_files):
    """Load coordinate arrays from the tracks and routes of GPX files.

    Every non-empty track segment and every non-empty route becomes one
    array with a (latitude, longitude) row per point. Loading is
    best-effort: a file that cannot be opened or parsed is reported on
    stdout and skipped, so one bad file does not abort the others.

    Args:
        gpx_files: Iterable of paths to GPX files.

    Returns:
        List of NumPy arrays in file order; within a file, track segments
        come before routes. Empty when nothing could be loaded.
    """
    tracks = []
    for path in gpx_files:
        print(f"Loading GPX file: {path}")
        try:
            # GPX is XML, whose default encoding is UTF-8. Decoding with the
            # platform locale (the default for open()) can corrupt or reject
            # such files; "utf-8-sig" also tolerates a leading byte-order mark.
            with open(path, 'r', encoding='utf-8-sig') as f:
                gpx = gpxpy.parse(f)

            # Check for tracks
            for track in gpx.tracks:
                for segment in track.segments:
                    if segment.points:
                        tracks.append(np.array(
                            [(point.latitude, point.longitude)
                             for point in segment.points]
                        ))
                    else:
                        print("No points in segment.")

            # Check for routes
            for route in gpx.routes:
                if route.points:
                    tracks.append(np.array(
                        [(point.latitude, point.longitude)
                         for point in route.points]
                    ))
                else:
                    print("No points in route.")
        except Exception as e:
            # Deliberately broad: any I/O or parse failure skips this file only.
            print(f"Error loading GPX file {path}: {e}")
    return tracks
# Normalizes the tracks
def normalize_tracks(tracks):
    """Standardize all tracks with one shared mean and standard deviation.

    The statistics are computed per coordinate column over the points of
    every track combined, then applied to each track individually.

    Args:
        tracks: Non-empty sequence of NumPy arrays, one row per point, all
            with the same number of columns.

    Returns:
        Tuple ``(normalized_tracks, mean, std)``: the list of standardized
        arrays, then the per-column mean and the per-column scale that was
        actually divided by, each as a tuple. A column with zero spread is
        reported with scale 1.0, so ``normalized * std + mean`` still
        recovers the original values.

    Raises:
        ValueError: If ``tracks`` is empty.
    """
    if len(tracks) == 0:
        raise ValueError("normalize_tracks() requires at least one track")

    all_points = np.concatenate(tracks, axis=0)
    mean = all_points.mean(axis=0)
    std = all_points.std(axis=0)
    # A column that never varies (single point, constant latitude, ...) has
    # zero spread; dividing by it would fill the result with NaN/inf.
    std = np.where(std == 0, 1.0, std)

    normalized_tracks = [(track - mean) / std for track in tracks]
    return normalized_tracks, tuple(mean), tuple(std)
# Prepares sequences
def prepare_sequences(tracks, sequence_length):
    """Build sliding-window inputs and next-point targets from tracks.

    For every track, each run of ``sequence_length`` consecutive points
    becomes one input sequence and the point right after it becomes the
    matching target. Tracks with ``sequence_length`` points or fewer
    contribute nothing.

    Args:
        tracks: Iterable of arrays, one row per point.
        sequence_length: Number of points per input sequence; at least 1.

    Returns:
        Tuple ``(sequences, next_points)`` of NumPy arrays. ``sequences``
        has one window per entry and ``next_points`` the point following
        each window. When no window can be formed, both are empty but keep
        their usual number of dimensions.

    Raises:
        ValueError: If ``sequence_length`` is less than 1.
    """
    if sequence_length < 1:
        raise ValueError("sequence_length must be at least 1")

    sequences = []
    next_points = []
    for track in tracks:
        for start in range(len(track) - sequence_length):
            end = start + sequence_length
            sequences.append(track[start:end])
            next_points.append(track[end])

    if not sequences:
        # np.array([]) would be 1-D with shape (0,), which breaks callers
        # that index or reshape by the window/point axes. Keep the normal
        # rank, taking the per-point shape from the first 2-D track and
        # falling back to two columns when there is none.
        point_shape = next(
            (np.shape(track)[1:] for track in tracks if np.ndim(track) == 2),
            (2,),
        )
        return (
            np.empty((0, sequence_length) + tuple(point_shape)),
            np.empty((0,) + tuple(point_shape)),
        )
    return np.array(sequences), np.array(next_points)
# Main function to preprocess GPX files
def preprocess_gpx(files, sequence_length=5):
    """Run the whole pipeline: load GPX files, normalize, build sequences.

    Args:
        files: Paths of the GPX files to load.
        sequence_length: Number of points in each input sequence.

    Returns:
        Tuple ``(sequences, next_points, mean_coords, std_coords)``, or four
        ``None`` values when no track could be loaded from ``files``.
    """
    raw_tracks = load_gpx_data(files)

    if raw_tracks:
        print(f"Normalizing {len(raw_tracks)} tracks.")
        scaled_tracks, center, spread = normalize_tracks(raw_tracks)

        print("Preparing sequences from normalized tracks.")
        windows, targets = prepare_sequences(scaled_tracks, sequence_length)
        return windows, targets, center, spread

    # Nothing was loaded, so there is nothing to normalize.
    print("No tracks to normalize. Exiting preprocessing.")
    return (None,) * 4