parser.py
import os

from tqdm import tqdm


def get_paths() -> list[tuple[int, str]]:
    """
    Find all .plt files in the data set.

    Returns a list of tuples, with the username as an integer and the full
    path for each file.
    """
    filepaths = []
    for root, _, files in os.walk("data", topdown=True):
        for filename in files:
            if filename.endswith(".plt"):
                path = os.path.join(root, filename)
                # The username is the directory directly below "data".
                # Split on os.sep so this works on any platform, not just
                # Windows.
                username = path.split(os.sep)[1]
                filepaths.append((int(username), path))
    return filepaths

def read_data(max_records=None, max_folders=None) -> list:
    """
    Read up to max_folders .plt files from disk and parse them into a
    standard format, stopping early once max_records rows have been read.
    """
    filepaths = get_paths()
    if max_folders is not None:
        filepaths = filepaths[:max_folders]
    data = []
    progress = tqdm(total=len(filepaths), leave=False)
    row_count = 0
    for username, path in filepaths:
        if max_records and row_count >= max_records:
            break
        progress.set_description(f"Processing {path}")
        with open(path, "r") as file:
            rows = file.readlines()[6:]  # Skip the six-line header
        rows = [x.strip().split(",") for x in rows]
        for row in rows:
            if max_records and row_count >= max_records:
                break
            latitude = float(row[0])
            longitude = float(row[1])
            altitude = float(row[3])
            date = row[5]
            time = row[6]
            data.append([username, latitude, longitude, altitude, date, time])
            row_count += 1
        progress.update(1)
    progress.close()
    return data

def pg_parse(data: list) -> list:
    """
    Parse a list of preprocessed rows and convert them into a format suited
    to the Postgres version of the schema.
    """
    pg_data = []
    for user, lat, lon, alt, date, time in data:
        # Note order swapping: (lat, lon) => (lon, lat)
        pg_data.append([user, lon, lat, alt, date, time])
    return pg_data

def mysql_parse(data: list) -> list:
    """
    Parse a list of preprocessed rows and convert them into a format suited
    to the MySQL version of the schema.
    """
    mysql_data = []
    for row in data:
        mysql_data.append(row.copy())
    return mysql_data

def mongo_parse(data: list) -> list:
    """
    Parse a list of preprocessed rows and convert them into a format suited
    to the MongoDB version of the schema.
    """
    mongo_data = []
    for user, lat, lon, alt, date, time in data:
        # Note order swapping: (lat, lon) => (lon, lat)
        mongo_data.append(
            {
                "tp_user": user,
                "tp_point": [lon, lat],
                "tp_altitude": alt,
                "tp_date": date,
                "tp_time": time,
            }
        )
    return mongo_data
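

# A minimal usage sketch (not part of the original module): assuming a
# Geolife-style layout of data/<user>/.../*.plt on disk, read a small
# sample and convert it for each target database. The limits and the
# printed slices below are arbitrary, for illustration only.
if __name__ == "__main__":
    sample = read_data(max_records=100, max_folders=2)
    print(f"Read {len(sample)} rows")
    print("Postgres:", pg_parse(sample)[:1])
    print("MySQL:   ", mysql_parse(sample)[:1])
    print("MongoDB: ", mongo_parse(sample)[:1])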