Skip to content

Commit d36d728

Browse files
committed
Add the code
1 parent 4e13dfb commit d36d728

File tree

2 files changed

+158
-28
lines changed

2 files changed

+158
-28
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
import folium
2+
import numpy as np
3+
import pandas as pd
4+
import os
5+
6+
# Load the datasets from paths relative to the directory containing this script.
# `__file__` must be the module variable, not the string literal '__file__':
# realpath('__file__') resolves against the current working directory instead.
try:
    OS_PATH = os.path.dirname(os.path.realpath(__file__))
except NameError:
    # __file__ is undefined in interactive sessions; fall back to the working directory.
    OS_PATH = os.getcwd()
8+
9+
def _load_csv(relative_path):
    """Read a CSV file located at `relative_path` below OS_PATH into a DataFrame."""
    return pd.read_csv(os.path.join(OS_PATH, relative_path))


# Sensor location tables
weather_sensors_df = _load_csv('data/metr-la/sensors/metr_la_sensors_weather.csv')
traffic_sensors_df = _load_csv('data/metr-la/sensors/metr_la_sensors_traffic.csv')

# Measurement tables
traffic_speed_df = _load_csv('data/metr-la/traffic/speed.csv')
air_temp_df = _load_csv('data/metr-la/weather/air_temp_set_1_fahrenheit.csv')
14+
15+
# Haversine formula to calculate the distance between two geographical points
16+
def haversine(lat1, lon1, lat2, lon2):
    """
    Compute the great-circle distance between two points using the haversine formula.

    Parameters:
    - lat1, lon1: Latitude and longitude of the first point, in degrees.
    - lat2, lon2: Latitude and longitude of the second point, in degrees.
      Each argument may be a scalar or an array-like; NumPy broadcasting applies.

    Returns:
    - The distance in kilometers (scalar or array, following the inputs).
    """
    lat1, lon1, lat2, lon2 = map(np.radians, [lat1, lon1, lat2, lon2])
    dlat = lat2 - lat1
    dlon = lon2 - lon1
    a = np.sin(dlat / 2) ** 2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon / 2) ** 2
    # Floating-point rounding can push `a` marginally outside [0, 1] (e.g. for
    # antipodal points), which would make sqrt/arcsin return NaN.
    a = np.clip(a, 0.0, 1.0)
    c = 2 * np.arcsin(np.sqrt(a))
    r = 6371  # Radius of Earth in kilometers
    return c * r
24+
25+
def find_nearest_weather_sensor(traffic_lat, traffic_lon, weather_df):
    """
    Find the weather sensor closest to a traffic sensor location.

    Parameters:
    - traffic_lat: Latitude of the traffic sensor, in degrees.
    - traffic_lon: Longitude of the traffic sensor, in degrees.
    - weather_df: DataFrame with 'lat', 'long' and 'detid' columns, one row per weather sensor.

    Returns:
    - The 'detid' value of the weather sensor with the smallest haversine distance.

    Raises:
    - ValueError: If weather_df has no rows (or every distance is NaN).
    """
    if weather_df.empty:
        raise ValueError("weather_df contains no weather sensors")

    # haversine is built on NumPy ufuncs, so all distances are computed in one call.
    distances = np.asarray(
        haversine(traffic_lat, traffic_lon, weather_df['lat'], weather_df['long']),
        dtype=float,
    )

    # Use the *position* of the minimum together with .iloc: idxmin() returns an
    # index label, which only matches a position for a default RangeIndex.
    nearest_pos = int(np.nanargmin(distances))

    # Index the 'detid' column directly so its dtype is preserved (pulling out a
    # whole row first can upcast integer ids to float).
    return weather_df['detid'].iloc[nearest_pos]
28+
29+
# Attach the id of the closest weather sensor to every traffic sensor row
def _nearest_for_row(sensor_row):
    """Return the nearest weather sensor id for one traffic sensor row."""
    return find_nearest_weather_sensor(sensor_row['lat'], sensor_row['long'], weather_sensors_df)


traffic_sensors_df['nearest_weather_sensor'] = traffic_sensors_df.apply(_nearest_for_row, axis=1)

# Lookup table: traffic sensor id -> nearest weather sensor id
sensor_to_weather_mapping = dict(
    zip(traffic_sensors_df['detid'], traffic_sensors_df['nearest_weather_sensor'])
)
37+
38+
def calculate_centroid(df, lat_col='lat', lon_col='long'):
    """
    Calculate the centroid of a set of points as the mean of their coordinates.

    The centroid is the plain arithmetic mean of the latitude and longitude
    columns, which is adequate for a compact region but not for points that
    straddle the +/-180 degree meridian.

    Parameters:
    - df: DataFrame containing the latitude and longitude columns.
    - lat_col: Name of the latitude column (defaults to 'lat').
    - lon_col: Name of the longitude column (defaults to 'long').

    Returns:
    - A tuple (centroid_lat, centroid_lon).
    """
    centroid_lat = df[lat_col].mean()
    centroid_lon = df[lon_col].mean()

    return centroid_lat, centroid_lon
52+
53+
def generate_sensor_map(traffic_sensors_df, weather_sensors_df, sensor_to_weather_mapping):
    """
    Generate a map showing the traffic sensors and weather sensors.

    Every weather sensor is drawn as a large circle in its own color; every
    traffic sensor is drawn as a small circle in the color of the weather
    sensor it is mapped to.

    Parameters:
    - traffic_sensors_df: DataFrame containing traffic sensor locations ('detid', 'lat', 'long').
    - weather_sensors_df: DataFrame containing weather sensor locations ('detid', 'lat', 'long').
    - sensor_to_weather_mapping: Dictionary mapping traffic sensors to their nearest weather sensor.

    Returns:
    - A folium map object.

    Raises:
    - KeyError: If a traffic sensor is missing from sensor_to_weather_mapping, or is
      mapped to a weather sensor that does not appear in weather_sensors_df.
    """
    centroid_lat, centroid_lon = calculate_centroid(traffic_sensors_df)

    # Create a base map centered on the centroid of the traffic sensors
    m = folium.Map(location=[centroid_lat, centroid_lon], zoom_start=12)

    # Define distinct colors for weather sensors.
    # CircleMarker hands these to Leaflet as CSS colors; 'lightred' and
    # 'darkpurple' are folium Icon color names rather than CSS color names,
    # so hex values are used for those two entries.
    colors = [
        'red', 'blue', 'green', 'purple', 'orange', 'darkred',
        '#ff8e7f', 'beige', 'darkblue', 'darkgreen', 'cadetblue',
        '#5b396b', 'pink', 'lightblue', 'lightgreen',
        'gray', 'black', 'lightgray',
    ]

    # Create a mapping from weather sensor to color (colors repeat cyclically
    # when there are more weather sensors than colors)
    weather_sensor_to_color = {
        detid: colors[i % len(colors)]
        for i, detid in enumerate(weather_sensors_df['detid'].unique())
    }

    # Plot the weather sensors on the map
    for _, row in weather_sensors_df.iterrows():
        sensor_color = weather_sensor_to_color[row['detid']]
        folium.CircleMarker(
            location=(row['lat'], row['long']),
            radius=8,
            color=sensor_color,
            fill=True,
            fill_color=sensor_color,
        ).add_to(m)

    # Plot the traffic sensors on the map, colored by their nearest weather sensor
    for _, row in traffic_sensors_df.iterrows():
        nearest_weather_sensor = sensor_to_weather_mapping[row['detid']]
        sensor_color = weather_sensor_to_color[nearest_weather_sensor]
        folium.CircleMarker(
            location=(row['lat'], row['long']),
            radius=4,
            color=sensor_color,
            fill=True,
            fill_color=sensor_color,
        ).add_to(m)

    return m
104+
105+
# Make sure the output folder exists before writing the map into it
output_folder = os.path.join(OS_PATH, 'output/metr-la')
os.makedirs(output_folder, exist_ok=True)  # Create output folder if it doesn't exist

# Generate the map using the function and save it as a standalone HTML file
map_output = generate_sensor_map(traffic_sensors_df, weather_sensors_df, sensor_to_weather_mapping)
map_output.save(os.path.join(output_folder, 'sensor_map.html'))
112+

spatial_integration_haversine_mapping.py

+46-28
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,12 @@
44

55
# Load the datasets from paths relative to the directory containing this script.
# `__file__` must be the module variable, not the string literal '__file__':
# realpath('__file__') resolves against the current working directory instead.
try:
    OS_PATH = os.path.dirname(os.path.realpath(__file__))
except NameError:
    # __file__ is undefined in interactive sessions; fall back to the working directory.
    OS_PATH = os.getcwd()
7-
weather_sensors_df = os.path.join(OS_PATH, 'data/pems-bay/sensors/pems_bay_sensors_weather.csv')
8-
traffic_sensors_df = os.path.join(OS_PATH, 'data/pems-bay/sensors/pems_bay_sensors_traffic.csv')
97

8+
weather_sensors_df = pd.read_csv(os.path.join(OS_PATH, 'data/metr-la/sensors/metr_la_sensors_weather.csv'))
9+
traffic_sensors_df = pd.read_csv(os.path.join(OS_PATH, 'data/metr-la/sensors/metr_la_sensors_traffic.csv'))
1010

11-
traffic_speed_df = os.path.join(OS_PATH, 'data/pems-bay/traffic/speed.csv')
12-
air_temp_df = os.path.join(OS_PATH, 'data/pems-bay/weather/air_temp_set_1_fahrenheit.csv')
13-
11+
traffic_speed_df = pd.read_csv(os.path.join(OS_PATH, 'data/metr-la/traffic/speed.csv'))
12+
air_temp_df = pd.read_csv(os.path.join(OS_PATH, 'data/metr-la/weather/air_temp_set_1_fahrenheit.csv'))
1413

1514
# Haversine formula to calculate the distance between two geographical points
1615
def haversine(lat1, lon1, lat2, lon2):
@@ -35,32 +34,51 @@ def find_nearest_weather_sensor(traffic_lat, traffic_lon, weather_df):
3534
# Dictionary mapping of traffic sensor to its nearest weather sensor
3635
sensor_to_weather_mapping = dict(zip(traffic_sensors_df['detid'], traffic_sensors_df['nearest_weather_sensor']))
3736

38-
# Merge weather data with traffic data in chunks
39-
chunk_size = 5000
37+
# Function to combine traffic speed data with the matching air temperature data
def merge_data_on_timestamps(traffic_speed_df, air_temp_df, sensor_to_weather_mapping):
    """
    Combine each traffic sensor's speed column with its weather sensor's temperature column.

    Parameters:
    - traffic_speed_df: DataFrame whose first column is "DATETIMESTAMP" and whose
      remaining columns are named after traffic sensor ids (convertible with int()).
    - air_temp_df: DataFrame with one temperature column per weather sensor, named
      after the weather sensor id.
    - sensor_to_weather_mapping: Dictionary mapping traffic sensor ids (int) to
      weather sensor ids.

    Returns:
    - A new DataFrame with "DATETIMESTAMP", then for every traffic sensor its speed
      column followed by a "<sensor>_temp" column. The temperature column is
      omitted when the mapped weather sensor has no column in air_temp_df.

    Raises:
    - ValueError: If a traffic column name cannot be converted to int.
    - KeyError: If a traffic sensor id is missing from sensor_to_weather_mapping.

    NOTE(review): temperatures are aligned to the traffic rows by row index, not by
    comparing timestamp values, so both inputs are assumed to cover the same
    timestamps in the same row order - confirm against the data files.
    """
    # Collect all columns first and build the frame once; inserting hundreds of
    # columns one at a time fragments the DataFrame and triggers pandas'
    # PerformanceWarning.
    columns = {"DATETIMESTAMP": traffic_speed_df["DATETIMESTAMP"]}

    # Iterate through each sensor column (everything after DATETIMESTAMP)
    for sensor in traffic_speed_df.columns[1:]:
        # Copy the speed data
        columns[sensor] = traffic_speed_df[sensor]

        # Find corresponding weather sensor; temperature columns are looked up by
        # the string form of the weather sensor id
        weather_column = f"{sensor_to_weather_mapping[int(sensor)]}"

        # Find corresponding temperature data, aligned to the traffic rows
        if weather_column in air_temp_df.columns:
            columns[f"{sensor}_temp"] = air_temp_df[weather_column].reindex(traffic_speed_df.index)

    return pd.DataFrame(columns)
4056

41-
def merge_weather_with_traffic(traffic_data, weather_data, sensor_to_weather_mapping):
42-
merged_data = traffic_data[['DATETIMESTAMP']].copy()
43-
for sensor in traffic_data.columns:
44-
if sensor != "DATETIMESTAMP":
45-
nearest_weather_sensor = sensor_to_weather_mapping.get(sensor, None)
46-
if nearest_weather_sensor:
47-
weather_chunk = weather_data[['Date_Time', nearest_weather_sensor]].rename(columns={nearest_weather_sensor: sensor})
48-
merged_data = merged_data.merge(weather_chunk, left_on="DATETIMESTAMP", right_on="Date_Time", how="left").drop(columns="Date_Time")
49-
return merged_data
57+
# Reorder columns function
58+
def reorder_columns(merged_df):
    """
    Move all temperature columns behind the non-temperature columns.

    A column counts as a temperature column when its name ends with "_temp".
    The relative order inside each group is kept.

    Parameters:
    - merged_df: DataFrame containing speed and "<sensor>_temp" columns.

    Returns:
    - A new DataFrame with the non-temperature columns first, followed by the
      temperature columns. The input DataFrame is not modified.
    """
    speed_cols = []
    temp_cols = []
    for col in merged_df.columns:
        # str() keeps non-string column labels from raising TypeError; the suffix
        # test avoids misclassifying names that merely contain "_temp".
        if str(col).endswith('_temp'):
            temp_cols.append(col)
        else:
            speed_cols.append(col)

    return merged_df[speed_cols + temp_cols]
5066

51-
# Example of merging air temperature data with traffic data in chunks
52-
merged_chunks = []
53-
for start_row in range(0, traffic_speed_df.shape[0], chunk_size):
54-
end_row = start_row + chunk_size
55-
traffic_chunk = traffic_speed_df.iloc[start_row:end_row]
56-
merged_chunk = merge_weather_with_traffic(traffic_chunk, air_temp_df, sensor_to_weather_mapping)
57-
merged_chunks.append(merged_chunk)
67+
# Rename speed columns with "_speed" suffix
68+
def rename_speed_columns(merged_df):
    """
    Append "_speed" to the name of every speed column.

    Speed columns are all columns except "DATETIMESTAMP" and those whose name
    ends with "_temp".

    Parameters:
    - merged_df: DataFrame containing "DATETIMESTAMP", speed and "<sensor>_temp" columns.

    Returns:
    - The same DataFrame object, renamed in place.
    """
    # str() keeps non-string column labels from raising TypeError; the suffix
    # test avoids misclassifying names that merely contain "_temp".
    speed_columns_renamed = {
        col: f"{col}_speed"
        for col in merged_df.columns
        if col != "DATETIMESTAMP" and not str(col).endswith('_temp')
    }
    # The rename is deliberately in place: the caller's DataFrame is modified
    # and also returned.
    merged_df.rename(columns=speed_columns_renamed, inplace=True)
    return merged_df
5873

74+
# Build the merged frame, then apply the post-processing steps in order:
# temperature columns last, speed columns suffixed with "_speed"
merged_df = merge_data_on_timestamps(traffic_speed_df, air_temp_df, sensor_to_weather_mapping)
for _postprocess in (reorder_columns, rename_speed_columns):
    merged_df = _postprocess(merged_df)
5978

60-
final_merged_df = pd.concat(merged_chunks, axis=0)
6179

62-
# Define output path and save the final merged dataframe
63-
output_folder = os.path.join(OS_PATH, 'output')
80+
# Save the merged dataframe to a CSV file
81+
output_folder = os.path.join(OS_PATH, 'output/metr-la')
6482
os.makedirs(output_folder, exist_ok=True) # Create output folder if it doesn't exist
65-
output_file_path = os.path.join(output_folder, 'merged_traffic_weather_data.csv')
66-
final_merged_df.to_csv(output_file_path, index=False)
83+
merged_file_path = os.path.join(output_folder, 'merged_speed_traffic_and_air_temperature_data.csv')
84+
merged_df.to_csv(merged_file_path, index=False)

0 commit comments

Comments
 (0)