Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add visualization graphs to eval #102

Draft
wants to merge 4 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
71 changes: 71 additions & 0 deletions quartz_solar_forecast/eval/visualize.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
import os
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib.dates as mdates
import re

def visualize_results(results_df: pd.DataFrame, output_dir: str = 'results') -> None:
    """
    Visualize Eval Results

    Writes PNG figures into ``output_dir`` (created if missing):

    - ``pred_vs_actual_<pv_id>_<start>.png``: one line plot of forecast vs.
      generation power per PV ID and per consecutive block of 48 rows.
      Trailing rows that do not fill a whole block are not plotted.
    - ``error_distribution.png``: histogram of
      ``forecast_power - generation_power`` over all rows.

    results_df dataframe with the following columns
    - timestamp
    - pv_id
    - horizon_hours
    - forecast_power
    - generation_power

    ``results_df`` is only read; no columns are added to it.

    NOTE(review): blocks are cut by row position, so this presumably expects
    the rows of each PV ID to already be ordered in time - confirm against
    the caller.
    """
    # Number of rows drawn on one "48HRs" figure.
    rows_per_plot = 48

    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(output_dir, exist_ok=True)

    # Prediction vs Actual values for each PV ID
    for pv_id in results_df['pv_id'].unique():
        df_subset = results_df[results_df['pv_id'] == pv_id]
        num_plots = len(df_subset) // rows_per_plot  # only full blocks are plotted

        for i in range(num_plots):
            df_segment = df_subset.iloc[i * rows_per_plot:(i + 1) * rows_per_plot]
            _plot_pred_vs_actual(df_segment, pv_id, output_dir)

    # Distribution of Errors across all data points
    _plot_error_distribution(results_df, output_dir)


def _plot_pred_vs_actual(df_segment, pv_id, output_dir):
    """Save one predicted-vs-actual line plot for a single block of rows of one PV ID."""
    start_timestamp = df_segment.iloc[0]['timestamp']
    # str() first: re.sub only accepts strings, and the column may already
    # hold parsed datetimes instead of text.
    start_timestamp_str = re.sub(r"[\s\-:]", "_", str(start_timestamp))

    # assign() returns a new frame, so the slice handed in is left untouched.
    plot_df = df_segment.assign(timestamp=pd.to_datetime(df_segment['timestamp']))

    fig, ax = plt.subplots(figsize=(10, 6))
    try:
        sns.lineplot(data=plot_df, x='timestamp', y='generation_power',
                     label='Actual', marker='o', ax=ax)
        sns.lineplot(data=plot_df, x='timestamp', y='forecast_power',
                     label='Predicted', marker='o', ax=ax)

        # Setting the x-ticks and labels
        # NOTE(review): the locators set just below replace the tick locator
        # installed by set_xticks, so this call looks redundant - confirm
        # before removing.
        x_ticks = pd.date_range(start=plot_df['timestamp'].iloc[0],
                                periods=len(plot_df), freq='h')
        ax.set_xticks(x_ticks)

        # Label every 5 hours, keep a minor tick for every hour
        ax.xaxis.set_major_locator(mdates.HourLocator(interval=5))
        ax.xaxis.set_major_formatter(mdates.DateFormatter('%H:%M'))
        ax.xaxis.set_minor_locator(mdates.HourLocator())

        # Plot title, labels, legend, and tight layout
        ax.set_title(f'48HRs Predicted vs. Actual Solar Power Output for {pv_id} (Starting from {start_timestamp})')
        ax.set_xlabel('Timestamp')
        ax.set_ylabel('Power Output (kW)')
        ax.legend()
        fig.tight_layout()

        # Save the figure
        fig.savefig(os.path.join(output_dir, f'pred_vs_actual_{pv_id}_{start_timestamp_str}.png'))
    finally:
        # Close even when plotting/saving fails so figures do not pile up.
        plt.close(fig)


def _plot_error_distribution(results_df, output_dir):
    """Save a histogram of forecast_power - generation_power over all rows."""
    # Kept as a local Series so the caller's dataframe does not gain a column.
    error = (results_df['forecast_power'] - results_df['generation_power']).rename('error')

    fig, ax = plt.subplots(figsize=(10, 6))
    try:
        sns.histplot(error, kde=True, bins=30, ax=ax)
        ax.set_title('Distribution of Prediction Errors')
        ax.set_xlabel('Error (kW)')
        ax.set_ylabel('Frequency')
        fig.savefig(os.path.join(output_dir, 'error_distribution.png'))
    finally:
        plt.close(fig)
9 changes: 5 additions & 4 deletions quartz_solar_forecast/evaluation.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
from quartz_solar_forecast.eval.nwp import get_nwp
from quartz_solar_forecast.eval.pv import get_pv_metadata, get_pv_truth
from quartz_solar_forecast.eval.utils import combine_forecast_ground_truth

from quartz_solar_forecast.eval.visualize import visualize_results
from dotenv import load_dotenv

load_dotenv()
Expand All @@ -33,7 +33,7 @@
)


def run_eval(testset_path: str = "dataset/testset.csv"):
def run_eval(testset_path: str = "dataset/testset.csv", visualize: bool = False):

# load testset from csv
testset = pd.read_csv(testset_path)
Expand Down Expand Up @@ -61,7 +61,8 @@ def run_eval(testset_path: str = "dataset/testset.csv"):
metrics(results_df, pv_metadata, include_night=False)

# Visualizations
# TODO

if visualize:
print("Creating visualizations")
visualize_results(results_df)

# run_eval()
4 changes: 3 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,6 @@ xarray==2022.12.0
pv-site-prediction==0.1.17
pydantic==2.6.2
huggingface_hub== 0.17.3 # only for evaluation
python-dotenv==1.0.1
python-dotenv==1.0.1
seaborn==0.13.2 # only for evaluation visualization
matplotlib==3.8.3 # only for evaluation visualization
Loading