From 1cb45b60bbac9d17c42f7b8f2a1e7fc95b305a70 Mon Sep 17 00:00:00 2001
From: Jason Feng
Date: Tue, 2 Apr 2024 21:26:32 -0700
Subject: [PATCH 1/3] Add visualizations to eval

---
 quartz_solar_forecast/eval/visualize.py | 77 +++++++++++++++++++++++++
 quartz_solar_forecast/evaluation.py     |  9 ++--
 2 files changed, 82 insertions(+), 4 deletions(-)
 create mode 100644 quartz_solar_forecast/eval/visualize.py

diff --git a/quartz_solar_forecast/eval/visualize.py b/quartz_solar_forecast/eval/visualize.py
new file mode 100644
index 00000000..ba3f9377
--- /dev/null
+++ b/quartz_solar_forecast/eval/visualize.py
@@ -0,0 +1,77 @@
+import os
+import pandas as pd
+import matplotlib.pyplot as plt
+import seaborn as sns
+import matplotlib.dates as mdates
+import re
+
+def visualize_results(results_df):
+    '''
+    Visualize Eval Results
+
+    Saves one predicted-vs-actual line plot per PV ID and per full block of
+    48 consecutive rows, plus a histogram of the forecast errors over all
+    rows, as PNG files in the 'results' folder (created if missing).
+    The input dataframe is not modified.
+
+    results_df dataframe with the following columns
+    - timestamp
+    - pv_id
+    - horizon_hours
+    - forecast_power
+    - generation_power
+    '''
+
+    # Ensure results folder exists (no error if it is already there)
+    os.makedirs('results', exist_ok=True)
+
+    # Prediction vs Actual values for each PV ID
+    pv_ids = results_df['pv_id'].unique()
+    for pv_id in pv_ids:
+        df_subset = results_df[results_df['pv_id'] == pv_id]
+        num_plots = len(df_subset) // 48 # Find out how many full 48-hour plots we can make
+
+        for i in range(num_plots):
+            # Extract data for the ith 48-hour segment
+            df_segment = df_subset.iloc[i*48:(i+1)*48].copy()
+
+            start_timestamp = df_segment.iloc[0]['timestamp']
+            # str() keeps this working when the column holds Timestamp objects, not only strings
+            start_timestamp_str = re.sub(r"[\s\-:]", "_", str(start_timestamp))
+            df_segment['timestamp'] = pd.to_datetime(df_segment['timestamp'])
+
+            plt.figure(figsize=(10, 6))
+            sns.lineplot(data=df_segment, x='timestamp', y='generation_power', label='Actual', marker='o')
+            sns.lineplot(data=df_segment, x='timestamp', y='forecast_power', label='Predicted', marker='o')
+
+            # Setting the x-ticks and labels
+            ax = plt.gca() # Get the current Axes instance
+            x_ticks = pd.date_range(start=df_segment['timestamp'].iloc[0], periods=len(df_segment), freq='h')
+            ax.set_xticks(x_ticks) # Set x-ticks to every hour
+
+            # Format the x-tick labels to show every 5 hours
+            ax.xaxis.set_major_locator(mdates.HourLocator(interval=5)) # Show label every 5 hours
+            ax.xaxis.set_major_formatter(mdates.DateFormatter('%H:%M')) # Format the datetime
+            ax.xaxis.set_minor_locator(mdates.HourLocator()) # Keep a tick for every hour
+
+            # Plot title, labels, legend, and tight layout
+            plt.title(f'48HRs Predicted vs. Actual Solar Power Output for {pv_id} (Starting from {start_timestamp})')
+            plt.xlabel('Timestamp')
+            plt.ylabel('Power Output (kW)')
+            plt.legend()
+            plt.tight_layout()
+
+            # Save the figure
+            plt.savefig(f'results/pred_vs_actual_{pv_id}_{start_timestamp_str}.png')
+            plt.close()
+
+    # Distribution of Errors across all data points
+    # Kept in a local Series so the caller's dataframe does not gain an 'error' column
+    errors = results_df['forecast_power'] - results_df['generation_power']
+    plt.figure(figsize=(10, 6))
+    sns.histplot(errors, kde=True, bins=30)
+    plt.title('Distribution of Prediction Errors')
+    plt.xlabel('Error (kW)')
+    plt.ylabel('Frequency')
+    plt.savefig('results/error_distribution.png')
+    plt.close()
diff --git a/quartz_solar_forecast/evaluation.py b/quartz_solar_forecast/evaluation.py
index 4d95fe6a..e207f614 100644
--- a/quartz_solar_forecast/evaluation.py
+++ b/quartz_solar_forecast/evaluation.py
@@ -16,7 +16,7 @@
 from quartz_solar_forecast.eval.nwp import get_nwp
 from quartz_solar_forecast.eval.pv import get_pv_metadata, get_pv_truth
 from quartz_solar_forecast.eval.utils import combine_forecast_ground_truth
-
+from quartz_solar_forecast.eval.visualize import visualize_results
 from dotenv import load_dotenv
 
 load_dotenv()
@@ -33,7 +33,7 @@
 )
 
 
-def run_eval(testset_path: str = "dataset/testset.csv"):
+def run_eval(testset_path: str = "../dataset/testset.csv", visualize: bool = False):
     # load testset from csv
     testset = pd.read_csv(testset_path)
 
@@ -61,7 +61,8 @@ def run_eval(testset_path: str = "dataset/testset.csv"):
     metrics(results_df, pv_metadata, include_night=False)
 
     # Visualizations
-    # TODO
-
+    if visualize:
+        print("Creating visualizations")
+        visualize_results(results_df)
 
 # run_eval()

From 568a275dae758af2ce8e10e072ed2664f6c1601c Mon Sep 17 00:00:00 2001
From: Jason Feng
Date: Tue, 2 Apr 2024 21:29:27 -0700
Subject: [PATCH 2/3] Add visualizations to eval

---
 requirements.txt | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index 52a57886..7cc9bf35 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -3,4 +3,6 @@ xarray==2022.12.0
 pv-site-prediction==0.1.17
 pydantic==2.6.2
 huggingface_hub== 0.17.3 # only for evaluation
-python-dotenv==1.0.1
\ No newline at end of file
+python-dotenv==1.0.1
+seaborn==0.13.2 # only for evaluation visualization
+matplotlib==3.8.3 # only for evaluation visualization
\ No newline at end of file

From 3d9de9e3b66983d41ecfeea8f2e48c19c2c2cc5e Mon Sep 17 00:00:00 2001
From: Jason Feng
Date: Tue, 2 Apr 2024 21:31:01 -0700
Subject: [PATCH 3/3] fix path

---
 quartz_solar_forecast/evaluation.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/quartz_solar_forecast/evaluation.py b/quartz_solar_forecast/evaluation.py
index e207f614..cc2e0a02 100644
--- a/quartz_solar_forecast/evaluation.py
+++ b/quartz_solar_forecast/evaluation.py
@@ -33,7 +33,7 @@
 )
 
 
-def run_eval(testset_path: str = "../dataset/testset.csv", visualize: bool = False):
+def run_eval(testset_path: str = "dataset/testset.csv", visualize: bool = False):
     # load testset from csv
     testset = pd.read_csv(testset_path)
 