From 43a38ca18b4f1739115e2b418f5b36a21ed66e28 Mon Sep 17 00:00:00 2001 From: peterdudfield Date: Fri, 28 Jun 2024 16:23:25 +0100 Subject: [PATCH 01/16] add visulization script --- ocf_datapipes/batch/visualise.py | 154 +++++++++++++++++++++++++++++++ 1 file changed, 154 insertions(+) create mode 100644 ocf_datapipes/batch/visualise.py diff --git a/ocf_datapipes/batch/visualise.py b/ocf_datapipes/batch/visualise.py new file mode 100644 index 000000000..5e10e3625 --- /dev/null +++ b/ocf_datapipes/batch/visualise.py @@ -0,0 +1,154 @@ +""" The idea is visualize one of the batches + +This is a bit of a working progress, but the idea is to visualize the batch in a markdown file. +""" +import pandas as pd +import sys + +from ocf_datapipes.batch import NumpyBatch, BatchKey, NWPBatchKey +import torch +import plotly.graph_objects as go + + +def visualize_batch(batch: NumpyBatch, example_id: int = 0): + + # Wind + print("# Batch visualization") + + print(f"We are looking at example {example_id}") + + print("## Wind \n") + keys = [ + BatchKey.wind, + BatchKey.wind_t0_idx, + BatchKey.wind_time_utc, + BatchKey.wind_id, + BatchKey.wind_observed_capacity_mwp, + BatchKey.wind_nominal_capacity_mwp, + BatchKey.wind_time_utc, + BatchKey.wind_latitude, + BatchKey.wind_longitude, + BatchKey.wind_solar_azimuth, + BatchKey.wind_solar_elevation, + ] + for key in keys: + if key in batch.keys(): + print("\n") + value = batch[key] + if isinstance(value, torch.Tensor): + print(f"{key} {value.shape=}") + print(f"Max {value.max()}") + print(f"Min {value.min()}") + elif isinstance(value, int): + print(f"{key} {value}") + else: + print(f"{key} {value}") + + print("## GSP \n") + keys = [ + BatchKey.gsp, + BatchKey.gsp_id, + BatchKey.gsp_time_utc, + BatchKey.gsp_time_utc_fourier, + BatchKey.gsp_x_osgb, + BatchKey.gsp_x_osgb_fourier, + BatchKey.gsp_y_osgb, + BatchKey.gsp_y_osgb_fourier, + BatchKey.gsp_t0_idx, + BatchKey.gsp_effective_capacity_mwp, + BatchKey.gsp_nominal_capacity_mwp, + BatchKey.gsp_solar_azimuth, + BatchKey.gsp_solar_elevation, + ] + for key in keys: + if key in batch.keys(): + print("\n") + print(f"### {key.name}") + value = batch[key] + if isinstance(value, torch.Tensor): + print(f"shape {value.shape=}") + print(f"Max {value.max():.2f}") + print(f"Min {value.min():.2f}") + elif isinstance(value, int): + print(f"{value}") + else: + print(f"{value}") + + # NWP + print("## NWP \n") + + keys = [ + NWPBatchKey.nwp, + NWPBatchKey.nwp_target_time_utc, + NWPBatchKey.nwp_channel_names, + NWPBatchKey.nwp_step, + NWPBatchKey.nwp_t0_idx, + NWPBatchKey.nwp_init_time_utc, + ] + + nwp = batch[BatchKey.nwp] + + nwp_providers = nwp.keys() + for provider in nwp_providers: + print("\n") + print(f"### Provider {provider}") + nwp_provider = nwp[provider] + + # plot nwp main data + nwp_data = nwp_provider[NWPBatchKey.nwp] + # average of lat and lon + nwp_data = nwp_data.mean(dim=(3, 4)) + fig = go.Figure() + for i in range(len(nwp_provider[NWPBatchKey.nwp_channel_names])): + channel = nwp_provider[NWPBatchKey.nwp_channel_names][i] + nwp_data_one_channel = nwp_data[example_id, :, i] + time = nwp_provider[NWPBatchKey.nwp_target_time_utc][example_id] + time = pd.to_datetime(time, unit="s") + fig.add_trace(go.Scatter(x=time, y=nwp_data_one_channel, mode="lines", name=channel)) + + fig.update_layout(title=f"{provider} NWP", xaxis_title="Time", yaxis_title="Value") + # fig.show(renderer='browser') + name = f"{provider}_nwp.png" + fig.write_image(name) + print(f"![]({name})") + print("\n") + + for key in keys: + print("\n") + print(f"#### {key.name}") + value = nwp_provider[key] + + if "time" in key.name: + value = pd.to_datetime(value[example_id], unit="s") + print(f"Shape={value.shape}") + print(f"Max {value.max()}") + print(f"Min {value.min()}") + + elif "channel" in key.name: + + # create a table with the channel names with max, min, mean and std + print("| Channel | Max | Min | Mean | Std |") + print("| --- | --- | --- | --- | --- |") + for i in range(len(value)): + channel = value[i] + data = nwp_data[:, :, i] + print( + f"| {channel} | {data.max().item():.2f} | {data.min().item():.2f} | {data.mean().item():.2f} | {data.std().item():.2f} |" + ) + + print(f"Shape={value.shape}") + + elif isinstance(value, torch.Tensor): + print(f"Shape {value.shape=}") + print(f"Max {value.max():.2f}") + print(f"Min {value.min():.2f}") + elif isinstance(value, int): + print(f"{value}") + else: + print(f"{value}") + +# For example you can run it like this +# with open("batch.md", "w") as f: +# sys.stdout = f +# d = torch.load("000000.pt") +# visualize_batch(d, example_id=3) From ebc50594d00b2c623bb9121c350bdf367e84d88a Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 28 Jun 2024 15:27:45 +0000 Subject: [PATCH 02/16] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- ocf_datapipes/batch/visualise.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/ocf_datapipes/batch/visualise.py b/ocf_datapipes/batch/visualise.py index 5e10e3625..d1ee597db 100644 --- a/ocf_datapipes/batch/visualise.py +++ b/ocf_datapipes/batch/visualise.py @@ -2,12 +2,12 @@ This is a bit of a working progress, but the idea is to visualize the batch in a markdown file. """ -import pandas as pd -import sys -from ocf_datapipes.batch import NumpyBatch, BatchKey, NWPBatchKey -import torch +import pandas as pd import plotly.graph_objects as go +import torch + +from ocf_datapipes.batch import BatchKey, NumpyBatch, NWPBatchKey def visualize_batch(batch: NumpyBatch, example_id: int = 0): @@ -147,6 +147,7 @@ def visualize_batch(batch: NumpyBatch, example_id: int = 0): else: print(f"{value}") + # For example you can run it like this # with open("batch.md", "w") as f: # sys.stdout = f From 8d922c524116540e82d99755e03973d715122829 Mon Sep 17 00:00:00 2001 From: peterdudfield Date: Fri, 28 Jun 2024 16:46:09 +0100 Subject: [PATCH 03/16] upgrade to plot all nwp examples, and make table of times --- ocf_datapipes/batch/visualise.py | 47 ++++++++++++++++++-------------- 1 file changed, 26 insertions(+), 21 deletions(-) diff --git a/ocf_datapipes/batch/visualise.py b/ocf_datapipes/batch/visualise.py index 5e10e3625..d52790cd7 100644 --- a/ocf_datapipes/batch/visualise.py +++ b/ocf_datapipes/batch/visualise.py @@ -10,13 +10,11 @@ import plotly.graph_objects as go -def visualize_batch(batch: NumpyBatch, example_id: int = 0): +def visualize_batch(batch: NumpyBatch): # Wind print("# Batch visualization") - print(f"We are looking at example {example_id}") - print("## Wind \n") keys = [ BatchKey.wind, @@ -98,20 +96,23 @@ def visualize_batch(batch: NumpyBatch, example_id: int = 0): nwp_data = nwp_provider[NWPBatchKey.nwp] # average of lat and lon nwp_data = nwp_data.mean(dim=(3, 4)) - fig = go.Figure() - for i in range(len(nwp_provider[NWPBatchKey.nwp_channel_names])): - channel = nwp_provider[NWPBatchKey.nwp_channel_names][i] - nwp_data_one_channel = nwp_data[example_id, :, i] - time = nwp_provider[NWPBatchKey.nwp_target_time_utc][example_id] - time = pd.to_datetime(time, unit="s") - fig.add_trace(go.Scatter(x=time, y=nwp_data_one_channel, mode="lines", name=channel)) - - fig.update_layout(title=f"{provider} NWP", xaxis_title="Time", yaxis_title="Value") - # fig.show(renderer='browser') - name = f"{provider}_nwp.png" - fig.write_image(name) - print(f"![]({name})") - print("\n") + + for b in range(nwp_data.shape[0]): + + fig = go.Figure() + for i in range(len(nwp_provider[NWPBatchKey.nwp_channel_names])): + channel = nwp_provider[NWPBatchKey.nwp_channel_names][i] + nwp_data_one_channel = nwp_data[b, :, i] + time = nwp_provider[NWPBatchKey.nwp_target_time_utc][b] + time = pd.to_datetime(time, unit="s") + fig.add_trace(go.Scatter(x=time, y=nwp_data_one_channel, mode="lines", name=channel)) + + fig.update_layout(title=f"{provider} NWP - example {b}", xaxis_title="Time", yaxis_title="Value") + # fig.show(renderer='browser') + name = f"{provider}_nwp_{b}.png" + fig.write_image(name) + print(f"![]({name})") + print("\n") for key in keys: print("\n") @@ -119,10 +120,14 @@ def visualize_batch(batch: NumpyBatch, example_id: int = 0): value = nwp_provider[key] if "time" in key.name: - value = pd.to_datetime(value[example_id], unit="s") - print(f"Shape={value.shape}") - print(f"Max {value.max()}") - print(f"Min {value.min()}") + + # make a table with example, shape, max, min + print("| Example | Shape | Max | Min |") + print("| --- | --- | --- | --- |") + + for example_id in range(value.shape[0]): + value_ts = pd.to_datetime(value[example_id], unit="s") + print(f"| {example_id} | {len(value_ts)} | {value_ts.max()} | {value_ts.min()} |") elif "channel" in key.name: From 2a7dde452d3aeb666eceb5e98e7520c8a2b2b414 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 28 Jun 2024 15:47:50 +0000 Subject: [PATCH 04/16] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- ocf_datapipes/batch/visualise.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/ocf_datapipes/batch/visualise.py b/ocf_datapipes/batch/visualise.py index 201288530..5d97ff229 100644 --- a/ocf_datapipes/batch/visualise.py +++ b/ocf_datapipes/batch/visualise.py @@ -105,9 +105,13 @@ def visualize_batch(batch: NumpyBatch): nwp_data_one_channel = nwp_data[b, :, i] time = nwp_provider[NWPBatchKey.nwp_target_time_utc][b] time = pd.to_datetime(time, unit="s") - fig.add_trace(go.Scatter(x=time, y=nwp_data_one_channel, mode="lines", name=channel)) + fig.add_trace( + go.Scatter(x=time, y=nwp_data_one_channel, mode="lines", name=channel) + ) - fig.update_layout(title=f"{provider} NWP - example {b}", xaxis_title="Time", yaxis_title="Value") + fig.update_layout( + title=f"{provider} NWP - example {b}", xaxis_title="Time", yaxis_title="Value" + ) # fig.show(renderer='browser') name = f"{provider}_nwp_{b}.png" fig.write_image(name) @@ -127,7 +131,9 @@ def visualize_batch(batch: NumpyBatch): for example_id in range(value.shape[0]): value_ts = pd.to_datetime(value[example_id], unit="s") - print(f"| {example_id} | {len(value_ts)} | {value_ts.max()} | {value_ts.min()} |") + print( + f"| {example_id} | {len(value_ts)} | {value_ts.max()} | {value_ts.min()} |" + ) elif "channel" in key.name: From e86104e49913739997c991ebc67af1afbb01aae1 Mon Sep 17 00:00:00 2001 From: peterdudfield Date: Fri, 28 Jun 2024 16:56:51 +0100 Subject: [PATCH 05/16] plot GSP --- ocf_datapipes/batch/visualise.py | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/ocf_datapipes/batch/visualise.py b/ocf_datapipes/batch/visualise.py index 201288530..11f9eb06f 100644 --- a/ocf_datapipes/batch/visualise.py +++ b/ocf_datapipes/batch/visualise.py @@ -63,7 +63,21 @@ def visualize_batch(batch: NumpyBatch): print("\n") print(f"### {key.name}") value = batch[key] - if isinstance(value, torch.Tensor): + if key.name == 'gsp': + # plot gsp data + for b in range(value.shape[0]): + fig = go.Figure() + gsp_data = value[b,:,0] + time = pd.to_datetime(batch[BatchKey.gsp_time_utc][b], unit='s') + fig.add_trace(go.Scatter(x=time, y=gsp_data, mode="lines", name=f"GSP")) + fig.update_layout(title=f"GSP - example {b}", xaxis_title="Time", yaxis_title="Value") + # fig.show(renderer='browser') + name = f"gsp_{b}.png" + fig.write_image(name) + print(f"![]({name})") + print("\n") + + elif isinstance(value, torch.Tensor): print(f"shape {value.shape=}") print(f"Max {value.max():.2f}") print(f"Min {value.min():.2f}") @@ -72,6 +86,8 @@ def visualize_batch(batch: NumpyBatch): else: print(f"{value}") + # TODO plot solar azimuth and elevation + # NWP print("## NWP \n") @@ -157,4 +173,4 @@ def visualize_batch(batch: NumpyBatch): # with open("batch.md", "w") as f: # sys.stdout = f # d = torch.load("000000.pt") -# visualize_batch(d, example_id=3) +# visualize_batch(d) From 5a726507d84ac007e5308a86243f4b3b619f46a9 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 28 Jun 2024 15:57:29 +0000 Subject: [PATCH 06/16] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- ocf_datapipes/batch/visualise.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/ocf_datapipes/batch/visualise.py b/ocf_datapipes/batch/visualise.py index 50c77bbe1..22f59312d 100644 --- a/ocf_datapipes/batch/visualise.py +++ b/ocf_datapipes/batch/visualise.py @@ -63,14 +63,16 @@ def visualize_batch(batch: NumpyBatch): print("\n") print(f"### {key.name}") value = batch[key] - if key.name == 'gsp': + if key.name == "gsp": # plot gsp data for b in range(value.shape[0]): fig = go.Figure() - gsp_data = value[b,:,0] - time = pd.to_datetime(batch[BatchKey.gsp_time_utc][b], unit='s') - fig.add_trace(go.Scatter(x=time, y=gsp_data, mode="lines", name=f"GSP")) - fig.update_layout(title=f"GSP - example {b}", xaxis_title="Time", yaxis_title="Value") + gsp_data = value[b, :, 0] + time = pd.to_datetime(batch[BatchKey.gsp_time_utc][b], unit="s") + fig.add_trace(go.Scatter(x=time, y=gsp_data, mode="lines", name="GSP")) + fig.update_layout( + title=f"GSP - example {b}", xaxis_title="Time", yaxis_title="Value" + ) # fig.show(renderer='browser') name = f"gsp_{b}.png" fig.write_image(name) From 20e5df2ff1ad3fca29b299a706a8dbfe8316daac Mon Sep 17 00:00:00 2001 From: peterdudfield Date: Fri, 28 Jun 2024 17:09:46 +0100 Subject: [PATCH 07/16] lint --- ocf_datapipes/batch/visualise.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/ocf_datapipes/batch/visualise.py b/ocf_datapipes/batch/visualise.py index 22f59312d..a6e7857c6 100644 --- a/ocf_datapipes/batch/visualise.py +++ b/ocf_datapipes/batch/visualise.py @@ -11,6 +11,7 @@ def visualize_batch(batch: NumpyBatch): + """Visualize the batch in a markdown file""" # Wind print("# Batch visualization") @@ -162,7 +163,11 @@ def visualize_batch(batch: NumpyBatch): channel = value[i] data = nwp_data[:, :, i] print( - f"| {channel} | {data.max().item():.2f} | {data.min().item():.2f} | {data.mean().item():.2f} | {data.std().item():.2f} |" + f"| {channel} " + f"| {data.max().item():.2f} " + f"| {data.min().item():.2f} " + f"| {data.mean().item():.2f} " + f"| {data.std().item():.2f} |" ) print(f"Shape={value.shape}") From cae02db21be3c2aa44c99e6b968bd096822a5051 Mon Sep 17 00:00:00 2001 From: peterdudfield Date: Fri, 28 Jun 2024 17:22:29 +0100 Subject: [PATCH 08/16] add satellite visualization --- ocf_datapipes/batch/visualise.py | 84 ++++++++++++++++++++++++++++++++ 1 file changed, 84 insertions(+) diff --git a/ocf_datapipes/batch/visualise.py b/ocf_datapipes/batch/visualise.py index a6e7857c6..120276193 100644 --- a/ocf_datapipes/batch/visualise.py +++ b/ocf_datapipes/batch/visualise.py @@ -181,6 +181,90 @@ def visualize_batch(batch: NumpyBatch): else: print(f"{value}") + # Satellite + print("## Satellite \n") + keys = [ + BatchKey.satellite_actual, + BatchKey.satellite_t0_idx, + BatchKey.satellite_time_utc, + BatchKey.satellite_time_utc, + BatchKey.satellite_x_geostationary, + BatchKey.satellite_y_geostationary, + ] + + for key in keys: + + print("\n") + print(f"#### {key.name}") + value = batch[key] + + if "satellite_actual" in key.name: + + print(value.shape) + + # average of lat and lon + value = value.mean(dim=(3, 4)) + + for b in range(value.shape[0]): + + fig = go.Figure() + for i in range(value.shape[2]): + satellite_data_one_channel = value[b, :, i] + time = batch[BatchKey.satellite_time_utc][b] + time = pd.to_datetime(time, unit="s") + fig.add_trace( + go.Scatter(x=time, y=satellite_data_one_channel, mode="lines") + ) + + fig.update_layout( + title=f"Satellite - example {b}", xaxis_title="Time", yaxis_title="Value" + ) + # fig.show(renderer='browser') + name = f"satellite_{b}.png" + fig.write_image(name) + print(f"![]({name})") + print("\n") + + elif "time" in key.name: + + # make a table with example, shape, max, min + print("| Example | Shape | Max | Min |") + print("| --- | --- | --- | --- |") + + for example_id in range(value.shape[0]): + value_ts = pd.to_datetime(value[example_id], unit="s") + print( + f"| {example_id} | {len(value_ts)} | {value_ts.max()} | {value_ts.min()} |" + ) + + elif "channel" in key.name: + + # create a table with the channel names with max, min, mean and std + print("| Channel | Max | Min | Mean | Std |") + print("| --- | --- | --- | --- | --- |") + for i in range(len(value)): + channel = value[i] + data = nwp_data[:, :, i] + print( + f"| {channel} " + f"| {data.max().item():.2f} " + f"| {data.min().item():.2f} " + f"| {data.mean().item():.2f} " + f"| {data.std().item():.2f} |" + ) + + print(f"Shape={value.shape}") + + elif isinstance(value, torch.Tensor): + print(f"Shape {value.shape=}") + print(f"Max {value.max():.2f}") + print(f"Min {value.min():.2f}") + elif isinstance(value, int): + print(f"{value}") + else: + print(f"{value}") + + # For example you can run it like this # with open("batch.md", "w") as f: From e708d590467f69e180d3153239a05e1f8963c98b Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 28 Jun 2024 16:23:11 +0000 Subject: [PATCH 09/16] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- ocf_datapipes/batch/visualise.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/ocf_datapipes/batch/visualise.py b/ocf_datapipes/batch/visualise.py index 120276193..63a8aae49 100644 --- a/ocf_datapipes/batch/visualise.py +++ b/ocf_datapipes/batch/visualise.py @@ -212,9 +212,7 @@ def visualize_batch(batch: NumpyBatch): satellite_data_one_channel = value[b, :, i] time = batch[BatchKey.satellite_time_utc][b] time = pd.to_datetime(time, unit="s") - fig.add_trace( - go.Scatter(x=time, y=satellite_data_one_channel, mode="lines") - ) + fig.add_trace(go.Scatter(x=time, y=satellite_data_one_channel, mode="lines")) fig.update_layout( title=f"Satellite - example {b}", xaxis_title="Time", yaxis_title="Value" @@ -233,9 +231,7 @@ def visualize_batch(batch: NumpyBatch): for example_id in range(value.shape[0]): value_ts = pd.to_datetime(value[example_id], unit="s") - print( - f"| {example_id} | {len(value_ts)} | {value_ts.max()} | {value_ts.min()} |" - ) + print(f"| {example_id} | {len(value_ts)} | {value_ts.max()} | {value_ts.min()} |") elif "channel" in key.name: @@ -265,7 +261,6 @@ def visualize_batch(batch: NumpyBatch): print(f"{value}") - # For example you can run it like this # with open("batch.md", "w") as f: # sys.stdout = f From caa8e12d430cab771e2e0715265cfd9054486462 Mon Sep 17 00:00:00 2001 From: peterdudfield Date: Wed, 4 Sep 2024 11:34:05 +0100 Subject: [PATCH 10/16] upgrade to visualization - add limit examples - add dir - prints straight to report --- ocf_datapipes/batch/visualise.py | 461 ++++++++++++++++--------------- 1 file changed, 244 insertions(+), 217 deletions(-) diff --git a/ocf_datapipes/batch/visualise.py b/ocf_datapipes/batch/visualise.py index 63a8aae49..578f8e7a3 100644 --- a/ocf_datapipes/batch/visualise.py +++ b/ocf_datapipes/batch/visualise.py @@ -3,6 +3,7 @@ This is a bit of a working progress, but the idea is to visualize the batch in a markdown file. """ +import os import pandas as pd import plotly.graph_objects as go import torch @@ -10,155 +11,259 @@ from ocf_datapipes.batch import BatchKey, NumpyBatch, NWPBatchKey -def visualize_batch(batch: NumpyBatch): +def visualize_batch(batch: NumpyBatch, folder=".", output_file="report.md", limit_examples=None): """Visualize the batch in a markdown file""" - # Wind - print("# Batch visualization") - - print("## Wind \n") - keys = [ - BatchKey.wind, - BatchKey.wind_t0_idx, - BatchKey.wind_time_utc, - BatchKey.wind_id, - BatchKey.wind_observed_capacity_mwp, - BatchKey.wind_nominal_capacity_mwp, - BatchKey.wind_time_utc, - BatchKey.wind_latitude, - BatchKey.wind_longitude, - BatchKey.wind_solar_azimuth, - BatchKey.wind_solar_elevation, - ] - for key in keys: - if key in batch.keys(): - print("\n") - value = batch[key] - if isinstance(value, torch.Tensor): - print(f"{key} {value.shape=}") - print(f"Max {value.max()}") - print(f"Min {value.min()}") - elif isinstance(value, int): - print(f"{key} {value}") - else: - print(f"{key} {value}") - - print("## GSP \n") - keys = [ - BatchKey.gsp, - BatchKey.gsp_id, - BatchKey.gsp_time_utc, - BatchKey.gsp_time_utc_fourier, - BatchKey.gsp_x_osgb, - BatchKey.gsp_x_osgb_fourier, - BatchKey.gsp_y_osgb, - BatchKey.gsp_y_osgb_fourier, - BatchKey.gsp_t0_idx, - BatchKey.gsp_effective_capacity_mwp, - BatchKey.gsp_nominal_capacity_mwp, - BatchKey.gsp_solar_azimuth, - BatchKey.gsp_solar_elevation, - ] - for key in keys: - if key in batch.keys(): - print("\n") - print(f"### {key.name}") - value = batch[key] - if key.name == "gsp": - # plot gsp data - for b in range(value.shape[0]): - fig = go.Figure() - gsp_data = value[b, :, 0] - time = pd.to_datetime(batch[BatchKey.gsp_time_utc][b], unit="s") - fig.add_trace(go.Scatter(x=time, y=gsp_data, mode="lines", name="GSP")) - fig.update_layout( - title=f"GSP - example {b}", xaxis_title="Time", yaxis_title="Value" + # create dir if it does not exist + for d in [folder, f"{folder}/gsp", f"{folder}/nwp", f"{folder}/satellite"]: + if not os.path.exists(d): + os.makedirs(d) + + with open(f"{folder}/{output_file}", "a") as f: + # Wind + print("# Batch visualization", file=f) + + print("## Wind \n", file=f) + keys = [ + BatchKey.wind, + BatchKey.wind_t0_idx, + BatchKey.wind_time_utc, + BatchKey.wind_id, + BatchKey.wind_observed_capacity_mwp, + BatchKey.wind_nominal_capacity_mwp, + BatchKey.wind_time_utc, + BatchKey.wind_latitude, + BatchKey.wind_longitude, + BatchKey.wind_solar_azimuth, + BatchKey.wind_solar_elevation, + ] + for key in keys: + if key in batch.keys(): + print("\n", file=f) + value = batch[key] + if isinstance(value, torch.Tensor): + print(f"{key} {value.shape=}", file=f) + print(f"Max {value.max()}", file=f) + print(f"Min {value.min()}", file=f) + elif isinstance(value, int): + print(f"{key} {value}", file=f) + else: + print(f"{key} {value}", file=f) + + print("## GSP \n", file=f) + keys = [ + BatchKey.gsp, + BatchKey.gsp_id, + BatchKey.gsp_time_utc, + BatchKey.gsp_time_utc_fourier, + BatchKey.gsp_x_osgb, + BatchKey.gsp_x_osgb_fourier, + BatchKey.gsp_y_osgb, + BatchKey.gsp_y_osgb_fourier, + BatchKey.gsp_t0_idx, + BatchKey.gsp_effective_capacity_mwp, + BatchKey.gsp_nominal_capacity_mwp, + BatchKey.gsp_solar_azimuth, + BatchKey.gsp_solar_elevation, + ] + for key in keys: + if key in batch.keys(): + print("\n", file=f) + print(f"### {key.name}", file=f) + value = batch[key] + if key.name == "gsp": + # plot gsp data + n_examples = value.shape[0] + if limit_examples is not None: + n_examples = min(n_examples, limit_examples) + + for b in range(n_examples): + fig = go.Figure() + gsp_data = value[b, :, 0] + time = pd.to_datetime(batch[BatchKey.gsp_time_utc][b], unit="s") + fig.add_trace(go.Scatter(x=time, y=gsp_data, mode="lines", name="GSP")) + fig.update_layout( + title=f"GSP - example {b}", xaxis_title="Time", yaxis_title="Value" + ) + # fig.show(renderer='browser') + name = f"gsp/gsp_{b}.png" + fig.write_image(f"{folder}/{name}") + print(f"![](./{name})", file=f) + print("\n", file=f) + + elif isinstance(value, torch.Tensor): + print(f"shape {value.shape=}", file=f) + print(f"Max {value.max():.2f}", file=f) + print(f"Min {value.min():.2f}", file=f) + elif isinstance(value, int): + print(f"{value}", file=f) + else: + print(f"{value}", file=f) + + # TODO plot solar azimuth and elevation + + # NWP + print("## NWP \n", file=f) + + keys = [ + NWPBatchKey.nwp, + NWPBatchKey.nwp_target_time_utc, + NWPBatchKey.nwp_channel_names, + NWPBatchKey.nwp_step, + NWPBatchKey.nwp_t0_idx, + NWPBatchKey.nwp_init_time_utc, + ] + + nwp = batch[BatchKey.nwp] + + nwp_providers = nwp.keys() + for provider in nwp_providers: + print("\n", file=f) + print(f"### Provider {provider}", file=f) + nwp_provider = nwp[provider] + + # plot nwp main data + nwp_data = nwp_provider[NWPBatchKey.nwp] + # average of lat and lon + nwp_data = nwp_data.mean(dim=(3, 4)) + + n_examples = nwp_data.shape[0] + if limit_examples is not None: + n_examples = min(n_examples, limit_examples) + + for b in range(n_examples): + + fig = go.Figure() + for i in range(len(nwp_provider[NWPBatchKey.nwp_channel_names])): + channel = nwp_provider[NWPBatchKey.nwp_channel_names][i] + nwp_data_one_channel = nwp_data[b, :, i] + time = nwp_provider[NWPBatchKey.nwp_target_time_utc][b] + time = pd.to_datetime(time, unit="s") + fig.add_trace( + go.Scatter(x=time, y=nwp_data_one_channel, mode="lines", name=channel) ) - # fig.show(renderer='browser') - name = f"gsp_{b}.png" - fig.write_image(name) - print(f"![]({name})") - print("\n") - elif isinstance(value, torch.Tensor): - print(f"shape {value.shape=}") - print(f"Max {value.max():.2f}") - print(f"Min {value.min():.2f}") - elif isinstance(value, int): - print(f"{value}") - else: - print(f"{value}") - - # TODO plot solar azimuth and elevation - - # NWP - print("## NWP \n") - - keys = [ - NWPBatchKey.nwp, - NWPBatchKey.nwp_target_time_utc, - NWPBatchKey.nwp_channel_names, - NWPBatchKey.nwp_step, - NWPBatchKey.nwp_t0_idx, - NWPBatchKey.nwp_init_time_utc, - ] - - nwp = batch[BatchKey.nwp] - - nwp_providers = nwp.keys() - for provider in nwp_providers: - print("\n") - print(f"### Provider {provider}") - nwp_provider = nwp[provider] - - # plot nwp main data - nwp_data = nwp_provider[NWPBatchKey.nwp] - # average of lat and lon - nwp_data = nwp_data.mean(dim=(3, 4)) - - for b in range(nwp_data.shape[0]): - - fig = go.Figure() - for i in range(len(nwp_provider[NWPBatchKey.nwp_channel_names])): - channel = nwp_provider[NWPBatchKey.nwp_channel_names][i] - nwp_data_one_channel = nwp_data[b, :, i] - time = nwp_provider[NWPBatchKey.nwp_target_time_utc][b] - time = pd.to_datetime(time, unit="s") - fig.add_trace( - go.Scatter(x=time, y=nwp_data_one_channel, mode="lines", name=channel) + fig.update_layout( + title=f"{provider} NWP - example {b}", xaxis_title="Time", yaxis_title="Value" ) - - fig.update_layout( - title=f"{provider} NWP - example {b}", xaxis_title="Time", yaxis_title="Value" - ) - # fig.show(renderer='browser') - name = f"{provider}_nwp_{b}.png" - fig.write_image(name) - print(f"![]({name})") - print("\n") + # fig.show(renderer='browser') + name = f"nwp/{provider}_nwp_{b}.png" + fig.write_image(f"{folder}/{name}") + print(f"![](./{name})", file=f) + print("\n", file=f) + + for key in keys: + print("\n", file=f) + print(f"#### {key.name}", file=f) + value = nwp_provider[key] + + if "time" in key.name: + + # make a table with example, shape, max, min + print("| Example | Shape | Max | Min |", file=f) + print("| --- | --- | --- | --- |", file=f) + + for example_id in range(n_examples): + value_ts = pd.to_datetime(value[example_id], unit="s") + print( + f"| {example_id} | {len(value_ts)} | {value_ts.max()} | {value_ts.min()} |", + file=f, + ) + + elif "channel" in key.name: + + # create a table with the channel names with max, min, mean and std + print("| Channel | Max | Min | Mean | Std |", file=f) + print("| --- | --- | --- | --- | --- |", file=f) + for i in range(len(value)): + channel = value[i] + data = nwp_data[:, :, i] + print( + f"| {channel} " + f"| {data.max().item():.2f} " + f"| {data.min().item():.2f} " + f"| {data.mean().item():.2f} " + f"| {data.std().item():.2f} |", + file=f, + ) + + print(f"Shape={value.shape}", file=f) + + elif isinstance(value, torch.Tensor): + print(f"Shape {value.shape=}", file=f) + print(f"Max {value.max():.2f}", file=f) + print(f"Min {value.min():.2f}", file=f) + elif isinstance(value, int): + print(f"{value}", file=f) + else: + print(f"{value}", file=f) + + # Satellite + print("## Satellite \n", file=f) + keys = [ + BatchKey.satellite_actual, + BatchKey.satellite_t0_idx, + BatchKey.satellite_time_utc, + BatchKey.satellite_time_utc, + BatchKey.satellite_x_geostationary, + BatchKey.satellite_y_geostationary, + ] for key in keys: - print("\n") - print(f"#### {key.name}") - value = nwp_provider[key] - if "time" in key.name: + print("\n", file=f) + print(f"#### {key.name}", file=f) + value = batch[key] + + if "satellite_actual" in key.name: + + print(value.shape, file=f) + + # average of lat and lon + value = value.mean(dim=(3, 4)) + + n_examples = value.shape[0] + if limit_examples is not None: + n_examples = min(n_examples, limit_examples) + + for b in range(n_examples): + + fig = go.Figure() + for i in range(value.shape[2]): + satellite_data_one_channel = value[b, :, i] + time = batch[BatchKey.satellite_time_utc][b] + time = pd.to_datetime(time, unit="s") + fig.add_trace( + go.Scatter(x=time, y=satellite_data_one_channel, mode="lines") + ) + + fig.update_layout( + title=f"Satellite - example {b}", xaxis_title="Time", yaxis_title="Value" + ) + # fig.show(renderer='browser') + name = f"satellite/satellite_{b}.png" + fig.write_image(f"{folder}/{name}") + print(f"![](./{name})", file=f) + print("\n", file=f) + + elif "time" in key.name: # make a table with example, shape, max, min - print("| Example | Shape | Max | Min |") - print("| --- | --- | --- | --- |") + print("| Example | Shape | Max | Min |", file=f) + print("| --- | --- | --- | --- |", file=f) - for example_id in range(value.shape[0]): + for example_id in range(n_examples): value_ts = pd.to_datetime(value[example_id], unit="s") print( - f"| {example_id} | {len(value_ts)} | {value_ts.max()} | {value_ts.min()} |" + f"| {example_id} | {len(value_ts)} | {value_ts.max()} | {value_ts.min()} |", + file=f, ) elif "channel" in key.name: # create a table with the channel names with max, min, mean and std - print("| Channel | Max | Min | Mean | Std |") - print("| --- | --- | --- | --- | --- |") + print("| Channel | Max | Min | Mean | Std |", file=f) + print("| --- | --- | --- | --- | --- |", file=f) for i in range(len(value)): channel = value[i] data = nwp_data[:, :, i] @@ -167,98 +272,20 @@ def visualize_batch(batch: NumpyBatch): f"| {data.max().item():.2f} " f"| {data.min().item():.2f} " f"| {data.mean().item():.2f} " - f"| {data.std().item():.2f} |" + f"| {data.std().item():.2f} |", + file=f, ) - print(f"Shape={value.shape}") + print(f"Shape={value.shape}", file=f) elif isinstance(value, torch.Tensor): - print(f"Shape {value.shape=}") - print(f"Max {value.max():.2f}") - print(f"Min {value.min():.2f}") + print(f"Shape {value.shape=}", file=f) + print(f"Max {value.max():.2f}", file=f) + print(f"Min {value.min():.2f}", file=f) elif isinstance(value, int): - print(f"{value}") + print(f"{value}", file=f) else: - print(f"{value}") - - # Satellite - print("## Satellite \n") - keys = [ - BatchKey.satellite_actual, - BatchKey.satellite_t0_idx, - BatchKey.satellite_time_utc, - BatchKey.satellite_time_utc, - BatchKey.satellite_x_geostationary, - BatchKey.satellite_y_geostationary, - ] - - for key in keys: - - print("\n") - print(f"#### {key.name}") - value = batch[key] - - if "satellite_actual" in key.name: - - print(value.shape) - - # average of lat and lon - value = value.mean(dim=(3, 4)) - - for b in range(value.shape[0]): - - fig = go.Figure() - for i in range(value.shape[2]): - satellite_data_one_channel = value[b, :, i] - time = batch[BatchKey.satellite_time_utc][b] - time = pd.to_datetime(time, unit="s") - fig.add_trace(go.Scatter(x=time, y=satellite_data_one_channel, mode="lines")) - - fig.update_layout( - title=f"Satellite - example {b}", xaxis_title="Time", yaxis_title="Value" - ) - # fig.show(renderer='browser') - name = f"satellite_{b}.png" - fig.write_image(name) - print(f"![]({name})") - print("\n") - - elif "time" in key.name: - - # make a table with example, shape, max, min - print("| Example | Shape | Max | Min |") - print("| --- | --- | --- | --- |") - - for example_id in range(value.shape[0]): - value_ts = pd.to_datetime(value[example_id], unit="s") - print(f"| {example_id} | {len(value_ts)} | {value_ts.max()} | {value_ts.min()} |") - - elif "channel" in key.name: - - # create a table with the channel names with max, min, mean and std - print("| Channel | Max | Min | Mean | Std |") - print("| --- | --- | --- | --- | --- |") - for i in range(len(value)): - channel = value[i] - data = nwp_data[:, :, i] - print( - f"| {channel} " - f"| {data.max().item():.2f} " - f"| {data.min().item():.2f} " - f"| {data.mean().item():.2f} " - f"| {data.std().item():.2f} |" - ) - - print(f"Shape={value.shape}") - - elif isinstance(value, torch.Tensor): - print(f"Shape {value.shape=}") - print(f"Max {value.max():.2f}") - print(f"Min {value.min():.2f}") - elif isinstance(value, int): - print(f"{value}") - else: - print(f"{value}") + print(f"{value}", file=f) # For example you can run it like this From 6bab08b04b3d6dcc8b6f6c78ec37c38421c9c71b Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 4 Sep 2024 10:34:37 +0000 Subject: [PATCH 11/16] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- ocf_datapipes/batch/visualise.py | 1 + 1 file changed, 1 insertion(+) diff --git a/ocf_datapipes/batch/visualise.py b/ocf_datapipes/batch/visualise.py index 578f8e7a3..03ed22fc2 100644 --- a/ocf_datapipes/batch/visualise.py +++ b/ocf_datapipes/batch/visualise.py @@ -4,6 +4,7 @@ """ import os + import pandas as pd import plotly.graph_objects as go import torch From 62c56f0b4c9f406f9d5146873a2c759d316c83b9 Mon Sep 17 00:00:00 2001 From: AUdaltsova Date: Thu, 26 Sep 2024 17:35:39 +0100 Subject: [PATCH 12/16] move to visualise fold and refactor to british spelling --- .../{batch/visualise.py => visualisation/batch.py} | 10 +++++----- ocf_datapipes/visualization/batch.py | 1 - 2 files changed, 5 insertions(+), 6 deletions(-) rename ocf_datapipes/{batch/visualise.py => visualisation/batch.py} (97%) delete mode 100644 ocf_datapipes/visualization/batch.py diff --git a/ocf_datapipes/batch/visualise.py b/ocf_datapipes/visualisation/batch.py similarity index 97% rename from ocf_datapipes/batch/visualise.py rename to ocf_datapipes/visualisation/batch.py index 03ed22fc2..57e5a68ff 100644 --- a/ocf_datapipes/batch/visualise.py +++ b/ocf_datapipes/visualisation/batch.py @@ -1,6 +1,6 @@ -""" The idea is visualize one of the batches +""" The idea is visualise one of the batches -This is a bit of a working progress, but the idea is to visualize the batch in a markdown file. +This is a bit of a work in progress, but the idea is to visualise the batch in a markdown file. """ import os @@ -12,7 +12,7 @@ from ocf_datapipes.batch import BatchKey, NumpyBatch, NWPBatchKey -def visualize_batch(batch: NumpyBatch, folder=".", output_file="report.md", limit_examples=None): +def visualise_batch(batch: NumpyBatch, folder=".", output_file="report.md", limit_examples=None): """Visualize the batch in a markdown file""" # create dir if it does not exist @@ -22,7 +22,7 @@ def visualize_batch(batch: NumpyBatch, folder=".", output_file="report.md", limi with open(f"{folder}/{output_file}", "a") as f: # Wind - print("# Batch visualization", file=f) + print("# Batch visualisation", file=f) print("## Wind \n", file=f) keys = [ @@ -293,4 +293,4 @@ def visualize_batch(batch: NumpyBatch, folder=".", output_file="report.md", limi # with open("batch.md", "w") as f: # sys.stdout = f # d = torch.load("000000.pt") -# visualize_batch(d) +# visualise_batch(d) diff --git a/ocf_datapipes/visualization/batch.py b/ocf_datapipes/visualization/batch.py deleted file mode 100644 index f5005e28b..000000000 --- a/ocf_datapipes/visualization/batch.py +++ /dev/null @@ -1 +0,0 @@ -"""Visualization of batch data.""" From 6f720a193421b85d836d356d9836f40c0a448310 Mon Sep 17 00:00:00 2001 From: AUdaltsova Date: Thu, 26 Sep 2024 17:36:49 +0100 Subject: [PATCH 13/16] add visualise_batch to init --- ocf_datapipes/{visualization => visualisation}/README.md | 0 ocf_datapipes/{visualization => visualisation}/__init__.py | 2 ++ 2 files changed, 2 insertions(+) rename ocf_datapipes/{visualization => visualisation}/README.md (100%) rename ocf_datapipes/{visualization => visualisation}/__init__.py (57%) diff --git a/ocf_datapipes/visualization/README.md b/ocf_datapipes/visualisation/README.md similarity index 100% rename from ocf_datapipes/visualization/README.md rename to ocf_datapipes/visualisation/README.md diff --git a/ocf_datapipes/visualization/__init__.py b/ocf_datapipes/visualisation/__init__.py similarity index 57% rename from ocf_datapipes/visualization/__init__.py rename to ocf_datapipes/visualisation/__init__.py index e310d3b1b..d654617a1 100644 --- a/ocf_datapipes/visualization/__init__.py +++ b/ocf_datapipes/visualisation/__init__.py @@ -1 +1,3 @@ """Tools for plotting and visualizing data.""" + +from .batch import visualise_batch \ No newline at end of file From 73d156194c048ffac957b8b83584eeac419f228f Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 26 Sep 2024 16:38:03 +0000 Subject: [PATCH 14/16] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- ocf_datapipes/visualisation/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ocf_datapipes/visualisation/__init__.py b/ocf_datapipes/visualisation/__init__.py index d654617a1..bbc352f04 100644 --- a/ocf_datapipes/visualisation/__init__.py +++ b/ocf_datapipes/visualisation/__init__.py @@ -1,3 +1,3 @@ """Tools for plotting and visualizing data.""" -from .batch import visualise_batch \ No newline at end of file +from .batch import visualise_batch From b7d2d643f3ed87d8fd2d7ac0ee4e850da82c7d16 Mon Sep 17 00:00:00 2001 From: AUdaltsova Date: Thu, 26 Sep 2024 17:50:54 +0100 Subject: [PATCH 15/16] linting --- ocf_datapipes/visualisation/batch.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ocf_datapipes/visualisation/batch.py b/ocf_datapipes/visualisation/batch.py index 57e5a68ff..f12c0dfa8 100644 --- a/ocf_datapipes/visualisation/batch.py +++ b/ocf_datapipes/visualisation/batch.py @@ -167,7 +167,8 @@ def visualise_batch(batch: NumpyBatch, folder=".", output_file="report.md", limi for example_id in range(n_examples): value_ts = pd.to_datetime(value[example_id], unit="s") print( - f"| {example_id} | {len(value_ts)} | {value_ts.max()} | {value_ts.min()} |", + f"""| {example_id} | {len(value_ts)} | + {value_ts.max()} | {value_ts.min()} |""", file=f, ) From b30d5434b26bae677f71376606e6e15d88c8b4df Mon Sep 17 00:00:00 2001 From: AUdaltsova Date: Thu, 26 Sep 2024 17:57:48 +0100 Subject: [PATCH 16/16] linting --- ocf_datapipes/visualisation/batch.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/ocf_datapipes/visualisation/batch.py b/ocf_datapipes/visualisation/batch.py index f12c0dfa8..e449a4f83 100644 --- a/ocf_datapipes/visualisation/batch.py +++ b/ocf_datapipes/visualisation/batch.py @@ -167,8 +167,8 @@ def visualise_batch(batch: NumpyBatch, folder=".", output_file="report.md", limi for example_id in range(n_examples): value_ts = pd.to_datetime(value[example_id], unit="s") print( - f"""| {example_id} | {len(value_ts)} | - {value_ts.max()} | {value_ts.min()} |""", + f"| {example_id} | {len(value_ts)} " + f"| {value_ts.max()} | {value_ts.min()} |", file=f, ) @@ -257,7 +257,8 @@ def visualise_batch(batch: NumpyBatch, folder=".", output_file="report.md", limi for example_id in range(n_examples): value_ts = pd.to_datetime(value[example_id], unit="s") print( - f"| {example_id} | {len(value_ts)} | {value_ts.max()} | {value_ts.min()} |", + f"| {example_id} | {len(value_ts)} " + f"| {value_ts.max()} | {value_ts.min()} |", file=f, )