From 37e5634cbfefec09e159ab70daf14557c66f0f6c Mon Sep 17 00:00:00 2001 From: James Fulton Date: Tue, 4 Feb 2025 14:40:35 +0000 Subject: [PATCH] save over old files + save multiple copies --- src/cloudcasting_app/app.py | 19 +++++++++++++++++-- src/cloudcasting_app/data.py | 2 +- tests/test_app.py | 12 +++++++++--- 3 files changed, 27 insertions(+), 6 deletions(-) diff --git a/src/cloudcasting_app/app.py b/src/cloudcasting_app/app.py index 5a08b58..468f367 100644 --- a/src/cloudcasting_app/app.py +++ b/src/cloudcasting_app/app.py @@ -3,7 +3,7 @@ This app expects these environmental variables to be available: SATELLITE_ZARR_PATH (str): The path of the input satellite data - OUTPUT_PREDICTION_ZARR_PATH (str): The path to save the predictions to + OUTPUT_PREDICTION_DIRECTORY (str): The path of the directory to save the predictions to """ from importlib.metadata import PackageNotFoundError, version @@ -12,6 +12,7 @@ import yaml import hydra import typer +import fsspec import pandas as pd import xarray as xr @@ -131,7 +132,21 @@ def app(t0=None): ds_y_hat = da_y_hat.to_dataset(name="sat_pred") ds_y_hat.sat_pred.attrs.update(ds.data.attrs) - ds_y_hat.to_zarr(os.environ["OUTPUT_PREDICTION_ZARR_PATH"]) + # Save predictions to latest path and to path with timestring + out_dir = os.environ["OUTPUT_PREDICTION_DIRECTORY"] + + latest_zarr_path = f"{out_dir}/latest.zarr" + t0_string_zarr_path = t0.strftime(f"{out_dir}/%Y-%m-%dT%H:%M.zarr") + + fs, _ = fsspec.core.url_to_fs(out_dir) + for path in [latest_zarr_path, t0_string_zarr_path]: + + # Remove the path if it exists already + if fs.exists(path): + logger.info(f"Removing path: {path}") + fs.rm(path, recursive=True) + + ds_y_hat.to_zarr(path) if __name__ == "__main__": diff --git a/src/cloudcasting_app/data.py b/src/cloudcasting_app/data.py index d890f4f..0164319 100644 --- a/src/cloudcasting_app/data.py +++ b/src/cloudcasting_app/data.py @@ -105,7 +105,7 @@ def download_all_sat_data() -> bool: # download 5 minute satellite data sat_5_dl_path = os.environ["SATELLITE_ZARR_PATH"] - fs = fsspec.open(sat_5_dl_path).fs + fs, _ = fsspec.core.url_to_fs(sat_5_dl_path) if fs.exists(sat_5_dl_path): sat_available = True logger.info(f"Downloading 5-minute satellite data") diff --git a/tests/test_app.py b/tests/test_app.py index fc10c02..5fad7ff 100644 --- a/tests/test_app.py +++ b/tests/test_app.py @@ -14,15 +14,21 @@ def test_app(sat_5_data, tmp_path, test_t0): # In production sat zarr is zipped os.environ["SATELLITE_ZARR_PATH"] = "temp_sat.zarr.zip" - os.environ["OUTPUT_PREDICTION_ZARR_PATH"] = "sat_prediction.zarr" + os.environ["OUTPUT_PREDICTION_DIRECTORY"] = f"{tmp_path}" with zarr.storage.ZipStore("temp_sat.zarr.zip", mode="x") as store: sat_5_data.to_zarr(store) app() - + + # Check the two output files have been created + latest_zarr_path = f"{tmp_path}/latest.zarr" + t0_string_zarr_path = test_t0.strftime(f"{tmp_path}/%Y-%m-%dT%H:%M.zarr") + assert os.path.exists(latest_zarr_path) + assert os.path.exists(t0_string_zarr_path) + # Load the predictions and check them - ds_y_hat = xr.open_zarr(os.environ["OUTPUT_PREDICTION_ZARR_PATH"]) + ds_y_hat = xr.open_zarr(latest_zarr_path) assert "sat_pred" in ds_y_hat assert (