Skip to content

Commit

Permalink
Merge pull request #33 from ThomasBouche/feature/issue28
Browse files Browse the repository at this point in the history
Add option Temporary Directory
  • Loading branch information
ThomasBouche authored Jan 28, 2025
2 parents d094d0e + dd7905f commit 6b58ec2
Show file tree
Hide file tree
Showing 5 changed files with 57 additions and 15 deletions.
10 changes: 9 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,11 @@
# Changelog

## TODO
All notable changes to this project will be documented in this file.

The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [0.2.0] - January, 2025

### Features

* Add an optional temporary directory: `temp_dir` (#28).
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,8 @@ df_arome = arome_client.get_coverage(
pressures=None, # Optional: pressure level
long = (-5.1413, 9.5602), # Optional: longitude
lat = (41.33356, 51.0889), # Optional: latitude
coverage_id=None # Optional: an alternative to indicator/run/interval
coverage_id=None, # Optional: an alternative to indicator/run/interval
temp_dir=None, # Optional: Directory to store the temporary file
)
```
Note: The coverage_id can be used instead of indicator, run, and interval.
Expand Down
3 changes: 2 additions & 1 deletion docs/pages/how_to.md
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,8 @@ df_arome = arome_client.get_coverage(
pressures=None, # Optional: pressure level
long = (-5.1413, 9.5602), # Optional: longitude
lat = (41.33356, 51.0889), # Optional: latitude
coverage_id=None # Optional: an alternative to indicator/run/interval
coverage_id=None, # Optional: an alternative to indicator/run/interval
temp_dir=None, # Optional: Directory to store the temporary file
)
```

Expand Down
48 changes: 37 additions & 11 deletions src/meteole/forecast.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@

import datetime as dt
import logging
import os
import re
import shutil
import tempfile
from abc import ABC, abstractmethod
from functools import reduce
Expand Down Expand Up @@ -147,6 +149,7 @@ def get_coverage(
run: str | None = None,
interval: str | None = None,
coverage_id: str = "",
temp_dir: str | None = None,
) -> pd.DataFrame:
"""Return the coverage data (i.e., the weather forecast data).
Expand All @@ -163,6 +166,7 @@ def get_coverage(
raises an error if specified. Defaults to "P1D" for time-aggregated indicators such
as TOTAL_PRECIPITATION.
coverage_id: An id of a coverage, use get_capabilities() to get them.
temp_dir (str | None): Directory to store the temporary file. Defaults to None.
Returns:
pd.DataFrame: The complete run for the specified execution.
Expand Down Expand Up @@ -192,6 +196,7 @@ def get_coverage(
forecast_horizon=forecast_horizon,
lat=lat,
long=long,
temp_dir=temp_dir,
)
for forecast_horizon in forecast_horizons
for pressure in pressures
Expand Down Expand Up @@ -403,7 +408,11 @@ def _get_coverage_description(self, coverage_id: str) -> dict[Any, Any]:
response = self._client.get(url, params=params)
return xmltodict.parse(response.text)

def _grib_bytes_to_df(self, grib_str: bytes) -> pd.DataFrame:
def _grib_bytes_to_df(
self,
grib_str: bytes,
temp_dir: str | None = None,
) -> pd.DataFrame:
"""(Protected)
Converts GRIB data (in binary format) into a pandas DataFrame.
Expand All @@ -413,6 +422,7 @@ def _grib_bytes_to_df(self, grib_str: bytes) -> pd.DataFrame:
Args:
grib_str (bytes): Binary GRIB data as a byte string.
temp_dir (str | None): Directory to store the temporary file. Defaults to None.
Returns:
pd.DataFrame: A pandas DataFrame containing the extracted GRIB data,
Expand All @@ -427,8 +437,18 @@ def _grib_bytes_to_df(self, grib_str: bytes) -> pd.DataFrame:
- The temporary file used for parsing is deleted after use, by removing the temporary directory that contains it.
- Ensure the input GRIB data is valid and encoded in a binary format.
"""
created_temp_dir = False

if temp_dir:
if not os.path.exists(temp_dir):
os.makedirs(temp_dir)
created_temp_dir = True
temp_subdir = os.path.join(temp_dir, "temp_grib")
os.makedirs(temp_subdir, exist_ok=True)
else:
temp_subdir = tempfile.mkdtemp()

with tempfile.NamedTemporaryFile() as temp_file:
with tempfile.NamedTemporaryFile(dir=temp_subdir, delete=False) as temp_file:
# Write the GRIB binary data to the temporary file
temp_file.write(grib_str)
temp_file.flush() # Ensure the data is written to disk
Expand All @@ -439,6 +459,11 @@ def _grib_bytes_to_df(self, grib_str: bytes) -> pd.DataFrame:
# Convert the Dataset to a pandas DataFrame
df = ds.to_dataframe().reset_index()

if created_temp_dir and temp_dir is not None:
shutil.rmtree(temp_dir)
else:
shutil.rmtree(temp_subdir)

return df

def _get_data_single_forecast(
Expand All @@ -449,6 +474,7 @@ def _get_data_single_forecast(
height: int | None,
lat: tuple,
long: tuple,
temp_dir: str | None = None,
) -> pd.DataFrame:
"""(Protected)
Return the forecast's data for a given time and indicator.
Expand All @@ -460,6 +486,7 @@ def _get_data_single_forecast(
forecast_horizon (int): the forecast horizon in hours (how many hours ahead)
lat (tuple): minimum and maximum latitude
long (tuple): minimum and maximum longitude
temp_dir (str | None): Directory to store the temporary file. Defaults to None.
Returns:
pd.DataFrame: The forecast for the specified time.
Expand All @@ -474,7 +501,7 @@ def _get_data_single_forecast(
long=long,
)

df: pd.DataFrame = self._grib_bytes_to_df(grib_binary)
df: pd.DataFrame = self._grib_bytes_to_df(grib_binary, temp_dir=temp_dir)

# Drop and rename columns
df.drop(columns=["surface", "valid_time"], errors="ignore", inplace=True)
Expand Down Expand Up @@ -521,10 +548,7 @@ def _get_coverage_file(
long: tuple = (-12, 16),
) -> bytes:
"""(Protected)
Retrieves raster data for a specified model prediction and saves it to a file.
If no `filepath` is provided, the file is saved to a default cache directory under
the current working directory.
Retrieves data for a specified model prediction.
Args:
coverage_id (str): The coverage ID to retrieve. Use `get_capabilities` to list available coverage IDs.
Expand All @@ -537,10 +561,6 @@ def _get_coverage_file(
Defaults to (37.5, 55.4), covering the latitudes of France.
long (tuple[float, float], optional): Tuple specifying the minimum and maximum longitudes.
Defaults to (-12, 16), covering the longitudes of France.
file_format (str, optional): The format of the raster file. Supported formats are "grib" and "tiff".
Defaults to "grib".
filepath (Path, optional): The file path where the raster file will be saved. If not specified,
the file is saved to a cache directory.
Returns:
bytes: The raw binary content of the retrieved data.
Expand Down Expand Up @@ -605,6 +625,7 @@ def get_combined_coverage(
lat: tuple = FRANCE_METRO_LATITUDES,
long: tuple = FRANCE_METRO_LONGITUDES,
forecast_horizons: list[int] | None = None,
temp_dir: str | None = None,
) -> pd.DataFrame:
"""
Get a combined DataFrame of coverage data for multiple indicators and different runs.
Expand All @@ -624,6 +645,7 @@ def get_combined_coverage(
lat (tuple): The latitude range as (min_latitude, max_latitude). Defaults to FRANCE_METRO_LATITUDES.
long (tuple): The longitude range as (min_longitude, max_longitude). Defaults to FRANCE_METRO_LONGITUDES.
forecast_horizons (list[int] | None): A list of forecast horizon values in hours. Defaults to None.
temp_dir (str | None): Directory to store the temporary file. Defaults to None.
Returns:
pd.DataFrame: A combined DataFrame containing coverage data for all specified runs and indicators.
Expand All @@ -643,6 +665,7 @@ def get_combined_coverage(
pressures=pressures,
intervals=intervals,
forecast_horizons=forecast_horizons,
temp_dir=temp_dir,
)
for run in runs
]
Expand All @@ -658,6 +681,7 @@ def _get_combined_coverage_for_single_run(
lat: tuple = FRANCE_METRO_LATITUDES,
long: tuple = FRANCE_METRO_LONGITUDES,
forecast_horizons: list[int] | None = None,
temp_dir: str | None = None,
) -> pd.DataFrame:
"""(Protected)
Get a combined DataFrame of coverage data for a given run considering a list of indicators.
Expand All @@ -677,6 +701,7 @@ def _get_combined_coverage_for_single_run(
lat (tuple): The latitude range as (min_latitude, max_latitude). Defaults to FRANCE_METRO_LATITUDES.
long (tuple): The longitude range as (min_longitude, max_longitude). Defaults to FRANCE_METRO_LONGITUDES.
forecast_horizons (list[int] | None): A list of forecast horizon values in hours. Defaults to None.
temp_dir (str | None): Directory to store the temporary file. Defaults to None.
Returns:
pd.DataFrame: A combined DataFrame containing coverage data for all specified runs and indicators.
Expand Down Expand Up @@ -737,6 +762,7 @@ def _check_params_length(params: list[Any] | None, arg_name: str) -> list[Any]:
heights=[height] if height is not None else [],
pressures=[pressure] if pressure is not None else [],
forecast_horizons=forecast_horizons,
temp_dir=temp_dir,
)
for coverage_id, height, pressure in zip(coverage_ids, heights, pressures)
]
Expand Down
8 changes: 7 additions & 1 deletion tests/test_forecasts.py
Original file line number Diff line number Diff line change
Expand Up @@ -235,7 +235,13 @@ def test_get_coverage(self, mock_get_data_single_forecast, mock_get_capabilities
)

mock_get_data_single_forecast.assert_called_once_with(
coverage_id="toto", height=2, pressure=None, forecast_horizon=0, lat=(37.5, 55.4), long=(-12, 16)
coverage_id="toto",
height=2,
pressure=None,
forecast_horizon=0,
lat=(37.5, 55.4),
long=(-12, 16),
temp_dir=None,
)

@patch("meteole._arome.AromeForecast.get_coverage_description")
Expand Down

0 comments on commit 6b58ec2

Please sign in to comment.