From 5adbcb48989a43defee123bf9e325301d323ffc3 Mon Sep 17 00:00:00 2001 From: Thomas BOUCHE Date: Fri, 3 Jan 2025 17:19:42 +0100 Subject: [PATCH 1/8] add get_combined_coverage --- .pre-commit-config.yaml | 4 + src/meteole/_arpege.py | 6 +- src/meteole/forecast.py | 211 ++++++++-- ...tch_forecast_for_multiple_indicators.ipynb | 363 ++++++++++++++++++ tutorial/Fetch_forecasts.ipynb | 180 +++++++++ .../{vigilance.ipynb => Get_vigilance.ipynb} | 0 tutorial/arome.ipynb | 89 ----- tutorial/arpege.ipynb | 97 ----- 8 files changed, 735 insertions(+), 215 deletions(-) create mode 100644 tutorial/Fetch_forecast_for_multiple_indicators.ipynb create mode 100644 tutorial/Fetch_forecasts.ipynb rename tutorial/{vigilance.ipynb => Get_vigilance.ipynb} (100%) delete mode 100644 tutorial/arome.ipynb delete mode 100644 tutorial/arpege.ipynb diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 28f934b..ae94b60 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -53,3 +53,7 @@ repos: - id: conventional-pre-commit stages: [commit-msg] args: [feat, fix, ci, chore, test, docs] + - repo: https://github.com/kynan/nbstripout + rev: 0.7.1 + hooks: + - id: nbstripout diff --git a/src/meteole/_arpege.py b/src/meteole/_arpege.py index ee1d514..f676c45 100644 --- a/src/meteole/_arpege.py +++ b/src/meteole/_arpege.py @@ -22,8 +22,6 @@ "WIND_SPEED_GUST__SPECIFIC_HEIGHT_LEVEL_ABOVE_GROUND", "WIND_SPEED__SPECIFIC_HEIGHT_LEVEL_ABOVE_GROUND", "WIND_SPEED__ISOBARIC_SURFACE", - "DOWNWARD_SHORT_WAVE_RADIATION_FLUX__GROUND_OR_WATER_SURFACE", - "SHORT_WAVE_RADIATION_FLUX__GROUND_OR_WATER_SURFACE", "RELATIVE_HUMIDITY__SPECIFIC_HEIGHT_LEVEL_ABOVE_GROUND", "RELATIVE_HUMIDITY__ISOBARIC_SURFACE", "PLANETARY_BOUNDARY_LAYER_HEIGHT__GROUND_OR_WATER_SURFACE", @@ -57,13 +55,15 @@ "V_COMPONENT_OF_WIND__POTENTIAL_VORTICITY_SURFACE_1500", "V_COMPONENT_OF_WIND__POTENTIAL_VORTICITY_SURFACE_2000", "GEOPOTENTIAL__ISOBARIC_SURFACE", + "TOTAL_CLOUD_COVER__GROUND_OR_WATER_SURFACE", ] ARPEGE_OTHER_INDICATORS: list[str] = [ "TOTAL_WATER_PRECIPITATION__GROUND_OR_WATER_SURFACE", - "TOTAL_CLOUD_COVER__GROUND_OR_WATER_SURFACE", "TOTAL_SNOW_PRECIPITATION__GROUND_OR_WATER_SURFACE", "TOTAL_PRECIPITATION__GROUND_OR_WATER_SURFACE", + "DOWNWARD_SHORT_WAVE_RADIATION_FLUX__GROUND_OR_WATER_SURFACE", + "SHORT_WAVE_RADIATION_FLUX__GROUND_OR_WATER_SURFACE", ] diff --git a/src/meteole/forecast.py b/src/meteole/forecast.py index 8b1e0f2..6de4200 100644 --- a/src/meteole/forecast.py +++ b/src/meteole/forecast.py @@ -4,9 +4,11 @@ import glob import logging import os +import re from abc import ABC, abstractmethod +from functools import reduce from pathlib import Path -from typing import Any +from typing import Any, Dict, List, Optional, Tuple from warnings import warn import pandas as pd @@ -152,7 +154,7 @@ def get_coverage( if indicator: coverage_id = self._get_coverage_id(indicator, run, interval) - logger.debug(f"Using `coverage_id={coverage_id}`") + logger.info(f"Using `coverage_id={coverage_id}`") axis = self.get_coverage_description(coverage_id) @@ -178,28 +180,6 @@ def get_coverage( return pd.concat(df_list, axis=0).reset_index(drop=True) - def get_coverages( - self, - coverage_ids: list[str], - lat: tuple = FRANCE_METRO_LATITUDES, - long: tuple = FRANCE_METRO_LONGITUDES, - ) -> pd.DataFrame: - """ - Convenient function to quickly fetch a list of indicators using defaults `heights` and `forecast_horizons` - - For finer control over heights and forecast_horizons use :meth:`get_coverage` - """ - coverages = [ - self.get_coverage( - coverage_id, - lat, - long, - ) - for coverage_id in coverage_ids - ] - - return pd.concat(coverages, axis=0) - def _build_capabilities(self) -> pd.DataFrame: "Returns the coverage dataframe containing the details of all available coverage_ids" @@ -442,13 +422,35 @@ def _get_data_single_forecast( df.rename( columns={ "time": "run", - "heightAboveGround": "height", - "isobaricInhPa": "pressure", "step": "forecast_horizon", }, inplace=True, ) + known_columns = {"latitude", "longitude", "run", "forecast_horizon", "heightAboveGround", "isobaricInhPa"} + indicator_column = (set(df.columns) - known_columns).pop() + + if indicator_column == "unknown": + base_name = "".join([word[0] for word in coverage_id.split("__")[0].split("_")]).lower() + else: + base_name = re.sub(r"\d.*", "", indicator_column) + + if "heightAboveGround" in df.columns: + suffix = f"_{int(df['heightAboveGround'].iloc[0])}m" + elif "isobaricInhPa" in df.columns: + suffix = f"_{int(df['isobaricInhPa'].iloc[0])}hpa" + else: + suffix = "" + + new_indicator_column = f"{base_name}{suffix}" + df.rename(columns={indicator_column: new_indicator_column}, inplace=True) + + df.drop( + columns=["isobaricInhPa", "heightAboveGround", "meanSea", "potentialVorticity"], + errors="ignore", + inplace=True, + ) + return df def _get_coverage_file( @@ -540,3 +542,160 @@ def _get_available_feature(grid_axis, feature_name): features = feature_grid_axis[0]["gmlrgrid:GeneralGridAxis"]["gmlrgrid:coefficients"].split(" ") features = [int(feature) for feature in features] return features + + def get_combined_coverage( + self, + indicator_names: List[str], + runs: List[str], + heights: Optional[List[int]] = None, + pressures: Optional[List[int]] = None, + intervals: Optional[List[str]] = None, + lat: tuple = FRANCE_METRO_LATITUDES, + long: tuple = FRANCE_METRO_LONGITUDES, + forecast_horizons: List[int] | None = None, + ) -> pd.DataFrame: + """ + Get a combined DataFrame of coverage data for multiple coverage_ids with different runs. + Parameters: + indicator_names (List[str]): List of indicator names. + runs (List[str]): List of runs for each indicator. Format "YYYY-MM-DDTHH:MM:SSZ". + heights (List[int]): List of heights in meters. + pressures (List[int]): pressures in hPa + intervals (Optional[List[str]]): List of aggregation periods. Must be None for instant indicators, otherwise raises. Defaults to P1D for time-aggregated indicators like TOTAL_PRECIPITATION. + lat (tuple): Minimum and maximum latitude. + long (tuple): Minimum and maximum longitude. + forecast_horizons (list): list of integers, representing the forecast horizon in hours + Returns: + pd.DataFrame: Combined DataFrame with coverage data for all coverage_ids. + Raises: + ValueError: If the length of heights does not match the length of indicator_names. + """ + if len(runs) != len(set(runs)): + raise ValueError("The run in 'runs' must be different.") + + if heights is not None: + if len(heights) != len(indicator_names): + raise ValueError( + "The length of heights must match the length of indicator_names. If you want multiple heights for a single indicator, you need to create multiple entries in indicator_names." + ) + else: + heights = [] + if pressures is not None: + if len(pressures) != len(indicator_names): + raise ValueError( + "The length of pressures must match the length of indicator_names. If you want multiple pressures for a single indicator, you need to create multiple entries in indicator_names." + ) + else: + pressures = [] + + if intervals and len(intervals) != len(indicator_names): + raise ValueError("The length of intervals must match the length of indicator_names if provided.") + + coverage_ids_by_run: Dict[str, List[Tuple[str, Optional[int], Optional[int]]]] = dict() + + for run in runs: + if run not in coverage_ids_by_run: + coverage_ids_by_run[run] = [] + for i, indicator in enumerate(indicator_names): + height_value: Optional[int] = heights[i] if heights != [] else None + pressure_value: Optional[int] = pressures[i] if pressures != [] else None + coverage_id: str = self._get_coverage_id(indicator, run, intervals[i] if intervals else None) + coverage_ids_by_run[run].append((coverage_id, height_value, pressure_value)) + + if forecast_horizons is None: + list_coverage_id = [cid for cid, _, _ in coverage_ids_by_run[run]] + forecast_horizons = [self.find_common_forecast_horizons(list_coverage_id)[0]] + logger.info(f"Using common forecast_horizons `forecast_horizons={forecast_horizons}`.") + + # Check forecast_horizons is valid for all indicators + if forecast_horizons is not None: + coverage_ids = [cid for run_coverage in coverage_ids_by_run.values() for cid, _, _ in run_coverage] + invalid_coverage_ids = self.validate_forecast_horizons(coverage_ids, forecast_horizons) + if invalid_coverage_ids: + raise ValueError(f"{forecast_horizons} are not valid for this coverage_ids : {invalid_coverage_ids}") + + coverages_by_run = {} + + for run, coverage_ids in coverage_ids_by_run.items(): + coverages = [ + self.get_coverage( + coverage_id=coverage_id, + lat=lat, + long=long, + heights=[height] if height is not None else [], + pressures=[pressure] if pressure is not None else [], + forecast_horizons=forecast_horizons, + ) + for coverage_id, height, pressure in coverage_ids + ] + coverages_by_run[run] = reduce( + lambda left, right: pd.merge( + left, + right, + on=["latitude", "longitude", "run", "forecast_horizon"], + how="inner", + validate="one_to_one", + ), + coverages, + ) + + final_df = pd.concat(coverages_by_run.values(), axis=0).reset_index(drop=True) + + return final_df + + def get_forecast_horizons(self, coverage_ids: List[str]) -> List[List[int]]: + """ + Retrieve the times for each coverage_id. + Parameters: + coverage_ids (List[str]): List of coverage IDs. + Returns: + List[List[int]]: List of times for each coverage ID. + """ + indicator_times = [] + for coverage_id in coverage_ids: + times = self.get_coverage_description(coverage_id)["forecast_horizons"] + indicator_times.append(times) + return indicator_times + + def find_common_forecast_horizons( + self, + list_coverage_id: List[str], + ) -> List[int]: + """ + Find common forecast_horizons among coverage IDs. + indicator_names (List[str]): List of indicator names. + run (Optional[str]): Identifies the model inference. Defaults to latest if None. Format "YYYY-MM-DDTHH:MM:SSZ". + intervals (Optional[List[str]]): List of aggregation periods. Must be None for instant indicators, otherwise raises. Defaults to P1D for time-aggregated indicators like TOTAL_PRECIPITATION. + Returns: + List[int]: Common forecast_horizons + """ + indicator_forecast_horizons = self.get_forecast_horizons(list_coverage_id) + + common_forecast_horizons = indicator_forecast_horizons[0] + for times in indicator_forecast_horizons[1:]: + common_forecast_horizons = [time for time in common_forecast_horizons if time in times] + + all_times = [] + for times in indicator_forecast_horizons: + all_times.extend(times) + + return sorted(common_forecast_horizons) + + def validate_forecast_horizons(self, coverage_ids: List[str], forecast_horizons: List[int]) -> List[str]: + """ + Validate forecast_horizons for a list of coverage IDs. + Parameters: + coverage_ids (List[str]): List of coverage IDs. + forecast_horizons (List[int]): List of time forecasts to validate. + Returns: + List[str]: List of invalid coverage IDs. + """ + indicator_forecast_horizons = self.get_forecast_horizons(coverage_ids) + + invalid_coverage_ids = [ + coverage_id + for coverage_id, times in zip(coverage_ids, indicator_forecast_horizons) + if not set(forecast_horizons).issubset(times) + ] + + return invalid_coverage_ids \ No newline at end of file diff --git a/tutorial/Fetch_forecast_for_multiple_indicators.ipynb b/tutorial/Fetch_forecast_for_multiple_indicators.ipynb new file mode 100644 index 0000000..2275efd --- /dev/null +++ b/tutorial/Fetch_forecast_for_multiple_indicators.ipynb @@ -0,0 +1,363 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Fetch Forecast for multiple indicators\n", + "\n", + "Welcome to this tutorial! In this notebook, we will explore various methods to retrieve weather data from the ARPEGE and AROME models using a list of indicators. This tutorial aims to provide a comprehensive guide on how to efficiently access data for different scenarios.\n", + "\n", + "We will cover the following cases:\n", + "- Retrieving data based on a list of indicators\n", + "- For a single run date\n", + "- Specifying the \"interval\" parameter for each indicator\n", + "- Specifying the \"forecast_horizon\" parameter\n", + "- Specifying the \"heights\" or \"pressures\" parameters for each indicator\n", + "- For multiple runs\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from meteole import ArpegeForecast\n", + "import random" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Requirements notice** : TODO Link to the documentation to have application_id" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "APP_ID = \"\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Init Client Arpege\n", + "\n", + "To get Arome Forecast, import `AromeForecast`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "client = ArpegeForecast(application_id=APP_ID)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Select random indicators to get\n", + "random_indicators = random.sample(client.indicators, 5)\n", + "print(f\"Selected Indicators: {random_indicators}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### For One Run\n", + "\n", + "To retrieve data from a list of indicators, it is possible to do so from one or multiple runs common to all indicators. Let's start with the simple case of a single run\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# We have to specified a run, to select the latest run\n", + "if not hasattr(client, \"capabilities\"):\n", + " client.get_capabilities()\n", + "\n", + "capabilities = client.capabilities[client.capabilities[\"indicator\"] == random_indicators[0]]\n", + "run = capabilities.iloc[0][\"run\"]\n", + "\n", + "print(run)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_combined = client.get_combined_coverage(indicator_names=random_indicators,\n", + " runs=[run], \n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_combined.head(2)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Specify intervals\n", + "\n", + "For an indicator, it is sometimes necessary to specify an \"interval\" parameter. To use the `get_combined_coverage` method, you must provide a list of intervals (`intervals`) associated with the list of indicators (`indicator_names`). If this is not done and an \"interval\" is required, a default interval will be used, and the default value will be logged as information. If you want to have multiple intervals for the same indicator, you need to duplicate the indicator in `indicator_names`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Depending on the indicators, the parameters intervals must be specified, if needed, we pick the first value\n", + "intervals_list = []\n", + "for indicator in random_indicators:\n", + " capabilities = client.capabilities[client.capabilities[\"indicator\"] == indicator]\n", + " intervals = capabilities.iloc[0][\"interval\"]\n", + " intervals_list.append(intervals)\n", + "\n", + "print(f\"Intervals: {intervals_list}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "capabilities.head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_combined = client.get_combined_coverage(indicator_names=random_indicators,\n", + " runs=[run], \n", + " intervals=intervals_list,\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_combined.head(2)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Specify list of forecast_horizons\n", + "\n", + "The list of forecast horizons must be the same and valid for all indicators because the indicators are concatenated into columns for similar forecast horizons. If you want different forecast horizons for different indicators, you need to make multiple calls to `get_combined_coverage`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#if you don't know common forecast_horizons between list of indicators, we can pick 2 common forecast_horizons like that\n", + "if intervals_list:\n", + " intervals_list = [None if interval == '' else interval for interval in intervals_list]\n", + "list_coverage_id = [client._get_coverage_id(indicator, run, interval) for indicator, interval in zip(random_indicators, intervals_list)]\n", + "forecast_horizons = client.find_common_forecast_horizons(list_coverage_id)[:2]\n", + "print(forecast_horizons)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_combined = client.get_combined_coverage(indicator_names=random_indicators,\n", + " runs=[run], \n", + " intervals=intervals_list,\n", + " forecast_horizons = forecast_horizons,\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_combined.head(2)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Specify list of 'heights' or 'pressures'\n", + "\n", + "Depending on the indicators, you need to select `heights` and `pressures` from a list of values. If you do not specify any values, a default value will be used. The list of `heights` or `pressures` must be the same length as `indicator_names`. If you want multiple heights or pressures for the same indicator, you need to duplicate the indicator in `indicator_names`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#To get heights, we can use `self.get_coverage_description` and pick a random height if height exist for the indicator\n", + "heights = []\n", + "for indicator, interval in zip(random_indicators, intervals_list):\n", + " coverage_id = client._get_coverage_id(indicator, run, interval)\n", + " description = client.get_coverage_description(coverage_id)\n", + " \n", + " # Get a random height if heights exist for the indicator\n", + " possible_heights = description.get('heights', [])\n", + " if possible_heights:\n", + " random_height = random.choice(possible_heights)\n", + " heights.append(random_height)\n", + " else:\n", + " heights.append(None)\n", + "\n", + "print(heights)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_combined = client.get_combined_coverage(indicator_names=random_indicators,\n", + " runs=[run], \n", + " intervals=intervals_list,\n", + " forecast_horizons = forecast_horizons,\n", + " heights = heights,\n", + " )\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_combined.head(2)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_combined.shape" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### For multiple runs\n", + "\n", + "If you need to retrieve data for multiple runs, it is possible. Different runs will be concatenated into rows, unlike indicators which are concatenated into columns. Therefore, you need to specify different runs as each run applies to the entire set of indicators." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "capabilities = client.capabilities[client.capabilities[\"indicator\"] == random_indicators[0]]\n", + "runs = capabilities['run'].unique()[:2].tolist()\n", + "\n", + "print(runs)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_combined = client.get_combined_coverage(indicator_names=random_indicators,\n", + " runs=runs, \n", + " intervals=intervals_list,\n", + " forecast_horizons = forecast_horizons,\n", + " heights = heights,\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_combined.head(2)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "inondation", + "language": "python", + "name": "inondation" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.14" + }, + "toc": { + "base_numbering": 1, + "nav_menu": {}, + "number_sections": true, + "sideBar": true, + "skip_h1_title": false, + "title_cell": "Table of Contents", + "title_sidebar": "Contents", + "toc_cell": false, + "toc_position": {}, + "toc_section_display": true, + "toc_window_display": false + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/tutorial/Fetch_forecasts.ipynb b/tutorial/Fetch_forecasts.ipynb new file mode 100644 index 0000000..e33a144 --- /dev/null +++ b/tutorial/Fetch_forecasts.ipynb @@ -0,0 +1,180 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Fetch Forecasts\n", + "Welcome to this tutorial! In this notebook, we will explore various methods to retrieve weather data from the ARPEGE and AROME models. This tutorial aims to provide a comprehensive guide on how to efficiently access data for different scenarios.\n", + "\n", + "We will cover the following cases:\n", + "- Retrieving data based on a list of indicators\n", + "- For a single run date\n", + "- Specifying the \"interval\" parameter for each indicator\n", + "- Specifying the \"forecast_horizon\" parameter\n", + "- Specifying the \"heights\" or \"pressures\" parameters for each indicator\n", + "- For multiple runs\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import random\n", + "from meteole import AromeForecast" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Requirements notice** : TODO Link to the documentation to have application_id" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "APP_ID = \"\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Init Client Arome\n", + "\n", + "To get Arpege Forecast, import `ArpegeForecast`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# init client\n", + "arome = AromeForecast(application_id=APP_ID)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# pick a random indicator\n", + "random_indicator = random.choice(arome.indicators)\n", + "print(f\"Indicator: {random_indicator}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "arome.indicators" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### fetch data using default computed params" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "arome.get_coverage(random_indicator)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Get parameters\n", + "\n", + "If you want to select the different parameters available, you can retrieve them using the `get_capabilities` and `get_coverage_description`.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#First parameters to create a coverage_id (run and interval)\n", + "df_capabilities = arome.get_capabilities()\n", + "\n", + "list_run_valid = list(df_capabilities[df_capabilities['indicator']==random_indicator]['run'].unique())\n", + "list_interval_valid = list(df_capabilities[df_capabilities['indicator']==random_indicator]['interval'].unique())\n", + "list_coverage_id_valid = list(df_capabilities[df_capabilities['indicator']==random_indicator]['id'].unique())\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#Then other parameters from a coverage_id\n", + "description = arome.get_coverage_description(list_coverage_id_valid[0])\n", + "\n", + "list_forecast_horizons_valid = description.get('forecast_horizons', [])\n", + "list_height_valid = description.get('heights', [])\n", + "list_pressure_id_valid = description.get('pressures', [])\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "inondation", + "language": "python", + "name": "inondation" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.14" + }, + "toc": { + "base_numbering": 1, + "nav_menu": {}, + "number_sections": true, + "sideBar": true, + "skip_h1_title": false, + "title_cell": "Table of Contents", + "title_sidebar": "Contents", + "toc_cell": false, + "toc_position": {}, + "toc_section_display": true, + "toc_window_display": false + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/tutorial/vigilance.ipynb b/tutorial/Get_vigilance.ipynb similarity index 100% rename from tutorial/vigilance.ipynb rename to tutorial/Get_vigilance.ipynb diff --git a/tutorial/arome.ipynb b/tutorial/arome.ipynb deleted file mode 100644 index f16c2ee..0000000 --- a/tutorial/arome.ipynb +++ /dev/null @@ -1,89 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# AROME" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "import random\n", - "\n", - "from meteole import AromeForecast\n", - "\n", - "APP_ID = \"\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# init client\n", - "arome_model = AromeForecast(application_id=APP_ID)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# pick a random indicator\n", - "random_indicator = random.choice(arome_model.INDICATORS)\n", - "print(f\"Indicator: {random_indicator}\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# fetch data using default computed params\n", - "arome_model.get_coverage(random_indicator)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "meteole_env", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "undefined.undefined.undefined" - }, - "toc": { - "base_numbering": 1, - "nav_menu": {}, - "number_sections": true, - "sideBar": true, - "skip_h1_title": false, - "title_cell": "Table of Contents", - "title_sidebar": "Contents", - "toc_cell": false, - "toc_position": {}, - "toc_section_display": true, - "toc_window_display": false - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/tutorial/arpege.ipynb b/tutorial/arpege.ipynb deleted file mode 100644 index a85c299..0000000 --- a/tutorial/arpege.ipynb +++ /dev/null @@ -1,97 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# ARPEGE" - ] - }, - { - "cell_type": "code", - "execution_count": 42, - "metadata": {}, - "outputs": [], - "source": [ - "import random\n", - "\n", - "from meteole import ArpegeForecast\n", - "\n", - "APP_ID = \"\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# init client\n", - "arpege_model = ArpegeForecast(application_id=APP_ID)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# pick a random indicator\n", - "\n", - "random_indicator = random.choice(arpege_model.INDICATORS)\n", - "print(f\"Indicator: {random_indicator}\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# fetch data using default computed params\n", - "arpege_model.get_coverage(\"VERTICAL_VELOCITY_PRESSURE__ISOBARIC_SURFACE\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.9" - }, - "toc": { - "base_numbering": 1, - "nav_menu": {}, - "number_sections": true, - "sideBar": true, - "skip_h1_title": false, - "title_cell": "Table of Contents", - "title_sidebar": "Contents", - "toc_cell": false, - "toc_position": {}, - "toc_section_display": true, - "toc_window_display": false - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} From 0b429f07b8dab086bd41147f57f94237dd9948fe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gratien=20D=C3=A9sormeaux?= Date: Mon, 6 Jan 2025 16:17:34 +0100 Subject: [PATCH 2/8] Split get_combined_coverage into 2 smaller functions --- src/meteole/forecast.py | 242 +++++++++++++++++++++++----------------- 1 file changed, 141 insertions(+), 101 deletions(-) diff --git a/src/meteole/forecast.py b/src/meteole/forecast.py index 6de4200..6af231c 100644 --- a/src/meteole/forecast.py +++ b/src/meteole/forecast.py @@ -8,7 +8,7 @@ from abc import ABC, abstractmethod from functools import reduce from pathlib import Path -from typing import Any, Dict, List, Optional, Tuple +from typing import Any, Optional from warnings import warn import pandas as pd @@ -300,11 +300,11 @@ def _raise_if_invalid_or_fetch_default( Args: param_name (str): The name of the parameter to validate. - inputs (Optional[List[int]]): The list of inputs to validate. - availables (List[int]): The list of available values. + inputs (Optional[list[int]]): The list of inputs to validate. + availables (list[int]): The list of available values. Returns: - List[int]: The validated list of inputs or the default value. + list[int]: The validated list of inputs or the default value. Raises: ValueError: If any of the inputs are not in `availables`. @@ -542,114 +542,154 @@ def _get_available_feature(grid_axis, feature_name): features = feature_grid_axis[0]["gmlrgrid:GeneralGridAxis"]["gmlrgrid:coefficients"].split(" ") features = [int(feature) for feature in features] return features - + def get_combined_coverage( self, - indicator_names: List[str], - runs: List[str], - heights: Optional[List[int]] = None, - pressures: Optional[List[int]] = None, - intervals: Optional[List[str]] = None, + indicator_names: list[str], + runs: list[str], + heights: Optional[list[int]] = None, + pressures: Optional[list[int]] = None, + intervals: Optional[list[str]] = None, lat: tuple = FRANCE_METRO_LATITUDES, long: tuple = FRANCE_METRO_LONGITUDES, - forecast_horizons: List[int] | None = None, + forecast_horizons: list[int] | None = None, ) -> pd.DataFrame: """ - Get a combined DataFrame of coverage data for multiple coverage_ids with different runs. - Parameters: - indicator_names (List[str]): List of indicator names. - runs (List[str]): List of runs for each indicator. Format "YYYY-MM-DDTHH:MM:SSZ". - heights (List[int]): List of heights in meters. - pressures (List[int]): pressures in hPa - intervals (Optional[List[str]]): List of aggregation periods. Must be None for instant indicators, otherwise raises. Defaults to P1D for time-aggregated indicators like TOTAL_PRECIPITATION. - lat (tuple): Minimum and maximum latitude. - long (tuple): Minimum and maximum longitude. - forecast_horizons (list): list of integers, representing the forecast horizon in hours + Get a combined DataFrame of coverage data for multiple indicators and different runs. + + This method retrieves and aggregates coverage data for specified indicators, with options + to filter by height, pressure, and forecast_horizon. It returns a concatenated DataFrame + containing the coverage data for all provided runs. + + Args: + indicator_names (list[str]): A list of indicator names to retrieve data for. + runs (list[str]): A list of runs for each indicator. Format should be "YYYY-MM-DDTHH:MM:SSZ". + heights (Optional[list[int]]): A list of heights in meters to filter by (default is None). + pressures (Optional[list[int]]): A list of pressures in hPa to filter by (default is None). + intervals (Optional[list[str]]): A list of aggregation periods (default is None). Must be `None` for instant indicators; + otherwise, raises an exception. Defaults to 'P1D' for time-aggregated indicators. + lat (tuple): The latitude range as (min_latitude, max_latitude). Defaults to FRANCE_METRO_LATITUDES. + long (tuple): The longitude range as (min_longitude, max_longitude). Defaults to FRANCE_METRO_LONGITUDES. + forecast_horizons (Optional[list[int]]): A list of forecast horizon values in hours. Defaults to None. + Returns: - pd.DataFrame: Combined DataFrame with coverage data for all coverage_ids. + pd.DataFrame: A combined DataFrame containing coverage data for all specified runs and indicators. + Raises: - ValueError: If the length of heights does not match the length of indicator_names. + ValueError: If the length of `heights` does not match the length of `indicator_names`. + """ - if len(runs) != len(set(runs)): - raise ValueError("The run in 'runs' must be different.") + coverages = [ + self._get_combined_coverage_for_single_run( + indicator_names=indicator_names, + run=run, + lat=lat, + long=long, + heights=heights, + pressures=pressures, + intervals=intervals, + forecast_horizons=forecast_horizons, + ) + for run in runs + ] + return pd.concat(coverages, axis=0).reset_index(drop=True) - if heights is not None: - if len(heights) != len(indicator_names): - raise ValueError( - "The length of heights must match the length of indicator_names. If you want multiple heights for a single indicator, you need to create multiple entries in indicator_names." - ) - else: - heights = [] - if pressures is not None: - if len(pressures) != len(indicator_names): + def _get_combined_coverage_for_single_run( + self, + indicator_names: list[str], + run: str, + heights: list[int] | None = None, + pressures: list[int] | None = None, + intervals: list[str] | None = None, + lat: tuple = FRANCE_METRO_LATITUDES, + long: tuple = FRANCE_METRO_LONGITUDES, + forecast_horizons: list[int] | None = None, + ) -> pd.DataFrame: + """ + Get a combined DataFrame of coverage data for a given run considering a list of indicators. + + This method retrieves and aggregates coverage data for specified indicators, with options + to filter by height, pressure, and forecast_horizon. It returns a concatenated DataFrame + containing the coverage data. + + Args: + indicator_names (list[str]): A list of indicator names to retrieve data for. + run (str): A single runs for each indicator. Format should be "YYYY-MM-DDTHH:MM:SSZ". + heights (Optional[list[int]]): A list of heights in meters to filter by (default is None). + pressures (Optional[list[int]]): A list of pressures in hPa to filter by (default is None). + intervals (Optional[list[str]]): A list of aggregation periods (default is None). Must be `None` for instant indicators; + otherwise, raises an exception. Defaults to 'P1D' for time-aggregated indicators. + lat (tuple): The latitude range as (min_latitude, max_latitude). Defaults to FRANCE_METRO_LATITUDES. + long (tuple): The longitude range as (min_longitude, max_longitude). Defaults to FRANCE_METRO_LONGITUDES. + forecast_horizons (Optional[list[int]]): A list of forecast horizon values in hours. Defaults to None. + + Returns: + pd.DataFrame: A combined DataFrame containing coverage data for all specified runs and indicators. + + Raises: + ValueError: If the length of `heights` does not match the length of `indicator_names`. + + """ + + def _check_params_length(params: list | None, arg_name: str) -> list: + """assert length is ok or raise""" + if params is None: + return [None] * len(indicator_names) + if len(params) != len(indicator_names): raise ValueError( - "The length of pressures must match the length of indicator_names. If you want multiple pressures for a single indicator, you need to create multiple entries in indicator_names." + f"The length of {arg_name} must match the length of indicator_names. If you want multiple {arg_name} for a single indicator, create multiple entries in `indicator_names`." ) - else: - pressures = [] - - if intervals and len(intervals) != len(indicator_names): - raise ValueError("The length of intervals must match the length of indicator_names if provided.") - - coverage_ids_by_run: Dict[str, List[Tuple[str, Optional[int], Optional[int]]]] = dict() - - for run in runs: - if run not in coverage_ids_by_run: - coverage_ids_by_run[run] = [] - for i, indicator in enumerate(indicator_names): - height_value: Optional[int] = heights[i] if heights != [] else None - pressure_value: Optional[int] = pressures[i] if pressures != [] else None - coverage_id: str = self._get_coverage_id(indicator, run, intervals[i] if intervals else None) - coverage_ids_by_run[run].append((coverage_id, height_value, pressure_value)) - - if forecast_horizons is None: - list_coverage_id = [cid for cid, _, _ in coverage_ids_by_run[run]] - forecast_horizons = [self.find_common_forecast_horizons(list_coverage_id)[0]] - logger.info(f"Using common forecast_horizons `forecast_horizons={forecast_horizons}`.") - - # Check forecast_horizons is valid for all indicators - if forecast_horizons is not None: - coverage_ids = [cid for run_coverage in coverage_ids_by_run.values() for cid, _, _ in run_coverage] + return params + + heights = _check_params_length(heights, "heights") + pressures = _check_params_length(pressures, "pressures") + intervals = _check_params_length(intervals, "intervals") + + # Get coverage id from run and indicator_name + coverage_ids = [ + self._get_coverage_id(indicator_name, run, interval) + for indicator_name, interval in zip(indicator_names, intervals) + ] + + if forecast_horizons: + # Check forecast_horizons is valid for all indicators invalid_coverage_ids = self.validate_forecast_horizons(coverage_ids, forecast_horizons) if invalid_coverage_ids: - raise ValueError(f"{forecast_horizons} are not valid for this coverage_ids : {invalid_coverage_ids}") - - coverages_by_run = {} - - for run, coverage_ids in coverage_ids_by_run.items(): - coverages = [ - self.get_coverage( - coverage_id=coverage_id, - lat=lat, - long=long, - heights=[height] if height is not None else [], - pressures=[pressure] if pressure is not None else [], - forecast_horizons=forecast_horizons, - ) - for coverage_id, height, pressure in coverage_ids - ] - coverages_by_run[run] = reduce( - lambda left, right: pd.merge( - left, - right, - on=["latitude", "longitude", "run", "forecast_horizon"], - how="inner", - validate="one_to_one", - ), - coverages, - ) + raise ValueError(f"{forecast_horizons} are not valid for these coverage_ids : {invalid_coverage_ids}") + else: + forecast_horizons = [self.find_common_forecast_horizons(coverage_ids)[0]] + logger.info(f"Using common forecast_horizons `forecast_horizons={forecast_horizons}`.") - final_df = pd.concat(coverages_by_run.values(), axis=0).reset_index(drop=True) + coverages = [ + self.get_coverage( + coverage_id=coverage_id, + lat=lat, + long=long, + heights=[height] if height is not None else [], + pressures=[pressure] if pressure is not None else [], + forecast_horizons=forecast_horizons, + ) + for coverage_id, height, pressure in zip(coverage_ids, heights, pressures) + ] - return final_df + return reduce( + lambda left, right: pd.merge( + left, + right, + on=["latitude", "longitude", "run", "forecast_horizon"], + how="inner", + validate="one_to_one", + ), + coverages, + ) - def get_forecast_horizons(self, coverage_ids: List[str]) -> List[List[int]]: + def get_forecast_horizons(self, coverage_ids: list[str]) -> list[list[int]]: """ Retrieve the times for each coverage_id. Parameters: - coverage_ids (List[str]): List of coverage IDs. + coverage_ids (list[str]): List of coverage IDs. Returns: - List[List[int]]: List of times for each coverage ID. + list[list[int]]: List of times for each coverage ID. """ indicator_times = [] for coverage_id in coverage_ids: @@ -659,15 +699,15 @@ def get_forecast_horizons(self, coverage_ids: List[str]) -> List[List[int]]: def find_common_forecast_horizons( self, - list_coverage_id: List[str], - ) -> List[int]: + list_coverage_id: list[str], + ) -> list[int]: """ Find common forecast_horizons among coverage IDs. - indicator_names (List[str]): List of indicator names. + indicator_names (list[str]): List of indicator names. run (Optional[str]): Identifies the model inference. Defaults to latest if None. Format "YYYY-MM-DDTHH:MM:SSZ". - intervals (Optional[List[str]]): List of aggregation periods. Must be None for instant indicators, otherwise raises. Defaults to P1D for time-aggregated indicators like TOTAL_PRECIPITATION. + intervals (Optional[list[str]]): List of aggregation periods. Must be None for instant indicators, otherwise raises. Defaults to P1D for time-aggregated indicators like TOTAL_PRECIPITATION. Returns: - List[int]: Common forecast_horizons + list[int]: Common forecast_horizons """ indicator_forecast_horizons = self.get_forecast_horizons(list_coverage_id) @@ -681,14 +721,14 @@ def find_common_forecast_horizons( return sorted(common_forecast_horizons) - def validate_forecast_horizons(self, coverage_ids: List[str], forecast_horizons: List[int]) -> List[str]: + def validate_forecast_horizons(self, coverage_ids: list[str], forecast_horizons: list[int]) -> list[str]: """ Validate forecast_horizons for a list of coverage IDs. Parameters: - coverage_ids (List[str]): List of coverage IDs. - forecast_horizons (List[int]): List of time forecasts to validate. + coverage_ids (list[str]): List of coverage IDs. + forecast_horizons (list[int]): List of time forecasts to validate. Returns: - List[str]: List of invalid coverage IDs. + list[str]: List of invalid coverage IDs. """ indicator_forecast_horizons = self.get_forecast_horizons(coverage_ids) @@ -698,4 +738,4 @@ def validate_forecast_horizons(self, coverage_ids: List[str], forecast_horizons: if not set(forecast_horizons).issubset(times) ] - return invalid_coverage_ids \ No newline at end of file + return invalid_coverage_ids From e573fec75c35765c3c93103f4666b44fb88c7f65 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gratien=20D=C3=A9sormeaux?= Date: Tue, 7 Jan 2025 10:13:09 +0100 Subject: [PATCH 3/8] Fix: allow interval='' and interval=None --- .pre-commit-config.yaml | 1 + src/meteole/forecast.py | 34 ++++---- ...tch_forecast_for_multiple_indicators.ipynb | 82 ++++++++++--------- 3 files changed, 63 insertions(+), 54 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index ae94b60..e06ea91 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -17,6 +17,7 @@ repos: exclude: ^(docs/) - id: pretty-format-json args: [--autofix] + exclude_types: [jupyter] - id: trailing-whitespace args: [--markdown-linebreak-ext=md] exclude: ^(docs/) diff --git a/src/meteole/forecast.py b/src/meteole/forecast.py index 6af231c..b9f501d 100644 --- a/src/meteole/forecast.py +++ b/src/meteole/forecast.py @@ -8,7 +8,7 @@ from abc import ABC, abstractmethod from functools import reduce from pathlib import Path -from typing import Any, Optional +from typing import Any from warnings import warn import pandas as pd @@ -263,7 +263,7 @@ def _get_coverage_id( valid_intervals = capabilities["interval"].unique().tolist() if indicator in self.INSTANT_INDICATORS: - if interval is None: + if not interval: # no interval is expected for instant indicators pass else: @@ -272,7 +272,7 @@ def _get_coverage_id( "indicator `{indicator}`." ) else: - if interval is None: + if not interval: interval = "P1D" logger.info( f"`interval=None` is invalid for non-instant indicators. Using default `interval={interval}`" @@ -285,7 +285,7 @@ def _get_coverage_id( coverage_id = f"{indicator}___{run}" - if interval is not None: + if interval: coverage_id += f"_{interval}" return coverage_id @@ -300,7 +300,7 @@ def _raise_if_invalid_or_fetch_default( Args: param_name (str): The name of the parameter to validate. - inputs (Optional[list[int]]): The list of inputs to validate. + inputs (list[int] | None): The list of inputs to validate. availables (list[int]): The list of available values. Returns: @@ -547,9 +547,9 @@ def get_combined_coverage( self, indicator_names: list[str], runs: list[str], - heights: Optional[list[int]] = None, - pressures: Optional[list[int]] = None, - intervals: Optional[list[str]] = None, + heights: list[int] | None = None, + pressures: list[int] | None = None, + intervals: list[str | None] | None = None, lat: tuple = FRANCE_METRO_LATITUDES, long: tuple = FRANCE_METRO_LONGITUDES, forecast_horizons: list[int] | None = None, @@ -564,13 +564,13 @@ def get_combined_coverage( Args: indicator_names (list[str]): A list of indicator names to retrieve data for. runs (list[str]): A list of runs for each indicator. Format should be "YYYY-MM-DDTHH:MM:SSZ". - heights (Optional[list[int]]): A list of heights in meters to filter by (default is None). - pressures (Optional[list[int]]): A list of pressures in hPa to filter by (default is None). - intervals (Optional[list[str]]): A list of aggregation periods (default is None). Must be `None` for instant indicators; + heights (list[int] | None): A list of heights in meters to filter by (default is None). + pressures (list[int] | None): A list of pressures in hPa to filter by (default is None). + intervals (list[str] | None): A list of aggregation periods (default is None). Must be `None` or "" for instant indicators; otherwise, raises an exception. Defaults to 'P1D' for time-aggregated indicators. lat (tuple): The latitude range as (min_latitude, max_latitude). Defaults to FRANCE_METRO_LATITUDES. long (tuple): The longitude range as (min_longitude, max_longitude). Defaults to FRANCE_METRO_LONGITUDES. - forecast_horizons (Optional[list[int]]): A list of forecast horizon values in hours. Defaults to None. + forecast_horizons (list[int] | None): A list of forecast horizon values in hours. Defaults to None. Returns: pd.DataFrame: A combined DataFrame containing coverage data for all specified runs and indicators. @@ -600,7 +600,7 @@ def _get_combined_coverage_for_single_run( run: str, heights: list[int] | None = None, pressures: list[int] | None = None, - intervals: list[str] | None = None, + intervals: list[str | None] | None = None, lat: tuple = FRANCE_METRO_LATITUDES, long: tuple = FRANCE_METRO_LONGITUDES, forecast_horizons: list[int] | None = None, @@ -615,13 +615,13 @@ def _get_combined_coverage_for_single_run( Args: indicator_names (list[str]): A list of indicator names to retrieve data for. run (str): A single runs for each indicator. Format should be "YYYY-MM-DDTHH:MM:SSZ". - heights (Optional[list[int]]): A list of heights in meters to filter by (default is None). - pressures (Optional[list[int]]): A list of pressures in hPa to filter by (default is None). - intervals (Optional[list[str]]): A list of aggregation periods (default is None). Must be `None` for instant indicators; + heights (list[int] | None): A list of heights in meters to filter by (default is None). + pressures (list[int] | None): A list of pressures in hPa to filter by (default is None). + intervals (Optional[list[str]]): A list of aggregation periods (default is None). Must be `None` or "" for instant indicators; otherwise, raises an exception. Defaults to 'P1D' for time-aggregated indicators. lat (tuple): The latitude range as (min_latitude, max_latitude). Defaults to FRANCE_METRO_LATITUDES. long (tuple): The longitude range as (min_longitude, max_longitude). Defaults to FRANCE_METRO_LONGITUDES. - forecast_horizons (Optional[list[int]]): A list of forecast horizon values in hours. Defaults to None. + forecast_horizons (list[int] | None): A list of forecast horizon values in hours. Defaults to None. Returns: pd.DataFrame: A combined DataFrame containing coverage data for all specified runs and indicators. diff --git a/tutorial/Fetch_forecast_for_multiple_indicators.ipynb b/tutorial/Fetch_forecast_for_multiple_indicators.ipynb index 2275efd..221bf1e 100644 --- a/tutorial/Fetch_forecast_for_multiple_indicators.ipynb +++ b/tutorial/Fetch_forecast_for_multiple_indicators.ipynb @@ -23,8 +23,9 @@ "metadata": {}, "outputs": [], "source": [ - "from meteole import ArpegeForecast\n", - "import random" + "import random\n", + "\n", + "from meteole import ArpegeForecast" ] }, { @@ -103,9 +104,10 @@ "metadata": {}, "outputs": [], "source": [ - "df_combined = client.get_combined_coverage(indicator_names=random_indicators,\n", - " runs=[run], \n", - " )" + "df_combined = client.get_combined_coverage(\n", + " indicator_names=random_indicators,\n", + " runs=[run],\n", + ")" ] }, { @@ -157,10 +159,11 @@ "metadata": {}, "outputs": [], "source": [ - "df_combined = client.get_combined_coverage(indicator_names=random_indicators,\n", - " runs=[run], \n", - " intervals=intervals_list,\n", - " )" + "df_combined = client.get_combined_coverage(\n", + " indicator_names=random_indicators,\n", + " runs=[run],\n", + " intervals=intervals_list,\n", + ")" ] }, { @@ -187,12 +190,14 @@ "metadata": {}, "outputs": [], "source": [ - "#if you don't know common forecast_horizons between list of indicators, we can pick 2 common forecast_horizons like that\n", + "# if you don't know common forecast_horizons between list of indicators, we can pick 2 common forecast_horizons like that\n", "if intervals_list:\n", - " intervals_list = [None if interval == '' else interval for interval in intervals_list]\n", - "list_coverage_id = [client._get_coverage_id(indicator, run, interval) for indicator, interval in zip(random_indicators, intervals_list)]\n", + " intervals_list = [None if interval == \"\" else interval for interval in intervals_list]\n", + "list_coverage_id = [\n", + " client._get_coverage_id(indicator, run, interval) for indicator, interval in zip(random_indicators, intervals_list)\n", + "]\n", "forecast_horizons = client.find_common_forecast_horizons(list_coverage_id)[:2]\n", - "print(forecast_horizons)\n" + "print(forecast_horizons)" ] }, { @@ -201,11 +206,12 @@ "metadata": {}, "outputs": [], "source": [ - "df_combined = client.get_combined_coverage(indicator_names=random_indicators,\n", - " runs=[run], \n", - " intervals=intervals_list,\n", - " forecast_horizons = forecast_horizons,\n", - " )" + "df_combined = client.get_combined_coverage(\n", + " indicator_names=random_indicators,\n", + " runs=[run],\n", + " intervals=intervals_list,\n", + " forecast_horizons=forecast_horizons,\n", + ")" ] }, { @@ -232,14 +238,14 @@ "metadata": {}, "outputs": [], "source": [ - "#To get heights, we can use `self.get_coverage_description` and pick a random height if height exist for the indicator\n", + "# To get heights, we can use `self.get_coverage_description` and pick a random height if height exist for the indicator\n", "heights = []\n", "for indicator, interval in zip(random_indicators, intervals_list):\n", " coverage_id = client._get_coverage_id(indicator, run, interval)\n", " description = client.get_coverage_description(coverage_id)\n", - " \n", + "\n", " # Get a random height if heights exist for the indicator\n", - " possible_heights = description.get('heights', [])\n", + " possible_heights = description.get(\"heights\", [])\n", " if possible_heights:\n", " random_height = random.choice(possible_heights)\n", " heights.append(random_height)\n", @@ -255,12 +261,13 @@ "metadata": {}, "outputs": [], "source": [ - "df_combined = client.get_combined_coverage(indicator_names=random_indicators,\n", - " runs=[run], \n", - " intervals=intervals_list,\n", - " forecast_horizons = forecast_horizons,\n", - " heights = heights,\n", - " )\n" + "df_combined = client.get_combined_coverage(\n", + " indicator_names=random_indicators,\n", + " runs=[run],\n", + " intervals=intervals_list,\n", + " forecast_horizons=forecast_horizons,\n", + " heights=heights,\n", + ")" ] }, { @@ -297,7 +304,7 @@ "outputs": [], "source": [ "capabilities = client.capabilities[client.capabilities[\"indicator\"] == random_indicators[0]]\n", - "runs = capabilities['run'].unique()[:2].tolist()\n", + "runs = capabilities[\"run\"].unique()[:2].tolist()\n", "\n", "print(runs)" ] @@ -308,12 +315,13 @@ "metadata": {}, "outputs": [], "source": [ - "df_combined = client.get_combined_coverage(indicator_names=random_indicators,\n", - " runs=runs, \n", - " intervals=intervals_list,\n", - " forecast_horizons = forecast_horizons,\n", - " heights = heights,\n", - " )" + "df_combined = client.get_combined_coverage(\n", + " indicator_names=random_indicators,\n", + " runs=runs,\n", + " intervals=intervals_list,\n", + " forecast_horizons=forecast_horizons,\n", + " heights=heights,\n", + ")" ] }, { @@ -328,9 +336,9 @@ ], "metadata": { "kernelspec": { - "display_name": "inondation", + "display_name": "meteole_env", "language": "python", - "name": "inondation" + "name": "python3" }, "language_info": { "codemirror_mode": { @@ -342,7 +350,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.14" + "version": "undefined.undefined.undefined" }, "toc": { "base_numbering": 1, From 9df3d9ad958bd7f8fcdae076fe85de10a2056495 Mon Sep 17 00:00:00 2001 From: Thomas BOUCHE Date: Tue, 7 Jan 2025 14:32:53 +0100 Subject: [PATCH 4/8] add unit tests --- src/meteole/forecast.py | 12 +- tests/test_forecasts.py | 423 +++++++++++++++++++++++++++++++++++++++- 2 files changed, 427 insertions(+), 8 deletions(-) diff --git a/src/meteole/forecast.py b/src/meteole/forecast.py index 6de4200..432244c 100644 --- a/src/meteole/forecast.py +++ b/src/meteole/forecast.py @@ -542,7 +542,7 @@ def _get_available_feature(grid_axis, feature_name): features = feature_grid_axis[0]["gmlrgrid:GeneralGridAxis"]["gmlrgrid:coefficients"].split(" ") features = [int(feature) for feature in features] return features - + def get_combined_coverage( self, indicator_names: List[str], @@ -610,7 +610,7 @@ def get_combined_coverage( # Check forecast_horizons is valid for all indicators if forecast_horizons is not None: coverage_ids = [cid for run_coverage in coverage_ids_by_run.values() for cid, _, _ in run_coverage] - invalid_coverage_ids = self.validate_forecast_horizons(coverage_ids, forecast_horizons) + invalid_coverage_ids = self._validate_forecast_horizons(coverage_ids, forecast_horizons) if invalid_coverage_ids: raise ValueError(f"{forecast_horizons} are not valid for this coverage_ids : {invalid_coverage_ids}") @@ -643,7 +643,7 @@ def get_combined_coverage( return final_df - def get_forecast_horizons(self, coverage_ids: List[str]) -> List[List[int]]: + def _get_forecast_horizons(self, coverage_ids: List[str]) -> List[List[int]]: """ Retrieve the times for each coverage_id. Parameters: @@ -669,7 +669,7 @@ def find_common_forecast_horizons( Returns: List[int]: Common forecast_horizons """ - indicator_forecast_horizons = self.get_forecast_horizons(list_coverage_id) + indicator_forecast_horizons = self._get_forecast_horizons(list_coverage_id) common_forecast_horizons = indicator_forecast_horizons[0] for times in indicator_forecast_horizons[1:]: @@ -681,7 +681,7 @@ def find_common_forecast_horizons( return sorted(common_forecast_horizons) - def validate_forecast_horizons(self, coverage_ids: List[str], forecast_horizons: List[int]) -> List[str]: + def _validate_forecast_horizons(self, coverage_ids: List[str], forecast_horizons: List[int]) -> List[str]: """ Validate forecast_horizons for a list of coverage IDs. Parameters: @@ -690,7 +690,7 @@ def validate_forecast_horizons(self, coverage_ids: List[str], forecast_horizons: Returns: List[str]: List of invalid coverage IDs. """ - indicator_forecast_horizons = self.get_forecast_horizons(coverage_ids) + indicator_forecast_horizons = self._get_forecast_horizons(coverage_ids) invalid_coverage_ids = [ coverage_id diff --git a/tests/test_forecasts.py b/tests/test_forecasts.py index cf6d542..fae071e 100644 --- a/tests/test_forecasts.py +++ b/tests/test_forecasts.py @@ -188,7 +188,7 @@ def test_get_coverage_file(self, mock_get_request, mock_get_capabilities): @patch("meteole._arome.AromeForecast._transform_grib_to_df") @patch("meteole._arome.AromeForecast._get_coverage_file") def test_get_data_single_forecast(self, mock_get_coverage_file, mock_transform_grib_to_df, mock_get_capabilities): - mock_transform_grib_to_df.return_value = pd.DataFrame({"data": [1, 2, 3], "heightAboveGround": ["1", "2", "3"]}) + mock_transform_grib_to_df.return_value = pd.DataFrame({"data": [1, 2, 3]}) forecast = AromeForecast( self.client, @@ -198,7 +198,7 @@ def test_get_data_single_forecast(self, mock_get_coverage_file, mock_transform_g df = forecast._get_data_single_forecast( coverage_id="coverage_1", - height=2, + height=None, pressure=None, forecast_horizon=0, lat=(37.5, 55.4), @@ -207,6 +207,29 @@ def test_get_data_single_forecast(self, mock_get_coverage_file, mock_transform_g self.assertTrue("data" in df.columns) + @patch("meteole._arome.AromeForecast.get_capabilities") + @patch("meteole._arome.AromeForecast._transform_grib_to_df") + @patch("meteole._arome.AromeForecast._get_coverage_file") + def test_get_data_single_forecast_with_height(self, mock_get_coverage_file, mock_transform_grib_to_df, mock_get_capabilities): + mock_transform_grib_to_df.return_value = pd.DataFrame({"data": [1, 2, 3], "heightAboveGround": ["2", "2", "2"]}) + + forecast = AromeForecast( + self.client, + precision=self.precision, + territory=self.territory, + ) + + df = forecast._get_data_single_forecast( + coverage_id="coverage_1", + height=2, + pressure=None, + forecast_horizon=0, + lat=(37.5, 55.4), + long=(-12, 16), + ) + + self.assertTrue("data_2m" in df.columns) + @patch("meteole._arome.AromeForecast.get_coverage_description") @patch("meteole._arome.AromeForecast.get_capabilities") @patch("meteole._arome.AromeForecast._get_data_single_forecast") @@ -241,6 +264,402 @@ def test_get_coverage(self, mock_get_data_single_forecast, mock_get_capabilities coverage_id="toto", height=2, pressure=None, forecast_horizon=0, lat=(37.5, 55.4), long=(-12, 16) ) + @patch("meteole._arome.AromeForecast.get_coverage_description") + def test_get_forecast_horizons(self, mock_get_coverage_description): + def side_effect(coverage_id): + if coverage_id == "id1": + return {"forecast_horizons": [0, 1, 2], "heights": [], "pressures": []} + elif coverage_id == "id2": + return {"forecast_horizons": [0, 2, 3], "heights": [], "pressures": []} + + mock_get_coverage_description.side_effect = side_effect + + forecast = AromeForecast( + self.client, + precision=self.precision, + territory=self.territory, + ) + + coverage_ids = ["id1", "id2"] + expected_result = [[0, 1, 2], [0, 2, 3]] + result = forecast._get_forecast_horizons(coverage_ids) + self.assertEqual(result, expected_result) + + @patch("meteole._arome.AromeForecast._get_forecast_horizons") + def test_find_common_forecast_horizons(self, mock_get_forecast_horizons): + mock_get_forecast_horizons.return_value = [[0, 1, 2, 3], [2, 3, 4, 5], [1, 2, 3, 6]] + + list_coverage_id = ["id1", "id2", "id3"] + expected_result = [2, 3] + + forecast = AromeForecast( + self.client, + precision=self.precision, + territory=self.territory, + ) + result = forecast.find_common_forecast_horizons(list_coverage_id) + self.assertEqual(result, expected_result) + + @patch("meteole._arome.AromeForecast._get_forecast_horizons") + def test_validate_forecast_horizons_valid(self, mock_get_forecast_horizons): + mock_get_forecast_horizons.return_value = [[0, 1, 2, 3], [2, 3, 4, 5]] + + coverage_ids = ["id1", "id2"] + forecast_horizons = [2, 3] + expected_result = [] + + forecast = AromeForecast( + self.client, + precision=self.precision, + territory=self.territory, + ) + result = forecast._validate_forecast_horizons(coverage_ids, forecast_horizons) + self.assertEqual(result, expected_result) + + @patch("meteole._arome.AromeForecast._get_forecast_horizons") + def test_validate_forecast_horizons_invalid(self, mock_get_forecast_horizons): + mock_get_forecast_horizons.return_value = [[0, 1, 2, 3], [2, 3, 4, 5]] + + coverage_ids = ["id1", "id2"] + forecast_horizons = [1, 2] + expected_result = ["id2"] + + forecast = AromeForecast( + self.client, + precision=self.precision, + territory=self.territory, + ) + result = forecast._validate_forecast_horizons(coverage_ids, forecast_horizons) + self.assertEqual(result, expected_result) + + @patch("meteole._arome.AromeForecast._get_coverage_id") + @patch("meteole._arome.AromeForecast.find_common_forecast_horizons") + @patch("meteole._arome.AromeForecast._validate_forecast_horizons") + @patch("meteole._arome.AromeForecast.get_coverage") + def test_get_combined_coverage( + self, + mock_get_coverage, + mock_validate_forecast_horizons, + mock_find_common_forecast_horizons, + mock_get_coverage_id, + ): + mock_get_coverage_id.side_effect = lambda indicator, run, interval: f"{indicator}_{run}_{interval}" + mock_find_common_forecast_horizons.return_value = [0] + mock_validate_forecast_horizons.return_value = [] + mock_get_coverage.side_effect = [ + pd.DataFrame( + { + "latitude": [1, 2], + "longitude": [3, 4], + "run": ["2024-12-13T00.00.00Z", "2024-12-13T00.00.00Z"], + "forecast_horizon": [0, 0], + "data1": [10, 20], + } + ), + pd.DataFrame( + { + "latitude": [1, 2], + "longitude": [3, 4], + "run": ["2024-12-13T00.00.00Z", "2024-12-13T00.00.00Z"], + "forecast_horizon": [0, 0], + "data2": [30, 40], + } + ), + ] + + indicator_names = [ + "GEOMETRIC_HEIGHT__GROUND_OR_WATER_SURFACE", + "BRIGHTNESS_TEMPERATURE__GROUND_OR_WATER_SURFACE", + ] + runs = ["2024-12-13T00.00.00Z"] + heights = [None, 2] + pressures = [None, None] + intervals = ["", "P1D"] + lat = (37.5, 55.4) + long = (-12, 16) + forecast_horizons = [0] + + expected_result = pd.DataFrame( + { + "latitude": [1, 2], + "longitude": [3, 4], + "run": ["2024-12-13T00.00.00Z", "2024-12-13T00.00.00Z"], + "forecast_horizon": [0, 0], + "data1": [10, 20], + "data2": [30, 40], + } + ) + + forecast = AromeForecast( + self.client, + precision=self.precision, + territory=self.territory, + ) + + result = forecast.get_combined_coverage( + indicator_names, runs, heights, pressures, intervals, lat, long, forecast_horizons + ) + pd.testing.assert_frame_equal(result, expected_result) + + @patch("meteole._arome.AromeForecast._get_coverage_id") + @patch("meteole._arome.AromeForecast.find_common_forecast_horizons") + @patch("meteole._arome.AromeForecast._validate_forecast_horizons") + @patch("meteole._arome.AromeForecast.get_coverage") + def test_get_combined_coverage_invalid_forecast_horizons( + self, + mock_get_coverage, + mock_validate_forecast_horizons, + mock_find_common_forecast_horizons, + mock_get_coverage_id, + ): + mock_get_coverage_id.side_effect = lambda indicator, run, interval: f"{indicator}_{run}_{interval}" + mock_find_common_forecast_horizons.return_value = [0] + mock_validate_forecast_horizons.return_value = [ + "GEOMETRIC_HEIGHT__GROUND_OR_WATER_SURFACE_2024-12-13T00.00.00Z" + ] + + indicator_names = [ + "GEOMETRIC_HEIGHT__GROUND_OR_WATER_SURFACE", + "BRIGHTNESS_TEMPERATURE__GROUND_OR_WATER_SURFACE", + ] + runs = ["2024-12-13T00.00.00Z"] + heights = [None, 2] + pressures = [None, None] + intervals = ["", "P1D"] + lat = (37.5, 55.4) + long = (-12, 16) + forecast_horizons = [0] + + forecast = AromeForecast( + self.client, + precision=self.precision, + territory=self.territory, + ) + + with self.assertRaises(ValueError) as context: + forecast.get_combined_coverage( + indicator_names, runs, heights, pressures, intervals, lat, long, forecast_horizons + ) + self.assertIn("are not valid for this coverage_ids", str(context.exception)) + + @patch("meteole._arome.AromeForecast._get_coverage_id") + @patch("meteole._arome.AromeForecast.find_common_forecast_horizons") + @patch("meteole._arome.AromeForecast._validate_forecast_horizons") + @patch("meteole._arome.AromeForecast.get_coverage") + def test_get_combined_coverage_multiple_runs( + self, + mock_get_coverage, + mock_validate_forecast_horizons, + mock_find_common_forecast_horizons, + mock_get_coverage_id, + ): + # Mock return values + mock_get_coverage_id.side_effect = lambda indicator, run, interval: f"{indicator}_{run}_{interval}" + mock_find_common_forecast_horizons.return_value = [0] + mock_validate_forecast_horizons.return_value = [] + mock_get_coverage.side_effect = [ + pd.DataFrame( + { + "latitude": [1, 2], + "longitude": [3, 4], + "run": ["2024-12-13T00.00.00Z", "2024-12-13T00.00.00Z"], + "forecast_horizon": [0, 0], + "data1": [10, 20], + } + ), + pd.DataFrame( + { + "latitude": [1, 2], + "longitude": [3, 4], + "run": ["2024-12-13T00.00.00Z", "2024-12-13T00.00.00Z"], + "forecast_horizon": [0, 0], + "data2": [30, 40], + } + ), + pd.DataFrame( + { + "latitude": [1, 2], + "longitude": [3, 4], + "run": ["2024-12-14T00.00.00Z", "2024-12-14T00.00.00Z"], + "forecast_horizon": [0, 0], + "data1": [100, 200], + } + ), + pd.DataFrame( + { + "latitude": [1, 2], + "longitude": [3, 4], + "run": ["2024-12-14T00.00.00Z", "2024-12-14T00.00.00Z"], + "forecast_horizon": [0, 0], + "data2": [300, 400], + } + ), + ] + + indicator_names = [ + "GEOMETRIC_HEIGHT__GROUND_OR_WATER_SURFACE", + "BRIGHTNESS_TEMPERATURE__GROUND_OR_WATER_SURFACE", + ] + runs = ["2024-12-13T00.00.00Z", "2024-12-14T00.00.00Z"] + heights = [None, 2] + pressures = [None, None] + intervals = ["", "P1D"] + lat = (37.5, 55.4) + long = (-12, 16) + forecast_horizons = [0] + + expected_result = pd.DataFrame( + { + "latitude": [1, 2, 1, 2], + "longitude": [3, 4, 3, 4], + "run": ["2024-12-13T00.00.00Z", "2024-12-13T00.00.00Z", "2024-12-14T00.00.00Z", "2024-12-14T00.00.00Z"], + "forecast_horizon": [0, 0, 0, 0], + "data1": [10, 20, 100, 200], + "data2": [30, 40, 300, 400], + } + ) + + forecast = AromeForecast( + self.client, + precision=self.precision, + territory=self.territory, + ) + + result = forecast.get_combined_coverage( + indicator_names, runs, heights, pressures, intervals, lat, long, forecast_horizons + ) + pd.testing.assert_frame_equal(result, expected_result) + + @patch("meteole._arome.AromeForecast._get_coverage_id") + @patch("meteole._arome.AromeForecast.find_common_forecast_horizons") + @patch("meteole._arome.AromeForecast._validate_forecast_horizons") + @patch("meteole._arome.AromeForecast.get_coverage") + def test_get_combined_coverage_no_heights_or_pressures( + self, + mock_get_coverage, + mock_validate_forecast_horizons, + mock_find_common_forecast_horizons, + mock_get_coverage_id, + ): + mock_get_coverage_id.side_effect = lambda indicator, run, interval: f"{indicator}_{run}_{interval}" + mock_find_common_forecast_horizons.return_value = [0] + mock_validate_forecast_horizons.return_value = [] + mock_get_coverage.side_effect = [ + pd.DataFrame( + { + "latitude": [1, 2], + "longitude": [3, 4], + "run": ["2024-12-13T00.00.00Z", "2024-12-13T00.00.00Z"], + "forecast_horizon": [0, 0], + "data1": [10, 20], + } + ), + pd.DataFrame( + { + "latitude": [1, 2], + "longitude": [3, 4], + "run": ["2024-12-13T00.00.00Z", "2024-12-13T00.00.00Z"], + "forecast_horizon": [0, 0], + "data2": [30, 40], + } + ), + ] + + indicator_names = [ + "GEOMETRIC_HEIGHT__GROUND_OR_WATER_SURFACE", + "BRIGHTNESS_TEMPERATURE__GROUND_OR_WATER_SURFACE", + ] + runs = ["2024-12-13T00.00.00Z"] + heights = None + pressures = None + intervals = ["", "P1D"] + lat = (37.5, 55.4) + long = (-12, 16) + forecast_horizons = [0] + + expected_result = pd.DataFrame( + { + "latitude": [1, 2], + "longitude": [3, 4], + "run": ["2024-12-13T00.00.00Z", "2024-12-13T00.00.00Z"], + "forecast_horizon": [0, 0], + "data1": [10, 20], + "data2": [30, 40], + } + ) + + forecast = AromeForecast( + self.client, + precision=self.precision, + territory=self.territory, + ) + + result = forecast.get_combined_coverage( + indicator_names, runs, heights, pressures, intervals, lat, long, forecast_horizons + ) + pd.testing.assert_frame_equal(result, expected_result) + + @patch("meteole._arome.AromeForecast._get_coverage_id") + @patch("meteole._arome.AromeForecast.find_common_forecast_horizons") + @patch("meteole._arome.AromeForecast._validate_forecast_horizons") + @patch("meteole._arome.AromeForecast.get_coverage") + def test_get_combined_coverage_no_optional_params( + self, + mock_get_coverage, + mock_validate_forecast_horizons, + mock_find_common_forecast_horizons, + mock_get_coverage_id, + ): + mock_get_coverage_id.side_effect = lambda indicator, run, interval: f"{indicator}_{run}_{interval}" + mock_find_common_forecast_horizons.return_value = [0] + mock_validate_forecast_horizons.return_value = [] + mock_get_coverage.side_effect = [ + pd.DataFrame( + { + "latitude": [1, 2], + "longitude": [3, 4], + "run": ["2024-12-13T00.00.00Z", "2024-12-13T00.00.00Z"], + "forecast_horizon": [0, 0], + "data1": [10, 20], + } + ), + pd.DataFrame( + { + "latitude": [1, 2], + "longitude": [3, 4], + "run": ["2024-12-13T00.00.00Z", "2024-12-13T00.00.00Z"], + "forecast_horizon": [0, 0], + "data2": [30, 40], + } + ), + ] + + indicator_names = [ + "GEOMETRIC_HEIGHT__GROUND_OR_WATER_SURFACE", + "BRIGHTNESS_TEMPERATURE__GROUND_OR_WATER_SURFACE", + ] + runs = ["2024-12-13T00.00.00Z"] + + expected_result = pd.DataFrame( + { + "latitude": [1, 2], + "longitude": [3, 4], + "run": ["2024-12-13T00.00.00Z", "2024-12-13T00.00.00Z"], + "forecast_horizon": [0, 0], + "data1": [10, 20], + "data2": [30, 40], + } + ) + + forecast = AromeForecast( + self.client, + precision=self.precision, + territory=self.territory, + ) + + result = forecast.get_combined_coverage(indicator_names, runs) + pd.testing.assert_frame_equal(result, expected_result) + class TestArpegeForecast(unittest.TestCase): def setUp(self): From 8f055c5890394a254bb584f065c0ad8e52ef58c4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gratien=20D=C3=A9sormeaux?= Date: Tue, 7 Jan 2025 14:34:08 +0100 Subject: [PATCH 5/8] Default value for runs=[None] --- src/meteole/forecast.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/meteole/forecast.py b/src/meteole/forecast.py index b9f501d..ffc86c6 100644 --- a/src/meteole/forecast.py +++ b/src/meteole/forecast.py @@ -546,7 +546,7 @@ def _get_available_feature(grid_axis, feature_name): def get_combined_coverage( self, indicator_names: list[str], - runs: list[str], + runs: list[str | None] | None = None, heights: list[int] | None = None, pressures: list[int] | None = None, intervals: list[str | None] | None = None, @@ -579,6 +579,8 @@ def get_combined_coverage( ValueError: If the length of `heights` does not match the length of `indicator_names`. """ + if not runs: + runs = [None] coverages = [ self._get_combined_coverage_for_single_run( indicator_names=indicator_names, @@ -597,7 +599,7 @@ def get_combined_coverage( def _get_combined_coverage_for_single_run( self, indicator_names: list[str], - run: str, + run: str | None = None, heights: list[int] | None = None, pressures: list[int] | None = None, intervals: list[str | None] | None = None, @@ -663,6 +665,7 @@ def _check_params_length(params: list | None, arg_name: str) -> list: coverages = [ self.get_coverage( coverage_id=coverage_id, + run=run, lat=lat, long=long, heights=[height] if height is not None else [], From b36d5533ee8c42f6d8dcbec7944999ae0fb2918d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gratien=20D=C3=A9sormeaux?= Date: Tue, 7 Jan 2025 14:35:36 +0100 Subject: [PATCH 6/8] Pass precommits --- tutorial/Fetch_forecasts.ipynb | 18 +- tutorial/Get_vigilance.ipynb | 418 ++++++++++++++++----------------- 2 files changed, 218 insertions(+), 218 deletions(-) diff --git a/tutorial/Fetch_forecasts.ipynb b/tutorial/Fetch_forecasts.ipynb index e33a144..8d93d3a 100644 --- a/tutorial/Fetch_forecasts.ipynb +++ b/tutorial/Fetch_forecasts.ipynb @@ -23,6 +23,7 @@ "outputs": [], "source": [ "import random\n", + "\n", "from meteole import AromeForecast" ] }, @@ -112,12 +113,12 @@ "metadata": {}, "outputs": [], "source": [ - "#First parameters to create a coverage_id (run and interval)\n", + "# First parameters to create a coverage_id (run and interval)\n", "df_capabilities = arome.get_capabilities()\n", "\n", - "list_run_valid = list(df_capabilities[df_capabilities['indicator']==random_indicator]['run'].unique())\n", - "list_interval_valid = list(df_capabilities[df_capabilities['indicator']==random_indicator]['interval'].unique())\n", - "list_coverage_id_valid = list(df_capabilities[df_capabilities['indicator']==random_indicator]['id'].unique())\n" + "list_run_valid = list(df_capabilities[df_capabilities[\"indicator\"] == random_indicator][\"run\"].unique())\n", + "list_interval_valid = list(df_capabilities[df_capabilities[\"indicator\"] == random_indicator][\"interval\"].unique())\n", + "list_coverage_id_valid = list(df_capabilities[df_capabilities[\"indicator\"] == random_indicator][\"id\"].unique())" ] }, { @@ -126,13 +127,12 @@ "metadata": {}, "outputs": [], "source": [ - "#Then other parameters from a coverage_id\n", + "# Then other parameters from a coverage_id\n", "description = arome.get_coverage_description(list_coverage_id_valid[0])\n", "\n", - "list_forecast_horizons_valid = description.get('forecast_horizons', [])\n", - "list_height_valid = description.get('heights', [])\n", - "list_pressure_id_valid = description.get('pressures', [])\n", - "\n" + "list_forecast_horizons_valid = description.get(\"forecast_horizons\", [])\n", + "list_height_valid = description.get(\"heights\", [])\n", + "list_pressure_id_valid = description.get(\"pressures\", [])" ] }, { diff --git a/tutorial/Get_vigilance.ipynb b/tutorial/Get_vigilance.ipynb index 69f8858..7deefe4 100644 --- a/tutorial/Get_vigilance.ipynb +++ b/tutorial/Get_vigilance.ipynb @@ -1,211 +1,211 @@ { - "cells": [ - { - "cell_type": "markdown", - "id": "7fb27b941602401d91542211134fc71a", - "metadata": {}, - "source": [ - "# Vigilance Bulletin\n", - "\n", - "This tutorial will help you access the vigilance bulletin\n", - "\n", - "For more documentation, click [here](https://donneespubliques.meteofrance.fr/?fond=produit&id_produit=305&id_rubrique=50).\n", - "\n", - "Contents:\n", - "\n", - "- Init Vigilance Class\n", - "- Access Data" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "acae54e37e7d407bbb7b55eff062a284", - "metadata": {}, - "outputs": [], - "source": [ - "import pandas as pd\n", - "\n", - "from meteole import Vigilance" - ] - }, - { - "cell_type": "markdown", - "id": "9a63283cbaf04dbcab1f6479b197f3a8", - "metadata": {}, - "source": [ - "# Init Vigilance Class" - ] - }, - { - "cell_type": "markdown", - "id": "8dd0d8092fe74a7c96281538738b07e2", - "metadata": {}, - "source": [ - "**Requirements notice** : TODO Link to the documentation to have application_id" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "72eea5119410473aa328ad9291626812", - "metadata": {}, - "outputs": [], - "source": [ - "APP_ID = \"\"" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "id": "8edb47106e1a46a883d545849b8ab81b", - "metadata": {}, - "outputs": [], - "source": [ - "vigi = Vigilance(application_id=APP_ID)" - ] - }, - { - "cell_type": "markdown", - "id": "10185d26023b46108eb7d9f57d49d2b3", - "metadata": {}, - "source": [ - "# Collect Forecasted phenomenon\n", - "\n", - "Collect vigilance data from M\u00e9t\u00e9o France, including the forecasted phenomenon in df_phenomenon and the maximum intensity for each zone in df_timelaps" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "id": "8763a12b2bbd4a93a75aff182afb95dc", - "metadata": {}, - "outputs": [], - "source": [ - "df_phenomenon, df_timelaps = vigi.get_phenomenon()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7623eae2785240b9bd12b16a66d81610", - "metadata": {}, - "outputs": [], - "source": [ - "df_phenomenon.head()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7cdc8c89c7104fffa095e18ddfef8986", - "metadata": {}, - "outputs": [], - "source": [ - "df_timelaps.head()" - ] - }, - { - "cell_type": "markdown", - "id": "b118ea5561624da68c537baed56e602f", - "metadata": {}, - "source": [ - "# Collect text of monitoring bulletins\n", - "\n", - "Contains the text of monitoring bulletins, whether national, zonal (in the sense of defense zones) or departmental. It is issued in addition to the Vigilance card, when the meteorological situation so requires (systematically in Vigilance Orange and Red, when necessary in Vigilance Yellow)." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "938c804e27f84196a10c8828c723f798", - "metadata": {}, - "outputs": [], - "source": [ - "vigilance_bulletin = vigi.get_bulletin()" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "id": "504fb2a444614c0babb325280ed9130a", - "metadata": {}, - "outputs": [], - "source": [ - "vigilance_bulletin_df = pd.json_normalize(vigilance_bulletin)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b43b363d81ae4b689946ece5c682cd59", - "metadata": {}, - "outputs": [], - "source": [ - "text_bloc_items_df = pd.json_normalize(vigilance_bulletin_df[\"product.text_bloc_items\"].explode())" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8a65eabff63a45729fe45fb5ade58bdc", - "metadata": {}, - "outputs": [], - "source": [ - "text_bloc_items_df.head(10)" - ] - }, - { - "cell_type": "markdown", - "id": "c3933fab20d04ec698c2621248eb3be0", - "metadata": {}, - "source": [ - "# Map display" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4dd4641cc4064e0191573fe9c69df29b", - "metadata": {}, - "outputs": [], - "source": [ - "vigi.get_vignette()" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "meteole_env", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "undefined.undefined.undefined" - }, - "toc": { - "base_numbering": 1, - "nav_menu": {}, - "number_sections": true, - "sideBar": true, - "skip_h1_title": false, - "title_cell": "Table of Contents", - "title_sidebar": "Contents", - "toc_cell": false, - "toc_position": {}, - "toc_section_display": true, - "toc_window_display": false - } - }, - "nbformat": 4, - "nbformat_minor": 5 + "cells": [ + { + "cell_type": "markdown", + "id": "0", + "metadata": {}, + "source": [ + "# Vigilance Bulletin\n", + "\n", + "This tutorial will help you access the vigilance bulletin\n", + "\n", + "For more documentation, click [here](https://donneespubliques.meteofrance.fr/?fond=produit&id_produit=305&id_rubrique=50).\n", + "\n", + "Contents:\n", + "\n", + "- Init Vigilance Class\n", + "- Access Data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "\n", + "from meteole import Vigilance" + ] + }, + { + "cell_type": "markdown", + "id": "2", + "metadata": {}, + "source": [ + "# Init Vigilance Class" + ] + }, + { + "cell_type": "markdown", + "id": "3", + "metadata": {}, + "source": [ + "**Requirements notice** : TODO Link to the documentation to have application_id" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4", + "metadata": {}, + "outputs": [], + "source": [ + "APP_ID = \"\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5", + "metadata": {}, + "outputs": [], + "source": [ + "vigi = Vigilance(application_id=APP_ID)" + ] + }, + { + "cell_type": "markdown", + "id": "6", + "metadata": {}, + "source": [ + "# Collect Forecasted phenomenon\n", + "\n", + "Collect vigilance data from Météo France, including the forecasted phenomenon in df_phenomenon and the maximum intensity for each zone in df_timelaps" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7", + "metadata": {}, + "outputs": [], + "source": [ + "df_phenomenon, df_timelaps = vigi.get_phenomenon()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8", + "metadata": {}, + "outputs": [], + "source": [ + "df_phenomenon.head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9", + "metadata": {}, + "outputs": [], + "source": [ + "df_timelaps.head()" + ] + }, + { + "cell_type": "markdown", + "id": "10", + "metadata": {}, + "source": [ + "# Collect text of monitoring bulletins\n", + "\n", + "Contains the text of monitoring bulletins, whether national, zonal (in the sense of defense zones) or departmental. It is issued in addition to the Vigilance card, when the meteorological situation so requires (systematically in Vigilance Orange and Red, when necessary in Vigilance Yellow)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "11", + "metadata": {}, + "outputs": [], + "source": [ + "vigilance_bulletin = vigi.get_bulletin()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "12", + "metadata": {}, + "outputs": [], + "source": [ + "vigilance_bulletin_df = pd.json_normalize(vigilance_bulletin)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "13", + "metadata": {}, + "outputs": [], + "source": [ + "text_bloc_items_df = pd.json_normalize(vigilance_bulletin_df[\"product.text_bloc_items\"].explode())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "14", + "metadata": {}, + "outputs": [], + "source": [ + "text_bloc_items_df.head(10)" + ] + }, + { + "cell_type": "markdown", + "id": "15", + "metadata": {}, + "source": [ + "# Map display" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "16", + "metadata": {}, + "outputs": [], + "source": [ + "vigi.get_vignette()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "meteole_env", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "undefined.undefined.undefined" + }, + "toc": { + "base_numbering": 1, + "nav_menu": {}, + "number_sections": true, + "sideBar": true, + "skip_h1_title": false, + "title_cell": "Table of Contents", + "title_sidebar": "Contents", + "toc_cell": false, + "toc_position": {}, + "toc_section_display": true, + "toc_window_display": false + } + }, + "nbformat": 4, + "nbformat_minor": 5 } From b81214c77861fae116d8c0e70e879bbd73483747 Mon Sep 17 00:00:00 2001 From: Thomas BOUCHE Date: Tue, 7 Jan 2025 14:53:34 +0100 Subject: [PATCH 7/8] fix ruff --- src/meteole/forecast.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/meteole/forecast.py b/src/meteole/forecast.py index 1be0dad..6050a08 100644 --- a/src/meteole/forecast.py +++ b/src/meteole/forecast.py @@ -8,7 +8,7 @@ from abc import ABC, abstractmethod from functools import reduce from pathlib import Path -from typing import Any, List +from typing import Any from warnings import warn import pandas as pd @@ -686,7 +686,7 @@ def _check_params_length(params: list | None, arg_name: str) -> list: coverages, ) - def _get_forecast_horizons(self, coverage_ids: List[str]) -> List[List[int]]: + def _get_forecast_horizons(self, coverage_ids: list[str]) -> list[list[int]]: """ Retrieve the times for each coverage_id. Parameters: @@ -724,7 +724,7 @@ def find_common_forecast_horizons( return sorted(common_forecast_horizons) - def _validate_forecast_horizons(self, coverage_ids: List[str], forecast_horizons: List[int]) -> List[str]: + def _validate_forecast_horizons(self, coverage_ids: list[str], forecast_horizons: list[int]) -> list[str]: """ Validate forecast_horizons for a list of coverage IDs. Parameters: From b06c988e0102309c0d2eb779d42eee5dfd760608 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gratien=20D=C3=A9sormeaux?= Date: Tue, 7 Jan 2025 14:59:20 +0100 Subject: [PATCH 8/8] Run all precommits --- tests/test_forecasts.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/test_forecasts.py b/tests/test_forecasts.py index ce25f73..9233b5a 100644 --- a/tests/test_forecasts.py +++ b/tests/test_forecasts.py @@ -210,7 +210,9 @@ def test_get_data_single_forecast(self, mock_get_coverage_file, mock_transform_g @patch("meteole._arome.AromeForecast.get_capabilities") @patch("meteole._arome.AromeForecast._transform_grib_to_df") @patch("meteole._arome.AromeForecast._get_coverage_file") - def test_get_data_single_forecast_with_height(self, mock_get_coverage_file, mock_transform_grib_to_df, mock_get_capabilities): + def test_get_data_single_forecast_with_height( + self, mock_get_coverage_file, mock_transform_grib_to_df, mock_get_capabilities + ): mock_transform_grib_to_df.return_value = pd.DataFrame({"data": [1, 2, 3], "heightAboveGround": ["2", "2", "2"]}) forecast = AromeForecast(