From d60260f5aee0c851b43338a983c5789c5dc04641 Mon Sep 17 00:00:00 2001 From: Dan Holdaway <27729500+danholdaway@users.noreply.github.com> Date: Wed, 5 Jun 2024 10:59:48 -0400 Subject: [PATCH] Add Time Series Capability (#187) ## Description Add the capability to read and transform data on the fly and assemble a dataset that has a time series dimension. - Add a test for computing the mean of omb and making a time series. - Add a test for plotting a time series of bias correction coefficients. - Add VarBC data. - Add a reader for JEDI bias and tlapse files. - Remove prints of datasets from the read and transform classes and move them to the driver level. The plots created by the new tests are shown below. Note that the tests are somewhat contrived since the data is the same at both time steps. We can try to improve on that in the future. ![time_series_omb](https://github.com/JCSDA-internal/eva/assets/27729500/e5d04412-6bde-4f7f-9e4c-da42ca6214d6) ![varbc_time_series](https://github.com/JCSDA-internal/eva/assets/27729500/2ee6db9a-c8ae-476c-b63c-ff9e1782584b) ## Dependencies NONE ## Impact NONE --------- Co-authored-by: danholdaway Co-authored-by: Cory Martin Co-authored-by: Akira Sewnath --- .gitattributes | 2 + src/eva/data/csv_space.py | 3 - src/eva/data/cubed_sphere_restart.py | 4 - src/eva/data/data_collections.py | 49 ++++- src/eva/data/data_driver.py | 47 ++--- src/eva/data/geoval_space.py | 3 - src/eva/data/gsi_obs_space.py | 3 - src/eva/data/ioda_obs_space.py | 3 - src/eva/data/jedi_log.py | 3 - .../data/jedi_variational_bias_correction.py | 156 +++++++++++++++ src/eva/data/lat_lon.py | 3 - src/eva/data/mon_data_space.py | 3 - src/eva/data/soca_restart.py | 4 - src/eva/eva_driver.py | 184 ++++++++++++++++-- src/eva/tests/config/testCsv.yaml | 10 +- .../testIodaObsSpaceAmsuaN19_TimeSeries.yaml | 73 +++++++ ...JediVariationalBiasCorrectionAmsuaN19.yaml | 88 +++++++++ .../config/testMonDataSpaceHirs4Metop-A.yaml | 6 +- src/eva/tests/config/testMonSummary.yaml | 4 +- ....bc.amsua_n19.2021-12-11T15:00:00Z.satbias | 3 + ...2.bc.amsua_n19.2021-12-11T15:00:00Z.tlapse | 3 + ....bc.amsua_n19.2021-12-11T21:00:00Z.satbias | 3 + ...2.bc.amsua_n19.2021-12-11T21:00:00Z.tlapse | 3 + ...pace.amsua_n19.hofx.2020-12-15T210000Z.nc4 | 3 + src/eva/time_series/__init__.py | 11 ++ src/eva/time_series/time_series.py | 100 ++++++++++ src/eva/transforms/select_time.py | 1 - src/eva/transforms/transform_driver.py | 6 - src/eva/utilities/duration.py | 63 ++++++ src/eva/utilities/logger.py | 32 ++- 30 files changed, 775 insertions(+), 101 deletions(-) create mode 100644 src/eva/data/jedi_variational_bias_correction.py create mode 100644 src/eva/tests/config/testIodaObsSpaceAmsuaN19_TimeSeries.yaml create mode 100644 src/eva/tests/config/testJediVariationalBiasCorrectionAmsuaN19.yaml create mode 100644 src/eva/tests/data/gsi.x0048v2.bc.amsua_n19.2021-12-11T15:00:00Z.satbias create mode 100644 src/eva/tests/data/gsi.x0048v2.bc.amsua_n19.2021-12-11T15:00:00Z.tlapse create mode 100644 src/eva/tests/data/gsi.x0048v2.bc.amsua_n19.2021-12-11T21:00:00Z.satbias create mode 100644 src/eva/tests/data/gsi.x0048v2.bc.amsua_n19.2021-12-11T21:00:00Z.tlapse create mode 100644 src/eva/tests/data/ioda_obs_space.amsua_n19.hofx.2020-12-15T210000Z.nc4 create mode 100644 src/eva/time_series/__init__.py create mode 100644 src/eva/time_series/time_series.py create mode 100644 src/eva/utilities/duration.py diff --git a/.gitattributes b/.gitattributes index a9d86eae..3cae5ebc 100644 --- a/.gitattributes +++ b/.gitattributes @@ -3,3 +3,5 @@ *.res filter=lfs diff=lfs merge=lfs -text *.odb
filter=lfs diff=lfs merge=lfs -text *.ipynb filter=lfs diff=lfs merge=lfs -text +*.satbias filter=lfs diff=lfs merge=lfs -text +*.tlapse filter=lfs diff=lfs merge=lfs -text diff --git a/src/eva/data/csv_space.py b/src/eva/data/csv_space.py index a853c182..2ed34698 100644 --- a/src/eva/data/csv_space.py +++ b/src/eva/data/csv_space.py @@ -98,9 +98,6 @@ def execute(self, dataset_config, data_collections, timing): data_collections.create_or_add_to_collection(collection_name, ds) ds.close() - # Display the contents of the collections for helping the user with making plots - data_collections.display_collections() - # ---------------------------------------------------------------------------------------------- def generate_default_config(self, filenames, collection_name): diff --git a/src/eva/data/cubed_sphere_restart.py b/src/eva/data/cubed_sphere_restart.py index d40bff3d..1f6f5d21 100644 --- a/src/eva/data/cubed_sphere_restart.py +++ b/src/eva/data/cubed_sphere_restart.py @@ -142,10 +142,6 @@ def execute(self, dataset_config, data_collections, timing): # ------------------------- data_collections.nan_float_values_outside_threshold(threshold) - # Display the contents of the collections for helping the user with making plots - # ------------------------- - data_collections.display_collections() - # ---------------------------------------------------------------------------------------------- def generate_default_config(self, filenames, collection_name): diff --git a/src/eva/data/data_collections.py b/src/eva/data/data_collections.py index 20d32109..0b8a711b 100644 --- a/src/eva/data/data_collections.py +++ b/src/eva/data/data_collections.py @@ -32,7 +32,7 @@ class DataCollections: """Manage collections of xarray Datasets with variable manipulations.""" - def __init__(self): + def __init__(self, time_series=False): """Initialize the DataCollections instance.""" @@ -42,6 +42,9 @@ def __init__(self): # Create a logger self.logger = Logger('DataCollections') + # If this is a time series, store it + self.time_series = bool(time_series) + # ---------------------------------------------------------------------------------------------- def create_or_add_to_collection(self, collection_name, collection, concat_dimension=None): @@ -61,6 +64,11 @@ def create_or_add_to_collection(self, collection_name, collection, concat_dimens ValueError: If concatenation dimension is missing or invalid. """ + # If this is a time series the collection name must be 'time_series' + if self.time_series and collection_name != 'time_series': + self.logger.abort('In create_or_add_to_collection: time_series collection must ' + + 'be \'time_series\'') + # Collections should only be xarray datasets if not isinstance(collection, Dataset): self.logger.abort('In add_collection: collection must be an xarray.Dataset') @@ -149,6 +157,11 @@ def add_variable_to_collection(self, collection_name, group_name, variable_name, ValueError: If variable is not an xarray DataArray. """ + # If this is a time series the collection name must be 'time_series' + if self.time_series and collection_name != 'time_series': + self.logger.abort('In add_variable_to_collection: time_series collection must ' + + 'be \'time_series\'') + # Assert that new variable is an xarray Dataarray if not isinstance(variable, DataArray): self.logger.abort('In add_variable_to_collection: variable must be xarray.DataArray') @@ -197,6 +210,11 @@ def get_variable_data_array(self, collection_name, group_name, variable_name, is missing.
""" + # If time_series collection name must also be time_series + if self.time_series and collection_name != 'time_series': + self.logger.abort('In get_variable_data_array: time_series collection must ' + + 'be \'time_series\'') + group_variable_name = group_name + '::' + variable_name data_array = self._collections[collection_name][group_variable_name] @@ -274,6 +292,11 @@ def get_variable_data(self, collection_name, group_name, variable_name, ndarray: The selected variable data as a NumPy array. """ + # If time_series collection name must also be time_series + if self.time_series and collection_name != 'time_series': + self.logger.abort('In get_variable_data: time_series collection must ' + + 'be \'time_series\'') + variable_array = self.get_variable_data_array(collection_name, group_name, variable_name, channels, levels, datatypes) @@ -378,6 +401,7 @@ def display_collections(self): 'float32': '{:+.4e}', 'int64': '{:+11d}', 'int32': '{:+11d}', + 'datetime64[ns]': '{}' } # Display a list of variables that are available in the collection @@ -388,7 +412,7 @@ def display_collections(self): self.logger.info('Collection name: ' + fcol.underline + collection + fcol.end) self.logger.info('\n Dimensions:') for dim in list(self._collections[collection].dims): - dim_value = self._collections[collection].dims[dim] + dim_value = self._collections[collection].sizes[dim] self.logger.info(f' {dim}: {dim_value}') self.logger.info('\n Coordinates:') for coord in list(self._collections[collection].coords): @@ -411,8 +435,25 @@ def display_collections(self): rms = np.sqrt(np.nanmean(data_var_value**2)) rms_string = ', RMS=' + minmaxrms_format.format(rms) minmaxrms_string = ' | ' + min_string + ', ' + max_string + rms_string - self.logger.info(' ' + data_var.ljust(max_name_len) + ' (' + - str(data_var_value.dtype).ljust(7) + ')' + minmaxrms_string) + full_str = ' ' + data_var.ljust(max_name_len) + ' (' + \ + str(data_var_value.dtype)[0:7].ljust(7) + ')' + minmaxrms_string + else: + # No min/max + min_string = '' + max_string = '' + minmaxrms_string = ' | ' + min_string + ', ' + max_string + full_str = ' ' + data_var.ljust(max_name_len) + ' (' + \ + str(data_var_value.dtype)[0:7].ljust(7) + ')' + minmaxrms_string + self.logger.info(full_str) + + # Add the raw xarray display of the collection for more information about coords/dims + self.logger.info(' ') + self.logger.info('/'*80) + self.logger.info(' ') + self.logger.info(f'Raw xarray display of the {fcol.underline + collection + fcol.end} ' + + 'collection:') + self.logger.info(' ') + self.logger.info(str(self._collections[collection])) self.logger.info('-'*80) # ---------------------------------------------------------------------------------------------- diff --git a/src/eva/data/data_driver.py b/src/eva/data/data_driver.py index 70ac3e1b..5e74df0d 100644 --- a/src/eva/data/data_driver.py +++ b/src/eva/data/data_driver.py @@ -11,55 +11,40 @@ # -------------------------------------------------------------------------------------------------- -from eva.utilities.config import get from eva.data.eva_dataset_base import EvaDatasetFactory -import importlib -import os - # -------------------------------------------------------------------------------------------------- -def data_driver(config, data_collections, timing, logger): +def data_driver(dataset_config, data_collections, timing, logger): """ Driver for executing data processing. Args: - config (dict): Configuration settings for data processing. 
+ dataset_config (dict): Configuration settings for data processing. data_collections (DataCollections): Instance of the DataCollections class. timing (Timing): Timing instance for performance measurement. logger (Logger): Logger instance for logging messages. """ - # Get list of dataset dictionaries - datasets = get(config, logger, 'datasets') - - # Loop over datasets - for dataset in datasets: + # Check if the dataset_config contains the 'type' key + logger.assert_abort('type' in dataset_config, 'Each dataset must have a \'type\' key') - # Extract name for this diagnostic data type - try: - eva_data_class_name = dataset['type'] - except Exception as e: - msg = '\'type\' key not found. \'diagnostic_data_config\': ' \ - f'{diagnostic_data_config}, error: {e}' - raise KeyError(msg) + # Extract name for this diagnostic data type + eva_data_class_name = dataset_config['type'] - # Create the data object - creator = EvaDatasetFactory() - timing.start('DataObjectConstructor') - eva_data_object = creator.create_eva_object(eva_data_class_name, - 'data', - logger, - timing) - timing.stop('DataObjectConstructor') + # Create the data object + creator = EvaDatasetFactory() + timing.start('DataObjectConstructor') + eva_data_object = creator.create_eva_object(eva_data_class_name, 'data', logger, timing) + timing.stop('DataObjectConstructor') - # Prepare diagnostic data - logger.info(f'Running execute for {eva_data_object.name}') - timing.start('DataObjectExecute') - eva_data_object.execute(dataset, data_collections, timing) - timing.stop('DataObjectExecute') + # Prepare diagnostic data + logger.info(f'Running execute for {eva_data_object.name}') + timing.start('DataObjectExecute') + eva_data_object.execute(dataset_config, data_collections, timing) + timing.stop('DataObjectExecute') # -------------------------------------------------------------------------------------------------- diff --git a/src/eva/data/geoval_space.py b/src/eva/data/geoval_space.py index 27c6bd28..447d4ec2 100644 --- a/src/eva/data/geoval_space.py +++ b/src/eva/data/geoval_space.py @@ -86,9 +86,6 @@ def execute(self, dataset_config, data_collections, timing): # Nan out unphysical values data_collections.nan_float_values_outside_threshold(threshold) - # Display the contents of the collections for helping the user with making plots - data_collections.display_collections() - def generate_default_config(self, filenames, collection_name): """ diff --git a/src/eva/data/gsi_obs_space.py b/src/eva/data/gsi_obs_space.py index bebb930a..cad45a5a 100644 --- a/src/eva/data/gsi_obs_space.py +++ b/src/eva/data/gsi_obs_space.py @@ -297,9 +297,6 @@ def execute(self, dataset_config, data_collections, timeing): # Change the channel dimension name data_collections.adjust_channel_dimension_name('nchans') - # Display the contents of the collections for helping the user with making plots - data_collections.display_collections() - # ---------------------------------------------------------------------------------------------- def generate_default_config(self, filenames, collection_name): diff --git a/src/eva/data/ioda_obs_space.py b/src/eva/data/ioda_obs_space.py index cca423a7..895e08ed 100644 --- a/src/eva/data/ioda_obs_space.py +++ b/src/eva/data/ioda_obs_space.py @@ -277,9 +277,6 @@ def execute(self, dataset_config, data_collections, timing): # Nan out unphysical values data_collections.nan_float_values_outside_threshold(threshold) - # Display the contents of the collections for helping the user with making plots - 
data_collections.display_collections() - def generate_default_config(self, filenames, collection_name): """ diff --git a/src/eva/data/jedi_log.py b/src/eva/data/jedi_log.py index 56f5be1c..95030fcc 100644 --- a/src/eva/data/jedi_log.py +++ b/src/eva/data/jedi_log.py @@ -120,9 +120,6 @@ def execute(self, dataset_config, data_collections, timing): # Add to the Eva dataset data_collections.create_or_add_to_collection(collection_name, convergence_ds) - # Write out all the collections - data_collections.display_collections() - # ---------------------------------------------------------------------------------------------- def get_from_log(self, search_term, separator, position, custom_log=None): diff --git a/src/eva/data/jedi_variational_bias_correction.py b/src/eva/data/jedi_variational_bias_correction.py new file mode 100644 index 00000000..1b377895 --- /dev/null +++ b/src/eva/data/jedi_variational_bias_correction.py @@ -0,0 +1,156 @@ +# (C) Copyright 2024- NOAA/NWS/EMC +# +# (C) Copyright 2024- United States Government as represented by the Administrator of the +# National Aeronautics and Space Administration. All Rights Reserved. +# +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. + + +# -------------------------------------------------------------------------------------------------- + + +import numpy as np +import xarray as xr + +from eva.data.eva_dataset_base import EvaDatasetBase +from eva.utilities.config import get + + +# -------------------------------------------------------------------------------------------------- + + +class JediVariationalBiasCorrection(EvaDatasetBase): + + """ + A class for executing data read for JEDI variational bias correction data. + + Args: + EvaDatasetBase (class): The base class for dataset processing. + + Attributes: + N/A + + Methods: + execute(dataset_config, data_collections, timing): + Executes the data read and adds JEDI variational bias correction data to the data + collection. + + generate_default_config(filenames, collection_name): + Generates a default configuration dictionary for JEDI variational bias correction + data, used for more easily accessing the class interactively. + + Notes: + - The class inherits from `EvaDatasetBase` and extends its functionality. + """ + + # ---------------------------------------------------------------------------------------------- + + def execute(self, dataset_config, data_collections, timing): + + """ + Executes data read for JEDI variational bias correction data. + + Args: + dataset_config (dict): Configuration settings for the dataset. + data_collections (DataCollection): The data collection to store read data. + timing (Timing): Timing information for profiling.
+ + Returns: + None + """ + + # Check for required keys in config + required_keys = [ + 'name', + 'bias_file', + 'lapse_file', + ] + for key in required_keys: + self.logger.assert_abort(key in dataset_config, "For JediVariationalBiasCorrection " + + f"the config must contain key: {key}") + + # Parse config + collection_name = dataset_config['name'] + bias_file = get(dataset_config, self.logger, 'bias_file') + lapse_file = get(dataset_config, self.logger, 'lapse_file') + + groups = [ + 'BiasCoefficients', + 'BiasCoefficientErrors', + ] + + # Create a new dataset to store the bias data + bias_dataset = xr.open_dataset(bias_file) + + # Get record dimension size + self.logger.assert_abort(len(bias_dataset['Record']) == 1, 'This code currently only ' 'supports reading VarBC files where the Record dimension is 1') + + # Loop over groups, open and store in bias_dataset + for group in groups: + dsg = xr.open_dataset(bias_file, group=group) + + # Rename variables with group + dsg = dsg.rename_vars({var: f'{group}::{var}' for var in dsg.data_vars}) + + # Store the data in the bias_dataset + bias_dataset = xr.merge([bias_dataset, dsg]) + + # Now add a coordinate for the channel dimension, indexed from zero + bias_dataset = bias_dataset.assign_coords( + {'Channel': range(len(bias_dataset.Channel))} + ) + + # Squeeze the record dimension and remove record coordinate + bias_dataset = bias_dataset.squeeze('Record') + bias_dataset = bias_dataset.drop_vars('Record') + + # Rename numberObservationsUsed to Bias::numberObservationsUsed + bias_dataset = bias_dataset.rename_vars( + {'numberObservationsUsed': 'Bias::numberObservationsUsed'} + ) + + # Remove attributes from the dataset + bias_dataset.attrs = {} + + # Read the t-lapse file (text) into a dataset + with open(lapse_file, 'r') as f: + lapse_data = f.readlines() + + # Store the third element of each line as a numpy array + lapse_data = np.array([float(line.split()[2]) for line in lapse_data]) + + # Store lapse data in bias_dataset with channel as dimension/coordinate + bias_dataset['Bias::tlapse'] = xr.DataArray(lapse_data, dims=['Channel']) + + # Add bias_dataset to data_collections + data_collections.create_or_add_to_collection(collection_name, bias_dataset) + + # ---------------------------------------------------------------------------------------------- + + def generate_default_config(self, filenames, collection_name): + + """ + Generates a default configuration dictionary for the class. + + Args: + filenames (list of length 2): List of bias and lapse filenames for the data collection. + collection_name (str): Name of the data collection. + + Returns: + dict: A dictionary containing default configuration settings.
+ + Example: + :: + varbc_instance = JediVariationalBiasCorrection() + default_config = varbc_instance.generate_default_config([bias_file, lapse_file], + collection_name) + """ + + return { + 'bias_file': filenames[0], + 'lapse_file': filenames[1], + 'name': collection_name + } + + # ---------------------------------------------------------------------------------------------- diff --git a/src/eva/data/lat_lon.py b/src/eva/data/lat_lon.py index feb295a0..42b61499 100644 --- a/src/eva/data/lat_lon.py +++ b/src/eva/data/lat_lon.py @@ -61,9 +61,6 @@ def execute(self, dataset_config, data_collections, timing): ds.close() - # Display the contents of the collections for helping the user with making plots - data_collections.display_collections() - # ---------------------------------------------------------------------------------------------- def generate_default_config(self, filenames, collection_name): diff --git a/src/eva/data/mon_data_space.py b/src/eva/data/mon_data_space.py index a49dfff1..e96054f5 100644 --- a/src/eva/data/mon_data_space.py +++ b/src/eva/data/mon_data_space.py @@ -198,9 +198,6 @@ def execute(self, dataset_config, data_collections, timing): # Nan out unphysical values data_collections.nan_float_values_outside_threshold(threshold) - # Display the contents of the collections for helping the user with making plots - data_collections.display_collections() - # ---------------------------------------------------------------------------------------------- def generate_default_config(self, filenames, collection_name, control_file): diff --git a/src/eva/data/soca_restart.py b/src/eva/data/soca_restart.py index 17d73205..818775fb 100644 --- a/src/eva/data/soca_restart.py +++ b/src/eva/data/soca_restart.py @@ -108,10 +108,6 @@ def execute(self, dataset_config, data_collections, timing): # ------------------------- data_collections.nan_float_values_outside_threshold(threshold) - # Display the contents of the collections for helping the user with making plots - # ------------------------- - data_collections.display_collections() - # ---------------------------------------------------------------------------------------------- def generate_default_config(self, filenames, collection_name): diff --git a/src/eva/eva_driver.py b/src/eva/eva_driver.py index c9e79470..7c738a04 100644 --- a/src/eva/eva_driver.py +++ b/src/eva/eva_driver.py @@ -9,16 +9,170 @@ # -------------------------------------------------------------------------------------------------- +from datetime import datetime +import argparse +import os +from eva.utilities.config import get from eva.utilities.logger import Logger from eva.utilities.timing import Timing from eva.data.data_driver import data_driver +from eva.time_series.time_series import collapse_collection_to_time_series from eva.transforms.transform_driver import transform_driver from eva.plotting.batch.base.plot_tools.figure_driver import figure_driver from eva.data.data_collections import DataCollections +from eva.utilities.duration import iso_duration_to_timedelta from eva.utilities.utils import load_yaml_file -import argparse -import os + + +# -------------------------------------------------------------------------------------------------- + + +def read_transform(logger, timing, eva_dict, data_collections): + + """ + Read the data and perform any transforms based on the configuration. + + Parameters: + logger (Logger): An instance of the logger for logging messages. + timing (Timing): An instance of the timing object for timing the process.
+ eva_dict (dict): The configuration dictionary for the EVA process. + data_collections (DataCollections): An instance of the data collections object. + + Returns: + None + """ + + # Get the datasets configuration + datasets_config = get(eva_dict, logger, 'datasets') + + # Optionally suppress the display of the collection + suppress_collection_display = get(eva_dict, logger, 'suppress_collection_display', False) + + # Loop over datasets reading each one in turn, internally appending the data_collections + for dataset_config in datasets_config: + + # Prepare diagnostic data + logger.info('Running data driver') + timing.start('DataDriverExecute') + data_driver(dataset_config, data_collections, timing, logger) + timing.stop('DataDriverExecute') + + # After reading all datasets display the collection + if not suppress_collection_display: + logger.info('Reading of Eva data complete: status of collections: ') + data_collections.display_collections() + + # Perform any transforms + if 'transforms' in eva_dict: + logger.info('Running transform driver') + timing.start('TransformDriverExecute') + transform_driver(eva_dict, data_collections, timing, logger) + timing.stop('TransformDriverExecute') + + # After performing all transforms display the collection + if not suppress_collection_display: + logger.info('Transformations of data complete: status of collections: ') + data_collections.display_collections() + + +# -------------------------------------------------------------------------------------------------- + + +def read_transform_time_series(logger, timing, eva_dict, data_collections): + + """ + Read the data and perform transforms on the fly, then collapse the data into a time series. + + Parameters: + logger (Logger): An instance of the logger for logging messages. + timing (Timing): An instance of the timing object for timing the process. + eva_dict (dict): The configuration dictionary for the EVA process. + data_collections (DataCollections): An instance of the data collections object. + + Returns: + None + """ + + # Check for required keys + # ----------------------- + required_keys = [ + 'begin_date', + 'final_date', + 'interval', + 'collection', + 'variables', + ] + for key in required_keys: + logger.assert_abort(key in eva_dict['time_series'], 'If running Eva in time series ' + + f'mode the time series config must contain "{key}"') + + # Write message that this is a time series run + logger.info('This instance of Eva is being used to accumulate a time series.') + + # Optionally suppress the display of the collection + suppress_collection_display = get(eva_dict, logger, 'suppress_collection_display', False) + + # Get the time series configuration + time_series_config = eva_dict['time_series'] + + # Extract the dates of the time series + begin_date = time_series_config['begin_date'] + final_date = time_series_config['final_date'] + interval = time_series_config['interval'] + + # Convert begin and end dates from ISO strings to datetime objects + begin_date = datetime.fromisoformat(begin_date) + final_date = datetime.fromisoformat(final_date) + + # Convert interval ISO string to timedelta object + interval = iso_duration_to_timedelta(logger, interval) + + # Make list of dates from begin to end with interval + dates = [] + date = begin_date + count = 0 + while date <= final_date: + dates.append(date) + date += interval + count += 1 + # Abort if count hits one million + logger.assert_abort(count < 1e6, 'You are planning to read more than one million ' + + 'time steps. This is likely an error.
Please check your configuration.') + + # Get the datasets configuration + datasets_config = get(eva_dict, logger, 'datasets') + + # Assert that datasets_config is the same length as dates + logger.assert_abort(len(datasets_config) == len(dates), 'When running in time series mode ' + + 'the number of datasets must be the same as the number of dates.') + + # Loop over datasets reading each one in turn, internally appending the data_collections + for ind, dataset_config in enumerate(datasets_config): + + # Create a temporary collection for this time step + data_collections_tmp = DataCollections() + + # Prepare diagnostic data + logger.info('Running data driver') + timing.start('DataDriverExecute') + data_driver(dataset_config, data_collections_tmp, timing, logger) + timing.stop('DataDriverExecute') + + # Perform any transforms on the fly + if 'transforms' in eva_dict: + logger.info('Running transform driver') + timing.start('TransformDriverExecute') + transform_driver(eva_dict, data_collections_tmp, timing, logger) + timing.stop('TransformDriverExecute') + + # Collapse data into time series + collapse_collection_to_time_series(logger, ind, dates, time_series_config, data_collections, + data_collections_tmp) + + if not suppress_collection_display: + logger.info('Computing of Eva time series complete: status of collection:') + data_collections.display_collections() # -------------------------------------------------------------------------------------------------- @@ -49,6 +203,7 @@ def eva(eva_config, eva_logger=None): logger.info('Starting Eva') # Convert incoming config (either dictionary or file) to dictionary + # ----------------------------------------------------------------- timing.start('Generate Dictionary') if isinstance(eva_config, dict): eva_dict = eva_config @@ -58,28 +213,23 @@ def eva(eva_config, eva_logger=None): timing.stop('Generate Dictionary') # Each diagnostic should have two dictionaries: data and graphics + # --------------------------------------------------------------- if not all(sub_config in eva_dict for sub_config in ['datasets', 'graphics']): - msg = "diagnostic config must contain 'datasets' and 'graphics'" - raise KeyError(msg) + logger.abort("The configuration must contain 'datasets' and 'graphics' keys.") # Create the data collections # --------------------------- - data_collections = DataCollections() - - # Prepare diagnostic data - logger.info(f'Running data driver') - timing.start('DataDriverExecute') - data_driver(eva_dict, data_collections, timing, logger) - timing.stop('DataDriverExecute') + data_collections = DataCollections('time_series' in eva_dict) - # Create the transforms - if 'transforms' in eva_dict: - logger.info(f'Running transform driver') - timing.start('TransformDriverExecute') - transform_driver(eva_dict, data_collections, timing, logger) - timing.stop('TransformDriverExecute') + # Check to see if this is a time series run of eva and then read and transform + # ------------------------------------------------------------------------- + if 'time_series' in eva_dict: + read_transform_time_series(logger, timing, eva_dict, data_collections) + else: + read_transform(logger, timing, eva_dict, data_collections) # Generate figure(s) + # ------------------ logger.info(f'Running figure driver') timing.start('FigureDriverExecute') figure_driver(eva_dict, data_collections, timing, logger) diff --git a/src/eva/tests/config/testCsv.yaml b/src/eva/tests/config/testCsv.yaml index 5212b9f1..6c7146e3 100644 --- a/src/eva/tests/config/testCsv.yaml +++
b/src/eva/tests/config/testCsv.yaml @@ -37,13 +37,13 @@ graphics: - figure: layout: [3,1] figure size: [20,18] - tight layout: + #tight layout: title: "Valid: 2023062006" output name: line_plots/minimization/gfs_gdas.summary.gnorms.png - plot logo: - which: 'noaa/nws' - loc: 'upper left' - subplot_orientation: 'first' + #plot logo: + # which: 'noaa/nws' + # loc: 'upper left' + # subplot_orientation: 'first' plots: - add_xlabel: 'Cycle Time' diff --git a/src/eva/tests/config/testIodaObsSpaceAmsuaN19_TimeSeries.yaml b/src/eva/tests/config/testIodaObsSpaceAmsuaN19_TimeSeries.yaml new file mode 100644 index 00000000..477ceb27 --- /dev/null +++ b/src/eva/tests/config/testIodaObsSpaceAmsuaN19_TimeSeries.yaml @@ -0,0 +1,73 @@ +suppress_collection_display: False + +datasets: + - name: experiment + type: IodaObsSpace + filenames: + - ${data_input_path}/ioda_obs_space.amsua_n19.hofx.2020-12-14T210000Z.nc4 + channels: &channels 3,8 + groups: + - name: ObsValue + variables: &variables [brightnessTemperature] + - name: hofx + - name: experiment + type: IodaObsSpace + filenames: + - ${data_input_path}/ioda_obs_space.amsua_n19.hofx.2020-12-14T210000Z.nc4 + channels: *channels + groups: + - name: ObsValue + - name: hofx + +transforms: + + # Generate omb for JEDI + - transform: arithmetic + new name: experiment::ObsValueMinusHofx::${variable} + equals: experiment::ObsValue::${variable}-experiment::hofx::${variable} + for: + variable: *variables + +time_series: + + begin_date: '2020-12-14T21:00:00' + final_date: '2020-12-15T03:00:00' + interval: 'PT6H' + + collection: experiment + variables: + - ObsValueMinusHofx::brightnessTemperature + aggregation_methods: + - mean + dimension: Location + +graphics: + + plotting_backend: Emcpy + figure_list: + + # Time series line plots + # ---------------------- + + # Mean OmB time series + - figure: + layout: [1,1] + title: 'Mean OmB | AMSU-A NOAA-19 | ObsValueMinusHofx::brightnessTemperature' + output name: time_series/amsua_n19/brightnessTemperature_mean/3/time_series_omb.png + plots: + - add_xlabel: 'Datetime' + add_ylabel: 'Mean OmB' + add_grid: + add_legend: + loc: 'upper left' + layers: + - type: LinePlot + x: + variable: time_series::MetaData::Dates + y: + variable: time_series::ObsValueMinusHofx::brightnessTemperature_mean + channel: 3 + markersize: 5 + color: 'black' + label: 'Observation minus h(x)' + diff --git a/src/eva/tests/config/testJediVariationalBiasCorrectionAmsuaN19.yaml b/src/eva/tests/config/testJediVariationalBiasCorrectionAmsuaN19.yaml new file mode 100644 index 00000000..60cd9bb8 --- /dev/null +++ b/src/eva/tests/config/testJediVariationalBiasCorrectionAmsuaN19.yaml @@ -0,0 +1,88 @@ +suppress_collection_display: False + +datasets: + - name: experiment + type: JediVariationalBiasCorrection + bias_file: ${data_input_path}/gsi.x0048v2.bc.amsua_n19.2021-12-11T15:00:00Z.satbias + lapse_file: ${data_input_path}/gsi.x0048v2.bc.amsua_n19.2021-12-11T15:00:00Z.tlapse + + - name: experiment + type: JediVariationalBiasCorrection + bias_file: ${data_input_path}/gsi.x0048v2.bc.amsua_n19.2021-12-11T21:00:00Z.satbias + lapse_file: ${data_input_path}/gsi.x0048v2.bc.amsua_n19.2021-12-11T21:00:00Z.tlapse + +time_series: + + begin_date: '2020-12-15T00:00:00' + final_date: '2020-12-15T06:00:00' + interval: 'PT6H' + + collection: experiment + variables: + - all + +graphics: + + plotting_backend: Emcpy + figure_list: + + # Time series line plots + # ---------------------- + + # Bias coefficient time series + - figure: + layout: [1,1] + title: 'AMSUA-N19
Channel 7 Bias Coefficients' + output name: time_series/amsua_n19/varbc/7/varbc_time_series.png + plots: + - add_xlabel: 'Datetime' + add_ylabel: 'Bias Coefficients' + add_grid: + add_legend: + loc: 'upper left' + layers: + - type: LinePlot + x: + variable: time_series::MetaData::Dates + y: + variable: time_series::BiasCoefficients::constant + channel: 7 + markersize: 5 + color: 'blue' + label: 'BiasCoefficients::constant' + - type: LinePlot + x: + variable: time_series::MetaData::Dates + y: + variable: time_series::BiasCoefficients::sensorScanAngle + channel: 7 + markersize: 5 + color: 'orange' + label: 'BiasCoefficients::sensorScanAngle' + - type: LinePlot + x: + variable: time_series::MetaData::Dates + y: + variable: time_series::BiasCoefficients::sensorScanAngle_order_2 + channel: 7 + markersize: 5 + color: 'green' + label: 'BiasCoefficients::sensorScanAngle_order_2' + - type: LinePlot + x: + variable: time_series::MetaData::Dates + y: + variable: time_series::BiasCoefficients::sensorScanAngle_order_3 + channel: 7 + markersize: 5 + color: 'red' + label: 'BiasCoefficients::sensorScanAngle_order_3' + - type: LinePlot + x: + variable: time_series::MetaData::Dates + y: + variable: time_series::BiasCoefficients::sensorScanAngle_order_4 + channel: 7 + markersize: 5 + color: 'purple' + label: 'BiasCoefficients::sensorScanAngle_order_4' diff --git a/src/eva/tests/config/testMonDataSpaceHirs4Metop-A.yaml b/src/eva/tests/config/testMonDataSpaceHirs4Metop-A.yaml index 692e0c2b..61656cff 100644 --- a/src/eva/tests/config/testMonDataSpaceHirs4Metop-A.yaml +++ b/src/eva/tests/config/testMonDataSpaceHirs4Metop-A.yaml @@ -30,9 +30,9 @@ graphics: figure size: [12,10] title: 'hirs4_metop-a | Channel 1 | Obs Count' output name: lineplots/hirs4_metop-a/hirs4_metop-a.0.count.png - plot logo: - which: 'noaa/nws' - loc: 'upper right' + #plot logo: + # which: 'noaa/nws' + # loc: 'upper right' plots: - add_xlabel: 'Cycle Time' add_ylabel: 'Observation Count' diff --git a/src/eva/tests/config/testMonSummary.yaml b/src/eva/tests/config/testMonSummary.yaml index 87d1de5c..99c5b457 100644 --- a/src/eva/tests/config/testMonSummary.yaml +++ b/src/eva/tests/config/testMonSummary.yaml @@ -90,8 +90,8 @@ graphics: - figure: layout: [2,1] figure size: [12,10] - tight layout: - pad: 5 + #tight layout: + # pad: 5 title: 'hirs4_metop-a | Number of Obs Count' output name: lineplots/hirs4_metop-a/summary.hirs4_metop-a.png diff --git a/src/eva/tests/data/gsi.x0048v2.bc.amsua_n19.2021-12-11T15:00:00Z.satbias b/src/eva/tests/data/gsi.x0048v2.bc.amsua_n19.2021-12-11T15:00:00Z.satbias new file mode 100644 index 00000000..473d0c97 --- /dev/null +++ b/src/eva/tests/data/gsi.x0048v2.bc.amsua_n19.2021-12-11T15:00:00Z.satbias @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:671c1d6d680cde37fcfa5077d871ce6aa38a3ebce0b23823f10cadc2466163ac +size 26023 diff --git a/src/eva/tests/data/gsi.x0048v2.bc.amsua_n19.2021-12-11T15:00:00Z.tlapse b/src/eva/tests/data/gsi.x0048v2.bc.amsua_n19.2021-12-11T15:00:00Z.tlapse new file mode 100644 index 00000000..9619de51 --- /dev/null +++ b/src/eva/tests/data/gsi.x0048v2.bc.amsua_n19.2021-12-11T15:00:00Z.tlapse @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60abf992a6ba520499c626e494d9fabb1318995454caaf9b6aa28d65aec6e9da +size 387 diff --git a/src/eva/tests/data/gsi.x0048v2.bc.amsua_n19.2021-12-11T21:00:00Z.satbias b/src/eva/tests/data/gsi.x0048v2.bc.amsua_n19.2021-12-11T21:00:00Z.satbias new file mode 100644 index 00000000..473d0c97 --- /dev/null +++ 
b/src/eva/tests/data/gsi.x0048v2.bc.amsua_n19.2021-12-11T21:00:00Z.satbias @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:671c1d6d680cde37fcfa5077d871ce6aa38a3ebce0b23823f10cadc2466163ac +size 26023 diff --git a/src/eva/tests/data/gsi.x0048v2.bc.amsua_n19.2021-12-11T21:00:00Z.tlapse b/src/eva/tests/data/gsi.x0048v2.bc.amsua_n19.2021-12-11T21:00:00Z.tlapse new file mode 100644 index 00000000..9619de51 --- /dev/null +++ b/src/eva/tests/data/gsi.x0048v2.bc.amsua_n19.2021-12-11T21:00:00Z.tlapse @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60abf992a6ba520499c626e494d9fabb1318995454caaf9b6aa28d65aec6e9da +size 387 diff --git a/src/eva/tests/data/ioda_obs_space.amsua_n19.hofx.2020-12-15T210000Z.nc4 b/src/eva/tests/data/ioda_obs_space.amsua_n19.hofx.2020-12-15T210000Z.nc4 new file mode 100644 index 00000000..93744827 --- /dev/null +++ b/src/eva/tests/data/ioda_obs_space.amsua_n19.hofx.2020-12-15T210000Z.nc4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e477ae290baf6837445b83de76ecba78ea30a27fbe6e64e37de385df77d68dae +size 191780 diff --git a/src/eva/time_series/__init__.py b/src/eva/time_series/__init__.py new file mode 100644 index 00000000..bf82927f --- /dev/null +++ b/src/eva/time_series/__init__.py @@ -0,0 +1,11 @@ +# (C) Copyright 2024- NOAA/NWS/EMC +# +# (C) Copyright 2024- United States Government as represented by the Administrator of the +# National Aeronautics and Space Administration. All Rights Reserved. +# +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. + +import os + +repo_directory = os.path.dirname(__file__) diff --git a/src/eva/time_series/time_series.py b/src/eva/time_series/time_series.py new file mode 100644 index 00000000..399356eb --- /dev/null +++ b/src/eva/time_series/time_series.py @@ -0,0 +1,100 @@ +# (C) Copyright 2024- NOAA/NWS/EMC +# +# (C) Copyright 2024- United States Government as represented by the Administrator of the +# National Aeronautics and Space Administration. All Rights Reserved. +# +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. 
+ + +# -------------------------------------------------------------------------------------------------- + + +import numpy as np +import xarray as xr + + +# -------------------------------------------------------------------------------------------------- + + +xr_aggregation_methods = { + 'mean': lambda ds, dim: ds.mean(dim=dim, skipna=True), + 'sum': lambda ds, dim: ds.sum(dim=dim, skipna=True), +} + + +# -------------------------------------------------------------------------------------------------- + + +def collapse_collection_to_time_series(logger, ind, dates, time_series_config, data_collections, + data_collections_tmp): + + # Parse the configuration + # ----------------------- + + # Collection + collection_to_ts = time_series_config['collection'] + + # Variables + var_to_ts = time_series_config['variables'] + + # Optional: aggregation methods + aggregation_methods = time_series_config.get('aggregation_methods', []) + + # If specifying aggregation methods it must be accompanied by a dimension + if aggregation_methods: + logger.assert_abort('dimension' in time_series_config, 'When specifying aggregation ' 'methods a dimension must also be specified.') + dimension = time_series_config['dimension'] + + # Get the actual dataset for this step in the time series + # ------------------------------------------------------- + dataset_tmp = data_collections_tmp.get_data_collection(collection_to_ts) + + # Remove any variables that are not requested for the time series + if var_to_ts != ['all']: + variables_to_remove = [var for var in list(dataset_tmp.data_vars) if var not in var_to_ts] + dataset_tmp = dataset_tmp.drop_vars(variables_to_remove) + + # Create an empty dataset to hold the aggregated data + # --------------------------------------------------- + dataset_aggregated = xr.Dataset() + + # If there is no aggregation method specified, just add the dataset to the time series + if not aggregation_methods: + dataset_aggregated = xr.merge([dataset_aggregated, dataset_tmp]) + else: + for aggregation_method in aggregation_methods: + # Assert that the aggregation method is supported + logger.assert_abort(aggregation_method in xr_aggregation_methods, + f'Unknown aggregation method {aggregation_method}') + + # Compute the aggregation_method + dataset_am = xr_aggregation_methods[aggregation_method](dataset_tmp, dim=dimension) + + # Append each variable name in dataset_am with _aggregation_method + rename_dict = {var: f"{var}_{aggregation_method}" for var in dataset_am.data_vars} + dataset_am = dataset_am.rename(rename_dict) + + # Merge all the results into the aggregated dataset + dataset_aggregated = xr.merge([dataset_aggregated, dataset_am]) + + # Get all dims of dataset_aggregated and create an array of the current date with those dims + # ------------------------------------------------------------------------- + dims = {dim: dataset_aggregated.sizes[dim] for dim in dataset_aggregated.dims} + data_array_shape = tuple(dims[dim] for dim in dims) + dataset_aggregated['MetaData::Dates'] = xr.DataArray(np.full(data_array_shape, dates[ind]), + dims=dataset_aggregated.dims) + + # Add the time index to the aggregated dataset + # -------------------------------------------- + dataset_aggregated = dataset_aggregated.expand_dims('TimeIndex') + dataset_aggregated['TimeIndex'] = [0] + + # Append the dataset with the aggregation + concat_dimension = 'TimeIndex' if ind > 0 else None + data_collections.create_or_add_to_collection('time_series', dataset_aggregated, + concat_dimension) + + +#
-------------------------------------------------------------------------------------------------- diff --git a/src/eva/transforms/select_time.py b/src/eva/transforms/select_time.py index 15522475..fb4ad52f 100644 --- a/src/eva/transforms/select_time.py +++ b/src/eva/transforms/select_time.py @@ -122,6 +122,5 @@ def select_time(config, data_collections): data_collections.add_variable_to_collection(cgv_new[0], cgv_new[1], cgv_new[2], select_time) - data_collections.display_collections() # -------------------------------------------------------------------------------------------------- diff --git a/src/eva/transforms/transform_driver.py b/src/eva/transforms/transform_driver.py index 85e89db9..a092eb4e 100644 --- a/src/eva/transforms/transform_driver.py +++ b/src/eva/transforms/transform_driver.py @@ -60,10 +60,4 @@ def transform_driver(config, data_collections, timing, logger): transform_method(transform, data_collections) timing.stop(f'Transform: {transform_type}') - # Display the contents of the collections after updating with transforms - if transforms: - logger.info('Tranforms complete. Collections after transforms:') - data_collections.display_collections() - - # -------------------------------------------------------------------------------------------------- diff --git a/src/eva/utilities/duration.py b/src/eva/utilities/duration.py new file mode 100644 index 00000000..2b9b44fc --- /dev/null +++ b/src/eva/utilities/duration.py @@ -0,0 +1,63 @@ +# (C) Copyright 2021-2022 United States Government as represented by the Administrator of the +# National Aeronautics and Space Administration. All Rights Reserved. +# +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. + +# -------------------------------------------------------------------------------------------------- + + +from datetime import timedelta +import re + +from eva.utilities.logger import Logger + + +# -------------------------------------------------------------------------------------------------- + + +def iso_duration_to_timedelta(logger: Logger, iso_duration: str) -> timedelta: + """ + Convert an ISO 8601 duration string to a datetime.timedelta object. + + Parameters: + logger (Logger): Logger instance used to report unsupported duration fields. + iso_duration (str): The ISO 8601 duration string. + + Returns: + timedelta: The equivalent datetime.timedelta object. + """ + pattern = re.compile( + r'P' # starts with 'P' + r'(?:(?P<years>\d+)Y)?' # years + r'(?:(?P<months>\d+)M)?' # months + r'(?:(?P<days>\d+)D)?' # days + r'(?:T' # starts time part with 'T' + r'(?:(?P<hours>\d+)H)?' # hours + r'(?:(?P<minutes>\d+)M)?' # minutes + r'(?:(?P<seconds>\d+)S)?' # seconds + r')?'
# end time part + ) + + match = pattern.match(iso_duration) + if not match: + raise ValueError(f"Invalid ISO 8601 duration string: {iso_duration}") + + parts = match.groupdict() + years = int(parts['years']) if parts['years'] else 0 + months = int(parts['months']) if parts['months'] else 0 + days = int(parts['days']) if parts['days'] else 0 + hours = int(parts['hours']) if parts['hours'] else 0 + minutes = int(parts['minutes']) if parts['minutes'] else 0 + seconds = int(parts['seconds']) if parts['seconds'] else 0 + + # Assert that years and months are zero + logger.assert_abort(years == 0, "Years are not supported in parsing ISO 8601 durations.") + logger.assert_abort(months == 0, "Months are not supported in parsing ISO 8601 durations.") + + # Compute total seconds + total_seconds = hours * 3600 + minutes * 60 + seconds + + return timedelta(days=days, seconds=total_seconds) + + +# -------------------------------------------------------------------------------------------------- diff --git a/src/eva/utilities/logger.py b/src/eva/utilities/logger.py index 54cbeac3..5288d228 100644 --- a/src/eva/utilities/logger.py +++ b/src/eva/utilities/logger.py @@ -9,6 +9,7 @@ import os import sys +import traceback # -------------------------------------------------------------------------------------------------- @@ -20,12 +21,12 @@ class textcolors: A class that defines color codes for text in the terminal. """ + red = '\033[91m' blue = '\033[94m' cyan = '\033[96m' green = '\033[92m' end = '\033[0m' - # -------------------------------------------------------------------------------------------------- @@ -64,6 +65,9 @@ def __init__(self, task_name): if log_env is not None: self.loggerdict[loglevel] = int(log_env) == 1 + # Text colors + self.tc = textcolors() + # ---------------------------------------------------------------------------------------------- def send_message(self, level, message): @@ -146,7 +150,29 @@ def abort(self, message): None """ - self.send_message("ABORT", message) - sys.exit('ABORT\n') + # Make the text red + message = self.tc.red + message + self.tc.end + + self.send_message('ABORT', message) + + # Get traceback stack (without logger.py lines) + filtered_stack = [line for line in traceback.format_stack() if 'logger.py' not in line] + + # Remove everything after 'logger.assert_abort' in last element of filtered_stack + filtered_stack[-1] = filtered_stack[-1].split('logger.assert_abort')[0] + + traceback_str = '\n'.join(filtered_stack) + + # Exit with traceback + sys.exit('\nHERE IS THE TRACEBACK: \n----------------------\n\n' + traceback_str) + + # ---------------------------------------------------------------------------------------------- + + def assert_abort(self, condition, message): + + """ + Abort, with the provided message, if the condition is False. + + Args: + condition (bool): Condition that must hold for execution to continue. + message (str): Message to display on abort. + + Returns: + None + """ + + if not condition: + self.abort(message) # --------------------------------------------------------------------------------------------------
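
---

Reviewer note (not part of the patch): below is a minimal sketch of how the pieces added in this PR fit together. The function and class names (`iso_duration_to_timedelta`, `Logger`) and the dates come straight from the files above; the standalone script setup is illustrative only and assumes eva is installed.

```python
from datetime import datetime

from eva.utilities.duration import iso_duration_to_timedelta
from eva.utilities.logger import Logger

# Logger takes a task name, per logger.py
logger = Logger('TimeSeriesSketch')

# Dates taken from testIodaObsSpaceAmsuaN19_TimeSeries.yaml
begin_date = datetime.fromisoformat('2020-12-14T21:00:00')
final_date = datetime.fromisoformat('2020-12-15T03:00:00')

# 'PT6H' -> timedelta(hours=6); years/months are rejected via assert_abort
interval = iso_duration_to_timedelta(logger, 'PT6H')

# Mirrors the loop in read_transform_time_series: one datetime per dataset
dates = []
date = begin_date
while date <= final_date:
    dates.append(date)
    date += interval

# Two dates, so the config must list exactly two datasets
print(dates)  # [datetime(2020, 12, 14, 21, 0), datetime(2020, 12, 15, 3, 0)]
```

Each dataset in the config is then read into a temporary `DataCollections`, transformed, optionally aggregated (for example, `mean` over `Location`), stamped with `MetaData::Dates`, and concatenated along `TimeIndex` into the single `time_series` collection that the graphics section plots.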