diff --git a/.gitattributes b/.gitattributes index a9d86eae..3cae5ebc 100644 --- a/.gitattributes +++ b/.gitattributes @@ -3,3 +3,5 @@ *.res filter=lfs diff=lfs merge=lfs -text *.odb filter=lfs diff=lfs merge=lfs -text *.ipynb filter=lfs diff=lfs merge=lfs -text +*.satbias filter=lfs diff=lfs merge=lfs -text +*.tlapse filter=lfs diff=lfs merge=lfs -text diff --git a/src/eva/data/csv_space.py b/src/eva/data/csv_space.py index a853c182..2ed34698 100644 --- a/src/eva/data/csv_space.py +++ b/src/eva/data/csv_space.py @@ -98,9 +98,6 @@ def execute(self, dataset_config, data_collections, timing): data_collections.create_or_add_to_collection(collection_name, ds) ds.close() - # Display the contents of the collections for helping the user with making plots - data_collections.display_collections() - # ---------------------------------------------------------------------------------------------- def generate_default_config(self, filenames, collection_name): diff --git a/src/eva/data/cubed_sphere_restart.py b/src/eva/data/cubed_sphere_restart.py index d40bff3d..1f6f5d21 100644 --- a/src/eva/data/cubed_sphere_restart.py +++ b/src/eva/data/cubed_sphere_restart.py @@ -142,10 +142,6 @@ def execute(self, dataset_config, data_collections, timing): # ------------------------- data_collections.nan_float_values_outside_threshold(threshold) - # Display the contents of the collections for helping the user with making plots - # ------------------------- - data_collections.display_collections() - # ---------------------------------------------------------------------------------------------- def generate_default_config(self, filenames, collection_name): diff --git a/src/eva/data/data_collections.py b/src/eva/data/data_collections.py index 20d32109..0b8a711b 100644 --- a/src/eva/data/data_collections.py +++ b/src/eva/data/data_collections.py @@ -32,7 +32,7 @@ class DataCollections: """Manage collections of xarray Datasets with variable manipulations.""" - def __init__(self): + def __init__(self, time_series=False): """Initialize the DataCollections instance.""" @@ -42,6 +42,9 @@ def __init__(self): # Create a logger self.logger = Logger('DataCollections') + # If this is a time series, store it + self.time_series = bool(time_series) + # ---------------------------------------------------------------------------------------------- def create_or_add_to_collection(self, collection_name, collection, concat_dimension=None): @@ -61,6 +64,11 @@ def create_or_add_to_collection(self, collection_name, collection, concat_dimens ValueError: If concatenation dimension is missing or invalid. """ + # If time_series, the collection name must also be 'time_series' + if self.time_series and collection_name != 'time_series': + self.logger.abort('In create_or_add_to_collection: time_series collection must ' + + 'be \'time_series\'') + # Collections should only be xarray datasets if not isinstance(collection, Dataset): self.logger.abort('In add_collection: collection must be an xarray.Dataset') @@ -149,6 +157,11 @@ def add_variable_to_collection(self, collection_name, group_name, variable_name, ValueError: If variable is not an xarray DataArray.
""" + # If time_series collection name must also be time_series + if self.time_series and collection_name != 'time_series': + self.logger.abort('In add_variable_to_collection: time_series collection must ' + + 'be \'time_series\'') + # Assert that new variable is an xarray Dataarray if not isinstance(variable, DataArray): self.logger.abort('In add_variable_to_collection: variable must be xarray.DataArray') @@ -197,6 +210,11 @@ def get_variable_data_array(self, collection_name, group_name, variable_name, is missing. """ + # If time_series collection name must also be time_series + if self.time_series and collection_name != 'time_series': + self.logger.abort('In get_variable_data_array: time_series collection must ' + + 'be \'time_series\'') + group_variable_name = group_name + '::' + variable_name data_array = self._collections[collection_name][group_variable_name] @@ -274,6 +292,11 @@ def get_variable_data(self, collection_name, group_name, variable_name, ndarray: The selected variable data as a NumPy array. """ + # If time_series collection name must also be time_series + if self.time_series and collection_name != 'time_series': + self.logger.abort('In get_variable_data: time_series collection must ' + + 'be \'time_series\'') + variable_array = self.get_variable_data_array(collection_name, group_name, variable_name, channels, levels, datatypes) @@ -378,6 +401,7 @@ def display_collections(self): 'float32': '{:+.4e}', 'int64': '{:+11d}', 'int32': '{:+11d}', + 'datetime64[ns]': '{}' } # Display a list of variables that are available in the collection @@ -388,7 +412,7 @@ def display_collections(self): self.logger.info('Collection name: ' + fcol.underline + collection + fcol.end) self.logger.info('\n Dimensions:') for dim in list(self._collections[collection].dims): - dim_value = self._collections[collection].dims[dim] + dim_value = self._collections[collection].sizes[dim] self.logger.info(f' {dim}: {dim_value}') self.logger.info('\n Coordinates:') for coord in list(self._collections[collection].coords): @@ -411,8 +435,25 @@ def display_collections(self): rms = np.sqrt(np.nanmean(data_var_value**2)) rms_string = ', RMS=' + minmaxrms_format.format(rms) minmaxrms_string = ' | ' + min_string + ', ' + max_string + rms_string - self.logger.info(' ' + data_var.ljust(max_name_len) + ' (' + - str(data_var_value.dtype).ljust(7) + ')' + minmaxrms_string) + full_str = ' ' + data_var.ljust(max_name_len) + ' (' + \ + str(data_var_value.dtype)[0:7].ljust(7) + ')' + minmaxrms_string + else: + # No min/max + min_string = '' + max_string = '' + minmaxrms_string = ' | ' + min_string + ', ' + max_string + full_str = ' ' + data_var.ljust(max_name_len) + ' (' + \ + str(data_var_value.dtype)[0:7].ljust(7) + ')' + minmaxrms_string + self.logger.info(full_str) + + # Add the raw xarray display of the collection for more information about coords/dims + self.logger.info(' ') + self.logger.info('/'*80) + self.logger.info(' ') + self.logger.info(f'Raw xarray display of the {fcol.underline + collection + fcol.end} ' + + 'collection:') + self.logger.info(' ') + self.logger.info(str(self._collections[collection])) self.logger.info('-'*80) # ---------------------------------------------------------------------------------------------- diff --git a/src/eva/data/data_driver.py b/src/eva/data/data_driver.py index 70ac3e1b..5e74df0d 100644 --- a/src/eva/data/data_driver.py +++ b/src/eva/data/data_driver.py @@ -11,55 +11,40 @@ # -------------------------------------------------------------------------------------------------- 
-from eva.utilities.config import get from eva.data.eva_dataset_base import EvaDatasetFactory -import importlib -import os - # -------------------------------------------------------------------------------------------------- -def data_driver(config, data_collections, timing, logger): +def data_driver(dataset_config, data_collections, timing, logger): """ Driver for executing data processing. Args: - config (dict): Configuration settings for data processing. + dataset_config (dict): Configuration settings for data processing. data_collections (DataCollections): Instance of the DataCollections class. timing (Timing): Timing instance for performance measurement. logger (Logger): Logger instance for logging messages. """ - # Get list of dataset dictionaries - datasets = get(config, logger, 'datasets') - - # Loop over datasets - for dataset in datasets: + # Check if the dataset_config contains the 'type' key + logger.assert_abort('type' in dataset_config, 'Each dataset must have a \'type\' key') - # Extract name for this diagnostic data type - try: - eva_data_class_name = dataset['type'] - except Exception as e: - msg = '\'type\' key not found. \'diagnostic_data_config\': ' \ - f'{diagnostic_data_config}, error: {e}' - raise KeyError(msg) + # Extract name for this diagnostic data type + eva_data_class_name = dataset_config['type'] - # Create the data object - creator = EvaDatasetFactory() - timing.start('DataObjectConstructor') - eva_data_object = creator.create_eva_object(eva_data_class_name, - 'data', - logger, - timing) - timing.stop('DataObjectConstructor') + # Create the data object + creator = EvaDatasetFactory() + timing.start('DataObjectConstructor') + eva_data_object = creator.create_eva_object(eva_data_class_name, 'data', logger, timing) + timing.stop('DataObjectConstructor') - # Prepare diagnostic data - logger.info(f'Running execute for {eva_data_object.name}') - timing.start('DataObjectExecute') - eva_data_object.execute(dataset, data_collections, timing) - timing.stop('DataObjectExecute') + # Prepare diagnostic data + logger.info(f'Running execute for {eva_data_object.name}') + timing.start('DataObjectExecute') + eva_data_object.execute(dataset_config, data_collections, timing) + timing.stop('DataObjectExecute') # -------------------------------------------------------------------------------------------------- diff --git a/src/eva/data/geoval_space.py b/src/eva/data/geoval_space.py index 27c6bd28..447d4ec2 100644 --- a/src/eva/data/geoval_space.py +++ b/src/eva/data/geoval_space.py @@ -86,9 +86,6 @@ def execute(self, dataset_config, data_collections, timing): # Nan out unphysical values data_collections.nan_float_values_outside_threshold(threshold) - # Display the contents of the collections for helping the user with making plots - data_collections.display_collections() - def generate_default_config(self, filenames, collection_name): """ diff --git a/src/eva/data/gsi_obs_space.py b/src/eva/data/gsi_obs_space.py index bebb930a..cad45a5a 100644 --- a/src/eva/data/gsi_obs_space.py +++ b/src/eva/data/gsi_obs_space.py @@ -297,9 +297,6 @@ def execute(self, dataset_config, data_collections, timeing): # Change the channel dimension name data_collections.adjust_channel_dimension_name('nchans') - # Display the contents of the collections for helping the user with making plots - data_collections.display_collections() - # ---------------------------------------------------------------------------------------------- def generate_default_config(self, filenames, collection_name): diff --git 
a/src/eva/data/ioda_obs_space.py b/src/eva/data/ioda_obs_space.py index cca423a7..895e08ed 100644 --- a/src/eva/data/ioda_obs_space.py +++ b/src/eva/data/ioda_obs_space.py @@ -277,9 +277,6 @@ def execute(self, dataset_config, data_collections, timing): # Nan out unphysical values data_collections.nan_float_values_outside_threshold(threshold) - # Display the contents of the collections for helping the user with making plots - data_collections.display_collections() - def generate_default_config(self, filenames, collection_name): """ diff --git a/src/eva/data/jedi_log.py b/src/eva/data/jedi_log.py index 56f5be1c..95030fcc 100644 --- a/src/eva/data/jedi_log.py +++ b/src/eva/data/jedi_log.py @@ -120,9 +120,6 @@ def execute(self, dataset_config, data_collections, timing): # Add to the Eva dataset data_collections.create_or_add_to_collection(collection_name, convergence_ds) - # Write out all the collections - data_collections.display_collections() - # ---------------------------------------------------------------------------------------------- def get_from_log(self, search_term, separator, position, custom_log=None): diff --git a/src/eva/data/jedi_variational_bias_correction.py b/src/eva/data/jedi_variational_bias_correction.py new file mode 100644 index 00000000..1b377895 --- /dev/null +++ b/src/eva/data/jedi_variational_bias_correction.py @@ -0,0 +1,156 @@ +# (C) Copyright 2024- NOAA/NWS/EMC +# +# (C) Copyright 2024- United States Government as represented by the Administrator of the +# National Aeronautics and Space Administration. All Rights Reserved. +# +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. + + +# -------------------------------------------------------------------------------------------------- + + ++import numpy as np +import xarray as xr + +from eva.data.eva_dataset_base import EvaDatasetBase +from eva.utilities.config import get + + +# -------------------------------------------------------------------------------------------------- + + +class JediVariationalBiasCorrection(EvaDatasetBase): + + """ + A class for executing data read for JEDI variational bias correction data. + + Args: + EvaDatasetBase (class): The base class for dataset processing. + + Attributes: + N/A + + Methods: + execute(dataset_config, data_collections, timing): + Executes data read and transition to data collection for JEDI variational bias + correction data. + + generate_default_config(filenames, collection_name): + Generates a default configuration dictionary for JEDI variational bias correction, + used for more easily accessing the class interactively. + + Notes: + - The class inherits from `EvaDatasetBase` and extends its functionality. + """ + + # ---------------------------------------------------------------------------------------------- + + def execute(self, dataset_config, data_collections, timing): + + """ + Executes data read for JEDI variational bias correction data. + + Args: + dataset_config (dict): Configuration settings for the dataset. + data_collections (DataCollection): The data collection to store read data. + timing (Timing): Timing information for profiling.
+ + Returns: + None + """ + + # Check for required keys in config + required_keys = [ + 'name', + 'bias_file', + 'lapse_file', + ] + for key in required_keys: + self.logger.assert_abort(key in dataset_config, "For JediVariationalBiasCorrection " + + f"the config must contain key: {key}") + + # Parse config + collection_name = dataset_config['name'] + bias_file = get(dataset_config, self.logger, 'bias_file') + lapse_file = get(dataset_config, self.logger, 'lapse_file') + + groups = [ + 'BiasCoefficients', + 'BiasCoefficientErrors', + ] + + # Create a new dataset to store the bias data + bias_dataset = xr.open_dataset(bias_file) + + # Assert that the Record dimension has size 1 + self.logger.assert_abort(len(bias_dataset['Record']) == 1, 'This code currently only ' 'supports reading VarBC files where the Record dimension is 1') + + # Loop over groups, open and store in bias_dataset + for group in groups: + dsg = xr.open_dataset(bias_file, group=group) + + # Rename variables with group + dsg = dsg.rename_vars({var: f'{group}::{var}' for var in dsg.data_vars}) + + # Store the data in the bias_dataset + bias_dataset = xr.merge([bias_dataset, dsg]) + + # Now add a coordinate for the Channel dimension going from 0 to nchannels-1 + bias_dataset = bias_dataset.assign_coords( + {'Channel': range(len(bias_dataset.Channel))} + ) + + # Squeeze the record dimension and remove record coordinate + bias_dataset = bias_dataset.squeeze('Record') + bias_dataset = bias_dataset.drop_vars('Record') + + # Rename numberObservationsUsed to Bias::numberObservationsUsed + bias_dataset = bias_dataset.rename_vars( + {'numberObservationsUsed': 'Bias::numberObservationsUsed'} + ) + + # Remove attributes from the dataset + bias_dataset.attrs = {} + + # Read the t-lapse file (text) into a list of lines + with open(lapse_file, 'r') as f: + lapse_data = f.readlines() + + # Store the third element of each line as a numpy array + lapse_data = np.array([float(line.split()[2]) for line in lapse_data]) + + # Store lapse data in bias_dataset with Channel as dimension/coordinate + bias_dataset['Bias::tlapse'] = xr.DataArray(lapse_data, dims=['Channel']) + + # Add bias_dataset to data_collections + data_collections.create_or_add_to_collection(collection_name, bias_dataset) + + # ---------------------------------------------------------------------------------------------- + + def generate_default_config(self, filenames, collection_name): + + """ + Generates a default configuration dictionary for the class. + + Args: + filenames (list length 2): List of bias and lapse filenames for the data collection. + collection_name (str): Name of the data collection. + + Returns: + dict: A dictionary containing default configuration settings.
+ + Example: + :: + varbc_instance = JediVariationalBiasCorrection() + default_config = varbc_instance.generate_default_config([bias_file, lapse_file], + collection_name) + """ + + return { + 'bias_file': filenames[0], + 'lapse_file': filenames[1], + 'name': collection_name + } + + # ---------------------------------------------------------------------------------------------- diff --git a/src/eva/data/lat_lon.py b/src/eva/data/lat_lon.py index feb295a0..42b61499 100644 --- a/src/eva/data/lat_lon.py +++ b/src/eva/data/lat_lon.py @@ -61,9 +61,6 @@ def execute(self, dataset_config, data_collections, timing): ds.close() - # Display the contents of the collections for helping the user with making plots - data_collections.display_collections() - # ---------------------------------------------------------------------------------------------- def generate_default_config(self, filenames, collection_name): diff --git a/src/eva/data/mon_data_space.py b/src/eva/data/mon_data_space.py index a49dfff1..e96054f5 100644 --- a/src/eva/data/mon_data_space.py +++ b/src/eva/data/mon_data_space.py @@ -198,9 +198,6 @@ def execute(self, dataset_config, data_collections, timing): # Nan out unphysical values data_collections.nan_float_values_outside_threshold(threshold) - # Display the contents of the collections for helping the user with making plots - data_collections.display_collections() - # ---------------------------------------------------------------------------------------------- def generate_default_config(self, filenames, collection_name, control_file): diff --git a/src/eva/data/soca_restart.py b/src/eva/data/soca_restart.py index 17d73205..818775fb 100644 --- a/src/eva/data/soca_restart.py +++ b/src/eva/data/soca_restart.py @@ -108,10 +108,6 @@ def execute(self, dataset_config, data_collections, timing): # ------------------------- data_collections.nan_float_values_outside_threshold(threshold) - # Display the contents of the collections for helping the user with making plots - # ------------------------- - data_collections.display_collections() - # ---------------------------------------------------------------------------------------------- def generate_default_config(self, filenames, collection_name): diff --git a/src/eva/eva_driver.py b/src/eva/eva_driver.py index c9e79470..7c738a04 100644 --- a/src/eva/eva_driver.py +++ b/src/eva/eva_driver.py @@ -9,16 +9,170 @@ # -------------------------------------------------------------------------------------------------- +from datetime import datetime +import argparse +import os +from eva.utilities.config import get from eva.utilities.logger import Logger from eva.utilities.timing import Timing from eva.data.data_driver import data_driver +from eva.time_series.time_series import collapse_collection_to_time_series from eva.transforms.transform_driver import transform_driver from eva.plotting.batch.base.plot_tools.figure_driver import figure_driver from eva.data.data_collections import DataCollections +from eva.utilities.duration import iso_duration_to_timedelta from eva.utilities.utils import load_yaml_file -import argparse -import os + + +# -------------------------------------------------------------------------------------------------- + + +def read_transform(logger, timing, eva_dict, data_collections): + + """ + Read the data and perform any transforms based on the configuration. + + Parameters: + logger (Logger): An instance of the logger for logging messages. + timing (Timing): An instance of the timing object for timing the process.
+ eva_dict (dict): The configuration dictionary for the EVA process. + data_collections (DataCollections): An instance of the data collections object. + + Returns: + None + """ + + # Get the datasets configuration + datasets_config = get(eva_dict, logger, 'datasets') + + # Optionally suppress the display of the collection + suppress_collection_display = get(eva_dict, logger, 'suppress_collection_display', False) + + # Loop over datasets reading each one in turn, internally appending the data_collections + for dataset_config in datasets_config: + + # Prepare diagnostic data + logger.info('Running data driver') + timing.start('DataDriverExecute') + data_driver(dataset_config, data_collections, timing, logger) + timing.stop('DataDriverExecute') + + # After reading all datasets display the collection + if not suppress_collection_display: + logger.info('Reading of Eva data complete: status of collections: ') + data_collections.display_collections() + + # Perform any transforms + if 'transforms' in eva_dict: + logger.info('Running transform driver') + timing.start('TransformDriverExecute') + transform_driver(eva_dict, data_collections, timing, logger) + timing.stop('TransformDriverExecute') + + # After performing the transforms display the collection + if not suppress_collection_display: + logger.info('Transformations of data complete: status of collections: ') + data_collections.display_collections() + + +# -------------------------------------------------------------------------------------------------- + + +def read_transform_time_series(logger, timing, eva_dict, data_collections): + + """ + Read the data and perform transforms on the fly. Then collapse data into a time series. + + Parameters: + logger (Logger): An instance of the logger for logging messages. + timing (Timing): An instance of the timing object for timing the process. + eva_dict (dict): The configuration dictionary for the EVA process. + data_collections (DataCollections): An instance of the data collections object. + + Returns: + None + """ + + # Check for required keys + # ----------------------- + required_keys = [ + 'begin_date', + 'final_date', + 'interval', + 'collection', + 'variables', + ] + for key in required_keys: + logger.assert_abort(key in eva_dict['time_series'], 'If running Eva in time series ' + + f'mode the time series config must contain "{key}"') + + # Write message that this is a time series run + logger.info('This instance of Eva is being used to accumulate a time series.') + + # Optionally suppress the display of the collection + suppress_collection_display = get(eva_dict, logger, 'suppress_collection_display', False) + + # Get the time series configuration + time_series_config = eva_dict['time_series'] + + # Extract the dates of the time series + begin_date = time_series_config['begin_date'] + final_date = time_series_config['final_date'] + interval = time_series_config['interval'] + + # Convert begin and end dates from ISO strings to datetime objects + begin_date = datetime.fromisoformat(begin_date) + final_date = datetime.fromisoformat(final_date) + + # Convert interval ISO string to timedelta object + interval = iso_duration_to_timedelta(logger, interval) + + # Make list of dates from begin to end with interval + dates = [] + date = begin_date + count = 0 + while date <= final_date: + dates.append(date) + date += interval + count += 1 + # Abort if count hits one million + logger.assert_abort(count < 1e6, 'You are planning to read more than one million ' + + 'time steps. This is likely an error.
Please check your configuration.') + + # Get the datasets configuration + datasets_config = get(eva_dict, logger, 'datasets') + + # Assert that datasets_config is the same length as dates + logger.assert_abort(len(datasets_config) == len(dates), 'When running in time series mode ' + 'the number of datasets must be the same as the number of dates.') + + # Loop over datasets reading each one in turn, internally appending the data_collections + for ind, dataset_config in enumerate(datasets_config): + + # Create a temporary collection for this time step + data_collections_tmp = DataCollections() + + # Prepare diagnostic data + logger.info('Running data driver') + timing.start('DataDriverExecute') + data_driver(dataset_config, data_collections_tmp, timing, logger) + timing.stop('DataDriverExecute') + + # Perform any transforms on the fly + if 'transforms' in eva_dict: + logger.info('Running transform driver') + timing.start('TransformDriverExecute') + transform_driver(eva_dict, data_collections_tmp, timing, logger) + timing.stop('TransformDriverExecute') + + # Collapse data into time series + collapse_collection_to_time_series(logger, ind, dates, time_series_config, data_collections, + data_collections_tmp) + + if not suppress_collection_display: + logger.info('Computing of Eva time series complete: status of collection:') + data_collections.display_collections() # -------------------------------------------------------------------------------------------------- @@ -49,6 +203,7 @@ def eva(eva_config, eva_logger=None): logger.info('Starting Eva') # Convert incoming config (either dictionary or file) to dictionary + # ----------------------------------------------------------------- timing.start('Generate Dictionary') if isinstance(eva_config, dict): eva_dict = eva_config @@ -58,28 +213,23 @@ timing.stop('Generate Dictionary') # Each diagnostic should have two dictionaries: data and graphics + # --------------------------------------------------------------- if not all(sub_config in eva_dict for sub_config in ['datasets', 'graphics']): - msg = "diagnostic config must contain 'datasets' and 'graphics'" - raise KeyError(msg) + logger.abort("The configuration must contain 'datasets' and 'graphics' keys.") # Create the data collections # --------------------------- - data_collections = DataCollections() - - # Prepare diagnostic data - logger.info(f'Running data driver') - timing.start('DataDriverExecute') - data_driver(eva_dict, data_collections, timing, logger) - timing.stop('DataDriverExecute') + data_collections = DataCollections(time_series='time_series' in eva_dict) - # Create the transforms - if 'transforms' in eva_dict: - logger.info(f'Running transform driver') - timing.start('TransformDriverExecute') - transform_driver(eva_dict, data_collections, timing, logger) - timing.stop('TransformDriverExecute') + # Check to see if this is a time series run of eva and then read and transform + # ----------------------------------------------------------------------------- + if 'time_series' in eva_dict: + read_transform_time_series(logger, timing, eva_dict, data_collections) + else: + read_transform(logger, timing, eva_dict, data_collections) # Generate figure(s) + # ------------------ logger.info(f'Running figure driver') timing.start('FigureDriverExecute') figure_driver(eva_dict, data_collections, timing, logger) diff --git a/src/eva/tests/config/testCsv.yaml b/src/eva/tests/config/testCsv.yaml index 5212b9f1..6c7146e3 100644 --- a/src/eva/tests/config/testCsv.yaml +++
b/src/eva/tests/config/testCsv.yaml @@ -37,13 +37,13 @@ graphics: - figure: layout: [3,1] figure size: [20,18] - tight layout: + #tight layout: title: "Valid: 2023062006" output name: line_plots/minimization/gfs_gdas.summary.gnorms.png - plot logo: - which: 'noaa/nws' - loc: 'upper left' - subplot_orientation: 'first' + #plot logo: + # which: 'noaa/nws' + # loc: 'upper left' + # subplot_orientation: 'first' plots: - add_xlabel: 'Cycle Time' diff --git a/src/eva/tests/config/testIodaObsSpaceAmsuaN19_TimeSeries.yaml b/src/eva/tests/config/testIodaObsSpaceAmsuaN19_TimeSeries.yaml new file mode 100644 index 00000000..477ceb27 --- /dev/null +++ b/src/eva/tests/config/testIodaObsSpaceAmsuaN19_TimeSeries.yaml @@ -0,0 +1,73 @@ +suppress_collection_display: False + +datasets: + - name: experiment + type: IodaObsSpace + filenames: + - ${data_input_path}/ioda_obs_space.amsua_n19.hofx.2020-12-14T210000Z.nc4 + channels: &channels 3,8 + groups: + - name: ObsValue + variables: &variables [brightnessTemperature] + - name: hofx + - name: experiment + type: IodaObsSpace + filenames: + - ${data_input_path}/ioda_obs_space.amsua_n19.hofx.2020-12-14T210000Z.nc4 + channels: *channels + groups: + - name: ObsValue + - name: hofx + +transforms: + + # Generate omb for JEDI + - transform: arithmetic + new name: experiment::ObsValueMinusHofx::${variable} + equals: experiment::ObsValue::${variable}-experiment::hofx::${variable} + for: + variable: *variables + +time_series: + + begin_date: '2020-12-14T21:00:00' + final_date: '2020-12-15T03:00:00' + interval: 'PT6H' + + collection: experiment + variables: + - ObsValueMinusHofx::brightnessTemperature + aggregation_methods: + - mean + dimension: Location + +graphics: + + plotting_backend: Emcpy + figure_list: + + # Time series plots + # ----------------- + + # Mean OmB time series + - figure: + layout: [1,1] + title: 'Mean OmB | AMSU-A NOAA-19 | ObsValueMinusHofx::brightnessTemperature' + output name: time_series/amsua_n19/brightnessTemperature_mean/3/time_series_omb.png + plots: + - add_xlabel: 'Datetime' + add_ylabel: 'Observation minus h(x)' + add_grid: + add_legend: + loc: 'upper left' + layers: + - type: LinePlot + x: + variable: time_series::MetaData::Dates + y: + variable: time_series::ObsValueMinusHofx::brightnessTemperature_mean + channel: 3 + markersize: 5 + color: 'black' + label: 'Observation minus h(x)' + diff --git a/src/eva/tests/config/testJediVariationalBiasCorrectionAmsuaN19.yaml b/src/eva/tests/config/testJediVariationalBiasCorrectionAmsuaN19.yaml new file mode 100644 index 00000000..60cd9bb8 --- /dev/null +++ b/src/eva/tests/config/testJediVariationalBiasCorrectionAmsuaN19.yaml @@ -0,0 +1,88 @@ +suppress_collection_display: False + +datasets: + - name: experiment + type: JediVariationalBiasCorrection + bias_file: ${data_input_path}/gsi.x0048v2.bc.amsua_n19.2021-12-11T15:00:00Z.satbias + lapse_file: ${data_input_path}/gsi.x0048v2.bc.amsua_n19.2021-12-11T15:00:00Z.tlapse + + - name: experiment + type: JediVariationalBiasCorrection + bias_file: ${data_input_path}/gsi.x0048v2.bc.amsua_n19.2021-12-11T21:00:00Z.satbias + lapse_file: ${data_input_path}/gsi.x0048v2.bc.amsua_n19.2021-12-11T21:00:00Z.tlapse + +time_series: + + begin_date: '2020-12-15T00:00:00' + final_date: '2020-12-15T06:00:00' + interval: 'PT6H' + + collection: experiment + variables: + - all + +graphics: + + plotting_backend: Emcpy + figure_list: + + # Time series plots + # ----------------- + + # Bias coefficient time series + - figure: + layout: [1,1] + title: 'AMSUA-N19
Channel 7 Bias Coefficients' + output name: time_series/amsua_n19/varbc/7/varbc_time_series.png + plots: + - add_xlabel: 'Datetime' + add_ylabel: 'Bias Coefficients' + add_grid: + add_legend: + loc: 'upper left' + layers: + - type: LinePlot + x: + variable: time_series::MetaData::Dates + y: + variable: time_series::BiasCoefficients::constant + channel: 7 + markersize: 5 + color: 'blue' + label: 'BiasCoefficients::constant' + - type: LinePlot + x: + variable: time_series::MetaData::Dates + y: + variable: time_series::BiasCoefficients::sensorScanAngle + channel: 7 + markersize: 5 + color: 'orange' + label: 'BiasCoefficients::sensorScanAngle' + - type: LinePlot + x: + variable: time_series::MetaData::Dates + y: + variable: time_series::BiasCoefficients::sensorScanAngle_order_2 + channel: 7 + markersize: 5 + color: 'green' + label: 'BiasCoefficients::sensorScanAngle_order_2' + - type: LinePlot + x: + variable: time_series::MetaData::Dates + y: + variable: time_series::BiasCoefficients::sensorScanAngle_order_3 + channel: 7 + markersize: 5 + color: 'red' + label: 'BiasCoefficients::sensorScanAngle_order_3' + - type: LinePlot + x: + variable: time_series::MetaData::Dates + y: + variable: time_series::BiasCoefficients::sensorScanAngle_order_4 + channel: 7 + markersize: 5 + color: 'purple' + label: 'BiasCoefficients::sensorScanAngle_order_4' diff --git a/src/eva/tests/config/testMonDataSpaceHirs4Metop-A.yaml b/src/eva/tests/config/testMonDataSpaceHirs4Metop-A.yaml index 692e0c2b..61656cff 100644 --- a/src/eva/tests/config/testMonDataSpaceHirs4Metop-A.yaml +++ b/src/eva/tests/config/testMonDataSpaceHirs4Metop-A.yaml @@ -30,9 +30,9 @@ graphics: figure size: [12,10] title: 'hirs4_metop-a | Channel 1 | Obs Count' output name: lineplots/hirs4_metop-a/hirs4_metop-a.0.count.png - plot logo: - which: 'noaa/nws' - loc: 'upper right' + #plot logo: + # which: 'noaa/nws' + # loc: 'upper right' plots: - add_xlabel: 'Cycle Time' add_ylabel: 'Observation Count' diff --git a/src/eva/tests/config/testMonSummary.yaml b/src/eva/tests/config/testMonSummary.yaml index 87d1de5c..99c5b457 100644 --- a/src/eva/tests/config/testMonSummary.yaml +++ b/src/eva/tests/config/testMonSummary.yaml @@ -90,8 +90,8 @@ graphics: - figure: layout: [2,1] figure size: [12,10] - tight layout: - pad: 5 + #tight layout: + # pad: 5 title: 'hirs4_metop-a | Number of Obs Count' output name: lineplots/hirs4_metop-a/summary.hirs4_metop-a.png diff --git a/src/eva/tests/data/gsi.x0048v2.bc.amsua_n19.2021-12-11T15:00:00Z.satbias b/src/eva/tests/data/gsi.x0048v2.bc.amsua_n19.2021-12-11T15:00:00Z.satbias new file mode 100644 index 00000000..473d0c97 --- /dev/null +++ b/src/eva/tests/data/gsi.x0048v2.bc.amsua_n19.2021-12-11T15:00:00Z.satbias @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:671c1d6d680cde37fcfa5077d871ce6aa38a3ebce0b23823f10cadc2466163ac +size 26023 diff --git a/src/eva/tests/data/gsi.x0048v2.bc.amsua_n19.2021-12-11T15:00:00Z.tlapse b/src/eva/tests/data/gsi.x0048v2.bc.amsua_n19.2021-12-11T15:00:00Z.tlapse new file mode 100644 index 00000000..9619de51 --- /dev/null +++ b/src/eva/tests/data/gsi.x0048v2.bc.amsua_n19.2021-12-11T15:00:00Z.tlapse @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60abf992a6ba520499c626e494d9fabb1318995454caaf9b6aa28d65aec6e9da +size 387 diff --git a/src/eva/tests/data/gsi.x0048v2.bc.amsua_n19.2021-12-11T21:00:00Z.satbias b/src/eva/tests/data/gsi.x0048v2.bc.amsua_n19.2021-12-11T21:00:00Z.satbias new file mode 100644 index 00000000..473d0c97 --- /dev/null +++ 
b/src/eva/tests/data/gsi.x0048v2.bc.amsua_n19.2021-12-11T21:00:00Z.satbias @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:671c1d6d680cde37fcfa5077d871ce6aa38a3ebce0b23823f10cadc2466163ac +size 26023 diff --git a/src/eva/tests/data/gsi.x0048v2.bc.amsua_n19.2021-12-11T21:00:00Z.tlapse b/src/eva/tests/data/gsi.x0048v2.bc.amsua_n19.2021-12-11T21:00:00Z.tlapse new file mode 100644 index 00000000..9619de51 --- /dev/null +++ b/src/eva/tests/data/gsi.x0048v2.bc.amsua_n19.2021-12-11T21:00:00Z.tlapse @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60abf992a6ba520499c626e494d9fabb1318995454caaf9b6aa28d65aec6e9da +size 387 diff --git a/src/eva/tests/data/ioda_obs_space.amsua_n19.hofx.2020-12-15T210000Z.nc4 b/src/eva/tests/data/ioda_obs_space.amsua_n19.hofx.2020-12-15T210000Z.nc4 new file mode 100644 index 00000000..93744827 --- /dev/null +++ b/src/eva/tests/data/ioda_obs_space.amsua_n19.hofx.2020-12-15T210000Z.nc4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e477ae290baf6837445b83de76ecba78ea30a27fbe6e64e37de385df77d68dae +size 191780 diff --git a/src/eva/time_series/__init__.py b/src/eva/time_series/__init__.py new file mode 100644 index 00000000..bf82927f --- /dev/null +++ b/src/eva/time_series/__init__.py @@ -0,0 +1,11 @@ +# (C) Copyright 2024- NOAA/NWS/EMC +# +# (C) Copyright 2024- United States Government as represented by the Administrator of the +# National Aeronautics and Space Administration. All Rights Reserved. +# +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. + +import os + +repo_directory = os.path.dirname(__file__) diff --git a/src/eva/time_series/time_series.py b/src/eva/time_series/time_series.py new file mode 100644 index 00000000..399356eb --- /dev/null +++ b/src/eva/time_series/time_series.py @@ -0,0 +1,100 @@ +# (C) Copyright 2024- NOAA/NWS/EMC +# +# (C) Copyright 2024- United States Government as represented by the Administrator of the +# National Aeronautics and Space Administration. All Rights Reserved. +# +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. 
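+#
+# This module collapses the collection read for a single cycle into one step of the special
+# 'time_series' collection: variables can optionally be aggregated over a dimension (for
+# example a mean over Location), a MetaData::Dates variable holding the cycle date is added,
+# and successive steps are concatenated along a TimeIndex dimension.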
+ + +# -------------------------------------------------------------------------------------------------- + + +import numpy as np +import xarray as xr + + +# -------------------------------------------------------------------------------------------------- + + +xr_aggregation_methods = { + 'mean': lambda ds, dim: ds.mean(dim=dim, skipna=True), + 'sum': lambda ds, dim: ds.sum(dim=dim, skipna=True), +} + + +# -------------------------------------------------------------------------------------------------- + + +def collapse_collection_to_time_series(logger, ind, dates, time_series_config, data_collections, + data_collections_tmp): + + # Parse the configuration + # ----------------------- + + # Collection + collection_to_ts = time_series_config['collection'] + + # Variables + var_to_ts = time_series_config['variables'] + + # Optional: aggregation methods + aggregation_methods = time_series_config.get('aggregation_methods', []) + + # If specifying aggregation methods it must be accompanied by a dimension + if aggregation_methods: + logger.assert_abort('dimension' in time_series_config, 'When specifying aggregation ' + 'methods a dimension must also be specified.') + dimension = time_series_config['dimension'] + + # Get the actual dataset for this step in the time series + # ------------------------------------------------------- + dataset_tmp = data_collections_tmp.get_data_collection(collection_to_ts) + + # Remove any variables that are not to be aggregated + if var_to_ts != ['all']: + variables_to_remove = [var for var in list(dataset_tmp.data_vars) if var not in var_to_ts] + dataset_tmp = dataset_tmp.drop_vars(variables_to_remove) + + # Create an empty dataset to hold the aggregated data + # --------------------------------------------------- + dataset_aggregated = xr.Dataset() + + # If there is no aggregation method specified, just add the dataset to the time series + if not aggregation_methods: + dataset_aggregated = xr.merge([dataset_aggregated, dataset_tmp]) + else: + for aggregation_method in aggregation_methods: + # Assert that aggregation_method is in the aggregation methods + logger.assert_abort(aggregation_method in xr_aggregation_methods, + f'Unknown aggregation method {aggregation_method}') + + # Compute the aggregation_method + dataset_am = xr_aggregation_methods[aggregation_method](dataset_tmp, dim=dimension) + + # Append each variable name in dataset_am with _aggregation_method + rename_dict = {var: f"{var}_{aggregation_method}" for var in dataset_am.data_vars} + dataset_am = dataset_am.rename(rename_dict) + + # Merge all the results into the aggregated dataset + dataset_aggregated = xr.merge([dataset_aggregated, dataset_am]) + + # Get all dims of dataset_aggregated and create empty array with those dims + # ------------------------------------------------------------------------- + dims = {dim: dataset_aggregated.sizes[dim] for dim in dataset_aggregated.dims} + data_array_shape = tuple(dims[dim] for dim in dims) + dataset_aggregated['MetaData::Dates'] = xr.DataArray(np.full(data_array_shape, dates[ind]), + dims=dataset_aggregated.dims) + + # Add the time index to the aggregated dataset + # -------------------------------------------- + dataset_aggregated = dataset_aggregated.expand_dims('TimeIndex') + dataset_aggregated['TimeIndex'] = [0] + + # Append the dataset with the aggregation + concat_dimension = 'TimeIndex' if ind > 0 else None + data_collections.create_or_add_to_collection('time_series', dataset_aggregated, + concat_dimension) + + +# 
-------------------------------------------------------------------------------------------------- diff --git a/src/eva/transforms/select_time.py b/src/eva/transforms/select_time.py index 15522475..fb4ad52f 100644 --- a/src/eva/transforms/select_time.py +++ b/src/eva/transforms/select_time.py @@ -122,6 +122,5 @@ def select_time(config, data_collections): data_collections.add_variable_to_collection(cgv_new[0], cgv_new[1], cgv_new[2], select_time) - data_collections.display_collections() # -------------------------------------------------------------------------------------------------- diff --git a/src/eva/transforms/transform_driver.py b/src/eva/transforms/transform_driver.py index 85e89db9..a092eb4e 100644 --- a/src/eva/transforms/transform_driver.py +++ b/src/eva/transforms/transform_driver.py @@ -60,10 +60,4 @@ def transform_driver(config, data_collections, timing, logger): transform_method(transform, data_collections) timing.stop(f'Transform: {transform_type}') - # Display the contents of the collections after updating with transforms - if transforms: - logger.info('Tranforms complete. Collections after transforms:') - data_collections.display_collections() - - # -------------------------------------------------------------------------------------------------- diff --git a/src/eva/utilities/duration.py b/src/eva/utilities/duration.py new file mode 100644 index 00000000..2b9b44fc --- /dev/null +++ b/src/eva/utilities/duration.py @@ -0,0 +1,63 @@ +# (C) Copyright 2021-2022 United States Government as represented by the Administrator of the +# National Aeronautics and Space Administration. All Rights Reserved. +# +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. + +# -------------------------------------------------------------------------------------------------- + + +from datetime import timedelta +import re + +from eva.utilities.logger import Logger + + +# -------------------------------------------------------------------------------------------------- + + +def iso_duration_to_timedelta(logger: Logger, iso_duration: str) -> timedelta: + """ + Convert an ISO 8601 duration string to a datetime.timedelta object. + + Parameters: + logger (Logger): Logger instance used to report unsupported duration fields. + iso_duration (str): The ISO 8601 duration string. + + Returns: + timedelta: The equivalent datetime.timedelta object. + """ + pattern = re.compile( + r'P' # starts with 'P' + r'(?:(?P<years>\d+)Y)?' # years + r'(?:(?P<months>\d+)M)?' # months + r'(?:(?P<days>\d+)D)?' # days + r'(?:T' # starts time part with 'T' + r'(?:(?P<hours>\d+)H)?' # hours + r'(?:(?P<minutes>\d+)M)?' # minutes + r'(?:(?P<seconds>\d+)S)?' # seconds + r')?'
# end time part + ) + + match = pattern.match(iso_duration) + if not match: + raise ValueError(f"Invalid ISO 8601 duration string: {iso_duration}") + + parts = match.groupdict() + years = int(parts['years']) if parts['years'] else 0 + months = int(parts['months']) if parts['months'] else 0 + days = int(parts['days']) if parts['days'] else 0 + hours = int(parts['hours']) if parts['hours'] else 0 + minutes = int(parts['minutes']) if parts['minutes'] else 0 + seconds = int(parts['seconds']) if parts['seconds'] else 0 + + # Assert that years and months are zero + logger.assert_abort(years == 0, "Years are not supported in parsing ISO 8601 durations.") + logger.assert_abort(months == 0, "Months are not supported in parsing ISO 8601 durations.") + + # Compute total seconds + total_seconds = hours * 3600 + minutes * 60 + seconds + + return timedelta(days=days, seconds=total_seconds) + + # -------------------------------------------------------------------------------------------------- diff --git a/src/eva/utilities/logger.py b/src/eva/utilities/logger.py index 54cbeac3..5288d228 100644 --- a/src/eva/utilities/logger.py +++ b/src/eva/utilities/logger.py @@ -9,6 +9,7 @@ import os import sys +import traceback # -------------------------------------------------------------------------------------------------- @@ -20,12 +21,12 @@ class textcolors: A class that defines color codes for text in the terminal. """ + red = '\033[91m' blue = '\033[94m' cyan = '\033[96m' green = '\033[92m' end = '\033[0m' - # -------------------------------------------------------------------------------------------------- @@ -64,6 +65,9 @@ def __init__(self, task_name): if log_env is not None: self.loggerdict[loglevel] = int(log_env) == 1 + # Text colors + self.tc = textcolors() + # ---------------------------------------------------------------------------------------------- def send_message(self, level, message): @@ -146,7 +150,29 @@ def abort(self, message): None """ - self.send_message("ABORT", message) - sys.exit('ABORT\n') + # Make the text red + message = self.tc.red + message + self.tc.end + + self.send_message('ABORT', message) + + # Get traceback stack (without logger.py lines) + filtered_stack = [line for line in traceback.format_stack() if 'logger.py' not in line] + + # Remove everything after 'logger.assert_abort' in last element of filtered_stack + filtered_stack[-1] = filtered_stack[-1].split('logger.assert_abort')[0] + + traceback_str = '\n'.join(filtered_stack) + + # Exit with traceback + sys.exit('\nHERE IS THE TRACEBACK: \n----------------------\n\n' + traceback_str) + + # ---------------------------------------------------------------------------------------------- + + def assert_abort(self, condition, message): + + """ + Abort, via self.abort, when the given condition is False. + + Args: + condition (bool): Condition that must be True for execution to continue. + message (str): Message to display when aborting. + + Returns: + None + """ + + if not condition: + self.abort(message) # --------------------------------------------------------------------------------------------------
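# --------------------------------------------------------------------------------------------------

# A minimal usage sketch (illustrative only, not part of the patch) showing how the new duration
# parser and the date loop in read_transform_time_series fit together. The dates and interval
# below are taken from testIodaObsSpaceAmsuaN19_TimeSeries.yaml above.

from datetime import datetime

from eva.utilities.duration import iso_duration_to_timedelta
from eva.utilities.logger import Logger

logger = Logger('TimeSeriesSketch')

begin_date = datetime.fromisoformat('2020-12-14T21:00:00')
final_date = datetime.fromisoformat('2020-12-15T03:00:00')
interval = iso_duration_to_timedelta(logger, 'PT6H')  # -> timedelta(seconds=21600)

# Build the list of cycle dates the same way read_transform_time_series does; the 'datasets'
# list in the YAML must then contain one dataset configuration per date.
dates = []
date = begin_date
while date <= final_date:
    dates.append(date)
    date += interval

print(dates)  # [datetime(2020, 12, 14, 21, 0), datetime(2020, 12, 15, 3, 0)]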