Skip to content

Commit fa0df4c

Browse files
Merge branch 'develop' into develop
2 parents e428224 + a40e651 commit fa0df4c

9 files changed

+315
-1
lines changed

requirements-github.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ xarray>=2022.6.0
88
seaborn>=0.12.2
99
hvplot>=0.8.2
1010
nbconvert>=6.5.4
11-
bokeh>=3.1.1
11+
bokeh<3.5.0,>=3.4.0
1212
geopandas>=0.13.2
1313
geoviews>=1.10.0
1414
nbsite

src/eva/data/data_collections.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -169,6 +169,11 @@ def add_variable_to_collection(self, collection_name, group_name, variable_name,
169169

170170
# ----------------------------------------------------------------------------------------------
171171

172+
def get_data_collection(self, collection_name):

    """
    Return the collection stored under the given name.

    Args:
        collection_name: Key of the collection to look up in ``self._collections``.

    Returns:
        The object held in ``self._collections`` for ``collection_name``. The stored object
        itself is returned, not a copy.

    Raises:
        Whatever ``self._collections`` raises for an unknown key (``KeyError`` if it is a dict).
    """

    collections = self._collections
    return collections[collection_name]
174+
175+
# ----------------------------------------------------------------------------------------------
176+
172177
def get_variable_data_array(self, collection_name, group_name, variable_name,
173178
channels=None, levels=None, datatypes=None):
174179

src/eva/data/geoval_space.py

Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,109 @@
1+
# (C) Copyright 2024 NOAA/NWS/EMC
2+
#
3+
# (C) Copyright 2024 United States Government as represented by the Administrator of the
4+
# National Aeronautics and Space Administration. All Rights Reserved.
5+
#
6+
# This software is licensed under the terms of the Apache Licence Version 2.0
7+
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
8+
9+
# --------------------------------------------------------------------------------------------------
10+
11+
import os
12+
import netCDF4 as nc
13+
import numpy as np
14+
from xarray import Dataset, open_dataset
15+
from eva.utilities.config import get
16+
from eva.data.eva_dataset_base import EvaDatasetBase
17+
from eva.utilities.utils import parse_channel_list
18+
19+
20+
class GeovalSpace(EvaDatasetBase):

    """
    A class for handling geoval files.

    A single geoval file is opened with xarray, reduced to the variables requested in the
    configuration, renamed to ``<instrument_name>::<variable>`` and added to the data
    collections.
    """

    def execute(self, dataset_config, data_collections, timing):

        """
        Executes the processing of data file dataset.

        Args:
            dataset_config (dict): Configuration dictionary for the dataset. The keys read here
                are ``name``, ``data_file``, ``instrument_name`` and ``variables`` (required)
                and ``missing_value_threshold`` and ``levels`` (optional).
            data_collections (DataCollections): Object for managing data collections.
            timing (Timing): Timing object for tracking execution time. Not used by this method.
        """

        # Set the collection name
        # -----------------------
        collection_name = get(dataset_config, self.logger, 'name')

        # Get missing value threshold
        # ---------------------------
        threshold = float(get(dataset_config, self.logger, 'missing_value_threshold', 1.0e30))

        # Get levels to plot profiles
        # ---------------------------
        levels_str_or_list = get(dataset_config, self.logger, 'levels', [])

        # Convert levels to list. The comparison must be by equality: an identity test against
        # a fresh `[]` literal is always true, so the default would never be recognised.
        # NOTE(review): `levels` is parsed but not used further in this method -- confirm
        # whether a level selection is meant to be applied to the dataset.
        levels = []
        if levels_str_or_list != []:
            levels = parse_channel_list(levels_str_or_list, self.logger)

        # Filename to be used for reads
        # -----------------------------
        data_filename = get(dataset_config, self.logger, 'data_file')

        # Get instrument name
        instr_name = get(dataset_config, self.logger, 'instrument_name')

        # Open instrument files xarray dataset
        instr_ds = open_dataset(data_filename)

        # Enforce that a variable exists, do not default to all variables
        variables = get(dataset_config, self.logger, 'variables')
        if not variables:
            self.logger.abort('A variables list needs to be defined in the config file.')

        # Fail with a clear message when a requested variable is not in the file, rather than
        # with a bare KeyError further down
        missing_vars = [v for v in variables if v not in instr_ds]
        if missing_vars:
            self.logger.abort(f'Variables {missing_vars} were not found in {data_filename}.')

        # Drop everything that was not requested
        requested = set(variables)
        vars_to_remove = [name for name in instr_ds.keys() if name not in requested]
        instr_ds = instr_ds.drop_vars(vars_to_remove)

        # Rename variables and nval dimension
        rename_dict = {}
        rename_dims_dict = {}
        for v in variables:
            # Retrieve dimension names
            dims = instr_ds[v].dims
            # The second dimension of a multi-dimensional variable becomes 'Level'. A dimension
            # already carrying that name is skipped because rename_dims refuses to rename onto
            # an existing dimension name.
            if len(dims) > 1 and dims[1] != 'Level':
                rename_dims_dict[dims[1]] = 'Level'
            rename_dict[v] = f'{instr_name}::{v}'
        instr_ds = instr_ds.rename(rename_dict)
        instr_ds = instr_ds.rename_dims(rename_dims_dict)

        # Add the dataset_config to the collections
        data_collections.create_or_add_to_collection(collection_name, instr_ds)

        # Nan out unphysical values
        data_collections.nan_float_values_outside_threshold(threshold)

        # Display the contents of the collections for helping the user with making plots
        data_collections.display_collections()

    def generate_default_config(self, filenames, collection_name):

        """
        Generate a default configuration for the dataset.

        This method is intended as a starting point for creating a configuration for the
        dataset from the provided filenames and collection name. It is not implemented for
        geoval files yet.

        Args:
            filenames: Filenames or file paths relevant to the dataset.
            collection_name (str): Name of the collection for the dataset.

        Returns:
            None: No default configuration is generated at present.
        """

        pass
Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
datasets:
2+
3+
- name: exp_geovals_with_lvls
4+
type: GeovalSpace
5+
data_file: ${data_input_path}/swell-hofx.amsua_n19-geovals.20211211T210000Z.nc4
6+
levels: &exp_levels 33,60
7+
instrument_name: amsua_n19
8+
variables: &exp_vars_with_lvls ['mole_fraction_of_carbon_dioxide_in_air']
9+
10+
- name: exp_geovals
11+
type: GeovalSpace
12+
data_file: ${data_input_path}/swell-hofx.amsua_n19-geovals.20211211T210000Z.nc4
13+
instrument_name: amsua_n19
14+
variables: &exp_vars ['vegetation_area_fraction', 'leaf_area_index']
15+
16+
- name: exp_latlon
17+
type: IodaObsSpace
18+
filenames:
19+
- ${data_input_path}/swell-hofx.amsua_n19.20211211T210000Z.nc4
20+
groups:
21+
- name: MetaData
22+
23+
- name: ctrl_geovals_with_lvls
24+
type: GeovalSpace
25+
data_file: ${data_input_path}/ncdiag.x0048v2-geovals.ob.PT6H.amsua_n19.2021-12-11T21:00:00Z.nc4
26+
levels: &ctrl_levels 33,60
27+
instrument_name: amsua_n19
28+
variables: &ctrl_vars_with_lvls ['mole_fraction_of_carbon_dioxide_in_air']
29+
30+
- name: ctrl_geovals
31+
type: GeovalSpace
32+
data_file: ${data_input_path}/ncdiag.x0048v2-geovals.ob.PT6H.amsua_n19.2021-12-11T21:00:00Z.nc4
33+
instrument_name: amsua_n19
34+
variables: &ctrl_vars ['vegetation_area_fraction', 'leaf_area_index']
35+
36+
- name: ctrl_latlon
37+
type: IodaObsSpace
38+
filenames:
39+
- ${data_input_path}/ncdiag.x0048v2.ob.PT6H.amsua_n19.2021-12-11T21:00:00Z.nc4
40+
groups:
41+
- name: MetaData
42+
43+
transforms:
44+
45+
- transform: latlon_match
46+
new_collection_name: ctrl_geovals_matched_index
47+
base_latlon: ctrl_latlon
48+
match_base_latlon_to: exp_latlon
49+
base_collection: ctrl_geovals::amsua_n19::${variable}
50+
for:
51+
variable: *ctrl_vars
52+
53+
- transform: latlon_match
54+
new_collection_name: ctrl_geovals_with_lvls_matched_index
55+
base_latlon: ctrl_latlon
56+
match_base_latlon_to: exp_latlon
57+
base_collection: ctrl_geovals_with_lvls::amsua_n19::${variable}
58+
for:
59+
variable: *ctrl_vars_with_lvls
60+
61+
- transform: arithmetic
62+
new name: exp_geovals::amsua_n19::exp_minus_ctrl_${variable}
63+
equals: exp_geovals::amsua_n19::${variable}-ctrl_geovals_matched_index::amsua_n19::${variable}
64+
for:
65+
variable: *exp_vars
66+
67+
graphics:
68+
69+
plotting_backend: Emcpy
70+
figure_list:
71+
72+
- batch figure:
73+
variables: *exp_vars
74+
dynamic options:
75+
- type: vminvmaxcmap
76+
data variable: exp_geovals::amsua_n19::exp_minus_ctrl_${variable}
77+
figure:
78+
figure size: [20,10]
79+
layout: [1,1]
80+
title: 'JEDI - GSI | AMSU-A NOAA-19 | Geoval | ${variable}'
81+
output name: map_plots/geovals/amsua_n19/${variable}/observations_amsua_n19_${variable}.png
82+
plots:
83+
- mapping:
84+
projection: plcarr
85+
domain: global
86+
add_map_features: ['coastline']
87+
add_colorbar:
88+
label: '${variable}'
89+
layers:
90+
- type: MapScatter
91+
longitude:
92+
variable: exp_latlon::MetaData::longitude
93+
latitude:
94+
variable: exp_latlon::MetaData::latitude
95+
data:
96+
variable: exp_geovals::amsua_n19::exp_minus_ctrl_${variable}
97+
markersize: 2
98+
cmap: ${dynamic_cmap}
99+
vmin: ${dynamic_vmin}
100+
vmax: ${dynamic_vmax}
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
version https://git-lfs.github.com/spec/v1
2+
oid sha256:4e45612c2316c187aa1e47319b367860781875b9a4f9856e5af567588e3bb602
3+
size 16948017
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
version https://git-lfs.github.com/spec/v1
2+
oid sha256:33b4da4c1c3f96e48ba8e6966302e7873ae5bd0e9a1334bccebb6d6bf66ea7b6
3+
size 2291303
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
version https://git-lfs.github.com/spec/v1
2+
oid sha256:e823689cdc33713b3db16bbc8eb2d6979e0f0bd116717e5ad6ca496c3cbabbbf
3+
size 16737844
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
version https://git-lfs.github.com/spec/v1
2+
oid sha256:8ebaac0dd57bfa071b12692a5798f32a0a342c63d5c18050bb32883718376cd3
3+
size 14615231

src/eva/transforms/latlon_match.py

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
# (C) Copyright 2024 NOAA/NWS/EMC
2+
#
3+
# (C) Copyright 2024 United States Government as represented by the Administrator of the
4+
# National Aeronautics and Space Administration. All Rights Reserved.
5+
#
6+
# This software is licensed under the terms of the Apache Licence Version 2.0
7+
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
8+
9+
import numpy as np
10+
from xarray import Dataset, DataArray
11+
from eva.utilities.config import get
12+
from eva.utilities.logger import Logger
13+
from eva.transforms.transform_utils import parse_for_dict, split_collectiongroupvariable
14+
15+
16+
def latlon_match(config, data_collections):

    """
    Applies lat/lon match transform to a given collection.

    Args:
        config (dict): A configuration dictionary containing transformation parameters.
        data_collections (DataCollections): An instance of the DataCollections class containing
            input data.

    Returns:
        None

    This function applies lat/lon matching to variables in the base collection. A new collection
    with matched variables is added to the data collection; the base collection itself is left
    unchanged.

    For every location of the ``match_base_latlon_to`` collection, the base location with the
    smallest ``|dlat| + |dlon|`` is selected, and the variables are reordered accordingly.

    Configuration keys:
        base_collection: collection to perform the latlon matching on, given as
            ``collection::group::variable``.
        base_latlon: the collection with lat/lon coordinates corresponding to base collection.
        match_base_latlon_to: the collection with lat/lon coordinates corresponding to what you
            want to match the base latlon to.
        new_collection_name: name of the collection that receives the matched variables.
    """

    # Create a logger
    logger = Logger('LatLonMatchTransform')

    # Parse the for dictionary
    _, _, variables = parse_for_dict(config, logger)

    # Parse config for names
    base_collection = get(config, logger, 'base_collection')
    base_latlon_name = get(config, logger, 'base_latlon')
    match_latlon_name = get(config, logger, 'match_base_latlon_to')
    new_collection_name = get(config, logger, 'new_collection_name')

    # Extract collection and group
    cgv = split_collectiongroupvariable(logger, base_collection)
    collection, group = cgv[0], cgv[1]

    # Retrieve lat/lon arrays using collection names
    base_lat = data_collections.get_variable_data_array(base_latlon_name, 'MetaData',
                                                        'latitude').to_numpy()
    base_lon = data_collections.get_variable_data_array(base_latlon_name, 'MetaData',
                                                        'longitude').to_numpy()
    match_lat = data_collections.get_variable_data_array(match_latlon_name, 'MetaData',
                                                         'latitude').to_numpy()
    match_lon = data_collections.get_variable_data_array(match_latlon_name, 'MetaData',
                                                         'longitude').to_numpy()

    # The matched values are written back into arrays shaped like the base collection, so both
    # sets of locations must have the same length.
    if len(match_lat) != len(base_lat):
        logger.abort(f'latlon_match requires {base_latlon_name} and {match_latlon_name} to have '
                     f'the same number of locations, found {len(base_lat)} and '
                     f'{len(match_lat)}.')

    # Find matching index (this can be updated using dask): for each location to match to,
    # the index of the nearest base location. Iterate over the match points themselves so that
    # each one is paired with its own index.
    matching_index = np.array(
        [(np.abs(base_lat - lat) + np.abs(base_lon - lon)).argmin()
         for lat, lon in zip(match_lat, match_lon)],
        dtype=int)

    # Work on a copy of the base collection so the base collection keeps its original ordering
    match_ds = data_collections.get_data_collection(collection).copy()

    # Loop through the variables and store the reordered arrays in the new dataset
    for variable in variables:
        var_array = data_collections.get_variable_data_array(collection, group, variable)

        # Index a copy of the data array with matching_index along its first axis
        matched_array = var_array.copy()
        matched_array.values = var_array.values[matching_index]
        match_ds[f'{group}::{variable}'] = matched_array

    # add new collection to data collections
    data_collections.create_or_add_to_collection(new_collection_name, match_ds)

0 commit comments

Comments
 (0)