Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Teleconnections Recipe and diagnostic script #65

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion recipe_diagnostics/diagnostic_scripts/matrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,19 @@ def plot_matrix(diag_path):

metric_df = pd.read_csv(diag_path, header=None)
# run normalisation on all these values
metric_df[2] = (metric_df[2]-metric_df[2].mean())/metric_df[2].std()
# metric_df[2] = (metric_df[2]-metric_df[2].mean())/metric_df[2].std()

transformls = []
for mod in metric_df[0].unique(): #iterate model, translate metrics
df = metric_df.loc[metric_df[0]==mod,:]
transformls.append(df[[1,2]].set_index(1).T.rename(index={2:mod}))

matrixdf = pd.concat(transformls)

# normalise each metric
for col in matrixdf.columns:
matrixdf[col] = (matrixdf[col] - matrixdf[col].mean()) / matrixdf[col].std()

figure = plt.figure(dpi=300)
plt.imshow(matrixdf, cmap='coolwarm')
plt.colorbar()
Expand Down
212 changes: 212 additions & 0 deletions recipe_diagnostics/diagnostic_scripts/teleconnections_metrics.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,212 @@
"""diagnostic script to plot ENSO teleconnections metrics"""

import os
import logging
import iris
from pprint import pformat
import numpy as np
from shapely import box
import shapely.vectorized as shp_vect
import matplotlib.pyplot as plt
import iris.plot as iplt
import cartopy.crs as ccrs

from esmvaltool.diag_scripts.shared import (run_diagnostic,
save_figure,
get_diagnostic_filename,
group_metadata,
select_metadata,
)
from esmvalcore.preprocessor import (extract_season,
anomalies)


# This part sends debug statements to stdout
logger = logging.getLogger(os.path.basename(__file__))

def plot_level1(input_data, rmse, title): #input data is 2 - model and obs

figure = plt.figure(figsize=(20, 6), dpi=300)

proj = ccrs.PlateCarree(central_longitude=180)
figure.suptitle(title)
i =121

for label, cube in input_data.items():

ax1 = plt.subplot(i,projection=proj)
ax1.coastlines()
cf1 = iplt.contourf(cube, levels=np.arange(-1,1,0.1), extend='both',cmap='RdBu_r')
ax1.set_title(label)
gl1 = ax1.gridlines(draw_labels=True, linestyle='--')
gl1.top_labels = False
gl1.right_labels = False
i+=1

plt.text(0.1, -0.3, f'RMSE: {rmse:.2f} ', fontsize=12, ha='left',
transform=plt.gca().transAxes, bbox=dict(facecolor='white', alpha=0.8, edgecolor='none'))
# Add a single colorbar at the bottom
cax = plt.axes([0.15,0.08,0.7,0.05])
cbar = figure.colorbar(cf1, cax=cax, orientation='horizontal', extend='both', ticks=np.arange(-1,1.5,0.5))
cbar.set_label('regression (°C/°C)')
logger.info(f"{title}, {label} : metric:{rmse}")
plt.tight_layout

return figure


def lin_regress_matrix(cubeA, cubeB): #array must not contain infs or NaNs
"""
Calculate the linear regression of cubeA on cubeB using matrix operations.

Parameters
----------
cubeA: iris.cube.Cube
The 2D input cube for which the regression is calculated.

cubeB: iris.cube.Cube
The cube used as the independent variable in the regression.

Returns
-------
iris.cube.Cube
A new cube containing the slope of the regression for each spatial point.
"""
# Get data as flattened arrays
A_data = cubeA.data.reshape(cubeA.shape[0], -1) # Shape (time, spatial_points)
B_data = cubeB.data.flatten() # Shape (time,)
logger.info("cubes: %s, %s", cubeA.name, cubeB.name)
# Add intercept term by stacking a column of ones with cubeB
B_with_intercept = np.vstack([B_data, np.ones_like(B_data)]).T

# Solve the linear equations using least squares method
coefs, _, _, _ = np.linalg.lstsq(B_with_intercept, A_data, rcond=None)
logger.info("%s, %s",cubeA.coords(), cubeA.shape)
# Extract slopes from coefficients #coefs 1
slopes = coefs[0].reshape(cubeA.shape[1], cubeA.shape[2])

# Create a new Iris Cube for the regression results
result_cube = iris.cube.Cube(slopes, long_name='regression ENSO SSTA',
dim_coords_and_dims=[(cubeA.coord('latitude'), 0),
(cubeA.coord('longitude'), 1)])

return result_cube


def mask_pacific(cube):
region = box(130.,-15.,270.,15) #remove land
x_p, y_p = np.meshgrid(
cube.coord(axis="X").points,
cube.coord(axis="Y").points,
)

mask = shp_vect.contains(region, x_p, y_p)
cube.data.mask = mask
return cube

def compute_telecon_metrics(input_pair, var_group, metric):

if metric =='pr_telecon':
title = '{} PR Teleconnection' # both seasons
elif metric == 'ts_telecon':
title = '{} SST Teleconnection'

val, fig = {}, {}
for seas in ['DJF','JJA']:
data_values = []
cubes = {}
for label, ds in input_pair.items(): #obs 0, mod 1
preproc = {}
for variable in var_group:
cube = extract_season(ds[variable].copy(), seas)
preproc[variable] = anomalies(cube, period="full")

regcube = lin_regress_matrix(preproc[var_group[1]], preproc[var_group[0]])
reg_masked = mask_pacific(regcube)

data_values.append(reg_masked.data)
cubes[label] = reg_masked

val[seas] = np.sqrt(np.mean((data_values[0] - data_values[1]) ** 2))
fig[seas] = plot_level1(cubes, val[seas], title.format(seas))

return val, fig


def get_provenance_record(caption, ancestor_files):
"""Create a provenance record describing the diagnostic data and plot."""

record = {
'caption': caption,
'statistics': ['anomaly'],
'domains': ['eq'],
'plot_types': ['map'],
'authors': [
'chun_felicity',
# 'beucher_romain',
# 'sullivan_arnold',
],
'references': [
'access-nri',
],
'ancestors': ancestor_files,
}
return record

def main(cfg):
"""Run ENSO metrics."""

input_data = cfg['input_data'].values()

# iterate through each metric and get variable group, select_metadata, map to function call
metrics = {'pr_telecon': ['tos_enso', 'pr_global'],
'ts_telecon':['tos_enso','tos_global']}

# select twice with project to get obs, iterate through model selection
for metric, var_preproc in metrics.items(): #if empty or try
logger.info(f"{metric},{var_preproc}")
obs, models = [], []
for var_prep in var_preproc: #enumerate 1 or 2 length? if 2 append,
obs += select_metadata(input_data, variable_group=var_prep, project='OBS')
obs += select_metadata(input_data, variable_group=var_prep, project='OBS6')
models += select_metadata(input_data, variable_group=var_prep, project='CMIP6')

# log
msg = "{} : observation datasets {}, models {}".format(metric, len(obs), len(models))
logger.info(msg)

# obs datasets for each model
obs_datasets = {dataset['variable_group']: iris.load_cube(dataset['filename']) for dataset in obs}

# group models by dataset
model_ds = group_metadata(models, 'dataset', sort='project')

# dataset name # split for teleconnections
for dataset in model_ds:
logger.info(f"{metric}, preprocessed cubes:{len(model_ds)}, dataset:{dataset}")
dt_files = [ds['filename']
for ds in obs] + [ds['filename']
for ds in model_ds[dataset]]

model_datasets = {attributes['variable_group']: iris.load_cube(attributes['filename'])
for attributes in model_ds[dataset]}
input_pair = {obs[0]['dataset']:obs_datasets, dataset:model_datasets}
logger.info(pformat(model_datasets))
# process function for each metric - obs first.. if, else
### make one function, with the switches - same params
values, fig = compute_telecon_metrics(input_pair, var_preproc, metric)

# save metric for each pair, check not none teleconnection metric value djf, jja
for seas, val in values.items():
metricfile = get_diagnostic_filename('matrix', cfg, extension='csv')
with open(metricfile, 'a+') as f:
f.write(f"{dataset},{seas}_{metric},{val}\n")

prov_record = get_provenance_record(f'ENSO metrics {seas} {metric}', dt_files)
save_figure(f'{dataset}_{seas}_{metric}', prov_record, cfg, figure=fig[seas], dpi=300)#


if __name__ == '__main__':

with run_diagnostic() as config:
main(config)
145 changes: 145 additions & 0 deletions recipe_diagnostics/recipe_teleconnection_metrics.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
# ESMValTool
#
---
documentation:
description: ENSO CLIVAR metrics - ENSO teleconnections
title: Reproducing ENSO teleconnections metrics by Yann Planton
authors:
- chun_felicity
# - sullivan_arnold
maintainer:
- chun_felicity

datasets:
- {dataset: ACCESS-ESM1-5, project: CMIP6, exp: historical, ensemble: r1i1p1f1, grid: gn, start_year: 1850, end_year: 2014}
- {dataset: ACCESS-CM2, project: CMIP6, exp: historical, ensemble: r1i1p1f1, grid: gn, start_year: 1850, end_year: 2014}
- {dataset: BCC-CSM2-MR, project: CMIP6, exp: historical, ensemble: r1i1p1f1, grid: gn, start_year: 1850, end_year: 2014}
# - {dataset: BCC-ESM1, project: CMIP6, exp: historical, ensemble: r1i1p1f1, grid: gn, start_year: 1850, end_year: 2014}
#/g/data/oi10/replicas/CMIP6/CMIP/BCC/BCC-ESM1/1pctCO2/r1i1p1f1/Ofx/areacello/gn/v20190613/areacello_Ofx_BCC-ESM1_1pctCO2_r1i1p1f1_gn.nc') error
# There were errors in variable areacello:
# There are multiple coordinates with standard_name "latitude": ['lat', 'latitude']
# There are multiple coordinates with standard_name "longitude": ['lon', 'longitude']

# - {dataset: AWI-CM-1-1-MR, project: CMIP6, exp: historical, ensemble: r1i1p1f1, grid: gn, start_year: 1850, end_year: 2014}
# extract region bounds error - ValueError: expected 0 or 2 bound values per cell ? unstructured
# - {dataset: AWI-ESM-1-1-LR, project: CMIP6, exp: historical, ensemble: r1i1p1f1, grid: gn, start_year: 1850, end_year: 2014}
- {dataset: CAS-ESM2-0, project: CMIP6, exp: historical, ensemble: r1i1p1f1, grid: gn, start_year: 1850, end_year: 2014}
- {dataset: CESM2-FV2, project: CMIP6, exp: historical, ensemble: r1i1p1f1, grid: gn, start_year: 1850, end_year: 2014}
- {dataset: CESM2-WACCM-FV2, project: CMIP6, exp: historical, ensemble: r1i1p1f1, grid: gn, start_year: 1850, end_year: 2014}
- {dataset: CESM2-WACCM, project: CMIP6, exp: historical, ensemble: r1i1p1f1, grid: gn, start_year: 1850, end_year: 2014}
- {dataset: CESM2, project: CMIP6, exp: historical, ensemble: r1i1p1f1, grid: gn, start_year: 1850, end_year: 2014}
- {dataset: CMCC-CM2-HR4, project: CMIP6, exp: historical, ensemble: r1i1p1f1, grid: gn, start_year: 1850, end_year: 2014}
- {dataset: CMCC-CM2-SR5, project: CMIP6, exp: historical, ensemble: r1i1p1f1, grid: gn, start_year: 1850, end_year: 2014}
- {dataset: CMCC-ESM2, project: CMIP6, exp: historical, ensemble: r1i1p1f1, grid: gn, start_year: 1850, end_year: 2014}
- {dataset: CanESM5, project: CMIP6, exp: historical, ensemble: r1i1p1f1, grid: gn, start_year: 1850, end_year: 2014}
- {dataset: FGOALS-g3, project: CMIP6, exp: historical, ensemble: r1i1p1f1, grid: gn, start_year: 1850, end_year: 2014}
- {dataset: FIO-ESM-2-0, project: CMIP6, exp: historical, ensemble: r1i1p1f1, grid: gn, start_year: 1850, end_year: 2014}
# - {dataset: GISS-E2-1-G-CC, project: CMIP6, exp: historical, ensemble: r1i1p1f1, grid: gn, start_year: 1850, end_year: 2014}
#/g/data/oi10/replicas/CMIP6/CMIP/NASA-GISS/GISS-E2-1-G-CC/piControl/r1i1p1f1/Ofx/areacello/gn/v20190325/areacello_Ofx_GISS-E2-1-G-CC_piControl_r1i1p1f1_gn.nc' dimensions
# 'https://furtherinfo.es-doc.org/CMIP6.NASA-GISS.GISS-E2-1-G-CC.historic ...' grid -'atmospheric grid: 144x90, ocean grid: 288x180'
# - {dataset: GISS-E2-1-G, project: CMIP6, exp: historical, ensemble: r1i1p1f1, grid: gn, start_year: 1850, end_year: 2014}
- {dataset: GISS-E2-1-H, project: CMIP6, exp: historical, ensemble: r1i1p1f1, grid: gn, start_year: 1850, end_year: 2014}
# - {dataset: IITM-ESM, project: CMIP6, exp: historical, ensemble: r1i1p1f1, grid: gn, start_year: 1900, end_year: 2014} # searching esgf, changed year
# WARNING [570089] Using 'area_weighted' regridder scheme in Omon variables for dataset IITM-ESM causes discontinuities in the longitude coordinate.
# ERROR [570089] Supplementary variables are needed to calculate grid cell areas for irregular or unstructured grid of cube sea_surface_temperature / (degC) (time: 1380; cell index along second dimension: 24; cell index along first dimension: 50)
# ERROR [570089] Failed to run preprocessor function 'area_statistics' on the data

- {dataset: MCM-UA-1-0, project: CMIP6, exp: historical, ensemble: r1i1p1f1, grid: gn, start_year: 1850, end_year: 2014}
- {dataset: MIROC6, project: CMIP6, exp: historical, ensemble: r1i1p1f1, grid: gn, start_year: 1850, end_year: 2014}
- {dataset: MPI-ESM-1-2-HAM, project: CMIP6, exp: historical, ensemble: r1i1p1f1, grid: gn, start_year: 1850, end_year: 2014}
- {dataset: MPI-ESM1-2-HR, project: CMIP6, exp: historical, ensemble: r1i1p1f1, grid: gn, start_year: 1850, end_year: 2014}
- {dataset: MPI-ESM1-2-LR, project: CMIP6, exp: historical, ensemble: r1i1p1f1, grid: gn, start_year: 1850, end_year: 2014}
- {dataset: MRI-ESM2-0, project: CMIP6, exp: historical, ensemble: r1i1p1f1, grid: gn, start_year: 1850, end_year: 2014}
# - {dataset: NESM3, project: CMIP6, exp: historical, ensemble: r1i1p1f1, grid: gn, start_year: 1850, end_year: 2014}
# missing cell area
- {dataset: NorCPM1, project: CMIP6, exp: historical, ensemble: r1i1p1f1, grid: gn, start_year: 1850, end_year: 2014}
- {dataset: NorESM2-LM, project: CMIP6, exp: historical, ensemble: r1i1p1f1, grid: gn, start_year: 1850, end_year: 2014}
- {dataset: NorESM2-MM, project: CMIP6, exp: historical, ensemble: r1i1p1f1, grid: gn, start_year: 1850, end_year: 2014}
- {dataset: SAM0-UNICON, project: CMIP6, exp: historical, ensemble: r1i1p1f1, grid: gn, start_year: 1850, end_year: 2014}
- {dataset: TaiESM1, project: CMIP6, exp: historical, ensemble: r1i1p1f1, grid: gn, start_year: 1850, end_year: 2014}

# observations
# - {dataset: HadISST, project: OBS, type: reanaly, tier: 2, mip: Omon}


preprocessors:
base_enso: &base_enso
custom_order: true
convert_units:
units: degC
regrid:
target_grid: 1x1
scheme: linear
extract_region: &nino34
start_longitude: 190.
end_longitude: 240.
start_latitude: -5.
end_latitude: 5.
# detrend: #tropflux nan/inf values
# dimension: time
# method: linear
# extract_season: # JJA and DJF in script
# season: DJF
# anomalies:
# period: full

enso_sst:
<<: *base_enso
area_statistics:
operator: mean

global_pr:
<<: *base_enso
convert_units:
units: mm/day
extract_region: &glb
start_longitude: 0.
end_longitude: 360.
start_latitude: -60.
end_latitude: 60.
global_sst:
<<: *base_enso
extract_region:
<<: *glb


diagnostics:
diagnostic_metrics:
description: run preprocessors on variables for ENSO metrics
variables:
tos_enso:
short_name: ts
mip: Amon
preprocessor: enso_sst
additional_datasets:
- {dataset: ERA-Interim, project: OBS6, type: reanaly, tier: 3}

tos_global:
short_name: ts
mip: Amon
preprocessor: global_sst
additional_datasets:
- {dataset: ERA-Interim, project: OBS6, type: reanaly, tier: 3}

pr_global:
short_name: pr
mip: Amon
preprocessor: global_pr
additional_datasets:
- {dataset: GPCP-SG, project: OBS, type: atmos, tier: 2, mip: Amon, start_year: 1979, end_year: 2018}

scripts:
plot_script:
script: /home/189/fc6164/esmValTool/repos/ENSO_recipes/recipe_diagnostics/diagnostic_scripts/teleconnections_metrics.py

diag_collect:
description: collect metrics
variables:
tos: #dummy variable to fill recipe requirements
mip: Omon
scripts:
matrix_collect:
script: /home/189/fc6164/esmValTool/repos/ENSO_recipes/recipe_diagnostics/diagnostic_scripts/matrix.py
# above diagnostic name and script name
diag_metrics: diagnostic_metrics/plot_script #cfg['work_dir']