Skip to content

Commit 0a3175e

Browse files
authored
Merge pull request #148 from coecms/cordexupdate
Cordexupdate
2 parents e2d5eba + d7d79dd commit 0a3175e

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

44 files changed

+2121
-1412
lines changed

.circleci/config.yml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@ run_conda: &run_conda
1616
command: |
1717
wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O conda.sh
1818
bash conda.sh -b -p ~/miniconda
19+
source ~/miniconda/bin/activate
20+
conda update --all --yes
1921
- run:
2022
name: setup
2123
command: |
@@ -30,7 +32,7 @@ run_conda: &run_conda
3032
command: |
3133
source ~/miniconda/bin/activate
3234
mv conda/run_test_coverage.sh conda/run_test.sh
33-
conda build conda -c conda-forge --python=${PYTHON_VER} --output-folder /tmp/artefacts
35+
conda build conda -c conda-forge --output-folder /tmp/artefacts
3436
cp /tmp/artefacts/*/*.tar.bz2 /tmp/artefacts/persist
3537
- run:
3638
name: report-coverage

AUTHORS

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,2 @@
1-
C. Carouge <[email protected]>
21
Paola Petrelli <[email protected]>
32
Scott Wales <[email protected]>
4-
Scott Wales <[email protected]>

Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ SHELL=/bin/bash
55

66
check test:
77
${ENV} py.test --db=postgresql://clef.nci.org.au/clef test
8+
# ${ENV} py.test --db=postgresql://clefdev.nci.org.au/clef test
89

910
package:
1011
${ENV} conda build . --user coecms

README.rst

Lines changed: 32 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ Currently it searches for the following datasets:
2626

2727
- **CMIP5** raijin projects: rr3, where NCI is the primary publisher and al33 for replicas
2828
- **CMIP6** raijin projects: 0i10 for replicas
29+
- **CORDEX** raijin projects: rr3, where NCI is the primary publisher and al33 for replicas
2930

3031
The search returns both the path of data that is already available at NCI as well as information on data that
3132
is on external ESGF nodes but not yet available locally.
@@ -39,6 +40,8 @@ Clef is pre-installed into a Conda environment at NCI. Load it with::
3940
module use /g/data3/hh5/public/modules
4041
module load conda/analysis3-unstable
4142

43+
NB: You need to be a member of hh5 to load the modules.
44+
4245
We are constantly adding new features; the development version is available in a separate environment::
4346
module use /g/data3/hh5/public/modules
4447
module load conda
@@ -67,13 +70,13 @@ You can filter CMIP5 by the following terms:
6770
* ensemble/member
6871
* experiment
6972
* experiment-family
70-
* institution
7173
* model
7274
* table/cmor_table
7375
* realm
7476
* frequency
7577
* variable
7678
* cf-standard-name
79+
* institution
7780

7881
See ``clef cmip5 --help`` for all available filters and their aliases
7982

@@ -99,18 +102,45 @@ You can filter CMIP6 by the following terms:
99102

100103
* activity
101104
* experiment
102-
* institution
103105
* source_type
104106
* model
105107
* member
106108
* table
109+
* grid
110+
* resolution
107111
* realm
108112
* frequency
109113
* variable
110114
* version
115+
* sub_experiment
116+
* variant_label
117+
* institution
118+
* cf_standard_name
111119

112120
See ``clef cmip6 --help`` for all available filters
113121

122+
clef cordex
123+
~~~~~~~~~~~
124+
125+
You can filter CORDEX by the following terms:
126+
127+
* experiment
128+
* domain
129+
* driving_model
130+
* rcm_name (model)
131+
* rcm_version
132+
* ensemble
133+
* table
134+
* time_frequency
135+
* variable
136+
* version
137+
* experiment_family
138+
* institute
139+
* cf_standard_name
140+
141+
See ``clef cordex --help`` for all available filters
142+
143+
-------
114144
-------
115145
Develop
116146
-------

clef/cli.py

Lines changed: 60 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@
2020
import sys
2121
import os
2222
import stat
23-
2423
from itertools import repeat
2524
from datetime import datetime
2625

@@ -32,7 +31,7 @@
3231
from .code import call_local_query, matching, write_csv, print_stats, ids_df
3332
from .helpers import load_vocabularies, fix_model, fix_path, get_ids
3433
from .esdoc import citation, write_cite
35-
34+
import clef.cordex as cordex_
3635

3736
def clef_catch():
3837
debug_logger = logging.getLogger('clef_debug')
@@ -123,6 +122,7 @@ def cmip5_args(f):
123122
click.option('--ensemble', '--member', '-en', 'ensemble', multiple=True, help="CMIP5 ensemble member: r#i#p#"),
124123
click.option('--frequency', 'time_frequency', multiple=True, type=click.Choice(vocab['time_frequency']) ),
125124
click.option('--realm', multiple=True, type=click.Choice(vocab['realm']) ),
125+
click.option('--cf_standard_name',multiple=True, help="CF variable standard_name, use instead of variable constraint "),
126126
click.option('--and', 'and_attr', multiple=True, type=click.Choice(vocab['attributes']),
127127
help=("Attributes for which we want to add AND filter, i.e. `--and variable` to apply to variable values")),
128128
click.option('--institution', 'institute', multiple=True, help="Modelling group institution id: MIROC, IPSL, MRI ...")
@@ -136,7 +136,6 @@ def common_args(f):
136136
"""
137137
constraints = [
138138
click.argument('query', nargs=-1),
139-
click.option('--cf_standard_name',multiple=True, help="CF variable standard_name, use instead of variable constraint "),
140139
click.option('--latest/--all-versions', 'latest', default=True,
141140
help="Return only the latest version or all of them. Default: --latest"),
142141
click.option('--replica/--no-replica', default=False,
@@ -179,6 +178,7 @@ def cmip6_args(f):
179178
click.option('--sub_experiment_id', '-se', multiple=True,
180179
help="Only available for hindcast and forecast experiments: sYYYY"),
181180
click.option('--variant_label', '-vl', multiple=True, help="Indicates a model variant: r#i#p#f#"),
181+
click.option('--cf_standard_name',multiple=True, help="CF variable standard_name, use instead of variable constraint "),
182182
click.option('--and', 'and_attr', multiple=True, type=click.Choice(vocab['attributes']),
183183
help=("Attributes for which we want to add AND filter, i.e. `--and variable_id` to apply to variable values")),
184184
click.option('--cite', 'cite', is_flag=True, default=False,
@@ -203,10 +203,8 @@ def ds_args(f):
203203
click.option('--format', '-f', 'fileformat', multiple=False, type=click.Choice(['netcdf','grib','HDF5','binary']),
204204
help="Dataset file format as defined in clef.db Dataset table"),
205205
click.option('--standard-name', '-sn', multiple=True, type=click.Choice(st_names),
206-
#click.option('--standard-name', '-sn', multiple=False, type=click.Choice(st_names),
207206
help="Variable standard_name this is the most reliable way to look for a variable across datasets"),
208207
click.option('--cmor-name', '-cn', multiple=True, type=click.Choice(cm_names),
209-
#click.option('--cmor-name', '-cn', multiple=False, type=click.Choice(cm_names),
210208
help="Variable cmor_name useful to look for a variable across datasets"),
211209
click.option('--variable', '-va', 'varname', multiple=True, type=click.Choice(variables),
212210
help="Variable name as defined in files: tas, pr, sic, T ... "),
@@ -263,9 +261,11 @@ def cmip5(ctx, query, debug, distrib, replica, latest, csvf, stats,
263261
'time_frequency': time_frequency,
264262
'cmor_table': cmor_table,
265263
'variable': variable,
266-
'experiment_family': experiment_family
264+
'experiment_family': experiment_family,
265+
'cf_standard_name': cf_standard_name,
266+
'and_attr': and_attr
267267
}
268-
common_esgf_cli(ctx, project, query, cf_standard_name, latest, replica, distrib, csvf, stats, debug, dataset_constraints, and_attr)
268+
common_esgf_cli(ctx, project, query, latest, replica, distrib, csvf, stats, debug, dataset_constraints)
269269

270270

271271
@clef.command()
@@ -315,17 +315,47 @@ def cmip6(ctx,query, debug, distrib, replica, latest, csvf, stats,
315315
'table_id': table_id,
316316
'variable_id': variable_id,
317317
'variant_label': variant_label,
318+
'cf_standard_name': cf_standard_name,
319+
'and_attr': and_attr
318320
}
319321

320-
common_esgf_cli(ctx, project, query, cf_standard_name, latest,
321-
replica, distrib, csvf, stats, debug, dataset_constraints, and_attr, cite)
322+
common_esgf_cli(ctx, project, query, latest, replica, distrib,
323+
csvf, stats, debug, dataset_constraints, cite)
324+
325+
326+
@clef.command(cls=cordex_.CordexCommand)
@common_args
@click.pass_context
def cordex(ctx, query, debug, distrib, replica, latest, csvf, stats, **kwargs):
    """
    Search ESGF and local database for CORDEX files.

    Constraints can be specified multiple times, in which case they are combined using OR: -v tas -v tasmin will return anything matching variable = 'tas' or variable = 'tasmin'.
    The --latest flag will check ESGF for the latest version available, this is the default behaviour
    NB. for CORDEX data associated to CMIP6 use the cmip6 command with CORDEX as activity_id
    """
    # NOTE(review): the CORDEX-specific options are not declared here; they
    # presumably come from cordex_.CordexCommand and arrive via **kwargs.
    # Only the keyword arguments listed in cordex_.cli_facets are kept as
    # search constraints.
    dataset_constraints = {
        name: value for name, value in kwargs.items()
        if name in cordex_.cli_facets
    }
    # 'and_attr' travels inside the constraints dict; common_esgf_cli pops it
    # back out before building the search terms.
    dataset_constraints['and_attr'] = kwargs['and_attr']

    # All CORDEX-related project names are searched together.
    project = "CORDEX,CORDEX-Adjust,CORDEX-ESD,CORDEXReklies"

    # change experiment_family to tuple to behave like other arguments
    experiment_family = dataset_constraints['experiment_family']
    if experiment_family is None:
        dataset_constraints['experiment_family'] = ()
    else:
        dataset_constraints['experiment_family'] = (experiment_family,)

    # The positional `query` argument is accepted (it is declared by
    # common_args) but an empty query list is what gets forwarded.
    common_esgf_cli(ctx, project, [], latest, replica, distrib, csvf, stats,
                    debug, dataset_constraints)
350+
351+
352+
def common_esgf_cli(ctx, project, query, latest, replica, distrib,
353+
csvf, stats, debug, constraints, cite=False):
323354

324-
def common_esgf_cli(ctx, project, query, cf_standard_name, latest,
325-
replica, distrib, csvf, stats, debug, constraints, and_attr, cite=False):
326355
if debug:
327356
logging.basicConfig(level=logging.DEBUG)
328357
logging.getLogger('sqlalchemy.engine').setLevel(level=logging.INFO)
358+
logging.getLogger('clex_debug').setLevel(level=logging.INFO)
329359

330360
clef_log = ctx.obj['log']
331361
user_name=os.environ.get('USER','unknown')
@@ -336,7 +366,16 @@ def common_esgf_cli(ctx, project, query, cf_standard_name, latest,
336366
matching_fixed = {
337367
'CMIP5': ['model','ensemble'],
338368
'CMIP6': ['source_id','member_id'],
369+
'CORDEX': ['domain', 'driving_model','rcm_name', 'ensemble']
339370
}
371+
if ctx.obj['flow'] == 'local' and project[0:6] == 'CORDEX':
372+
matching_fixed['CORDEX'][2] = 'model_id'
373+
project+=',CORDEX-Australasia'
374+
375+
if 'and_attr' in constraints.keys():
376+
and_attr = constraints.pop('and_attr')
377+
else:
378+
and_attr = []
340379

341380
# keep track of query arguments in clef_log file
342381
args_str = ' '.join('{}={}'.format(k,v) for k,v in constraints.items())
@@ -360,7 +399,6 @@ def common_esgf_cli(ctx, project, query, cf_standard_name, latest,
360399
distrib=distrib,
361400
replica=replica,
362401
latest=latest,
363-
cf_standard_name=cf_standard_name,
364402
project=project,
365403
**constraints,
366404
)
@@ -372,7 +410,7 @@ def common_esgf_cli(ctx, project, query, cf_standard_name, latest,
372410
for did in ids:
373411
print(did)
374412
if stats:
375-
print_stats(results)
413+
print_stats(results, project)
376414
if csvf:
377415
write_csv(results)
378416
if cite:
@@ -382,11 +420,16 @@ def common_esgf_cli(ctx, project, query, cf_standard_name, latest,
382420

383421
# if local, query DB based on attributes not checksums
384422
if ctx.obj['flow'] == 'local':
423+
if project[0:6] == 'CORDEX':
424+
project='CORDEX'
385425
if len(and_attr) > 0:
386426
results, selection = matching(s, and_attr, matching_fixed[project], project=project,
387427
local=True, latest=latest, **terms)
388428
for row in selection.itertuples():
389-
print(f"{row.Index[0]} / {row.Index[1]} versions: {', '.join(row.version)}")
429+
line = f"{' / '.join(row.Index[:])} versions: {', '.join(row.version)}"
430+
if project == 'CORDEX':
431+
line += f" rcm versions: {', '.join(row.rcm_version_id)}"
432+
print(line)
390433
else:
391434
results, paths = call_local_query(s, project, latest, **terms)
392435
if not stats:
@@ -395,7 +438,7 @@ def common_esgf_cli(ctx, project, query, cf_standard_name, latest,
395438
if csvf:
396439
write_csv(results)
397440
if stats:
398-
print_stats(results)
441+
print_stats(results, project)
399442
if cite:
400443
ids = get_ids(results)
401444
citations = citation(ids)
@@ -407,7 +450,6 @@ def common_esgf_cli(ctx, project, query, cf_standard_name, latest,
407450
distrib=distrib,
408451
replica=replica,
409452
latest=(latest if latest else None),
410-
cf_standard_name=cf_standard_name,
411453
project=project,
412454
**terms
413455
)
@@ -429,7 +471,7 @@ def common_esgf_cli(ctx, project, query, cf_standard_name, latest,
429471
# update list and print result
430472
if qm.count() > 0:
431473
varlist = []
432-
if project == 'CMIP5' and 'variable' in terms:
474+
if project in ['CMIP5'] and 'variable' in terms:
433475
varlist = terms['variable']
434476
updated = search_queue_csv(qm, project, varlist)
435477
print('\nAvailable on ESGF but not locally:')
@@ -440,7 +482,7 @@ def common_esgf_cli(ctx, project, query, cf_standard_name, latest,
440482
return
441483

442484
if ctx.obj['flow'] == 'request':
443-
if project == 'CMIP5' and len(varlist) == 0:
485+
if project in ['CMIP5'] and len(varlist) == 0:
444486
raise ClefException("Please specify at least one variable to request")
445487
if len(updated) >0:
446488
write_request(project,updated)

0 commit comments

Comments
 (0)