Skip to content

Commit

Permalink
introducing schema in config yaml so that user can supply a different…
Browse files Browse the repository at this point in the history
… schema vetted by MSD.

This is useful for MDTF as MDTF may need tiny tweaks until we can stabilize and align completely
  • Loading branch information
aradhakrishnanGFDL committed Aug 6, 2024
1 parent f138d94 commit 3fc1bc6
Show file tree
Hide file tree
Showing 4 changed files with 154 additions and 4 deletions.
139 changes: 139 additions & 0 deletions catalogbuilder/cats/mdtf_template.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
{
"esmcat_version": "0.0.1",
"attributes": [
{
"column_name": "activity_id",
"vocabulary": "",
"required": false
},
{
"column_name": "institution_id",
"vocabulary": "",
"required": false
},
{
"column_name": "source_id",
"vocabulary": "",
"required": false
},
{
"column_name": "experiment_id",
"vocabulary": "",
"required": true
},
{
"column_name": "frequency",
"vocabulary": "https://raw.githubusercontent.com/NOAA-GFDL/CMIP6_CVs/master/CMIP6_frequency.json",
"required": true
},
{
"column_name": "realm",
"vocabulary": "",
"required": true
},
{
"column_name": "table_id",
"vocabulary": "",
"required": false
},
{
"column_name": "member_id",
"vocabulary": "",
"required": false
},
{
"column_name": "grid_label",
"vocabulary": "",
"required": false
},
{
"column_name": "variable_id",
"vocabulary": "",
"required": true
},
{
"column_name": "time_range",
"vocabulary": "",
"required": true
},
{
"column_name": "chunk_freq",
"required": false
},
{
"column_name":"platform",
"vocabulary": "",
"required": false
},
{
"column_name":"target",
"vocabulary": "",
"required": false
},
{
"column_name": "cell_methods",
"vocabulary": "",
"required": "enhanced"
},
{
"column_name": "path",
"vocabulary": "",
"required": true
},
{
"column_name": "dimensions",
"vocabulary": "",
"required": "enhanced"
},
{
"column_name": "version_id",
"vocabulary": "",
"required": false
},
{
"column_name": "standard_name",
"vocabulary": "",
"required": "enhanced"
}
],
"assets": {
"column_name": "path",
"format": "netcdf",
"format_column_name": null
},
"aggregation_control": {
"variable_column_name": "variable_id",
"groupby_attrs": [
"source_id",
"experiment_id",
"frequency",
"table_id",
"grid_label",
"realm",
"member_id",
"chunk_freq",
"time_range"
],
"aggregations": [
{
"type": "union",
"attribute_name": "variable_id",
"options": {}
},
{
"type": "join_existing",
"attribute_name": "time_range",
"options": {
"dim": "time",
"coords": "minimal",
"compat": "override"
}
}
]
},
"id": "esm_catalog_ESM4",
"description": null,
"title": null,
"last_updated": "2023-05-07T16:35:52Z",
"catalog_file": "gfdl_autotest.csv"
}
5 changes: 5 additions & 0 deletions catalogbuilder/intakebuilder/configparser.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,4 +30,9 @@ def __init__(self, config):
print("output_file_template :", self.output_file_template)
except:
raise KeyError("output_file_template does not exist in config")
try:
self.schema = configfile['schema']
print("schema:", self.schema)
except:
raise KeyError("schema does not exist in config")

10 changes: 8 additions & 2 deletions catalogbuilder/scripts/gen_intake_gfdl.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
sys.exit("The module 'intakebuilder' is still not installed. Do you have intakebuilder in your sys.path or have you activated the conda environment with the intakebuilder package in it? ")

package_dir = os.path.dirname(os.path.abspath(__file__))
template_path = os.path.join(package_dir, '../cats/gfdl_template.json')
#template_path = os.path.join(package_dir, '../cats/gfdl_template.json')

def create_catalog(input_path=None, output_path=None, config=None, filter_realm=None, filter_freq=None, filter_chunk=None,
overwrite=False, append=False, slow = False):
Expand All @@ -42,7 +42,13 @@ def create_catalog(input_path=None, output_path=None, config=None, filter_realm=

input_path = configyaml.input_path
output_path = configyaml.output_path


if configyaml.schema is None or not configyaml.schema:
print("We will use catalog builder catalogbuilder/cats/gfdl_template.json as your json schema")
template_path = os.path.join(package_dir, '../cats/gfdl_template.json')
else:
template_path = configyaml.schema
print("Using schema from config file", template_path)
if not os.path.exists(input_path):
sys.exit("Input path does not exist. Adjust configuration.")
if not os.path.exists(Path(output_path).parent.absolute()):
Expand Down
4 changes: 2 additions & 2 deletions catalogbuilder/tests/config-mdtf.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,6 @@ output_file_template: ['realm','time_range','variable_id']

#######################################################

json_template: "/home/a1r/git/forkCatalogBuilder-/catalogbuilder/cats/mdtf-template.json" #if your json schema is slighlty different but vetted with MSD, you may use your json schema here
schema: "/home/a1r/git/forkCatalogBuilder-/catalogbuilder/cats/mdtf_template.json" #if your json schema is slightly different but vetted with MSD, you may use your json schema here
input_path: "/archive/am5/am5/am5f7b10r0/c96L65_am5f7b10r0_amip/gfdl.ncrc5-deploy-prod-openmp/pp/"
output_path: "/home/a1r/github/noaa-gfdl/catalogs/c96L65_am5f7b10r0_amip30" # ENTER NAME OF THE CSV AND JSON, THE SUFFIX ALONE. e.g catalog (the builder then generates catalog.csv and catalog.json. This can also be an absolute path)
output_path: "/home/a1r/github/noaa-gfdl/catalogs/c96L65_am5f7b10r0_amip30_test" # ENTER NAME OF THE CSV AND JSON, THE SUFFIX ALONE. e.g. catalog (the builder then generates catalog.csv and catalog.json). This can also be an absolute path.

0 comments on commit 3fc1bc6

Please sign in to comment.