Skip to content

Commit 3fc1bc6

Browse files
Introducing a schema entry in the config YAML so that the user can supply a different schema vetted by MSD.
This is useful for MDTF, as MDTF may need tiny tweaks until we can stabilize and align completely.
1 parent f138d94 commit 3fc1bc6

File tree

4 files changed

+154
-4
lines changed

4 files changed

+154
-4
lines changed
Lines changed: 139 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,139 @@
1+
{
2+
"esmcat_version": "0.0.1",
3+
"attributes": [
4+
{
5+
"column_name": "activity_id",
6+
"vocabulary": "",
7+
"required": false
8+
},
9+
{
10+
"column_name": "institution_id",
11+
"vocabulary": "",
12+
"required": false
13+
},
14+
{
15+
"column_name": "source_id",
16+
"vocabulary": "",
17+
"required": false
18+
},
19+
{
20+
"column_name": "experiment_id",
21+
"vocabulary": "",
22+
"required": true
23+
},
24+
{
25+
"column_name": "frequency",
26+
"vocabulary": "https://raw.githubusercontent.com/NOAA-GFDL/CMIP6_CVs/master/CMIP6_frequency.json",
27+
"required": true
28+
},
29+
{
30+
"column_name": "realm",
31+
"vocabulary": "",
32+
"required": true
33+
},
34+
{
35+
"column_name": "table_id",
36+
"vocabulary": "",
37+
"required": false
38+
},
39+
{
40+
"column_name": "member_id",
41+
"vocabulary": "",
42+
"required": false
43+
},
44+
{
45+
"column_name": "grid_label",
46+
"vocabulary": "",
47+
"required": false
48+
},
49+
{
50+
"column_name": "variable_id",
51+
"vocabulary": "",
52+
"required": true
53+
},
54+
{
55+
"column_name": "time_range",
56+
"vocabulary": "",
57+
"required": true
58+
},
59+
{
60+
"column_name": "chunk_freq",
61+
"required": false
62+
},
63+
{
64+
"column_name":"platform",
65+
"vocabulary": "",
66+
"required": false
67+
},
68+
{
69+
"column_name":"target",
70+
"vocabulary": "",
71+
"required": false
72+
},
73+
{
74+
"column_name": "cell_methods",
75+
"vocabulary": "",
76+
"required": "enhanced"
77+
},
78+
{
79+
"column_name": "path",
80+
"vocabulary": "",
81+
"required": true
82+
},
83+
{
84+
"column_name": "dimensions",
85+
"vocabulary": "",
86+
"required": "enhanced"
87+
},
88+
{
89+
"column_name": "version_id",
90+
"vocabulary": "",
91+
"required": false
92+
},
93+
{
94+
"column_name": "standard_name",
95+
"vocabulary": "",
96+
"required": "enhanced"
97+
}
98+
],
99+
"assets": {
100+
"column_name": "path",
101+
"format": "netcdf",
102+
"format_column_name": null
103+
},
104+
"aggregation_control": {
105+
"variable_column_name": "variable_id",
106+
"groupby_attrs": [
107+
"source_id",
108+
"experiment_id",
109+
"frequency",
110+
"table_id",
111+
"grid_label",
112+
"realm",
113+
"member_id",
114+
"chunk_freq",
115+
"time_range"
116+
],
117+
"aggregations": [
118+
{
119+
"type": "union",
120+
"attribute_name": "variable_id",
121+
"options": {}
122+
},
123+
{
124+
"type": "join_existing",
125+
"attribute_name": "time_range",
126+
"options": {
127+
"dim": "time",
128+
"coords": "minimal",
129+
"compat": "override"
130+
}
131+
}
132+
]
133+
},
134+
"id": "esm_catalog_ESM4",
135+
"description": null,
136+
"title": null,
137+
"last_updated": "2023-05-07T16:35:52Z",
138+
"catalog_file": "gfdl_autotest.csv"
139+
}

catalogbuilder/intakebuilder/configparser.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,4 +30,9 @@ def __init__(self, config):
3030
print("output_file_template :", self.output_file_template)
3131
except:
3232
raise KeyError("output_file_template does not exist in config")
33+
try:
34+
self.schema = configfile['schema']
35+
print("schema:", self.schema)
36+
except:
37+
raise KeyError("schema does not exist in config")
3338

catalogbuilder/scripts/gen_intake_gfdl.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@
2727
sys.exit("The module 'intakebuilder' is still not installed. Do you have intakebuilder in your sys.path or have you activated the conda environment with the intakebuilder package in it? ")
2828

2929
package_dir = os.path.dirname(os.path.abspath(__file__))
30-
template_path = os.path.join(package_dir, '../cats/gfdl_template.json')
30+
#template_path = os.path.join(package_dir, '../cats/gfdl_template.json')
3131

3232
def create_catalog(input_path=None, output_path=None, config=None, filter_realm=None, filter_freq=None, filter_chunk=None,
3333
overwrite=False, append=False, slow = False):
@@ -42,7 +42,13 @@ def create_catalog(input_path=None, output_path=None, config=None, filter_realm=
4242

4343
input_path = configyaml.input_path
4444
output_path = configyaml.output_path
45-
45+
46+
if configyaml.schema is None or not configyaml.schema:
47+
print("We will use catalog builder catalogbuilder/cats/gfdl_template.json as your json schema")
48+
template_path = os.path.join(package_dir, '../cats/gfdl_template.json')
49+
else:
50+
template_path = configyaml.schema
51+
print("Using schema from config file", template_path)
4652
if not os.path.exists(input_path):
4753
sys.exit("Input path does not exist. Adjust configuration.")
4854
if not os.path.exists(Path(output_path).parent.absolute()):

catalogbuilder/tests/config-mdtf.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,6 @@ output_file_template: ['realm','time_range','variable_id']
3737

3838
#######################################################
3939

40-
json_template: "/home/a1r/git/forkCatalogBuilder-/catalogbuilder/cats/mdtf-template.json" #if your json schema is slighlty different but vetted with MSD, you may use your json schema here
40+
schema: "/home/a1r/git/forkCatalogBuilder-/catalogbuilder/cats/mdtf_template.json" # if your json schema is slightly different but vetted with MSD, you may use your json schema here
4141
input_path: "/archive/am5/am5/am5f7b10r0/c96L65_am5f7b10r0_amip/gfdl.ncrc5-deploy-prod-openmp/pp/"
42-
output_path: "/home/a1r/github/noaa-gfdl/catalogs/c96L65_am5f7b10r0_amip30" # ENTER NAME OF THE CSV AND JSON, THE SUFFIX ALONE. e.g catalog (the builder then generates catalog.csv and catalog.json. This can also be an absolute path)
42+
output_path: "/home/a1r/github/noaa-gfdl/catalogs/c96L65_am5f7b10r0_amip30_test" # ENTER NAME OF THE CSV AND JSON, THE SUFFIX ALONE, e.g. catalog (the builder then generates catalog.csv and catalog.json). This can also be an absolute path.

0 commit comments

Comments
 (0)