Skip to content

Commit f0f857b

Browse files
committed
Merge branch 'main' into 127_OM2_ERA5
2 parents d493ff5 + f83a7bb commit f0f857b

File tree

4 files changed

+77
-48
lines changed

4 files changed

+77
-48
lines changed

.readthedocs.yml

+5
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,10 @@
11
version: 2
22

3+
build:
4+
os: "ubuntu-22.04"
5+
tools:
6+
python: "mambaforge-22.9"
7+
38
sphinx:
49
configuration: docs/conf.py
510

src/access_nri_intake/catalog/manager.py

+48-35
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@ def build_esm(
6969
**kwargs,
7070
):
7171
"""
72-
Build an Intake-ESM datastore
72+
Build an Intake-ESM datastore and add it to the catalog
7373
7474
Parameters
7575
----------
@@ -81,98 +81,99 @@ def build_esm(
8181
The builder to use to build the Intake-ESM datastore
8282
path: str or list of str
8383
Path or list of paths to crawl for assets/files to add to the Intake-ESM datastore.
84-
translator: :py:class:`~access_nri_intake.catalog.translators.DefaultTranslator`
84+
translator: :py:class:`~access_nri_intake.catalog.translators.DefaultTranslator`, optional
8585
An instance of the :py:class:`~access_nri_intake.catalog.translators.DefaultTranslator` class
8686
for translating info in the Intake-ESM datastore into intake-dataframe-catalog column metadata.
87-
Defaults to access_nri_intake.catalog.translators.DefaultTranslator.
87+
Defaults to access_nri_intake.catalog.translators.DefaultTranslator
8888
metadata: dict, optional
8989
Additional info to store in the intake cat.metadata attribute. This info will be available
9090
to the translator and to users of the Intake-ESM datastore
9191
directory: str, optional
9292
The directory to save the Intake-ESM datastore to. If None, use the current directory
9393
overwrite: bool, optional
94-
Whether to overwrite any existing entries in the catalog with the same name
94+
Whether to overwrite if an Intake-ESM datastore with the same name already exists
9595
kwargs: dict
9696
Additional kwargs to pass to the builder
9797
"""
9898

9999
metadata = metadata or {}
100+
directory = directory or ""
100101

101102
json_file = os.path.abspath(f"{os.path.join(directory, name)}.json")
102103
if os.path.isfile(json_file):
103104
if not overwrite:
104105
raise CatalogManagerError(
105106
f"An Intake-ESM datastore already exists for {name}. To overwrite, "
106-
"pass `overwrite=True` to CatalogBuilder.build"
107+
"pass `overwrite=True` to CatalogBuilder.build_esm"
107108
)
108109

109110
builder = builder(path, **kwargs).build()
110111
builder.save(name=name, description=description, directory=directory)
111112

112113
self.source, self.source_metadata = _open_and_translate(
113114
json_file,
115+
"esm_datastore",
114116
name,
115117
description,
116118
metadata,
117119
translator,
118120
columns_with_iterables=list(builder.columns_with_iterables),
119121
)
120122

121-
return self
123+
self._add()
122124

123125
def load(
124126
self,
125127
name,
126128
description,
127129
path,
128-
translator,
130+
driver="esm_datastore",
131+
translator=DefaultTranslator,
129132
metadata=None,
130133
**kwargs,
131134
):
132135
"""
133-
Load an existing intake catalog and add it to the catalog
136+
Load an existing data source using Intake and add it to the catalog
134137
135138
Parameters
136139
----------
137140
name: str
138-
The name of the catalog
141+
The name of the data source
139142
description: str
140-
Description of the contents of the catalog
143+
Description of the contents of the data source
141144
path: str
142-
The path to the intake-esm catalog JSON file
143-
translator: :py:class:`~access_nri_catalog.metacat.translators.DefaultTranslator`
145+
The path to the Intake data source
146+
driver: str
147+
The name of the Intake driver to use to open the data source
148+
translator: :py:class:`~access_nri_intake.catalog.translators.DefaultTranslator`, optional
144149
An instance of the :py:class:`~access_nri_intake.catalog.translators.DefaultTranslator` class for
145-
translating intake-esm column metadata into intake-dataframe-catalog column metadata
150+
translating data source metadata into intake-dataframe-catalog column metadata. Defaults to
151+
access_nri_intake.catalog.translators.DefaultTranslator
146152
metadata: dict, optional
147-
Additional info to store in the intake cat.metadata attribute. This info will be available to
148-
the translator and to users of the catalog
153+
Additional info to store in the intake metadata attribute for this data source. This info will be
154+
available to the translator and to users of the catalog
149155
kwargs: dict, optional
150-
Additional kwargs to pass to :py:class:`~intake.open_esm_datastore`
156+
Additional kwargs to pass to :py:class:`~intake.open_<driver>`
151157
"""
152158

153159
if isinstance(path, list):
154160
if len(path) != 1:
155-
raise ValueError(
156-
"Only a single JSON file can be passed to CatalogManager.load_esm. Received {len(path)}"
161+
raise CatalogManagerError(
162+
f"Only a single data source can be passed to CatalogManager.load. Received {len(path)}"
157163
)
158164
path = path[0]
159165

160166
metadata = metadata or {}
161167

162168
self.source, self.source_metadata = _open_and_translate(
163-
path, name, description, metadata, translator, **kwargs
169+
path, driver, name, description, metadata, translator, **kwargs
164170
)
165171

166-
return self
172+
self._add()
167173

168-
def add(self, **kwargs):
174+
def _add(self):
169175
"""
170176
Add a source to the catalog
171-
172-
Parameters
173-
----------
174-
kwargs: dict, optional
175-
Additional keyword arguments passed to :py:func:`~pandas.DataFrame.to_csv`.
176177
"""
177178

178179
if self.source is None:
@@ -183,7 +184,7 @@ def add(self, **kwargs):
183184
# Overwrite the catalog name with the name_column entry in metadata
184185
name = self.source_metadata[NAME_COLUMN].unique()
185186
if len(name) != 1:
186-
raise ValueError(
187+
raise CatalogManagerError(
187188
f"Metadata column '{NAME_COLUMN}' must be the same for all rows in source_metadata "
188189
"since this corresponds to the source name"
189190
)
@@ -199,19 +200,31 @@ def add(self, **kwargs):
199200
self.dfcat.add(self.source, row.to_dict(), overwrite=overwrite)
200201
overwrite = False
201202

203+
def save(self, **kwargs):
204+
"""
205+
Save the catalog
206+
207+
Parameters
208+
----------
209+
kwargs: dict, optional
210+
Additional keyword arguments passed to :py:func:`~pandas.DataFrame.to_csv`.
211+
"""
202212
self.dfcat.save(**kwargs)
203213

204214

205-
def _open_and_translate(json_file, name, description, metadata, translator, **kwargs):
215+
def _open_and_translate(
216+
file, driver, name, description, metadata, translator, **kwargs
217+
):
206218
"""
207-
Open an esm-datastore, assign name, description and metadata attrs and
219+
Open an Intake data source, assign name, description and metadata attrs and
208220
translate using the provided translator
209221
"""
210-
cat = intake.open_esm_datastore(json_file, **kwargs)
211-
cat.name = name
212-
cat.description = description
213-
cat.metadata = metadata
222+
open_ = getattr(intake, f"open_{driver}")
223+
source = open_(file, **kwargs)
224+
source.name = name
225+
source.description = description
226+
source.metadata = metadata
214227

215-
metadata = translator(cat, CORE_COLUMNS).translate(TRANSLATOR_GROUPBY_COLUMNS)
228+
metadata = translator(source, CORE_COLUMNS).translate(TRANSLATOR_GROUPBY_COLUMNS)
216229

217-
return cat, metadata
230+
return source, metadata

src/access_nri_intake/cli.py

+6-3
Original file line numberDiff line numberDiff line change
@@ -188,13 +188,13 @@ def _get_project(path):
188188
storage_flags = "+".join(sorted([f"gdata/{proj}" for proj in project]))
189189

190190
# Build the catalog
191+
cm = CatalogManager(path=metacatalog_path)
191192
for method, args in parsed_sources:
192-
man = CatalogManager(path=metacatalog_path)
193193
logger.info(f"Adding '{args['name']}' to metacatalog '{metacatalog_path}'")
194-
getattr(man, method)(**args).add()
194+
getattr(cm, method)(**args)
195195

196196
# Write catalog yaml file
197-
cat = man.dfcat
197+
cat = cm.dfcat
198198
cat.name = "access_nri"
199199
cat.description = "ACCESS-NRI intake catalog"
200200
yaml_dict = yaml.safe_load(cat.yaml())
@@ -211,6 +211,9 @@ def _get_project(path):
211211
"version": {"description": "Catalog version", "type": "str", "default": version}
212212
}
213213

214+
# Save the catalog
215+
cm.save()
216+
214217
_here = os.path.abspath(os.path.dirname(__file__))
215218
if update:
216219
with open(os.path.join(_here, "data", "catalog.yaml"), "w") as fobj:

tests/test_manager.py

+18-10
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ def test_CatalogManager_init(tmp_path):
2525
assert hasattr(cat, "dfcat")
2626

2727
with pytest.raises(CatalogManagerError) as excinfo:
28-
cat.add()
28+
cat._add()
2929
assert "first load or build the source" in str(excinfo.value)
3030

3131

@@ -55,16 +55,17 @@ def test_CatalogManager_build_esm(tmp_path, test_data, builder, basedir, kwargs)
5555
directory=str(tmp_path),
5656
**kwargs,
5757
)
58-
cat.build_esm(**args).add()
58+
cat.build_esm(**args)
5959

6060
# Try to rebuild without setting overwrite
6161
with pytest.raises(CatalogManagerError) as excinfo:
6262
cat.build_esm(**args)
6363
assert "An Intake-ESM datastore already exists" in str(excinfo.value)
6464

6565
# Overwrite
66-
cat.build_esm(**args, overwrite=True).add()
66+
cat.build_esm(**args, overwrite=True)
6767

68+
cat.save()
6869
cat = CatalogManager(path)
6970
assert cat.mode == "a"
7071

@@ -89,7 +90,8 @@ def test_CatalogManager_load(tmp_path, test_data, translator, datastore, metadat
8990
translator=translator,
9091
metadata=metadata,
9192
)
92-
cat.load(**args).add()
93+
cat.load(**args)
94+
cat.save()
9395

9496
cat = CatalogManager(path)
9597
assert cat.mode == "a"
@@ -110,9 +112,9 @@ def test_CatalogManager_load_error(tmp_path, test_data):
110112
cat.load(**args, path=[path])
111113

112114
# Test fails when len > 1
113-
with pytest.raises(ValueError) as excinfo:
115+
with pytest.raises(CatalogManagerError) as excinfo:
114116
cat.load(**args, path=[path, path])
115-
assert "Only a single JSON file" in str(excinfo.value)
117+
assert "Only a single data source" in str(excinfo.value)
116118

117119

118120
def test_CatalogManager_all(tmp_path, test_data):
@@ -129,7 +131,10 @@ def test_CatalogManager_all(tmp_path, test_data):
129131
)
130132
cat.load(
131133
**load_args,
132-
).add()
134+
)
135+
assert len(cat.dfcat) == 1
136+
cat.save()
137+
assert len(CatalogManager(path).dfcat) == 1
133138

134139
# Build source
135140
cat.build_esm(
@@ -143,12 +148,15 @@ def test_CatalogManager_all(tmp_path, test_data):
143148
]
144149
),
145150
directory=str(tmp_path),
146-
).add()
147-
151+
)
152+
# Still only one entry on disk
148153
assert len(cat.dfcat) == 2
154+
assert len(CatalogManager(path).dfcat) == 1
149155

150156
# Check that entry with same name overwrites correctly
151157
cat.load(
152158
**load_args,
153-
).add()
159+
)
154160
assert len(cat.dfcat) == 2
161+
cat.save()
162+
assert len(CatalogManager(path).dfcat) == 2

0 commit comments

Comments
 (0)