@@ -69,7 +69,7 @@ def build_esm(
69
69
** kwargs ,
70
70
):
71
71
"""
72
- Build an Intake-ESM datastore
72
+ Build an Intake-ESM datastore and add it to the catalog
73
73
74
74
Parameters
75
75
----------
@@ -81,98 +81,99 @@ def build_esm(
81
81
The builder to use to build the Intake-ESM datastore
82
82
path: str or list of str
83
83
Path or list of paths to crawl for assets/files to add to the Intake-ESM datastore.
84
- translator: :py:class:`~access_nri_intake.catalog.translators.DefaultTranslator`
84
+ translator: :py:class:`~access_nri_intake.catalog.translators.DefaultTranslator`, optional
85
85
An instance of the :py:class:`~access_nri_intake.catalog.translators.DefaultTranslator` class
86
86
for translating info in the Intake-ESM datastore into intake-dataframe-catalog column metadata.
87
- Defaults to access_nri_intake.catalog.translators.DefaultTranslator.
87
+ Defaults to access_nri_intake.catalog.translators.DefaultTranslator
88
88
metadata: dict, optional
89
89
Additional info to store in the intake cat.metadata attribute. This info will be available
90
90
to the translator and to users of the Intake-ESM datastore
91
91
directory: str
92
92
The directory to save the Intake-ESM datastore to. If None, use the current directory
93
93
overwrite: bool, optional
94
- Whether to overwrite any existing entries in the catalog with the same name
94
+ Whether to overwrite if an Intake-ESM datastore with the same name already exists
95
95
kwargs: dict
96
96
Additional kwargs to pass to the builder
97
97
"""
98
98
99
99
metadata = metadata or {}
100
+ directory = directory or ""
100
101
101
102
json_file = os .path .abspath (f"{ os .path .join (directory , name )} .json" )
102
103
if os .path .isfile (json_file ):
103
104
if not overwrite :
104
105
raise CatalogManagerError (
105
106
f"An Intake-ESM datastore already exists for { name } . To overwrite, "
106
- "pass `overwrite=True` to CatalogBuilder.build "
107
+ "pass `overwrite=True` to CatalogBuilder.build_esm "
107
108
)
108
109
109
110
builder = builder (path , ** kwargs ).build ()
110
111
builder .save (name = name , description = description , directory = directory )
111
112
112
113
self .source , self .source_metadata = _open_and_translate (
113
114
json_file ,
115
+ "esm_datastore" ,
114
116
name ,
115
117
description ,
116
118
metadata ,
117
119
translator ,
118
120
columns_with_iterables = list (builder .columns_with_iterables ),
119
121
)
120
122
121
- return self
123
+ self . _add ()
122
124
123
125
def load(
    self,
    name,
    description,
    path,
    driver="esm_datastore",
    translator=DefaultTranslator,
    metadata=None,
    **kwargs,
):
    """
    Open an existing data source with Intake and add it to the catalog

    The opened source and its translated metadata are stored on ``self.source`` and
    ``self.source_metadata`` before the source is added to the catalog. Nothing is returned.

    Parameters
    ----------
    name: str
        The name of the data source
    description: str
        Description of the contents of the data source
    path: str or list of str
        The path to the Intake data source. A list is only accepted if it contains exactly
        one path
    driver: str
        The name of the Intake driver to use to open the data source
    translator: :py:class:`~access_nri_intake.catalog.translators.DefaultTranslator`, optional
        The translator to use for translating data source metadata into intake-dataframe-catalog
        column metadata. Defaults to access_nri_intake.catalog.translators.DefaultTranslator
    metadata: dict, optional
        Additional info to store in the intake metadata attribute for this data source. This info
        will be available to the translator and to users of the catalog
    kwargs: dict, optional
        Additional kwargs to pass to ``intake.open_<driver>``

    Raises
    ------
    CatalogManagerError
        If ``path`` is a list that does not contain exactly one entry
    """

    if isinstance(path, list):
        n_paths = len(path)
        if n_paths != 1:
            raise CatalogManagerError(
                f"Only a single data source can be passed to CatalogManager.load. Received {n_paths}"
            )
        # Exactly one entry: unwrap it so a plain path is handed on
        (path,) = path

    if not metadata:
        metadata = {}

    opened = _open_and_translate(
        path, driver, name, description, metadata, translator, **kwargs
    )
    self.source, self.source_metadata = opened

    self._add()
167
173
168
- def add (self , ** kwargs ):
174
+ def _add (self ):
169
175
"""
170
176
Add a source to the catalog
171
-
172
- Parameters
173
- ----------
174
- kwargs: dict, optional
175
- Additional keyword arguments passed to :py:func:`~pandas.DataFrame.to_csv`.
176
177
"""
177
178
178
179
if self .source is None :
@@ -183,7 +184,7 @@ def add(self, **kwargs):
183
184
# Overwrite the catalog name with the name_column entry in metadata
184
185
name = self .source_metadata [NAME_COLUMN ].unique ()
185
186
if len (name ) != 1 :
186
- raise ValueError (
187
+ raise CatalogManagerError (
187
188
f"Metadata column '{ NAME_COLUMN } ' must be the same for all rows in source_metadata "
188
189
"since this corresponds to the source name"
189
190
)
@@ -199,19 +200,31 @@ def add(self, **kwargs):
199
200
self .dfcat .add (self .source , row .to_dict (), overwrite = overwrite )
200
201
overwrite = False
201
202
203
def save(self, **kwargs):
    """
    Save the catalog by delegating to ``self.dfcat.save``

    Parameters
    ----------
    kwargs: dict, optional
        Additional keyword arguments passed to :py:func:`~pandas.DataFrame.to_csv`.
    """
    catalog = self.dfcat
    catalog.save(**kwargs)
203
213
204
214
205
def _open_and_translate(
    file, driver, name, description, metadata, translator, **kwargs
):
    """
    Open an Intake data source, label it and translate its metadata

    The source is opened with ``intake.open_<driver>(file, **kwargs)``, has its ``name``,
    ``description`` and ``metadata`` attributes set to the values provided, and is then
    passed to the translator.

    Returns
    -------
    tuple
        The opened source and the result of
        ``translator(source, CORE_COLUMNS).translate(TRANSLATOR_GROUPBY_COLUMNS)``
    """
    # Look the opener up by name so any driver exposed as intake.open_<driver> can be used
    source = getattr(intake, f"open_{driver}")(file, **kwargs)

    source.name = name
    source.description = description
    source.metadata = metadata

    source_translator = translator(source, CORE_COLUMNS)
    translated = source_translator.translate(TRANSLATOR_GROUPBY_COLUMNS)

    return source, translated
0 commit comments