Skip to content

Commit 7c06bd5

Browse files
TRT-571 - Update net2cog to process multiple variables.
1 parent 77b1936 commit 7c06bd5

File tree

4 files changed

+361
-187
lines changed

4 files changed

+361
-187
lines changed

net2cog/netcdf_convert.py

Lines changed: 100 additions & 114 deletions
Original file line numberDiff line numberDiff line change
@@ -11,14 +11,15 @@
1111
import os
1212
import pathlib
1313
import subprocess
14-
import tempfile
1514
from os.path import join as pjoin, basename, dirname, exists, splitext
1615
from subprocess import check_call
16+
from tempfile import TemporaryDirectory
1717
from typing import List
1818

1919
import rasterio
2020
import rioxarray # noqa
2121
import xarray as xr
22+
from harmony_service_lib.util import generate_output_filename
2223
from rasterio import CRS
2324
from rio_cogeo.cogeo import cog_translate
2425
from rio_cogeo.profiles import cog_profiles
@@ -37,46 +38,21 @@ def __init__(self, msg):
3738
super().__init__(msg)
3839

3940

40-
def run_command(command, work_dir):
41-
"""
42-
A simple utility to execute a subprocess command.
43-
"""
44-
try:
45-
out_call = check_call(command, stderr=subprocess.STDOUT, cwd=work_dir)
46-
return out_call
47-
except subprocess.CalledProcessError as err:
48-
LOGGER.error("command '%s' return with error (code %s): %s",
49-
err.cmd, err.returncode, err.output)
50-
raise
51-
52-
53-
def check_dir(fname):
54-
"""
55-
To return filename and path without file extension
56-
"""
57-
file_name = fname.split('/')
58-
rel_path = pjoin(*file_name[-2:])
59-
file_wo_extension, _ = splitext(rel_path)
60-
return file_wo_extension
41+
def _rioxr_swapdims(netcdf_xarray):
42+
netcdf_xarray.coords['y'] = ('lat', netcdf_xarray.lat)
43+
netcdf_xarray.coords['x'] = ('lon', netcdf_xarray.lon)
6144

45+
return netcdf_xarray.swap_dims({'lat': 'y', 'lon': 'x'})
6246

63-
def get_gtiff_name(output_file):
64-
"""
65-
To create tmp filename to convert to COG and create a filename
66-
just as source but without '.TIF' extension
67-
"""
68-
outf = os.path.basename(output_file)
69-
dir_path = dirname(output_file)
70-
rel_path = check_dir(outf)
71-
out_fname = pjoin(dir_path, rel_path)
72-
if not exists(out_fname):
73-
os.makedirs(out_fname)
74-
return pjoin(out_fname, rel_path)
7547

76-
77-
def _write_cogtiff(out_f_name, nc_xarray):
48+
def _write_cogtiff(
49+
output_directory: str,
50+
nc_xarray: xr.Dataset,
51+
variable_name: str,
52+
input_filename: str
53+
) -> str | None:
7854
"""
79-
This function converts each variable inside a NetCDF file into a
55+
This function converts a variable inside a NetCDF file into a
8056
cloud optimized geotiff.
8157
8258
Parameters
@@ -94,81 +70,89 @@ def _write_cogtiff(out_f_name, nc_xarray):
9470
Assumption that 0 is always on the prime meridian/equator.
9571
"""
9672

97-
cogs_generated = []
98-
with tempfile.TemporaryDirectory() as tempdir:
99-
100-
# variables in netcdf
101-
for var in nc_xarray.variables:
102-
if var in EXCLUDE_VARS:
103-
continue
104-
LOGGER.debug("NetCDF Var: %s", var)
105-
106-
def rioxr_swapdims(netcdf_xarray):
107-
netcdf_xarray.coords['y'] = ('lat', netcdf_xarray.lat)
108-
netcdf_xarray.coords['x'] = ('lon', netcdf_xarray.lon)
109-
110-
return netcdf_xarray.swap_dims({'lat': 'y', 'lon': 'x'})
111-
112-
# copy to a tempfolder
113-
out_fname = out_f_name + '_' + var + '.tif'
114-
temp_fname = pjoin(tempdir, basename(out_fname))
115-
73+
LOGGER.debug("NetCDF Var: %s", variable_name)
74+
75+
if variable_name in EXCLUDE_VARS:
76+
LOGGER.debug(f"Variable {variable_name} is excluded. Will not produce COG")
77+
return None
78+
79+
output_basename = generate_output_filename(
80+
input_filename,
81+
ext='tif',
82+
variable_subset=[variable_name],
83+
is_reformatted=True,
84+
)
85+
output_file_name = output_directory.pathjoin(output_basename)
86+
87+
with TemporaryDirectory() as tempdir:
88+
temp_file_name = os.path.join(tempdir, output_basename)
89+
90+
# copy to a tempfolder
91+
# out_fname = out_f_name + '_' + var + '.tif'
92+
# temp_fname = pjoin(tempdir, basename(out_fname))
93+
94+
try:
95+
nc_xarray[variable_name].rio.to_raster(temp_file_name)
96+
except LookupError as err:
97+
LOGGER.info("Variable %s cannot be converted to tif: %s", variable_name, err)
98+
return None
99+
except DimensionError as dmerr:
116100
try:
117-
nc_xarray[var].rio.to_raster(temp_fname)
118-
except LookupError as err:
119-
LOGGER.info("Variable %s cannot be converted to tif: %s", var, err)
120-
continue
121-
except DimensionError as dmerr:
122-
try:
123-
LOGGER.info("%s: No x or y xarray dimensions, adding them...", dmerr)
124-
nc_xarray_tmp = rioxr_swapdims(nc_xarray)
125-
nc_xarray_tmp[var].rio.to_raster(temp_fname)
126-
except RuntimeError as runerr:
127-
LOGGER.info("Variable %s cannot be converted to tif: %s", var, runerr)
128-
continue
129-
except Exception as aerr: # pylint: disable=broad-except
130-
LOGGER.info("Variable %s cannot be converted to tif: %s", var, aerr)
131-
continue
132-
133-
# Option to add additional GDAL config settings
134-
# config = dict(GDAL_NUM_THREADS="ALL_CPUS", GDAL_TIFF_OVR_BLOCKSIZE="128")
135-
# with rasterio.Env(**config):
136-
137-
LOGGER.info("Starting conversion... %s", out_fname)
138-
139-
# default CRS setting
140-
# crs = rasterio.crs.CRS({"init": "epsg:3857"})
141-
142-
with rasterio.open(temp_fname, mode='r+') as src_dataset:
143-
# if src_dst.crs is None:
144-
# src_dst.crs = crs
145-
src_dataset.crs = CRS.from_proj4(proj="+proj=latlong")
146-
dst_profile = cog_profiles.get("deflate")
147-
cog_translate(src_dataset, out_fname, dst_profile, use_cog_driver=True)
148-
149-
cogs_generated.append(out_fname)
150-
LOGGER.info("Finished conversion, writing variable: %s", out_fname)
151-
LOGGER.info("NetCDF conversion complete. Returning COGs generated.")
152-
return cogs_generated
153-
154-
155-
def netcdf_converter(input_nc_file: pathlib.Path, output_cog_pathname: pathlib.Path, var_list: list = ()) -> List[str]:
156-
"""
157-
Primary function for beginning NetCDF conversion using rasterio,
101+
LOGGER.info("%s: No x or y xarray dimensions, adding them...", dmerr)
102+
nc_xarray_tmp = _rioxr_swapdims(nc_xarray)
103+
nc_xarray_tmp[variable_name].rio.to_raster(temp_file_name)
104+
except RuntimeError as runerr:
105+
LOGGER.info("Variable %s cannot be converted to tif: %s", variable_name, runerr)
106+
return None
107+
except Exception as aerr: # pylint: disable=broad-except
108+
LOGGER.info("Variable %s cannot be converted to tif: %s", variable_name, aerr)
109+
return None
110+
111+
# Option to add additional GDAL config settings
112+
# config = dict(GDAL_NUM_THREADS="ALL_CPUS", GDAL_TIFF_OVR_BLOCKSIZE="128")
113+
# with rasterio.Env(**config):
114+
115+
LOGGER.info("Starting conversion... %s", output_file_name)
116+
117+
# default CRS setting
118+
# crs = rasterio.crs.CRS({"init": "epsg:3857"})
119+
120+
with rasterio.open(temp_file_name, mode='r+') as src_dataset:
121+
# if src_dst.crs is None:
122+
# src_dst.crs = crs
123+
src_dataset.crs = CRS.from_proj4(proj="+proj=latlong")
124+
dst_profile = cog_profiles.get("deflate")
125+
cog_translate(
126+
src_dataset,
127+
output_file_name,
128+
dst_profile,
129+
use_cog_driver=True
130+
)
131+
132+
LOGGER.info("Finished conversion, writing variable: %s", output_file_name)
133+
LOGGER.info("NetCDF conversion complete. Returning COG generated.")
134+
return output_file_name
135+
136+
137+
def netcdf_converter(
138+
input_nc_file: pathlib.Path,
139+
output_directory: pathlib.Path,
140+
var_list: list[str] | None
141+
) -> List[str]:
142+
"""Primary function for beginning NetCDF conversion using rasterio,
158143
rioxarray and xarray
159144
160145
Parameters
161146
----------
162147
input_nc_file : pathlib.Path
163148
Path to NetCDF file to process
164-
output_cog_pathname : pathlib.Path
165-
COG Output path and NetCDF filename, filename converted to cog variable
166-
filename (.tif)
167-
ex: tests/data/tmpygj2vgxf/
168-
RSS_smap_SSS_L3_8day_running_2020_005_FNL_v04.0.nc
169-
var_list : list
149+
output_directory : pathlib.Path
150+
Path to temporary directory into which results will be placed before
151+
staging in S3.
152+
var_list : str | None
170153
List of variable names to be converted to various single band cogs,
171-
ex: ['gland', 'fland', 'sss_smap']
154+
ex: ['gland', 'fland', 'sss_smap']. If a Harmony request asks for "all"
155+
variables, the input value will be None.
172156
173157
Notes
174158
-----
@@ -178,11 +162,8 @@ def netcdf_converter(input_nc_file: pathlib.Path, output_cog_pathname: pathlib.P
178162
netcdf_file = os.path.abspath(input_nc_file)
179163
LOGGER.debug('NetCDF Path: %s', netcdf_file)
180164

181-
gtiff_fname = get_gtiff_name(output_cog_pathname)
182-
183165
if netcdf_file.endswith('.nc'):
184166
LOGGER.info("Reading %s", basename(netcdf_file))
185-
LOGGER.info('Tmp GTiff filename: %s', gtiff_fname)
186167

187168
xds = xr.open_dataset(netcdf_file)
188169

@@ -192,13 +173,18 @@ def netcdf_converter(input_nc_file: pathlib.Path, output_cog_pathname: pathlib.P
192173
or ({"x", "y"}.issubset(set(xds.dims)))):
193174
# used to invert y axis
194175
# xds_reversed = xds.reindex(lat=xds.lat[::-1])
195-
LOGGER.info("Writing COG to %s", basename(gtiff_fname))
196-
if var_list:
197-
try:
198-
xds = xds[var_list]
199-
except KeyError as error:
200-
raise Net2CogError(f"Variable {error} not found in dataset") from error
201-
return _write_cogtiff(gtiff_fname, xds)
176+
177+
if var_list is None:
178+
var_list = list(xds.data_vars.keys())
179+
180+
try:
181+
return [
182+
_write_cogtiff(output_directory, xds, variable_name, input_nc_file)
183+
for variable_name in var_list
184+
]
185+
except KeyError as error:
186+
raise Net2CogError(f"Variable {error} not found in dataset") from error
187+
202188
LOGGER.error("%s: NetCDF file does not contain spatial dimensions such as lat / lon "
203189
"or x / y", netcdf_file)
204190
return []

0 commit comments

Comments
 (0)