Skip to content

Commit

Permalink
TRT-571 - Update net2cog to process multiple variables.
Browse files Browse the repository at this point in the history
  • Loading branch information
owenlittlejohns committed Jan 24, 2025
1 parent 77b1936 commit 7c06bd5
Show file tree
Hide file tree
Showing 4 changed files with 361 additions and 187 deletions.
214 changes: 100 additions & 114 deletions net2cog/netcdf_convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,15 @@
import os
import pathlib
import subprocess
import tempfile
from os.path import join as pjoin, basename, dirname, exists, splitext
from subprocess import check_call
from tempfile import TemporaryDirectory
from typing import List

import rasterio
import rioxarray # noqa
import xarray as xr
from harmony_service_lib.util import generate_output_filename
from rasterio import CRS
from rio_cogeo.cogeo import cog_translate
from rio_cogeo.profiles import cog_profiles
Expand All @@ -37,46 +38,21 @@ def __init__(self, msg):
super().__init__(msg)


def run_command(command, work_dir):
"""
A simple utility to execute a subprocess command.
"""
try:
out_call = check_call(command, stderr=subprocess.STDOUT, cwd=work_dir)
return out_call
except subprocess.CalledProcessError as err:
LOGGER.error("command '%s' return with error (code %s): %s",
err.cmd, err.returncode, err.output)
raise


def check_dir(fname):
"""
To return filename and path without file extension
"""
file_name = fname.split('/')
rel_path = pjoin(*file_name[-2:])
file_wo_extension, _ = splitext(rel_path)
return file_wo_extension
def _rioxr_swapdims(netcdf_xarray):
netcdf_xarray.coords['y'] = ('lat', netcdf_xarray.lat)
netcdf_xarray.coords['x'] = ('lon', netcdf_xarray.lon)

return netcdf_xarray.swap_dims({'lat': 'y', 'lon': 'x'})

def get_gtiff_name(output_file):
"""
To create tmp filename to convert to COG and create a filename
just as source but without '.TIF' extension
"""
outf = os.path.basename(output_file)
dir_path = dirname(output_file)
rel_path = check_dir(outf)
out_fname = pjoin(dir_path, rel_path)
if not exists(out_fname):
os.makedirs(out_fname)
return pjoin(out_fname, rel_path)


def _write_cogtiff(out_f_name, nc_xarray):
def _write_cogtiff(
output_directory: str,
nc_xarray: xr.Dataset,
variable_name: str,
input_filename: str
) -> str | None:
"""
This function converts each variable inside a NetCDF file into a
This function converts a variable inside a NetCDF file into a
cloud optimized geotiff.
Parameters
Expand All @@ -94,81 +70,89 @@ def _write_cogtiff(out_f_name, nc_xarray):
Assumption that 0 is always on the prime meridian/equator.
"""

cogs_generated = []
with tempfile.TemporaryDirectory() as tempdir:

# variables in netcdf
for var in nc_xarray.variables:
if var in EXCLUDE_VARS:
continue
LOGGER.debug("NetCDF Var: %s", var)

def rioxr_swapdims(netcdf_xarray):
netcdf_xarray.coords['y'] = ('lat', netcdf_xarray.lat)
netcdf_xarray.coords['x'] = ('lon', netcdf_xarray.lon)

return netcdf_xarray.swap_dims({'lat': 'y', 'lon': 'x'})

# copy to a tempfolder
out_fname = out_f_name + '_' + var + '.tif'
temp_fname = pjoin(tempdir, basename(out_fname))

LOGGER.debug("NetCDF Var: %s", variable_name)

if variable_name in EXCLUDE_VARS:
LOGGER.debug(f"Variable {variable_name} is excluded. Will not produce COG")
return None

output_basename = generate_output_filename(
input_filename,
ext='tif',
variable_subset=[variable_name],
is_reformatted=True,
)
output_file_name = output_directory.pathjoin(output_basename)

with TemporaryDirectory() as tempdir:
temp_file_name = os.path.join(tempdir, output_basename)

# copy to a tempfolder
# out_fname = out_f_name + '_' + var + '.tif'
# temp_fname = pjoin(tempdir, basename(out_fname))

try:
nc_xarray[variable_name].rio.to_raster(temp_file_name)
except LookupError as err:
LOGGER.info("Variable %s cannot be converted to tif: %s", variable_name, err)
return None
except DimensionError as dmerr:
try:
nc_xarray[var].rio.to_raster(temp_fname)
except LookupError as err:
LOGGER.info("Variable %s cannot be converted to tif: %s", var, err)
continue
except DimensionError as dmerr:
try:
LOGGER.info("%s: No x or y xarray dimensions, adding them...", dmerr)
nc_xarray_tmp = rioxr_swapdims(nc_xarray)
nc_xarray_tmp[var].rio.to_raster(temp_fname)
except RuntimeError as runerr:
LOGGER.info("Variable %s cannot be converted to tif: %s", var, runerr)
continue
except Exception as aerr: # pylint: disable=broad-except
LOGGER.info("Variable %s cannot be converted to tif: %s", var, aerr)
continue

# Option to add additional GDAL config settings
# config = dict(GDAL_NUM_THREADS="ALL_CPUS", GDAL_TIFF_OVR_BLOCKSIZE="128")
# with rasterio.Env(**config):

LOGGER.info("Starting conversion... %s", out_fname)

# default CRS setting
# crs = rasterio.crs.CRS({"init": "epsg:3857"})

with rasterio.open(temp_fname, mode='r+') as src_dataset:
# if src_dst.crs is None:
# src_dst.crs = crs
src_dataset.crs = CRS.from_proj4(proj="+proj=latlong")
dst_profile = cog_profiles.get("deflate")
cog_translate(src_dataset, out_fname, dst_profile, use_cog_driver=True)

cogs_generated.append(out_fname)
LOGGER.info("Finished conversion, writing variable: %s", out_fname)
LOGGER.info("NetCDF conversion complete. Returning COGs generated.")
return cogs_generated


def netcdf_converter(input_nc_file: pathlib.Path, output_cog_pathname: pathlib.Path, var_list: list = ()) -> List[str]:
"""
Primary function for beginning NetCDF conversion using rasterio,
LOGGER.info("%s: No x or y xarray dimensions, adding them...", dmerr)
nc_xarray_tmp = _rioxr_swapdims(nc_xarray)
nc_xarray_tmp[variable_name].rio.to_raster(temp_file_name)
except RuntimeError as runerr:
LOGGER.info("Variable %s cannot be converted to tif: %s", variable_name, runerr)
return None
except Exception as aerr: # pylint: disable=broad-except
LOGGER.info("Variable %s cannot be converted to tif: %s", variable_name, aerr)
return None

# Option to add additional GDAL config settings
# config = dict(GDAL_NUM_THREADS="ALL_CPUS", GDAL_TIFF_OVR_BLOCKSIZE="128")
# with rasterio.Env(**config):

LOGGER.info("Starting conversion... %s", output_file_name)

# default CRS setting
# crs = rasterio.crs.CRS({"init": "epsg:3857"})

with rasterio.open(temp_file_name, mode='r+') as src_dataset:
# if src_dst.crs is None:
# src_dst.crs = crs
src_dataset.crs = CRS.from_proj4(proj="+proj=latlong")
dst_profile = cog_profiles.get("deflate")
cog_translate(
src_dataset,
output_file_name,
dst_profile,
use_cog_driver=True
)

LOGGER.info("Finished conversion, writing variable: %s", output_file_name)
LOGGER.info("NetCDF conversion complete. Returning COG generated.")
return output_file_name


def netcdf_converter(
input_nc_file: pathlib.Path,
output_directory: pathlib.Path,
var_list: list[str] | None
) -> List[str]:
"""Primary function for beginning NetCDF conversion using rasterio,
rioxarray and xarray
Parameters
----------
input_nc_file : pathlib.Path
Path to NetCDF file to process
output_cog_pathname : pathlib.Path
COG Output path and NetCDF filename, filename converted to cog variable
filename (.tif)
ex: tests/data/tmpygj2vgxf/
RSS_smap_SSS_L3_8day_running_2020_005_FNL_v04.0.nc
var_list : list
output_directory : pathlib.Path
Path to temporary directory into which results will be placed before
staging in S3.
var_list : str | None
List of variable names to be converted to various single band cogs,
ex: ['gland', 'fland', 'sss_smap']
ex: ['gland', 'fland', 'sss_smap']. If a Harmony request asks for "all"
variables, the input value will be None.
Notes
-----
Expand All @@ -178,11 +162,8 @@ def netcdf_converter(input_nc_file: pathlib.Path, output_cog_pathname: pathlib.P
netcdf_file = os.path.abspath(input_nc_file)
LOGGER.debug('NetCDF Path: %s', netcdf_file)

gtiff_fname = get_gtiff_name(output_cog_pathname)

if netcdf_file.endswith('.nc'):
LOGGER.info("Reading %s", basename(netcdf_file))
LOGGER.info('Tmp GTiff filename: %s', gtiff_fname)

xds = xr.open_dataset(netcdf_file)

Expand All @@ -192,13 +173,18 @@ def netcdf_converter(input_nc_file: pathlib.Path, output_cog_pathname: pathlib.P
or ({"x", "y"}.issubset(set(xds.dims)))):
# used to invert y axis
# xds_reversed = xds.reindex(lat=xds.lat[::-1])
LOGGER.info("Writing COG to %s", basename(gtiff_fname))
if var_list:
try:
xds = xds[var_list]
except KeyError as error:
raise Net2CogError(f"Variable {error} not found in dataset") from error
return _write_cogtiff(gtiff_fname, xds)

if var_list is None:
var_list = list(xds.data_vars.keys())

try:
return [
_write_cogtiff(output_directory, xds, variable_name, input_nc_file)
for variable_name in var_list
]
except KeyError as error:
raise Net2CogError(f"Variable {error} not found in dataset") from error

LOGGER.error("%s: NetCDF file does not contain spatial dimensions such as lat / lon "
"or x / y", netcdf_file)
return []
Expand Down
Loading

0 comments on commit 7c06bd5

Please sign in to comment.