11
11
import os
12
12
import pathlib
13
13
import subprocess
14
- import tempfile
15
14
from os .path import join as pjoin , basename , dirname , exists , splitext
16
15
from subprocess import check_call
16
+ from tempfile import TemporaryDirectory
17
17
from typing import List
18
18
19
19
import rasterio
20
20
import rioxarray # noqa
21
21
import xarray as xr
22
+ from harmony_service_lib .util import generate_output_filename
22
23
from rasterio import CRS
23
24
from rio_cogeo .cogeo import cog_translate
24
25
from rio_cogeo .profiles import cog_profiles
@@ -37,46 +38,21 @@ def __init__(self, msg):
37
38
super ().__init__ (msg )
38
39
39
40
40
- def run_command (command , work_dir ):
41
- """
42
- A simple utility to execute a subprocess command.
43
- """
44
- try :
45
- out_call = check_call (command , stderr = subprocess .STDOUT , cwd = work_dir )
46
- return out_call
47
- except subprocess .CalledProcessError as err :
48
- LOGGER .error ("command '%s' return with error (code %s): %s" ,
49
- err .cmd , err .returncode , err .output )
50
- raise
51
-
52
-
53
- def check_dir (fname ):
54
- """
55
- To return filename and path without file extension
56
- """
57
- file_name = fname .split ('/' )
58
- rel_path = pjoin (* file_name [- 2 :])
59
- file_wo_extension , _ = splitext (rel_path )
60
- return file_wo_extension
41
+ def _rioxr_swapdims (netcdf_xarray ):
42
+ netcdf_xarray .coords ['y' ] = ('lat' , netcdf_xarray .lat )
43
+ netcdf_xarray .coords ['x' ] = ('lon' , netcdf_xarray .lon )
61
44
45
+ return netcdf_xarray .swap_dims ({'lat' : 'y' , 'lon' : 'x' })
62
46
63
- def get_gtiff_name (output_file ):
64
- """
65
- To create tmp filename to convert to COG and create a filename
66
- just as source but without '.TIF' extension
67
- """
68
- outf = os .path .basename (output_file )
69
- dir_path = dirname (output_file )
70
- rel_path = check_dir (outf )
71
- out_fname = pjoin (dir_path , rel_path )
72
- if not exists (out_fname ):
73
- os .makedirs (out_fname )
74
- return pjoin (out_fname , rel_path )
75
47
76
-
77
- def _write_cogtiff (out_f_name , nc_xarray ):
48
+ def _write_cogtiff (
49
+ output_directory : str ,
50
+ nc_xarray : xr .Dataset ,
51
+ variable_name : str ,
52
+ input_filename : str
53
+ ) -> str | None :
78
54
"""
79
- This function converts each variable inside a NetCDF file into a
55
+ This function converts a variable inside a NetCDF file into a
80
56
cloud optimized geotiff.
81
57
82
58
Parameters
@@ -94,81 +70,89 @@ def _write_cogtiff(out_f_name, nc_xarray):
94
70
Assumption that 0 is always on the prime meridian/equator.
95
71
"""
96
72
97
- cogs_generated = []
98
- with tempfile .TemporaryDirectory () as tempdir :
99
-
100
- # variables in netcdf
101
- for var in nc_xarray .variables :
102
- if var in EXCLUDE_VARS :
103
- continue
104
- LOGGER .debug ("NetCDF Var: %s" , var )
105
-
106
- def rioxr_swapdims (netcdf_xarray ):
107
- netcdf_xarray .coords ['y' ] = ('lat' , netcdf_xarray .lat )
108
- netcdf_xarray .coords ['x' ] = ('lon' , netcdf_xarray .lon )
109
-
110
- return netcdf_xarray .swap_dims ({'lat' : 'y' , 'lon' : 'x' })
111
-
112
- # copy to a tempfolder
113
- out_fname = out_f_name + '_' + var + '.tif'
114
- temp_fname = pjoin (tempdir , basename (out_fname ))
115
-
73
+ LOGGER .debug ("NetCDF Var: %s" , variable_name )
74
+
75
+ if variable_name in EXCLUDE_VARS :
76
+ LOGGER .debug (f"Variable { variable_name } is excluded. Will not produce COG" )
77
+ return None
78
+
79
+ output_basename = generate_output_filename (
80
+ input_filename ,
81
+ ext = 'tif' ,
82
+ variable_subset = [variable_name ],
83
+ is_reformatted = True ,
84
+ )
85
+ output_file_name = output_directory .pathjoin (output_basename )
86
+
87
+ with TemporaryDirectory () as tempdir :
88
+ temp_file_name = os .path .join (tempdir , output_basename )
89
+
90
+ # copy to a tempfolder
91
+ # out_fname = out_f_name + '_' + var + '.tif'
92
+ # temp_fname = pjoin(tempdir, basename(out_fname))
93
+
94
+ try :
95
+ nc_xarray [variable_name ].rio .to_raster (temp_file_name )
96
+ except LookupError as err :
97
+ LOGGER .info ("Variable %s cannot be converted to tif: %s" , variable_name , err )
98
+ return None
99
+ except DimensionError as dmerr :
116
100
try :
117
- nc_xarray [var ].rio .to_raster (temp_fname )
118
- except LookupError as err :
119
- LOGGER .info ("Variable %s cannot be converted to tif: %s" , var , err )
120
- continue
121
- except DimensionError as dmerr :
122
- try :
123
- LOGGER .info ("%s: No x or y xarray dimensions, adding them..." , dmerr )
124
- nc_xarray_tmp = rioxr_swapdims (nc_xarray )
125
- nc_xarray_tmp [var ].rio .to_raster (temp_fname )
126
- except RuntimeError as runerr :
127
- LOGGER .info ("Variable %s cannot be converted to tif: %s" , var , runerr )
128
- continue
129
- except Exception as aerr : # pylint: disable=broad-except
130
- LOGGER .info ("Variable %s cannot be converted to tif: %s" , var , aerr )
131
- continue
132
-
133
- # Option to add additional GDAL config settings
134
- # config = dict(GDAL_NUM_THREADS="ALL_CPUS", GDAL_TIFF_OVR_BLOCKSIZE="128")
135
- # with rasterio.Env(**config):
136
-
137
- LOGGER .info ("Starting conversion... %s" , out_fname )
138
-
139
- # default CRS setting
140
- # crs = rasterio.crs.CRS({"init": "epsg:3857"})
141
-
142
- with rasterio .open (temp_fname , mode = 'r+' ) as src_dataset :
143
- # if src_dst.crs is None:
144
- # src_dst.crs = crs
145
- src_dataset .crs = CRS .from_proj4 (proj = "+proj=latlong" )
146
- dst_profile = cog_profiles .get ("deflate" )
147
- cog_translate (src_dataset , out_fname , dst_profile , use_cog_driver = True )
148
-
149
- cogs_generated .append (out_fname )
150
- LOGGER .info ("Finished conversion, writing variable: %s" , out_fname )
151
- LOGGER .info ("NetCDF conversion complete. Returning COGs generated." )
152
- return cogs_generated
153
-
154
-
155
- def netcdf_converter (input_nc_file : pathlib .Path , output_cog_pathname : pathlib .Path , var_list : list = ()) -> List [str ]:
156
- """
157
- Primary function for beginning NetCDF conversion using rasterio,
101
+ LOGGER .info ("%s: No x or y xarray dimensions, adding them..." , dmerr )
102
+ nc_xarray_tmp = _rioxr_swapdims (nc_xarray )
103
+ nc_xarray_tmp [variable_name ].rio .to_raster (temp_file_name )
104
+ except RuntimeError as runerr :
105
+ LOGGER .info ("Variable %s cannot be converted to tif: %s" , variable_name , runerr )
106
+ return None
107
+ except Exception as aerr : # pylint: disable=broad-except
108
+ LOGGER .info ("Variable %s cannot be converted to tif: %s" , variable_name , aerr )
109
+ return None
110
+
111
+ # Option to add additional GDAL config settings
112
+ # config = dict(GDAL_NUM_THREADS="ALL_CPUS", GDAL_TIFF_OVR_BLOCKSIZE="128")
113
+ # with rasterio.Env(**config):
114
+
115
+ LOGGER .info ("Starting conversion... %s" , output_file_name )
116
+
117
+ # default CRS setting
118
+ # crs = rasterio.crs.CRS({"init": "epsg:3857"})
119
+
120
+ with rasterio .open (temp_file_name , mode = 'r+' ) as src_dataset :
121
+ # if src_dst.crs is None:
122
+ # src_dst.crs = crs
123
+ src_dataset .crs = CRS .from_proj4 (proj = "+proj=latlong" )
124
+ dst_profile = cog_profiles .get ("deflate" )
125
+ cog_translate (
126
+ src_dataset ,
127
+ output_file_name ,
128
+ dst_profile ,
129
+ use_cog_driver = True
130
+ )
131
+
132
+ LOGGER .info ("Finished conversion, writing variable: %s" , output_file_name )
133
+ LOGGER .info ("NetCDF conversion complete. Returning COG generated." )
134
+ return output_file_name
135
+
136
+
137
+ def netcdf_converter (
138
+ input_nc_file : pathlib .Path ,
139
+ output_directory : pathlib .Path ,
140
+ var_list : list [str ] | None
141
+ ) -> List [str ]:
142
+ """Primary function for beginning NetCDF conversion using rasterio,
158
143
rioxarray and xarray
159
144
160
145
Parameters
161
146
----------
162
147
input_nc_file : pathlib.Path
163
148
Path to NetCDF file to process
164
- output_cog_pathname : pathlib.Path
165
- COG Output path and NetCDF filename, filename converted to cog variable
166
- filename (.tif)
167
- ex: tests/data/tmpygj2vgxf/
168
- RSS_smap_SSS_L3_8day_running_2020_005_FNL_v04.0.nc
169
- var_list : list
149
+ output_directory : pathlib.Path
150
+ Path to temporary directory into which results will be placed before
151
+ staging in S3.
152
+ var_list : str | None
170
153
List of variable names to be converted to various single band cogs,
171
- ex: ['gland', 'fland', 'sss_smap']
154
+ ex: ['gland', 'fland', 'sss_smap']. If a Harmony request asks for "all"
155
+ variables, the input value will be None.
172
156
173
157
Notes
174
158
-----
@@ -178,11 +162,8 @@ def netcdf_converter(input_nc_file: pathlib.Path, output_cog_pathname: pathlib.P
178
162
netcdf_file = os .path .abspath (input_nc_file )
179
163
LOGGER .debug ('NetCDF Path: %s' , netcdf_file )
180
164
181
- gtiff_fname = get_gtiff_name (output_cog_pathname )
182
-
183
165
if netcdf_file .endswith ('.nc' ):
184
166
LOGGER .info ("Reading %s" , basename (netcdf_file ))
185
- LOGGER .info ('Tmp GTiff filename: %s' , gtiff_fname )
186
167
187
168
xds = xr .open_dataset (netcdf_file )
188
169
@@ -192,13 +173,18 @@ def netcdf_converter(input_nc_file: pathlib.Path, output_cog_pathname: pathlib.P
192
173
or ({"x" , "y" }.issubset (set (xds .dims )))):
193
174
# used to invert y axis
194
175
# xds_reversed = xds.reindex(lat=xds.lat[::-1])
195
- LOGGER .info ("Writing COG to %s" , basename (gtiff_fname ))
196
- if var_list :
197
- try :
198
- xds = xds [var_list ]
199
- except KeyError as error :
200
- raise Net2CogError (f"Variable { error } not found in dataset" ) from error
201
- return _write_cogtiff (gtiff_fname , xds )
176
+
177
+ if var_list is None :
178
+ var_list = list (xds .data_vars .keys ())
179
+
180
+ try :
181
+ return [
182
+ _write_cogtiff (output_directory , xds , variable_name , input_nc_file )
183
+ for variable_name in var_list
184
+ ]
185
+ except KeyError as error :
186
+ raise Net2CogError (f"Variable { error } not found in dataset" ) from error
187
+
202
188
LOGGER .error ("%s: NetCDF file does not contain spatial dimensions such as lat / lon "
203
189
"or x / y" , netcdf_file )
204
190
return []
0 commit comments