delete encoding info after reading netcdf, debug of getColNames
BaptisteVandecrux committed Jun 14, 2024
1 parent 4533b7f commit 320ab52
Showing 7 changed files with 42 additions and 96 deletions.
33 changes: 1 addition & 32 deletions src/pypromice/process/aws.py
@@ -52,9 +52,7 @@ def __init__(self, config_file, inpath, var_file=None, meta_file=None):
         L0 = self.loadL0()
         self.L0=[]
         for l in L0:
-            n = write.getColNames(self.vars,
-                                  l.attrs['number_of_booms'],
-                                  l.attrs['format'])
+            n = write.getColNames(self.vars, l)
             self.L0.append(utilities.popCols(l, n))
 
         self.L1 = None
@@ -106,18 +104,6 @@ def getL3(self):
         logger.info('Level 3 processing...')
         self.L3 = toL3(self.L2)
 
-    # def resample(self, dataset):
-    #     '''Resample dataset to specific temporal resolution (based on input
-    #     data type)'''
-    #     f = [l.attrs['format'] for l in self.L0]
-    #     if 'raw' in f or 'STM' in f:
-    #         logger.info('Resampling to 10 minute')
-    #         resampled = resample_dataset(dataset, '10min')
-    #     else:
-    #         resampled = resample_dataset(dataset, '60min')
-    #         logger.info('Resampling to hour')
-    #     return resampled
-
     def writeArr(self, dataset, outpath, t=None):
         '''Write L3 data to .nc and .csv hourly and daily files
@@ -141,23 +127,6 @@ def writeArr(self, dataset, outpath, t=None):
         else:
             write.prepare_and_write(dataset, outpath, self.vars,
                                     self.meta, '60min')
-
-    # def addAttributes(self, dataset):
-    #     '''Add variable and attribute metadata
-
-    #     Parameters
-    #     ----------
-    #     dataset : xr.Dataset
-    #         Dataset (i.e. L2 or L3) object
-
-    #     Returns
-    #     -------
-    #     d2 : xr.Dataset
-    #         Data object with attributes
-    #     '''
-    #     d2 = utilities.addVars(dataset, self.vars)
-    #     d2 = utilities.addMeta(d2, self.meta)
-    #     return d2
 
     def loadConfig(self, config_file, inpath):
         '''Load configuration from .toml file
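The new call contract hands getColNames the Level 0 dataset itself rather than its `number_of_booms` and `format` attributes. Below is a minimal sketch of the filtering this enables, using a toy variables table and dataset; all names and values are illustrative stand-ins, not taken from pypromice:

```python
import numpy as np
import pandas as pd
import xarray as xr

# Toy variables look-up table and an L0-like dataset (illustrative only).
vars_df = pd.DataFrame(
    {"data_type": ["all", "raw", "TX"],
     "station_type": ["all", "one-boom", "two-boom"]},
    index=["time", "t_u", "t_l"],
)
ds = xr.Dataset({"t_u": ("time", np.random.rand(3))},
                attrs={"data_type": "raw", "number_of_booms": 1})

# Rough equivalent of the new getColNames(vars_df, ds): the dataset carries
# its own metadata, so the caller no longer passes booms and format separately.
sel = vars_df[vars_df["data_type"].isin([ds.attrs["data_type"], "all"])]
sel = sel[sel["station_type"].isin(["one-boom", "all"])]
print(list(sel.index))  # ['time', 't_u']
```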
5 changes: 4 additions & 1 deletion src/pypromice/process/get_l2tol3.py
@@ -41,7 +41,10 @@ def get_l2tol3():
 
     # Define Level 2 dataset from file
     with xr.open_dataset(args.inpath) as l2:
-        l2.load()
+        l2.load()
+        for varname in l2.variables:
+            if 'encoding' in l2[varname].attrs:
+                del l2[varname].attrs['encoding']
     if 'bedrock' in l2.attrs.keys():
         l2.attrs['bedrock'] = l2.attrs['bedrock'] == 'True'
     if 'number_of_booms' in l2.attrs.keys():
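The pattern added here, and repeated in join_l2.py and join_l3.py below, strips any stale 'encoding' attribute that an earlier write step left on a variable, presumably so it cannot clash with the encoding xarray derives at the next to_netcdf() call. A self-contained sketch with a made-up in-memory dataset:

```python
import numpy as np
import xarray as xr

# In-memory stand-in for a freshly read Level 2 dataset whose variable
# carries a leftover 'encoding' attribute from a previous write step.
l2 = xr.Dataset({"t_u": ("time", np.random.rand(5))})
l2["t_u"].attrs["encoding"] = "{'dtype': 'float32'}"  # stale metadata

# Same cleanup as the commit: drop the stale attrs after loading.
for varname in l2.variables:
    if "encoding" in l2[varname].attrs:
        del l2[varname].attrs["encoding"]

assert "encoding" not in l2["t_u"].attrs
```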
3 changes: 3 additions & 0 deletions src/pypromice/process/join_l2.py
@@ -30,6 +30,9 @@ def loadArr(infile):
     elif infile.split('.')[-1].lower() == 'nc':
         with xr.open_dataset(infile) as ds:
             ds.load()
+            for varname in ds.variables:
+                if 'encoding' in ds[varname].attrs:
+                    del ds[varname].attrs['encoding']
 
     try:
         name = ds.attrs['station_id']
3 changes: 3 additions & 0 deletions src/pypromice/process/join_l3.py
@@ -111,6 +111,9 @@ def loadArr(infile):
         f.close()
     elif infile.split('.')[-1].lower() in 'nc':
         ds = xr.open_dataset(infile)
+        for varname in ds.variables:
+            if 'encoding' in ds[varname].attrs:
+                del ds[varname].attrs['encoding']
 
     try:
         name = ds.attrs['station_name']
3 changes: 3 additions & 0 deletions src/pypromice/process/utilities.py
@@ -47,6 +47,9 @@ def reformat_lon(dataset, exempt=['UWN', 'Roof_GEUS', 'Roof_PROMICE']):
     id = dataset.attrs['site_id']
 
     if id not in exempt:
+        if 'gps_lon' not in dataset.keys():
+            print("?????????", id, "missing gps_lon")
+            return dataset
         dataset['gps_lon'] = dataset['gps_lon'] * -1
     return dataset
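The guard added above uses a bare print with a placeholder string. Below is a sketch of the same guard written with the logging style used elsewhere in these modules; an alternative formulation, not what the commit ships:

```python
import logging

logger = logging.getLogger(__name__)

def reformat_lon(dataset, exempt=('UWN', 'Roof_GEUS', 'Roof_PROMICE')):
    '''Flip gps_lon to negative (western) values unless the site is exempt.'''
    site = dataset.attrs['site_id']
    if site not in exempt:
        if 'gps_lon' not in dataset.keys():
            # Warn and return unchanged instead of printing '?????????'.
            logger.warning('%s is missing gps_lon; returning dataset unchanged', site)
            return dataset
        dataset['gps_lon'] = dataset['gps_lon'] * -1
    return dataset
```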
2 changes: 1 addition & 1 deletion src/pypromice/process/variables.csv
@@ -64,7 +64,7 @@ rot,platform_azimuth_angle,Station rotation from true North,degrees,0,360,,all,a
 gps_lat,gps_latitude,Latitude,degrees_north,50,83,,all,all,6,coordinate,time lat lon alt,True,
 gps_lon,gps_longitude,Longitude,degrees_east,5,70,,all,all,6,coordinate,time lat lon alt,True,
 gps_alt,gps_altitude,Altitude,m,0,3000,,all,all,2,coordinate,time lat lon alt,True,
-gps_time,gps_time,GPS time,s,0,240000,,all,all,,coordinate,time lat lon alt,True,
+gps_time,gps_time,GPS time,s,0,240000,,all,all,,physicalMeasurement,time lat lon alt,True,
 gps_geoid,gps_geoid_separation,Height of EGM96 geoid over WGS84 ellipsoid,m,,,,one-boom,all,,physicalMeasurement,time lat lon alt,True,
 gps_geounit,gps_geounit,GeoUnit,-,,,,all,,,qualityInformation,time lat lon alt,True,L0 only
 gps_hdop,gps_hdop,GPS horizontal dillution of precision (HDOP),m,,,,all,all,2,qualityInformation,time lat lon alt,True,NMEA: Horizontal dilution of precision
89 changes: 27 additions & 62 deletions src/pypromice/process/write.py
@@ -8,7 +8,7 @@
 logger = logging.getLogger(__name__)
 
 from pypromice.process.resample import resample_dataset
-from pypromice.process import utilities, write
+from pypromice.process import utilities
 
 def prepare_and_write(dataset, outpath, vars_df, meta_dict, time='60min', resample=True):
     '''Prepare data with resampling, formating and metadata population; then
@@ -37,9 +37,17 @@ def prepare_and_write(dataset, outpath, vars_df, meta_dict, time='60min', resample=True):
     # Reformat time
     d2 = utilities.reformat_time(d2)
 
+    # finding station/site name
+    if 'station_id' in d2.attrs.keys():
+        name = d2.attrs['station_id']
+    else:
+        name = d2.attrs['site_id']
+
     # Reformat longitude (to negative values)
-    d2 = utilities.reformat_lon(d2)
+    if 'gps_lon' in d2.keys():
+        d2 = utilities.reformat_lon(d2)
+    else:
+        logger.info('%s does not have gpd_lon'%name)
     # Add variable attributes and metadata
     d2 = utilities.addVars(d2, vars_df)
     d2 = utilities.addMeta(d2, meta_dict)
@@ -48,16 +56,10 @@ def prepare_and_write(dataset, outpath, vars_df, meta_dict, time='60min', resample=True):
     d2 = utilities.roundValues(d2, vars_df)
 
     # Get variable names to write out
-    col_names = write.getColNames(
-        vars_df,
-        d2)
+    col_names = getColNames(vars_df, d2, remove_nan_fields=True)
 
     # Define filename based on resample rate
     t = int(pd.Timedelta((d2['time'][1] - d2['time'][0]).values).total_seconds())
-    if 'station_id' in d2.attrs.keys():
-        name = d2.attrs['station_id']
-    else:
-        name = d2.attrs['site_id']
 
     # Create out directory
     outdir = os.path.join(outpath, name)
@@ -80,11 +82,11 @@ def prepare_and_write(dataset, outpath, vars_df, meta_dict, time='60min', resample=True):
             os.mkdir(outdir)
     # Write to csv file
     logger.info('Writing to files...')
-    write.writeCSV(out_csv, d2, col_names)
+    writeCSV(out_csv, d2, col_names)
 
     # Write to netcdf file
     col_names = col_names + ['lat', 'lon', 'alt']
-    write.writeNC(out_nc, d2, col_names)
+    writeNC(out_nc, d2, col_names)
     logger.info(f'Written to {out_csv}')
     logger.info(f'Written to {out_nc}')
 
@@ -152,7 +154,7 @@ def writeNC(outfile, Lx, col_names=None):
     names = list(Lx.keys())
     Lx[names].to_netcdf(outfile, mode='w', format='NETCDF4', compute=True)
 
-def getColNames(vars_df, ds):
+def getColNames(vars_df, ds, remove_nan_fields=False):
     '''Get all variable names for a given data type, based on a variables
     look-up table. This is mainly for exporting purposes
@@ -172,55 +174,18 @@ def getColNames(vars_df, ds):
         vars_df = vars_df.loc[vars_df['data_type'].isin(['TX','all'])]
     elif ds.attrs['data_type']=='STM' or ds.attrs['data_type']=='raw':
         vars_df = vars_df.loc[vars_df['data_type'].isin(['raw','all'])]
+
+    if 'number_of_booms' in ds.attrs.keys():
+        if ds.attrs['number_of_booms']==1:
+            vars_df = vars_df.loc[vars_df['station_type'].isin(['one-boom','all'])]
+        elif ds.attrs['number_of_booms']==2:
+            vars_df = vars_df.loc[vars_df['station_type'].isin(['two-boom','all'])]
     var_list = list(vars_df.index)
-    for v in var_list:
-        if v not in ds.keys():
-            var_list.remove(v)
-            continue
-        if ds[v].isnull().all():
-            var_list.remove(v)
+    if remove_nan_fields:
+        for v in var_list:
+            if v not in ds.keys():
+                var_list.remove(v)
+                continue
+            if ds[v].isnull().all():
+                var_list.remove(v)
     return var_list
-
-
-def getColNames_old(vars_df, booms=None, data_type=None, bedrock=False):
-    '''Get all variable names for a given data type, based on a variables
-    look-up table. This is mainly for exporting purposes
-
-    Parameters
-    ----------
-    vars_df : pd.DataFrame
-        Variables look-up table
-    booms : int, optional
-        Number of booms. If this parameter is empty then all variables
-        regardless of boom type will be passed. The default is None.
-    data_type : str, optional
-        Data type, "tx", "STM" or "raw". If this parameter is empty then all
-        variables regardless of data type will be passed. The default is None.
-
-    Returns
-    -------
-    list
-        Variable names
-    '''
-    if booms==1:
-        vars_df = vars_df.loc[vars_df['station_type'].isin(['one-boom','all'])]
-    elif booms==2:
-        vars_df = vars_df.loc[vars_df['station_type'].isin(['two-boom','all'])]
-
-    if data_type=='TX':
-        vars_df = vars_df.loc[vars_df['data_type'].isin(['TX','all'])]
-    elif data_type=='STM' or data_type=='raw':
-        vars_df = vars_df.loc[vars_df['data_type'].isin(['raw','all'])]
-
-    col_names = list(vars_df.index)
-    if isinstance(bedrock, str):
-        bedrock = (bedrock.lower() == 'true')
-    if bedrock == True:
-        col_names.remove('cc')
-    for v in ['dlhf_u', 'dlhf_l', 'dshf_u', 'dshf_l']:
-        try:
-            col_names.remove(v)
-        except:
-            pass
-    return col_names
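One caveat in the new remove_nan_fields branch: removing entries from a Python list while iterating over it skips the element that follows each removal, so two all-NaN variables in a row can leave the second one in the returned list. Below is a sketch of an equivalent filter that avoids the pitfall by building a new list; the variable names are made up for the example:

```python
import numpy as np
import xarray as xr

ds = xr.Dataset({
    "t_u": ("time", np.array([1.0, 2.0])),
    "p_u": ("time", np.array([np.nan, np.nan])),   # all NaN: should be dropped
    "rh_u": ("time", np.array([np.nan, np.nan])),  # consecutive all-NaN field
})
var_list = ["t_u", "p_u", "rh_u", "z_boom"]        # 'z_boom' absent from ds

# A list comprehension filters without mutating the list being iterated.
kept = [v for v in var_list
        if v in ds.keys() and not ds[v].isnull().all()]
print(kept)  # ['t_u']
```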
