4
4
Carlo (MCMC). Example use:
5
5
6
6
python calibration.py tune --pft=1
7
- python calibration.py tune --pft=1 --config=MOD16_calibration_config.json
8
-
9
- The default configuration file provided in the repository,
10
- `mod16/data/MOD16_calibration_config.json`, should be copied and modified to
11
- suit your needs.
12
7
13
8
The general calibration protocol used here involves:
14
9
32
27
A thinned posterior can be exported from the command line:
33
28
34
29
python calibration.py export-bplut output.csv --burn=1000 --thin=10
30
+
31
+ **The Cal-Val dataset** is a single HDF5 file that contains all the input
32
+ variables necessary to drive MOD16 as well as the observed latent heat fluxes
33
+ against which the model is calibrated. The HDF5 file specification is as
34
+ follows, where the shape of multidimensional arrays is given in terms of
35
+ T, the number of time steps (days); N, the number of tower sites; and P,
36
+ a sub-grid of MODIS pixels surrounding a tower:
37
+
38
+
39
+ FLUXNET/
40
+ SEB -- (T x N) Surface energy balance, from tower data
41
+ air_temperature -- (T x N) Air temperatures reported at the tower
42
+ latent_heat -- (T x N) Observed latent heat flux [W m-2]
43
+ site_id -- (N) Unique identifier for each site, e.g., "US-BZS"
44
+ validation_mask -- (T x N) Indicates what site-days are reserved
45
+
46
+ MERRA2/
47
+ LWGNT -- (T x N) Net long-wave radiation, 24-hr mean [W m-2]
48
+ LWGNT_daytime -- (T x N) ... for daytime hours only
49
+ LWGNT_nighttime -- (T x N) ... for nighttime hours only
50
+ PS -- (T x N) Surface air pressure [Pa]
51
+ PS_daytime -- (T x N) ... for daytime hours only
52
+ PS_nighttime -- (T x N) ... for nighttime hours only
53
+ QV10M -- (T x N) Water vapor mixing ratio at 10-meter height
54
+ QV10M_daytime -- (T x N) ... for daytime hours only
55
+ QV10M_nighttime -- (T x N) ... for nighttime hours only
56
+ SWGDN -- (T x N) Down-welling short-wave radiation [W m-2]
57
+ SWGDN_daytime -- (T x N) ... for daytime hours only
58
+ SWGDN_nighttime -- (T x N) ... for nighttime hours only
59
+ T10M -- (T x N) Air temperature at 10-meter height [deg C]
60
+ T10M_daytime -- (T x N) ... for daytime hours only
61
+ T10M_nighttime -- (T x N) ... for nighttime hours only
62
+ Tmin -- (T x N) Daily minimum air temperature [deg C]
63
+
64
+ MODIS/
65
+ MCD43GF_black_sky_sw_albedo
66
+ -- (T x N x P) Short-wave albedo under black-sky conditions
67
+ MOD15A2HGF_LAI
68
+ -- (T x N x P) Leaf area index in scaled units (10 * [m3 m-3])
69
+ MOD15A2HGF_LAI_interp
70
+ -- (T x N x P) Daily interpolation of the MOD15A2HGF_LAI field
71
+ MOD15A2HGF_fPAR
72
+ -- (T x N x P) Fraction of photosynthetically active radiation [%]
73
+ MOD15A2HGF_fPAR_interp
74
+ -- (T x N x P) Daily interpolation of MOD15A2HGF_fPAR_interp field
75
+
76
+ coordinates/
77
+ lng_lat -- (2 x N) Longitude, latitude coordinates of each tower
78
+
79
+ state/
80
+ PFT -- (N x P) The plant functional type (PFT) of each pixel
81
+ PFT_dominant -- (N) The majority PFT at each tower
82
+ elevation_m -- (N) The elevation in meters above sea level
83
+
84
+ time -- (T x 3) The Year, Month, Day of each daily time step
85
+ weights -- (N) A number between 0 and 1 used to down-weight towers
35
86
'''
36
87
37
88
import datetime
38
- import json
89
+ import yaml
39
90
import os
40
91
import numpy as np
41
92
import h5py
42
93
import pymc as pm
43
- import aesara .tensor as at
94
+ import pytensor .tensor as pt
44
95
import arviz as az
45
96
import mod16
46
97
from multiprocessing import get_context , set_start_method
@@ -101,6 +152,17 @@ def compile_et_model(
101
152
Creates a new ET model based on the prior distribution. Model can be
102
153
re-compiled multiple times, e.g., for cross validation.
103
154
155
+ There are two attributes that are set on the sampler when it is
156
+ initialized that could be helpful here:
157
+
158
+ self.priors
159
+ self.bounds
160
+
161
+ `self.priors` is a dict with a key for each parameter that has
162
+ informative priors. For parameters with a non-informative (Uniform)
163
+ prior, `self.bounds` is a similar dict (with a key for each parameter)
164
+ that describes the lower and upper bounds of the Uniform prior.
165
+
104
166
Parameters
105
167
----------
106
168
observed : Sequence
@@ -127,21 +189,21 @@ def compile_et_model(
127
189
tmin_open = self .params ['tmin_open' ]
128
190
vpd_open = self .params ['vpd_open' ]
129
191
vpd_close = self .params ['vpd_close' ]
130
- gl_sh = pm .Triangular ('gl_sh' , ** self .prior ['gl_sh' ])
131
- gl_wv = pm .Triangular ('gl_wv' , ** self .prior ['gl_wv' ])
192
+ gl_sh = pm .LogNormal ('gl_sh' , ** self .prior ['gl_sh' ])
193
+ gl_wv = pm .LogNormal ('gl_wv' , ** self .prior ['gl_wv' ])
132
194
g_cuticular = pm .LogNormal (
133
195
'g_cuticular' , ** self .prior ['g_cuticular' ])
134
196
csl = pm .LogNormal ('csl' , ** self .prior ['csl' ])
135
- rbl_min = pm .Triangular ('rbl_min' , ** self .prior ['rbl_min' ])
136
- rbl_max = pm .Triangular ('rbl_max' , ** self .prior ['rbl_max' ])
137
- beta = pm .Triangular ('beta' , ** self .prior ['beta' ])
197
+ rbl_min = pm .Uniform ('rbl_min' , ** self .bounds ['rbl_min' ])
198
+ rbl_max = pm .Uniform ('rbl_max' , ** self .bounds ['rbl_max' ])
199
+ beta = pm .Uniform ('beta' , ** self .bounds ['beta' ])
138
200
# (Stochstic) Priors for unknown model parameters
139
201
# Convert model parameters to a tensor vector
140
202
params_list = [
141
203
tmin_close , tmin_open , vpd_open , vpd_close , gl_sh , gl_wv ,
142
204
g_cuticular , csl , rbl_min , rbl_max , beta
143
205
]
144
- params = at .as_tensor_variable (params_list )
206
+ params = pt .as_tensor_variable (params_list )
145
207
# Key step: Define the log-likelihood as an added potential
146
208
pm .Potential ('likelihood' , log_likelihood (params ))
147
209
return model
@@ -157,9 +219,9 @@ def __init__(self, config = None):
157
219
config_file = config
158
220
if config_file is None :
159
221
config_file = os .path .join (
160
- MOD16_DIR , 'data/MOD16_calibration_config.json ' )
222
+ MOD16_DIR , 'data/MOD16_calibration_config.yaml ' )
161
223
with open (config_file , 'r' ) as file :
162
- self .config = json . load (file )
224
+ self .config = yaml . safe_load (file )
163
225
self .hdf5 = self .config ['data' ]['file' ]
164
226
165
227
def _filter (self , raw : Sequence , size : int ):
@@ -314,71 +376,74 @@ def tune(
314
376
# NOTE: This value was hard-coded in the extant version of MOD16
315
377
if np .isnan (params_dict ['beta' ]):
316
378
params_dict ['beta' ] = 250
317
- model = MOD16 (params_dict )
379
+
380
+ # Read in driver datasets from the HDF5 file
318
381
with h5py .File (self .hdf5 , 'r' ) as hdf :
319
382
sites = hdf ['FLUXNET/site_id' ][:].tolist ()
320
383
if hasattr (sites [0 ], 'decode' ):
321
384
sites = [s .decode ('utf-8' ) for s in sites ]
322
385
# Get dominant PFT
323
- pft_map = pft_dominant (hdf ['state/PFT' ][:], site_list = sites )
386
+ pft_array = hdf [self .config ['data' ]['class_map' ]][:]
387
+ pft_map = pft_dominant (pft_array , site_list = sites )
324
388
# Blacklist various sites
325
389
blacklist = self .config ['data' ]['sites_blacklisted' ]
390
+
391
+ # Get a binary mask that indicates which tower-days should be used
392
+ # to calibrate the current PFT class
326
393
pft_mask = np .logical_and (pft_map == pft , ~ np .in1d (sites , blacklist ))
327
- weights = hdf ['weights' ][pft_mask ]
394
+ nsteps = hdf ['time' ].shape [0 ] # Number of time steps
395
+ if self .config ['data' ]['classes_are_dynamic' ]:
396
+ assert pft_mask .ndim == 2 and pft_mask .shape [0 ] == nsteps ,\
397
+ 'Configuration setting "classes_are_dynamic" implies the "class_map" should be (T x N x ...) but it is not'
398
+ weights = hdf ['weights' ][pft_mask ]
399
+ else :
400
+ # If using static PFT classes, duplicate them along the time axis
401
+ pft_mask = pft_mask [np .newaxis ,:]\
402
+ .repeat (nsteps , axis = 0 )
403
+ weights = hdf ['weights' ][:][np .newaxis ,:]\
404
+ .repeat (nsteps , axis = 0 )[pft_mask ]
405
+
328
406
# Read in tower observations
329
- tower_obs = hdf ['FLUXNET/latent_heat' ][:][:, pft_mask ]
407
+ tower_obs = hdf ['FLUXNET/latent_heat' ][:][pft_mask ]
330
408
# Read the validation mask; mask out observations that are
331
409
# reserved for validation
332
410
print ('Masking out validation data...' )
333
411
mask = hdf ['FLUXNET/validation_mask' ][pft ]
334
412
tower_obs [mask ] = np .nan
335
- # Read start and end dates and mask data appropriately
336
- timestamps = [
337
- f'{ y } -{ str (m ).zfill (2 )} -{ str (d ).zfill (2 )} '
338
- for y , m , d in hdf ['time' ][:].tolist ()
339
- ]
340
- start = self .config ['data' ]['dates' ]['start' ]
341
- end = self .config ['data' ]['dates' ]['end' ]
342
- t0 = timestamps .index (start )
343
- t1 = timestamps .index (end ) + 1
344
- tower_obs = tower_obs [t0 :t1 ]
413
+
345
414
# Read in driver datasets
346
415
print ('Loading driver datasets...' )
347
- lw_net_day = hdf ['MERRA2/LWGNT_daytime' ][:][t0 :t1 ,pft_mask ]
348
- lw_net_night = hdf ['MERRA2/LWGNT_nighttime' ][:][t0 :t1 ,pft_mask ]
349
- if self .config ['optimization' ]['platform' ] == 'VIIRS' :
350
- sw_albedo = hdf ['VIIRS/VNP43MA3_black_sky_sw_albedo' ][:][t0 :t1 ,pft_mask ]
351
- else :
352
- sw_albedo = hdf ['MODIS/MCD43GF_black_sky_sw_albedo' ][:][t0 :t1 ,pft_mask ]
416
+ group = self .config ['data' ]['met_group' ]
417
+ lw_net_day = hdf [f'{ group } /LWGNT_daytime' ][:][pft_mask ]
418
+ lw_net_night = hdf [f'{ group } /LWGNT_nighttime' ][:][pft_mask ]
419
+ sw_albedo = hdf [self .config ['data' ]['datasets' ]['albedo' ]][:][pft_mask ]
353
420
sw_albedo = np .nanmean (sw_albedo , axis = - 1 )
354
- sw_rad_day = hdf ['MERRA2 /SWGDN_daytime' ][:][t0 : t1 , pft_mask ]
355
- sw_rad_night = hdf ['MERRA2 /SWGDN_nighttime' ][:][t0 : t1 , pft_mask ]
356
- temp_day = hdf ['MERRA2 /T10M_daytime' ][:][t0 : t1 , pft_mask ]
357
- temp_night = hdf ['MERRA2 /T10M_nighttime' ][:][t0 : t1 , pft_mask ]
358
- tmin = hdf ['MERRA2 /Tmin' ][:][t0 : t1 , pft_mask ]
421
+ sw_rad_day = hdf [f' { group } /SWGDN_daytime' ][:][pft_mask ]
422
+ sw_rad_night = hdf [f' { group } /SWGDN_nighttime' ][:][pft_mask ]
423
+ temp_day = hdf [f' { group } /T10M_daytime' ][:][pft_mask ]
424
+ temp_night = hdf [f' { group } /T10M_nighttime' ][:][pft_mask ]
425
+ tmin = hdf [f' { group } /Tmin' ][:][pft_mask ]
359
426
# As long as the time series is balanced w.r.t. years (i.e., same
360
427
# number of records per year), the overall mean is the annual mean
361
- temp_annual = hdf ['MERRA2 /T10M' ][:][t0 : t1 , pft_mask ].mean (axis = 0 )
428
+ temp_annual = hdf [f' { group } /T10M' ][:][pft_mask ].mean (axis = 0 )
362
429
vpd_day = MOD16 .vpd (
363
- hdf ['MERRA2 /QV10M_daytime' ][:][t0 : t1 , pft_mask ],
364
- hdf ['MERRA2 /PS_daytime' ][:][t0 : t1 , pft_mask ],
430
+ hdf [f' { group } /QV10M_daytime' ][:][pft_mask ],
431
+ hdf [f' { group } /PS_daytime' ][:][pft_mask ],
365
432
temp_day )
366
433
vpd_night = MOD16 .vpd (
367
- hdf ['MERRA2 /QV10M_nighttime' ][:][t0 : t1 , pft_mask ],
368
- hdf ['MERRA2 /PS_nighttime' ][:][t0 : t1 , pft_mask ],
434
+ hdf [f' { group } /QV10M_nighttime' ][:][pft_mask ],
435
+ hdf [f' { group } /PS_nighttime' ][:][pft_mask ],
369
436
temp_night )
370
- pressure = hdf ['MERRA2 /PS' ][:][t0 : t1 , pft_mask ]
437
+ pressure = hdf [f' { group } /PS' ][:][pft_mask ]
371
438
# Read in fPAR, LAI, and convert from (%) to [0,1]
372
- prefix = 'MODIS/MOD'
373
- if self .config ['optimization' ]['platform' ] == 'VIIRS' :
374
- prefix = 'VIIRS/VNP'
375
439
fpar = np .nanmean (
376
- hdf [f' { prefix } 15A2HGF_fPAR_interp ' ][:][ t0 : t1 , pft_mask ], axis = - 1 )
440
+ hdf [self . config [ 'data ' ]['datasets' ][ 'fPAR' ]][:][ pft_mask ], axis = - 1 )
377
441
lai = np .nanmean (
378
- hdf [f' { prefix } 15A2HGF_LAI_interp ' ][:][ t0 : t1 , pft_mask ], axis = - 1 )
442
+ hdf [self . config [ 'data ' ]['datasets' ][ 'LAI' ]][:][ pft_mask ], axis = - 1 )
379
443
# Convert fPAR from (%) to [0,1] and re-scale LAI; reshape fPAR and LAI
380
444
fpar /= 100
381
445
lai /= 10
446
+
382
447
# Compile driver datasets
383
448
drivers = [
384
449
lw_net_day , lw_net_night , sw_rad_day , sw_rad_night , sw_albedo ,
@@ -390,6 +455,7 @@ def tune(
390
455
sampler = MOD16StochasticSampler (
391
456
self .config , MOD16 ._et , params_dict , backend = backend ,
392
457
weights = weights )
458
+
393
459
if plot_trace or ipdb :
394
460
# This matplotlib setting prevents labels from overplotting
395
461
pyplot .rcParams ['figure.constrained_layout.use' ] = True
@@ -400,10 +466,11 @@ def tune(
400
466
az .plot_trace (trace , var_names = MOD16 .required_parameters )
401
467
pyplot .show ()
402
468
return
469
+
403
470
tower_obs = self .clean_observed (tower_obs , drivers )
404
471
# Get (informative) priors for just those parameters that have them
405
472
with open (self .config ['optimization' ]['prior' ], 'r' ) as file :
406
- prior = json . load (file )
473
+ prior = yaml . safe_load (file )
407
474
prior_params = list (filter (
408
475
lambda p : p in prior .keys (), sampler .required_parameters ['ET' ]))
409
476
prior = dict ([
0 commit comments