diff --git a/requirements-github.txt b/requirements-github.txt index 2f0dbd3a..7f480e7d 100644 --- a/requirements-github.txt +++ b/requirements-github.txt @@ -2,7 +2,7 @@ click==8.1.5 jinja2==3.1.2 pyyaml==6.0 pycodestyle==2.10.0 -#pandas==1.4.0 +pandas==1.4.0 isodate==0.6.1 f90nml==1.4.3 questionary==1.10.0 diff --git a/src/swell/__init__.py b/src/swell/__init__.py index ed68af91..0d854d1a 100644 --- a/src/swell/__init__.py +++ b/src/swell/__init__.py @@ -9,4 +9,4 @@ repo_directory = os.path.dirname(__file__) # Set the version for swell -__version__ = '1.7.1' +__version__ = '1.8.0' diff --git a/src/swell/configuration/jedi/interfaces/geos_atmosphere/observations/amsua_n19.yaml b/src/swell/configuration/jedi/interfaces/geos_atmosphere/observations/amsua_n19.yaml index c6effcd4..fe1f2edd 100644 --- a/src/swell/configuration/jedi/interfaces/geos_atmosphere/observations/amsua_n19.yaml +++ b/src/swell/configuration/jedi/interfaces/geos_atmosphere/observations/amsua_n19.yaml @@ -191,9 +191,7 @@ obs post filters: channels: *amsua_n19_available_channels use passive_bc: true sensor: *Sensor_ID - use_flag: [-1, -1, -1, 1, 1, - 1, -1, -1, 1, 1, - 1, 1, 1, 1, -1] + use_flag: &amsua_n19_use_flag {{amsua_n19_active_channels}} maxvalue: 1.0e-12 action: name: reject @@ -207,9 +205,8 @@ obs post filters: channels: *amsua_n19_available_channels options: channels: *amsua_n19_available_channels - use_flag: [-1, -1, -1, 1, 1, - 1, -1, -1, 1, 1, - 1, 1, 1, 1, -1] +# use passive_bc: true + use_flag: *amsua_n19_use_flag minvalue: 1.0e-12 action: name: reject diff --git a/src/swell/configuration/jedi/interfaces/geos_atmosphere/task_questions.yaml b/src/swell/configuration/jedi/interfaces/geos_atmosphere/task_questions.yaml index ca42751a..9a11b899 100644 --- a/src/swell/configuration/jedi/interfaces/geos_atmosphere/task_questions.yaml +++ b/src/swell/configuration/jedi/interfaces/geos_atmosphere/task_questions.yaml @@ -184,6 +184,12 @@ observations: - omi_aura - ompsnm_npp 
+observing_system_records_gsi_path: + default_value: None + +observing_system_records_path: + default_value: None + path_to_ensemble: default_value: /discover/nobackup/drholdaw/SwellTestData/letk/ensemble/Y%Y/M%m/D%d/H%H/geos*%Y%m%d_%H%M%Sz.nc4 diff --git a/src/swell/suites/geosadas/flow.cylc b/src/swell/suites/geosadas/flow.cylc index f92cba9c..769c7ee7 100644 --- a/src/swell/suites/geosadas/flow.cylc +++ b/src/swell/suites/geosadas/flow.cylc @@ -31,9 +31,15 @@ # Stage JEDI static files CloneJedi => StageJedi + + # Clone geos ana for generating observing system records + CloneGeosAna """ T00 = """ + # Generate satellite channel records + CloneGeosAna[^] => GenerateObservingSystemRecords + # Get and convert bias correction coefficients GetGsiBc => GsiBcToIoda @@ -44,6 +50,7 @@ GetGeosAdasBackground # Run Jedi variational executable + GenerateObservingSystemRecords => RunJediVariationalExecutable BuildJediByLinking[^] => RunJediVariationalExecutable StageJedi[^] => RunJediVariationalExecutable GsiBcToIoda => RunJediVariationalExecutable @@ -69,6 +76,12 @@ # Tasks # ----- + [[CloneGeosAna]] + script = "swell task CloneGeosAna $config" + + [[GenerateObservingSystemRecords]] + script = "swell task GenerateObservingSystemRecords $config -d $datetime -m geos_atmosphere" + [[CloneJedi]] script = "swell task CloneJedi $config" diff --git a/src/swell/suites/hofx/flow.cylc b/src/swell/suites/hofx/flow.cylc index 7a82a323..86e9d43e 100644 --- a/src/swell/suites/hofx/flow.cylc +++ b/src/swell/suites/hofx/flow.cylc @@ -38,6 +38,9 @@ {% for model_component in model_components %} # Stage JEDI static files CloneJedi => StageJedi-{{model_component}} + + # Clone geos ana for generating observing system records + CloneGeosAna-{{model_component}} {% endfor %} """ @@ -45,8 +48,12 @@ {{cycle_time.cycle_time}} = """ {% for model_component in model_components %} {% if cycle_time[model_component] %} + # Task triggers for: {{model_component}} # ------------------ + # Generate satellite 
channel records + CloneGeosAna-{{model_component}}[^] => GenerateObservingSystemRecords-{{model_component}} + # Get background GetBackground-{{model_component}} @@ -62,6 +69,7 @@ StageJediCycle-{{model_component}} => RunJediHofxExecutable-{{model_component}} GetBackground-{{model_component}} => RunJediHofxExecutable-{{model_component}} GetObservations-{{model_component}} => RunJediHofxExecutable-{{model_component}} + GenerateObservingSystemRecords-{{model_component}} => RunJediHofxExecutable-{{model_component}} # EvaObservations RunJediHofxExecutable-{{model_component}} => EvaObservations-{{model_component}} @@ -115,6 +123,13 @@ {% endif %} {% for model_component in model_components %} + + [[CloneGeosAna-{{model_component}}]] + script = "swell task CloneGeosAna $config -m {{model_component}}" + + [[GenerateObservingSystemRecords-{{model_component}}]] + script = "swell task GenerateObservingSystemRecords $config -d $datetime -m {{model_component}}" + [[StageJedi-{{model_component}}]] script = "swell task StageJedi $config -m {{model_component}}" diff --git a/src/swell/suites/ufo_testing/flow.cylc b/src/swell/suites/ufo_testing/flow.cylc index b6c51e42..81b40920 100644 --- a/src/swell/suites/ufo_testing/flow.cylc +++ b/src/swell/suites/ufo_testing/flow.cylc @@ -34,11 +34,17 @@ # If not able to link to build create the build BuildJediByLinking:fail? 
=> BuildJedi + + # Clone geos ana for generating observing system records + CloneGeosAna """ {% for cycle_time in cycle_times %} {{cycle_time.cycle_time}} = """ + # Generate satellite channel records + CloneGeosAna[^] => GenerateObservingSystemRecords + # Convert bias correction to ioda GetGsiBc GetGsiBc => GsiBcToIoda @@ -52,6 +58,7 @@ GetGeovals # Run Jedi hofx executable + GenerateObservingSystemRecords => RunJediUfoTestsExecutable GsiNcdiagToIoda => RunJediUfoTestsExecutable GsiBcToIoda => RunJediUfoTestsExecutable GetGeovals => RunJediUfoTestsExecutable @@ -101,6 +108,12 @@ --partition={{scheduling["BuildJedi"]["partition"]}} {% endif %} + [[CloneGeosAna]] + script = "swell task CloneGeosAna $config -m geos_atmosphere" + + [[GenerateObservingSystemRecords]] + script = "swell task GenerateObservingSystemRecords $config -d $datetime -m geos_atmosphere" + [[ GetGsiBc ]] script = "swell task GetGsiBc $config -d $datetime -m geos_atmosphere" diff --git a/src/swell/tasks/base/task_base.py b/src/swell/tasks/base/task_base.py index 2ff149c3..72408415 100644 --- a/src/swell/tasks/base/task_base.py +++ b/src/swell/tasks/base/task_base.py @@ -88,7 +88,8 @@ def __init__(self, config_input, datetime_input, model, task_name): # Add JEDI config rendering helper # -------------------------------- self.jedi_rendering = JediConfigRendering(self.logger, self.__experiment_root__, - self.__experiment_id__, cycle_dir, self.__model__) + self.__experiment_id__, cycle_dir, + self.__datetime__, self.__model__) # Add GEOS utils # -------------- diff --git a/src/swell/tasks/clone_geos_ana.py b/src/swell/tasks/clone_geos_ana.py new file mode 100644 index 00000000..2d0aada9 --- /dev/null +++ b/src/swell/tasks/clone_geos_ana.py @@ -0,0 +1,48 @@ +# (C) Copyright 2021- United States Government as represented by the Administrator of the +# National Aeronautics and Space Administration. All Rights Reserved. 
+#
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+
+
+# --------------------------------------------------------------------------------------------------
+
+
+import os
+from swell.tasks.base.task_base import taskBase
+from swell.utilities.build import link_path
+
+# --------------------------------------------------------------------------------------------------
+
+
+class CloneGeosAna(taskBase):
+
+    def execute(self):
+
+        """
+        Generate the satellite channel record from GEOSadas files
+        """
+
+        # This task should only execute for geos_atmosphere
+        # -------------------------------------------------
+        if self.get_model() != 'geos_atmosphere':
+            self.logger.info('Skipping CloneGeosAna for: ' + self.get_model())
+            return
+
+        # Parse config
+        # ------------
+        path_to_geosana_gridcomp = self.config.observing_system_records_gsi_path()
+
+        # If observing_system_records_gsi_path is None, clone GEOSana_GridComp repo to experiment
+        # directory
+        if path_to_geosana_gridcomp == 'None':
+            # Clone GEOSana_GridComp develop repo to experiment directory
+            os.system('git clone https://github.com/GEOS-ESM/GEOSana_GridComp.git ' +
+                      os.path.join(self.experiment_path(), 'GEOSana_GridComp'))
+        else:
+            # Link the source code directory
+            link_path(self.config.observing_system_records_gsi_path(),
+                      os.path.join(self.experiment_path(), 'GEOSana_GridComp'))
+
+
+# ----------------------------------------------------------------------------------------------
diff --git a/src/swell/tasks/eva_observations.py b/src/swell/tasks/eva_observations.py
index 4f67f9bd..04ad8581 100644
--- a/src/swell/tasks/eva_observations.py
+++ b/src/swell/tasks/eva_observations.py
@@ -81,10 +81,13 @@ def execute(self):
                445, 552, 573, 906, 1121, 1194, 1427, 1585],
         }
 
-        # Loop over observations and create dictionaries
-        # ----------------------------------------------
+        # Loop over
observations + # ------------------- eva_dicts = [] # Empty list of dictionaries + # Set the observing system records path + self.jedi_rendering.set_obs_records_path(self.config.observing_system_records_path(None)) + for observation in self.config.observations(): # Load the observation dictionary diff --git a/src/swell/tasks/generate_observing_system_records.py b/src/swell/tasks/generate_observing_system_records.py new file mode 100644 index 00000000..0ce33621 --- /dev/null +++ b/src/swell/tasks/generate_observing_system_records.py @@ -0,0 +1,50 @@ +# (C) Copyright 2021- United States Government as represented by the Administrator of the +# National Aeronautics and Space Administration. All Rights Reserved. +# +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. + + +# -------------------------------------------------------------------------------------------------- + + +import os + +from swell.tasks.base.task_base import taskBase +from swell.utilities.observing_system_records import ObservingSystemRecords + +# -------------------------------------------------------------------------------------------------- + + +class GenerateObservingSystemRecords(taskBase): + + def execute(self): + + """ + Generate the observing system channel records from GEOSadas files + """ + + # This task should only execute for geos_atmosphere + # ------------------------------------------------- + if self.get_model() != 'geos_atmosphere': + self.logger.info('Skipping GenerateObservingSystemRecords for: ' + self.get_model()) + return + + # Parse GSI records and save channel selection yamls + # -------------------------------------------------- + observations = self.config.observations() + observing_system_records_path = self.config.observing_system_records_path(None) + if observing_system_records_path == 'None': + cycle_dir = self.cycle_dir() + observing_system_records_path = 
os.path.join(cycle_dir, 'observing_system_records') + + path_to_geosana_gridcomp = self.config.observing_system_records_gsi_path() + if path_to_geosana_gridcomp == 'None': + path_to_geosana_gridcomp = os.path.join(self.experiment_path(), 'GEOSana_GridComp') + path_to_gsi_records = os.path.join(path_to_geosana_gridcomp, 'GEOSaana_GridComp', + 'GSI_GridComp', 'mksi', 'sidb') + sat_records = ObservingSystemRecords() + sat_records.parse_records(path_to_gsi_records) + sat_records.save_yamls(observing_system_records_path, observations) + +# ---------------------------------------------------------------------------------------------- diff --git a/src/swell/tasks/get_observations.py b/src/swell/tasks/get_observations.py index e382cd7f..974317fe 100644 --- a/src/swell/tasks/get_observations.py +++ b/src/swell/tasks/get_observations.py @@ -43,6 +43,9 @@ def execute(self): crtm_coeff_dir = self.config.crtm_coeff_dir(None) window_offset = self.config.window_offset() + # Set the observing system records path + self.jedi_rendering.set_obs_records_path(self.config.observing_system_records_path(None)) + # Get window begin time window_begin = self.da_window_params.window_begin(window_offset) background_time = self.da_window_params.background_time(window_offset, diff --git a/src/swell/tasks/gsi_bc_to_ioda.py b/src/swell/tasks/gsi_bc_to_ioda.py index 1da9d308..32946755 100644 --- a/src/swell/tasks/gsi_bc_to_ioda.py +++ b/src/swell/tasks/gsi_bc_to_ioda.py @@ -47,6 +47,10 @@ def execute(self): sensors = [] sensors_satbias = [] sensors_tlapse = [] + + # Set the observing system records path + self.jedi_rendering.set_obs_records_path(self.config.observing_system_records_path(None)) + for observation in observations: print('observation', observation) diff --git a/src/swell/tasks/run_jedi_hofx_executable.py b/src/swell/tasks/run_jedi_hofx_executable.py index 9c3684b3..0388835d 100644 --- a/src/swell/tasks/run_jedi_hofx_executable.py +++ b/src/swell/tasks/run_jedi_hofx_executable.py @@ 
-37,6 +37,9 @@ def execute(self): jedi_forecast_model = self.config.jedi_forecast_model(None) generate_yaml_and_exit = self.config.generate_yaml_and_exit(False) + # Set the observing system records path + self.jedi_rendering.set_obs_records_path(self.config.observing_system_records_path(None)) + # Compute data assimilation window parameters background_time = self.da_window_params.background_time(window_offset, background_time_offset) diff --git a/src/swell/tasks/run_jedi_local_ensemble_da_executable.py b/src/swell/tasks/run_jedi_local_ensemble_da_executable.py index 673bd0a4..8554c390 100644 --- a/src/swell/tasks/run_jedi_local_ensemble_da_executable.py +++ b/src/swell/tasks/run_jedi_local_ensemble_da_executable.py @@ -37,6 +37,9 @@ def execute(self): jedi_forecast_model = self.config.jedi_forecast_model(None) generate_yaml_and_exit = self.config.generate_yaml_and_exit(False) + # Set the observing system records path + self.jedi_rendering.set_obs_records_path(self.config.observing_system_records_path(None)) + # Compute data assimilation window parameters background_time = self.da_window_params.background_time(window_offset, background_time_offset) diff --git a/src/swell/tasks/run_jedi_ufo_tests_executable.py b/src/swell/tasks/run_jedi_ufo_tests_executable.py index 0fa7d716..ec1e966d 100644 --- a/src/swell/tasks/run_jedi_ufo_tests_executable.py +++ b/src/swell/tasks/run_jedi_ufo_tests_executable.py @@ -39,6 +39,9 @@ def execute(self): single_observations = self.config.single_observations() generate_yaml_and_exit = self.config.generate_yaml_and_exit(False) + # Set the observing system records path + self.jedi_rendering.set_obs_records_path(self.config.observing_system_records_path(None)) + # Compute data assimilation window parameters window_begin = self.da_window_params.window_begin(window_offset) window_begin_iso = self.da_window_params.window_begin_iso(window_offset) diff --git a/src/swell/tasks/run_jedi_variational_executable.py 
b/src/swell/tasks/run_jedi_variational_executable.py index 0b9e6727..8d69e3d4 100644 --- a/src/swell/tasks/run_jedi_variational_executable.py +++ b/src/swell/tasks/run_jedi_variational_executable.py @@ -38,6 +38,9 @@ def execute(self): jedi_forecast_model = self.config.jedi_forecast_model(None) generate_yaml_and_exit = self.config.generate_yaml_and_exit(False) + # Set the observing system records path + self.jedi_rendering.set_obs_records_path(self.config.observing_system_records_path(None)) + npx_proc = self.config.npx_proc(None) npy_proc = self.config.npy_proc(None) diff --git a/src/swell/tasks/save_obs_diags.py b/src/swell/tasks/save_obs_diags.py index b3b15b90..a2f83b11 100644 --- a/src/swell/tasks/save_obs_diags.py +++ b/src/swell/tasks/save_obs_diags.py @@ -29,6 +29,9 @@ def execute(self): observations = self.config.observations() window_offset = self.config.window_offset() + # Set the observing system records path + self.jedi_rendering.set_obs_records_path(self.config.observing_system_records_path(None)) + # Get window beginning window_begin = self.da_window_params.window_begin(window_offset) background_time = self.da_window_params.background_time(window_offset, diff --git a/src/swell/tasks/task_questions.yaml b/src/swell/tasks/task_questions.yaml index 5d5e6531..5dcdcd10 100644 --- a/src/swell/tasks/task_questions.yaml +++ b/src/swell/tasks/task_questions.yaml @@ -588,6 +588,7 @@ observations: prompt: Which observations do you want to include? tasks: - EvaObservations + - GenerateObservingSystemRecords - GetGeovals - GetObservations - GsiBcToIoda @@ -599,6 +600,35 @@ observations: - SaveObsDiags type: string-check-list +observing_system_records_gsi_path: + ask_question: false + default_value: defer_to_model + models: + - geos_atmosphere + prompt: What is the path to the GSI formatted observing system records? 
+ tasks: + - CloneGeosAna + - GenerateObservingSystemRecords + type: string + +observing_system_records_path: + ask_question: false + default_value: defer_to_model + models: + - geos_atmosphere + prompt: What is the path to the Swell formatted observing system records? + tasks: + - EvaObservations + - GenerateObservingSystemRecords + - GetObservations + - GsiBcToIoda + - RunJediHofxExecutable + - RunJediLocalEnsembleDaExecutable + - RunJediUfoTestsExecutable + - RunJediVariationalExecutable + - SaveObsDiags + type: string + path_to_ensemble: ask_question: true default_value: defer_to_model diff --git a/src/swell/test/code_tests/active_channels_test_files/amsua_n19.yaml b/src/swell/test/code_tests/active_channels_test_files/amsua_n19.yaml new file mode 100644 index 00000000..6b69be8a --- /dev/null +++ b/src/swell/test/code_tests/active_channels_test_files/amsua_n19.yaml @@ -0,0 +1,218 @@ +obs space: + name: AMSU-A NOAA-19 + obsdatain: + engine: + type: H5File + obsfile: '{{cycle_dir}}/amsua_n19.{{window_begin}}.nc4' + obsdataout: + engine: + type: H5File + obsfile: '{{cycle_dir}}/{{experiment_id}}.amsua_n19.{{window_begin}}.nc4' + simulated variables: [brightnessTemperature] + channels: &amsua_n19_available_channels 1-15 + +obs operator: + name: CRTM + Absorbers: [H2O,O3,CO2] + obs options: + Sensor_ID: &Sensor_ID amsua_n19 + EndianType: little_endian + CoefficientPath: '{{crtm_coeff_dir}}' + linear obs operator: + Absorbers: [H2O,O3] + +obs bias: + input file: '{{cycle_dir}}/amsua_n19.{{background_time}}.satbias.nc4' + variables without bc: [brightnessTemperature] + channels: 14 + variational bc: + predictors: + - name: constant + - name: cloud_liquid_water + sensor: AMSUA + clwdif_ch238: 1 + clwdif_ch314: 2 + - name: lapse_rate + order: 2 + tlapse: &amsua_n19_tlapse '{{cycle_dir}}/amsua_n19.{{background_time}}.tlapse.txt' + - name: lapse_rate + tlapse: *amsua_n19_tlapse + - name: emissivity + - name: scan_angle + order: 4 + - name: scan_angle + order: 3 + - name: 
scan_angle + order: 2 + - name: scan_angle + covariance: + minimal required obs number: 20 + variance range: [1.0e-6, 10.0] + step size: 1.0e-4 + largest analysis variance: 10000.0 + prior: + input file: '{{cycle_dir}}/amsua_n19.{{background_time}}.satbias.nc4' + inflation: + ratio: 1.1 + ratio for small dataset: 2.0 + output file: '{{cycle_dir}}/amsua_n19.{{window_begin}}.satbias.nc4' + +obs filters: +# Window and surface-sensitive channels check + - filter: Bounds Check + filter variables: + - name: brightnessTemperature + channels: 1-6,15 + test variables: + - name: ObsValue/brightnessTemperature + channels: 1-6,15 + treat missing as out of bounds: true + minvalue: 100.0 + maxvalue: 500.0 + flag all filter variables if any test variable is out of bounds: true +# passedBenchmark: 1500 +# All channels unreasonable values check + - filter: Bounds Check + filter variables: + - name: brightnessTemperature + channels: *amsua_n19_available_channels + minvalue: 100.0 + maxvalue: 500.0 +# Hydrometeor Check (cloud/precipitation affected chanels) + - filter: Bounds Check + filter variables: + - name: brightnessTemperature + channels: *amsua_n19_available_channels + test variables: + - name: ObsFunction/HydrometeorCheckAMSUAclr + channels: *amsua_n19_available_channels + options: + sensor: *Sensor_ID + channels: *amsua_n19_available_channels + test_biaspredictor: cloud_liquid_waterPredictor + maxvalue: 0.0 + action: + name: reject +# Assign obs error + - filter: Perform Action + filter variables: + - name: brightnessTemperature + channels: *amsua_n19_available_channels + action: + name: assign error + error parameter vector: + [2.500, 2.000, 2.000, 0.550, 0.300, + 0.230, 0.230, 0.250, 0.250, 0.350, + 0.400, 0.550, 0.800, 5.000, 2.500] +# Topography check + - filter: BlackList + filter variables: + - name: brightnessTemperature + channels: *amsua_n19_available_channels + action: + name: inflate error + inflation variable: + name: ObsFunction/ObsErrorFactorTopoRad + channels: 
*amsua_n19_available_channels + options: + sensor: *Sensor_ID + channels: *amsua_n19_available_channels +# Transmittance Top Check + - filter: BlackList + filter variables: + - name: brightnessTemperature + channels: *amsua_n19_available_channels + action: + name: inflate error + inflation variable: + name: ObsFunction/ObsErrorFactorTransmitTopRad + channels: *amsua_n19_available_channels + options: + channels: *amsua_n19_available_channels +# Surface Jacobian check + - filter: BlackList + filter variables: + - name: brightnessTemperature + channels: *amsua_n19_available_channels + action: + name: inflate error + inflation variable: + name: ObsFunction/ObsErrorFactorSurfJacobianRad + channels: *amsua_n19_available_channels + options: + channels: *amsua_n19_available_channels + sensor: *Sensor_ID + use_biasterm: true + test_biasterm: ObsBiasTerm + obserr_demisf: [0.010, 0.020, 0.015, 0.020, 0.200] + obserr_dtempf: [0.500, 2.000, 1.000, 2.000, 4.500] +# Gross check + - filter: Background Check + filter variables: + - name: brightnessTemperature + channels: *amsua_n19_available_channels + function absolute threshold: + - name: ObsFunction/ObsErrorBoundMW + channels: *amsua_n19_available_channels + options: + sensor: *Sensor_ID + channels: *amsua_n19_available_channels + obserr_bound_latitude: + name: ObsFunction/ObsErrorFactorLatRad + options: + latitude_parameters: [25.0, 0.25, 0.04, 3.0] + obserr_bound_transmittop: + name: ObsFunction/ObsErrorFactorTransmitTopRad + channels: *amsua_n19_available_channels + options: + channels: *amsua_n19_available_channels + obserr_bound_topo: + name: ObsFunction/ObsErrorFactorTopoRad + channels: *amsua_n19_available_channels + options: + channels: *amsua_n19_available_channels + sensor: *Sensor_ID + error parameter vector: + [2.500, 2.000, 2.000, 0.550, 0.300, + 0.230, 0.230, 0.250, 0.250, 0.350, + 0.400, 0.550, 0.800, 5.000, 2.500] + obserr_bound_max: [4.5, 4.5, 4.5, 2.5, 2.0, + 2.0, 2.0, 2.0, 2.0, 2.0, + 2.5, 3.5, 4.5, 4.5, 4.5] 
+ action: + name: reject +# Inter-channel check + - filter: Bounds Check + filter variables: + - name: brightnessTemperature + channels: *amsua_n19_available_channels + test variables: + - name: ObsFunction/InterChannelConsistencyCheck + channels: *amsua_n19_available_channels + options: + channels: *amsua_n19_available_channels + use passive_bc: true + sensor: *Sensor_ID + use_flag: [-1, -1, -1, 1, 1, + 1, -1, -1, 1, 1, + 1, 1, 1, 1, -1] + maxvalue: 1.0e-12 + action: + name: reject +# Useflag check + - filter: Bounds Check + filter variables: + - name: brightnessTemperature + channels: *amsua_n19_available_channels + test variables: + - name: ObsFunction/ChannelUseflagCheckRad + channels: *amsua_n19_available_channels + options: + channels: *amsua_n19_available_channels +# use passive_bc: true + use_flag: [-1, -1, -1, 1, 1, + 1, -1, -1, 1, 1, + 1, 1, 1, 1, -1] + minvalue: 1.0e-12 + action: + name: reject diff --git a/src/swell/test/code_tests/active_channels_test_files/amsua_n19_channel_info.yaml b/src/swell/test/code_tests/active_channels_test_files/amsua_n19_channel_info.yaml new file mode 100644 index 00000000..f5ce074f --- /dev/null +++ b/src/swell/test/code_tests/active_channels_test_files/amsua_n19_channel_info.yaml @@ -0,0 +1,77 @@ +active: +- begin date: '2009-04-14T00:00:00' + channels: + - '4' + - '5' + - '6' + - '7' + - '8' + - '9' + - '10' + - '11' + - '12' + - '13' + - '14' + comments: '# M2: redundant entry intentional. 
Observations' + end date: '2009-12-15T06:00:00' +- begin date: '2009-12-15T12:00:00' + channels: + - '4' + - '5' + - '6' + - '7' + - '8' + - '9' + - '10' + - '11' + - '12' + - '13' + - '14' + comments: no comment + end date: '2009-12-21T18:00:00' +- begin date: '2009-12-22T00:00:00' + channels: + - '4' + - '5' + - '6' + - '7' + - '9' + - '10' + - '11' + - '12' + - '13' + - '14' + comments: '# ch8 noisy (noise started on 12/21/2009)' + end date: '2014-01-28T18:00:00' +- begin date: '2014-01-29T00:00:00' + channels: + - '4' + - '5' + - '6' + - '9' + - '10' + - '11' + - '12' + - '13' + - '14' + comments: '# per NCEP r35918' + end date: '2100-12-31T18:00:00' +available: +- begin date: '2009-04-14T00:00:00' + channels: + - '1' + - '2' + - '3' + - '4' + - '5' + - '6' + - '7' + - '8' + - '9' + - '10' + - '11' + - '12' + - '13' + - '14' + - '15' + end date: '2999-12-31T18:00:00' diff --git a/src/swell/test/code_tests/generate_observing_system_test.py b/src/swell/test/code_tests/generate_observing_system_test.py new file mode 100644 index 00000000..ec7e34f1 --- /dev/null +++ b/src/swell/test/code_tests/generate_observing_system_test.py @@ -0,0 +1,13 @@ +import os +from swell.utilities.observing_system_records import ObservingSystemRecords + +observations = ['amsua_n19'] +observing_system_records_path = './yaml_output/' + +# Clone GeosAna +os.system('git clone https://github.com/GEOS-ESM/GEOSana_GridComp.git') +path_to_gsi_records = os.path.join('GEOSana_GridComp/', 'GEOSaana_GridComp', + 'GSI_GridComp', 'mksi', 'sidb') +sat_records = ObservingSystemRecords() +sat_records.parse_records(path_to_gsi_records) +sat_records.save_yamls(observing_system_records_path, observations) diff --git a/src/swell/test/code_tests/get_active_channels_test.py b/src/swell/test/code_tests/get_active_channels_test.py new file mode 100644 index 00000000..4f61ce02 --- /dev/null +++ b/src/swell/test/code_tests/get_active_channels_test.py @@ -0,0 +1,12 @@ +from swell.utilities.get_active_channels 
import get_active_channels
+from datetime import datetime
+
+use_flags = [-1, -1, -1, 1, 1, 1, -1, -1, 1, 1, 1, 1, 1, 1, -1]
+
+cycle_time = datetime.strptime('20211212T000000Z', '%Y%m%dT%H%M%SZ')
+path_to_observing_sys_yamls = 'active_channels_test_files/'
+observation = 'amsua_n19'
+
+generated_use_flags = get_active_channels(path_to_observing_sys_yamls,
+                                          observation, cycle_time)
+assert (use_flags == generated_use_flags)
diff --git a/src/swell/utilities/get_active_channels.py b/src/swell/utilities/get_active_channels.py
new file mode 100644
index 00000000..2a6ffb65
--- /dev/null
+++ b/src/swell/utilities/get_active_channels.py
@@ -0,0 +1,81 @@
+# (C) Copyright 2021- United States Government as represented by the Administrator of the
+# National Aeronautics and Space Administration. All Rights Reserved.
+#
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+
+
+# --------------------------------------------------------------------------------------------------
+
+import yaml
+import os
+from datetime import datetime as dt
+
+# --------------------------------------------------------------------------------------------------
+
+
+def process_channel_lists(channel_list):
+
+    '''
+    Function processes list of elements in channel list
+    '''
+
+    final_channels_list = []
+    if not isinstance(channel_list, list):
+        channel_list = [channel_list]
+    for element in channel_list:
+        if '-' in element:
+            start, end = map(int, element.split('-'))
+            result_list = [x for x in range(start, end + 1)]
+            final_channels_list += result_list
+        else:
+            final_channels_list += [int(element)]
+
+    return final_channels_list
+
+# --------------------------------------------------------------------------------------------------
+
+
+def get_channel_list(input_dict, dt_cycle_time):
+
+    '''
+    Function retrieves channel lists from dict loaded from a yaml file
+    '''
+
+    for element in input_dict:
+        
begin_date = dt.strptime(element['begin date'], "%Y-%m-%dT%H:%M:%S") + end_date = dt.strptime(element['end date'], "%Y-%m-%dT%H:%M:%S") + if (dt_cycle_time > begin_date) and (dt_cycle_time < end_date): + return element['channels'] + +# -------------------------------------------------------------------------------------------------- + + +def get_active_channels(path_to_observing_sys_yamls, observation, dt_cycle_time): + + ''' + Comparing available channels and active channels from the observing + system records to create the use flag array needed in the + qc filter yaml files. + ''' + + # Retrieve available and active channels from records yaml + path_to_observing_sys_config = path_to_observing_sys_yamls + '/' + \ + observation + '_channel_info.yaml' + + if os.path.isfile(path_to_observing_sys_config): + with open(path_to_observing_sys_config, 'r') as file: + data = yaml.safe_load(file) + available_channels = get_channel_list(data['available'], dt_cycle_time) + active_channels = get_channel_list(data['active'], dt_cycle_time) + + available_channels_list = process_channel_lists(available_channels) + active_channels_list = process_channel_lists(active_channels) + use_flags = [1 if x in active_channels_list else -1 for x in available_channels_list] + + return use_flags + + else: + return None + +# -------------------------------------------------------------------------------------------------- diff --git a/src/swell/utilities/instr_state_machine.py b/src/swell/utilities/gsi_record_parser.py similarity index 56% rename from src/swell/utilities/instr_state_machine.py rename to src/swell/utilities/gsi_record_parser.py index 75586926..f51c47c0 100644 --- a/src/swell/utilities/instr_state_machine.py +++ b/src/swell/utilities/gsi_record_parser.py @@ -12,15 +12,15 @@ def check_end_times(end_times): - # Checks end times for 24 hour strings and converts them to 18 + ''' Checks end times for 24 hour strings and converts them to 18 ''' new_end_times = [] for end_time in 
end_times: - # note that python datetime does not allow for times with hour = 24 + # Note that python datetime does not allow for times with hour = 24 hour = end_time[8:10] if (hour == '24'): - # subtract 6 hours + # Subtract 6 hours tmp = int(end_time) - 60000 new_end_times.append(str(tmp)) else: @@ -29,28 +29,52 @@ def check_end_times(end_times): return new_end_times -class InstrStateMachine: +class GSIRecordParser: - def __init__(self, instr_df): + def __init__(self): - # Intakes a dataframe representing the rows for a particular - # instrument on a particular satellite. + ''' + This class employs a state machine algorithm to process raw data from GSI + .tbl files. The different states work together to parse through an initial + dataframe. The rows of the resulting dataframe correspond to clean entries + for a given instrument and satellite. + ''' + + self.idx = None + self.main_idx = None + self.start_times = None + self.end_times = None + self.instr_df = None + self.compare_channels = None + self.curr_channel_list = None + self.main_channel_list = None + self.return_df = None + + def reset(self): self.idx = 0 self.main_idx = 0 self.start_times = [] self.end_times = [] - self.instr_df = instr_df self.compare_channels = [] self.curr_channel_list = [] self.main_channel_list = [] - self.return_df = pd.DataFrame(columns=list(instr_df.columns.values)) - def run(self): + def run(self, instr_df): + + ''' + Effectively state one of the state machine. + Generates an ordered list of start times and corresponding + end times. Checks the end times and then proceeds to condition one. - # Effectively state one of the state machine. - # Generates an ordered list of start times and corresponding - # end times. Checks the end times and then proceeds to condition one. 
+ input: + instr_df = a dataframe containing raw data for a given satellite and + instrument + ''' + + self.reset() + self.instr_df = instr_df + self.return_df = pd.DataFrame(columns=list(instr_df.columns.values)) self.start_times = list(np.unique(self.instr_df["start"].values)) self.start_times.sort(key=int) @@ -63,8 +87,10 @@ def run(self): def condition_one(self): - # If there is one row for the date range, go to state 2. Otherwise, - # go to state 3. + ''' + If there is one row for the date range, go to state 2. Otherwise, + go to state 3. + ''' start_time_df = self.instr_df.loc[self.instr_df["start"] == self.start_times[self.idx]] n_curr_start_rows = len(start_time_df) @@ -78,8 +104,10 @@ def condition_one(self): def state_two(self): - # Update return_df with new row, increment idx, and then go to - # condition 4. + ''' + Update return_df with new row, increment idx, and then go to + condition 4. + ''' row = self.instr_df.loc[self.instr_df["start"] == self.start_times[self.idx]] self.update_return_df(row) @@ -88,12 +116,14 @@ def state_two(self): def state_three(self): - # Gather channels for all rows in current datetime. If there are more - # than one end time within the rows, set main_start_idx and - # main_channels_list, set compare_channels to the current channel list, - # update return_df with a new row, increment index, - # and then go to condition 2. - # Else, go to state 2 with the updated current channel list. + ''' + Gather channels for all rows in current datetime. If there are more + than one end time within the rows, set main_start_idx and + main_channels_list, set compare_channels to the current channel list, + update return_df with a new row, increment index, + and then go to condition 2. + Else, go to state 2 with the updated current channel list. 
+ ''' rows = self.instr_df.loc[self.instr_df["start"] == self.start_times[self.idx]] [self.curr_channel_list.extend(i) for i in rows["channels"].values] @@ -122,10 +152,12 @@ def state_three(self): def condition_two(self): - # If curr start/end is in main range, go to condition 3. Otherwise - # go to state 2 + ''' + If curr start/end is in main range, go to condition 3. Otherwise + go to state 2 + ''' - # return if end of df is reached + # Return if end of df is reached if (self.idx == len(self.start_times)): return @@ -143,23 +175,25 @@ def condition_two(self): def state_four(self): - # Update current channel list by whether values need to be turned - # on or turned off. Then update the return df, increment the index - # and go to condition 2. + ''' + Update current channel list by whether values need to be turned + on or turned off. Then update the return df, increment the index + and go to condition 2. + ''' row = self.instr_df.loc[self.instr_df["start"] == self.start_times[self.idx]] row_channel_list = row["channels"].values[0] - # if these are the same, the logic is off + # If these are the same, the logic is off assert (len(row_channel_list) != len(self.compare_channels)) self.curr_channel_list = self.main_channel_list if (len(row_channel_list) > len(self.compare_channels)): - # turn on + # Turn on turn_on = list(set(row_channel_list) - set(self.compare_channels)) self.curr_channel_list += turn_on else: - # turn off + # Turn off turn_off = list(set(self.compare_channels) - set(row_channel_list)) self.curr_channel_list = [x for x in self.curr_channel_list if x not in turn_off] @@ -170,9 +204,11 @@ def state_four(self): def condition_four(self): - # If next date range is right after previous date range, go to - # condition one. Otherwise, go to state 6. If there's no next - # then return. + ''' + If next date range is right after previous date range, go to + condition one. Otherwise, go to state 6. If there's no next + then return. 
+ ''' assert (self.idx != 0) if (self.idx == len(self.start_times)): @@ -191,8 +227,10 @@ def condition_four(self): def state_six(self): - # Create new empty date range, update return df with new row, and then - # head over to state one. No update to the index. + ''' + Create new empty date range, update return df with new row, and then + head over to state one. No update to the index. + ''' missing_time = {} @@ -211,43 +249,43 @@ def state_six(self): def get_instr_df(self): - # Returns the dataframe that the state machine generated! + ''' Returns the dataframe that the state machine generated! ''' return self.return_df def update_return_df(self, row, no_comment=False, missing=False, missing_time={}): - # Updates the return df based on parameters + ''' Adding rows to final dataframe that will be returned through get_instr_df ''' + # Updates the return df based on parameters if (missing): - self.return_df = self.return_df.append({ - "sat": row["sat"].values[0], - "start": missing_time['begin_time'], - "end": missing_time['end_time'], - "instr": row["instr"].values[0], - "channel_num": 0, - "channels": [], - "comments": "missing for this period", - }, ignore_index=True) + new_row = pd.DataFrame.from_dict({ + 'sat': [row["sat"].values[0]], + 'start': [missing_time['begin_time']], + 'end': [missing_time['end_time']], + 'instr': [row['instr'].values[0]], + 'channel_num': [0], + 'channels': [[]], + 'comments': ['missing for this period']}) elif (no_comment): - self.return_df = self.return_df.append({ - "sat": row["sat"].values[0], - "start": self.start_times[self.idx], - "end": self.end_times[self.idx], - "instr": row["instr"].values[0], - "channel_num": len(self.curr_channel_list), - "channels": self.curr_channel_list, - "comments": "" - }, ignore_index=True) + new_row = pd.DataFrame.from_dict({ + 'sat': [row["sat"].values[0]], + 'start': [self.start_times[self.idx]], + 'end': [self.end_times[self.idx]], + 'instr': [row['instr'].values[0]], + 'channel_num': 
[len(self.curr_channel_list)], + 'channels': [self.curr_channel_list], + 'comments': ['']}) else: - self.return_df = self.return_df.append({ - "sat": row["sat"].values[0], - "start": self.start_times[self.idx], - "end": self.end_times[self.idx], - "instr": row["instr"].values[0], - "channel_num": len(self.curr_channel_list), - "channels": self.curr_channel_list, - "comments": row["comments"].values[0], - }, ignore_index=True) + new_row = pd.DataFrame.from_dict({ + 'sat': [row["sat"].values[0]], + 'start': [self.start_times[self.idx]], + 'end': [self.end_times[self.idx]], + 'instr': [row['instr'].values[0]], + 'channel_num': [len(self.curr_channel_list)], + 'channels': [self.curr_channel_list], + 'comments': [row["comments"].values[0]]}) + + self.return_df = pd.concat([self.return_df, new_row], ignore_index=True) diff --git a/src/swell/utilities/observing_system_records.py b/src/swell/utilities/observing_system_records.py new file mode 100644 index 00000000..3cf6eccd --- /dev/null +++ b/src/swell/utilities/observing_system_records.py @@ -0,0 +1,180 @@ +import os +import yaml +import pandas as pd +import numpy as np +import datetime as dt +from swell.utilities.gsi_record_parser import GSIRecordParser + +# -------------------------------------------------------------------------------------------------- + + +def format_date(old_date): + + ''' Formatting date into expected template ''' + + date = dt.datetime.strptime(old_date, '%Y%m%d%H%M%S') + return date.isoformat() + +# -------------------------------------------------------------------------------------------------- + + +def read_sat_db(path_to_sat_db, column_names): + + ''' + Reading GSI observing system records row by row into + a pandas dataframe to be used by the gsi_record_parser + ''' + + filename = path_to_sat_db + df = pd.DataFrame(columns=column_names) + + file = open(filename, 'r') + lines = file.readlines() + + # Read blindly into an array, throw line away if it starts with # or newline + idx = 0 + for 
line in lines: + line_parts = line.split() + if (line_parts): + + if (line_parts[0][0] != '#' and line_parts[0][0] != '\n'): + new_row = pd.DataFrame.from_dict({ + 'sat': [''], + 'start': [''], + 'end': [''], + 'instr': [''], + 'channel_num': [0], + 'channels': [[]], + 'comments': ['']}) + + df = pd.concat([df, new_row], ignore_index=True) + df['sat'][idx] = line_parts[0] + df['start'][idx] = line_parts[1]+line_parts[2] + df['end'][idx] = line_parts[3]+line_parts[4] + df['instr'][idx] = line_parts[5] + df['channel_num'][idx] = line_parts[6] + + comment_present = next((i for i, x in enumerate(line_parts) if x == '#'), None) + + if (comment_present): + channel_list = line_parts[7:comment_present] + comment = line_parts[comment_present:] + comment_str = ' '.join(comment) + # Accounting for no comment + if (len(comment_str) != 1): + df['comments'][idx] = comment_str + else: + channel_list = line_parts[7:] + + df['channels'][idx] = channel_list + idx += 1 + return df + +# -------------------------------------------------------------------------------------------------- + + +class ObservingSystemRecords: + + ''' + Class handles calls to parse GSI observing system records. Parsed + records are saved internally in dataframes and can be outputted into + yaml files. + ''' + + def __init__(self): + self.column_names = ['sat', 'start', 'end', + 'instr', 'channel_num', + 'channels', 'comments'] + self.active_df = None + self.available_df = None + self.obs_registry = [] + + def parse_records(self, path_to_sat_db): + + ''' + This method reads in the active.tbl and available.tbl files + from GEOSAna and loads them into dataframes. These dataframes + are parsed using GSIRecordParser to get the final dataframes. 
+ ''' + + parser = GSIRecordParser() + channel_types = ['active', 'available'] + for channel_type in channel_types: + df = pd.DataFrame(columns=self.column_names) + path_to_records = os.path.join(path_to_sat_db, channel_type + '_channels.tbl') + + org_df = read_sat_db(path_to_records, self.column_names) + sat_list = np.unique(org_df['sat'].values) + for sat in sat_list: + sat_df = org_df.loc[org_df['sat'] == sat] + instr_list = np.unique(sat_df['instr'].values) + + for instr in instr_list: + instr_df = sat_df.loc[sat_df['instr'] == instr] + parser.run(instr_df) + new_instr_df = parser.get_instr_df() + df = pd.concat([df, new_instr_df], ignore_index=True) + if instr+'_'+sat not in self.obs_registry: + self.obs_registry.append(instr+'_'+sat) + + if channel_type == 'active': + self.active_df = df + elif channel_type == 'available': + self.available_df = df + else: + # logger assert abort? + print('record parsing unavailable for this type') + + def save_yamls(self, output_dir, observation_list=None): + + ''' + Fields are taken from the internal dataframes populated + by parse_records and saved to yaml files. 
+ ''' + + if not observation_list: + observation_list = self.obs_registry + + if not os.path.exists(output_dir): + os.mkdir(output_dir) + + # Assume that active and available channels have corresponding sat/instr fields + sat_list = np.unique(self.active_df['sat'].values) + for sat in sat_list: + active_df = self.active_df.loc[self.active_df['sat'] == sat] + available_df = self.available_df.loc[self.available_df['sat'] == sat] + instr_list = np.unique(active_df['instr'].values) + + for instr in instr_list: + sat_dict = {} + instr_active_df = active_df.loc[active_df['instr'] == instr] + instr_available_df = available_df.loc[available_df['instr'] == instr] + + compare_name = instr+'_'+sat + if compare_name in observation_list: + + active_field_list = [] + for idx, row in instr_active_df.iterrows(): + row_dict = {} + row_dict['begin date'] = format_date(row['start']) + row_dict['end date'] = format_date(row['end']) + row_dict['channels'] = row['channels'] + if (row['comments']): + row_dict['comments'] = row['comments'] + else: + row_dict['comments'] = 'no comment' + active_field_list.append(row_dict) + + available_field_list = [] + for idx, row in instr_available_df.iterrows(): + row_dict = {} + row_dict['begin date'] = format_date(row['start']) + row_dict['end date'] = format_date(row['end']) + row_dict['channels'] = row['channels'] + available_field_list.append(row_dict) + + sat_dict['available'] = available_field_list + sat_dict['active'] = active_field_list + + with open(output_dir+'/'+instr+'_'+sat+'_channel_info.yaml', 'w') as file: + yaml.dump(sat_dict, file) diff --git a/src/swell/utilities/render_jedi_interface_files.py b/src/swell/utilities/render_jedi_interface_files.py index 739fefe2..d508fdbe 100644 --- a/src/swell/utilities/render_jedi_interface_files.py +++ b/src/swell/utilities/render_jedi_interface_files.py @@ -11,34 +11,45 @@ import yaml from swell.utilities.jinja2 import template_string_jinja2 - +from swell.utilities.get_active_channels import 
get_active_channels # -------------------------------------------------------------------------------------------------- + class JediConfigRendering(): - def __init__(self, logger, experiment_root, experiment_id, cycle_dir, jedi_interface=None): + def __init__(self, logger, experiment_root, experiment_id, cycle_dir, cycle_time, + jedi_interface=None): # Keep a copy of the logger self.logger = logger + # Keep a copy of the cycle directory + self.cycle_dir = cycle_dir + # Copy the experiment configuration path self.jedi_config_path = os.path.join(experiment_root, experiment_id, 'configuration', 'jedi') + # Fields needed for get_active_channels + self.cycle_time = None + if cycle_time is not None: + self.cycle_time = cycle_time.dto() + self.observing_system_records_path = None + # Dictionary to hold things that can be templated - self.template_dict = {} + self.__template_dict__ = {} # Always store the cycle directory in the dictionary - self.template_dict['cycle_dir'] = cycle_dir + self.__template_dict__['cycle_dir'] = cycle_dir # Add the jedi interface to the dictionary self.jedi_interface = jedi_interface - self.template_dict['model_component'] = jedi_interface + self.__template_dict__['model_component'] = jedi_interface # Add experiment info to dictionary - self.template_dict['experiment_id'] = experiment_id - self.template_dict['experiment_root'] = experiment_root + self.__template_dict__['experiment_id'] = experiment_id + self.__template_dict__['experiment_root'] = experiment_root # List of all potential valid keys that can be used in templates self.valid_template_keys = [ @@ -96,7 +107,7 @@ def add_key(self, key, element): f'of the valid keys: \'{self.valid_template_keys}\'') # Add element to dictionary - self.template_dict[key] = element + self.__template_dict__[key] = element # ---------------------------------------------------------------------------------------------- @@ -113,7 +124,7 @@ def __open_file_render_to_dict__(self, config_file): # Fill templates 
in the configuration file using the config config_file_str = template_string_jinja2(self.logger, config_file_str_templated, - self.template_dict) + self.__template_dict__) # Convert string to dictionary return yaml.safe_load(config_file_str) @@ -148,6 +159,17 @@ def render_interface_model(self, config_name): # ---------------------------------------------------------------------------------------------- + def set_obs_records_path(self, path): + + # Never put a path that is string None in place + if path == 'None': + cd = self.cycle_dir + self.observing_system_records_path = os.path.join(cd, 'observing_system_records') + else: + self.observing_system_records_path = path + + # ---------------------------------------------------------------------------------------------- + # Prepare path to interface observations file and call rendering def render_interface_observations(self, config_name): @@ -160,12 +182,29 @@ def render_interface_observations(self, config_name): config_file = os.path.join(self.jedi_config_path, 'interfaces', self.jedi_interface, 'observations', f'{config_name}.yaml') + # Check that the self.observing_system_records_path was set + if self.observing_system_records_path is not None: + + # Check that observing_system_records_path and cycle_time are set + self.logger.assert_abort(self.cycle_time is not None, f'cycle_time must be set.') + + # Check that the config_name is not ufo_tests + if config_name != 'ufo_tests': + + # Get active channels + active_channels = get_active_channels(self.observing_system_records_path, + config_name, self.cycle_time) + + # Add active channels to template dictionary + self.__template_dict__[f'{config_name}_active_channels'] = active_channels + # Render templates in file and return dictionary return self.__open_file_render_to_dict__(config_file) # ---------------------------------------------------------------------------------------------- # Prepare path to interface metadata file and call rendering + def 
render_interface_meta(self, model_component_in=None): # Optionally open a different model interface diff --git a/src/swell/utilities/run_jedi_executables.py b/src/swell/utilities/run_jedi_executables.py index 79f5dcd7..f2d08ef5 100644 --- a/src/swell/utilities/run_jedi_executables.py +++ b/src/swell/utilities/run_jedi_executables.py @@ -23,7 +23,8 @@ def jedi_dictionary_iterator(jedi_config_dict, jedi_rendering, window_type, obs, # -------------------------------- for key, value in jedi_config_dict.items(): if isinstance(value, dict): - jedi_dictionary_iterator(value, jedi_rendering, window_type, obs, jedi_forecast_model) + jedi_dictionary_iterator(value, jedi_rendering, window_type, obs, + jedi_forecast_model) elif isinstance(value, bool): continue @@ -31,8 +32,10 @@ def jedi_dictionary_iterator(jedi_config_dict, jedi_rendering, window_type, obs, elif isinstance(value, list): for item in value: if isinstance(item, dict): - jedi_dictionary_iterator(item, jedi_rendering, window_type, obs, - jedi_forecast_model) + jedi_dictionary_iterator( + item, jedi_rendering, window_type, obs, + jedi_forecast_model + ) else: if 'TASKFILL' in value: @@ -46,7 +49,6 @@ def jedi_dictionary_iterator(jedi_config_dict, jedi_rendering, window_type, obs, if value_special == 'observations': observations = [] for ob in obs: - # Get observation dictionary obs_dict = jedi_rendering.render_interface_observations(ob) observations.append(obs_dict) jedi_config_dict[key] = observations diff --git a/src/swell/utilities/sat_db_utils.py b/src/swell/utilities/sat_db_utils.py deleted file mode 100644 index 2bb8344d..00000000 --- a/src/swell/utilities/sat_db_utils.py +++ /dev/null @@ -1,111 +0,0 @@ -# (C) Copyright 2021- United States Government as represented by the Administrator of the -# National Aeronautics and Space Administration. All Rights Reserved. 
-# -# This software is licensed under the terms of the Apache Licence Version 2.0 -# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. - - -# -------------------------------------------------------------------------------------------------- - - -import os -import pandas as pd -import numpy as np -from swell.utilities.git_utils import git_got -from swell.utilities.instr_state_machine import InstrStateMachine - - -# -------------------------------------------------------------------------------------------------- - - -def read_sat_db(path_to_sat_db, column_names): - - # read data into a dataframe, throw line away if it starts with # or newline - # --------------------------------------------------------------------------- - filename = os.path.join(path_to_sat_db, 'active_channels.tbl') - df = pd.DataFrame(columns=column_names) - - file = open(filename, 'r') - lines = file.readlines() - - # read blindly into an array, throw line away if it starts with # or newline - idx = 0 - for line in lines: - line_parts = line.split() - if (line_parts): - - if (line_parts[0][0] != '#' and line_parts[0][0] != '\n'): - - df = df.append({ - 'sat': '', - 'start': '', - 'end': '', - 'instr': '', - 'channel_num': 0, - 'channels': [], - 'comments': '' - }, ignore_index=True) - - df['sat'][idx] = line_parts[0] - df['start'][idx] = line_parts[1]+line_parts[2] - df['end'][idx] = line_parts[3]+line_parts[4] - df['instr'][idx] = line_parts[5] - df['channel_num'][idx] = line_parts[6] - - comment_present = next((i for i, x in enumerate(line_parts) if x == '#'), None) - - if (comment_present): - channel_list = line_parts[7:comment_present] - comment = line_parts[comment_present:] - comment_str = ' '.join(comment) - # accounting for no comment - if (len(comment_str) != 1): - df['comments'][idx] = comment_str - else: - channel_list = line_parts[7:] - - df['channels'][idx] = channel_list - idx += 1 - - return df - - -# 
-------------------------------------------------------------------------------------------------- - - -def run_sat_db_process(git_out_dir, logger): - - # Process satellite database files in GEOS-ESM and return dataframe - # ----------------------------------------------------------------------- - - git_repo = 'https://github.com/GEOS-ESM/GEOSana_GridComp.git' - git_branch = 'develop' - git_out_path = os.path.join(git_out_dir, 'GEOSana_GridComp') - - # clone repo - git_got(git_repo, git_branch, git_out_path, logger) - path_to_sat_db = os.path.join(git_out_path, 'GEOSaana_GridComp', 'GSI_GridComp', 'mksi', 'sidb') - - column_names = ['sat', 'start', 'end', 'instr', 'channel_num', - 'channels', 'comments'] - df = read_sat_db(path_to_sat_db, column_names) - final_df = pd.DataFrame(columns=column_names) - - sat_list = np.unique(df['sat'].values) - for sat in sat_list: - sat_df = df.loc[df['sat'] == sat] - - instr_list = np.unique(sat_df['instr'].values) - - for instr in instr_list: - instr_df = sat_df.loc[sat_df['instr'] == instr] - - state_machine = InstrStateMachine(instr_df) - state_machine.run() - new_instr_df = state_machine.get_instr_df() - final_df = final_df.append(new_instr_df) - - return final_df - - -# -------------------------------------------------------------------------------------------------- diff --git a/src/swell/utilities/scripts/swell_sat_db_processing.py b/src/swell/utilities/scripts/swell_sat_db_processing.py deleted file mode 100644 index 62cafa8c..00000000 --- a/src/swell/utilities/scripts/swell_sat_db_processing.py +++ /dev/null @@ -1,92 +0,0 @@ -# (C) Copyright 2021- United States Government as represented by the Administrator of the -# National Aeronautics and Space Administration. All Rights Reserved. -# -# This software is licensed under the terms of the Apache Licence Version 2.0 -# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. 
- - -import os -import yaml -import click -import numpy as np - -from datetime import datetime as dt - -from swell.utilities.sat_db_utils import run_sat_db_process - - -def make_yamls(final_df, output_dir): - - ''' - Uses the dataframe created by sat_db_processing - to write out yaml files - ''' - - if not os.path.exists(output_dir): - os.mkdir(output_dir) - - sat_list = np.unique(final_df['sat'].values) - for sat in sat_list: - - df = final_df.loc[final_df['sat'] == sat] - instr_list = np.unique(df['instr'].values) - sat_dict = {} - - for instr in instr_list: - - sat_dict[instr] = {} - instr_df = df.loc[df['instr'] == instr] - - field_list = [] - for idx, row in instr_df.iterrows(): - - row_dict = {} - row_dict['begin date'] = format_date(row['start']) - row_dict['end date'] = format_date(row['end']) - row_dict['channels'] = row['channels'] - - if (row['comments']): - row_dict['comments'] = row['comments'] - else: - row_dict['comments'] = 'no comment' - - field_list.append(row_dict) - - sat_dict[instr] = field_list - - with open(output_dir+'/'+sat+'.yaml', 'w') as file: - yaml.dump(sat_dict, file) - - -def format_date(old_date): - - ''' - Formatting datetime object - ''' - - date = dt.strptime(old_date, '%Y%m%d%H%M%S') - return date.isoformat() - - -@click.command() -@click.argument('config') -def main(config): - - with open(config, 'r') as ymlfile: - config_dict = yaml.safe_load(ymlfile) - user = os.environ['USER'] - geos_sat_db_root = config_dict['geos_sat_db_root'].replace('${USER}', user) - - try: - os.makedirs(geos_sat_db_root) - except Exception: - print('SATELLITE DATABASE DIRECTORY IS ALREADY GENERATED') - - yaml_out_dir = geos_sat_db_root + '/satdb_yamls' - git_out_dir = geos_sat_db_root + '/GEOSana_GridComp' - - # run sat db processing util - processed_data = run_sat_db_process(git_out_dir) - - # create yamls - make_yamls(processed_data, yaml_out_dir)