Skip to content

Commit 5c2fce3

Browse files
committed
Merge branch 'master' into hallett/wasting_module
2 parents 83d3ec4 + b678823 commit 5c2fce3

File tree

8 files changed

+182
-4
lines changed

8 files changed

+182
-4
lines changed
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
version https://git-lfs.github.com/spec/v1
2-
oid sha256:b06f61949b83b93f08f187af4cb9c7515b21ab8c28613e1df7ea04d0150a12bb
3-
size 304
2+
oid sha256:187302cf1744ee6a9538665c5dff5650f58e4d00c8d74844ff8b569b6b38f9d1
3+
size 335
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
version https://git-lfs.github.com/spec/v1
2+
oid sha256:92d2a71c58a8232d9c1b50da58c63db18f1e1cf47d8a02adb7c0467afd40fb7a
3+
size 8903
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
version https://git-lfs.github.com/spec/v1
2+
oid sha256:e9415f5249a5c4ddd2b5ccc3fbf1a64b33132b4b9e379ad079a9539cb109b24e
3+
size 8515
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
import pandas as pd

# Map facility-level codes to the matching column names in the survey workbook.
# (Renamed from `dict`, which shadowed the builtin.)
LEVEL_TO_COLUMN = {
    "0": "L0_Av_Mins_Per_Day",
    "1a": "L1a_Av_Mins_Per_Day",
    "1b": "L1b_Av_Mins_Per_Day",
    "2": "L2_Av_Mins_Per_Day",
    "3": "L3_Av_Mins_Per_Day",
    "4": "L4_Av_Mins_Per_Day",
    "5": "L5_Av_Mins_Per_Day",
}

# Specify the file paths
file_path1 = "resources/healthsystem/human_resources/actual/ResourceFile_Daily_Capabilities.csv"
file_path2 = "resources/healthsystem/human_resources/definitions/ResourceFile_Officer_Types_Table.csv"
file_path3 = "resources/healthsystem/absenteeism/HHFA_amended_ResourceFile_patient_facing_time.xlsx"

# Load the resource files into DataFrames
daily_capabilities = pd.read_csv(file_path1)
officer_types = pd.read_csv(file_path2)
survey_daily_capabilities = pd.read_excel(file_path3, sheet_name="Scenario 2")

# Clean survey_daily_capabilities: replace officer codes with their category,
# then take the mean of each column within each category.
merged_df = pd.merge(survey_daily_capabilities, officer_types, on="Officer_Type_Code", how="left")
survey_daily_capabilities["Officer_Category"] = merged_df["Officer_Category"]
survey_daily_capabilities = survey_daily_capabilities.drop(
    columns=["Officer_Type_Code", "Total_Av_Working_Days"]
)
survey_daily_capabilities = survey_daily_capabilities.groupby("Officer_Category").mean().reset_index()

# Obtain average minutes per day per staff member (0 where there are no staff,
# avoiding division-by-zero NaNs).
daily_capabilities["Av_mins_per_day"] = (
    daily_capabilities["Total_Mins_Per_Day"] / daily_capabilities["Staff_Count"]
).fillna(0)

# Obtain the distinct officer categories present in the capabilities data
officers = daily_capabilities["Officer_Category"].drop_duplicates()

# Obtain mean daily capabilities for each (facility level, officer category)
# pair across all facilities, in the same table layout as the survey data.
summarise_daily_capabilities = pd.DataFrame(columns=survey_daily_capabilities.columns)
summarise_daily_capabilities["Officer_Category"] = survey_daily_capabilities["Officer_Category"]

for level in ["0", "1a", "1b", "2"]:
    dc_at_level = daily_capabilities[daily_capabilities["Facility_Level"] == level]
    for officer in officers:
        dc_at_level_officer = dc_at_level[dc_at_level["Officer_Category"] == officer]
        mean_val = dc_at_level_officer["Av_mins_per_day"].mean()
        summarise_daily_capabilities.loc[
            summarise_daily_capabilities["Officer_Category"] == officer,
            LEVEL_TO_COLUMN[level],
        ] = mean_val

survey_daily_capabilities = survey_daily_capabilities.set_index("Officer_Category")
summarise_daily_capabilities = summarise_daily_capabilities.set_index("Officer_Category")

# If no data is available, assume a scaling factor of 1
absenteeism_factor = (survey_daily_capabilities / summarise_daily_capabilities).fillna(1.)

# Output the absenteeism file
absenteeism_factor.to_excel("absenteeism_factor.xlsx")

print(absenteeism_factor)

src/tlo/analysis/utils.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -1217,7 +1217,8 @@ def construct_multiindex_if_implied(df):
12171217

12181218
def mix_scenarios(*dicts) -> Dict:
12191219
"""Helper function to combine a Dicts that show which parameters should be over-written.
1220-
* Warnings are generated if a parameter appears in more than one Dict with a different value;
1220+
* If a parameter appears in more than one Dict, the value in the last-added dict is taken, and a UserWarning
1221+
is raised;
12211222
* Items under the same top-level key (i.e., for the Module) are merged rather than being over-written."""
12221223

12231224
d = defaultdict(lambda: defaultdict(dict))

src/tlo/methods/healthsystem.py

+56-1
Original file line numberDiff line numberDiff line change
@@ -541,6 +541,19 @@ class HealthSystem(Module):
541541
" the priority, and on which categories of individuals classify for fast-tracking "
542542
" for specific treatments"),
543543

544+
'const_HR_scaling_table': Parameter(
545+
Types.DICT, "Factors by which capabilities of medical officer categories at different levels will be"
546+
"scaled at the start of the simulation to simulate a number of effects (e.g. absenteeism,"
547+
"boosting of specific medical cadres, etc). This is the imported from an"
548+
"Excel workbook: keys are the worksheet names and values are the worksheets in "
549+
"the format of pd.DataFrames."),
550+
551+
'const_HR_scaling_mode': Parameter(
552+
Types.STRING, "Mode of HR scaling considered at the start of the simulation. Options are default"
553+
" (capabilities are scaled by a constaint factor of 1), data (factors informed by survey data),"
554+
"and custom (user can freely set these factors as parameters in the analysis).",
555+
),
556+
544557
'tclose_overwrite': Parameter(
545558
Types.INT, "Decide whether to overwrite tclose variables assigned by disease modules"),
546559

@@ -794,8 +807,22 @@ def read_parameters(self, data_folder):
794807
'ResourceFile_PriorityRanking_ALLPOLICIES.xlsx',
795808
sheet_name=None)
796809

810+
self.parameters['const_HR_scaling_table']: Dict = pd.read_excel(
811+
path_to_resourcefiles_for_healthsystem /
812+
"human_resources" /
813+
"const_HR_scaling" /
814+
"ResourceFile_const_HR_scaling.xlsx",
815+
sheet_name=None # all sheets read in
816+
)
817+
818+
797819
def pre_initialise_population(self):
798820
"""Generate the accessory classes used by the HealthSystem and pass to them the data that has been read."""
821+
822+
# Ensure the mode of HR scaling to be considered is included in the tables loaded
823+
assert self.parameters['const_HR_scaling_mode'] in self.parameters['const_HR_scaling_table'], \
824+
f"Value of `const_HR_scaling_mode` not recognised: {self.parameters['const_HR_scaling_mode']}"
825+
799826
# Create dedicated RNGs for separate functions done by the HealthSystem module
800827
self.rng_for_hsi_queue = np.random.RandomState(self.rng.randint(2 ** 31 - 1))
801828
self.rng_for_dx = np.random.RandomState(self.rng.randint(2 ** 31 - 1))
@@ -834,6 +861,7 @@ def pre_initialise_population(self):
834861
# Set up framework for considering a priority policy
835862
self.setup_priority_policy()
836863

864+
837865
def initialise_population(self, population):
838866
self.bed_days.initialise_population(population.props)
839867

@@ -932,6 +960,8 @@ def setup_priority_policy(self):
932960
def process_human_resources_files(self, use_funded_or_actual_staffing: str):
933961
"""Create the data-structures needed from the information read into the parameters."""
934962

963+
964+
935965
# * Define Facility Levels
936966
self._facility_levels = set(self.parameters['Master_Facilities_List']['Facility_Level']) - {'5'}
937967
assert self._facility_levels == {'0', '1a', '1b', '2', '3', '4'} # todo soft code this?
@@ -1031,6 +1061,28 @@ def process_human_resources_files(self, use_funded_or_actual_staffing: str):
10311061
# never available.)
10321062
self._officers_with_availability = set(self._daily_capabilities.index[self._daily_capabilities > 0])
10331063

1064+
def adjust_for_const_HR_scaling(self, df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of the Daily_Capabilities dataframe with `Total_Mins_Per_Day`
    scaled to reflect assumptions about HR resources (e.g. absenteeism or boosting
    of specific cadres).

    :param df: Daily_Capabilities dataframe; must contain the columns
        `Officer_Category`, `Facility_Level` and `Total_Mins_Per_Day`.
    :return: a new dataframe with `Total_Mins_Per_Day` multiplied by the factor
        looked up for each row's (officer category, facility level) pair.
    """
    # Work on a copy: the dataframe passed in is the one stored in
    # `self.parameters`, and mutating it in place would compound the scaling
    # if this method were ever called more than once.
    df = df.copy()

    # Get the table of scaling factors selected by the 'const_HR_scaling_mode'
    # assumption, indexed by officer category.
    const_HR_scaling_factor = (
        self.parameters['const_HR_scaling_table'][self.parameters['const_HR_scaling_mode']]
        .set_index('Officer_Category')
    )

    # Map facility-level codes to the matching column names in the scaling table.
    level_conversion = {
        "0": "L0_Av_Mins_Per_Day", "1a": "L1a_Av_Mins_Per_Day",
        "1b": "L1b_Av_Mins_Per_Day", "2": "L2_Av_Mins_Per_Day",
        "3": "L3_Av_Mins_Per_Day", "4": "L4_Av_Mins_Per_Day",
        "5": "L5_Av_Mins_Per_Day",
    }

    # Look up the scaling factor for each row's category/level combination.
    scaler = df[['Officer_Category', 'Facility_Level']].apply(
        lambda row: const_HR_scaling_factor.loc[
            row['Officer_Category'], level_conversion[row['Facility_Level']]
        ],
        axis=1,
    )

    # Apply scaling to 'Total_Mins_Per_Day'
    df['Total_Mins_Per_Day'] *= scaler

    return df
1084+
1085+
10341086
def format_daily_capabilities(self, use_funded_or_actual_staffing: str) -> pd.Series:
10351087
"""
10361088
This will updates the dataframe for the self.parameters['Daily_Capabilities'] so as to include
@@ -1044,7 +1096,10 @@ def format_daily_capabilities(self, use_funded_or_actual_staffing: str) -> pd.Se
10441096

10451097
# Get the capabilities data imported (according to the specified underlying assumptions).
10461098
capabilities = pool_capabilities_at_levels_1b_and_2(
1047-
self.parameters[f'Daily_Capabilities_{use_funded_or_actual_staffing}'])
1099+
self.adjust_for_const_HR_scaling(
1100+
self.parameters[f'Daily_Capabilities_{use_funded_or_actual_staffing}']
1101+
)
1102+
)
10481103
capabilities = capabilities.rename(columns={'Officer_Category': 'Officer_Type_Code'}) # neaten
10491104

10501105
# Create dataframe containing background information about facility and officer types

tests/test_analysis.py

+31
Original file line numberDiff line numberDiff line change
@@ -388,6 +388,37 @@ def test_mix_scenarios():
388388
assert 1 == len(record)
389389
assert record.list[0].message.args[0] == 'Parameter is being updated more than once: module=Mod1, parameter=param_b'
390390

391+
# Test the behaviour of the `mix_scenarios` taking the value in the right-most dict.
392+
assert mix_scenarios(
393+
{'Mod1': {
394+
'param_a': 'value_in_dict1',
395+
'param_b': 'value_in_dict1',
396+
'param_c': 'value_in_dict1',
397+
}},
398+
{'Mod1': {
399+
'param_a': 'value_in_dict2',
400+
'param_b': 'value_in_dict2',
401+
'param_c': 'value_in_dict2',
402+
}},
403+
{'Mod1': {
404+
'param_a': 'value_in_dict3',
405+
'param_b': 'value_in_dict_right_most',
406+
'param_c': 'value_in_dict3',
407+
}},
408+
{'Mod1': {
409+
'param_a': 'value_in_dict_right_most',
410+
'param_c': 'value_in_dict4',
411+
}},
412+
{"Mod1": {
413+
"param_c": "value_in_dict_right_most",
414+
}},
415+
) == {
416+
'Mod1': {'param_a': 'value_in_dict_right_most',
417+
'param_b': 'value_in_dict_right_most',
418+
'param_c': 'value_in_dict_right_most',
419+
}
420+
}
421+
391422

392423
def test_scenario_switcher(seed):
393424
"""Check the `ScenarioSwitcher` module can update parameter values in a manner similar to them being changed

tests/test_healthsystem.py

+37
Original file line numberDiff line numberDiff line change
@@ -2240,3 +2240,40 @@ def get_hsi_log(service_availability, randomise_hsi_queue) -> pd.DataFrame:
22402240

22412241
# Check that HSI event logs are identical
22422242
pd.testing.assert_frame_equal(run_with_asterisk, run_with_list)
2243+
2244+
2245+
def test_const_HR_scaling_assumption(seed, tmpdir):
2246+
"""Check that we can use the parameter `const_HR_scaling_mode` to manipulate the minutes of time available for healthcare
2247+
workers."""
2248+
2249+
def get_capabilities_today(const_HR_scaling_mode: str) -> pd.Series:
2250+
sim = Simulation(start_date=start_date, seed=seed)
2251+
sim.register(
2252+
demography.Demography(resourcefilepath=resourcefilepath),
2253+
healthsystem.HealthSystem(resourcefilepath=resourcefilepath)
2254+
)
2255+
sim.modules['HealthSystem'].parameters['const_HR_scaling_mode'] = const_HR_scaling_mode
2256+
sim.make_initial_population(n=100)
2257+
sim.simulate(end_date=start_date + pd.DateOffset(days=0))
2258+
2259+
return sim.modules['HealthSystem'].capabilities_today
2260+
2261+
caps = {
2262+
_const_HR_scaling_mode: get_capabilities_today(_const_HR_scaling_mode)
2263+
for _const_HR_scaling_mode in ('default', 'data', 'custom')
2264+
}
2265+
2266+
# Check that the custom assumption (multiplying all capabilities by 0.5) gives expected result
2267+
assert np.allclose(
2268+
caps['custom'].values,
2269+
caps['default'].values * 0.5
2270+
)
2271+
2272+
# Check that the "data" assumptions leads to changes in the capabilities (of any direction)
2273+
assert not np.allclose(
2274+
caps['data'].values,
2275+
caps['default'].values
2276+
)
2277+
2278+
2279+

0 commit comments

Comments
 (0)