UCL · tbhallett · Feb 6, 2024 · Dec 18, 2023 · Dec 18, 2023 · Dec 19, 2023
diff --git a/resources/healthsystem/ResourceFile_HealthSystem_parameters.csv b/resources/healthsystem/ResourceFile_HealthSystem_parameters.csv
diff --git a/resources/healthsystem/absenteeism/HHFA_amended_ResourceFile_patient_facing_time.xlsx b/resources/healthsystem/absenteeism/HHFA_amended_ResourceFile_patient_facing_time.xlsx
diff --git a/resources/healthsystem/absenteeism/ResourceFile_Absenteeism.xlsx b/resources/healthsystem/absenteeism/ResourceFile_Absenteeism.xlsx
diff --git a/...hsystem/human_resources/formatting_absenteeism_factors_from_health_facility_assessment.py b/...hsystem/human_resources/formatting_absenteeism_factors_from_health_facility_assessment.py
@@ -0,0 +1,84 @@
+"""Derive absenteeism scaling factors from the health facility assessment survey.
+
+For each officer category and facility level, the factor is the value in the
+"Scenario 2" survey sheet divided by the mean minutes per day per staff member
+derived from ``ResourceFile_Daily_Capabilities.csv``. Where either value is
+missing, the factor defaults to 1. The table is written to
+``absenteeism_factor.xlsx`` in the working directory and printed.
+
+Input paths are relative; presumably the script is run from the repository root.
+"""
+import pandas as pd
+
+# Facility level -> name of the per-level column used in the survey and output tables.
+LEVEL_TO_COLUMN = {
+    "0": "L0_Av_Mins_Per_Day",
+    "1a": "L1a_Av_Mins_Per_Day",
+    "1b": "L1b_Av_Mins_Per_Day",
+    "2": "L2_Av_Mins_Per_Day",
+    "3": "L3_Av_Mins_Per_Day",
+    "4": "L4_Av_Mins_Per_Day",
+    "5": "L5_Av_Mins_Per_Day",
+}
+
+# Facility levels for which the assumed capabilities are summarised. Any other
+# level has no summary value and so gets a factor of 1 from the final `fillna`.
+SUMMARISED_LEVELS = ["0", "1a", "1b", "2"]
+
+# Specify the file paths
+DAILY_CAPABILITIES_CSV = "resources/healthsystem/human_resources/actual/ResourceFile_Daily_Capabilities.csv"
+OFFICER_TYPES_CSV = "resources/healthsystem/human_resources/definitions/ResourceFile_Officer_Types_Table.csv"
+SURVEY_XLSX = "resources/healthsystem/absenteeism/HHFA_amended_ResourceFile_patient_facing_time.xlsx"
+
+# Load the CSV and Excel inputs into DataFrames
+daily_capabilities = pd.read_csv(DAILY_CAPABILITIES_CSV)
+officer_types = pd.read_csv(OFFICER_TYPES_CSV)
+survey_daily_capabilities = pd.read_excel(SURVEY_XLSX, sheet_name="Scenario 2")
+
+# Clean survey_daily_capabilities by replacing officer codes with category, and
+# calculating the mean within category. Only the two lookup columns are merged,
+# and `validate` raises if a code maps to more than one row of the lookup table,
+# so every survey row keeps exactly one category.
+survey_daily_capabilities = (
+    survey_daily_capabilities
+    .merge(
+        officer_types[["Officer_Type_Code", "Officer_Category"]],
+        on="Officer_Type_Code",
+        how="left",
+        validate="many_to_one",
+    )
+    .drop(columns=["Officer_Type_Code", "Total_Av_Working_Days"])
+    .groupby("Officer_Category")
+    .mean()
+)
+
+# Obtain average mins per day per staff member; undefined results (e.g. 0/0 where
+# there are no staff) are counted as 0.
+daily_capabilities["Av_mins_per_day"] = (
+    daily_capabilities["Total_Mins_Per_Day"] / daily_capabilities["Staff_Count"]
+).fillna(0)
+
+# Obtain mean daily capabilities for given facility level and officer category
+# across all facilities, laid out with the same rows and columns as the survey
+# table so that the two can be divided element-wise.
+level_columns = [LEVEL_TO_COLUMN[level] for level in SUMMARISED_LEVELS]
+summarise_daily_capabilities = (
+    daily_capabilities[daily_capabilities["Facility_Level"].isin(SUMMARISED_LEVELS)]
+    .groupby(["Officer_Category", "Facility_Level"])["Av_mins_per_day"]
+    .mean()
+    .unstack("Facility_Level")
+    .rename(columns=LEVEL_TO_COLUMN)
+    .rename_axis(columns=None)
+    .reindex(
+        index=survey_daily_capabilities.index,
+        columns=survey_daily_capabilities.columns.union(level_columns, sort=False),
+    )
+)
+
+# If no data is available, assume scaling factor of 1
+absenteeism_factor = (survey_daily_capabilities / summarise_daily_capabilities).fillna(1.)
+
+# Output absenteeism file
+absenteeism_factor.to_excel("absenteeism_factor.xlsx")
+
+print(absenteeism_factor)
diff --git a/src/tlo/methods/healthsystem.py b/src/tlo/methods/healthsystem.py
@@ -540,6 +540,15 @@ class HealthSystem(Module):
                         " the queueing system under different policies, where the lower the number the higher"
                         " the priority, and on which categories of individuals classify for fast-tracking "
                         " for specific treatments"),
+
+        'absenteeism_table': Parameter(
+            Types.DICT, "Factors by which capabilities of medical officer categories at different levels will be "
+                        "reduced to account for issues of absenteeism in the workforce."),
+
+        'absenteeism_mode': Parameter(
+            Types.STRING, "Mode of absenteeism considered. Options are default (capabilities are scaled by a "
+                          "constant factor of 1), data (factors informed by survey data), and custom (user "
+                          "can freely set these factors as parameters in the analysis)."),
 
         'tclose_overwrite': Parameter(
             Types.INT, "Decide whether to overwrite tclose variables assigned by disease modules"),
@@ -793,6 +802,11 @@ def read_parameters(self, data_folder):
         self.parameters['priority_rank'] = pd.read_excel(path_to_resourcefiles_for_healthsystem / 'priority_policies' /
                                                          'ResourceFile_PriorityRanking_ALLPOLICIES.xlsx',
                                                          sheet_name=None)
+
+        self.parameters['absenteeism_table'] = pd.read_excel(path_to_resourcefiles_for_healthsystem / 'absenteeism' /
+                                                         'ResourceFile_Absenteeism.xlsx',
+                                                         sheet_name=None)
+
 
     def pre_initialise_population(self):
         """Generate the accessory classes used by the HealthSystem and pass to them the data that has been read."""
@@ -833,6 +847,12 @@ def pre_initialise_population(self):
 
         # Set up framework for considering a priority policy
         self.setup_priority_policy()
+
+        # Ensure the mode of absenteeism to be considered is included in the tables loaded
+        assert self.parameters['absenteeism_mode'] in self.parameters['absenteeism_table']
+
+        # Scale
+
 
     def initialise_population(self, population):
         self.bed_days.initialise_population(population.props)
@@ -932,6 +952,8 @@ def setup_priority_policy(self):
     def process_human_resources_files(self, use_funded_or_actual_staffing: str):
         """Create the data-structures needed from the information read into the parameters."""
 
+
+
         # * Define Facility Levels
         self._facility_levels = set(self.parameters['Master_Facilities_List']['Facility_Level']) - {'5'}
         assert self._facility_levels == {'0', '1a', '1b', '2', '3', '4'}  # todo soft code this?
@@ -1041,6 +1063,17 @@ def format_daily_capabilities(self, use_funded_or_actual_staffing: str) -> pd.Se
 
         (This is so that its easier to track where demands are being placed where there is no capacity)
         """
+
+        # Rescale assumed Daily Capabilities to account for absenteeism
+        absenteeism_factor = self.parameters['absenteeism_table'][self.parameters['absenteeism_mode']]
+        absenteeism_factor = absenteeism_factor.set_index('Officer_Category')
+
+        level_conversion = { "1a" : "L1a_Av_Mins_Per_Day", "1b":"L1b_Av_Mins_Per_Day",
+                             "2":"L2_Av_Mins_Per_Day", "0":"L0_Av_Mins_Per_Day", "3": "L3_Av_Mins_Per_Day",
+                             "4": "L4_Av_Mins_Per_Day", "5": "L5_Av_Mins_Per_Day"}
+
+        self.parameters[f'Daily_Capabilities_{use_funded_or_actual_staffing}']['Total_Mins_Per_Day'] *= self.parameters[f'Daily_Capabilities_{use_funded_or_actual_staffing}'].apply(lambda row: absenteeism_factor.loc[row['Officer_Category'],
+                                                                                   level_conversion[row['Facility_Level']]], axis=1)
 
         # Get the capabilities data imported (according to the specified underlying assumptions).
         capabilities = pool_capabilities_at_levels_1b_and_2(