codes_to_items_list: script generalised + PEP 8

EvaJanouskova · EvaJanouskova · commit a30017aaf2db · 2023-11-15T19:01:04.000Z
diff --git a/src/tlo/analysis/codes_to_items_list.py b/src/tlo/analysis/codes_to_items_list.py
@@ -18,39 +18,44 @@
 ------
 """
 
-import pandas as pd
 from pathlib import Path
 
-
-# ## CHANGE THIS IF YOU WANT TO USE DIFFERENT FILE AS INPUT
-csv_file_to_update_name = 'ResourceFile_Equipment_withoutEquipmentCodes'
+import pandas as pd
 
 # Get the path of the current script file
 script_path = Path(__file__)
 print(script_path)
 
-# Specify the file path to RF csv file
+# #############################
+# ## CHANGE THIS FOR YOUR FILE
+# Specify the name of the csv file (without the '.csv' extension)
+csv_file_to_update_name = 'ResourceFile_Equipment_withoutEquipmentCodes'
+# Specify the path to the folder containing the csv file
 file_path = script_path.parent.parent.parent.parent / 'resources/healthsystem/infrastructure_and_equipment'
+# Specify the names of columns containing the item names and item codes
+item_col_name = 'Equip_Item'
+code_col_name = 'Equip_Code'
+# #############################
 
 # Load the CSV RF into a DataFrame
 df = pd.read_csv(Path(file_path) / str(csv_file_to_update_name + '.csv'))
 
 # Find unique values in Equipment that have no code and are not None or empty
 unique_values =\
-    df.loc[df['Equip_Code'].isna() & df['Equip_Item'].notna() & (df['Equip_Item'] != ''), 'Equip_Item'].unique()
+    df.loc[df[code_col_name].isna() & df[item_col_name].notna() & (df[item_col_name] != ''), item_col_name].unique()
 
 # Create a mapping of unique values to codes
 value_to_code = {}
 # Initialize the starting code value
-if not df['Equip_Code'].isna().all():
-    next_code = int(df['Equip_Code'].max()) + 1
+if not df[code_col_name].isna().all():
+    next_code = int(df[code_col_name].max()) + 1
 else:
     next_code = 0
 
 # Iterate through unique values
 for value in unique_values:
     # Check if there is at least one existing code for this value
-    matching_rows = df.loc[df['Equip_Item'] == value, 'Equip_Code'].dropna()
+    matching_rows = df.loc[df[item_col_name] == value, code_col_name].dropna()
     if not matching_rows.empty:
         # Use the existing code for this value
         existing_code = int(matching_rows.iloc[0])
@@ -60,11 +65,11 @@
         existing_code = next_code
         next_code += 1
     value_to_code[value] = existing_code
-    # Update the 'Equip_Code' column for matching rows
-    df.loc[df['Equip_Item'] == value, 'Equip_Code'] = existing_code
+    # Update the code column (named by code_col_name) for matching rows
+    df.loc[df[item_col_name] == value, code_col_name] = existing_code
 
-# Convert 'Equip_Code' column to integers
-df['Equip_Code'] = df['Equip_Code'].astype('Int64')  # Convert to nullable integer type
+# Convert the code column (named by code_col_name) to integers
+df[code_col_name] = df[code_col_name].astype('Int64')  # Convert to nullable integer type
 
 # Save CSV with equipment codes
 df.to_csv(Path(file_path) / str(csv_file_to_update_name + '_new.csv'), index=False)