From 94e17a3d0908ae96585520d2e62d01ea93fae932 Mon Sep 17 00:00:00 2001
From: Patrick Lenz
Date: Mon, 8 Jan 2024 14:20:01 +0100
Subject: [PATCH 1/6] fine_res 0 bug

---
 .../memilio/epidata/getNPIData.py | 297 +++++++++---------
 1 file changed, 152 insertions(+), 145 deletions(-)

diff --git a/pycode/memilio-epidata/memilio/epidata/getNPIData.py b/pycode/memilio-epidata/memilio/epidata/getNPIData.py
index 91e2676eea..a88ec4e394 100644
--- a/pycode/memilio-epidata/memilio/epidata/getNPIData.py
+++ b/pycode/memilio-epidata/memilio/epidata/getNPIData.py
@@ -137,6 +137,7 @@ def read_files(directory, fine_resolution):
     @return Data frames df_npis_old (Decreed, encoded NPIs for all German
         counties) and df_npis_desc (Description of NPIs)
     """
+    run_check = True
     if fine_resolution > 0:
         try:
             try:
@@ -199,6 +200,12 @@ def read_files(directory, fine_resolution):
                 df_npis_old['NPI_code'] = df_npis_old['NPI_code'].str.replace(
                     'code_m', 'M')
             except FileNotFoundError:
+                # TODO: sanity check fails with this file due to different shapes of the dataframe
+                # analysis runs without problems, check if results are the same and either change
+                # sanity check or the way the data from this file is handled.
+                # For now, the sanity check is deactivated
+                run_check = False
+                print('WARNING: sanity check is deactivated. Results may not be as expected.')
                 df_npis_old = pd.read_csv(
                     os.path.join(
                         directory, 'kr_massnahmen_unterkategorien.csv'),
@@ -256,14 +263,15 @@ def read_files(directory, fine_resolution):
             df_npis_combinations_pre = pd.read_excel(
                 os.path.join(
                     directory, 'combination_npis_incl_ranking.xlsx'), engine='openpyxl')
+            if run_check == True:
+                npi_sanity_check(df_npis_old, df_npis_desc, df_npis_combinations_pre)
+                return df_npis_old, df_npis_desc, df_npis_combinations_pre
+            else:
+                return df_npis_old, df_npis_desc, None
     except FileNotFoundError:
         print('File not found.')
         raise FileNotFoundError

-    npi_sanity_check(df_npis_old, df_npis_desc, df_npis_combinations_pre)
-
-    return df_npis_old, df_npis_desc, df_npis_combinations_pre
-

 def activate_npis_based_on_incidence(
         local_incid, npi_lifting_days_threshold, npi_activation_days_threshold,
@@ -379,7 +387,7 @@ def drop_codes_and_categories(
     @return Returns dropped codes, prior codes and reduced original data frame.
     """
     if fine_resolution > 0:
-
+        # subcategories can only be removed for fine_resolution=1,2 as they don't exist for fine_resolution=0
         for i in range(1, 6):
             # correct M04_N codes to M_04_M_N, N in {1,...,5}, M in {120,110,100,130,140}
             # (M04_1, i.e. i=1, has been corrected in original file but not for i>1)
@@ -393,49 +401,49 @@ def drop_codes_and_categories(
         # correct 'M16_200_2' to missing 'M16_100_2'
         npi_codes_prior[npi_codes_prior == 'M16_200_2'] = 'M16_100_2'

-        # check for missing codes
-        npi_codes_prior_data = df_npis_old[dd.EngEng['npiCode']].unique()
-
-        missing_codes = list(set(npi_codes_prior).difference(
-            npi_codes_prior_data))
-        if len(missing_codes) > 0:
-            # if incidence is grouped, only search for grouping codes without
-            # having a detailed "_DETAIL" naming as of MCODE_NUMBER_DETAIL
-            if fine_resolution == 1:
-                missing_grouped_codes = []
-                for mcode in missing_codes:
-                    # only consider incidence independent npis
-                    # only exit if one of these (i.e., MCODE_NUMBER) is missing
-                    if len(mcode.split('_')) != 3:
-                        missing_grouped_codes.append(mcode)
-                if len(missing_grouped_codes) > 0:  # only MCODE_NUMBER codes
-                    raise gd.DataError('Missing NPI codes: ' +
-                                       str(missing_grouped_codes))
-            else:
-                raise gd.DataError('Missing NPI codes: ' + str(missing_codes))
-
-        # we dont have any explanations from "datensatzbeschreibung_massnahmen"
-        # on these codes, so drop the rows.
-        codes_dropped = list(set(npi_codes_prior_data).difference(
-            npi_codes_prior))
-        # also remove dummy 'Platzhalter' categories
-        dummy_categories = []
-        for i in range(len(npi_codes_prior)):
-            if 'Platzhalter' in npi_codes_prior_desc[i]:
-                dummy_categories.append(npi_codes_prior[i])
-        # codes without explanation and dummy categories
-        # sorting done for consistenty, maybe not necessary
-        codes_dropped = list(np.sort(codes_dropped + dummy_categories))
-        if len(codes_dropped) > 0:
-            df_npis_old = df_npis_old[~df_npis_old[dd.EngEng['npiCode']].isin(
-                codes_dropped)].reset_index(drop=True)
+    # check for missing codes
+    npi_codes_prior_data = df_npis_old[dd.EngEng['npiCode']].unique()
+
+    missing_codes = list(set(npi_codes_prior).difference(
+        npi_codes_prior_data))
+    if len(missing_codes) > 0:
+        # if incidence is grouped, only search for grouping codes without
+        # having a detailed "_DETAIL" naming as of MCODE_NUMBER_DETAIL
+        if fine_resolution == 1:
+            missing_grouped_codes = []
+            for mcode in missing_codes:
+                # only consider incidence independent npis
+                # only exit if one of these (i.e., MCODE_NUMBER) is missing
+                if len(mcode.split('_')) != 3:
+                    missing_grouped_codes.append(mcode)
+            if len(missing_grouped_codes) > 0:  # only MCODE_NUMBER codes
+                raise gd.DataError('Missing NPI codes: ' +
+                                   str(missing_grouped_codes))
+        else:
+            raise gd.DataError('Missing NPI codes: ' + str(missing_codes))
+
+    # we don't have any explanations from "datensatzbeschreibung_massnahmen"
+    # on these codes, so drop the rows.
+    codes_dropped = list(set(npi_codes_prior_data).difference(
+        npi_codes_prior))
+
+    # also remove dummy 'Platzhalter' categories
+    dummy_categories = []
+    for i in range(len(npi_codes_prior)):
+        if 'Platzhalter' in npi_codes_prior_desc[i]:
+            dummy_categories.append(npi_codes_prior[i])
+    # codes without explanation and dummy categories
+    # sorting done for consistency, maybe not necessary
+    codes_dropped = list(np.sort(codes_dropped + dummy_categories + missing_codes))
+    if len(codes_dropped) > 0:
+        df_npis_old = df_npis_old[~df_npis_old[dd.EngEng['npiCode']].isin(
+            codes_dropped)].reset_index(drop=True)
+    if fine_resolution > 0:
+        # no subcodes for fine_resolution = 0
         # for every main code removed, all 5 subcodes have to be removed;
         # if this is not the case, the naming of them is wrong/not consistent
         if (len(codes_dropped) % 6) != 0:
             raise gd.DataError('Error in NPI names, please check.')
-    else:
-        # no dropping for fine_resolution == 0
-        codes_dropped = []

     return codes_dropped, npi_codes_prior, df_npis_old
@@ -553,22 +561,18 @@ def get_npi_data(fine_resolution=2,
         npi_start_col = np.where(
             df_npis_old.columns.str.contains('d2') == True)[0][0]

-    # get existing codes that are used; for fine resolution we don't
-    # have codes M22 - M24 but these are still listed in description
-    if fine_resolution > 0:
-        # count how many codes contain M22, M23 or M24
-        num_nonexistent_codes = df_npis_desc['Variablenname'].str.count(
-            "M22|M23|M24").sum()
-        # do not include these nonexistent codes
-        if num_nonexistent_codes != 0:
-            npi_codes_prior = df_npis_desc['Variablenname'].iloc[: -
-                                                                 num_nonexistent_codes]
-            npi_codes_prior_desc = df_npis_desc['Variable'].iloc[: -
-                                                                 num_nonexistent_codes]
-        else:
-            npi_codes_prior = df_npis_desc['Variablenname']
-            npi_codes_prior_desc = df_npis_desc['Variable']
-    # for fine_resolution = 0 df_npis_old M22-M24 are empty)
+    # get existing codes that are used;
+    # we don't have codes M22 - M24 but these are still listed in description
+
+    # count how many codes contain M22, M23 or M24
+    num_nonexistent_codes = df_npis_desc['Variablenname'].str.count(
+        "M22|M23|M24").sum()
+    # do not include these nonexistent codes
+    if num_nonexistent_codes != 0:
+        npi_codes_prior = df_npis_desc['Variablenname'].iloc[: -
+            num_nonexistent_codes]
+        npi_codes_prior_desc = df_npis_desc['Variable'].iloc[: -
+            num_nonexistent_codes]
     else:
         npi_codes_prior = df_npis_desc['Variablenname']
         npi_codes_prior_desc = df_npis_desc['Variable']
@@ -919,38 +923,39 @@ def get_npi_data(fine_resolution=2,

     # setup dataframe for each maingroup, same format as df_npi_combinations
     # used to count codes that occur simultaneously now (before any (de-)activation)
-    df_count_joint_codes = copy.deepcopy(df_npis_combinations)
-    for maincode in df_count_joint_codes.keys():
-        df_count_joint_codes[maincode][1] *= 0
-    df_counted_joint_codes = count_code_multiplicities(df_npis_old, df_count_joint_codes,
-                                                       counties_considered=counties_considered)
-    save_interaction_matrix(df_counted_joint_codes, 'joint_codes', directory)
-    plot_interaction_matrix('joint_codes', directory)
-
-    # create dataframe to count multiple codes after incidence dependent (de-)activation
-    df_incid_depend = pd.DataFrame()
-    df_count_incid_depend = copy.deepcopy(df_npis_combinations)
-    for maincode in df_count_incid_depend.keys():
-        df_count_incid_depend[maincode][1] *= 0
-
-    # create dataframe to count multiple codes after strictness deactivation
-    df_count_active = copy.deepcopy(df_npis_combinations)
-    for maincode in df_count_active.keys():
-        df_count_active[maincode][1] *= 0
-
-    # setup dataframe for each maingroup, same format as df_npi_combinations
-    # used to count number of codes that are deactivated
-    df_count_deactivation = copy.deepcopy(df_npis_combinations)
-    for maincode in df_count_deactivation.keys():
-        df_count_deactivation[maincode][1] *= 0
-
-    all_subcodes = []
-    for maincode in df_npis_combinations.keys():
-        all_subcodes += df_npis_combinations[maincode][1].columns.to_list()
-        # check (and validate) that element 0 and 1 in df_npis_combination match.
-        if df_npis_combinations[maincode][1].columns.to_list() != list(
-                df_npis_combinations[maincode][0].keys()):
-            raise gd.DataError('Error. Description and table do not match.')
+    if fine_resolution > 0:
+        df_count_joint_codes = df_npis_combinations[:]
+        for maincode in df_count_joint_codes.keys():
+            df_count_joint_codes[maincode][1] *= 0
+        df_counted_joint_codes = count_code_multiplicities(df_npis_old, df_count_joint_codes,
+                                                           counties_considered=counties_considered)
+        save_interaction_matrix(df_counted_joint_codes, 'joint_codes', directory)
+        plot_interaction_matrix('joint_codes', directory)
+
+        # create dataframe to count multiple codes after incidence dependent (de-)activation
+        df_incid_depend = pd.DataFrame()
+        df_count_incid_depend = copy.deepcopy(df_npis_combinations)
+        for maincode in df_count_incid_depend.keys():
+            df_count_incid_depend[maincode][1] *= 0
+
+        # create dataframe to count multiple codes after strictness deactivation
+        df_count_active = copy.deepcopy(df_npis_combinations)
+        for maincode in df_count_active.keys():
+            df_count_active[maincode][1] *= 0
+
+        # setup dataframe for each maingroup, same format as df_npi_combinations
+        # used to count number of codes that are deactivated
+        df_count_deactivation = copy.deepcopy(df_npis_combinations)
+        for maincode in df_count_deactivation.keys():
+            df_count_deactivation[maincode][1] *= 0
+
+        all_subcodes = []
+        for maincode in df_npis_combinations.keys():
+            all_subcodes += df_npis_combinations[maincode][1].columns.to_list()
+            # check (and validate) that element 0 and 1 in df_npis_combination match.
+            if df_npis_combinations[maincode][1].columns.to_list() != list(
+                    df_npis_combinations[maincode][0].keys()):
+                raise gd.DataError('Error. Description and table do not match.')

     for countyID in counties_considered:
         cid = 0
@@ -993,51 +998,53 @@ def get_npi_data(fine_resolution=2,
         df_local_old = copy.deepcopy(df_npis_old[df_npis_old[dd.EngEng['idCounty']]
                                                  == countyID])

-        # get number of codes of one NPI (incidence indep. + dep.)
-        # for fine_resolution=1, inc_codes=1, for fine_res=2, inc_codes=6
-        inc_codes = len(np.where(df_npis.columns.str.contains(
-            npis[dd.EngEng['npiCode']][0]))[0])
-
-        # Consistency of incidence independent and dependent NPIs:
-        # The same NPI should not be prescribed multiple times at the same day
-        # for different incidence-dependent thresholds or incidence-independently.
-        # In order to avoid contradictions, only retain the strictest mentioned
-        # implementation. Incidence-independent is always stricter than any
-        # incidence-dependent implementation.
-        # define if details are printed (probably to be deactivated)
-        print_details = True
-        for i in range(int(len(df_local_old)/inc_codes)):
-
-            # check if access is correct
-            if not all(
-                [npis[dd.EngEng['npiCode']][i * inc_codes] in npi_code_test
-                 for npi_code_test in df_local_old.iloc
-                 [inc_codes * i: inc_codes * (i + 1),
-                  npi_start_col - 1].to_list()]):
-                raise gd.DataError('Wrong NPI rows aggregated.')
-
-            sum_npi_inc = np.where(
-                df_local_old.iloc[inc_codes*i:inc_codes*(i+1), npi_start_col:].sum() > 1)
-            if (len(sum_npi_inc[0]) > 0) and print_details:
-                print(
-                    'Reduce multiple prescription in county ' + str(countyID) +
-                    ' for NPI ' + str(npis.loc[inc_codes*i, 'Description']))
-            for j in sum_npi_inc[0]:
-                # get lowest index (i.e., strictest implementation of NPI).
-                idx_start = np.where(
-                    df_local_old.iloc[inc_codes*i:inc_codes*(i+1), npi_start_col+j])[0].min()
-                # Remove less strict and thus contradictory
-                # implementations of the same NPI the same day.
-                df_local_old.iloc[inc_codes*i+idx_start +
-                                  1:inc_codes*(i+1), npi_start_col+j] = 0
-
-            if not all(
-                df_local_old.iloc
-                [inc_codes * i: inc_codes * (i + 1),
-                 npi_start_col + sum_npi_inc[0]].sum() == 1):
-                raise gd.DataError('Consistency correction failed.')
-
-        ## end of consistency correction ##
+        if fine_resolution == 2:
+            # get number of codes of one NPI (incidence indep. + dep.)
+            # for fine_resolution=2, inc_codes=6
+            inc_codes = len(np.where(df_npis.columns.str.contains(
+                npis[dd.EngEng['npiCode']][0]))[0])
+
+            # Consistency of incidence independent and dependent NPIs:
+            # The same NPI should not be prescribed multiple times at the same day
+            # for different incidence-dependent thresholds or incidence-independently.
+            # In order to avoid contradictions, only retain the strictest mentioned
+            # implementation. Incidence-independent is always stricter than any
+            # incidence-dependent implementation.
+            # define if details are printed (probably to be deactivated)
+            print_details = True
+            for i in range(int(len(df_local_old)/inc_codes)):
+
+                # check if access is correct
+                if not all(
+                    [npis[dd.EngEng['npiCode']][i * inc_codes] in npi_code_test
+                     for npi_code_test in df_local_old.iloc
+                     [inc_codes * i: inc_codes * (i + 1),
+                      npi_start_col - 1].to_list()]):
+                    raise gd.DataError('Wrong NPI rows aggregated.')
+
+                sum_npi_inc = np.where(
+                    df_local_old.iloc[inc_codes*i:inc_codes*(i+1), npi_start_col:].sum() > 1)
+                if (len(sum_npi_inc[0]) > 0):
+                    if print_details:
+                        print(
+                            'Reduce multiple prescription in county ' + str(countyID) +
+                            ' for NPI ' + str(npis.loc[inc_codes*i, 'Description']))
+                for j in sum_npi_inc[0]:
+                    # get lowest index (i.e., strictest implementation of NPI).
+                    idx_start = np.where(
+                        df_local_old.iloc[inc_codes*i:inc_codes*(i+1), npi_start_col+j])[0].min()
+                    # Remove less strict and thus contradictory
+                    # implementations of the same NPI the same day.
+                    df_local_old.iloc[inc_codes*i+idx_start +
+                                      1:inc_codes*(i+1), npi_start_col+j] = 0
+
+                if not all(
+                    df_local_old.iloc
+                    [inc_codes * i: inc_codes * (i + 1),
+                     npi_start_col + sum_npi_inc[0]].sum() == 1):
+                    raise gd.DataError('Consistency correction failed.')
+
+            ## end of consistency correction ##

         # potentially remove rows if they are not in npis dict
         npi_rows = [i in npis[dd.EngEng['npiCode']].values
@@ -1230,9 +1237,16 @@ def get_npi_data(fine_resolution=2,
                   '. Estimated time remaining: ' +
                   str(int(time_remain / 60)) + ' min.')

-    save_interaction_matrix(df_count_deactivation,
-                            'count_deactivation', directory)
-    plot_interaction_matrix('count_deactivation', directory)
+    if fine_resolution > 0:
+        save_interaction_matrix(df_count_deactivation,
+                                'count_deactivation', directory)
+        plot_interaction_matrix('count_deactivation', directory)
+        save_interaction_matrix(df_count_incid_depend,
+                                'joint_codes_incid_depend', directory)
+        plot_interaction_matrix('joint_codes_incid_depend', directory)
+
+        save_interaction_matrix(df_count_active, 'joint_codes_active', directory)
+        plot_interaction_matrix('joint_codes_active', directory)

     if counter_cases_start >= len(counties_considered)*0.05:
         print('WARNING: DataFrame starts with reported cases > 0 '
             'Please consider a start date of some weeks ahead of the '
             'time window to be analyzed for NPI\'s effects.')

-    save_interaction_matrix(df_count_incid_depend,
-                            'joint_codes_incid_depend', directory)
-    plot_interaction_matrix('joint_codes_incid_depend', directory)
-
-    save_interaction_matrix(df_count_active, 'joint_codes_active', directory)
-    plot_interaction_matrix('joint_codes_active', directory)
-
     # print sub counters
     print('Sub task counters are: ')
     print(counters)
@@ -1491,7 +1498,7 @@ def main():
     # arg_dict = gd.cli("testing")

     df = get_npi_data(start_date=date(2020, 1, 1),
-                      fine_resolution=2, file_format='csv')
+                      fine_resolution=0, file_format='csv')


 if __name__ == "__main__":
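With this patch, read_files() returns None in place of the combinations frame for fine_resolution == 0 and skips the sanity check when the legacy CSV fallback is read, so callers have to treat the third return value as optional. A minimal, illustrative sketch of that calling contract (process_combinations() is a hypothetical helper, not part of the module):

    # sketch only: unpack defensively, since the third value is None
    # whenever fine_resolution == 0
    df_npis_old, df_npis_desc, df_npis_combinations_pre = read_files(
        directory, fine_resolution)
    if df_npis_combinations_pre is not None:
        # combination/ranking data only exists for fine_resolution in (1, 2)
        process_combinations(df_npis_combinations_pre)  # hypothetical helper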
From 248fd4af25df2fe932d1a0a8f1df4252f58e9c23 Mon Sep 17 00:00:00 2001
From: Patrick Lenz
Date: Mon, 15 Jan 2024 09:47:39 +0100
Subject: [PATCH 2/6] fine_res 1 error

---
 .../memilio-epidata/memilio/epidata/getNPIData.py | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/pycode/memilio-epidata/memilio/epidata/getNPIData.py b/pycode/memilio-epidata/memilio/epidata/getNPIData.py
index a88ec4e394..2e664ef67f 100644
--- a/pycode/memilio-epidata/memilio/epidata/getNPIData.py
+++ b/pycode/memilio-epidata/memilio/epidata/getNPIData.py
@@ -262,7 +262,7 @@ def read_files(directory, fine_resolution):
     if fine_resolution > 0:
         df_npis_combinations_pre = pd.read_excel(
             os.path.join(
-                directory, 'combination_npis_incl_ranking.xlsx'), engine='openpyxl')
+                directory, 'combination_npis_incl_ranking_v3.xlsx'), engine='openpyxl')
         if run_check == True:
             npi_sanity_check(df_npis_old, df_npis_desc, df_npis_combinations_pre)
             return df_npis_old, df_npis_desc, df_npis_combinations_pre
@@ -921,9 +921,12 @@ def get_npi_data(fine_resolution=2,
     df_npis_old.replace([-99, 2, 3, 4, 5], [0, 1, 1, 1, 1], inplace=True)

     counter_cases_start = 0
+    # create dataframe to count multiple codes after incidence dependent (de-)activation
+    df_incid_depend = pd.DataFrame()
+
     # setup dataframe for each maingroup, same format as df_npi_combinations
     # used to count codes that occur simultaneously now (before any (de-)activation)
-    if fine_resolution > 0:
+    if fine_resolution == 2:
         df_count_joint_codes = df_npis_combinations[:]
         for maincode in df_count_joint_codes.keys():
             df_count_joint_codes[maincode][1] *= 0
         df_counted_joint_codes = count_code_multiplicities(df_npis_old, df_count_joint_codes,
                                                            counties_considered=counties_considered)
         save_interaction_matrix(df_counted_joint_codes, 'joint_codes', directory)
         plot_interaction_matrix('joint_codes', directory)

-        # create dataframe to count multiple codes after incidence dependent (de-)activation
-        df_incid_depend = pd.DataFrame()
         df_count_incid_depend = copy.deepcopy(df_npis_combinations)
         for maincode in df_count_incid_depend.keys():
             df_count_incid_depend[maincode][1] *= 0
@@ -949,6 +950,8 @@ def get_npi_data(fine_resolution=2,
         for maincode in df_count_deactivation.keys():
             df_count_deactivation[maincode][1] *= 0

+    if fine_resolution > 0:
+
         all_subcodes = []
         for maincode in df_npis_combinations.keys():
             all_subcodes += df_npis_combinations[maincode][1].columns.to_list()
@@ -1237,7 +1240,7 @@ def get_npi_data(fine_resolution=2,
                   '. Estimated time remaining: ' +
                   str(int(time_remain / 60)) + ' min.')

-    if fine_resolution > 0:
+    if fine_resolution > 2:
         save_interaction_matrix(df_count_deactivation,
                                 'count_deactivation', directory)
         plot_interaction_matrix('count_deactivation', directory)
@@ -1498,7 +1501,7 @@ def main():
     # arg_dict = gd.cli("testing")

     df = get_npi_data(start_date=date(2020, 1, 1),
-                      fine_resolution=0, file_format='csv')
+                      fine_resolution=2, file_format='csv')


 if __name__ == "__main__":

From 15979527184cb1c94b497a136ca6c1751d6c6578 Mon Sep 17 00:00:00 2001
From: Patrick Lenz
Date: Mon, 15 Jan 2024 09:51:36 +0100
Subject: [PATCH 3/6] pre-commit

---
 .../memilio/epidata/getNPIData.py | 42 +++++++++++--------
 1 file changed, 24 insertions(+), 18 deletions(-)

diff --git a/pycode/memilio-epidata/memilio/epidata/getNPIData.py b/pycode/memilio-epidata/memilio/epidata/getNPIData.py
index 2e664ef67f..cd95291ba2 100644
--- a/pycode/memilio-epidata/memilio/epidata/getNPIData.py
+++ b/pycode/memilio-epidata/memilio/epidata/getNPIData.py
@@ -205,7 +205,8 @@ def read_files(directory, fine_resolution):
                 # sanity check or the way the data from this file is handled.
                 # For now, the sanity check is deactivated
                 run_check = False
-                print('WARNING: sanity check is deactivated. Results may not be as expected.')
+                print(
+                    'WARNING: sanity check is deactivated. Results may not be as expected.')
                 df_npis_old = pd.read_csv(
                     os.path.join(
                         directory, 'kr_massnahmen_unterkategorien.csv'),
@@ -264,7 +265,8 @@ def read_files(directory, fine_resolution):
                 os.path.join(
                     directory, 'combination_npis_incl_ranking_v3.xlsx'), engine='openpyxl')
         if run_check == True:
-            npi_sanity_check(df_npis_old, df_npis_desc, df_npis_combinations_pre)
+            npi_sanity_check(df_npis_old, df_npis_desc,
+                             df_npis_combinations_pre)
             return df_npis_old, df_npis_desc, df_npis_combinations_pre
         else:
             return df_npis_old, df_npis_desc, None
@@ -418,7 +420,7 @@ def drop_codes_and_categories(
                     missing_grouped_codes.append(mcode)
             if len(missing_grouped_codes) > 0:  # only MCODE_NUMBER codes
                 raise gd.DataError('Missing NPI codes: ' +
-                               str(missing_grouped_codes))
+                                   str(missing_grouped_codes))
         else:
             raise gd.DataError('Missing NPI codes: ' + str(missing_codes))

@@ -426,7 +428,7 @@ def drop_codes_and_categories(
     # we don't have any explanations from "datensatzbeschreibung_massnahmen"
     # on these codes, so drop the rows.
     codes_dropped = list(set(npi_codes_prior_data).difference(
         npi_codes_prior))
-
+
     # also remove dummy 'Platzhalter' categories
     dummy_categories = []
     for i in range(len(npi_codes_prior)):
@@ -434,7 +436,8 @@ def drop_codes_and_categories(
         if 'Platzhalter' in npi_codes_prior_desc[i]:
             dummy_categories.append(npi_codes_prior[i])
     # codes without explanation and dummy categories
     # sorting done for consistency, maybe not necessary
-    codes_dropped = list(np.sort(codes_dropped + dummy_categories + missing_codes))
+    codes_dropped = list(
+        np.sort(codes_dropped + dummy_categories + missing_codes))
     if len(codes_dropped) > 0:
         df_npis_old = df_npis_old[~df_npis_old[dd.EngEng['npiCode']].isin(
             codes_dropped)].reset_index(drop=True)
@@ -561,18 +564,18 @@ def get_npi_data(fine_resolution=2,
         npi_start_col = np.where(
             df_npis_old.columns.str.contains('d2') == True)[0][0]

-    # get existing codes that are used;
+    # get existing codes that are used;
     # we don't have codes M22 - M24 but these are still listed in description
-
+
     # count how many codes contain M22, M23 or M24
     num_nonexistent_codes = df_npis_desc['Variablenname'].str.count(
         "M22|M23|M24").sum()
     # do not include these nonexistent codes
     if num_nonexistent_codes != 0:
         npi_codes_prior = df_npis_desc['Variablenname'].iloc[: -
-            num_nonexistent_codes]
+                                                             num_nonexistent_codes]
         npi_codes_prior_desc = df_npis_desc['Variable'].iloc[: -
-            num_nonexistent_codes]
+                                                             num_nonexistent_codes]
     else:
         npi_codes_prior = df_npis_desc['Variablenname']
         npi_codes_prior_desc = df_npis_desc['Variable']
@@ -931,8 +934,9 @@ def get_npi_data(fine_resolution=2,
         for maincode in df_count_joint_codes.keys():
             df_count_joint_codes[maincode][1] *= 0
         df_counted_joint_codes = count_code_multiplicities(df_npis_old, df_count_joint_codes,
-                                                       counties_considered=counties_considered)
-        save_interaction_matrix(df_counted_joint_codes, 'joint_codes', directory)
+                                                           counties_considered=counties_considered)
+        save_interaction_matrix(df_counted_joint_codes,
+                                'joint_codes', directory)
         plot_interaction_matrix('joint_codes', directory)

         df_count_incid_depend = copy.deepcopy(df_npis_combinations)
@@ -958,7 +962,8 @@ def get_npi_data(fine_resolution=2,
            all_subcodes += df_npis_combinations[maincode][1].columns.to_list()
            # check (and validate) that element 0 and 1 in df_npis_combination match.
            if df_npis_combinations[maincode][1].columns.to_list() != list(
                    df_npis_combinations[maincode][0].keys()):
-                raise gd.DataError('Error. Description and table do not match.')
+                raise gd.DataError(
+                    'Error. Description and table do not match.')

     for countyID in counties_considered:
         cid = 0
@@ -1020,9 +1025,9 @@ def get_npi_data(fine_resolution=2,
                 # check if access is correct
                 if not all(
                     [npis[dd.EngEng['npiCode']][i * inc_codes] in npi_code_test
-                        for npi_code_test in df_local_old.iloc
-                        [inc_codes * i: inc_codes * (i + 1),
-                            npi_start_col - 1].to_list()]):
+                     for npi_code_test in df_local_old.iloc
+                     [inc_codes * i: inc_codes * (i + 1),
+                      npi_start_col - 1].to_list()]):
                     raise gd.DataError('Wrong NPI rows aggregated.')

                 sum_npi_inc = np.where(
                     df_local_old.iloc[inc_codes*i:inc_codes*(i+1), npi_start_col:].sum() > 1)
                 if (len(sum_npi_inc[0]) > 0):
                     if print_details:
                         print(
                             'Reduce multiple prescription in county ' + str(countyID) +
                             ' for NPI ' + str(npis.loc[inc_codes*i, 'Description']))
                 for j in sum_npi_inc[0]:
                     # get lowest index (i.e., strictest implementation of NPI).
                     idx_start = np.where(
                         df_local_old.iloc[inc_codes*i:inc_codes*(i+1), npi_start_col+j])[0].min()
                     # Remove less strict and thus contradictory
                     # implementations of the same NPI the same day.
                     df_local_old.iloc[inc_codes*i+idx_start +
-                                  1:inc_codes*(i+1), npi_start_col+j] = 0
+                                      1:inc_codes*(i+1), npi_start_col+j] = 0

                 if not all(
                     df_local_old.iloc
                     [inc_codes * i: inc_codes * (i + 1),
-                     npi_start_col + sum_npi_inc[0]].sum() == 1):
+                        npi_start_col + sum_npi_inc[0]].sum() == 1):
                     raise gd.DataError('Consistency correction failed.')

             ## end of consistency correction ##
@@ -1248,7 +1253,8 @@ def get_npi_data(fine_resolution=2,
                                 'joint_codes_incid_depend', directory)
         plot_interaction_matrix('joint_codes_incid_depend', directory)

-        save_interaction_matrix(df_count_active, 'joint_codes_active', directory)
+        save_interaction_matrix(
+            df_count_active, 'joint_codes_active', directory)
         plot_interaction_matrix('joint_codes_active', directory)

     if counter_cases_start >= len(counties_considered)*0.05:

From 12c4ebf380bf226d1d03e614a0780adf366588c7 Mon Sep 17 00:00:00 2001
From: Patrick Lenz
Date: Wed, 17 Jan 2024 12:47:46 +0100
Subject: [PATCH 4/6] fix test codes_dropped

---
 pycode/memilio-epidata/memilio/epidata/getNPIData.py | 5 +++--
 .../memilio/epidata_test/test_epidata_getNPIData.py  | 4 ++--
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/pycode/memilio-epidata/memilio/epidata/getNPIData.py b/pycode/memilio-epidata/memilio/epidata/getNPIData.py
index cd95291ba2..8b9ff396e3 100644
--- a/pycode/memilio-epidata/memilio/epidata/getNPIData.py
+++ b/pycode/memilio-epidata/memilio/epidata/getNPIData.py
@@ -439,10 +439,11 @@ def drop_codes_and_categories(
     codes_dropped = list(
         np.sort(codes_dropped + dummy_categories + missing_codes))
     if len(codes_dropped) > 0:
+        # no subcodes for fine_resolution = 0
         df_npis_old = df_npis_old[~df_npis_old[dd.EngEng['npiCode']].isin(
             codes_dropped)].reset_index(drop=True)
-    if fine_resolution > 0:
-        # no subcodes for fine_resolution = 0
+    if fine_resolution == 2:
+        # incidence subcodes only for fine_resolution = 2
         # for every main code removed, all 5 subcodes have to be removed;
         # if this is not the case, the naming of them is wrong/not consistent
         if (len(codes_dropped) % 6) != 0:
             raise gd.DataError('Error in NPI names, please check.')
diff --git a/pycode/memilio-epidata/memilio/epidata_test/test_epidata_getNPIData.py b/pycode/memilio-epidata/memilio/epidata_test/test_epidata_getNPIData.py
index ef6d785d8d..def0a83178 100644
--- a/pycode/memilio-epidata/memilio/epidata_test/test_epidata_getNPIData.py
+++ b/pycode/memilio-epidata/memilio/epidata_test/test_epidata_getNPIData.py
@@ -242,8 +242,8 @@ def test_drop_codes_and_categories(self):
         fine_resolution = 1
         codes_dropped, npi_codes_prior, df_npis_old = gnd.drop_codes_and_categories(
             npi_codes_prior_test_mc.copy(), npi_codes_prior_desc_test_mc.copy(), df_npis_old_test.copy(), fine_resolution)
-        # no codes should be dropped
-        self.assertEqual(codes_dropped, [])
+        # only drop missing codes
+        self.assertEqual(codes_dropped, self.missing_codes)
         # codes should now be corrected
         self.assertEqual(
             npi_codes_prior.to_list(),
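Patch 4 aligns the unit test with the new drop logic: codes seen in the data but undocumented, dummy 'Platzhalter' placeholders, and documented-but-missing codes are now all dropped together. A small illustrative sketch of the two set differences involved (toy values, not repository data):

    # toy example of the set logic used in drop_codes_and_categories()
    import numpy as np

    npi_codes_prior = ['M01_100', 'M02_100', 'M03_100']      # documented codes
    npi_codes_prior_data = ['M01_100', 'M03_100', 'M99_999']  # codes in the data

    missing_codes = list(set(npi_codes_prior).difference(npi_codes_prior_data))
    # -> ['M02_100']: documented, but absent from the data
    codes_dropped = list(set(npi_codes_prior_data).difference(npi_codes_prior))
    # -> ['M99_999']: present in the data, but undocumented
    # both groups (plus any placeholders) end up in the sorted drop list
    codes_dropped = list(np.sort(codes_dropped + missing_codes))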
From 361185e0b099531c7bcdbfed9a0b511db14d7a35 Mon Sep 17 00:00:00 2001
From: patricklnz
Date: Mon, 22 Apr 2024 14:55:33 +0200
Subject: [PATCH 5/6] performance and typo

---
 .../memilio/epidata/getNPIData.py | 264 +++++++++---------
 1 file changed, 139 insertions(+), 125 deletions(-)

diff --git a/pycode/memilio-epidata/memilio/epidata/getNPIData.py b/pycode/memilio-epidata/memilio/epidata/getNPIData.py
index 8b9ff396e3..e735cd71a6 100644
--- a/pycode/memilio-epidata/memilio/epidata/getNPIData.py
+++ b/pycode/memilio-epidata/memilio/epidata/getNPIData.py
@@ -21,8 +21,10 @@
 from memilio.epidata import defaultDict as dd
 from memilio.epidata import geoModificationGermany as geoger
 from memilio.epidata import getDataIntoPandasDataFrame as gd
+from memilio.epidata import progress_indicator
 from datetime import date, datetime, timedelta
 import time
+import json
 import os
 import copy
 import pandas as pd
@@ -138,141 +140,152 @@ def read_files(directory, fine_resolution):
     counties) and df_npis_desc (Description of NPIs)
     """
     run_check = True
-    if fine_resolution > 0:
-        try:
-            try:
-                codelist = [
-                    'm01a', 'm01b', 'm02a', 'm02b', 'm03', 'm04', 'm05', 'm06',
-                    'm07', 'm08', 'm09', 'm10', 'm11', 'm12', 'm13', 'm14',
-                    'm15', 'm16', 'm17', 'm18', 'm19', 'm20', 'm21']
-                counter_codes = 0
-                for code in codelist:
-                    df_npis_per_code = pd.read_csv(
-                        os.path.join(directory,
                        f'kr_massn_unterkat_{code}.csv'),
-                        sep=',')
-
-                    # set some parameters for dataframe
-                    if counter_codes == 0:
-                        counties = np.sort(df_npis_per_code.ags5.unique())
-                        num_counties = len(df_npis_per_code.ags5.unique())
-
-                    # extract dates from data
-                    dates = df_npis_per_code.iloc[:int(
-                        df_npis_per_code.shape[0]/num_counties), 5]
-                    # rename dates so that they match dates from other npi dataframe
-                    dates_new = [
-                        'd' + date.replace('-', '') for date in dates]
-
-                    df_local = [pd.DataFrame()
-                                for i in range(num_counties)]
-
-                    # set df for all counties
-                    for i in range(0, num_counties):
-                        if counter_codes == 0:
-                            df_local[i] = pd.DataFrame(
-                                columns=list(df_npis_per_code.columns[0: 5]) +
-                                ['code'] + dates_new)
-
-                        dummy_to_append = pd.DataFrame(
-                            columns=['code'] + dates_new,
-                            data=copy.deepcopy(df_npis_per_code
-                                               [df_npis_per_code.ags5 == counties[i]].
-                                               iloc[:, 6:].T.reset_index().values))
-
-                        df_local[i] = pd.concat([df_local[i], dummy_to_append])
-
-                        if df_npis_per_code.iloc[i * len(dates): (i + 1) *
-                                                 len(dates),
-                                                 3].nunique() > 1:
-                            raise gd.DataError(
-                                'Dates are not sorted as expected.')
-
-                        # Set first five columns so that they match old format of data frame (from kr_massnahmen_unterkategorien.csv)
-                        if counter_codes == len(codelist)-1:
-                            df_local[i][df_local[i].columns[0:5]
-                                        ] = df_npis_per_code.iloc[i*len(dates), 0:5].values
-
-                    counter_codes += 1
-                df_npis_old = pd.concat([df_local[i]
-                                         for i in range(num_counties)])
-                df_npis_old.rename(dd.GerEng, axis=1, inplace=True)
-                df_npis_old['NPI_code'] = df_npis_old['NPI_code'].str.replace(
-                    'code_m', 'M')
-            except FileNotFoundError:
-                # TODO: sanity check fails with this file due to different shapes of the dataframe
-                # analysis runs without problems, check if results are the same and either change
-                # sanity check or the way the data from this file is handled.
-                # For now, the sanity check is deactivated
-                run_check = False
-                print(
-                    'WARNING: sanity check is deactivated. Results may not be as expected.')
-                df_npis_old = pd.read_csv(
-                    os.path.join(
-                        directory, 'kr_massnahmen_unterkategorien.csv'),
-                    sep=',')
-                df_npis_old.rename(dd.GerEng, axis=1, inplace=True)
-        except FileNotFoundError:
-            print_manual_download(
-                'kr_massnahmen_unterkategorien.csv',
-                'https://www.corona-datenplattform.de/dataset/massnahmen_unterkategorien_kreise')
-            raise FileNotFoundError
-        # check if rows hospitals and geriatric care are still empty;
-        # these fields have been empty so far and are thus not used
-        test_codes = ['M23_010', 'M23_020', 'M23_030', 'M23_040',
-                      'M23_050', 'M23_060', 'M24_010', 'M24_020',
-                      'M24_030', 'M24_040', 'M24_050', 'M24_060']
-        for tcode in test_codes:
-            for i in [''] + ["_" + str(i) for i in range(1, 6)]:
-                if (df_npis_old[df_npis_old[dd.EngEng['npiCode']] == tcode+i].iloc[:, 6:].max().max() > 0):
-                    print(tcode+i + " used.")
-        # end check
-
-    else:  # read aggregated NPIs
-        try:
-            df_npis_old = pd.read_csv(os.path.join(
-                directory, 'kr_massnahmen_oberkategorien.csv'))
-        except FileNotFoundError:
-            print_manual_download(
-                'kr_massnahmen_oberkategorien.csv',
-                'https://www.corona-datenplattform.de/dataset/massnahmen_oberkategorien_kreise')
-            raise FileNotFoundError
-        df_npis_old.rename(dd.GerEng, axis=1, inplace=True)
-
-    # read data frame of variable names and descriptions
-    try:
-        if fine_resolution > 0:
-            df_npis_desc = pd.read_excel(
-                os.path.join(
-                    directory, 'datensatzbeschreibung_massnahmen.xlsx'),
-                sheet_name=2, engine='openpyxl')
-        else:
-            df_npis_desc = pd.read_excel(
-                os.path.join(
-                    directory, 'datensatzbeschreibung_massnahmen.xlsx'),
-                sheet_name=3, engine='openpyxl')
-    except FileNotFoundError:
-        print_manual_download(
-            'datensatzbeschreibung_massnahmen.xlsx',
-            'https://www.corona-datenplattform.de/dataset/massnahmen_unterkategorien_kreise')
-        raise FileNotFoundError
-
-    # download combinations of npis
-    try:
-        if fine_resolution > 0:
-            df_npis_combinations_pre = pd.read_excel(
-                os.path.join(
-                    directory, 'combination_npis_incl_ranking_v3.xlsx'), engine='openpyxl')
-            if run_check == True:
-                npi_sanity_check(df_npis_old, df_npis_desc,
-                                 df_npis_combinations_pre)
-                return df_npis_old, df_npis_desc, df_npis_combinations_pre
-            else:
-                return df_npis_old, df_npis_desc, None
-    except FileNotFoundError:
-        print('File not found.')
-        raise FileNotFoundError
+    with progress_indicator.Spinner():
+        if fine_resolution > 0:
+            try:
+                try:
+                    filename = "npis_subcategories_raw"
+                    filepath = os.path.join(directory, filename + '.json')
+                    if os.path.exists(filepath):
+                        d=json.load(open(filepath))
+                        df_npis_old=pd.DataFrame(d)
+                    else:
+                        codelist = [
+                            'm01a', 'm01b', 'm02a', 'm02b', 'm03', 'm04', 'm05', 'm06',
+                            'm07', 'm08', 'm09', 'm10', 'm11', 'm12', 'm13', 'm14',
+                            'm15', 'm16', 'm17', 'm18', 'm19', 'm20', 'm21']
+                        counter_codes = 0
+                        for code in codelist:
+                            df_npis_per_code = pd.read_csv(
+                                os.path.join(directory,
                                    f'kr_massn_unterkat_{code}.csv'),
+                                sep=',')
+
+                            # set some parameters for dataframe
+                            if counter_codes == 0:
+                                counties = np.sort(df_npis_per_code.ags5.unique())
+                                num_counties = len(df_npis_per_code.ags5.unique())
+
+                            # extract dates from data
+                            dates = df_npis_per_code.iloc[:int(
+                                df_npis_per_code.shape[0]/num_counties), 5]
+                            # rename dates so that they match dates from other npi dataframe
+                            dates_new = [
+                                'd' + date.replace('-', '') for date in dates]
+
+                            df_local = [pd.DataFrame()
+                                        for i in range(num_counties)]
+
+                            # set df for all counties
+                            for i in range(0, num_counties):
+                                if counter_codes == 0:
+                                    df_local[i] = pd.DataFrame(
+                                        columns=list(df_npis_per_code.columns[0: 5]) +
+                                        ['code'] + dates_new)
+
+                                dummy_to_append = pd.DataFrame(
+                                    columns=['code'] + dates_new,
+                                    data=copy.deepcopy(df_npis_per_code
+                                        [df_npis_per_code.ags5 == counties[i]].
+                                        iloc[:, 6:].T.reset_index().values))
+
+                                df_local[i] = pd.concat([df_local[i], dummy_to_append])
+
+                                if df_npis_per_code.iloc[i * len(dates): (i + 1) *
+                                    len(dates),
+                                    3].nunique() > 1:
+                                    raise gd.DataError(
+                                        'Dates are not sorted as expected.')
+
+                                # Set first five columns so that they match old format of data frame (from kr_massnahmen_unterkategorien.csv)
+                                if counter_codes == len(codelist)-1:
+                                    df_local[i][df_local[i].columns[0:5]
+                                                ] = df_npis_per_code.iloc[i*len(dates), 0:5].values
+
+                            counter_codes += 1
+                        df_npis_old = pd.concat([df_local[i]
+                                                 for i in range(num_counties)])
+                        # 'bundesland' maps to stateID for DIVI, so rename it separately here
+                        df_npis_old.rename({'bundesland': dd.EngEng['state']}, axis=1, inplace=True)
+                        # rename other columns according to default dict
+                        df_npis_old.rename(dd.GerEng, axis=1, inplace=True)
+                        df_npis_old['NPI_code'] = df_npis_old['NPI_code'].str.replace(
+                            'code_m', 'M')
+                        gd.write_dataframe(df_npis_old, directory, filename, 'json')
+                except FileNotFoundError:
+                    # TODO: sanity check fails with this file due to different shapes of the dataframe
+                    # analysis runs without problems, check if results are the same and either change
+                    # sanity check or the way the data from this file is handled.
+                    # For now, the sanity check is deactivated
+                    run_check = False
+                    print(
+                        'WARNING: sanity check is deactivated. Results may not be as expected.')
+                    df_npis_old = pd.read_csv(
+                        os.path.join(
+                            directory, 'kr_massnahmen_unterkategorien.csv'),
+                        sep=',')
+                    df_npis_old.rename(dd.GerEng, axis=1, inplace=True)
+            except FileNotFoundError:
+                print_manual_download(
+                    'kr_massnahmen_unterkategorien.csv',
+                    'https://www.corona-datenplattform.de/dataset/massnahmen_unterkategorien_kreise')
+                raise FileNotFoundError
+            # check if rows hospitals and geriatric care are still empty;
+            # these fields have been empty so far and are thus not used
+            test_codes = ['M23_010', 'M23_020', 'M23_030', 'M23_040',
+                          'M23_050', 'M23_060', 'M24_010', 'M24_020',
+                          'M24_030', 'M24_040', 'M24_050', 'M24_060']
+            for tcode in test_codes:
+                for i in [''] + ["_" + str(i) for i in range(1, 6)]:
+                    if (df_npis_old[df_npis_old[dd.EngEng['npiCode']] == tcode+i].iloc[:, 6:].max().max() > 0):
+                        print(tcode+i + " used.")
+            # end check

+        else:  # read aggregated NPIs
+            try:
+                df_npis_old = pd.read_csv(os.path.join(
+                    directory, 'kr_massnahmen_oberkategorien.csv'))
+            except FileNotFoundError:
+                print_manual_download(
+                    'kr_massnahmen_oberkategorien.csv',
+                    'https://www.corona-datenplattform.de/dataset/massnahmen_oberkategorien_kreise')
+                raise FileNotFoundError
+            df_npis_old.rename(dd.GerEng, axis=1, inplace=True)
+
+        # read dataframe of variable names and descriptions
+        try:
+            if fine_resolution > 0:
+                df_npis_desc = pd.read_excel(
+                    os.path.join(
+                        directory, 'datensatzbeschreibung_massnahmen.xlsx'),
+                    sheet_name=2, engine='openpyxl')
+            else:
+                df_npis_desc = pd.read_excel(
+                    os.path.join(
+                        directory, 'datensatzbeschreibung_massnahmen.xlsx'),
+                    sheet_name=3, engine='openpyxl')
+        except FileNotFoundError:
+            print_manual_download(
+                'datensatzbeschreibung_massnahmen.xlsx',
+                'https://www.corona-datenplattform.de/dataset/massnahmen_unterkategorien_kreise')
+            raise FileNotFoundError
+
+        # download combinations of npis
+        try:
+            if fine_resolution > 0:
+                df_npis_combinations_pre = pd.read_excel(
+                    os.path.join(
+                        directory, 'combination_npis_incl_ranking_v3.xlsx'), engine='openpyxl')
+                if run_check == True:
+                    npi_sanity_check(df_npis_old, df_npis_desc,
+                                     df_npis_combinations_pre)
+                    return df_npis_old, df_npis_desc, df_npis_combinations_pre
+                else:
+                    return df_npis_old, df_npis_desc, None
+        except FileNotFoundError:
+            print('File not found.')
+            raise FileNotFoundError
@@ -883,7 +896,7 @@ def get_npi_data(fine_resolution=2,
     # NPIs were active
     if fine_resolution > 0:
         df_infec_rki = pd.read_json(os.path.join(
-            directory, 'cases_all_county_all_dates_repdate.json'))
+            directory, 'cases_all_county_repdate_all_dates.json'))
         df_infec_rki[dd.EngEng['date']] = pd.to_datetime(
             df_infec_rki[dd.EngEng['date']])
         try:
@@ -931,7 +944,8 @@ def get_npi_data(fine_resolution=2,
     # setup dataframe for each maingroup, same format as df_npi_combinations
     # used to count codes that occur simultaneously now (before any (de-)activation)
     if fine_resolution == 2:
-        df_count_joint_codes = df_npis_combinations[:]
+        #use deepcopy to copy a dict
+        df_count_joint_codes = copy.deepcopy(df_npis_combinations)
         for maincode in df_count_joint_codes.keys():
             df_count_joint_codes[maincode][1] *= 0
         df_counted_joint_codes = count_code_multiplicities(df_npis_old, df_count_joint_codes,
@@ -1246,7 +1260,7 @@ def get_npi_data(fine_resolution=2,
                   '. Estimated time remaining: ' +
                   str(int(time_remain / 60)) + ' min.')

-    if fine_resolution > 2:
+    if fine_resolution == 2:
         save_interaction_matrix(df_count_deactivation,
                                 'count_deactivation', directory)
         plot_interaction_matrix('count_deactivation', directory)
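The JSON caching introduced above is a build-once, read-many pattern: the expensive concatenation of 23 per-code CSVs runs only when no cached dump exists. A minimal standalone sketch of the same idea (hypothetical helper and paths, using plain pandas I/O rather than the repository's gd.write_dataframe):

    import json
    import os

    import pandas as pd


    def load_or_build(directory, filename, builder):
        # hypothetical helper: reuse a previously written JSON dump if
        # present, otherwise build the DataFrame once and cache it
        filepath = os.path.join(directory, filename + '.json')
        if os.path.exists(filepath):
            with open(filepath) as f:
                return pd.DataFrame(json.load(f))
        df = builder()
        df.to_json(filepath, orient='records')
        return df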
From 6c41aee83503da83f7aa107e20bbcab2a7c43dcf Mon Sep 17 00:00:00 2001
From: patricklnz
Date: Thu, 25 Apr 2024 13:21:26 +0200
Subject: [PATCH 6/6] precommit

---
 .../memilio/epidata/getNPIData.py | 38 +++++++++++--------
 1 file changed, 22 insertions(+), 16 deletions(-)

diff --git a/pycode/memilio-epidata/memilio/epidata/getNPIData.py b/pycode/memilio-epidata/memilio/epidata/getNPIData.py
index 3c5f174880..dd0bc48095 100644
--- a/pycode/memilio-epidata/memilio/epidata/getNPIData.py
+++ b/pycode/memilio-epidata/memilio/epidata/getNPIData.py
@@ -147,8 +147,8 @@ def read_files(directory, fine_resolution, run_checks):
                     filename = "npis_subcategories_raw"
                     filepath = os.path.join(directory, filename + '.json')
                     if os.path.exists(filepath):
-                        d=json.load(open(filepath))
-                        df_npis_old=pd.DataFrame(d)
+                        d = json.load(open(filepath))
+                        df_npis_old = pd.DataFrame(d)
                     else:
                         codelist = [
                             'm01a', 'm01b', 'm02a', 'm02b', 'm03', 'm04', 'm05', 'm06',
@@ -158,13 +158,15 @@ def read_files(directory, fine_resolution, run_checks):
                         for code in codelist:
                             df_npis_per_code = pd.read_csv(
                                 os.path.join(directory,
-                                    f'kr_massn_unterkat_{code}.csv'),
+                                             f'kr_massn_unterkat_{code}.csv'),
                                 sep=',')

                             # set some parameters for dataframe
                             if counter_codes == 0:
-                                counties = np.sort(df_npis_per_code.ags5.unique())
-                                num_counties = len(df_npis_per_code.ags5.unique())
+                                counties = np.sort(
+                                    df_npis_per_code.ags5.unique())
+                                num_counties = len(
+                                    df_npis_per_code.ags5.unique())

                             # extract dates from data
                             dates = df_npis_per_code.iloc[:int(
@@ -186,14 +188,15 @@ def read_files(directory, fine_resolution, run_checks):
                                 dummy_to_append = pd.DataFrame(
                                     columns=['code'] + dates_new,
                                     data=copy.deepcopy(df_npis_per_code
-                                        [df_npis_per_code.ags5 == counties[i]].
-                                        iloc[:, 6:].T.reset_index().values))
+                                                       [df_npis_per_code.ags5 == counties[i]].
+                                                       iloc[:, 6:].T.reset_index().values))

-                                df_local[i] = pd.concat([df_local[i], dummy_to_append])
+                                df_local[i] = pd.concat(
+                                    [df_local[i], dummy_to_append])

                                 if df_npis_per_code.iloc[i * len(dates): (i + 1) *
-                                    len(dates),
-                                    3].nunique() > 1:
+                                                         len(dates),
+                                                         3].nunique() > 1:
                                     raise gd.DataError(
                                         'Dates are not sorted as expected.')
@@ -206,12 +209,14 @@ def read_files(directory, fine_resolution, run_checks):
                         df_npis_old = pd.concat([df_local[i]
                                                  for i in range(num_counties)])
                         # 'bundesland' maps to stateID for DIVI, so rename it separately here
-                        df_npis_old.rename({'bundesland': dd.EngEng['state']}, axis=1, inplace=True)
+                        df_npis_old.rename(
+                            {'bundesland': dd.EngEng['state']}, axis=1, inplace=True)
                         # rename other columns according to default dict
                         df_npis_old.rename(dd.GerEng, axis=1, inplace=True)
                         df_npis_old['NPI_code'] = df_npis_old['NPI_code'].str.replace(
                             'code_m', 'M')
-                        gd.write_dataframe(df_npis_old, directory, filename, 'json')
+                        gd.write_dataframe(
+                            df_npis_old, directory, filename, 'json')
@@ -233,8 +238,8 @@ def read_files(directory, fine_resolution, run_checks):
             # check if rows hospitals and geriatric care are still empty;
             # these fields have been empty so far and are thus not used
             test_codes = ['M23_010', 'M23_020', 'M23_030', 'M23_040',
-                        'M23_050', 'M23_060', 'M24_010', 'M24_020',
-                        'M24_030', 'M24_040', 'M24_050', 'M24_060']
+                          'M23_050', 'M23_060', 'M24_010', 'M24_020',
+                          'M24_030', 'M24_040', 'M24_050', 'M24_060']
             for tcode in test_codes:
                 for i in [''] + ["_" + str(i) for i in range(1, 6)]:
                     if (df_npis_old[df_npis_old[dd.EngEng['npiCode']] == tcode+i].iloc[:, 6:].max().max() > 0):
@@ -279,7 +284,7 @@ def read_files(directory, fine_resolution, run_checks):
                         directory, 'combination_npis_incl_ranking_v3.xlsx'), engine='openpyxl')
                 if run_check == True:
                     npi_sanity_check(df_npis_old, df_npis_desc,
-                                 df_npis_combinations_pre)
+                                     df_npis_combinations_pre)
                     return df_npis_old, df_npis_desc, df_npis_combinations_pre
                 else:
                     return df_npis_old, df_npis_desc, None
@@ -287,6 +292,7 @@ def read_files(directory, fine_resolution, run_checks):
         print('File not found.')
         raise FileNotFoundError

+
 def activate_npis_based_on_incidence(
         local_incid, npi_lifting_days_threshold, npi_activation_days_threshold,
         incid_threshold):
@@ -948,7 +954,7 @@ def get_npi_data(fine_resolution=2,
     # setup dataframe for each maingroup, same format as df_npi_combinations
     # used to count codes that occur simultaneously now (before any (de-)activation)
     if fine_resolution == 2:
-        #use deepcopy to copy a dict
+        # use deepcopy to copy a dict
         df_count_joint_codes = copy.deepcopy(df_npis_combinations)
         for maincode in df_count_joint_codes.keys():
             df_count_joint_codes[maincode][1] *= 0
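A note on the copy fix that runs through patches 5 and 6: df_npis_combinations is a dict, so the earlier df_npis_combinations[:] slice copy cannot work, and even a shallow copy would still share the inner DataFrames that are later zeroed in place. A small illustration of why copy.deepcopy is needed (toy structure, not the real combination matrices):

    import copy

    import pandas as pd

    # toy stand-in for df_npis_combinations: maincode -> [description, table]
    combos = {'M01': [{'M01_100': 'desc'}, pd.DataFrame([[1, 1]])]}

    # combos[:] raises TypeError: unhashable type: 'slice' (dicts cannot be sliced)
    shallow = dict(combos)        # copies the dict, but shares the DataFrame
    deep = copy.deepcopy(combos)  # copies the nested DataFrame as well

    deep['M01'][1] *= 0                           # zero the table in the deep copy
    print(combos['M01'][1].iloc[0, 0])            # 1: the original is untouched
    print(shallow['M01'][1] is combos['M01'][1])  # True: shared object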