@@ -892,8 +892,8 @@ def get_npi_data(fine_resolution=2,
892
892
df_count_joint_codes [maincode ][1 ] *= 0
893
893
df_counted_joint_codes = count_code_multiplicities_init (df_npis_old , df_count_joint_codes ,
894
894
counties_considered = counties_considered )
895
- save_counter (df_counted_joint_codes , 'joint_codes' , directory )
896
- plot_counter ('joint_codes' , directory )
895
+ save_interaction_matrix (df_counted_joint_codes , 'joint_codes' , directory )
896
+ plot_interaction_matrix ('joint_codes' , directory )
897
897
plot_multiple_prescriptions ('joint_codes' , directory )
898
898
899
899
# create dataframe to count multiple codes after incidence dependent (de-)activation
@@ -1165,8 +1165,8 @@ def get_npi_data(fine_resolution=2,
1165
1165
'. Estimated time remaining: ' +
1166
1166
str (int (time_remain / 60 )) + ' min.' )
1167
1167
1168
- save_counter (df_count_deactivation , 'count_deactivation' , directory )
1169
- plot_counter ('count_deactivation' , directory )
1168
+ save_interaction_matrix (df_count_deactivation , 'count_deactivation' , directory )
1169
+ plot_interaction_matrix ('count_deactivation' , directory )
1170
1170
1171
1171
if counter_cases_start >= len (counties_considered )* 0.05 :
1172
1172
print ('WARNING: DataFrame starts with reported cases > 0 '
@@ -1176,11 +1176,11 @@ def get_npi_data(fine_resolution=2,
1176
1176
'Please consider a start date of some weeks ahead of the '
1177
1177
'time window to be analyzed for NPI\' s effects.' )
1178
1178
1179
- save_counter (df_count_incid_depend , 'joint_codes_incid_depend' , directory )
1180
- plot_counter ('joint_codes_incid_depend' , directory )
1179
+ save_interaction_matrix (df_count_incid_depend , 'joint_codes_incid_depend' , directory )
1180
+ plot_interaction_matrix ('joint_codes_incid_depend' , directory )
1181
1181
1182
- save_counter (df_count_active , 'joint_codes_active' , directory )
1183
- plot_counter ('joint_codes_active' , directory )
1182
+ save_interaction_matrix (df_count_active , 'joint_codes_active' , directory )
1183
+ plot_interaction_matrix ('joint_codes_active' , directory )
1184
1184
1185
1185
# print sub counters
1186
1186
print ('Sub task counters are: ' )
@@ -1234,7 +1234,7 @@ def count_code_multiplicities_init(df_npis_old, df_count, counties_considered):
1234
1234
1235
1235
@param[in] df_npis_old Initial data frame read from Corona Datenplattform.
1236
1236
@param[in,out] df_count Dictionary of main NPI codes with empty interaction
1237
- matrix (to be filled) for all codes under main code in df_count[maincode][1]
1237
+ matrix (to be filled) for all codes under main code in df_count[maincode][1].
1238
1238
@param[in] counties_considered County IDs for which initial data frame is
1239
1239
considered.
1240
1240
"""
@@ -1299,40 +1299,62 @@ def count_codes(df_old, df_count, county):
1299
1299
return df_count
1300
1300
1301
1301
1302
- def save_counter (df_count , filename , directory ):
1303
- # save results
1302
def save_interaction_matrix(df_interactions, filename, directory):
    """! Saves interaction matrices for all subcodes in provided main codes.

    One sheet per main code is written to the Excel file
    directory/filename.xlsx, the sheet being named after the main code.

    @param[in] df_interactions Dictionary of main NPI codes with interaction
        matrix for all subcodes under main code in df_interactions[maincode][1].
    @param[in] filename Filename (without the '.xlsx' extension) to store
        result.
    @param[in] directory Directory where to save data.
    """
    # Use the writer as a context manager so that it is closed even if
    # writing one of the sheets raises; a manual close() at the end would be
    # skipped in that case.
    with pd.ExcelWriter(
            os.path.join(directory, filename + '.xlsx'),
            engine='xlsxwriter') as writer:
        for code, entry in df_interactions.items():
            # entry[1] holds the interaction matrix of this main code
            entry[1].to_excel(writer, sheet_name=code)
1311
1317
1312
1318
# saves plot in folder directory/heatmaps_filename
1313
1319
1314
1320
1315
- def plot_counter (filename , directory ):
1321
+ def plot_interaction_matrix (filename , directory ):
1322
+ """! Reads interaction matrices from hard drive and writes heatmap plots
1323
+ to hard drive.
1324
+
1325
+ @param[in] filename Filename to read results from.
1326
+ @param[in] directory Directory where to read and save data.
1327
+ """
1316
1328
target_directory = os .path .join (directory , 'heatmaps_' + filename )
1317
1329
if not os .path .exists (target_directory ):
1318
1330
os .makedirs (target_directory )
1319
1331
1320
- codelist = pd .ExcelFile (os .path .join (
1321
- directory , filename + '.xlsx' ), engine = 'openpyxl' ).sheet_names
1332
+ try :
1333
+ codelist = pd .ExcelFile (os .path .join (
1334
+ directory , filename + '.xlsx' ), engine = 'openpyxl' ).sheet_names
1335
+ except :
1336
+ raise FileNotFoundError ('File ' + filename + ' not found.' )
1322
1337
1323
- cmap = copy .copy (mpl .cm .get_cmap ('OrRd' ))
1338
+ # invert color map elements for tab20b such that subcolors are shown
1339
+ # from light to dark
1340
+ cmap = copy .copy (mpl .cm .get_cmap ('tab20b' ))
1341
+ colors = [cmap (i ) for i in np .array ([list (range (4 * (i + 1 )- 1 ,4 * i - 1 ,- 1 )) for i in range (5 )]).flatten ()]
1342
+ colors = colors + [(0.6 , 0.6 , 0.6 ), (0.4 , 0.4 , 0.4 ),
1343
+ (0.2 , 0.2 , 0.2 ), (0 , 0 , 0 )]
1344
+ cmap = mpl .colors .ListedColormap (colors )
1324
1345
1325
1346
for code in codelist :
1326
1347
df = pd .read_excel (
1327
1348
os .path .join (directory , filename + '.xlsx' ),
1328
1349
sheet_name = code , engine = 'openpyxl' )
1329
- # set diag = 0
1350
+ # set diag = 0, access (i,i+1) as first column contains index
1330
1351
for i in range (df .shape [0 ]):
1331
1352
df .iloc [i , i + 1 ] = 0
1353
+ # remove first column and convert to numpy array
1332
1354
array_exclusion = df .iloc [:, 1 :].to_numpy ()
1333
1355
if filename != 'count_deactivation' :
1334
1356
# for count deactivation xlabel != ylabel
1335
- # else matrix is of squared form
1357
+ # else matrix is of squared form and symmetric
1336
1358
array_exclusion += np .transpose (array_exclusion )
1337
1359
positions = [i for i in range (len (df .columns )- 1 )]
1338
1360
plt .xticks (positions , df .columns .to_list ()[1 :], rotation = 'vertical' )
@@ -1356,15 +1378,17 @@ def plot_counter(filename, directory):
1356
1378
plt .ylabel ('NPI' )
1357
1379
plt .title ('Joint NPI prescriptions' )
1358
1380
else :
1359
- raise gd .DataError ('unknown filename: ' + filename )
1381
+ raise gd .DataError ('Unknown filename: ' + filename )
1360
1382
1361
- # set vmin = 1 so that only combinations that are simultaneously active at least on one day are in colour,
1362
- # else white
1363
- # set vmax = 300000, this should be larger than maxima in all dataframes,
1364
- # this way colours of heatmaps are comparable (e.g. between codes or between joint_codes and exclusions)
1383
+ # Set vmin = 1 so that only combinations that are simultaneously active
1384
+ # at least on one day are in color, else use white.
1385
+ # Set vmax = 1e6 to be adjusted with colormap, this value is larger
1386
+ # than the maximum in all dataframes, this way colors of heatmaps are
1387
+ # comparable across different visualizations
1388
+ # (e.g. between codes or between joint_codes and exclusions)
1365
1389
1366
1390
plt .imshow (array_exclusion , cmap = cmap ,
1367
- norm = mpl .colors .LogNorm (vmin = 1 , vmax = 300000 ))
1391
+ norm = mpl .colors .LogNorm (vmin = 1 , vmax = 1e6 ))
1368
1392
plt .colorbar ()
1369
1393
plt .tight_layout ()
1370
1394
plt .savefig (
0 commit comments