-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathDABC_pipeline_demo.py
444 lines (320 loc) · 13.8 KB
/
DABC_pipeline_demo.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
# -*- coding: utf-8 -*-
"""DABC_pipeline_demo.ipynb
Automatically generated by Colaboratory.
Original file on github is located at
https://colab.research.google.com/github/Robin970822/DABC-Net-for-COVID-19/blob/master/DABC_pipeline_demo.ipynb
# Before We Start
**1. Download code from github.**
```bash
git clone https://github.com/Robin970822/DABC-Net-for-COVID-19.git
```
**2. Change directories to DABC-Net-for-COVID-19.**
```bash
cd DABC-Net-for-COVID-19
```
Mind that this shared folder DABC-Net-for-COVID-19 might be seen or modified by other users. Your other data and files on google drive are safe and independent.
**3. Download data and model weights with download.sh.**
```bash
sh download.sh
```
**4. Set your Hardware accelerator as GPU in Edit/Notebook settings in Colab Menu.**
**5. Make sure your folder structure is correct.**
The folder structure looks like this:
```bash
path
├─Input_data
│ 2020034797_0123_2949_20200123015940_4.nii.gz
│ 2020034797_0125_3052_20200125111145_4.nii.gz
│ ...
│
├─Output_data
│ │
│ ├─covid
│ │ 2020034797_0123_2949_20200123015940_4.nii.gz
│ │ 2020034797_0125_3052_20200125111145_4.nii.gz
│ │ ...
│ │
│ ├─lung
│ │ 2020034797_0123_2949_20200123015940_4.nii.gz
│ │ 2020034797_0125_3052_20200125111145_4.nii.gz
│ │ ...
│ │
│ └─uncertainty
│ 2020034797_0123_2949_20200123015940_4_predictive_aleatoric.nii.gz
│ 2020034797_0125_3052_20200125111145_4_sample_1.nii.gz
│ ...
│
├─weight
│ model_05090017
│ ...
│
│ (following folders are required if you need longitudinal study)
│
├─meta
│ 2020035021.csv
│
└─model
prediction.pkl
...
```
# Step I:
## Setup environment and load data.
Make sure you have set **GPU** acceleration when you run this notebook. See your Hardware accelerator in Edit/Notebook settings in Colab Menu(On the upper left usually).
## Pull code from github
"""
# Clone the repository, change into it, and pull the latest master branch.
# NOTE: these are IPython/Colab shell commands (`!cmd` and the `cd`
# automagic), not plain Python; they only run inside a notebook session.
!git clone https://github.com/Robin970822/DABC-Net-for-COVID-19.git
cd /content/DABC-Net-for-COVID-19/
!git pull origin master
"""## Download data and model weights
It will take about 3 minutes.
"""
# Run the repository's download script (Colab shell command).
!sh download.sh
"""Make sure you are in DABC-Net-for-COVID-19"""
# Print the current working directory (Colab shell command).
!pwd
"""## Set up environment and install all necessary packages
Make sure you have set **GPU** acceleration when you run this notebook. See your Hardware accelerator in **Edit/Notebook settings** in Colab Menu(On the upper left usually).
Note that if you only need to run prediction, you only need **xgboost==1.1.0** and **scikit-learn==0.21.3** to load the prediction model, and you can skip the inference steps.
"""
# Install the pinned package versions used by this notebook (Colab shell
# command; not valid in a plain Python interpreter).
!pip install SimpleITK tensorflow-gpu==1.15.4 keras==2.2.4 xgboost==1.1.0 scikit-learn==0.21.3 scipy==1.1
"""# Step II:
## Lung and lesion Segmentation.
## Load datasets and pretrained model
"""
import numpy as np
import pandas as pd
import tensorflow as tf
from models import models as Model
from pipeline.inference_pipeline import local_inference
from pipeline.data_pipeline import save_pred_to_nii, read_from_nii, confirm_data
# Only emit TensorFlow log messages at ERROR level.
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
def DABC_infer(nii_path='', save_path='', usage='covid'):
    """Segment every scan found under ``nii_path`` and save the predictions.

    Args:
        nii_path: Folder containing the input scans. ``'/*'`` is appended to
            build the pattern handed to ``read_from_nii``.
        save_path: Folder the predictions are written to (a trailing ``'/'``
            is appended before it is handed to ``save_pred_to_nii``).
        usage: ``'covid'`` or ``'lung'``; selects which pretrained weight
            file is loaded.

    Returns:
        None. Results are written by ``save_pred_to_nii``. When ``usage`` is
        not one of the supported values, a message is printed and nothing is
        read, inferred or saved.
    """
    # Resolve the weight file first so an unsupported `usage` fails fast,
    # before any scan is read or preprocessed.
    if usage == 'covid':
        name = 'weight/Covid_05112327'
    elif usage == 'lung':
        name = 'weight/model_05090017'
    else:
        print('Please select correct model!')
        return None

    nii_pattern = nii_path + '/*'
    # Equivalent to appending '/*' and then stripping every '*'.
    save_dir = save_path.replace('*', '') + '/'

    # NOTE(review): Hu_window is presumably an intensity window in Hounsfield
    # units applied by read_from_nii -- confirm against the data pipeline.
    all_src_data = read_from_nii(nii_path=nii_pattern, Hu_window=(-1024, 512), need_rotate=True)
    # Add a trailing axis of size 1 to the loaded array.
    all_src_data = np.expand_dims(all_src_data, -1)

    print('\n**********\tInferring CT scans:\t**********\n')
    test_vol = confirm_data(all_src_data)

    model = Model.DABC(input_size=(4, 256, 256, 1), load_weighted=name)
    pred = local_inference(test_vol, model)
    save_pred_to_nii(pred=pred, save_path=save_dir, ref_path=nii_pattern,
                     need_resize=True, need_rotate=True)
"""## Run lung and lesion segmentation
It takes about 8 minutes for lesion segmentation of 8 input CT scans.
"""
# Lesion segmentation: read the scans of patient 2020035365 and write the
# predictions to the 'covid' output folder (default usage='covid').
input_path = '2020035365'
output_path = '2020035365_output/covid'
DABC_infer(nii_path=input_path, save_path=output_path)
"""It takes about 8 minutes for lung segmentation of 8 input CT scans."""
# Lung segmentation: same input folder, lung weights, separate output folder.
output_path = '2020035365_output/lung'
DABC_infer(nii_path=input_path, save_path=output_path, usage='lung')
"""Postprocessing as refinement: remove small segmented area to reduce falsepositive region
(This step is optional)
"""
# from postprocess_lung import remove_small_objects
# remove_small_objects('2020035365_output/lung')
"""# Step III:
## Infer segmentation uncertainty through monte-carlo dropout
It will take about 2 minutes for 5 monte-carlo samples
"""
from DABC_uncertainty_colab import DABC_uncertainty
# Run 5 Monte Carlo dropout inference passes (sample_value=5) on one scan of
# patient 2020035365.
# NOTE(review): the second argument is presumably the output folder and
# uc_chosen='Both' presumably requests both uncertainty types -- confirm
# against DABC_uncertainty.
DABC_uncertainty('2020035365/2020035365_0204_3050_20200204184413_4.nii.gz', '2020035365_output/uncertainty', sample_value=5, uc_chosen='Both')
"""# Step IV:
## Visualisation of segmentation results, uncertainties and progress curve
## Plot segmentation
"""
import pandas as pd
from utils.visualization import data_disease_slice, plot_segmentation, plot_progress_curve, plot_uncertainty
import warnings
warnings.filterwarnings("ignore")
# Severe patient: one slice index per scan (8 entries).
raw_severe, lung_severe, lesion_severe, ratio_severe = data_disease_slice(
    patientID='2020035365', slice_id=[175, 162, 195, 195, 195, 195, 195, 195])
meta_path = 'meta/2020035365.csv'
meta = pd.read_csv(meta_path, index_col=[0])
# Take an explicit copy of the filtered rows: assigning a new column onto a
# boolean-indexed selection is chained assignment and raises pandas'
# SettingWithCopyWarning (hidden here by the global warnings filter).
_meta_severe = meta[meta['slice'] > 100].copy()
_meta_severe['ratio'] = ratio_severe

# Mild patient
# To avoid waiting, the segmentation results of the mild patient have already
# been computed and saved in the 2020035021_output/ folder.
# If you want to repeat the results, change the patientID from 2020035365 to
# 2020035021 in the inference steps.
raw_mild, lung_mild, lesion_mild, ratio_mild = data_disease_slice(
    patientID='2020035021', slice_id=[200, 200, 200, 200, 200, 200])
meta_path = 'meta/2020035021.csv'
meta = pd.read_csv(meta_path, index_col=[0])
# Same explicit copy as for the severe patient.
_meta_mild = meta[meta['slice'] > 100].copy()
_meta_mild['ratio'] = ratio_mild

plot_segmentation(raw_severe, lung_severe, lesion_severe, color_map='Reds', state='Severe', hspace=-0.6)
plot_segmentation(raw_mild, lung_mild, lesion_mild, color_map='Reds', state='Mild', hspace=-0.4)
"""## Plot progress curve"""
import seaborn as sns
import matplotlib.pyplot as plt
# Create a 16x9 inch figure before drawing the two progress curves.
plt.figure(figsize=(16, 9))
# Each curve takes its colour from a fixed index of a seaborn palette.
plot_progress_curve(_meta_severe, patientID=2020035365, line_color=sns.color_palette('Reds')[5], label='Severe patient')
plot_progress_curve(_meta_mild, patientID=2020035021, line_color=sns.color_palette('Greens')[3], label='Mild patient')
plt.legend(loc='upper right')
# NOTE(review): the title text is misspelled ('pateint'); it is a runtime
# string and is left unchanged here.
plt.title('Severe pateint vs Mild pateint', fontsize=26)
plt.xlabel('Time(Day)', fontsize=16)
plt.ylabel('Lesion ratio', fontsize=16)
"""## Plot uncertainty
"""
# Example I: plot uncertainty for slice 175 of this scan.
plot_uncertainty(name_id='2020035365_0204_3050_20200204184413_4.nii.gz',slice_id=175)
# Example II: same scan, slice 150.
plot_uncertainty(name_id='2020035365_0204_3050_20200204184413_4.nii.gz',slice_id=150)
"""#Step V:
## Prediction of pneumonia progression
## Load data
In this section, we present one mild patient and one severe patient with multi-scans to show progress of the disease and illustrate our model performance.
"""
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from utils.calculate_feature import *
from utils.visualization import *
from pipeline.data_pipeline import read_from_nii
# severe patient: metadata table, raw scans, and the lung / lesion outputs
# read back from the 2020035365_output folders; arrays are cast to float32.
meta_path_severe='meta/2020035365.csv'
meta_severe = pd.read_csv(meta_path_severe, index_col=[0])
raw_data_severe = read_from_nii('2020035365/*').astype('float32')
lung_severe = read_from_nii(r'2020035365_output/lung/*').astype('float32')
lesion_severe = read_from_nii(r'2020035365_output/covid/*').astype('float32')
# mild patient: only the metadata is read here; the image volumes are loaded
# later in the script.
meta_path_mild='meta/2020035021.csv'
meta_mild = pd.read_csv(meta_path_mild, index_col=[0])
def predict_base_learners(base_learners, x):
    """Build a matrix of per-learner probabilities for the samples in ``x``.

    Args:
        base_learners: Mapping of learner name to a model object exposing
            ``predict_proba``.
        x: Array with one row per sample.

    Returns:
        Array of shape ``(x.shape[0], len(base_learners))``; column ``k``
        holds column 1 of the ``k``-th learner's ``predict_proba`` output.
    """
    n_samples, n_learners = x.shape[0], len(base_learners)
    stacked = np.zeros((n_samples, n_learners))
    print('Generating base learner predictions.')
    column = 0
    for learner_name in base_learners:
        # Progress line: "<name>..." followed by "done." once it has run.
        print('%s...'% learner_name, end='', flush=False)
        learner = base_learners[learner_name]
        stacked[:, column] = learner.predict_proba(x)[:, 1]
        print('done.')
        column += 1
    return stacked
"""## Calculate all 14 features from the segmenetion results
"""
# Severe patient: compute the features from the raw scans, the lung and
# lesion segmentation outputs, and the metadata table.
res_list_severe, all_info_severe = calculate(raw_data_severe, lung_severe, lesion_severe, meta_severe)
del raw_data_severe, lung_severe, lesion_severe # release RAM
# mild patient: volumes are loaded only now, after the severe patient's
# arrays have been deleted.
raw_data_mild = read_from_nii(r'2020035021/*').astype('float32')
lung_mild = read_from_nii(r'2020035021_output/lung/*').astype('float32')
lesion_mild = read_from_nii(r'2020035021_output/covid/*').astype('float32')
res_list_mild, all_info_mild = calculate(raw_data_mild, lung_mild, lesion_mild, meta_mild)
del raw_data_mild, lung_mild, lesion_mild  # release RAM
"""All features and infomation of Severe and Mild patient as shown below.
(You can also print the variable 'all_info_severe' or 'all_info_mild' directly in new blank code cell.)
"""
from utils.visualization import plot_fetures
# Plot the feature tables of both patients; save_to_html=True is passed to
# the helper (presumably to also write an HTML export -- confirm).
plot_fetures(all_info_severe=all_info_severe, all_info_mild=all_info_mild, save_to_html=True) # x-axis:time(day), y-axis: lesion ratio
"""Animation of progress curve"""
from utils.visualization import plot_animation_curve
# Animate the progress curve of the severe patient only.
plot_animation_curve(all_info=all_info_severe) # x-axis:time(day), y-axis: lesion ratio
"""## Prediction
### Load model
"""
import pickle
import json
# NOTE(review): pickle.load can execute arbitrary code contained in the
# file; only load model files obtained from a trusted source.
# Progression-prediction ensemble and its min/max scaling bounds.
with open('model/prediction.pkl', 'rb') as j:
    base_pred = pickle.load(j)
with open('model/min_max_prediction.json', 'r') as j:
    min_max_dict_pred = json.load(j)
# Per-scan classification ensemble and its min/max scaling bounds.
with open('model/classification.pkl', 'rb') as j:
    base_cls = pickle.load(j)
with open('model/min_max_classification.json', 'r') as j:
    min_max_dict_cls = json.load(j)
# The 14 feature names handed to preprocessing() below.
feature = [
    'left_ratio', 'right_ratio',
    'left_lung', 'right_lung',
    'left_lesion', 'right_lesion',
    'left_weighted_lesion', 'right_weighted_lesion',
    'left_consolidation', 'right_consolidation',
    'left_z', 'right_z',
    'Age', 'sex',
]
"""## Preprocessing"""
# Build the model input table for each patient from its feature table.
X_severe = preprocessing(all_info_severe, feature)
X_mild = preprocessing(all_info_mild, feature)
"""## Per Scan Classification for different timepoint."""
def Per_Scan_Classification(X):
    """Score every row of ``X`` with the per-scan classification ensemble.

    The features are scaled with the ``'min'`` / ``'max'`` bounds stored in
    the module-level ``min_max_dict_cls``, passed to every learner in the
    module-level ``base_cls``, and the learners' outputs are averaged per row.

    Args:
        X: Feature table with one row per scan.

    Returns:
        1-D array holding one averaged value per row of ``X``.
    """
    features = np.array(X)
    lower = np.array(min_max_dict_cls['min'])
    upper = np.array(min_max_dict_cls['max'])
    scaled = min_max_scalar(features, lower, upper)
    learner_outputs = predict_base_learners(base_cls, np.array(scaled))
    # Average across learners (columns), keeping one value per scan.
    return learner_outputs.mean(axis=1)
# Averaged ensemble output for every scan of each patient.
p_severe = Per_Scan_Classification(X_severe)
print('Prediction of severe patient(per scan):\n{}\n'.format(p_severe))
p_mild = Per_Scan_Classification(X_mild)
print('Prediction of mild patient(per scan):\n{}\n'.format(p_mild))
# Show the thresholded prediction (> 0.5 -> 1) next to the 'Severe' column
# of the feature table and the raw averaged values.
print('\n'+'*'*10+'\tSevere patient\t'+'*'*10)
print('pred\t{} \ngt\t{} \nprob {}'.format((p_severe > 0.5).astype('int'), np.array(all_info_severe['Severe']), p_severe))
print('\n'+'*'*10+'\tMild patient\t'+'*'*10)
print('pred\t{} \ngt\t{} \nprob {}'.format((p_mild > 0.5).astype('int'), np.array(all_info_mild['Severe']), p_mild))
"""## First Two Scans"""
def First_Two_Scans(X):
    """Predict progression from the first two rows (scans) of ``X``.

    Uses the module-level ``base_pred`` and ``min_max_dict_pred`` as they are
    bound at call time.

    Args:
        X: Feature table with at least two rows.

    Returns:
        Mean of all base-learner outputs for the combined feature vector.
    """
    second_scan = X.iloc[1].tolist()
    first_scan = X.iloc[0].tolist()
    # Second scan without its last two entries, followed by the full first
    # scan. NOTE(review): the dropped entries are presumably 'Age' and 'sex',
    # which would otherwise appear twice -- confirm against the column order.
    combined = second_scan[:-2] + first_scan
    # Min-max scale with the stored bounds.
    lower = np.array(min_max_dict_pred['min'])
    upper = np.array(min_max_dict_pred['max'])
    scaled = min_max_scalar(np.array(combined), lower, upper)
    # Single-sample batch through every base learner, then average.
    ensemble_out = predict_base_learners(base_pred, np.array([scaled]))
    return ensemble_out.mean()
# Print the two-scan progression prediction for each patient under a banner.
print('\n'+'*'*10+'\tSevere patient\t'+'*'*10)
print(First_Two_Scans(X_severe))
print('\n'+'*'*10+'\tMild patient\t'+'*'*10)
print(First_Two_Scans(X_mild))
"""## First Three Scans"""
# Rebind base_pred / min_max_dict_pred to the "first 3" model files.
# Functions that read these globals use the new values from here on.
# NOTE(review): pickle.load can execute arbitrary code; load trusted files only.
with open('model/prediction_first_3.pkl', 'rb') as j:
    base_pred = pickle.load(j)
with open('model/min_max_prediction_first_3.json', 'r') as j:
    min_max_dict_pred = json.load(j)
def First_Three_Scans(X):
    """Predict progression using the third and second rows (scans) of ``X``.

    Uses the module-level ``base_pred`` and ``min_max_dict_pred`` as they are
    bound at call time.

    Args:
        X: Feature table with at least three rows.

    Returns:
        Mean of all base-learner outputs for the combined feature vector.
    """
    third_scan = X.iloc[2].tolist()
    second_scan = X.iloc[1].tolist()
    # Third scan without its last two entries, followed by the full second
    # scan. NOTE(review): the dropped entries are presumably 'Age' and 'sex'
    # -- confirm against the column order.
    combined = third_scan[:-2] + second_scan
    # Min-max scale with the stored bounds.
    lower = np.array(min_max_dict_pred['min'])
    upper = np.array(min_max_dict_pred['max'])
    scaled = min_max_scalar(np.array(combined), lower, upper)
    # Single-sample batch through every base learner, then average.
    ensemble_out = predict_base_learners(base_pred, np.array([scaled]))
    return ensemble_out.mean()
# Print the three-scan progression prediction for each patient under a banner.
print('\n'+'*'*10+'\tSevere patient\t'+'*'*10)
print(First_Three_Scans(X_severe))
print('\n'+'*'*10+'\tMild patient\t'+'*'*10)
print(First_Three_Scans(X_mild))
"""## First Scan
Using first scan to predict progress.
Note: using only the first scan is not reliable; we recommend using the first two or three scans to predict disease progression.
(In this section, the progression prediction for the severe patient is wrong when only the first scan is used. However, when two or three scans are used, the model predicts correctly with high confidence.)
"""
from copy import deepcopy
# Rebind base_pred / min_max_dict_pred to the single-scan model files.
# Functions that read these globals use the new values from here on.
# NOTE(review): pickle.load can execute arbitrary code; load trusted files only.
with open('model/prediction_first.pkl', 'rb') as j:
    base_pred = pickle.load(j)
with open('model/min_max_prediction_first.json', 'r') as j:
    min_max_dict_pred = json.load(j)
def First_Scan(X):
    """Predict progression from the first row (scan) of ``X`` only.

    Uses the module-level ``base_pred`` and ``min_max_dict_pred`` as they are
    bound at call time.

    Args:
        X: Feature table with at least one row.

    Returns:
        Mean of all base-learner outputs for the first scan's features.
    """
    first_scan = X.iloc[0].tolist()
    # Min-max scale with the stored bounds.
    lower = np.array(min_max_dict_pred['min'])
    upper = np.array(min_max_dict_pred['max'])
    scaled = min_max_scalar(np.array(first_scan), lower, upper)
    # Single-sample batch through every base learner, then average.
    ensemble_out = predict_base_learners(base_pred, np.array([scaled]))
    return ensemble_out.mean()
# Print the single-scan progression prediction for each patient; each call
# receives a deep copy of the feature table.
print('\n'+'*'*10+'\tSevere patient\t'+'*'*10)
print(First_Scan(deepcopy(X_severe)))
print('\n'+'*'*10+'\tMild patient\t'+'*'*10)
print(First_Scan(deepcopy(X_mild)))
"""## Progress of disease"""
# Severe patient: one slice index per timepoint (8 timepoints), plotted
# together with the per-scan predictions p_severe.
slice_id = [175, 162, 195, 195, 195, 195, 195, 195]
raw, lesion, gt = data_disease_progress_slice(all_info_severe, patientID=2020035365, slice_id=slice_id, timepoint_count=8)
plot_progress(raw, lesion, p_severe, gt, state='severe', color_map='Reds', timepoint_count=8)
print('\n\n')
# Mild patient: 6 timepoints, plotted with the per-scan predictions p_mild.
slice_id = [200, 200, 200, 200, 200, 200]
raw, lesion, gt = data_disease_progress_slice(all_info_mild, patientID=2020035021, slice_id=slice_id, timepoint_count=6)
plot_progress(raw, lesion, p_mild, gt, state='mild', color_map='Reds', timepoint_count=6)
"""<center>Predict the patient situation in the progression of disease using per CT scan.</center>"""