# DABC_pipeline_demo.py

# -*- coding: utf-8 -*-
"""DABC_pipeline_demo.ipynb

Automatically generated by Colaboratory.

Original file on github is located at
    https://colab.research.google.com/github/Robin970822/DABC-Net-for-COVID-19/blob/master/DABC_pipeline_demo.ipynb

# Before We Start

**1. Download code from github.**

```bash
git clone https://github.com/Robin970822/DABC-Net-for-COVID-19.git
```

**2. Change directories to DABC-Net-for-COVID-19.**

```bash
cd DABC-Net-for-COVID-19
```

Note that this shared folder DABC-Net-for-COVID-19 might be seen or modified by other users. Your other data and files on Google Drive are safe and independent.

**3. Download data and model weights with download.sh.**

```bash
sh download.sh
```

**4. Set your Hardware accelerator as GPU in Edit/Notebook settings in Colab Menu.**

**5. Make sure your folder structure is correct.**

The folder structure looks like this:
```bash
path
├─Input_data
│      2020034797_0123_2949_20200123015940_4.nii.gz
│      2020034797_0125_3052_20200125111145_4.nii.gz
│      ...
│
├─Output_data
│   │
│   ├─covid
│   │      2020034797_0123_2949_20200123015940_4.nii.gz
│   │      2020034797_0125_3052_20200125111145_4.nii.gz
│   │      ...
│   │
│   ├─lung
│   │      2020034797_0123_2949_20200123015940_4.nii.gz
│   │      2020034797_0125_3052_20200125111145_4.nii.gz
│   │      ...
│   │
│   └─uncertainty
│           2020034797_0123_2949_20200123015940_4_predictive_aleatoric.nii.gz
│           2020034797_0125_3052_20200125111145_4_sample_1.nii.gz
│           ...
│
├─weight
│       model_05090017
│       ...
│
│ (following folders are required if you need longitudinal study)
│
├─meta
│       2020035021.csv
│
└─model
        prediction.pkl
        ...
```

# Step I: 
## Setup environment and load data.

Make sure you have set **GPU** acceleration when you run this notebook. See your Hardware accelerator in Edit/Notebook settings in the Colab menu (usually at the upper left).

## Pull code from github
"""

# Clone the DABC-Net repository that provides the code used below.
!git clone https://github.com/Robin970822/DABC-Net-for-COVID-19.git

# Switch into the checkout: the rest of this script uses paths relative to it
# (e.g. 'weight/...', 'meta/...', 'model/...').
cd /content/DABC-Net-for-COVID-19/

# Update the checkout from the remote master branch.
!git pull origin master

"""## Download data and model weights

It will take about 3 minutes.
"""

# Run the repository's download script from the current directory.
!sh download.sh

"""Make sure you are in DABC-Net-for-COVID-19"""

# Print the current working directory so the location can be checked.
!pwd

"""## Set up environment and install all necessary packages

Make sure you have set **GPU** acceleration when you run this notebook. See your Hardware accelerator in **Edit/Notebook settings** in Colab Menu(On the upper left usually).

Note: if you only need to run prediction, only **xgboost==1.1.0** and **scikit-learn==0.21.3** are required (to load the prediction model), and you can skip the inference steps.
"""

# Install the pinned package versions this demo relies on.
!pip install SimpleITK tensorflow-gpu==1.15.4 keras==2.2.4 xgboost==1.1.0 scikit-learn==0.21.3 scipy==1.1

"""# Step II:
## Lung and lesion Segmentation.

## Load datasets and pretrained model
"""

import numpy as np
import pandas as pd
import tensorflow as tf
from models import models as Model
from pipeline.inference_pipeline import local_inference
from pipeline.data_pipeline import save_pred_to_nii, read_from_nii, confirm_data

# Limit TensorFlow logging to ERROR-level messages.
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)


def DABC_infer(nii_path='', save_path='', usage='covid'):
    """Segment every scan under ``nii_path`` and write the results to ``save_path``.

    Args:
        nii_path: Directory holding the input NIfTI scans. ``'/*'`` is appended
            to form the pattern handed to ``read_from_nii`` and reused as the
            reference path when saving.
        save_path: Directory that receives the predictions (a trailing ``'/'``
            is appended before it is passed to ``save_pred_to_nii``).
        usage: Which pretrained weights to load: ``'covid'`` or ``'lung'``.

    Returns:
        None. For any other ``usage`` a message is printed and the function
        returns before any data is read or any model is built.
    """
    # Resolve the weights first so an invalid `usage` fails fast instead of
    # after every scan has already been read and preprocessed.
    if usage == 'covid':
        name = 'weight/Covid_05112327'
    elif usage == 'lung':
        name = 'weight/model_05090017'
    else:
        print('Please select correct model!')
        return None

    nii_glob = nii_path + '/*'
    all_src_data = read_from_nii(nii_path=nii_glob, Hu_window=(-1024, 512), need_rotate=True)
    # Add a trailing axis of size 1 (the model is built with input_size=(4, 256, 256, 1)).
    all_src_data = np.expand_dims(all_src_data, -1)

    print('\n**********\tInferring CT scans:\t**********\n')
    test_vol = confirm_data(all_src_data)

    model = Model.DABC(input_size=(4, 256, 256, 1), load_weighted=name)
    pred = local_inference(test_vol, model)
    # The input pattern doubles as the reference path for the saved files.
    save_pred_to_nii(pred=pred, save_path=save_path + '/', ref_path=nii_glob,
                     need_resize=True, need_rotate=True)

"""## Run lung and lesion segmentation
It takes about 8 minutes for lesion segmentation of 8 input CT scans.
"""

# Segment lesion
# Scans are read from the '2020035365' folder; results go to the 'covid' output folder.
input_path='2020035365'
output_path='2020035365_output/covid'
# `usage` is left at its default ('covid').
DABC_infer(input_path, output_path)

"""It takes about 8 minutes for lung segmentation of 8 input CT scans."""

# Segment lung
# Same input folder, written to the 'lung' output folder with usage='lung'.
output_path='2020035365_output/lung'
DABC_infer(input_path, output_path, usage='lung')

"""Postprocessing as refinement: remove small segmented areas to reduce false-positive regions.

(This step is optional)
"""

# from postprocess_lung import remove_small_objects
# remove_small_objects('2020035365_output/lung')

"""# Step III:
## Infer segmentation uncertainty through monte-carlo dropout

It will take about 2 minutes for 5 monte-carlo samples
"""

from DABC_uncertainty_colab import DABC_uncertainty
# run 5x inference
# A single scan of patient 2020035365 is processed; the second argument is the
# 'uncertainty' output folder.
# NOTE(review): uc_chosen='Both' presumably selects both uncertainty types —
# confirm against DABC_uncertainty_colab.
DABC_uncertainty('2020035365/2020035365_0204_3050_20200204184413_4.nii.gz', '2020035365_output/uncertainty', sample_value=5, uc_chosen='Both')

"""# Step IV: 

## Visualisation of segmentation results, uncertainties and progress curve

## Plot segmentation
"""

import pandas as pd
from utils.visualization import data_disease_slice, plot_segmentation, plot_progress_curve, plot_uncertainty
import warnings

warnings.filterwarnings("ignore")

# Severe patient
# One slice index is given per scan (8 entries for this patient).
raw_severe, lung_severe, lesion_severe, ratio_severe = data_disease_slice(patientID='2020035365', slice_id=[175, 162, 195, 195, 195, 195, 195, 195])

meta_path='meta/2020035365.csv'
meta = pd.read_csv(meta_path, index_col=[0])
# Copy the filtered rows before adding a column: assigning into a bare
# boolean-mask selection is a chained assignment (SettingWithCopyWarning).
_meta_severe = meta[meta['slice'] > 100].copy()
_meta_severe['ratio'] = ratio_severe

# Mild patient
# To avoid waiting, the segmentation results of the mild patient have already been computed and saved in the 2020035021_output/ folder.
# If you want to repeat the results, please change the patientID from 2020035365 to 2020035021 in the inference steps.

raw_mild, lung_mild, lesion_mild, ratio_mild = data_disease_slice(patientID='2020035021', slice_id=[200, 200, 200, 200, 200, 200])

# `meta_path` and `meta` are deliberately reused for the second patient.
meta_path='meta/2020035021.csv'
meta = pd.read_csv(meta_path, index_col=[0])
_meta_mild = meta[meta['slice'] > 100].copy()
_meta_mild['ratio'] = ratio_mild

plot_segmentation(raw_severe, lung_severe, lesion_severe, color_map='Reds', state='Severe', hspace=-0.6)

plot_segmentation(raw_mild, lung_mild, lesion_mild, color_map='Reds', state='Mild', hspace=-0.4)

"""## Plot progress curve"""

import seaborn as sns
import matplotlib.pyplot as plt

# Draw both patients' curves on one 16x9 figure so they can be compared directly.
plt.figure(figsize=(16, 9))
# NOTE(review): patientID is passed as an int here but as a str to
# data_disease_slice elsewhere in this file — confirm plot_progress_curve accepts it.
plot_progress_curve(_meta_severe, patientID=2020035365, line_color=sns.color_palette('Reds')[5], label='Severe patient')
plot_progress_curve(_meta_mild, patientID=2020035021, line_color=sns.color_palette('Greens')[3], label='Mild patient')
plt.legend(loc='upper right')
# Title spelling corrected ("pateint" -> "patient").
plt.title('Severe patient vs Mild patient', fontsize=26)
plt.xlabel('Time(Day)', fontsize=16)
plt.ylabel('Lesion ratio', fontsize=16)

"""## Plot uncertainty

"""

# Example I:
# Uncertainty plot for slice 175 of the named scan.
plot_uncertainty(name_id='2020035365_0204_3050_20200204184413_4.nii.gz',slice_id=175)

# Example II:
# Same scan, slice 150.
plot_uncertainty(name_id='2020035365_0204_3050_20200204184413_4.nii.gz',slice_id=150)

"""#Step V:
## Prediction of pneumonia progression

## Load data
  In this section, we present one mild patient and one severe patient with multi-scans to show progress of the disease and illustrate our model performance.
"""

import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from utils.calculate_feature import *
from utils.visualization import *
from pipeline.data_pipeline import read_from_nii

# severe patient
meta_path_severe='meta/2020035365.csv'
meta_severe = pd.read_csv(meta_path_severe, index_col=[0])

# Load the input scans plus the lung and lesion outputs from the folders the
# segmentation step wrote to, all converted to float32.
# NOTE: `lung_severe` and `lesion_severe` rebind names assigned earlier in this
# file from data_disease_slice.
raw_data_severe = read_from_nii('2020035365/*').astype('float32')
lung_severe = read_from_nii(r'2020035365_output/lung/*').astype('float32')
lesion_severe = read_from_nii(r'2020035365_output/covid/*').astype('float32')

# mild patient
# Only the metadata is read here; the mild patient's volumes are loaded later.
meta_path_mild='meta/2020035021.csv'
meta_mild = pd.read_csv(meta_path_mild, index_col=[0])

def predict_base_learners(base_learners, x):
    """Collect each base learner's second ``predict_proba`` column for ``x``.

    Args:
        base_learners: Mapping of learner name to a fitted object exposing
            ``predict_proba``.
        x: 2-D input whose first dimension is the number of samples.

    Returns:
        Array of shape ``(x.shape[0], len(base_learners))``; column ``k`` holds
        ``predict_proba(x)[:, 1]`` of the ``k``-th learner in mapping order.
    """
    named_learners = list(base_learners.items())
    scores = np.zeros((x.shape[0], len(named_learners)))
    print('Generating base learner predictions.')
    for column, pair in enumerate(named_learners):
        learner_name, learner = pair
        # Progress output stays on one line: no newline until 'done.'.
        print('%s...' % learner_name, end='', flush=False)
        scores[:, column] = learner.predict_proba(x)[:, 1]
    print('done.')
    return scores

"""## Calculate all 14 features from the segmentation results

"""

# Compute the severe patient's features from the raw scans, lung output,
# lesion output and metadata. `res_list_severe` is not used again in this file.
res_list_severe, all_info_severe = calculate(raw_data_severe, lung_severe, lesion_severe, meta_severe)

del raw_data_severe, lung_severe, lesion_severe  # release RAM

# mild patient
# These volumes are loaded only after the severe patient's arrays were deleted.
raw_data_mild = read_from_nii(r'2020035021/*').astype('float32')
lung_mild = read_from_nii(r'2020035021_output/lung/*').astype('float32')
lesion_mild = read_from_nii(r'2020035021_output/covid/*').astype('float32')

res_list_mild, all_info_mild = calculate(raw_data_mild, lung_mild, lesion_mild, meta_mild)
# Free the mild patient's volumes as well; only `all_info_*` is needed below.
del raw_data_mild, lung_mild, lesion_mild

"""All features and information of the severe and mild patients are shown below.

(You can also print the variable 'all_info_severe' or 'all_info_mild' directly in new blank code cell.)
"""

# `plot_fetures` is the name exported by utils.visualization (spelling as in the project).
from utils.visualization import plot_fetures

# Show the computed information of both patients; save_to_html=True is passed through.
plot_fetures(all_info_severe=all_info_severe, all_info_mild=all_info_mild, save_to_html=True)  # x-axis:time(day), y-axis: lesion ratio

"""Animation of progress curve"""

from utils.visualization import plot_animation_curve

# Animated curve for the severe patient only.
plot_animation_curve(all_info=all_info_severe)  # x-axis:time(day), y-axis: lesion ratio

"""## Prediction
### Load model
"""

import pickle
import json

# SECURITY NOTE: pickle.load can execute arbitrary code while unpickling; only
# load model files obtained from a source you trust.

# Base learners and min-max bounds read by First_Two_Scans (until the names are
# rebound further down in this file).
with open('model/prediction.pkl', 'rb') as j:
    base_pred = pickle.load(j)

with open('model/min_max_prediction.json', 'r') as j:
    min_max_dict_pred = json.load(j)

# Base learners and min-max bounds read by Per_Scan_Classification.
with open('model/classification.pkl', 'rb') as j:
    base_cls = pickle.load(j)

with open('model/min_max_classification.json', 'r') as j:
    min_max_dict_cls = json.load(j)

# The 14 feature names handed to preprocessing(); 'Age' and 'sex' come last.
feature = [
    'left_ratio', 'right_ratio',
    'left_lung', 'right_lung',
    'left_lesion', 'right_lesion',

    'left_weighted_lesion', 'right_weighted_lesion',

    'left_consolidation', 'right_consolidation',

    'left_z', 'right_z',
    'Age', 'sex',
]

"""## Preprocessing"""

# NOTE(review): `preprocessing` is not defined in this file; presumably it comes
# from the star import of utils.calculate_feature — confirm.
X_severe = preprocessing(all_info_severe, feature)
X_mild = preprocessing(all_info_mild, feature)

"""## Per Scan Classification for different timepoint."""

def Per_Scan_Classification(X):
    """Return one averaged base-learner score per row of ``X``.

    ``X`` is converted to an array, min-max scaled with the bounds stored in
    ``min_max_dict_cls`` and scored by every learner in ``base_cls``; the
    per-learner columns are then averaged row by row.
    """
    lower_bound = np.array(min_max_dict_cls['min'])
    upper_bound = np.array(min_max_dict_cls['max'])
    scaled = min_max_scalar(np.array(X), lower_bound, upper_bound)
    learner_scores = predict_base_learners(base_cls, np.array(scaled))
    # Average across learners (axis 1), keeping one value per row.
    return learner_scores.mean(axis=1)

# Score every row of each patient's inputs and print the raw scores.
p_severe = Per_Scan_Classification(X_severe)
print('Prediction of severe patient(per scan):\n{}\n'.format(p_severe))
p_mild = Per_Scan_Classification(X_mild)
print('Prediction of mild patient(per scan):\n{}\n'.format(p_mild))

# Print the thresholded prediction (score > 0.5 -> 1), the 'Severe' column of
# the computed information as ground truth, and the raw scores.
print('\n'+'*'*10+'\tSevere patient\t'+'*'*10)
print('pred\t{} \ngt\t{} \nprob {}'.format((p_severe > 0.5).astype('int'), np.array(all_info_severe['Severe']), p_severe))
print('\n'+'*'*10+'\tMild patient\t'+'*'*10)
print('pred\t{} \ngt\t{} \nprob {}'.format((p_mild > 0.5).astype('int'), np.array(all_info_mild['Severe']), p_mild))

"""## First Two Scans"""

def First_Two_Scans(X):
    """Score one patient from rows 0 and 1 of ``X``.

    The input vector is row 1 without its last two values, followed by all of
    row 0. It is min-max scaled with ``min_max_dict_pred`` and scored by every
    learner in ``base_pred``; the mean over all learner outputs is returned.
    """
    # NOTE(review): the two dropped values are presumably Age and sex (the last
    # two entries of `feature`), which row 0 already supplies — confirm.
    later_scan = X.iloc[1].tolist()
    earlier_scan = X.iloc[0].tolist()
    merged = later_scan[:-2] + earlier_scan

    # Min-max scale with the bounds loaded alongside the model.
    lower_bound = np.array(min_max_dict_pred['min'])
    upper_bound = np.array(min_max_dict_pred['max'])
    scaled = min_max_scalar(np.array(merged), lower_bound, upper_bound)

    # Prediction on a batch containing this single vector.
    learner_scores = predict_base_learners(base_pred, np.array([scaled]))
    return learner_scores.mean()

print('\n'+'*'*10+'\tSevere patient\t'+'*'*10)
print(First_Two_Scans(X_severe))
print('\n'+'*'*10+'\tMild patient\t'+'*'*10)
print(First_Two_Scans(X_mild))

"""## First Three Scans"""

# NOTE: this rebinds the module-level `base_pred` and `min_max_dict_pred` that
# First_Two_Scans also reads, so calling First_Two_Scans after this point would
# use the "first_3" model and bounds.
# SECURITY NOTE: pickle.load can execute arbitrary code; load trusted files only.
with open('model/prediction_first_3.pkl', 'rb') as j:
    base_pred = pickle.load(j)

with open('model/min_max_prediction_first_3.json', 'r') as j:
    min_max_dict_pred = json.load(j)


def First_Three_Scans(X):
    """Score one patient from rows 1 and 2 of ``X``.

    The input vector is row 2 without its last two values, followed by all of
    row 1. It is min-max scaled with ``min_max_dict_pred`` and scored by every
    learner in ``base_pred``; the mean over all learner outputs is returned.
    """
    # NOTE(review): despite the function name, row 0 is never read here —
    # confirm this is the layout the "first_3" model expects.
    later_scan = X.iloc[2].tolist()
    earlier_scan = X.iloc[1].tolist()
    merged = later_scan[:-2] + earlier_scan

    # Min-max scale with the bounds loaded alongside the model.
    lower_bound = np.array(min_max_dict_pred['min'])
    upper_bound = np.array(min_max_dict_pred['max'])
    scaled = min_max_scalar(np.array(merged), lower_bound, upper_bound)

    # Prediction on a batch containing this single vector.
    learner_scores = predict_base_learners(base_pred, np.array([scaled]))
    return learner_scores.mean()

# Print a banner and the three-scan score for each patient.
print('\n'+'*'*10+'\tSevere patient\t'+'*'*10)
print(First_Three_Scans(X_severe))
print('\n'+'*'*10+'\tMild patient\t'+'*'*10)
print(First_Three_Scans(X_mild))

"""## First Scan
Using first scan to predict progress.

Note: using only the first scan is not reliable; we recommend using the first two or three scans to predict disease progression.
(In this section, the severe patient's progression is predicted incorrectly from the first scan alone. However, with two or three scans, the model predicts correctly with high confidence.)
"""

from copy import deepcopy
# NOTE: `base_pred` and `min_max_dict_pred` are rebound again; every function in
# this file that reads them will use the "first" model and bounds from here on.
# SECURITY NOTE: pickle.load can execute arbitrary code; load trusted files only.
with open('model/prediction_first.pkl', 'rb') as j:
    base_pred = pickle.load(j)

with open('model/min_max_prediction_first.json', 'r') as j:
    min_max_dict_pred = json.load(j)


def First_Scan(X):
    """Score one patient from row 0 of ``X`` only.

    The full row is min-max scaled with ``min_max_dict_pred`` and scored by
    every learner in ``base_pred``; the mean over all learner outputs is
    returned.
    """
    # Only the first row is used.
    first_scan = X.iloc[0].tolist()

    # Min-max scale with the bounds loaded alongside the model.
    lower_bound = np.array(min_max_dict_pred['min'])
    upper_bound = np.array(min_max_dict_pred['max'])
    scaled = min_max_scalar(np.array(first_scan), lower_bound, upper_bound)

    # Prediction on a batch containing this single vector.
    learner_scores = predict_base_learners(base_pred, np.array([scaled]))
    return learner_scores.mean()

# Each call receives a deep copy, so X_severe / X_mild themselves are not passed in.
print('\n'+'*'*10+'\tSevere patient\t'+'*'*10)
print(First_Scan(deepcopy(X_severe)))
print('\n'+'*'*10+'\tMild patient\t'+'*'*10)
print(First_Scan(deepcopy(X_mild)))

"""## Progress of disease"""

# Severe patient: one slice index per timepoint (8 entries, timepoint_count=8).
# NOTE(review): data_disease_progress_slice and plot_progress are not defined in
# this file; presumably they come from the star import of utils.visualization — confirm.
slice_id = [175, 162, 195, 195, 195, 195, 195, 195]
raw, lesion, gt = data_disease_progress_slice(all_info_severe, patientID=2020035365, slice_id=slice_id, timepoint_count=8)
# The per-scan scores computed earlier (p_severe) are passed in with gt.
plot_progress(raw, lesion, p_severe, gt, state='severe', color_map='Reds', timepoint_count=8)

print('\n\n')
# Mild patient: 6 entries, timepoint_count=6; `slice_id`, `raw`, `lesion` and `gt` are reused.
slice_id = [200, 200, 200, 200, 200, 200]
raw, lesion, gt = data_disease_progress_slice(all_info_mild, patientID=2020035021, slice_id=slice_id, timepoint_count=6)
plot_progress(raw, lesion, p_mild, gt, state='mild', color_map='Reds', timepoint_count=6)

"""<center>Predict the patient situation in the progression of disease using per CT scan.</center>"""