-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdata_kmr.py
executable file
·195 lines (160 loc) · 5.48 KB
/
data_kmr.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
""" Dealing with the data from Kimura
* Plan 1.
Inter-WSI validation
Do not use the folds inside of each WSI.
We have 9 WSIs. Presume that chips in one case should not be mixed together,
then inter-WSI validation is to choose k WSIs for training and 9-k WSIs for validation at a time.
* Plan 2.
Cross validation.
Use folds inside each WSI.
Now we created 10 folds. k folds (say 9) from ALL WSIs are involved in training
and 10-k folds are involved in validation at a time.
"""
#%%
import os, glob, shutil as shu
import numpy as np
import matplotlib.pyplot as plt
import skimage.io as io
import skimage.transform as trans
from data import adjustData
#%%
# Platform switch; it only decides which Keras backend is requested below.
mode = "mac"  # TODO: use argparse instead!!
if mode == "mac":
    # Request the PlaidML backend on macOS. This has to be set before the
    # `import keras` statements that follow, since the variable is written
    # ahead of them on purpose.
    os.environ["KERAS_BACKEND"] = "plaidml.keras.backend"
import keras
import keras.backend as K
from keras.preprocessing.image import ImageDataGenerator
"""
* Directory structure
TILES_(256, 256)/
├── DAB
│ ├── 01_14-3768_Ki67_HE
│ │ ├── Annotations
│ │ │ └── annotations-01_14-3768_Ki67_HE.txt
│ │ └── Tiles
│ │ ├── Healthy Tissue
│ │ │ ├── 001 [3328 entries
...
"""
#%%
# * 1. Inter-WSI cross validation
# * 1. Choose 8 WSIs for training/validation and 1 for testing
"""
* 1 Get the list of WSI IDs
* 2 Take 8 of them
* 3 Take these same 8 slides in every sub-folder
DAB--1~8
Mask--1~8
...
* 4 Write a generator that returns:
DAB
WSI1
Tiles
Tumor
<All 10 folds>
* ! images------.
Chips ------------------------------|
WSI1 |
Tiles |
Tumor |
<All 10 folds> |
* ! images -----.------- Zipped one by one as tuples
"""
from itertools import combinations as comb
def folds(l_wsis=None, k=5):
    """Split a list of WSI names into ``k`` (train, validation) partitions.

    The list is cut into ``k`` contiguous, near-equal groups, keeping the
    input order. Each group is used once as the validation set; the matching
    training set holds every name that does not appear in that group.

    Args:
        l_wsis (list, optional): WSI identifiers (folder names). ``None`` is
            treated as an empty list. Defaults to None.
        k (int, optional): number of partitions to build. Defaults to 5.

    Returns:
        list: ``k`` tuples ``(train, validation)``. Group sizes differ by at
        most one, with the larger groups first. If ``k`` is greater than the
        number of names, the trailing validation groups are empty.

    Raises:
        ValueError: if ``k`` is smaller than 1.

    Example:
        l_wsis = [
            "01_14-3768_Ki67_HE",
            "01_14-7015_Ki67_HE",
            "01_15-1052_Ki67_HE",
            "01_15-2502_Ki67_HE",
            "01_17-5256_Ki67_HE",
            "01_17-6747_Ki67_HE",
            "01_17-7885_Ki67_HE",
            "01_17-7930_Ki67_HE",
            "01_17-8107_Ki67_HE",
        ]
        folds(l_wsis, k=5)  # validation group sizes: 2, 2, 2, 2, 1
    """

    def create_divides(items, n_groups):
        # Balanced contiguous split: the first `extra` groups receive one
        # additional item, so every element lands in exactly one group.
        # (The previous ceil-sized chunking shrank the chunk size whenever
        # the last chunk came out empty, which dropped trailing items,
        # e.g. 9 items with k=4 lost the 9th.)
        base, extra = divmod(len(items), n_groups)
        bounds = [0]
        for idx in range(n_groups):
            bounds.append(bounds[-1] + base + (1 if idx < extra else 0))
        return [items[lo:hi] for lo, hi in zip(bounds, bounds[1:])]

    if k < 1:
        raise ValueError("k must be a positive integer, got {!r}".format(k))

    # The signature advertises None as the default, so it must not crash.
    wsis = [] if l_wsis is None else l_wsis

    return [
        ([name for name in wsis if name not in held_out], held_out)
        for held_out in create_divides(wsis, k)
    ]
def kmrGenerator(
    dataset_path,
    batch_size=4,
    image_folder=None,
    mask_folder=None,
    aug_dict=None,
    image_color_mode="rgb",
    mask_color_mode="grayscale",
    image_save_prefix="image",
    mask_save_prefix="mask",
    flag_multi_class=False,
    num_class=2,
    save_to_dir=None,
    target_size=(256, 256),
    seed=1,
) -> tuple:
    """Yield (HE, DAB) batch pairs for ``fit_generator``.

    Two Keras directory iterators are built, one over ``<dataset_path>/HE``
    and one over ``<dataset_path>/DAB``. Both receive the same augmentation
    arguments, ``target_size``, ``batch_size`` and ``seed``. Their batches
    are zipped one by one and passed through ``adjustData`` before being
    yielded.

    Args:
        dataset_path (str): root folder that contains the ``HE`` and ``DAB``
            sub-folders. A trailing path separator is not required.
        batch_size (int, optional): batch size. Defaults to 4.
        image_folder (list of str, optional): sub-folder names passed as
            ``classes`` to the HE iterator. Defaults to None.
        mask_folder (list of str, optional): sub-folder names passed as
            ``classes`` to the DAB iterator. When None, ``image_folder`` is
            used for DAB as well. Defaults to None.
        aug_dict (dict, optional): keyword arguments for
            ``ImageDataGenerator``. None means no arguments. Defaults to None.
        image_color_mode (str, optional): color mode used when reading the
            images. Defaults to "rgb".
        mask_color_mode (str, optional): NOTE(review): currently not applied.
            DAB tiles are read with ``image_color_mode``, exactly as before.
            Honouring this argument (default "grayscale") would change the
            channel count of the yielded targets; confirm with the model
            definition before switching. Defaults to "grayscale".
        image_save_prefix (str, optional): file-name prefix for HE batches
            written to ``save_to_dir``. Defaults to "image".
        mask_save_prefix (str, optional): file-name prefix for DAB batches
            written to ``save_to_dir``. Defaults to "mask".
        flag_multi_class (bool, optional): forwarded to ``adjustData``.
            Defaults to False.
        num_class (int, optional): forwarded to ``adjustData``. Defaults to 2.
        save_to_dir (str, optional): forwarded to both iterators as
            ``save_to_dir``. Defaults to None.
        target_size (tuple, optional): 2-D size of the loaded images.
            Defaults to (256, 256).
        seed (int, optional): seed given to both iterators. Defaults to 1.

    Yields:
        tuple: ``(img, mask)`` as returned by ``adjustData`` for one HE batch
        and the DAB batch drawn at the same step.
    """
    # ImageDataGenerator(**None) raises TypeError, so map the advertised
    # default to "no keyword arguments".
    aug_kwargs = {} if aug_dict is None else aug_dict
    he_datagen = ImageDataGenerator(**aug_kwargs)
    dab_datagen = ImageDataGenerator(**aug_kwargs)

    # Falling back to image_folder keeps the previous behavior for callers
    # that never passed mask_folder (both iterators walk the same folders).
    dab_folder = image_folder if mask_folder is None else mask_folder

    he_generator = he_datagen.flow_from_directory(
        # os.path.join works with or without a trailing separator; plain
        # string concatenation produced e.g. "rootHE/" without one.
        os.path.join(dataset_path, "HE"),
        classes=image_folder,
        class_mode=None,
        color_mode=image_color_mode,
        target_size=target_size,
        batch_size=batch_size,
        save_to_dir=save_to_dir,
        save_prefix=image_save_prefix,
        seed=seed,
    )
    dab_generator = dab_datagen.flow_from_directory(
        os.path.join(dataset_path, "DAB"),
        classes=dab_folder,
        class_mode=None,
        # NOTE(review): mask_color_mode is deliberately not used here; see
        # the docstring.
        color_mode=image_color_mode,
        target_size=target_size,
        batch_size=batch_size,
        save_to_dir=save_to_dir,
        # A distinct prefix lets saved DAB batches be told apart from HE ones.
        save_prefix=mask_save_prefix,
        seed=seed,
    )

    for img, mask in zip(he_generator, dab_generator):
        img, mask = adjustData(img, mask, flag_multi_class, num_class)
        yield (img, mask)
#%%
# * 2. Cross validation
# %%