-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathdataset_input.py
56 lines (43 loc) · 1.68 KB
/
dataset_input.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
import os
import pandas as pd
import torch
import torch.utils.data as data
import torchvision.transforms as transforms
import numpy as np
from PIL import Image
from glob import glob
from sklearn.model_selection import train_test_split
# Root directory of the dataset: the metadata CSV is read from directly inside
# it, and the JPEG images from its immediate sub-directories.
base_skin_dir = os.path.join('/home/yjzhang/Work/SkinCancer', 'input')

# image_id (file name without extension) -> full path of the JPEG on disk.
imageid_path_dict = {
    os.path.splitext(os.path.basename(x))[0]: x
    for x in glob(os.path.join(base_skin_dir, '*', '*.jpg'))
}

# Diagnosis code found in the `dx` column -> human-readable lesion name.
lesion_type_dict = {
    'nv': 'Melanocytic nevi',
    'mel': 'Melanoma',
    'bkl': 'Benign keratosis-like lesions ',
    'bcc': 'Basal cell carcinoma',
    'akiec': 'Actinic keratoses',
    'vasc': 'Vascular lesions',
    'df': 'Dermatofibroma',
}

# Seed shared by both splits so that every import of this module produces the
# same train/validation/test partition (otherwise a separate evaluation run
# could test on images that an earlier run trained on).
SPLIT_SEED = 42

tile_df = pd.read_csv(os.path.join(base_skin_dir, 'HAM10000_metadata.csv'))
# Rows whose image_id has no matching file on disk get a missing `path`.
tile_df['path'] = tile_df['image_id'].map(imageid_path_dict.get)
tile_df['cell_type'] = tile_df['dx'].map(lesion_type_dict.get)
# Integer class label. pd.Categorical numbers the distinct names in sorted
# order, so the codes depend on the exact spelling of the names above.
# NOTE: correcting the 'mel' name moved it within that order; label indices
# stored by earlier runs (e.g. in checkpoints) may therefore not line up.
tile_df['cell_type_idx'] = pd.Categorical(tile_df['cell_type']).codes

# 90% train; the remaining 10% is halved into validation and test.
train_df, test_df = train_test_split(
    tile_df, test_size=0.1, random_state=SPLIT_SEED)
validation_df, test_df = train_test_split(
    test_df, test_size=0.5, random_state=SPLIT_SEED)

# Renumber rows 0..n-1; the pre-split row label is kept in an `index` column.
train_df = train_df.reset_index()
validation_df = validation_df.reset_index()
test_df = test_df.reset_index()
class SkinDataset(data.Dataset):
    """Map-style dataset yielding ``(image, label)`` pairs from a DataFrame.

    Args:
        df: DataFrame with a ``path`` column (location of each image) and a
            ``cell_type_idx`` column (integer class label).
        transform: Callable applied to the loaded image, or ``None`` to
            return the image exactly as the loader produced it.
        loader: Callable that turns a ``path`` value into an image. Defaults
            to ``PIL.Image.open``.
    """

    def __init__(self, df, transform, loader=None):
        self.df = df
        self.transform = transform
        # Only touch the default when no loader was supplied.
        self.loader = Image.open if loader is None else loader

    def __len__(self):
        """Return the number of rows (samples) in the frame."""
        return len(self.df)

    def __getitem__(self, index):
        """Return ``(image, label)`` for the row at position ``index``.

        The label is a 0-dim integer tensor built from ``cell_type_idx``.
        """
        # Positional lookup: samplers hand out positions 0..len-1, which only
        # coincide with the frame's index labels after a reset_index().
        X = self.loader(self.df['path'].iloc[index])
        y = torch.tensor(int(self.df['cell_type_idx'].iloc[index]))

        if self.transform is not None:
            X = self.transform(X)

        return X, y