lxg2015
diff --git a/‎__pycache__/convNet.cpython-36.pyc
1.52 KB b/‎__pycache__/convNet.cpython-36.pyc
1.52 KB
diff --git a/‎__pycache__/dataset.cpython-36.pyc
3.69 KB b/‎__pycache__/dataset.cpython-36.pyc
3.69 KB
diff --git a/‎__pycache__/tool.cpython-36.pyc
1.48 KB b/‎__pycache__/tool.cpython-36.pyc
1.48 KB
diff --git a/‎data/__init__.py
+2 b/‎data/__init__.py
+2
diff --git a/‎data/__pycache__/__init__.cpython-36.pyc
207 Bytes b/‎data/__pycache__/__init__.cpython-36.pyc
207 Bytes
diff --git a/‎data/__pycache__/dataset.cpython-36.pyc
4.24 KB b/‎data/__pycache__/dataset.cpython-36.pyc
4.24 KB
diff --git a/‎data/__pycache__/tool.cpython-36.pyc
2.42 KB b/‎data/__pycache__/tool.cpython-36.pyc
2.42 KB
diff --git a/‎data/data.py
+93 b/‎data/data.py
+93
diff --git a/‎data/dataset.py
+202 b/‎data/dataset.py
+202
diff --git a/‎data/dataset.pyc
3.13 KB b/‎data/dataset.pyc
3.13 KB
@@ -0,0 +1,2 @@
+from .dataset import *
+from .tool import object_crop, getmaxmask
@@ -0,0 +1,93 @@
+# https://www.kaggle.com/nanigans/pytorch-starter/notebook
+import os
+import math
+import pandas as pd
+import numpy as np
+import matplotlib.pyplot as plt
+
+# The training file holds 1604 items in total.
+# Columns: band_1, band_2, id, inc_angle, is_iceberg.
+path = '/home/lxg/codedata/ice/'
+data = pd.read_json(os.path.join(path, 'train.json'))
+
+# Every band is stored as a flat list; reshape each one into a 75x75 array.
+# Observed value ranges (original author's notes):
+#   band_1: min -34.715858, max 3.98
+#   band_2: min -35.403362, max -6.934982
+for _band in ('band_1', 'band_2'):
+    data[_band] = data[_band].apply(lambda flat: np.array(flat).reshape(75, 75))
+del _band  # keep the module namespace limited to `path` and `data`
+
+# Entries that cannot be parsed as numbers become NaN.
+# Author's notes: 1471 of the 1604 angles are valid and 133 are NaN
+# (min 24.75, max 45.9, mean 39.26); labels: 753 True, 851 False.
+data['inc_angle'] = pd.to_numeric(data['inc_angle'], errors='coerce')
+
+def plotSample(df, idx):
+    """Display one sample: both band images plus their pixel histograms.
+
+    df: frame with 'band_1', 'band_2', 'inc_angle' and 'is_iceberg' columns.
+    idx: positional row index of the sample to show.
+
+    The top row shows band_1 and band_2 as images, the bottom row their
+    256-bin histograms. The figure title is the incidence angle followed
+    by 'ship' or 'ice', chosen by the is_iceberg value.
+    """
+    class_names = ('ship', 'ice')
+    fig, (image_axes, hist_axes) = plt.subplots(2, 2)
+    bands = (df['band_1'].iloc[idx], df['band_2'].iloc[idx])
+
+    for axis, band in zip(image_axes, bands):
+        axis.imshow(band)
+    for axis, band in zip(hist_axes, bands):
+        axis.hist(band.ravel(), bins=256, fc='k', ec='k')
+
+    fig.set_figheight(10)
+    fig.set_figwidth(10)
+
+    angle = df['inc_angle'].iloc[idx]
+    label = df['is_iceberg'].iloc[idx]
+    plt.suptitle(str(angle) + class_names[label])
+    plt.show()
+
+def plotMinMax(df):
+    """Plot histograms of the per-image minimum and maximum of each band.
+
+    df: frame whose 'band_1' and 'band_2' cells expose .min() and .max()
+        (the 75x75 arrays built at module level satisfy this).
+
+    Layout of the 2x2 figure: the top row is band_1 (min, max) and the
+    bottom row is band_2 (min, max).
+    """
+    # Read from the `df` argument, not the module-level `data`, so that any
+    # frame (for example a test split) can be inspected.
+    min_max = pd.DataFrame()
+    min_max['min_1'] = df['band_1'].apply(lambda x: x.min())
+    min_max['max_1'] = df['band_1'].apply(lambda x: x.max())
+    min_max['min_2'] = df['band_2'].apply(lambda x: x.min())
+    min_max['max_2'] = df['band_2'].apply(lambda x: x.max())
+
+    f, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2,2)
+    ax1.hist(min_max['min_1'])
+    ax2.hist(min_max['max_1'])
+    ax3.hist(min_max['min_2'])
+    ax4.hist(min_max['max_2'])
+    f.set_figheight(20)
+    f.set_figwidth(20)
+    plt.show()
+
+def splitSave(df):
+    """Randomly split df 80/20 and write both parts as JSON files.
+
+    The sampled 80% is written to 'train_train.json' and the remaining
+    rows to 'train_val.json', both inside the module-level `path`
+    directory. Prints 'split done' when finished.
+    """
+    train = df.sample(frac=0.8)
+    # Take the complement by index label. Masking with isin() and then
+    # calling dropna() also threw away validation rows whose inc_angle is
+    # NaN, and depended on cell-by-cell comparison of array-valued cells.
+    val = df.loc[~df.index.isin(train.index)]
+    train.to_json(os.path.join(path, 'train_train.json'))
+    val.to_json(os.path.join(path, 'train_val.json'))
+    print('split done')
+
+def amplitudeSplit(df):
+    '''
+    Meant to split band_1 and band_2 according to the angle value.
+    So far it only adds an 'angle' column to df in place: a copy of
+    'inc_angle' in which NaN entries are replaced by 45. Returns None.
+    '''
+    def fill_missing(angle):
+        if math.isnan(angle):
+            return 45
+        return angle
+
+    df['angle'] = df['inc_angle'].apply(fill_missing)
+
+
+def splitAndSaveTest():
+    """Load test.json, preprocess it and save it as three consecutive parts.
+
+    Preprocessing mirrors the training data: both bands are reshaped to
+    75x75 arrays and inc_angle is converted to numbers (unparseable
+    entries become NaN). The parts are written to test1.json, test2.json
+    and test3.json inside the module-level `path` directory.
+    """
+    test = pd.read_json(os.path.join(path, 'test.json'))
+    test['band_1'] = test['band_1'].apply(lambda x: np.array(x).reshape(75,75))
+    test['band_2'] = test['band_2'].apply(lambda x: np.array(x).reshape(75,75))
+    test['inc_angle'] = pd.to_numeric(test['inc_angle'], errors='coerce')
+
+    length = test.shape[0]
+    print('total', test.shape)
+    # Slice bounds must be integers: `/` is true division on Python 3 and
+    # would produce floats, which cannot be used for slicing.
+    first_cut = length // 3
+    second_cut = length * 2 // 3
+    test1 = test[:first_cut]
+    test2 = test[first_cut:second_cut]
+    test3 = test[second_cut:]
+    # Sanity check: the three part sizes must add up to the total above.
+    print(test1.shape[0]+test2.shape[0]+test3.shape[0])
+
+    test1.to_json(os.path.join(path, 'test1.json'))
+    test2.to_json(os.path.join(path, 'test2.json'))
+    test3.to_json(os.path.join(path, 'test3.json'))
+
+if __name__ == '__main__':
+    # Default action when run as a script: write the train/validation
+    # split of the already loaded training data to disk.
+    # splitAndSaveTest()
+    splitSave(data)
+
+    # Other one-off actions, kept disabled:
+    # plotMinMax(data)
+
+    # for i in range(100,200):
+    #     plotSample(data, i)
+    #     i += 1
+    # data.to_json(os.path.join(path, 'train_clean.json'))
+    # pass
@@ -0,0 +1,202 @@
+import os
+import cv2
+import random
+import pandas as pd
+import numpy as np
+import torch.utils.data as data
+import torch
+from .tool import randomCrop, rotate, lee_filter, object_crop, getMaskImg
+
+def read_clean(path, file, predicted=False):
+    '''
+    Load a JSON split and assemble the arrays used for training or prediction.
+
+    path: directory that contains the file
+    file: name of the JSON file; it needs 'band_1', 'band_2' and 'inc_angle'
+          columns plus 'is_iceberg' (predicted=False) or 'id' (predicted=True)
+    predicted: False -> second return value is the labels,
+               True  -> second return value is the sample ids
+
+    return:
+    full_img_tr: numpy array of shape (N, 75, 75, 2); band_1 and band_2
+                 are the two channels
+    data['is_iceberg'].values (numpy) or list(data['id']) (list)
+    inc_angle: numpy float array; missing or non-numeric angles are set to 0
+    '''
+    data = pd.read_json(os.path.join(path, file))
+
+    band_1_tr = np.concatenate(list(data['band_1'])).reshape(-1, 75, 75)
+    band_2_tr = np.concatenate(list(data['band_2'])).reshape(-1, 75, 75)
+    # N,2,75,75 -> N,75,75,2 (channels last)
+    full_img_tr = np.stack([band_1_tr, band_2_tr], axis=1).transpose(0, 2, 3, 1)
+
+    # Build a private, writable float copy: writing into `.values` directly
+    # can hit a read-only buffer, and raw files may still carry non-numeric
+    # markers such as 'na', which np.isnan cannot handle.
+    inc_angle = np.array(pd.to_numeric(data['inc_angle'], errors='coerce'), dtype=float)
+    inc_angle[np.isnan(inc_angle)] = 0  # 0 marks a missing angle (the mean, 39.26, is not used)
+
+    if not predicted:
+        return full_img_tr, data['is_iceberg'].values, inc_angle
+    else:
+        return full_img_tr, list(data['id']), inc_angle
+
+class train_cross():
+    '''
+    N-fold cross-validation splitter over in-memory arrays.
+
+    The sample order is shuffled once at construction. getset(k) then
+    serves fold k as the test part and all other samples as the train part.
+    '''
+    def __init__(self, train, label, inc_angle, num):
+        '''
+        train: samples, indexed along axis 0
+        label: labels aligned with train
+        inc_angle: angles aligned with train
+        num: split set number (number of folds)
+
+        All three arrays must support indexing with a list of integers
+        (numpy arrays do).
+        '''
+        self.length = train.shape[0]
+        self.num = num
+        self.data = train
+        self.label = label
+        self.inc_angle = inc_angle
+        self.image_list = list(range(self.length))
+        random.shuffle(self.image_list)  # shuffles the index list in place
+
+    def getset(self, ids):
+        '''
+        ids: fold number, expected in 0 .. num-1
+        return: image_train, lab_train, inc_train, image_test, lab_test, inc_test
+        '''
+        span = self.length / self.num
+        first_index = int(ids*span)
+
+        # Compare by value: `is not` tests object identity, which is not a
+        # reliable equality check for integers.
+        if ids != self.num-1:
+            test_list = self.image_list[first_index:int((ids+1)*span)]
+        else:
+            # The last fold runs to the end so rounding never drops a sample.
+            test_list = self.image_list[first_index:]
+
+        image_test = self.data[test_list]
+        lab_test = self.label[test_list]
+        inc_test = self.inc_angle[test_list]
+
+        # Everything that is not in the test fold is training data.
+        train_list = list(set(self.image_list) - set(test_list))
+        image_train = self.data[train_list]
+        lab_train = self.label[train_list]
+        inc_train = self.inc_angle[train_list]
+
+        return image_train, lab_train, inc_train, image_test, lab_test, inc_test
+
+class DataSet(data.Dataset):
+    """Dataset over in-memory image arrays with optional training augmentation.
+
+    datap: image array indexed along axis 0 (each item WxHxC per the
+           original author's note)
+    labelp: class labels when predicted is False, sample ids otherwise
+    incp: per-sample angle values, aligned with datap
+    train: when true, random augmentation is applied in __getitem__
+    predicted: selects whether labelp is stored as labels or as ids
+
+    Each item is (image tensor in CxHxW float form resized to
+    image_size x image_size, label or id, one-element angle tensor).
+
+    Experiments that were tried and are currently disabled: subtracting
+    the minimum, negating the image, Lee speckle filtering, PCA whitening,
+    speckle noise, salt-and-pepper noise and random rotation in [-20, 20].
+    """
+    def __init__(self, datap, labelp, incp, train, predicted=False):
+        self.image_size = 40  # output side length; 20 and 75 were also tried
+        self.data = datap
+        self.incp = incp
+        self.predicted = predicted
+        self.length = datap.shape[0]
+        self.train = train
+        # labelp carries ids in prediction mode and class labels otherwise.
+        if predicted:
+            self.label, self.id = [], labelp
+        else:
+            self.label, self.id = labelp, []
+
+    def _augment(self, img):
+        """Apply the random training-time transforms to one image."""
+        # Horizontal flip with probability 0.5.
+        if random.random() < 0.5:
+            img = np.fliplr(img)
+
+        # First draw below 0.3: enlarge to 85x85, then randomCrop to 75x75.
+        # Otherwise a second, independent draw below 0.6: reflect-pad by 7
+        # pixels per side, then randomCrop to 75x75. Else leave the image.
+        if random.random() < 0.3:
+            img = randomCrop(cv2.resize(img, (85,85)), 75, 75)
+        elif random.random() < 0.6:
+            padded = np.pad(img, ((7,7),(7,7),(0,0)), 'reflect')
+            img = randomCrop(padded, 75, 75)
+        return img
+
+    def __getitem__(self, idx):
+        img = self.data[idx]
+        if self.train:
+            img = self._augment(img)
+
+        # object_crop comes from .tool; its second return value is unused here.
+        img, _unused_area = object_crop(img, self.train)
+        side = self.image_size
+        img = cv2.resize(img, (side, side))
+
+        # Move channels first, then convert to a float tensor.
+        tensor = torch.from_numpy(img.transpose(2,0,1)).float()
+        inc = torch.Tensor([self.incp[idx]])
+
+        target = self.id[idx] if self.predicted else self.label[idx]
+        return tensor, target, inc
+
+    def __len__(self):
+        return self.length
+
+
+if __name__ == '__main__':
+    # Visual smoke test: walk through the dataset and show each item next to
+    # its Lee-filtered version.
+    # NOTE(review): this file uses a relative import (.tool), so it presumably
+    # has to be run as a module from the parent directory - confirm.
+    import matplotlib.pyplot as plt
+    print('dataset main run')
+    # DataSet takes arrays, not a path, so load them with read_clean first.
+    images, ids, angles = read_clean('/home/lxg/codedata/ice',
+                                     'train_train.json',
+                                     predicted=True)
+    dataset = DataSet(images, ids, angles,
+                    train = True,
+                    predicted=True)
+    for idx in range(len(dataset)):
+        # Items are 3-tuples; with predicted=True the second entry is the
+        # sample id, which is what gets printed as 'label' below.
+        img, label, _inc = dataset[idx]
+        img = img.numpy()
+        print('idx:', idx, 'label:', label, 'shape:', img.shape)
+        f, (ax1, ax2) = plt.subplots(1,2)
+        ax1.imshow(img[0])
+        ax2.imshow(img[1])
+        f.suptitle(str(label))
+
+        # NOTE(review): img is channels-first here; confirm that lee_filter
+        # accepts this layout.
+        filter_img = lee_filter(img)
+        # Number of pixels in channel 0 that the filter left unchanged.
+        print((filter_img[0] == img[0]).sum())
+        f, (ax1, ax2) = plt.subplots(1,2)
+        ax1.imshow(filter_img[0])
+        ax2.imshow(filter_img[1])
+        f.suptitle('filter_'+str(label))
+        # A single show() displays both the raw and the filtered figure.
+        plt.show()
+
+
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,2 @@`
	`1`	`+from .dataset import *`
	`2`	`+from .tool import object_crop, getmaxmask`