Skip to content

Commit 0e22c94

Browse files
committed
first commit after the competition end one month
0 parents  commit 0e22c94

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

52 files changed

+3661
-0
lines changed

__pycache__/convNet.cpython-36.pyc

1.52 KB
Binary file not shown.

__pycache__/dataset.cpython-36.pyc

3.69 KB
Binary file not shown.

__pycache__/tool.cpython-36.pyc

1.48 KB
Binary file not shown.

data/__init__.py

+2
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
from .dataset import *
2+
from .tool import object_crop, getmaxmask
207 Bytes
Binary file not shown.
4.24 KB
Binary file not shown.

data/__pycache__/tool.cpython-36.pyc

2.42 KB
Binary file not shown.

data/data.py

+93
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
# https://www.kaggle.com/nanigans/pytorch-starter/notebook
2+
import os
3+
import math
4+
import pandas as pd
5+
import numpy as np
6+
import matplotlib.pyplot as plt
7+
8+
# 1604 item total
9+
# band_1,2 id, inc_angle, is_ice
10+
# Directory holding the competition JSON files; train.json is read as soon
# as this module is imported.
path = '/home/lxg/codedata/ice/'
data = pd.read_json(os.path.join(path, 'train.json'))

# Turn each band into a 75x75 array and make inc_angle numeric; entries
# that cannot be parsed as numbers become NaN.
# band_1 min-34.715858, max3.98
# band_2 min-35.403362, max-6.934982
data = data.assign(
    band_1=data['band_1'].apply(lambda x: np.array(x).reshape(75, 75)),
    band_2=data['band_2'].apply(lambda x: np.array(x).reshape(75, 75)),
    inc_angle=pd.to_numeric(data['inc_angle'], errors='coerce'),
)
19+
# inc_angle 1604, 1471 notnan 133 nan, min24.75, max45.9, mean39.26
20+
# 753 True, 851 False
21+
# split
22+
# train = data.sample(frac=0.8)
23+
# val = data[~data.isin(train)].dropna()
24+
25+
def plotSample(df, idx):
    '''
    Show one sample in a 2x2 figure: the two band images on the top row and
    a 256-bin histogram of each band on the bottom row.

    df:  frame with 'band_1', 'band_2', 'inc_angle' and 'is_iceberg' columns
    idx: positional row index of the sample to draw

    The figure title is the incidence angle immediately followed by 'ship'
    or 'ice', picked by using the is_iceberg value as an index.
    '''
    class_names = ('ship', 'ice')
    band_1 = df['band_1'].iloc[idx]
    band_2 = df['band_2'].iloc[idx]

    fig, axes = plt.subplots(2, 2)
    (image_ax_1, image_ax_2), (hist_ax_1, hist_ax_2) = axes
    image_ax_1.imshow(band_1)
    image_ax_2.imshow(band_2)
    for axis, band in ((hist_ax_1, band_1), (hist_ax_2, band_2)):
        axis.hist(band.ravel(), bins=256, fc='k', ec='k')

    fig.set_figheight(10)
    fig.set_figwidth(10)
    title = str(df['inc_angle'].iloc[idx]) + class_names[df['is_iceberg'].iloc[idx]]
    plt.suptitle(title)
    plt.show()
36+
37+
def plotMinMax(df):
    '''
    Plot histograms of the per-image minimum and maximum of both bands.

    df: frame whose 'band_1' and 'band_2' cells are arrays (anything with
        .min() / .max()).

    The statistics are taken from the frame passed in; earlier this function
    ignored its argument and always read the module-level frame.
    Layout of the 2x2 figure: min of band_1, max of band_1, min of band_2,
    max of band_2.
    '''
    min_max = pd.DataFrame()
    min_max['min_1'] = df['band_1'].apply(lambda x: x.min())
    min_max['max_1'] = df['band_1'].apply(lambda x: x.max())
    min_max['min_2'] = df['band_2'].apply(lambda x: x.min())
    min_max['max_2'] = df['band_2'].apply(lambda x: x.max())

    f, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2)
    # Column order of min_max matches the axis order row by row.
    for axis, column in zip((ax1, ax2, ax3, ax4), min_max.columns):
        axis.hist(min_max[column])
    f.set_figheight(20)
    f.set_figwidth(20)
    plt.show()
51+
52+
def splitSave(df, frac=0.8, out_dir=None, random_state=None):
    '''
    Randomly split df into a training and a validation part and save both
    as JSON.

    df:           frame to split; its index labels are assumed to be unique
    frac:         fraction of rows that goes to the training part
    out_dir:      directory for 'train_train.json' and 'train_val.json';
                  defaults to the module-level `path`
    random_state: forwarded to DataFrame.sample for a reproducible split

    The validation part is every row whose index label was not sampled.
    The previous cell-wise isin()/dropna() approach also discarded any
    validation row containing a NaN (for example a missing inc_angle).
    '''
    if out_dir is None:
        out_dir = path
    train = df.sample(frac=frac, random_state=random_state)
    val = df.drop(train.index)
    train.to_json(os.path.join(out_dir, 'train_train.json'))
    val.to_json(os.path.join(out_dir, 'train_val.json'))
    print('split done')
58+
59+
def amplitudeSplit(df):
    '''
    Meant to split band_1 / band_2 according to the angle value.

    So far it only adds an 'angle' column to df in place: a copy of
    'inc_angle' in which NaN entries are replaced by 45. Nothing is returned.
    '''
    def angle_or_default(value):
        if math.isnan(value):
            return 45
        return value

    df['angle'] = df['inc_angle'].apply(angle_or_default)
64+
65+
66+
def splitAndSaveTest(data_dir=None):
    '''
    Cut test.json into three consecutive parts and save them as
    test1.json, test2.json and test3.json in the same directory.

    data_dir: directory containing test.json; defaults to the module-level
              `path`

    Before saving, each band is reshaped to a 75x75 array and inc_angle is
    converted to a number (unparseable entries become NaN). The cut points
    use integer division: under Python 3 `length/3` is a float, which is
    not a valid slice bound.
    '''
    if data_dir is None:
        data_dir = path
    test = pd.read_json(os.path.join(data_dir, 'test.json'))
    for band in ('band_1', 'band_2'):
        test[band] = test[band].apply(lambda x: np.array(x).reshape(75, 75))
    test['inc_angle'] = pd.to_numeric(test['inc_angle'], errors='coerce')

    length = test.shape[0]
    print('total', test.shape)
    first_cut = length // 3
    second_cut = length * 2 // 3
    parts = (
        test.iloc[:first_cut],
        test.iloc[first_cut:second_cut],
        test.iloc[second_cut:],
    )
    # Sanity check: the three parts together must cover every row.
    print(sum(part.shape[0] for part in parts))

    for number, part in enumerate(parts, start=1):
        part.to_json(os.path.join(data_dir, 'test%d.json' % number))
82+
83+
if __name__ == '__main__':
    # Running the module as a script writes the train/validation split.
    # Other one-off helpers that were used from here, enable as needed:
    #   splitAndSaveTest()
    #   plotMinMax(data)
    #   for i in range(100, 200):
    #       plotSample(data, i)
    #   data.to_json(os.path.join(path, 'train_clean.json'))
    splitSave(data)

data/dataset.py

+202
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,202 @@
1+
import os
2+
import cv2
3+
import random
4+
import pandas as pd
5+
import numpy as np
6+
import torch.utils.data as data
7+
import torch
8+
from .tool import randomCrop, rotate, lee_filter, object_crop, getMaskImg
9+
10+
def read_clean(path, file, predicted=False):
    '''
    Load a train or test JSON file and return arrays ready for DataSet.

    path:      directory containing the file
    file:      file name inside path
    predicted: False -> the second return value is the 'is_iceberg' column
               True  -> the second return value is the list of sample ids

    return:
        full_img_tr: numpy array of shape (N, 75, 75, 2), band_1 and band_2
                     stacked along the last axis
        labels or ids: data['is_iceberg'] as a numpy array, or
                       list(data['id']) when predicted is True
        inc_angle: float numpy array; missing or unparseable angles are
                   replaced with 0 (not with the mean)
    '''
    data = pd.read_json(os.path.join(path, file))

    # Each cell holds one image, either flat (5625 values) or already 75x75.
    band_1_tr = np.array(data['band_1'].tolist()).reshape(-1, 75, 75)
    band_2_tr = np.array(data['band_2'].tolist()).reshape(-1, 75, 75)
    # A third channel, (band_1**2 + band_2**2) / 2, was tried and is disabled.
    full_img_tr = np.stack([band_1_tr, band_2_tr], axis=-1)  # N,75,75,2

    # Coerce first so raw files that still contain strings such as 'na' are
    # handled, and build a fresh array instead of writing into the frame.
    inc_angle = (
        pd.to_numeric(data['inc_angle'], errors='coerce')
        .fillna(0)
        .to_numpy(dtype=float, copy=True)
    )

    if predicted:
        return full_img_tr, list(data['id']), inc_angle
    return full_img_tr, data['is_iceberg'].to_numpy(), inc_angle
36+
37+
class train_cross():
    '''
    N-fold cross-validation splitter over in-memory arrays.

    The sample indices are shuffled once at construction; getset(k) then
    returns fold k as the held-out part and all other samples as the
    training part.
    '''
    def __init__(self, train, label, inc_angle, num):
        '''
        train:     array of samples; the first axis indexes the samples
        label:     per-sample labels, indexable with a list of positions
        inc_angle: per-sample angles, indexable with a list of positions
        num:       number of folds
        '''
        self.length = train.shape[0]
        self.num = num
        self.data = train
        self.label = label
        self.inc_angle = inc_angle
        self.image_list = list(range(self.length))
        random.shuffle(self.image_list)  # shuffled in place

    def getset(self, ids):
        '''
        Return the split for fold number ids (0 .. num-1).

        return: image_train, lab_train, inc_train,
                image_test, lab_test, inc_test

        Fold bounds use integer arithmetic, so the last fold always ends at
        the final sample. The old code tested `ids is not self.num-1`
        (identity, not equality) and used float bounds, which could leave
        the last sample out of every held-out fold.
        '''
        fold = int(ids)
        first_index = fold * self.length // self.num
        last_index = (fold + 1) * self.length // self.num
        test_list = self.image_list[first_index:last_index]

        image_test = self.data[test_list]
        lab_test = self.label[test_list]
        inc_test = self.inc_angle[test_list]

        # Everything that is not held out, in ascending index order.
        train_list = sorted(set(self.image_list) - set(test_list))
        image_train = self.data[train_list]
        lab_train = self.label[train_list]
        inc_train = self.inc_angle[train_list]

        return image_train, lab_train, inc_train, image_test, lab_test, inc_test
72+
73+
class DataSet(data.Dataset):
    '''
    Dataset over in-memory images with optional random augmentation.

    Every item is a tuple (img, target, inc):
        img:    float tensor, channels first, resized to
                image_size x image_size
        target: label[idx], or id[idx] when predicted is true
        inc:    one-element tensor holding incp[idx]
    '''
    def __init__(self, datap, labelp, incp, train, predicted=False):
        '''
        datap:     image array; first axis indexes samples
                   (per-sample layout WxHxC according to the original note)
        labelp:    labels, or sample ids when predicted is true
        incp:      per-sample incidence angles
        train:     enables the random augmentation; also passed to
                   object_crop
        predicted: treat labelp as ids instead of labels
        '''
        self.image_size = 40  # other sizes that were tried: 20, 75
        self.data = datap
        self.incp = incp
        self.predicted = predicted
        self.length = datap.shape[0]
        self.train = train
        # Exactly one of label / id is populated; the other stays empty.
        self.label, self.id = ([], labelp) if predicted else (labelp, [])

    def _augment(self, img):
        '''
        Apply the training-time augmentation to one image.

        Draws, in this order: one number for the left-right flip (p=0.5),
        one for the 85x85 upscale + crop (p=0.3) and, only when that did not
        fire, one more for the 7-pixel reflect padding + crop (p=0.6).
        Speckle noise, salt-and-pepper noise and random rotation were tried
        here as well and are disabled.
        '''
        if random.random() < 0.5:
            img = np.fliplr(img)

        if random.random() < 0.3:
            enlarged = cv2.resize(img, (85, 85))
            return randomCrop(enlarged, 75, 75)
        if random.random() < 0.6:
            padded = np.pad(img, ((7, 7), (7, 7), (0, 0)), 'reflect')
            return randomCrop(padded, 75, 75)
        return img

    def __getitem__(self, idx):
        img = self.data[idx]  # WxHxC
        # Disabled preprocessing experiments: subtracting the minimum (for
        # resnet18), negating the image, lee_filter speckle filtering and
        # PCA whitening.
        if self.train:
            img = self._augment(img)

        # object_crop also returns a max-area value that is not used here.
        img, _ = object_crop(img, self.train)
        img = cv2.resize(img, (self.image_size, self.image_size))

        # Move the channel axis to the front before converting to a tensor.
        img = torch.from_numpy(img.transpose(2, 0, 1)).float()
        inc = torch.Tensor([self.incp[idx]])

        target = self.id[idx] if self.predicted else self.label[idx]
        return img, target, inc

    def __len__(self):
        return self.length
167+
168+
169+
if __name__ == '__main__':
    # Visual smoke test: show every augmented sample next to its
    # lee_filter output. The previous version passed path/file keywords
    # that DataSet does not accept and unpacked two values from the
    # three-tuple returned by __getitem__.
    # NOTE(review): the module uses a relative import, so it presumably has
    # to be started as a module of its package - confirm.
    import matplotlib.pyplot as plt

    print('dataset main run')
    images, ids, angles = read_clean('/home/lxg/codedata/ice',
                                     'train_train.json',
                                     predicted=True)
    dataset = DataSet(images, ids, angles, train=True, predicted=True)
    for idx in range(len(dataset)):
        img, label, _ = dataset[idx]
        img = img.numpy()
        print('idx:', idx, 'label:', label, 'shape:', img.shape)
        f, (ax1, ax2) = plt.subplots(1, 2)
        ax1.imshow(img[0])
        ax2.imshow(img[1])
        f.suptitle(str(label))

        filter_img = lee_filter(img)
        # Number of pixels the filter left unchanged in the first channel.
        print((filter_img[0] == img[0]).sum())
        f, (ax1, ax2) = plt.subplots(1, 2)
        ax1.imshow(filter_img[0])
        ax2.imshow(filter_img[1])
        f.suptitle('filter_' + str(label))
        # Shows both figures created in this iteration.
        plt.show()
201+
202+

data/dataset.pyc

3.13 KB
Binary file not shown.

0 commit comments

Comments
 (0)