-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbalance_training_data.py
62 lines (45 loc) · 4.23 KB
/
balance_training_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
import numpy as np
import pandas as pd
from collections import Counter
from random import shuffle
import cv2
import time
# Balance a recorded training set: group the samples by their one-hot label,
# cut the w/a/d/n groups down to the size of the smallest of them, shuffle
# everything together and save the result as a new .npy file.

SOURCE_PATH = 'training_data_grey.npy'
OUTPUT_PATH = 'training_data_grey_shuffled.npy'

# One-hot labels keyed by bucket name.
# NOTE(review): presumably the W/A/D/S keys plus "no key pressed" -- confirm
# against the script that records the training data.
CHOICE_LABELS = {
    'w': [1, 0, 0, 0, 0],
    'a': [0, 1, 0, 0, 0],
    'd': [0, 0, 1, 0, 0],
    's': [0, 0, 0, 1, 0],
    'n': [0, 0, 0, 0, 1],
}

# Buckets that are truncated to a common size. 's' is intentionally absent:
# it is kept at full size and is not allowed to limit the other classes.
BALANCED_CLASSES = ('w', 'a', 'd', 'n')


def _label_counts(samples) -> Counter:
    """Return how often each label (element 1 of a sample) occurs.

    Labels are stringified so that unhashable list labels can be counted.
    """
    return Counter(str(sample[1]) for sample in samples)


def _split_by_choice(samples) -> dict:
    """Group ``[img, choice]`` samples into one list per known label.

    Returns a dict mapping every key of ``CHOICE_LABELS`` to the list of
    ``[img, choice]`` pairs carrying that label, in input order. Samples whose
    label matches no known one-hot vector are reported on stdout and dropped.
    """
    buckets = {name: [] for name in CHOICE_LABELS}
    for sample in samples:
        img = sample[0]
        choice = sample[1]
        # An ndarray label compared with a list yields an array whose truth
        # value is ambiguous, so normalise it to a plain list first.
        label = choice.tolist() if isinstance(choice, np.ndarray) else choice
        for name, one_hot in CHOICE_LABELS.items():
            if label == one_hot:
                buckets[name].append([img, choice])
                break
        else:
            print("No matches! (imposible)")
            print(choice)
    return buckets


def _balance(buckets: dict) -> dict:
    """Truncate every bucket in ``BALANCED_CLASSES`` to their shortest length.

    The first ``limit`` samples of each bucket are kept; other buckets are
    left untouched. The dict is modified in place and returned.
    """
    limit = min(len(buckets[name]) for name in BALANCED_CLASSES)
    for name in BALANCED_CLASSES:
        buckets[name] = buckets[name][:limit]
    return buckets


def _as_object_array(samples):
    """Pack ``[img, choice]`` pairs into an ``(N, 2)`` object array.

    Filling the array cell by cell keeps NumPy from trying to broadcast the
    image and the label into one rectangular array, which fails (or silently
    produces the wrong shape) for such ragged rows.
    """
    packed = np.empty((len(samples), 2), dtype=object)
    for row, (img, choice) in enumerate(samples):
        packed[row, 0] = img
        packed[row, 1] = choice
    return packed


def main(source_path: str = SOURCE_PATH, output_path: str = OUTPUT_PATH) -> None:
    """Load, balance, shuffle and save the training data.

    Args:
        source_path: .npy file holding an array of ``[img, choice]`` samples.
        output_path: .npy file the balanced, shuffled samples are written to.

    Prints the label distribution and sample count before and after balancing.
    """
    # The samples are Python objects, so the file is pickled and must be
    # loaded with allow_pickle=True.
    # SECURITY: unpickling can execute arbitrary code -- only load files you
    # recorded yourself.
    train_data = np.load(source_path, allow_pickle=True)
    print(_label_counts(train_data))
    print(len(train_data))

    buckets = _balance(_split_by_choice(train_data))
    final_data = (
        buckets['w'] + buckets['a'] + buckets['d'] + buckets['s'] + buckets['n']
    )
    shuffle(final_data)

    print(len(final_data))
    print(_label_counts(final_data))

    np.save(output_path, _as_object_array(final_data))


if __name__ == "__main__":
    main()