-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathBinarize_3split.py
142 lines (116 loc) · 4.39 KB
/
Binarize_3split.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
import pandas as pd
import numpy as np
from initData import *
# Binarize the data set exposed by initData as `dfMatrix`.
#
# Columns 0-6 of the input (wages, hours, iq, educ, exper, tenure, age) are
# each one-hot encoded into three tercile bins; column 7 (black) is encoded
# into two columns.  Every row is then written out as the 1-based indices of
# its active binary attributes, comma separated.
import sys

# Never summarize arrays when printing.  NumPy rejects NaN as a threshold in
# current releases and recommends sys.maxsize for untruncated output.
np.set_printoptions(threshold=sys.maxsize)

# Names of the binarized attributes, in output-column order.
newAttrNames = np.array([
    "wage1", "wage2", "wage3",
    "hours1", "hours2", "hours3",
    "iq1", "iq2", "iq3",
    "educ1", "educ2", "educ3",
    "exper1", "exper2", "exper3",
    "tenure1", "tenure2", "tenure3",
    "age1", "age2", "age3",
    "not_black", "black",
])

# dfMatrix is provided by `from initData import *`; its first row is skipped
# (presumably a header row -- confirm against initData).
# np.asmatrix is the function np.mat used to alias; np.mat itself was removed
# in NumPy 2.0.
matrix = np.asmatrix(dfMatrix[1:])
y_len = matrix.shape[0]
attr_len = len(newAttrNames)

# Create new empty matrix: one row per observation, one column per binary
# attribute.
newMatrix = np.zeros(shape=(y_len, attr_len))


def _encode_terciles(values, encoded, source_col, first_target_col, label):
    """One-hot encode one column of `values` into three tercile bins.

    The 33.3rd and 66.7th percentiles of column `source_col` are the cut
    points.  For every row exactly one of the columns `first_target_col`,
    `first_target_col + 1`, `first_target_col + 2` of `encoded` is set to 1:
    below the lower cut, between the cuts (lower cut inclusive), or at/above
    the upper cut.  A value that matches none of the comparisons (e.g. NaN)
    leaves the row untouched and prints a diagnostic containing `label`.
    """
    column = values[0:, source_col]
    lower_cut = np.percentile(column, (1 / 3) * 100)
    upper_cut = np.percentile(column, (2 / 3) * 100)
    for row in range(values.shape[0]):
        value = values[row, source_col]
        if value < lower_cut:
            encoded[row, first_target_col] = 1
        elif lower_cut <= value < upper_cut:
            encoded[row, first_target_col + 1] = 1
        elif upper_cut <= value:
            encoded[row, first_target_col + 2] = 1
        else:
            print("Something whent wrong with " + label)


# Attributes binarized by percentile, listed in input-column order.  Input
# column k fills output columns 3k, 3k + 1 and 3k + 2.
_TERCILE_ATTRIBUTES = ("wages", "hours", "iq", "educ", "exper", "tenure", "age")
for source_col, label in enumerate(_TERCILE_ATTRIBUTES):
    _encode_terciles(matrix, newMatrix, source_col, 3 * source_col, label)

# black - already an indicator in input column 7, so it is split on its value
# (not on percentiles): < 1 -> "not_black" (column 21), == 1 -> "black"
# (column 22).
for row in range(y_len):
    black_flag = matrix[row, 7]
    if black_flag < 1:
        newMatrix[row, 21] = 1
    elif 1 == black_flag:
        newMatrix[row, 22] = 1
    else:
        print("Something whent wrong with black")

# Map each row number to the 1-based indices of its active attributes.
dictionary = {
    row: [attr + 1 for attr in range(attr_len) if newMatrix[row, attr] == 1]
    for row in range(y_len)
}

# One output line per observation.  Rows shorter than the longest one would be
# padded with NaN by from_dict, which the '%d' format cannot write.
df = pd.DataFrame.from_dict(dictionary, orient="index")
np.savetxt(r'toolbox_02450/Data/data.txt', df.values, fmt='%d', delimiter=",")