-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathmain.py
81 lines (62 loc) · 2.54 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
import numpy as np
# Number of feature columns in the CSV files (columns 1..13; column 0 is not
# used as a feature, and in train.csv the column after the features is the
# target).
N_FEATURES = 13

# Import phi and y from the train data set with a single parse of the file:
# the first N_FEATURES loaded columns are the features, the last one is y.
# ndmin=2 keeps both arrays two-dimensional even for a single-row file.
_train = np.loadtxt('data/train.csv', dtype='float', delimiter=',',
                    skiprows=1, usecols=tuple(range(1, N_FEATURES + 2)),
                    ndmin=2)
phi = _train[:, :N_FEATURES]
y = _train[:, N_FEATURES:]

# Import phi_test from test data set (same feature columns, no target)
phi_test = np.loadtxt('data/test.csv', dtype='float', delimiter=',',
                      skiprows=1, usecols=tuple(range(1, N_FEATURES + 1)),
                      ndmin=2)

# Min Max scaling for phi and phi_test (Feature Engineering).
# The minimum and range come from the training data only and are re-used for
# the test data so both sets are mapped with the same transformation.
col_min = phi.min(axis=0)
col_max = phi.max(axis=0)
col_range = col_max - col_min
# A constant training column would give 0/0 = NaN; divide by 1 instead so the
# column is simply mapped to 0.
col_range[col_range == 0] = 1.0
phi = (phi - col_min) / col_range
phi_test = (phi_test - col_min) / col_range

# Add a column of 1s to the right of phi and phi_test. The row count is taken
# from each array instead of being hard-coded, so any data set size works.
phi = np.concatenate((phi, np.ones((phi.shape[0], 1))), axis=1)
phi_test = np.concatenate((phi_test, np.ones((phi_test.shape[0], 1))), axis=1)

# Log scaling on y
y = np.log(y)
# Function to calculate the gradient of the regularised error function
def delta_w(p, phi, w, targets=None, reg=None):
    """Return the gradient of the p-norm regularised squared error at ``w``.

    The value computed is::

        2 * phi.T @ (phi @ w - targets) + reg * p * |w|**(p - 1) * sign(w)

    which is the gradient of
    ``||targets - phi @ w||**2 + reg * sum(|w|**p)``.  The ``sign(w)`` factor
    is applied for every ``p`` (including ``p == 2``, where the penalty
    gradient is ``2 * reg * w``), so negative weights are pulled towards zero
    rather than pushed away from it.

    Args:
        p: Exponent of the penalty term; must be >= 1.
        phi: Design matrix, one row per sample.
        w: Current weights as a column vector with one row per column of
            ``phi``.
        targets: Target column vector.  Defaults to the module-level ``y``.
        reg: Regularisation strength.  Defaults to the module-level
            ``lambd``.

    Returns:
        Array with the same shape as ``w`` holding the gradient.

    Raises:
        ValueError: If ``p`` is smaller than 1 (``|w|**(p - 1)`` is then
            unbounded at ``w == 0``).
    """
    if p < 1:
        raise ValueError("p must be >= 1, got %r" % (p,))
    # Fall back to the script-level globals so existing three-argument calls
    # keep working unchanged.
    if targets is None:
        targets = y
    if reg is None:
        reg = lambd

    # Multiplying phi.T by the residual avoids forming phi.T @ phi on every
    # call and is algebraically the same as phi.T @ phi @ w - phi.T @ targets.
    residual = np.dot(phi, w) - targets
    data_grad = 2 * np.dot(np.transpose(phi), residual)
    penalty_grad = reg * p * np.power(np.absolute(w), p - 1) * np.sign(w)
    return data_grad + penalty_grad
# Dictionary containing filenames as keys and p as values
filenames = {'output.csv': 2.0,
             'output_p1.csv': 1.75,
             'output_p2.csv': 1.5,
             'output_p3.csv': 1.3,
             }

# Hyperparameter lambda value. Kept at module level because delta_w reads
# it as a global; it is the same for every p, so it is set once.
lambd = 0.2
# Maximum step size
t = 0.00012
# Stop when two consecutive weight vectors are closer than this
tolerance = 1e-10

# Load values of id once; they do not depend on p
id_test = np.loadtxt('data/test.csv', dtype='int', delimiter=',',
                     skiprows=1, usecols=0, ndmin=2)

# For each item in this dictionary
for fname, p in filenames.items():
    # Set initial w to zeros, one weight per column of phi
    w = np.zeros((phi.shape[1], 1))
    # Calculate new value of w
    w_new = w - t * delta_w(p, phi, w)
    # Repeat steps until the change between consecutive w is below tolerance
    while np.linalg.norm(w_new - w) > tolerance:
        w = w_new
        w_new = w - t * delta_w(p, phi, w)
    # Calculate y for test data; exp undoes the log scaling applied to y
    y_test = np.exp(np.dot(phi_test, w_new))
    # Save the ids and y
    np.savetxt(fname, np.concatenate((id_test, y_test), axis=1),
               delimiter=',', fmt=['%d', '%f'], header='ID,MEDV', comments='')