-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcommon.py
112 lines (93 loc) · 3.37 KB
/
common.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
import csv
import time
from functools import reduce
from sys import stderr
import matplotlib.pyplot as plt
import numpy as np
from bgd import bgd
from lsm import lsm
# Env Settings
# File name of the CSV data set (presumably the house records consumed by
# csv2dataset -- confirm against the callers).
CSV_FILE_NAME = 'houses_info.csv'
# Record count, and the 30% share of it (truncated to an int) set aside
# for testing.
NUM_ITEMS = 2998
NUM_TESTING_ITEMS = int(0.3 * NUM_ITEMS)
# NOTE(review): looks like the number of feature columns per record --
# confirm whether the constant bias column is included.
NUM_FEATURES = 19
def csv2dataset(csv_file, num_test):
    """Load a CSV file and divide it into training | testing sets.

    The first row of the file is treated as the table head and skipped.
    In every remaining row the first column is the target (total price)
    and the other columns are the factors. A constant ``1.0`` is put in
    front of each factor row so that the first weight multiplies x^0.
    Blank rows are ignored.

    The last ``num_test`` rows become the testing set; every row before
    them belongs to the training set.

    :param csv_file: path of the CSV file to read
    :param num_test: number of trailing rows reserved for testing
    :return: ``(x_train, y_train, x_test, y_test)`` as numpy arrays
    :raises ValueError: if ``num_test`` is negative or larger than the
        number of data rows (slicing would otherwise silently produce a
        wrong split)
    """
    features = []
    targets = []
    # newline='' lets the csv module do its own line-ending handling, as
    # its documentation requires for files passed to csv.reader.
    with open(csv_file, newline='') as f:
        rdr = csv.reader(f)
        next(rdr, None)  # skip table head; tolerate a completely empty file
        # retrieve X (factors) and Y (total prices)
        for line in rdr:
            if not line:
                # csv.reader yields [] for blank lines (e.g. a trailing one)
                continue
            # leading 1.0: a1*x^0 == a1*1; line[0] is the (total) price
            features.append([1.0] + [float(s) for s in line[1:]])
            targets.append(float(line[0]))

    if not 0 <= num_test <= len(features):
        raise ValueError(
            'num_test must be between 0 and {} (got {})'.format(
                len(features), num_test))

    split = len(features) - num_test
    # Targets are 1-D, so no transpose is needed (it is a no-op on 1-D
    # arrays).
    x_train = np.array(features[:split])
    y_train = np.array(targets[:split])
    x_test = np.array(features[split:])
    y_test = np.array(targets[split:])
    return x_train, y_train, x_test, y_test
def train_model(X_train, Y_train, optimizer):
    """Fit the weights with the selected optimizer and report the time taken.

    :param X_train: training inputs, handed unchanged to the optimizer
    :param Y_train: training targets, handed unchanged to the optimizer
    :param optimizer: ``'lsm'`` or ``'bgd'``; selects which of the two
        solver functions imported by this module is called
    :return: ``(weights, elapsed)`` -- the solver's return value and the
        training time in seconds
    :raises SystemExit: with status 1 (after a message on stderr) when
        ``optimizer`` is neither ``'lsm'`` nor ``'bgd'``
    """
    # Reject unknown names before starting the clock.
    if optimizer not in ('lsm', 'bgd'):
        print('optimizers available: \'lsm\' or \'bgd\'', file=stderr)
        # Raise SystemExit directly: the bare exit() helper is injected by
        # the site module for interactive use and may be missing.
        raise SystemExit(1)
    solver = lsm if optimizer == 'lsm' else bgd

    # perf_counter is monotonic and high-resolution, so the measured
    # interval cannot be skewed by system clock adjustments.
    tic = time.perf_counter()
    weights = solver(X_train, Y_train)
    elapsed = time.perf_counter() - tic  # stop before any printing

    print('--------------------')
    print('time elapsed: {0:.2f} sec'.format(elapsed))
    print('[' + optimizer + '] weights: ' + str(weights), end='\n\n')
    return weights, elapsed
def test_model(X_test, Y_test, weights, time_cost=None):
    """Print per-sample predictions and return the avg. error rate (%).

    For every pair ``(x, y)`` from ``X_test`` / ``Y_test`` the prediction
    is ``np.dot(x, weights)`` and the error is ``|y - pred| / |y| * 100``.
    Targets are expected to be non-zero, since the error is relative to
    the target.

    :param X_test: iterable of input rows
    :param Y_test: iterable of actual target values, parallel to ``X_test``
    :param weights: weight vector applied to each row
    :param time_cost: optional training time in seconds; printed whenever
        it is not ``None`` (so a measured ``0.0`` is still reported)
    :return: the mean of the per-sample error percentages, or ``nan`` when
        there are no test samples
    """
    errors = []
    for x, y in zip(X_test, Y_test):
        pred = np.dot(x, weights)
        # abs(y) keeps the percentage non-negative for negative targets
        er = abs(y - pred) / abs(y) * 100
        print('pred: ¥{:.2f}\t\t'.format(pred, ) + 'actual: ¥{:.0f}\t\t'.format(y) + 'error: {:.3f}%'.format(er))
        errors.append(er)

    # An empty test set has no meaningful average; report nan, not a crash.
    avg_error = sum(errors) / len(errors) if errors else float('nan')

    print('--------------------')
    print('avg. error: {:.3f}%'.format(avg_error))
    if time_cost is not None:
        print('training time cost: {:.2f} sec'.format(time_cost))
    return avg_error
def plot_prediction(X_test, Y_test, weights):
    """Show prediction errors as scatter plots against two key factors.

    Two figures are drawn: total prices vs. column 1 of each test row
    (unit prices) and total prices vs. column 7 (construction areas).
    Each figure overlays the real values from ``Y_test`` with the
    predictions ``np.dot(row, weights)``; both are displayed at the end.
    """
    # One pass over the test rows; every later step reads from this list.
    rows = [np.array(sample) for sample in X_test]

    # pick up 2 key factors
    unit_prices = np.array([row[1] for row in rows])
    areas = np.array([row[7] for row in rows])
    predicted = np.array([np.dot(row, weights) for row in rows])

    # (figure number, title, x-axis data, legend location)
    figure_specs = (
        (1, 'Total Prices vs. Unit Prices', unit_prices, 'best'),
        (2, 'Total Prices vs. Construction Areas', areas, 'upper left'),
    )
    for fig_num, title, factor, legend_loc in figure_specs:
        plt.figure(num=fig_num)
        plt.title(title)
        plt.scatter(factor, Y_test, s=50, label='real value', alpha=0.5)
        plt.scatter(factor, predicted, s=50, label='prediction', alpha=0.5)
        plt.legend(loc=legend_loc)
        # drop the right and top borders of the axes
        axes = plt.gca()
        axes.spines['right'].set_color('none')
        axes.spines['top'].set_color('none')

    plt.show()