Skip to content

Commit bbae05d

Browse files
committed
linear reg
1 parent bcb23f9 commit bbae05d

File tree

2 files changed

+187
-14
lines changed

2 files changed

+187
-14
lines changed

net_launcher.py

+24-14
Original file line numberDiff line numberDiff line change
@@ -2,17 +2,18 @@
22
import numpy as np
33
import tensorflow as tf
44
from sklearn.model_selection import train_test_split
5+
from time import clock
56

67
class NetLauncher(object):
78

89
def __init__(self, name_csv = 'feature_importanceRF.csv', predict_var ='vmlinux', drop_feature = True,
910
nb_features = 1000, learning_rate1 = 0.5, learning_rate2 = 0.025, nb_node_layer1 = 200,
1011
nb_node_layer2 = 300, batch_size = 50, nb_epochs = 30, training_size = 0.9):
11-
12+
self.name_csv = name_csv
13+
self.nb_features = nb_features
14+
self.drop_feature = drop_feature
1215
f = FeaturesLoader(predict_var = predict_var, name_csv = name_csv, nb_features = nb_features, drop_feature = drop_feature)
13-
1416
self.features = f.get_selected_features()
15-
1617
self.predict_var = predict_var
1718
self.learning_rate1 = learning_rate1
1819
self.learning_rate2 = learning_rate2
@@ -24,7 +25,7 @@ def __init__(self, name_csv = 'feature_importanceRF.csv', predict_var ='vmlinux'
2425

2526
def create_train_test_set(self):
2627

27-
n = 65000
28+
n = 92000
2829
sizes = np.array(self.features[0:n][self.predict_var])
2930
x_train, x_test, y_train, y_test = train_test_split(self.features.drop(self.predict_var, axis=1)[0:n], sizes, test_size = 1-self.training_size)
3031

@@ -36,12 +37,11 @@ def create_train_test_set(self):
3637

3738
return (x_train, y_train, x_test, y_test)
3839

39-
def compute_tiny(self):
40-
#, batch_size=20, nb_epochs=5, learning_rate=1000):
41-
40+
def compute_tiny(self):#, batch_size=20, nb_epochs=5, learning_rate=1000):
41+
e = clock()
4242
batch_size = self.batch_size
4343
nb_epochs = self.nb_epochs
44-
learning_rate = self.learning_rate
44+
learning_rate = self.learning_rate1
4545

4646
training_x, training_y, testing_x, testing_y = self.create_train_test_set()
4747

@@ -90,14 +90,20 @@ def compute_tiny(self):
9090
for i in range(nb_batch_test):
9191
mape_test += sess.run(test_cost)
9292
print("Test final cost =", mape_test / nb_batch_test)
93+
s = clock()
94+
self.save_csv(mape_train / nb_batch_train, mape_test / nb_batch_test, s-e)
9395
return (mape_train / nb_batch_train, mape_test / nb_batch_test)
94-
96+
97+
def save_csv(self, mape_train, mape_test, time):
    """Append one result row to ``res.csv``: the full hyper-parameter
    configuration followed by the train/test MAPE and the elapsed time.

    Each call writes a newline plus one comma-separated record, so the
    file accumulates one row per training run.
    """
    row = [
        self.name_csv, self.predict_var, self.drop_feature,
        self.nb_features, self.learning_rate1, self.learning_rate2,
        self.nb_node_layer1, self.nb_node_layer2, self.batch_size,
        self.nb_epochs, self.training_size,
        mape_train, mape_test, time,
    ]
    with open('res.csv', 'a') as out:
        out.write('\n' + ','.join(str(value) for value in row))
100+
95101
def compute_small(self):
96102
#batch_size=20, nb_epochs=5, learning_rate=10, nb_node_layer1=200):
97-
103+
e = clock()
98104
batch_size = self.batch_size
99105
nb_epochs = self.nb_epochs
100-
learning_rate = self.learning_rate
106+
learning_rate = self.learning_rate1
101107
nb_node_layer1 = self.nb_node_layer1
102108
training_x, training_y, testing_x, testing_y = self.create_train_test_set()
103109

@@ -153,11 +159,13 @@ def compute_small(self):
153159
for i in range(nb_batch_test):
154160
mape_test += sess.run(test_cost)
155161
print("Test final cost =", mape_test / nb_batch_test)
162+
s = clock()
163+
self.save_csv(mape_train / nb_batch_train, mape_test / nb_batch_test, s-e)
156164
return (mape_train / nb_batch_train, mape_test / nb_batch_test)
157165

158166

159167
def compute_standard(self):
160-
168+
e = clock()
161169
batch_size = self.batch_size
162170
nb_epochs = self.nb_epochs
163171
learning_rate1 = self.learning_rate1
@@ -241,11 +249,13 @@ def compute_standard(self):
241249
for i in range(nb_batch_test):
242250
mape_test += sess.run(test_cost)
243251
print("Test final cost =", mape_test / nb_batch_test)
252+
s = clock()
253+
self.save_csv(mape_train / nb_batch_train, mape_test / nb_batch_test, s-e)
244254
return (mape_train / nb_batch_train, mape_test / nb_batch_test)
245255

246256
def launch(self):
    """Dispatch training to the architecture sized for ``training_size``.

    The dataset holds ~92k configurations (see the ``n = 92000`` cap in
    create_train_test_set), so the thresholds are expressed as fractions
    of 92: below 1/92 use the tiny network, below 10/92 the small one,
    otherwise the standard two-layer network.

    Returns the ``(mape_train, mape_test)`` tuple produced by the chosen
    ``compute_*`` method.
    """
    # Plain true division already yields a float in Python 3; the
    # original float(1/92) casts were redundant.
    tiny_threshold = 1 / 92
    small_threshold = 10 / 92
    if self.training_size < tiny_threshold:
        return self.compute_tiny()
    # The original second condition used a strict '> 1/92', so a
    # training_size of exactly 1/92 matched neither branch and fell
    # through to the heaviest model; a single '<' closes that gap.
    if self.training_size < small_threshold:
        return self.compute_small()
    return self.compute_standard()

tristate regression.ipynb

+163
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,163 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "markdown",
5+
"metadata": {},
6+
"source": [
7+
"Tri-state regression"
8+
]
9+
},
10+
{
11+
"cell_type": "code",
12+
"execution_count": 1,
13+
"metadata": {},
14+
"outputs": [],
15+
"source": [
16+
"import tuxml\n",
17+
"import numpy as np\n",
18+
"from sklearn.model_selection import train_test_split\n",
19+
"import statsmodels.api as sm\n",
20+
"from sklearn.linear_model import LinearRegression"
21+
]
22+
},
23+
{
24+
"cell_type": "code",
25+
"execution_count": 2,
26+
"metadata": {},
27+
"outputs": [],
28+
"source": [
29+
"dataset = tuxml.load_dataset()"
30+
]
31+
},
32+
{
33+
"cell_type": "code",
34+
"execution_count": 3,
35+
"metadata": {},
36+
"outputs": [],
37+
"source": [
38+
"y = np.array(dataset['vmlinux'])\n",
39+
"x_train, x_test, y_train, y_test = train_test_split(dataset.drop('vmlinux', axis=1), y, test_size = 0.1)"
40+
]
41+
},
42+
{
43+
"cell_type": "code",
44+
"execution_count": 4,
45+
"metadata": {},
46+
"outputs": [],
47+
"source": [
48+
"reg = LinearRegression(fit_intercept=False).fit(x_train, y_train)"
49+
]
50+
},
51+
{
52+
"cell_type": "code",
53+
"execution_count": 5,
54+
"metadata": {},
55+
"outputs": [],
56+
"source": [
57+
"y_pred = reg.predict(x_test)\n",
58+
"mape_test = (y_pred-y_test)/y_test"
59+
]
60+
},
61+
{
62+
"cell_type": "code",
63+
"execution_count": 10,
64+
"metadata": {},
65+
"outputs": [
66+
{
67+
"name": "stdout",
68+
"output_type": "stream",
69+
"text": [
70+
"With the initial encoding, we get 42.11562432800913 % error\n"
71+
]
72+
}
73+
],
74+
"source": [
75+
"print(\"With the initial encoding, we get \", np.mean(100*np.abs(mape_test)), \"% error\")"
76+
]
77+
},
78+
{
79+
"cell_type": "code",
80+
"execution_count": 11,
81+
"metadata": {},
82+
"outputs": [],
83+
"source": [
84+
"dataset = tuxml.load_dataset()\n",
85+
"dataset = dataset.replace([0,1,2],[1,0,0])"
86+
]
87+
},
88+
{
89+
"cell_type": "code",
90+
"execution_count": 12,
91+
"metadata": {},
92+
"outputs": [],
93+
"source": [
94+
"y = np.array(dataset['vmlinux'])\n",
95+
"x_train, x_test, y_train, y_test = train_test_split(dataset.drop('vmlinux', axis=1), y, test_size = 0.1)"
96+
]
97+
},
98+
{
99+
"cell_type": "code",
100+
"execution_count": 13,
101+
"metadata": {},
102+
"outputs": [],
103+
"source": [
104+
"reg = LinearRegression(fit_intercept=False).fit(x_train, y_train)"
105+
]
106+
},
107+
{
108+
"cell_type": "code",
109+
"execution_count": 14,
110+
"metadata": {},
111+
"outputs": [],
112+
"source": [
113+
"y_pred = reg.predict(x_test)\n",
114+
"mape_test = (y_pred-y_test)/y_test"
115+
]
116+
},
117+
{
118+
"cell_type": "code",
119+
"execution_count": 15,
120+
"metadata": {},
121+
"outputs": [
122+
{
123+
"name": "stdout",
124+
"output_type": "stream",
125+
"text": [
126+
"With the new encoding, we get 42.84821700010437 % errors\n"
127+
]
128+
}
129+
],
130+
"source": [
131+
"print(\"With the new encoding, we get \", np.mean(100*np.abs(mape_test)), \"% errors\")"
132+
]
133+
},
134+
{
135+
"cell_type": "code",
136+
"execution_count": null,
137+
"metadata": {},
138+
"outputs": [],
139+
"source": []
140+
}
141+
],
142+
"metadata": {
143+
"kernelspec": {
144+
"display_name": "Python 3",
145+
"language": "python",
146+
"name": "python3"
147+
},
148+
"language_info": {
149+
"codemirror_mode": {
150+
"name": "ipython",
151+
"version": 3
152+
},
153+
"file_extension": ".py",
154+
"mimetype": "text/x-python",
155+
"name": "python",
156+
"nbconvert_exporter": "python",
157+
"pygments_lexer": "ipython3",
158+
"version": "3.7.3"
159+
}
160+
},
161+
"nbformat": 4,
162+
"nbformat_minor": 2
163+
}

0 commit comments

Comments
 (0)