bufferKSplit.py
# hypothesis = theta0 + theta1 * sigma
import numpy as np
import matplotlib.pyplot as plt
import math
# sklearn.cross_validation was removed in scikit-learn 0.20; KFold now lives in model_selection
from sklearn.model_selection import KFold
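
# Overview (inferred from the code below): for each pid (series id) in data.csv,
# fit the buffer model theta0 + theta1*sigma by gradient descent under 4-fold
# cross-validation, keep the fold with the smallest accumulated overshoot, then
# write one ceil(buffer) value per row to bufferKsplit.csv and plot a1 (blue)
# against a2 + buffer (red).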
# m denotes the number of examples here, not the number of features
def gradientDescent(x, y, stdDev, theta, alpha, m, numIterations):
    # stdDev is unused inside the function; the parameter is kept so the
    # call sites below stay unchanged
    xTrans = x.transpose()
    for i in range(0, numIterations):
        hypothesis = np.dot(x, theta)
        loss = hypothesis - y
        cost = np.sum(loss ** 2) / (2 * m)  # squared-error cost (computed but not used in the update)
        gradient = np.dot(xTrans, loss) / m
        # batch update: step theta against the gradient
        theta = theta - alpha * gradient
    return theta
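
# A minimal sanity check for gradientDescent (hypothetical data, not part of the
# original script): with a bias column and one feature, theta should approach
# the true coefficients of y = 2 + 3*x.
#
#   x_demo = np.array([[1.0, 0.0], [1.0, 1.0], [1.0, 2.0]])
#   y_demo = np.array([2.0, 5.0, 8.0])
#   theta_demo = gradientDescent(x_demo, y_demo, 0.0, np.ones(2), 0.1, len(x_demo), 5000)
#   # theta_demo is approximately [2.0, 3.0]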
def genData(no1, no2, N, a1, a2, stDev):
    numPoints = no1 - N - 1
    x = np.zeros(shape=(numPoints, 2))
    y = np.zeros(shape=numPoints)
    # basically a straight line
    for i in range(0, numPoints):
        # bias feature
        x[i][0] = 1
        x[i][1] = stDev
        # our target variable
        y[i] = a2[i]
    return x, y
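
# Note: every row of x is the same pair [1, stDev], so the fit collapses to a
# single predicted value theta0 + theta1*stDev per pid, matching the hypothesis
# comment at the top of the file. no2 and a1 are accepted but not used here.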
# running counters and accumulators for the per-pid pass over data.csv
n = 0
N = 1
no1 = 0          # rows with a usable reading
no2 = 0          # rows marked 'Not Available'
index = []       # running row index, for plotting
pid = 'A'        # current series id
a1 = []          # values from data[4]
a2 = []          # values from data[5]
res = []         # per-row buffer predictions
count = 0
error = 0
err = 0
numIterations = 10000
alpha = 0.0005
stDev = 0.0
StDev = 0.0
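
# Assumed data.csv layout, inferred from the parsing below (not documented in
# the original source): data[1] is the series id (pid); data[4] and data[5]
# are integer readings, with data[4] possibly the string 'Not Available'.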
with open('data.csv', "r") as f1:
    lines = [x for x in f1]
for line in lines:
    data = line.split(",")
    count = count + 1
    if data[4] == 'Not Available':
        # missing reading: store zeros and count it separately
        no2 = no2 + 1
        a1.append(0)
        a2.append(0)
        index.append(no1 + no2)
    elif pid == data[1]:
        # same pid as the previous row: keep accumulating its readings
        no1 = no1 + 1
        a1.append(int(data[4]))
        a2.append(int(data[5]))
        index.append(no1 + no2)
    else:
        # new pid encountered: fit a model on the rows accumulated so far
        min_err = float("inf")
        pid = data[1]
        a1.append(int(data[4]))
        a2.append(int(data[5]))
        index.append(no1 + no2)
        no1 = no1 + 1
        x, y = genData(no1 - 1, no2, N, a1, a2, stDev)
        m, n = np.shape(x)
        theta = np.ones(n)
        # model_selection API: KFold takes n_splits and yields index arrays from .split(x)
        kf = KFold(n_splits=4)
        for train, test in kf.split(x):
            x_train, x_test, y_train, y_test = x[train], x[test], y[train], y[test]
            stDev = np.std(y_train)
            theta = gradientDescent(x_train, y_train, stDev, theta, alpha,
                                    len(x_train), numIterations)
            error = 0
            err = 0
            StDev = np.std(y_test)
            for i in range(0, len(x_test)):
                # count and accumulate the cases where a1 overshoots the buffered prediction
                if a1[test[i]] > a2[test[i]] + theta[0] + theta[1] * StDev:
                    error = error + 1
                    err = err + a1[test[i]] - a2[test[i]] - theta[0] - theta[1] * StDev
            # keep the theta from the fold with the smallest accumulated overshoot
            if err < min_err:
                thetaMin = theta
                min_err = err
                minDev = stDev
        # stDev = np.std(a1)
        theta = thetaMin
        print(theta)
        # emit one buffer value for every row seen for this pid
        for v in range(N, no1 + no2):
            res.append(math.ceil(theta[0] + theta[1] * stDev))
        N = no1 + no2
        no1 = N
        no2 = 0
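
# Note: minDev is recorded alongside the winning theta but never read afterwards;
# the buffer emitted above uses stDev from the last fold instead.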
res.append(0)
np.savetxt("bufferKsplit.csv", res, delimiter=",")
# sanity check: res must cover every row of a2 for the sum below
print(len(res))
print(len(a2))
res2 = []
for v in range(0, len(a2)):
    res2.append(res[v] + a2[v])
# blue: observed a1; red: a2 plus the learned buffer
plt.plot(index, a1, 'b', index, res2, 'r')
plt.show()
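
# Usage sketch (assumes a data.csv with the layout noted above in the working
# directory): running `python bufferKSplit.py` writes bufferKsplit.csv and
# opens the comparison plot.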