
Commit 072dd4d

python 3
1 parent 271399a commit 072dd4d

11 files changed: +127 -51 lines changed
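
The same Python 2 → 3 compatibility pattern is applied to every file below: a `from __future__`/`builtins` header, `print` statements rewritten as `print()` calls, and `xrange` replaced by `range`. A minimal sketch of the pattern (the script is hypothetical, not one of the repo files):

# hypothetical example.py -- the compatibility pattern used throughout this commit
from __future__ import print_function, division   # Python 3 print()/true division on Python 2
from builtins import range, input                  # Python 3 style range/input (from the 'future' package on Python 2)
# Note: you may need to update your version of future
# sudo pip install -U future

import numpy as np

x = np.random.randn(3)
for i in range(len(x)):                 # was: for i in xrange(len(x)):
    print("x[%d] = %f" % (i, x[i]))     # was: print "x[%d] = %f" % (i, x[i])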

supervised_class2/adaboost.py (+11 -5)

@@ -1,5 +1,11 @@
 # https://deeplearningcourses.com/c/machine-learning-in-python-random-forest-adaboost
 # https://www.udemy.com/machine-learning-in-python-random-forest-adaboost
+from __future__ import print_function, division
+from builtins import range, input
+# Note: you may need to update your version of future
+# sudo pip install -U future
+
+
 import numpy as np
 import matplotlib.pyplot as plt
 from sklearn.tree import DecisionTreeClassifier
@@ -17,7 +23,7 @@ def fit(self, X, Y):
     N, _ = X.shape
     W = np.ones(N) / N

-    for m in xrange(self.M):
+    for m in range(self.M):
      tree = DecisionTreeClassifier(max_depth=1)
      tree.fit(X, Y, sample_weight=W)
      P = tree.predict(X)
@@ -60,14 +66,14 @@ def score(self, X, Y):
 train_errors = np.empty(T)
 test_losses = np.empty(T)
 test_errors = np.empty(T)
-for num_trees in xrange(T):
+for num_trees in range(T):
   if num_trees == 0:
     train_errors[num_trees] = None
     test_errors[num_trees] = None
     test_losses[num_trees] = None
     continue
   if num_trees % 20 == 0:
-    print num_trees
+    print(num_trees)

   model = AdaBoost(num_trees)
   model.fit(Xtrain, Ytrain)
@@ -78,8 +84,8 @@ def score(self, X, Y):
   test_losses[num_trees] = loss

   if num_trees == T - 1:
-    print "final train error:", 1 - acc_train
-    print "final test error:", 1 - acc
+    print("final train error:", 1 - acc_train)
+    print("final test error:", 1 - acc)

 plt.plot(test_errors, label='test errors')
 plt.plot(test_losses, label='test losses')

supervised_class2/bagging_classification.py (+9 -3)

@@ -1,5 +1,11 @@
 # https://deeplearningcourses.com/c/machine-learning-in-python-random-forest-adaboost
 # https://www.udemy.com/machine-learning-in-python-random-forest-adaboost
+from __future__ import print_function, division
+from builtins import range, input
+# Note: you may need to update your version of future
+# sudo pip install -U future
+
+
 import numpy as np
 import matplotlib.pyplot as plt
 from sklearn.tree import DecisionTreeClassifier
@@ -34,7 +40,7 @@
 # lone decision tree
 model = DecisionTreeClassifier()
 model.fit(X, Y)
-print "score for 1 tree:", model.score(X, Y)
+print("score for 1 tree:", model.score(X, Y))

 # plot data with boundary
 plt.scatter(X[:,0], X[:,1], s=100, c=Y, alpha=0.5)
@@ -50,7 +56,7 @@ def __init__(self, B):
   def fit(self, X, Y):
     N = len(X)
     self.models = []
-    for b in xrange(self.B):
+    for b in range(self.B):
      idx = np.random.choice(N, size=N, replace=True)
      Xb = X[idx]
      Yb = Y[idx]
@@ -74,7 +80,7 @@ def score(self, X, Y):
 model = BaggedTreeClassifier(200)
 model.fit(X, Y)

-print "score for bagged model:", model.score(X, Y)
+print("score for bagged model:", model.score(X, Y))

 # plot data with boundary
 plt.scatter(X[:,0], X[:,1], s=100, c=Y, alpha=0.5)

supervised_class2/bagging_regression.py (+9 -3)

@@ -1,5 +1,11 @@
 # https://deeplearningcourses.com/c/machine-learning-in-python-random-forest-adaboost
 # https://www.udemy.com/machine-learning-in-python-random-forest-adaboost
+from __future__ import print_function, division
+from builtins import range, input
+# Note: you may need to update your version of future
+# sudo pip install -U future
+
+
 import numpy as np
 import matplotlib.pyplot as plt
 from sklearn.tree import DecisionTreeRegressor
@@ -21,7 +27,7 @@
 model = DecisionTreeRegressor()
 model.fit(Xtrain, Ytrain)
 prediction = model.predict(x_axis.reshape(T, 1))
-print "score for 1 tree:", model.score(x_axis.reshape(T, 1), y_axis)
+print("score for 1 tree:", model.score(x_axis.reshape(T, 1), y_axis))

 # plot the lone decision tree's predictions
 plt.plot(x_axis, prediction)
@@ -36,7 +42,7 @@ def __init__(self, B):
   def fit(self, X, Y):
     N = len(X)
     self.models = []
-    for b in xrange(self.B):
+    for b in range(self.B):
      idx = np.random.choice(N, size=N, replace=True)
      Xb = X[idx]
      Yb = Y[idx]
@@ -59,7 +65,7 @@ def score(self, X, Y):

 model = BaggedTreeRegressor(200)
 model.fit(Xtrain, Ytrain)
-print "score for bagged tree:", model.score(x_axis.reshape(T, 1), y_axis)
+print("score for bagged tree:", model.score(x_axis.reshape(T, 1), y_axis))
 prediction = model.predict(x_axis.reshape(T, 1))

 # plot the bagged regressor's predictions

supervised_class2/bias_variance_demo.py (+15 -9)

@@ -1,5 +1,11 @@
 # https://deeplearningcourses.com/c/machine-learning-in-python-random-forest-adaboost
 # https://www.udemy.com/machine-learning-in-python-random-forest-adaboost
+from __future__ import print_function, division
+from builtins import range, input
+# Note: you may need to update your version of future
+# sudo pip install -U future
+
+
 import numpy as np
 import matplotlib.pyplot as plt
 from sklearn.linear_model import LinearRegression
@@ -17,7 +23,7 @@
 def make_poly(x, D):
   N = len(x)
   X = np.empty((N, D+1))
-  for d in xrange(D+1):
+  for d in range(D+1):
    X[:,d] = x**d
    if d > 1:
      X[:,d] = (X[:,d] - X[:,d].mean()) / X[:,d].std()
@@ -53,7 +59,7 @@ def f(X):
 # create the model
 model = LinearRegression()

-for k in xrange(NUM_DATASETS):
+for k in range(NUM_DATASETS):
   Y = f_X + np.random.randn(N)*NOISE_VARIANCE

   Xtrain = Xpoly[:Ntrain]
@@ -62,7 +68,7 @@ def f(X):
   Xtest = Xpoly[Ntrain:]
   Ytest = Y[Ntrain:]

-  for d in xrange(MAX_POLY):
+  for d in range(MAX_POLY):
    model.fit(Xtrain[:,:d+2], Ytrain)
    predictions = model.predict(Xpoly[:,:d+2])

@@ -87,8 +93,8 @@ def f(X):

 # show all prediction curves for each polynomial degree
 # along with the mean curve
-for d in xrange(MAX_POLY):
-  for k in xrange(NUM_DATASETS):
+for d in range(MAX_POLY):
+  for k in range(NUM_DATASETS):
    plt.plot(x_axis, prediction_curves[:,k,d], color='green', alpha=0.5)
   plt.plot(x_axis, prediction_curves[:,:,d].mean(axis=1), color='blue', linewidth=2.0)
   plt.title("All curves for degree = %d" % (d+1))
@@ -98,15 +104,15 @@ def f(X):
 avg_train_prediction = np.zeros((Ntrain, MAX_POLY))
 squared_bias = np.zeros(MAX_POLY)
 f_Xtrain = f_X[:Ntrain]
-for d in xrange(MAX_POLY):
-  for i in xrange(Ntrain):
+for d in range(MAX_POLY):
+  for i in range(Ntrain):
    avg_train_prediction[i,d] = train_predictions[i,:,d].mean()
  squared_bias[d] = ((avg_train_prediction[:,d] - f_Xtrain)**2).mean()

 # calculate the variance
 variances = np.zeros((Ntrain, MAX_POLY))
-for d in xrange(MAX_POLY):
-  for i in xrange(Ntrain):
+for d in range(MAX_POLY):
+  for i in range(Ntrain):
    delta = train_predictions[i,:,d] - avg_train_prediction[i,d]
    variances[i,d] = delta.dot(delta) / N
 variance = variances.mean(axis=0)

supervised_class2/bootstrap.py (+9 -3)

@@ -1,5 +1,11 @@
 # https://deeplearningcourses.com/c/machine-learning-in-python-random-forest-adaboost
 # https://www.udemy.com/machine-learning-in-python-random-forest-adaboost
+from __future__ import print_function, division
+from builtins import range, input
+# Note: you may need to update your version of future
+# sudo pip install -U future
+
+
 import numpy as np
 import matplotlib.pyplot as plt
 from scipy.stats import norm, t
@@ -8,10 +14,10 @@
 N = 20
 X = np.random.randn(N)

-print "sample mean of X:", X.mean()
+print("sample mean of X:", X.mean())

 individual_estimates = np.empty(B)
-for b in xrange(B):
+for b in range(B):
   sample = np.random.choice(X, size=N)
   individual_estimates[b] = sample.mean()

@@ -25,7 +31,7 @@
 lower2 = X.mean() + norm.ppf(0.025)*X.std()/np.sqrt(N)
 upper2 = X.mean() + norm.ppf(0.975)*X.std()/np.sqrt(N)

-print "bootstrap mean of X:", bmean
+print("bootstrap mean of X:", bmean)

 plt.hist(individual_estimates, bins=20)
 plt.axvline(x=lower, linestyle='--', color='g', label="lower bound for 95%% CI (bootstrap)")

supervised_class2/knn_dt_demo.py (+17 -3)

@@ -1,5 +1,11 @@
 # https://deeplearningcourses.com/c/machine-learning-in-python-random-forest-adaboost
 # https://www.udemy.com/machine-learning-in-python-random-forest-adaboost
+from __future__ import print_function, division
+from builtins import range, input
+# Note: you may need to update your version of future
+# sudo pip install -U future
+
+
 import numpy as np
 import matplotlib.pyplot as plt
 from sklearn.tree import DecisionTreeRegressor, DecisionTreeClassifier
@@ -27,6 +33,7 @@

 plt.scatter(Xtrain, Ytrain, s=50, alpha=0.7, c='blue')
 plt.scatter(Xtrain, model.predict(Xtrain.reshape(Ntrain, 1)), s=50, alpha=0.7, c='green')
+plt.title("decision tree - low bias, high variance")
 # plt.show()

 # plt.scatter(X, Y)
@@ -46,6 +53,7 @@
 plt.scatter(Xtrain, model.predict(Xtrain.reshape(Ntrain, 1)), s=50, alpha=0.7, c='green')
 plt.plot(Xaxis, Yaxis)
 plt.plot(Xaxis, model.predict(Xaxis.reshape(T, 1)))
+plt.title("decision tree - high bias, low variance")
 plt.show()

@@ -57,6 +65,7 @@
 plt.scatter(Xtrain, model.predict(Xtrain.reshape(Ntrain, 1)), s=50, alpha=0.7, c='green')
 plt.plot(Xaxis, Yaxis)
 plt.plot(Xaxis, model.predict(Xaxis.reshape(T, 1)))
+plt.title("knn - low bias, high variance")
 plt.show()

 # knn - high bias, low variance
@@ -67,6 +76,7 @@
 plt.scatter(Xtrain, model.predict(Xtrain.reshape(Ntrain, 1)), s=50, alpha=0.7, c='green')
 plt.plot(Xaxis, Yaxis)
 plt.plot(Xaxis, model.predict(Xaxis.reshape(T, 1)))
+plt.title("knn - high bias, low variance")
 plt.show()

@@ -76,10 +86,10 @@
 N = 100
 D = 2
 X = np.random.randn(N, D)
-X[:N/2] += np.array([1, 1]) # center it at (1,1)
-X[N/2:] += np.array([-1, -1]) # center it at (-1, -1)
+X[:N//2] += np.array([1, 1]) # center it at (1,1)
+X[N//2:] += np.array([-1, -1]) # center it at (-1, -1)

-Y = np.array([0]*(N/2) + [1]*(N/2))
+Y = np.array([0]*(N//2) + [1]*(N//2))


 def plot_decision_boundary(X, model):
@@ -110,6 +120,7 @@ def plot_decision_boundary(X, model):

 plt.scatter(X[:,0], X[:,1], s=50, c=Y, alpha=0.7)
 plot_decision_boundary(X, model)
+plt.title("dt - low bias, high variance")
 plt.show()

 # dt - high bias, low variance
@@ -118,6 +129,7 @@ def plot_decision_boundary(X, model):

 plt.scatter(X[:,0], X[:,1], s=50, c=Y, alpha=0.7)
 plot_decision_boundary(X, model)
+plt.title("dt - high bias, low variance")
 plt.show()

@@ -127,6 +139,7 @@ def plot_decision_boundary(X, model):

 plt.scatter(X[:,0], X[:,1], s=50, c=Y, alpha=0.7)
 plot_decision_boundary(X, model)
+plt.title("knn - low bias, high variance")
 plt.show()

 # knn - high bias, low variance
@@ -135,4 +148,5 @@ def plot_decision_boundary(X, model):

 plt.scatter(X[:,0], X[:,1], s=50, c=Y, alpha=0.7)
 plot_decision_boundary(X, model)
+plt.title("knn - high bias, low variance")
 plt.show()
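
The N/2 → N//2 edits above are a behavioral fix, not just style: under Python 3, and under Python 2 with `from __future__ import division`, `/` always returns a float, which can no longer be used as a slice index or a sequence-repeat count. A small illustrative sketch (not from the repo):

N = 100

# Python 2: N/2 == 50 (an int). Python 3, or __future__ division: N/2 == 50.0 (a float), so
#   X[:N/2]     -> indexing error (floats are not valid indices)
#   [0]*(N/2)   -> TypeError: can't multiply sequence by non-int of type 'float'

half = N // 2                      # floor division always yields an int
Y = [0] * half + [1] * half        # 50 zeros followed by 50 ones
print(len(Y), Y[49], Y[50])        # prints: 100 0 1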

supervised_class2/rf_classification.py (+15 -8)

@@ -3,6 +3,13 @@
 # mushroom data from:
 # https://archive.ics.uci.edu/ml/datasets/Mushroom
 # put all files in the folder ../large_files/
+from __future__ import print_function, division
+from future.utils import iteritems
+from builtins import range, input
+# Note: you may need to update your version of future
+# sudo pip install -U future
+
+
 import numpy as np
 import pandas as pd
 import matplotlib.pyplot as plt
@@ -39,19 +46,19 @@ def fit(self, df):

    # find dimensionality
    self.D = len(NUMERICAL_COLS)
-    for col, encoder in self.labelEncoders.iteritems():
+    for col, encoder in iteritems(self.labelEncoders):
      self.D += len(encoder.classes_)
-    print "dimensionality:", self.D
+    print("dimensionality:", self.D)

  def transform(self, df):
    N, _ = df.shape
    X = np.zeros((N, self.D))
    i = 0
-    for col, scaler in self.scalers.iteritems():
+    for col, scaler in iteritems(self.scalers):
      X[:,i] = scaler.transform(df[col].as_matrix().reshape(-1, 1)).flatten()
      i += 1

-    for col, encoder in self.labelEncoders.iteritems():
+    for col, encoder in iteritems(self.labelEncoders):
      # print "transforming col:", col
      K = len(encoder.classes_)
      X[np.arange(N), encoder.transform(df[col]) + i] = 1
@@ -73,7 +80,7 @@ def replace_missing(df):
  # set a special value = 'missing'
  for col in CATEGORICAL_COLS:
    if np.any(df[col].isnull()):
-      print col
+      print(col)
      df.loc[ df[col].isnull(), col ] = 'missing'


@@ -100,11 +107,11 @@ def get_data():

 # do a quick baseline test
 baseline = LogisticRegression()
-print "CV baseline:", cross_val_score(baseline, X, Y, cv=8).mean()
+print("CV baseline:", cross_val_score(baseline, X, Y, cv=8).mean())

 # single tree
 tree = DecisionTreeClassifier()
-print "CV one tree:", cross_val_score(tree, X, Y, cv=8).mean()
+print("CV one tree:", cross_val_score(tree, X, Y, cv=8).mean())

 model = RandomForestClassifier(n_estimators=20) # try 10, 20, 50, 100, 200
-print "CV forest:", cross_val_score(model, X, Y, cv=8).mean()
+print("CV forest:", cross_val_score(model, X, Y, cv=8).mean())
