Skip to content

Commit be04d8c

Browse files
committed
add gaussian nb unsupervised deep learning
1 parent 273b0db commit be04d8c

File tree

1 file changed

+62
-0
lines changed

1 file changed

+62
-0
lines changed

unsupervised_class2/gaussian_nb.py

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
# https://deeplearningcourses.com/c/data-science-deep-learning-in-theano-tensorflow
2+
# https://www.udemy.com/data-science-deep-learning-in-theano-tensorflow
3+
from __future__ import print_function, division
4+
from future.utils import iteritems
5+
from builtins import range, input
6+
# Note: you may need to update your version of future
7+
# sudo pip install -U future
8+
9+
10+
import numpy as np
11+
from sklearn.decomposition import PCA
12+
# from sklearn.naive_bayes import GaussianNB # doesn't have smoothing
13+
from scipy.stats import norm
14+
from scipy.stats import multivariate_normal as mvn
15+
from util import getKaggleMNIST
16+
17+
18+
class GaussianNB(object):
    """Gaussian Naive Bayes classifier with additive variance smoothing.

    Each class is modelled by an axis-aligned (diagonal-covariance) Gaussian
    over the features. Class labels may be any sortable, hashable values
    (e.g. non-contiguous integers or strings); they are not required to be
    the integers ``0..K-1``.
    """

    def fit(self, X, Y, smoothing=1e-2):
        """Estimate per-class feature means, variances and class priors.

        Args:
            X: 2-D array-like of shape (N, D) holding the training samples.
            Y: 1-D array-like of length N holding the class label of each row.
            smoothing: constant added to every per-feature variance so that
                features with zero variance within a class do not produce a
                singular covariance.
        """
        X = np.asarray(X)
        Y = np.asarray(Y)
        self.gaussians = dict()
        self.priors = dict()
        n_samples = len(Y)
        for c in np.unique(Y):
            mask = (Y == c)
            current_x = X[mask]
            self.gaussians[c] = {
                'mean': current_x.mean(axis=0),
                'var': current_x.var(axis=0) + smoothing,
            }
            # Prior = fraction of training samples carrying this label.
            self.priors[c] = float(mask.sum()) / n_samples

    def score(self, X, Y):
        """Return the fraction of rows of X whose prediction equals Y."""
        P = self.predict(X)
        return np.mean(P == Y)

    def predict(self, X):
        """Return the most probable class label for every row of X.

        Args:
            X: 2-D array-like of shape (N, D).

        Returns:
            1-D array of length N containing the predicted labels (the same
            label values that were passed to ``fit``).
        """
        X = np.asarray(X)
        N = X.shape[0]
        # Map each label to a column position instead of using the label
        # itself as an index; this keeps labels such as {3, 7} or strings
        # from indexing out of bounds.
        classes = np.array(sorted(self.gaussians))
        P = np.zeros((N, len(classes)))
        for k, c in enumerate(classes):
            g = self.gaussians[c]
            # A 1-D `cov` is interpreted by scipy as a diagonal covariance.
            P[:, k] = (
                mvn.logpdf(X, mean=g['mean'], cov=g['var'])
                + np.log(self.priors[c])
            )
        return classes[np.argmax(P, axis=1)]
43+
44+
45+
# Load the Kaggle MNIST train/test split.
Xtrain, Ytrain, Xtest, Ytest = getKaggleMNIST()

# Baseline: Naive Bayes fitted directly on the raw features.
model1 = GaussianNB()
model1.fit(Xtrain, Ytrain)
for caption, features, targets in (
    ("NB train score:", Xtrain, Ytrain),
    ("NB test score:", Xtest, Ytest),
):
    print(caption, model1.score(features, targets))

# Second run: project onto 50 principal components, then fit Naive Bayes.
pca = PCA(n_components=50)
Ztrain = pca.fit_transform(Xtrain)
Ztest = pca.transform(Xtest)

model2 = GaussianNB()
model2.fit(Ztrain, Ytrain)
for caption, features, targets in (
    ("NB+PCA train score:", Ztrain, Ytrain),
    ("NB+PCA test score:", Ztest, Ytest),
):
    print(caption, model2.score(features, targets))

0 commit comments

Comments
 (0)