Commit d3d1e48 ("rbm")
1 parent 9cc8ec3

1 file changed: +245 -0 lines changed
recommenders/rbm_tf_k.py

# https://udemy.com/recommender-systems
# https://deeplearningcourses.com/recommender-systems
from __future__ import print_function, division
from builtins import range
# Note: you may need to update your version of future
# sudo pip install -U future

import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.utils import shuffle

import pandas as pd
from scipy.sparse import lil_matrix, csr_matrix, save_npz, load_npz
from datetime import datetime


# is it possible to one-hot encode the data prior to feeding it
# into the neural network, so that we don't have to do it on the fly?
# yes, but:
# 1) scipy sparse doesn't support N-D matrices
# 2) you can use the 'sparse' standalone package, but it takes very long
#    and you will run out of RAM


def one_hot_encode(X, K):
  # input is N x D
  # output is N x D x K
  N, D = X.shape
  Y = np.zeros((N, D, K))
  for n, d in zip(*X.nonzero()):
    # 0.5...5 --> 1..10 --> 0..9
    k = int(X[n,d]*2 - 1)
    Y[n,d,k] = 1
  return Y

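# (in one_hot_encode above, a rating of 0.5 maps to bucket 0, 3.0 to bucket 5,
# and 5.0 to bucket 9, since ratings come in half-star steps and k = rating*2 - 1)
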
def one_hot_mask(X, K):
  # input is N x D
  # output is N x D x K
  N, D = X.shape
  Y = np.zeros((N, D, K))
  # if X[n,d] == 0, there's a missing rating
  # so the mask should be all zeros
  # else, it should be all ones
  for n, d in zip(*X.nonzero()):
    Y[n,d,:] = 1
  return Y

one_to_ten = np.arange(10) + 1 # [1, 2, 3, ..., 10]
def convert_probs_to_ratings(probs):
  # probs is N x D x K
  # output is N x D matrix of predicted ratings
  # N, D, K = probs.shape
  # out = np.zeros((N, D))
  # each predicted rating is a weighted average using the probabilities
  # for n in range(N):
  #   for d in range(D):
  #     out[n,d] = probs[n,d].dot(one_to_ten) / 2
  # return out
  return probs.dot(one_to_ten) / 2

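# in convert_probs_to_ratings above, the predicted rating is the expected rating
# under the model's softmax over the 10 buckets:
#   r_hat[n,d] = sum_k probs[n,d,k] * (k+1) / 2  ==  probs.dot(one_to_ten) / 2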


def dot1(V, W):
  # V is N x D x K (batch of visible units)
  # W is D x K x M (weights)
  # returns N x M (hidden layer size)
  return tf.tensordot(V, W, axes=[[1,2], [0,1]])

def dot2(H, W):
  # H is N x M (batch of hiddens)
  # W is D x K x M (weights transposed)
  # returns N x D x K (visible)
  return tf.tensordot(H, W, axes=[[1], [2]])

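# quick shape check for the two contractions above:
#   dot1: (N, D, K) . (D, K, M) -- sum over D and K --> (N, M)
#   dot2: (N, M)    . (D, K, M) -- sum over M       --> (N, D, K)
# e.g. np.tensordot(np.zeros((7,4,10)), np.zeros((4,10,5)), axes=[[1,2],[0,1]]).shape == (7, 5)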

class RBM(object):
  def __init__(self, D, M, K):
    self.D = D # input feature size
    self.M = M # hidden size
    self.K = K # number of ratings
    self.build(D, M, K)


  def build(self, D, M, K):
    # params
    self.W = tf.Variable(tf.random_normal(shape=(D, K, M)) * np.sqrt(2.0 / M))
    self.c = tf.Variable(np.zeros(M).astype(np.float32))
    self.b = tf.Variable(np.zeros((D, K)).astype(np.float32))

    # data
    self.X_in = tf.placeholder(tf.float32, shape=(None, D, K))
    self.mask = tf.placeholder(tf.float32, shape=(None, D, K))

    # conditional probabilities
    # NOTE: tf.contrib.distributions.Bernoulli API has changed in Tensorflow v1.2
    V = self.X_in
    p_h_given_v = tf.nn.sigmoid(dot1(V, self.W) + self.c)
    self.p_h_given_v = p_h_given_v # save for later

    # draw a sample from p(h | v)
    r = tf.random_uniform(shape=tf.shape(p_h_given_v))
    H = tf.to_float(r < p_h_given_v)

    # draw a sample from p(v | h)
    # note: we don't have to actually do the softmax
    logits = dot2(H, self.W) + self.b
    cdist = tf.distributions.Categorical(logits=logits)
    X_sample = cdist.sample() # shape is (N, D)
    X_sample = tf.one_hot(X_sample, depth=self.K) # turn it into (N, D, K)
    X_sample = X_sample * self.mask # missing ratings shouldn't contribute to objective

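    # (the three X_sample lines above form the "negative" sample for contrastive
    # divergence: one Gibbs step v -> h -> v', with the mask zeroing out movies
    # the user never rated so they don't influence the objective below)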

    # build the objective
    objective = tf.reduce_mean(self.free_energy(self.X_in)) - tf.reduce_mean(self.free_energy(X_sample))
    self.train_op = tf.train.AdamOptimizer(1e-2).minimize(objective)
    # self.train_op = tf.train.GradientDescentOptimizer(1e-3).minimize(objective)

    # build the cost
    # we won't use this to optimize the model parameters
    # just to observe what happens during training
    logits = self.forward_logits(self.X_in)
    self.cost = tf.reduce_mean(
      tf.nn.softmax_cross_entropy_with_logits(
        labels=self.X_in,
        logits=logits,
      )
    )

    # to get the output
    self.output_visible = self.forward_output(self.X_in)

    initop = tf.global_variables_initializer()
    self.session = tf.Session()
    self.session.run(initop)

  def fit(self, X, mask, X_test, mask_test, epochs=10, batch_sz=256, show_fig=True):
    N, D = X.shape
    n_batches = N // batch_sz


    costs = []
    test_costs = []
    for i in range(epochs):
      t0 = datetime.now()
      print("epoch:", i)
      X, mask, X_test, mask_test = shuffle(X, mask, X_test, mask_test) # everything has to be shuffled accordingly
      for j in range(n_batches):
        x = X[j*batch_sz:(j*batch_sz + batch_sz)].toarray()
        m = mask[j*batch_sz:(j*batch_sz + batch_sz)].toarray()

        # both visible units and mask have to be in one-hot form
        # N x D --> N x D x K
        batch_one_hot = one_hot_encode(x, self.K)
        m = one_hot_mask(m, self.K)

        _, c = self.session.run(
          (self.train_op, self.cost),
          feed_dict={self.X_in: batch_one_hot, self.mask: m}
        )

        if j % 100 == 0:
          print("j / n_batches:", j, "/", n_batches, "cost:", c)
      print("duration:", datetime.now() - t0)

      # calculate the true train and test cost
      t0 = datetime.now()
      sse = 0
      test_sse = 0
      n = 0
      test_n = 0
      for j in range(n_batches):
        x = X[j*batch_sz:(j*batch_sz + batch_sz)].toarray()
        m = mask[j*batch_sz:(j*batch_sz + batch_sz)].toarray()

        # only visible input has to be in one-hot form
        xoh = one_hot_encode(x, self.K)

        probs = self.get_visible(xoh)
        xhat = convert_probs_to_ratings(probs)
        sse += (m * (xhat - x)*(xhat - x)).sum()
        n += m.sum()

        # the test PREDICTIONS come from the train data!
        # X_test and mask_test are only used for targets
        xt = X_test[j*batch_sz:(j*batch_sz + batch_sz)].toarray()
        mt = mask_test[j*batch_sz:(j*batch_sz + batch_sz)].toarray()

        test_sse += (mt * (xhat - xt) * (xhat - xt)).sum()
        test_n += mt.sum()
      c = sse/n
      ct = test_sse/test_n
      print("train mse:", c)
      print("test mse:", ct)
      print("calculate cost duration:", datetime.now() - t0)
      costs.append(c)
      test_costs.append(ct)
    if show_fig:
      plt.plot(costs, label='train mse')
      plt.plot(test_costs, label='test mse')
      plt.legend()
      plt.show()

  def free_energy(self, V):
    first_term = -tf.reduce_sum(dot1(V, self.b))
    second_term = -tf.reduce_sum(
      # tf.log(1 + tf.exp(tf.matmul(V, self.W) + self.c)),
      tf.nn.softplus(dot1(V, self.W) + self.c),
      axis=1
    )
    return first_term + second_term

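  # for reference, free_energy above follows the usual RBM free energy with a
  # categorical (K-way softmax) visible layer; per sample it reads roughly:
  #   F(v) = - sum_{d,k} b[d,k] * v[d,k]
  #          - sum_m softplus( c[m] + sum_{d,k} W[d,k,m] * v[d,k] )
  # (note the first term is summed over the whole batch rather than per sample)
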
  def forward_hidden(self, X):
    return tf.nn.sigmoid(dot1(X, self.W) + self.c)

  def forward_logits(self, X):
    Z = self.forward_hidden(X)
    return dot2(Z, self.W) + self.b

  def forward_output(self, X):
    return tf.nn.softmax(self.forward_logits(X))

  def transform(self, X):
    # accepts and returns a real numpy array
    # unlike forward_hidden and forward_output
    # which deal with tensorflow variables
    return self.session.run(self.p_h_given_v, feed_dict={self.X_in: X})

  def get_visible(self, X):
    return self.session.run(self.output_visible, feed_dict={self.X_in: X})


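# A minimal sketch of how predicted ratings could be pulled out after training.
# This helper is not part of the original file; it just mirrors what fit() does
# internally when computing the train/test MSE.
def predict_ratings(rbm, X_batch):
  # X_batch: dense N x D array of known ratings (0 = missing)
  xoh = one_hot_encode(X_batch, rbm.K)     # N x D x K one-hot ratings
  probs = rbm.get_visible(xoh)             # N x D x K reconstruction probabilities
  return convert_probs_to_ratings(probs)   # N x D predicted ratings in 0.5..5.0

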
def main():
  A = load_npz("Atrain.npz")
  A_test = load_npz("Atest.npz")
  mask = (A > 0) * 1.0
  mask_test = (A_test > 0) * 1.0

  N, M = A.shape
  rbm = RBM(M, 50, 10)
  rbm.fit(A, mask, A_test, mask_test)


if __name__ == '__main__':
  main()
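
# Note on inputs (an assumption, since the preprocessing step is not part of this
# commit): Atrain.npz / Atest.npz are expected to be scipy sparse user-by-movie
# matrices of ratings in {0.5, 1.0, ..., 5.0}, with 0 meaning "not rated",
# saved with scipy.sparse.save_npz. A toy example of that format:
#   A = lil_matrix((3, 5))                  # 3 users, 5 movies
#   A[0, 1] = 4.5                           # user 0 rated movie 1 as 4.5
#   A[2, 0] = 3.0
#   save_npz("Atrain.npz", csr_matrix(A))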
