Commit ce6aba1

minor updates
1 parent faadf24 commit ce6aba1

14 files changed: +50 -27 lines

Diff for: airline/ann.py (+1 -1)

@@ -38,7 +38,7 @@ class ANN(object):
     def __init__(self, hidden_layer_sizes):
         self.hidden_layer_sizes = hidden_layer_sizes

-    def fit(self, X, Y, activation=T.tanh, learning_rate=10e-4, mu=0.5, reg=0, epochs=5000, batch_sz=None, print_period=100, show_fig=True):
+    def fit(self, X, Y, activation=T.tanh, learning_rate=1e-3, mu=0.5, reg=0, epochs=5000, batch_sz=None, print_period=100, show_fig=True):
         X = X.astype(np.float32)
         Y = Y.astype(np.float32)
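
A note that applies across this commit: in Python, the literal 10e-4 means 10 * 10**-4, which equals 1e-3, so changes like this one keep the same value and only normalize the notation. The edits to assert tolerances further down (10e-8 -> 1e-8, 10e-5 -> 1e-5, 10e-10 -> 1e-10) are different: there the threshold genuinely tightens by a factor of ten. A quick check:

>>> 10e-4 == 1e-3   # same value, clearer notation
True
>>> 10e-8 == 1e-8   # not the same: 10e-8 is actually 1e-7
False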

Diff for: airline/rnn.py (+1 -1)

@@ -29,7 +29,7 @@ class RNN(object):
     def __init__(self, hidden_layer_sizes):
         self.hidden_layer_sizes = hidden_layer_sizes

-    def fit(self, X, Y, activation=T.tanh, learning_rate=10e-2, mu=0.5, reg=0, epochs=2000, show_fig=False):
+    def fit(self, X, Y, activation=T.tanh, learning_rate=1e-1, mu=0.5, reg=0, epochs=2000, show_fig=False):
         N, t, D = X.shape

         self.hidden_layers = []

Diff for: ann_class2/extra_reading.txt (+5 -1)

@@ -18,4 +18,8 @@ Xavier (Glorot) Normal Initializer
 http://jmlr.org/proceedings/papers/v9/glorot10a/glorot10a.pdf

 He Normal Initializer
-http://arxiv.org/abs/1502.01852
+http://arxiv.org/abs/1502.01852
+
+For understanding Nesterov Momentum:
+Advances in optimizing Recurrent Networks by Yoshua Bengio, Section 3.5
+http://arxiv.org/pdf/1212.0901v2.pdf
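
For reference, the Nesterov momentum update that Section 3.5 of the Bengio paper analyzes can be written in two lines: evaluate the gradient at the look-ahead point theta + mu*v, then step. A minimal runnable sketch on a toy quadratic (illustrative only, not code from this repo):

def grad(theta):
    return 2.0*(theta - 3.0)  # gradient of (theta - 3)**2

theta, v = 0.0, 0.0
mu, lr = 0.9, 0.1
for _ in range(100):
    v = mu*v - lr*grad(theta + mu*v)  # gradient at the look-ahead point
    theta = theta + v
print(round(theta, 4))  # -> 3.0, the minimizer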

Diff for: bayesian_ml/3/run.py (+1 -1)

@@ -66,7 +66,7 @@ def objective(X, Y, C, mu, a, b, e, f, a0, b0, e0, f0):
     # e3 = gamma_dist.entropy(e, scale=1.0/f)
     # e4 = -e_ln_q_gamma(e, f)
     # print "e3:", e3, "e4:", e4
-    # assert(np.abs(e3 - e4) < 10e-8)
+    # assert(np.abs(e3 - e4) < 1e-8)
     total += gamma_dist.entropy(e, scale=1.0/f)
     # total -= e_ln_q_gamma(e, f)
     # print "total after lnq(lambda):", total

Diff for: cnn_class/custom_blur.py (+10 -0)

@@ -91,3 +91,13 @@ def convolve2d(X, W):
 print(out.shape)
 # after convolution, the output signal is N1 + N2 - 1

+# try it in color
+out = np.zeros(img.shape)
+W /= W.sum()
+for i in range(3):
+    out[:,:,i] = convolve2d(img[:,:,i], W)
+plt.imshow(out)
+plt.show()
+
+
+
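
The new block blurs each RGB channel independently with the same kernel; dividing W by its sum makes the kernel a weighted average, so overall brightness is preserved. A self-contained version of the same idea, using scipy's convolve2d instead of the file's own helper (the image path is a placeholder):

import numpy as np
import matplotlib.pyplot as plt
from scipy.signal import convolve2d

img = plt.imread('lena.png')[:, :, :3]  # placeholder path; any HxWx3 float image

W = np.ones((20, 20))
W /= W.sum()  # normalize so the blur preserves overall brightness

out = np.zeros_like(img)
for i in range(3):  # convolve each color channel separately
    out[:, :, i] = convolve2d(img[:, :, i], W, mode='same')

plt.imshow(np.clip(out, 0, 1))
plt.show()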

Diff for: logistic_regression_class/logistic3.py (+6 -4)

@@ -16,17 +16,19 @@
 N = 100
 D = 2

+N_per_class = N//2
+
 X = np.random.randn(N,D)

 # center the first 50 points at (-2,-2)
-X[:50,:] = X[:50,:] - 2*np.ones((50,D))
+X[:N_per_class,:] = X[:N_per_class,:] - 2*np.ones((N_per_class,D))

 # center the last 50 points at (2, 2)
-X[50:,:] = X[50:,:] + 2*np.ones((50,D))
+X[N_per_class:,:] = X[N_per_class:,:] + 2*np.ones((N_per_class,D))

-# labels: first 50 are 0, last 50 are 1
-T = np.array([0]*50 + [1]*50)
+# labels: first N_per_class are 0, last N_per_class are 1
+T = np.array([0]*N_per_class + [1]*N_per_class)

 # add a column of ones
 # ones = np.array([[1]*N]).T # old

Diff for: nlp_class2/glove.py (+5 -5)

@@ -223,7 +223,7 @@ def fit(self, sentences, cc_matrix=None, learning_rate=1e-4, reg=0.1, xmax=100,
         for i in xrange(V):
             # matrix = reg*np.eye(D) + np.sum((fX[i,j]*np.outer(U[j], U[j]) for j in xrange(V)), axis=0)
             matrix = reg*np.eye(D) + (fX[i,:]*U.T).dot(U)
-            # assert(np.abs(matrix - matrix2).sum() < 10e-5)
+            # assert(np.abs(matrix - matrix2).sum() < 1e-5)
             vector = (fX[i,:]*(logX[i,:] - b[i] - c - mu)).dot(U)
             W[i] = np.linalg.solve(matrix, vector)
             # print "fast way took:", (datetime.now() - t0)

@@ -238,8 +238,8 @@ def fit(self, sentences, cc_matrix=None, learning_rate=1e-4, reg=0.1, xmax=100,
             # vector2 += fX[i,j]*(logX[i,j] - b[i] - c[j])*U[j]
             # print "slow way took:", (datetime.now() - t0)

-            # assert(np.abs(matrix - matrix2).sum() < 10e-5)
-            # assert(np.abs(vector - vector2).sum() < 10e-5)
+            # assert(np.abs(matrix - matrix2).sum() < 1e-5)
+            # assert(np.abs(vector - vector2).sum() < 1e-5)
             # W[i] = np.linalg.solve(matrix, vector)
             # print "updated W"

@@ -257,7 +257,7 @@ def fit(self, sentences, cc_matrix=None, learning_rate=1e-4, reg=0.1, xmax=100,
         for j in xrange(V):
             # matrix = reg*np.eye(D) + np.sum((fX[i,j]*np.outer(W[i], W[i]) for i in xrange(V)), axis=0)
             matrix = reg*np.eye(D) + (fX[:,j]*W.T).dot(W)
-            # assert(np.abs(matrix - matrix2).sum() < 10e-8)
+            # assert(np.abs(matrix - matrix2).sum() < 1e-8)
             vector = (fX[:,j]*(logX[:,j] - b - c[j] - mu)).dot(W)
             # matrix = reg*np.eye(D)
             # vector = 0

@@ -323,7 +323,7 @@ def main(we_file, w2i_file, use_brown=True, n_files=50):
     model.fit(
         sentences,
         cc_matrix=cc_matrix,
-        learning_rate=3*10e-5,
+        learning_rate=3e-4,
         reg=0.1,
         epochs=10,
         gd=True,
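
Context for the vectorized line these asserts were checking: each W[i] update is a regularized weighted least-squares solve. The commented slow loop accumulates reg*I plus a weighted sum of outer products of the rows of U, and (fX[i,:]*U.T).dot(U) builds the same D x D matrix in one shot. A small standalone check of that equivalence (stand-in names, not the script's variables):

import numpy as np

V, D = 10, 4
rng = np.random.default_rng(0)
f = rng.random(V)                 # stand-in for one row fX[i,:]
U = rng.standard_normal((V, D))
reg = 0.1

# slow: explicit weighted sum of outer products
slow = reg*np.eye(D) + sum(f[j]*np.outer(U[j], U[j]) for j in range(V))

# fast: single matrix product, as in the script
fast = reg*np.eye(D) + (f*U.T).dot(U)

assert np.abs(slow - fast).sum() < 1e-10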

Diff for: nlp_class2/rntn_tensorflow.py (+1 -1)

@@ -68,7 +68,7 @@ def __init__(self, V, D, K, activation):
         self.bo = tf.Variable(bo.astype(np.float32))
         self.params = [self.We, self.W11, self.W22, self.W12, self.W1, self.W2, self.Wo]

-    def fit(self, trees, lr=10e-3, mu=0.9, reg=10e-2, epochs=5):
+    def fit(self, trees, lr=1e-2, mu=0.9, reg=1e-1, epochs=5):
         train_ops = []
         costs = []
         predictions = []

Diff for: nlp_class2/util.py (+12 -5)

@@ -1,6 +1,13 @@
 # Course URL:
 # https://deeplearningcourses.com/c/natural-language-processing-with-deep-learning-in-python
 # https://udemy.com/natural-language-processing-with-deep-learning-in-python
+from __future__ import print_function, division
+from future.utils import iteritems
+from builtins import range
+# Note: you may need to update your version of future
+# sudo pip install -U future
+
+
 import numpy as np

 def init_weight(Mi, Mo):

@@ -21,15 +28,15 @@ def dist2(a, b):
     for dist, name in [(dist1, 'Euclidean'), (dist2, 'cosine')]:
         min_dist = float('inf')
         best_word = ''
-        for word, idx in word2idx.iteritems():
+        for word, idx in iteritems(word2idx):
             if word not in (w1, w2, w3):
                 v1 = We[idx]
                 d = dist(v0, v1)
                 if d < min_dist:
                     min_dist = d
                     best_word = word
-        print "closest match by", name, "distance:", best_word
-        print w1, "-", w2, "=", best_word, "-", w3
+        print("closest match by", name, "distance:", best_word)
+        print(w1, "-", w2, "=", best_word, "-", w3)


 class Tree:

@@ -43,9 +50,9 @@ def __init__(self, word, label):
 def display_tree(t, lvl=0):
     prefix = ''.join(['>']*lvl)
     if t.word is not None:
-        print "%s%s %s" % (prefix, t.label, t.word)
+        print("%s%s %s" % (prefix, t.label, t.word))
     else:
-        print "%s%s -" % (prefix, t.label)
+        print("%s%s -" % (prefix, t.label))
     # if t.left is None or t.right is None:
     #     raise Exception("Tree node has no word but left and right child are None")
     if t.left:
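
These util.py edits are the standard Python 2/3 compatibility recipe built on the future package: print becomes a function, and dict.iteritems() is replaced by future.utils.iteritems, which iterates lazily under both interpreters. A minimal sketch of the pattern, assuming future is installed (pip install -U future):

from __future__ import print_function, division
from future.utils import iteritems
from builtins import range

word2idx = {'king': 0, 'queen': 1}
for word, idx in iteritems(word2idx):  # dict.iteritems() on Py2, dict.items() on Py3
    print(word, idx)                   # function-call print works on both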

Diff for: rl2/atari/dqn_tf.py (+1 -1)

@@ -100,7 +100,7 @@ def __init__(self, K, conv_layer_sizes, hidden_layer_sizes, gamma, scope):
         cost = tf.reduce_mean(tf.square(self.G - selected_action_values))
         # self.train_op = tf.train.AdamOptimizer(1e-2).minimize(cost)
         # self.train_op = tf.train.AdagradOptimizer(1e-2).minimize(cost)
-        # self.train_op = tf.train.RMSPropOptimizer(2.5e-4, decay=0.99, epsilon=10e-3).minimize(cost)
+        # self.train_op = tf.train.RMSPropOptimizer(2.5e-4, decay=0.99, epsilon=1e-3).minimize(cost)
         self.train_op = tf.train.RMSPropOptimizer(0.00025, 0.99, 0.0, 1e-6).minimize(cost)
         # self.train_op = tf.train.MomentumOptimizer(1e-3, momentum=0.9).minimize(cost)
         # self.train_op = tf.train.GradientDescentOptimizer(1e-4).minimize(cost)
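
The active train op passes its hyperparameters positionally; under the TF1 signature tf.train.RMSPropOptimizer(learning_rate, decay, momentum, epsilon, ...) it reads as lr=2.5e-4, decay=0.99, momentum=0.0, epsilon=1e-6. A standalone sketch of the equivalent keyword form (toy cost, TF1-style API assumed):

import numpy as np
import tensorflow as tf  # TF1-style tf.train API, as used in this repo

w = tf.Variable(np.float32(0.0))
cost = tf.square(w - 1.0)

# keyword form of tf.train.RMSPropOptimizer(0.00025, 0.99, 0.0, 1e-6)
train_op = tf.train.RMSPropOptimizer(
    learning_rate=2.5e-4,
    decay=0.99,
    momentum=0.0,
    epsilon=1e-6,
).minimize(cost)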

Diff for: rl2/atari/dqn_tf_alt.py (+4 -4)

@@ -165,11 +165,11 @@ def __init__(self, K, conv_layer_sizes, hidden_layer_sizes, gamma):

         cost = tf.reduce_mean(tf.square(self.G - selected_action_values))
         self.cost = cost
-        # self.train_op = tf.train.AdamOptimizer(10e-3).minimize(cost)
-        # self.train_op = tf.train.AdagradOptimizer(10e-3).minimize(cost)
+        # self.train_op = tf.train.AdamOptimizer(1e-2).minimize(cost)
+        # self.train_op = tf.train.AdagradOptimizer(1e-2).minimize(cost)
         self.train_op = tf.train.RMSPropOptimizer(0.00025, 0.99, 0.0, 1e-6).minimize(cost)
-        # self.train_op = tf.train.MomentumOptimizer(10e-4, momentum=0.9).minimize(cost)
-        # self.train_op = tf.train.GradientDescentOptimizer(10e-5).minimize(cost)
+        # self.train_op = tf.train.MomentumOptimizer(1e-3, momentum=0.9).minimize(cost)
+        # self.train_op = tf.train.GradientDescentOptimizer(1e-4).minimize(cost)

     def set_session(self, session):
         self.session = session

Diff for: unsupervised_class/kmeans.py (+1 -1)

@@ -50,7 +50,7 @@ def plot_k_means(X, K, max_iter=20, beta=1.0, show_plots=True):
                 exponents[n,k] = np.exp(-beta*d(M[k], X[n]))

         R = exponents / exponents.sum(axis=1, keepdims=True)
-        # assert(np.abs(R - R2).sum() < 10e-10)
+        # assert(np.abs(R - R2).sum() < 1e-10)

         # step 2: recalculate means
         for k in range(K):
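
The normalization line here turns exp(-beta * distance) into soft-cluster responsibilities: each row of R is a distribution over the K means, a softmax of the negative scaled distances. A vectorized standalone sketch of the same step (stand-in names, squared Euclidean distance assumed):

import numpy as np

def responsibilities(X, M, beta=1.0):
    # squared Euclidean distance of every sample to every mean: N x K
    dist = ((X[:, None, :] - M[None, :, :])**2).sum(axis=2)
    exponents = np.exp(-beta*dist)
    return exponents / exponents.sum(axis=1, keepdims=True)

X = np.random.randn(100, 2)
M = np.random.randn(3, 2)
R = responsibilities(X, M)
assert np.allclose(R.sum(axis=1), 1.0)  # each row sums to 1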

Diff for: unsupervised_class/kmeans_visualize.py (+1 -1)

@@ -57,7 +57,7 @@ def plot_k_means(X, K, max_iter=20, beta=1.0):

         costs[i] = cost(X, R, M)
         if i > 0:
-            if np.abs(costs[i] - costs[i-1]) < 10e-5:
+            if np.abs(costs[i] - costs[i-1]) < 1e-5:
                 break
     plt.show()

Diff for: unsupervised_class2/autoencoder_tf.py (+1 -1)

@@ -43,7 +43,7 @@ def build(self, D, M):
         )

         self.train_op = tf.train.AdamOptimizer(1e-1).minimize(self.cost)
-        # self.train_op = tf.train.MomentumOptimizer(10e-4, momentum=0.9).minimize(self.cost)
+        # self.train_op = tf.train.MomentumOptimizer(1e-3, momentum=0.9).minimize(self.cost)

     def fit(self, X, epochs=1, batch_sz=100, show_fig=False):
         N, D = X.shape
