
# ----------------------
# - read the input data:
'''
import mnist_loader
training_data, validation_data, test_data = mnist_loader.load_data_wrapper()
training_data = list(training_data)
'''
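# Note: mnist_loader.load_data_wrapper() (from the book's code repo) returns
# (training_data, validation_data, test_data); training_data pairs 784-entry
# input vectors with 10-entry one-hot label vectors.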
# ---------------------
# - network.py example:
#import network

'''
net = network.Network([784, 30, 10])
# [...]
'''

# ----------------------
# - network2.py example:
#import network2

'''
net = network2.Network([784, 30, 10], cost=network2.CrossEntropyCost)
# [...]
        monitor_evaluation_accuracy=True)
'''

# chapter 3 - Overfitting example - too many epochs of learning applied to a small (1k samples) amount of data.
# Overfitting is treating noise as a signal.
'''
net = network2.Network([784, 30, 10], cost=network2.CrossEntropyCost)
net.large_weight_initializer()
# [...]
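
# A sketch of how such an overfitting run is typically invoked (the actual call
# sits in the elided lines above; the hyperparameters - 1,000 training samples,
# 400 epochs, mini-batch size 10, eta = 0.5 - follow the book's chapter 3
# walkthrough and are assumptions, not read from this file):
#
# net.SGD(training_data[:1000], 400, 10, 0.5,
#         evaluation_data=test_data,
#         monitor_evaluation_accuracy=True,
#         monitor_training_cost=True)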

# chapter 4 - The vanishing gradient problem - deep networks are hard to train with a simple SGD algorithm:
# this network learns much more slowly than a shallow one.
'''
net = network2.Network([784, 30, 30, 30, 30, 10], cost=network2.CrossEntropyCost)
net.SGD(training_data, 30, 10, 0.1,
        lmbda=5.0,
        evaluation_data=validation_data,
        monitor_evaluation_accuracy=True)
'''

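
# Why the gradient vanishes, in one toy calculation: backprop multiplies in one
# sigmoid-prime factor per layer, and sigmoid-prime is at most 0.25, so with
# O(1) weights the gradient reaching the early layers shrinks roughly
# exponentially with depth. A self-contained sketch (illustrative numbers only,
# not taken from network2.py):
'''
import numpy as np

def sigmoid_prime(z):
    s = 1.0 / (1.0 + np.exp(-z))
    return s * (1.0 - s)

print(sigmoid_prime(0.0))  # 0.25 - the largest value sigmoid-prime can take
w = 1.0                    # a typical O(1) weight
for depth in [1, 2, 4, 8]:
    # rough magnitude of the gradient factor `depth` layers back: (w * sigma')^depth
    print(depth, (w * sigmoid_prime(0.0)) ** depth)
'''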

# ----------------------
# Theano and CUDA
# ----------------------

"""
This deep network uses Theano with GPU acceleration support.
I am using Ubuntu 16.04 with CUDA 7.5.
Tutorial:
http://deeplearning.net/software/theano/install_ubuntu.html#install-ubuntu
"""

"""
Testing function to check whether your computations have been made on the CPU or the GPU.
If the result is 'Used the cpu' and you want it on the GPU, do the following:
1) install Theano:
   sudo python3.5 -m pip install Theano
2) download and install the latest CUDA:
   https://developer.nvidia.com/cuda-downloads
   I had some issues with that, so I followed this idea (the better option is to download the 1.1 GB package as a .run file):
   http://askubuntu.com/questions/760242/how-can-i-force-16-04-to-add-a-repository-even-if-it-isnt-considered-secure-eno
   You may also want to grab the proper NVIDIA driver; choose it from:
   System Settings > Software & Updates > Additional Drivers.
3) point Theano at the GPU when you run the script (see the THEANO_FLAGS example below testTheano).
"""

def testTheano():
    from theano import function, config, shared, sandbox
    import theano.tensor as T
    import numpy
    import time

    vlen = 10 * 30 * 768  # 10 x #cores x #threads per core
    iters = 1000

    # exponentiate a large random vector many times and inspect the compiled graph
    rng = numpy.random.RandomState(22)
    x = shared(numpy.asarray(rng.rand(vlen), config.floatX))
    f = function([], T.exp(x))
    print(f.maker.fgraph.toposort())
    t0 = time.time()
    for i in range(iters):
        r = f()
    t1 = time.time()
    print("Looping %d times took %f seconds" % (iters, t1 - t0))
    print("Result is %s" % (r,))
    # plain Elemwise ops mean the graph ran on the CPU; on the GPU they become GpuElemwise
    if numpy.any([isinstance(x.op, T.Elemwise) for x in f.maker.fgraph.toposort()]):
        print('Used the cpu')
    else:
        print('Used the gpu')

# Perform check:
#testTheano()
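
# To make the check above use the GPU, run the script with Theano's device
# flag set, as in the Theano "Using the GPU" tutorial (the script name here
# is a placeholder):
#   THEANO_FLAGS=mode=FAST_RUN,device=gpu,floatX=float32 python3.5 <this_script>.py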


# ----------------------
# - network3.py example:
import network3

from network3 import ConvPoolLayer, FullyConnectedLayer, SoftmaxLayer
training_data, validation_data, test_data = network3.load_data_shared()
mini_batch_size = 10
net = network3.Network([
    FullyConnectedLayer(n_in=784, n_out=100),
    SoftmaxLayer(n_in=100, n_out=10)], mini_batch_size)
net.SGD(training_data, 60, mini_batch_size, 0.1, validation_data, test_data)
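
# ConvPoolLayer is imported above but unused in this fully-connected example.
# A sketch of a convolutional variant (commented out; the layer shapes follow
# the book's chapter 6 example - 5x5 filters on 28x28 images give 24x24 maps,
# 2x2 pooling gives 12x12 - and are assumptions, not part of this file):
'''
net = network3.Network([
    ConvPoolLayer(image_shape=(mini_batch_size, 1, 28, 28),
                  filter_shape=(20, 1, 5, 5),
                  poolsize=(2, 2)),
    FullyConnectedLayer(n_in=20*12*12, n_out=100),
    SoftmaxLayer(n_in=100, n_out=10)], mini_batch_size)
net.SGD(training_data, 60, mini_batch_size, 0.1, validation_data, test_data)
'''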