diff --git a/ann_class2/batch_norm_tf.py b/ann_class2/batch_norm_tf.py index de25cd3b..31d9a351 100644 --- a/ann_class2/batch_norm_tf.py +++ b/ann_class2/batch_norm_tf.py @@ -4,13 +4,15 @@ # sudo pip install -U future import numpy as np -import pandas as pd +#import pandas as pd import matplotlib.pyplot as plt import tensorflow as tf from sklearn.utils import shuffle -from sklearn.model_selection import train_test_split +#from sklearn.model_selection import train_test_split from util import get_normalized_data +if tf.__version__.startswith('2'): + tf.compat.v1.disable_eager_execution() def init_weight(M1, M2): return np.random.randn(M1, M2) * np.sqrt(2.0 / M1) @@ -38,13 +40,11 @@ def forward(self, X, is_training, decay=0.9): activation = tf.matmul(X, self.W) if is_training: batch_mean, batch_var = tf.nn.moments(activation, [0]) - update_running_mean = tf.assign( - self.running_mean, - self.running_mean * decay + batch_mean * (1 - decay) + update_running_mean = self.running_mean.assign( + self.running_mean * decay + batch_mean * (1 - decay) ) - update_running_var = tf.assign( - self.running_var, - self.running_var * decay + batch_var * (1 - decay) + update_running_var = self.running_var.assign( + self.running_var * decay + batch_var * (1 - decay) ) with tf.control_dependencies([update_running_mean, update_running_var]): @@ -115,8 +115,8 @@ def fit(self, X, Y, Xtest, Ytest, activation=tf.nn.relu, learning_rate=1e-2, epo # for train and test (prediction) # set up theano functions and variables - tfX = tf.placeholder(tf.float32, shape=(None, D), name='X') - tfY = tf.placeholder(tf.int32, shape=(None,), name='Y') + tfX = tf.compat.v1.placeholder(tf.float32, shape=(None, D), name='X') + tfY = tf.compat.v1.placeholder(tf.int32, shape=(None,), name='Y') # for later use self.tfX = tfX @@ -131,7 +131,7 @@ def fit(self, X, Y, Xtest, Ytest, activation=tf.nn.relu, learning_rate=1e-2, epo ) # train_op = tf.train.AdamOptimizer(learning_rate).minimize(cost) # train_op = tf.train.RMSPropOptimizer(learning_rate, decay=0.99, momentum=0.9).minimize(cost) - train_op = tf.train.MomentumOptimizer(learning_rate, momentum=0.9, use_nesterov=True).minimize(cost) + train_op = tf.compat.v1.train.MomentumOptimizer(learning_rate, momentum=0.9, use_nesterov=True).minimize(cost) # train_op = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost) # for testing @@ -141,7 +141,7 @@ def fit(self, X, Y, Xtest, Ytest, activation=tf.nn.relu, learning_rate=1e-2, epo # accuracy = tf.reduce_mean(1.0*(tfY == tf.argmax(logits, 1))) # init the variables - self.session.run(tf.global_variables_initializer()) + self.session.run(tf.compat.v1.global_variables_initializer()) n_batches = N // batch_sz costs = [] @@ -187,7 +187,7 @@ def main(): ann = ANN([500, 300]) - session = tf.InteractiveSession() + session = tf.compat.v1.InteractiveSession() ann.set_session(session) ann.fit(Xtrain, Ytrain, Xtest, Ytest, show_fig=True) diff --git a/ann_class2/dropout_tensorflow.py b/ann_class2/dropout_tensorflow.py index b20c44fb..c2186e57 100644 --- a/ann_class2/dropout_tensorflow.py +++ b/ann_class2/dropout_tensorflow.py @@ -13,6 +13,8 @@ from util import get_normalized_data from sklearn.utils import shuffle +if tf.__version__.startswith('2'): + tf.compat.v1.disable_eager_execution() class HiddenLayer(object): def __init__(self, M1, M2): @@ -59,8 +61,8 @@ def fit(self, X, Y, Xvalid, Yvalid, lr=1e-4, mu=0.9, decay=0.9, epochs=15, batch self.params += h.params # set up theano functions and variables - inputs = tf.placeholder(tf.float32, 
shape=(None, D), name='inputs') - labels = tf.placeholder(tf.int64, shape=(None,), name='labels') + inputs = tf.compat.v1.placeholder(tf.float32, shape=(None, D), name='inputs') + labels = tf.compat.v1.placeholder(tf.int64, shape=(None,), name='labels') logits = self.forward(inputs) cost = tf.reduce_mean( @@ -69,7 +71,7 @@ def fit(self, X, Y, Xvalid, Yvalid, lr=1e-4, mu=0.9, decay=0.9, epochs=15, batch labels=labels ) ) - train_op = tf.train.RMSPropOptimizer(lr, decay=decay, momentum=mu).minimize(cost) + train_op = tf.compat.v1.train.RMSPropOptimizer(lr, decay=decay, momentum=mu).minimize(cost) # train_op = tf.train.MomentumOptimizer(lr, momentum=mu).minimize(cost) # train_op = tf.train.AdamOptimizer(lr).minimize(cost) prediction = self.predict(inputs) @@ -85,8 +87,8 @@ def fit(self, X, Y, Xvalid, Yvalid, lr=1e-4, mu=0.9, decay=0.9, epochs=15, batch n_batches = N // batch_sz costs = [] - init = tf.global_variables_initializer() - with tf.Session() as session: + init = tf.compat.v1.global_variables_initializer() + with tf.compat.v1.Session() as session: session.run(init) for i in range(epochs): print("epoch:", i, "n_batches:", n_batches) diff --git a/ann_class2/keras_functional.py b/ann_class2/keras_functional.py index 265d3f9b..14e5e955 100644 --- a/ann_class2/keras_functional.py +++ b/ann_class2/keras_functional.py @@ -5,8 +5,8 @@ # Note: you may need to update your version of future # sudo pip install -U future -from keras.models import Model -from keras.layers import Dense, Input +from tensorflow.keras.models import Model #type: ignore +from tensorflow.keras.layers import Dense, Input #type: ignore from util import get_normalized_data, y2indicator import matplotlib.pyplot as plt diff --git a/ann_class2/pytorch_batchnorm.py b/ann_class2/pytorch_batchnorm.py index 766dc805..c3fb30ad 100644 --- a/ann_class2/pytorch_batchnorm.py +++ b/ann_class2/pytorch_batchnorm.py @@ -37,9 +37,11 @@ model.add_module("dense1", torch.nn.Linear(D, 500)) model.add_module("bn1", torch.nn.BatchNorm1d(500)) model.add_module("relu1", torch.nn.ReLU()) +model.add_module("dropout1", torch.nn.Dropout(p=0.2)) model.add_module("dense2", torch.nn.Linear(500, 300)) model.add_module("bn2", torch.nn.BatchNorm1d(300)) model.add_module("relu2", torch.nn.ReLU()) +model.add_module("dropout2", torch.nn.Dropout(p=0.2)) model.add_module("dense3", torch.nn.Linear(300, K)) # Note: no final softmax! 
# just like Tensorflow, it's included in cross-entropy function diff --git a/ann_class2/tensorflow2.py b/ann_class2/tensorflow2.py index a07f0104..00bd7746 100644 --- a/ann_class2/tensorflow2.py +++ b/ann_class2/tensorflow2.py @@ -12,11 +12,12 @@ import numpy as np import tensorflow as tf - import matplotlib.pyplot as plt - from util import get_normalized_data, y2indicator +if tf.__version__.startswith('2'): + tf.compat.v1.disable_eager_execution() + def error_rate(p, t): return np.mean(p != t) @@ -31,7 +32,7 @@ def main(): print_period = 50 lr = 0.00004 - reg = 0.01 + #reg = 0.01 Ytrain_ind = y2indicator(Ytrain) Ytest_ind = y2indicator(Ytest) @@ -53,8 +54,8 @@ def main(): # define variables and expressions - X = tf.placeholder(tf.float32, shape=(None, D), name='X') - T = tf.placeholder(tf.float32, shape=(None, K), name='T') + X = tf.compat.v1.placeholder(tf.float32, shape=(None, D), name='X') + T = tf.compat.v1.placeholder(tf.float32, shape=(None, K), name='T') W1 = tf.Variable(W1_init.astype(np.float32)) b1 = tf.Variable(b1_init.astype(np.float32)) W2 = tf.Variable(W2_init.astype(np.float32)) @@ -70,19 +71,19 @@ def main(): # softmax_cross_entropy_with_logits take in the "logits" # if you wanted to know the actual output of the neural net, # you could pass "Yish" into tf.nn.softmax(logits) - cost = tf.reduce_sum(tf.nn.softmax_cross_entropy_with_logits_v2(logits=Yish, labels=T)) + cost = tf.reduce_sum(tf.nn.softmax_cross_entropy_with_logits(logits=Yish, labels=T)) # we choose the optimizer but don't implement the algorithm ourselves # let's go with RMSprop, since we just learned about it. # it includes momentum! - train_op = tf.train.RMSPropOptimizer(lr, decay=0.99, momentum=0.9).minimize(cost) + train_op = tf.compat.v1.train.RMSPropOptimizer(lr, decay=0.99, momentum=0.9).minimize(cost) # we'll use this to calculate the error rate predict_op = tf.argmax(Yish, 1) costs = [] - init = tf.global_variables_initializer() - with tf.Session() as session: + init = tf.compat.v1.global_variables_initializer() + with tf.compat.v1.Session() as session: session.run(init) for i in range(max_iter): diff --git a/ann_class2/util.py b/ann_class2/util.py index 5c8ad934..20bba18d 100644 --- a/ann_class2/util.py +++ b/ann_class2/util.py @@ -15,7 +15,7 @@ import pandas as pd import matplotlib.pyplot as plt from sklearn.decomposition import PCA -from sklearn.linear_model import LogisticRegression +#from sklearn.linear_model import LogisticRegression def get_clouds(): @@ -70,14 +70,14 @@ def get_spiral(): def get_transformed_data(): print("Reading in and transforming data...") - if not os.path.exists('../large_files/train.csv'): - print('Looking for ../large_files/train.csv') + if not os.path.exists('.\\large_files\\digit-recognizer\\train.csv'): + print('Looking for .\\large_files\\digit-recognizer\\train.csv') print('You have not downloaded the data and/or not placed the files in the correct location.') print('Please get the data from: https://www.kaggle.com/c/digit-recognizer') print('Place train.csv in the folder large_files adjacent to the class folder') exit() - df = pd.read_csv('../large_files/train.csv') + df = pd.read_csv('.\\large_files\\digit-recognizer\\train.csv') data = df.values.astype(np.float32) np.random.shuffle(data) @@ -117,14 +117,14 @@ def get_transformed_data(): def get_normalized_data(): print("Reading in and transforming data...") - if not os.path.exists('../large_files/train.csv'): - print('Looking for ../large_files/train.csv') + if not 
os.path.exists('.\\large_files\\digit-recognizer\\train.csv'):
+        print('Looking for .\\large_files\\digit-recognizer\\train.csv')
print('You have not downloaded the data and/or not placed the files in the correct location.')
print('Please get the data from: https://www.kaggle.com/c/digit-recognizer')
print('Place train.csv in the folder large_files adjacent to the class folder')
exit()
-    df = pd.read_csv('../large_files/train.csv')
+    df = pd.read_csv('.\\large_files\\digit-recognizer\\train.csv')
data = df.values.astype(np.float32)
np.random.shuffle(data)
X = data[:, 1:]
diff --git a/cnn_class2/class_activation_maps.py b/cnn_class2/class_activation_maps.py
index 19033ff5..ec2af568 100644
--- a/cnn_class2/class_activation_maps.py
+++ b/cnn_class2/class_activation_maps.py
@@ -6,9 +6,9 @@
# Note: you may need to update your version of future
# sudo pip install -U future
-from keras.models import Model
-from keras.applications.resnet50 import ResNet50, preprocess_input, decode_predictions
-from keras.preprocessing import image
+from tensorflow.keras.models import Model #type: ignore
+from tensorflow.keras.applications.resnet50 import ResNet50, preprocess_input, decode_predictions #type: ignore
+from tensorflow.keras.preprocessing import image #type: ignore
import numpy as np
import scipy as sp
@@ -19,10 +19,10 @@
# get the image files
-# http://www.vision.caltech.edu/Image_Datasets/Caltech101/
-# http://www.vision.caltech.edu/Image_Datasets/Caltech256/
-image_files = glob('../large_files/256_ObjectCategories/*/*.jp*g')
-image_files += glob('../large_files/101_ObjectCategories/*/*.jp*g')
+# http://www.vision.caltech.edu/datasets/Caltech101
+# http://www.vision.caltech.edu/datasets/Caltech256/
+image_files = glob('.\\large_files\\256_ObjectCategories\\*\\*.jp*g')
+image_files += glob('.\\large_files\\101_ObjectCategories\\*\\*.jp*g')
@@ -39,13 +39,13 @@
resnet.summary()
# make a model to get output before flatten
-activation_layer = resnet.get_layer('activation_49')
+activation_layer = resnet.get_layer('conv5_block3_out')
# create a model object
model = Model(inputs=resnet.input, outputs=activation_layer.output)
# get the feature map weights
-final_dense = resnet.get_layer('fc1000')
+final_dense = resnet.get_layer('predictions')
W = final_dense.get_weights()[0]
diff --git a/cnn_class2/make_limited_datasets.py b/cnn_class2/make_limited_datasets.py
index 911c4563..51b57c95 100644
--- a/cnn_class2/make_limited_datasets.py
+++ b/cnn_class2/make_limited_datasets.py
@@ -10,7 +10,7 @@ def link(src, dst):
if not os.path.exists(dst):
os.symlink(src, dst, target_is_directory=True)
-mkdir('../large_files/fruits-360-small')
+mkdir('.\\large_files\\fruits-360-small')
classes = [
@@ -24,16 +24,16 @@ def link(src, dst):
'Raspberry'
]
-train_path_from = os.path.abspath('../large_files/fruits-360/Training')
-valid_path_from = os.path.abspath('../large_files/fruits-360/Validation')
+train_path_from = os.path.abspath('.\\large_files\\fruits-360\\Training')
+valid_path_from = os.path.abspath('.\\large_files\\fruits-360\\Validation')
-train_path_to = os.path.abspath('../large_files/fruits-360-small/Training')
-valid_path_to = os.path.abspath('../large_files/fruits-360-small/Validation')
+train_path_to = os.path.abspath('.\\large_files\\fruits-360-small\\Training')
+valid_path_to = os.path.abspath('.\\large_files\\fruits-360-small\\Validation')
mkdir(train_path_to)
mkdir(valid_path_to)
for c in classes:
-  link(train_path_from + '/' + c, train_path_to + '/' + c)
-  link(valid_path_from + '/' + c, valid_path_to + '/' + c)
\ No newline at end of file
+  link(train_path_from + '\\' + c, train_path_to + '\\' + c)
+  link(valid_path_from + '\\' + c, valid_path_to + '\\' + c)
\ No newline at end of file
diff --git a/cnn_class2/siamese.py b/cnn_class2/siamese.py
index 4c43f163..1df2c634 100644
--- a/cnn_class2/siamese.py
+++ b/cnn_class2/siamese.py
@@ -4,21 +4,23 @@
# Note: you may need to update your version of future
# sudo pip install -U future
-from keras.layers import Input, Lambda, Dense, Flatten, Conv2D, BatchNormalization, Activation, MaxPooling2D
-from keras.models import Model
-from keras.preprocessing import image
+from tensorflow.keras.layers import (Input, Lambda, Dense, Flatten, Conv2D, #type: ignore
+    BatchNormalization, Activation, MaxPooling2D)
+from tensorflow.keras.models import Model #type: ignore
+from tensorflow.keras.preprocessing import image #type: ignore
-import keras.backend as K
+import tensorflow.keras.backend as K #type: ignore
import numpy as np
import matplotlib.pyplot as plt
+import tensorflow as tf
from glob import glob
from collections import Counter
# get the data from: http://vision.ucsd.edu/content/yale-face-database
-files = glob('../large_files/yalefaces/subject*')
+files = glob('.\\large_files\\yalefaces\\subject*')
# easier to randomize later
np.random.shuffle(files)
@@ -54,7 +56,7 @@ def load_img(filepath):
# all the filenames are something like 'subject13.happy'
labels = np.zeros(N)
for i, f in enumerate(files):
-  filename = f.rsplit('/', 1)[-1]
+  filename = f.rsplit('\\', 1)[-1]
subject_num = filename.split('.', 1)[0]
# subtract 1 since the filenames start from 1
@@ -229,6 +231,32 @@ def test_generator():
yield [x1, x2], y
+train_dataset = tf.data.Dataset.from_generator(
+  train_generator,
+  output_signature=(
+    (
+      tf.TensorSpec(shape=(None, *img.shape), dtype=tf.float32),  # x_batch_1
+      tf.TensorSpec(shape=(None, *img.shape), dtype=tf.float32)   # x_batch_2
+    ),
+    tf.TensorSpec(shape=(None,), dtype=tf.float32)  # y_batch
+  )
+)
+
+test_dataset = tf.data.Dataset.from_generator(
+  test_generator,
+  output_signature=(
+    (
+      tf.TensorSpec(shape=(None, *img.shape), dtype=tf.float32),  # x_batch_1
+      tf.TensorSpec(shape=(None, *img.shape), dtype=tf.float32)   # x_batch_2
+    ),
+    tf.TensorSpec(shape=(None,), dtype=tf.float32)  # y_batch
+  )
+)
+
+# the generators already yield whole batches (hence the leading None dims),
+# so don't batch again here; just prefetch for performance
+train_dataset = train_dataset.prefetch(tf.data.AUTOTUNE)
+test_dataset = test_dataset.prefetch(tf.data.AUTOTUNE)
+
# build the base neural network
@@ -426,10 +454,10 @@ def get_test_accuracy(threshold=0.85):
# fit the model
r = model.fit(
-  train_generator(),
+  train_dataset,
steps_per_epoch=train_steps,
epochs=20,
-  validation_data=test_generator(),
+  validation_data=test_dataset,
validation_steps=valid_steps,
)
diff --git a/cnn_class2/style_transfer1.py b/cnn_class2/style_transfer1.py
index 421a0a19..1ec6ae0c 100644
--- a/cnn_class2/style_transfer1.py
+++ b/cnn_class2/style_transfer1.py
@@ -9,15 +9,12 @@
# In this script, we will focus on generating the content
# E.g. given an image, can we recreate the same image
-from keras.layers import Input, Lambda, Dense, Flatten
-from keras.layers import AveragePooling2D, MaxPooling2D
-from keras.layers.convolutional import Conv2D
-from keras.models import Model, Sequential
-from keras.applications.vgg16 import VGG16
-from keras.applications.vgg16 import preprocess_input
-from keras.preprocessing import image
-
-import keras.backend as K
+from tensorflow.keras.layers import AveragePooling2D, MaxPooling2D, Conv2D #type: ignore
+from tensorflow.keras.models import Model, clone_model #type: ignore
+from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input #type: ignore
+from tensorflow.keras.preprocessing import image #type: ignore
+
+import tensorflow.keras.backend as K #type: ignore
import numpy as np
import matplotlib.pyplot as plt
@@ -25,14 +22,15 @@
import tensorflow as tf
-if tf.__version__.startswith('2'):
-  tf.compat.v1.disable_eager_execution()
+#if tf.__version__.startswith('2'):
+#  tf.compat.v1.disable_eager_execution()
def VGG16_AvgPool(shape):
# we want to account for features across the entire image
# so get rid of the maxpool which throws away information
vgg = VGG16(input_shape=shape, weights='imagenet', include_top=False)
+  # clone VGG16 while swapping every max-pool for average pooling, then
+  # copy over the pretrained weights (pooling layers have no weights,
+  # so the weight lists still line up)
+  def swap_pool(layer):
+    if isinstance(layer, MaxPooling2D):
+      return AveragePooling2D(pool_size=layer.pool_size, strides=layer.strides, padding=layer.padding)
+    return layer.__class__.from_config(layer.get_config())
+  vgg_clone = clone_model(vgg, clone_function=swap_pool)
+  vgg_clone.set_weights(vgg.get_weights())
# new_model = Sequential()
# for layer in vgg.layers:
@@ -42,16 +40,17 @@ def VGG16_AvgPool(shape):
#   else:
#     new_model.add(layer)
-  i = vgg.input
-  x = i
-  for layer in vgg.layers:
-    if layer.__class__ == MaxPooling2D:
-      # replace it with average pooling
-      x = AveragePooling2D()(x)
-    else:
-      x = layer(x)
-  return Model(i, x)
+  return vgg_clone
def VGG16_AvgPool_CutOff(shape, num_convs):
# there are 13 convolutions in total
@@ -98,12 +97,13 @@ def scale_img(x):
return x
+
if __name__ == '__main__':
# open an image
# feel free to try your own
# path = '../large_files/caltech101/101_ObjectCategories/elephant/image_0002.jpg'
-  path = 'content/elephant.jpg'
+  path = '.\\cnn_class2\\content\\elephant.jpg'
img = image.load_img(path)
# convert image to array and preprocess for vgg
@@ -116,8 +116,8 @@ def scale_img(x):
shape = x.shape[1:]
# see the image
-  # plt.imshow(img)
-  # plt.show()
+  plt.imshow(img)
+  plt.show()
# make a content model
@@ -131,16 +131,17 @@ def scale_img(x):
# try to match the image
# define our loss in keras
-  loss = K.mean(K.square(target - content_model.output))
-
-  # gradients which are needed by the optimizer
-  grads = K.gradients(loss, content_model.input)
-
-  # just like theano.function
-  get_loss_and_grads = K.function(
-    inputs=[content_model.input],
-    outputs=[loss] + grads
-  )
+  #loss_layer = Lambda(lambda inputs: K.mean(K.square(inputs[0] - inputs[1])))
+  #loss = loss_layer([target, content_model.output])
+
+  def get_loss_and_grads(inputs):
+    with tf.GradientTape() as tape:
+      tape.watch(inputs)
+      # Compute the loss as the mean squared difference between target and model output
+      loss_value = K.mean(K.square(target - content_model(inputs)))
+    # Compute the gradient of the loss with respect to the inputs
+    grads_value = tape.gradient(loss_value, inputs)
+    return loss_value, grads_value
def get_loss_and_grads_wrapper(x_vec):
@@ -155,10 +156,10 @@ def get_loss_and_grads_wrapper(x_vec):
# gradient must also be a 1-D array
# and both loss and gradient must be np.float64
# will get an error otherwise
-
-    l, g = 
get_loss_and_grads([x_vec.reshape(*batch_shape)]) - return l.astype(np.float64), g.flatten().astype(np.float64) - + x_tensor = tf.convert_to_tensor(x_vec.reshape(*batch_shape), dtype=tf.float32) + l, g = get_loss_and_grads(x_tensor) + #l, g = get_loss_and_grads(x_vec.reshape(*batch_shape)) + return l.numpy().astype(np.float64), g.numpy().flatten().astype(np.float64) from datetime import datetime diff --git a/cnn_class2/style_transfer2.py b/cnn_class2/style_transfer2.py index f385ad53..fb05340e 100644 --- a/cnn_class2/style_transfer2.py +++ b/cnn_class2/style_transfer2.py @@ -11,10 +11,12 @@ # But NOT the same content. # It should capture only the essence of the style. -from keras.models import Model, Sequential -from keras.applications.vgg16 import preprocess_input -from keras.preprocessing import image -from keras.applications.vgg16 import VGG16 +from tensorflow.keras.models import Model #type: ignore +from tensorflow.keras.applications.vgg16 import preprocess_input #type: ignore +from tensorflow.keras.preprocessing import image #type: ignore +#from keras.applications.vgg16 import VGG16 +import tensorflow as tf +from tensorflow.keras.layers import Layer, Lambda #type:ignore from style_transfer1 import VGG16_AvgPool, unpreprocess, scale_img # from skimage.transform import resize @@ -23,24 +25,41 @@ import numpy as np import matplotlib.pyplot as plt -import keras.backend as K +import tensorflow.keras.backend as K #type: ignore -def gram_matrix(img): - # input is (H, W, C) (C = # feature maps) - # we first need to convert it to (C, H*W) - X = K.batch_flatten(K.permute_dimensions(img, (2, 0, 1))) +# def gram_matrix(img): +# # input is (H, W, C) (C = # feature maps) +# # we first need to convert it to (C, H*W) +# X = K.batch_flatten(K.permute_dimensions(img, (2, 0, 1))) - # now, calculate the gram matrix - # gram = XX^T / N - # the constant is not important since we'll be weighting these - G = K.dot(X, K.transpose(X)) / img.get_shape().num_elements() - return G +# # now, calculate the gram matrix +# # gram = XX^T / N +# # the constant is not important since we'll be weighting these +# G = K.dot(X, K.transpose(X))/img.get_shape().num_elements() +# return G + +class GramMatrixLayer(Layer): + def call(self, inputs): + # Input shape is expected to be (H, W, C) + # Permute dimensions to (C, H, W) + permuted_img = tf.transpose(inputs, perm=[2, 0, 1]) # (C, H, W) + + # Flatten the permuted image to (C, H*W) + flattened_img = tf.reshape(permuted_img, (tf.shape(permuted_img)[0], -1)) # (C, H*W) + + # Calculate the Gram matrix + num_elements = tf.cast(tf.reduce_prod(K.int_shape(inputs)[1:]), tf.float32) + G = K.dot(flattened_img, K.transpose(flattened_img)) / num_elements + return G + +def gram_matrix(img): + return GramMatrixLayer()(img) def style_loss(y, t): - return K.mean(K.square(gram_matrix(y) - gram_matrix(t))) + return Lambda(lambda x: K.mean(K.square(gram_matrix(x[0]) - gram_matrix(x[1]))))([y, t]) # let's generalize this and put it into a function @@ -69,7 +88,7 @@ def minimize(fn, epochs, batch_shape): if __name__ == '__main__': # try these, or pick your own! 
-  path = 'styles/starrynight.jpg'
+  path = '.\\cnn_class2\\styles\\starrynight.jpg'
# path = 'styles/flowercarrier.jpg'
# path = 'styles/monalisa.jpg'
# path = 'styles/lesdemoisellesdavignon.jpg'
@@ -82,8 +101,8 @@ def minimize(fn, epochs, batch_shape):
x = image.img_to_array(img)
# look at the image
-  # plt.imshow(x)
-  # plt.show()
+  plt.imshow(x)
+  plt.show()
# make it (1, H, W, C)
x = np.expand_dims(x, axis=0)
@@ -103,7 +122,7 @@ def minimize(fn, epochs, batch_shape):
-  # Note: need to select output at index 1, since outputs at
-  # index 0 correspond to the original vgg with maxpool
+  # Note: the cloned avg-pool model's layers each have a single output
+  # node, so we can simply take layer.output here
symbolic_conv_outputs = [
-    layer.get_output_at(1) for layer in vgg.layers \
+    vgg.get_layer(layer.name).output for layer in vgg.layers
if layer.name.endswith('conv1')
]
@@ -120,24 +139,31 @@ def minimize(fn, epochs, batch_shape):
style_layers_outputs = [K.variable(y) for y in multi_output_model.predict(x)]
# calculate the total style loss
-  loss = 0
-  for symbolic, actual in zip(symbolic_conv_outputs, style_layers_outputs):
-    # gram_matrix() expects a (H, W, C) as input
-    loss += style_loss(symbolic[0], actual[0])
-
-  grads = K.gradients(loss, multi_output_model.input)
-
-  # just like theano.function
-  get_loss_and_grads = K.function(
-    inputs=[multi_output_model.input],
-    outputs=[loss] + grads
-  )
+  def get_loss_and_grads(inputs):
+    inputs = tf.convert_to_tensor(inputs, dtype=tf.float32)  # Ensure it's a tensor
+    with tf.GradientTape() as tape:
+      tape.watch(inputs)
+      # run the multi-output model on the current image so the style losses
+      # (and their gradients) are computed eagerly
+      outputs = multi_output_model(inputs)
+      # Calculate the total style loss
+      loss_value = 0
+      for output, actual in zip(outputs, style_layers_outputs):
+        loss_value += style_loss(output[0], actual[0])
+    # Compute gradients
+    grads_value = tape.gradient(loss_value, inputs)
+    return loss_value, grads_value
def get_loss_and_grads_wrapper(x_vec):
-    l, g = get_loss_and_grads([x_vec.reshape(*batch_shape)])
-    return l.astype(np.float64), g.flatten().astype(np.float64)
-
+    # Convert the 1-D array back to the appropriate tensor shape
+    x_tensor = tf.convert_to_tensor(x_vec.reshape(*batch_shape), dtype=tf.float32)
+
+    # Get the loss and gradients
+    l, g = get_loss_and_grads(x_tensor)
+
+    # Return the loss and the gradients as required by the optimizer
+    return l.numpy().astype(np.float64), g.numpy().flatten().astype(np.float64)
final_img = minimize(get_loss_and_grads_wrapper, 10, batch_shape)
plt.imshow(scale_img(final_img))
diff --git a/cnn_class2/style_transfer3.py b/cnn_class2/style_transfer3.py
index 8f383d1d..0891ec12 100644
--- a/cnn_class2/style_transfer3.py
+++ b/cnn_class2/style_transfer3.py
@@ -13,22 +13,23 @@
# We accomplish this by balancing the content loss
# and style loss simultaneously. 
-from keras.layers import Input, Lambda, Dense, Flatten -from keras.layers import AveragePooling2D, MaxPooling2D -from keras.layers.convolutional import Conv2D -from keras.models import Model, Sequential -from keras.applications.vgg16 import VGG16 -from keras.applications.vgg16 import preprocess_input -from keras.preprocessing import image -from skimage.transform import resize - -import keras.backend as K +from tensorflow.keras.layers import Layer #type: ignore #Input, Lambda, Dense, Flatten +# from keras.layers import AveragePooling2D, MaxPooling2D +# from keras.layers.convolutional import Conv2D +from tensorflow.keras.models import Model #type: ignore +# from keras.applications.vgg16 import VGG16 +from tensorflow.keras.applications.vgg16 import preprocess_input #type: ignore +from tensorflow.keras.preprocessing import image #type: ignore +#from skimage.transform import resize + +import tensorflow.keras.backend as K #type: ignore +import tensorflow as tf import numpy as np import matplotlib.pyplot as plt -from style_transfer1 import VGG16_AvgPool, VGG16_AvgPool_CutOff, unpreprocess, scale_img -from style_transfer2 import gram_matrix, style_loss, minimize -from scipy.optimize import fmin_l_bfgs_b +from style_transfer1 import VGG16_AvgPool, scale_img +from style_transfer2 import style_loss, minimize +#from scipy.optimize import fmin_l_bfgs_b # load the content image @@ -47,7 +48,7 @@ def load_img_and_preprocess(path, shape=None): content_img = load_img_and_preprocess( # '../large_files/caltech101/101_ObjectCategories/elephant/image_0002.jpg', # 'batman.jpg', - 'content/sydney.jpg', + '.\\cnn_class2\\content\\sydney.jpg', # (225, 300), ) @@ -58,7 +59,7 @@ def load_img_and_preprocess(path, shape=None): # 'styles/starrynight.jpg', # 'styles/flowercarrier.jpg', # 'styles/monalisa.jpg', - 'styles/lesdemoisellesdavignon.jpg', + '.\\cnn_class2\\styles\\lesdemoisellesdavignon.jpg', (h, w) ) @@ -78,16 +79,16 @@ def load_img_and_preprocess(path, shape=None): # we only want 1 output # remember you can call vgg.summary() to see a list of layers # 1,2,4,5,7-9,11-13,15-17 -content_model = Model(vgg.input, vgg.layers[13].get_output_at(0)) -content_target = K.variable(content_model.predict(content_img)) +content_model = Model(vgg.input, vgg.layers[13].output) +content_target = tf.Variable(content_model.predict(content_img)) # create the style model # we want multiple outputs # we will take the same approach as in style_transfer2.py symbolic_conv_outputs = [ - layer.get_output_at(1) for layer in vgg.layers \ - if layer.name.endswith('conv1') + vgg.get_layer(layer.name).output for layer in vgg.layers + if layer.name.endswith('conv1') ] # make a big model that outputs multiple layers' outputs @@ -103,7 +104,22 @@ def load_img_and_preprocess(path, shape=None): # create the total loss which is the sum of content + style loss -loss = K.mean(K.square(content_model.output - content_target)) +#loss = K.mean(K.square(content_model.output - content_target)) + +class ContentLossLayer(Layer): + def __init__(self, content_target, **kwargs): + super(ContentLossLayer, self).__init__(**kwargs) + self.content_target = content_target + + def call(self, inputs): + return tf.reduce_mean(tf.square(inputs - self.content_target)) + +with tf.GradientTape() as tape: + # Instantiate the content loss layer + content_loss_layer = ContentLossLayer(content_target) + + # Now compute the loss + loss = content_loss_layer(content_model.output) for w, symbolic, actual in zip(style_weights, symbolic_conv_outputs, style_layers_outputs): # 
gram_matrix() expects a (H, W, C) as input @@ -113,7 +129,7 @@ def load_img_and_preprocess(path, shape=None): # once again, create the gradients and loss + grads function # note: it doesn't matter which model's input you use # they are both pointing to the same keras Input layer in memory -grads = K.gradients(loss, vgg.input) +grads = tape.gradient(loss, vgg.input) # just like theano.function get_loss_and_grads = K.function( diff --git a/cnn_class2/tf_resnet.py b/cnn_class2/tf_resnet.py index 4a3c5fa2..43129c9a 100644 --- a/cnn_class2/tf_resnet.py +++ b/cnn_class2/tf_resnet.py @@ -11,14 +11,14 @@ # compared to keras import tensorflow as tf import numpy as np -import matplotlib.pyplot as plt +#import matplotlib.pyplot as plt import keras -from keras.applications.resnet50 import ResNet50 -from keras.models import Model -from keras.preprocessing import image -from keras.layers import Dense -from keras.applications.resnet50 import preprocess_input, decode_predictions +from tensorflow.keras.applications.resnet50 import ResNet50 +from tensorflow.keras.models import Model +#from tensorflow.keras.preprocessing import image +from tensorflow.keras.layers import Dense +#from tensorflow.keras.applications.resnet50 import preprocess_input, decode_predictions from tf_resnet_convblock import ConvLayer, BatchNormLayer, ConvBlock from tf_resnet_identity_block import IdentityBlock diff --git a/cnn_class2/tf_resnet_convblock.py b/cnn_class2/tf_resnet_convblock.py index 397f160b..5619bf67 100644 --- a/cnn_class2/tf_resnet_convblock.py +++ b/cnn_class2/tf_resnet_convblock.py @@ -17,6 +17,7 @@ def init_filter(d, mi, mo, stride): class ConvLayer: def __init__(self, d, mi, mo, stride=2, padding='VALID'): + super().__init__() self.W = tf.Variable(init_filter(d, mi, mo, stride)) self.b = tf.Variable(np.zeros(mo, dtype=np.float32)) self.stride = stride @@ -50,6 +51,7 @@ def get_params(self): class BatchNormLayer: def __init__(self, D): + super().__init__() self.running_mean = tf.Variable(np.zeros(D, dtype=np.float32), trainable=False) self.running_var = tf.Variable(np.ones(D, dtype=np.float32), trainable=False) self.gamma = tf.Variable(np.ones(D, dtype=np.float32)) @@ -82,6 +84,7 @@ def get_params(self): class ConvBlock: def __init__(self, mi, fm_sizes, stride=2, activation=tf.nn.relu): + super().__init__() # conv1, conv2, conv3 # note: # feature maps shortcut = # feauture maps conv 3 assert(len(fm_sizes) == 3) diff --git a/cnn_class2/tf_resnet_first_layers.py b/cnn_class2/tf_resnet_first_layers.py index 9157b65c..188ee731 100644 --- a/cnn_class2/tf_resnet_first_layers.py +++ b/cnn_class2/tf_resnet_first_layers.py @@ -11,13 +11,13 @@ # compared to keras import tensorflow as tf import numpy as np -import matplotlib.pyplot as plt +#import matplotlib.pyplot as plt import keras -from keras.applications.resnet50 import ResNet50 -from keras.models import Model -from keras.preprocessing import image -from keras.applications.resnet50 import preprocess_input, decode_predictions +from tensorflow.keras.applications.resnet50 import ResNet50 +from tensorflow.keras.models import Model +#from keras.preprocessing import image +#from keras.applications.resnet50 import preprocess_input, decode_predictions from tf_resnet_convblock import ConvLayer, BatchNormLayer, ConvBlock @@ -53,6 +53,7 @@ def get_params(self): class MaxPoolLayer: def __init__(self, dim): + super().__init__() self.dim = dim def forward(self, X): @@ -68,6 +69,7 @@ def get_params(self): class PartialResNet: def __init__(self): + super().__init__() self.layers = [ # 
before conv block ConvLayer(d=7, mi=3, mo=64, stride=2, padding='SAME'), diff --git a/cnn_class2/tf_resnet_identity_block.py b/cnn_class2/tf_resnet_identity_block.py index 3e30d30c..b8569f9c 100644 --- a/cnn_class2/tf_resnet_identity_block.py +++ b/cnn_class2/tf_resnet_identity_block.py @@ -8,13 +8,14 @@ import tensorflow as tf import numpy as np -import matplotlib.pyplot as plt +#import matplotlib.pyplot as plt from tf_resnet_convblock import ConvLayer, BatchNormLayer class IdentityBlock: def __init__(self, mi, fm_sizes, activation=tf.nn.relu): + super().__init__() # conv1, conv2, conv3 # note: # feature maps shortcut = # feauture maps conv 3 assert(len(fm_sizes) == 3) diff --git a/cnn_class2/use_pretrained_weights_resnet.py b/cnn_class2/use_pretrained_weights_resnet.py index 8f3aae71..41ffacce 100644 --- a/cnn_class2/use_pretrained_weights_resnet.py +++ b/cnn_class2/use_pretrained_weights_resnet.py @@ -6,12 +6,12 @@ # Note: you may need to update your version of future # sudo pip install -U future -from keras.layers import Input, Lambda, Dense, Flatten -from keras.models import Model -from keras.applications.resnet import ResNet50, preprocess_input +from tensorflow.keras.layers import Dense, Flatten +from tensorflow.keras.models import Model +from tensorflow.keras.applications.resnet import ResNet50, preprocess_input # from keras.applications.inception_v3 import InceptionV3, preprocess_input -from keras.preprocessing import image -from keras.preprocessing.image import ImageDataGenerator +from tensorflow.keras.preprocessing import image +from tensorflow.keras.preprocessing.image import ImageDataGenerator from sklearn.metrics import confusion_matrix import numpy as np @@ -32,17 +32,17 @@ # valid_path = '../large_files/blood_cell_images/TEST' # https://www.kaggle.com/moltean/fruits -# train_path = '../large_files/fruits-360/Training' -# valid_path = '../large_files/fruits-360/Validation' -train_path = '../large_files/fruits-360-small/Training' -valid_path = '../large_files/fruits-360-small/Validation' +train_path = '.\\large_files\\fruits-360\\Training' +valid_path = '.\\large_files\\fruits-360\\Validation' +#train_path = '.\\large_files\\fruits-360-small\\Training' +#valid_path = '.\\large_files\\fruits-360-small\\Validation' # useful for getting number of files -image_files = glob(train_path + '/*/*.jp*g') -valid_image_files = glob(valid_path + '/*/*.jp*g') +image_files = glob(train_path + '\\*\\*.jp*g') +valid_image_files = glob(valid_path + '\\*\\*.jp*g') # useful for getting number of classes -folders = glob(train_path + '/*') +folders = glob(train_path + '\\*') # look at an image for fun diff --git a/cnn_class2/use_pretrained_weights_vgg.py b/cnn_class2/use_pretrained_weights_vgg.py index 849dd9f6..0338507e 100644 --- a/cnn_class2/use_pretrained_weights_vgg.py +++ b/cnn_class2/use_pretrained_weights_vgg.py @@ -1,16 +1,15 @@ # https://deeplearningcourses.com/c/advanced-computer-vision # https://www.udemy.com/advanced-computer-vision from __future__ import print_function, division -from builtins import range, input +#from builtins import range, input # Note: you may need to update your version of future # sudo pip install -U future -from keras.layers import Input, Lambda, Dense, Flatten -from keras.models import Model -from keras.applications.vgg16 import VGG16 -from keras.applications.vgg16 import preprocess_input -from keras.preprocessing import image -from keras.preprocessing.image import ImageDataGenerator +from tensorflow.keras.layers import Dense, Flatten +from 
tensorflow.keras.models import Model
+from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input
+from tensorflow.keras.preprocessing import image
+from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.metrics import confusion_matrix
import numpy as np
@@ -31,17 +30,17 @@
# valid_path = '../large_files/blood_cell_images/TEST'
# https://www.kaggle.com/moltean/fruits
-train_path = '../large_files/fruits-360/Training'
-valid_path = '../large_files/fruits-360/Validation'
-# train_path = '../large_files/fruits-360-small/Training'
-# valid_path = '../large_files/fruits-360-small/Validation'
+#train_path = '../large_files/fruits-360/Training'
+#valid_path = '../large_files/fruits-360/Validation'
+train_path = '.\\large_files\\fruits-360-small\\Training'
+valid_path = '.\\large_files\\fruits-360-small\\Validation'
-# useful for getting number of files
-image_files = glob(train_path + '/*/*.jp*g')
-valid_image_files = glob(valid_path + '/*/*.jp*g')
+# useful for getting number of files
+image_files = glob(train_path + '\\*\\*.jp*g')
+valid_image_files = glob(valid_path + '\\*\\*.jp*g')
# useful for getting number of classes
-folders = glob(train_path + '/*')
+folders = glob(train_path + '\\*')
# look at an image for fun
diff --git a/keras_examples/ann.py b/keras_examples/ann.py
index 08636b15..857fd95c 100644
--- a/keras_examples/ann.py
+++ b/keras_examples/ann.py
@@ -7,8 +7,8 @@
import matplotlib.pyplot as plt
from util import getKaggleMNIST
-from keras.models import Model
-from keras.layers import Dense, Activation, Input
+from tensorflow.keras.models import Model # type: ignore
+from tensorflow.keras.layers import Dense, Input # type: ignore
# get the data
@@ -58,8 +58,8 @@
plt.show()
# accuracies
-plt.plot(r.history['acc'], label='acc')
-plt.plot(r.history['val_acc'], label='val_acc')
+plt.plot(r.history['accuracy'], label='acc')
+plt.plot(r.history['val_accuracy'], label='val_acc')
plt.legend()
plt.show()
diff --git a/keras_examples/cnn.py b/keras_examples/cnn.py
index 088cc5b2..f0eee61f 100644
--- a/keras_examples/cnn.py
+++ b/keras_examples/cnn.py
@@ -5,14 +5,14 @@
# Note: you may need to update your version of future
# sudo pip install -U future
-from keras.models import Model
-from keras.layers import Dense, Activation, Conv2D, MaxPooling2D, Flatten, Input
+from tensorflow.keras.models import Model # type: ignore
+from tensorflow.keras.layers import Dense, Activation, Conv2D, MaxPooling2D, Flatten, Input # type: ignore
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
-from util import getKaggleMNIST3D, getKaggleFashionMNIST3D, getCIFAR10
+from util import getKaggleFashionMNIST3D
# get the data
@@ -73,8 +73,8 @@
plt.show()
# accuracies
-plt.plot(r.history['acc'], label='acc')
-plt.plot(r.history['val_acc'], label='val_acc')
+plt.plot(r.history['accuracy'], label='acc')
+plt.plot(r.history['val_accuracy'], label='val_acc')
plt.legend()
plt.show()
diff --git a/keras_examples/cnn_dropout_batchnorm.py b/keras_examples/cnn_dropout_batchnorm.py
index f89cd37d..44e89f72 100644
--- a/keras_examples/cnn_dropout_batchnorm.py
+++ b/keras_examples/cnn_dropout_batchnorm.py
@@ -5,14 +5,14 @@
# Note: you may need to update your version of future
# sudo pip install -U future
-from keras.models import Sequential, Model
-from keras.layers import Dense, Activation, Conv2D, MaxPooling2D, Flatten, Dropout, BatchNormalization, Input
+from tensorflow.keras.models import Model
+from tensorflow.keras.layers import Dense, Activation, Conv2D, MaxPooling2D, 
Flatten, Dropout, BatchNormalization, Input import matplotlib.pyplot as plt -import pandas as pd -import numpy as np +#import pandas as pd +#import numpy as np -from util import getKaggleMNIST3D, getKaggleFashionMNIST3D, getCIFAR10 +from util import getKaggleFashionMNIST3D # get the data @@ -76,8 +76,8 @@ plt.show() # accuracies -plt.plot(r.history['acc'], label='acc') -plt.plot(r.history['val_acc'], label='val_acc') +plt.plot(r.history['accuracy'], label='acc') +plt.plot(r.history['val_accuracy'], label='val_acc') plt.legend() plt.show() diff --git a/keras_examples/util.py b/keras_examples/util.py index 2e3af106..22fa6832 100644 --- a/keras_examples/util.py +++ b/keras_examples/util.py @@ -12,12 +12,12 @@ def getKaggleMNIST(): # https://www.kaggle.com/c/digit-recognizer - return getMNISTFormat('../large_files/train.csv') + return getMNISTFormat('.\\large_files\\digit-recognizer\\train.csv') def getKaggleFashionMNIST(): # https://www.kaggle.com/zalando-research/fashionmnist - return getMNISTFormat('../large_files/fashionmnist/fashion-mnist_train.csv') + return getMNISTFormat('.\\large_files\\fashionmnist\\fashion-mnist_train.csv') def getMNISTFormat(path): # MNIST data: diff --git a/nlp_class2/bow_classifier.py b/nlp_class2/bow_classifier.py index 25588e3b..70efdde6 100644 --- a/nlp_class2/bow_classifier.py +++ b/nlp_class2/bow_classifier.py @@ -32,7 +32,7 @@ def __init__(self): word2vec = {} embedding = [] idx2word = [] - with open('../large_files/glove.6B/glove.6B.50d.txt') as f: + with open('../large_files/glove.6B/glove.6B.50d.txt', encoding='utf-8') as f: # is just a space-separated text file in the format: # word vec[0] vec[1] vec[2] ... for line in f: diff --git a/nlp_class2/cc_matrix_50.npy b/nlp_class2/cc_matrix_50.npy new file mode 100644 index 00000000..508d3a6b Binary files /dev/null and b/nlp_class2/cc_matrix_50.npy differ diff --git a/nlp_class2/glove.py b/nlp_class2/glove.py index b46c13f2..fdd9e6fa 100644 --- a/nlp_class2/glove.py +++ b/nlp_class2/glove.py @@ -20,7 +20,7 @@ import sys sys.path.append(os.path.abspath('..')) from rnn_class.util import get_wikipedia_data -from rnn_class.brown import get_sentences_with_word2idx_limit_vocab, get_sentences_with_word2idx +from rnn_class.brown import get_sentences_with_word2idx_limit_vocab # using ALS, what's the least # files to get correct analogies? 
# use this for word2vec training to make it faster @@ -120,7 +120,7 @@ def fit(self, sentences, cc_matrix=None, learning_rate=1e-4, reg=0.1, xmax=100, costs = [] - sentence_indexes = range(len(sentences)) + #sentence_indexes = range(len(sentences)) for epoch in range(epochs): delta = W.dot(U.T) + b.reshape(V, 1) + c.reshape(1, V) + mu - logX cost = ( fX * delta * delta ).sum() diff --git a/nlp_class2/glove_model_50.npz b/nlp_class2/glove_model_50.npz new file mode 100644 index 00000000..56e47511 Binary files /dev/null and b/nlp_class2/glove_model_50.npz differ diff --git a/nlp_class2/glove_svd.py b/nlp_class2/glove_svd.py index a0fd3c0a..a8db4f92 100644 --- a/nlp_class2/glove_svd.py +++ b/nlp_class2/glove_svd.py @@ -14,14 +14,13 @@ from sklearn.decomposition import TruncatedSVD from datetime import datetime -from sklearn.utils import shuffle from util import find_analogies import sys sys.path.append(os.path.abspath('..')) from rnn_class.util import get_wikipedia_data -from rnn_class.brown import get_sentences_with_word2idx_limit_vocab, get_sentences_with_word2idx +from rnn_class.brown import get_sentences_with_word2idx_limit_vocab class Glove: diff --git a/nlp_class2/glove_tf.py b/nlp_class2/glove_tf.py index 9db18bb4..8986ad94 100644 --- a/nlp_class2/glove_tf.py +++ b/nlp_class2/glove_tf.py @@ -14,13 +14,13 @@ import matplotlib.pyplot as plt from datetime import datetime -from sklearn.utils import shuffle +#from sklearn.utils import shuffle from util import find_analogies import sys sys.path.append(os.path.abspath('..')) from rnn_class.util import get_wikipedia_data -from rnn_class.brown import get_sentences_with_word2idx_limit_vocab, get_sentences_with_word2idx +from rnn_class.brown import get_sentences_with_word2idx_limit_vocab if tf.__version__.startswith('2'): tf.compat.v1.disable_eager_execution() @@ -141,7 +141,7 @@ def fit(self, sentences, cc_matrix=None, learning_rate=1e-4, reg=0.1, xmax=100, session.run(init) costs = [] - sentence_indexes = range(len(sentences)) + #sentence_indexes = range(len(sentences)) for epoch in range(epochs): c, _ = session.run((cost, train_op), feed_dict={tfLogX: logX, tffX: fX}) print("epoch:", epoch, "cost:", c) @@ -190,7 +190,7 @@ def main(we_file, w2i_file, use_brown=True, n_files=50): V = len(word2idx) model = Glove(100, V, 10) - model.fit(sentences, cc_matrix=cc_matrix, epochs=200) + model.fit(sentences, cc_matrix=cc_matrix, epochs=10000) model.save(we_file) diff --git a/nlp_class2/glove_word2idx_50.json b/nlp_class2/glove_word2idx_50.json new file mode 100644 index 00000000..a3142b0f --- /dev/null +++ b/nlp_class2/glove_word2idx_50.json @@ -0,0 +1 @@ +{"START": 0, "END": 1, "the": 2, "of": 3, "and": 4, "in": 5, "to": 6, "a": 7, "as": 8, "is": 9, "was": 10, "for": 11, "that": 12, "by": 13, "with": 14, "on": 15, "from": 16, "his": 17, "are": 18, "it": 19, "an": 20, "at": 21, "he": 22, "or": 23, "which": 24, "be": 25, "were": 26, "this": 27, "not": 28, "have": 29, "also": 30, "had": 31, "their": 32, "has": 33, "its": 34, "but": 35, "one": 36, "first": 37, "other": 38, "they": 39, "been": 40, "such": 41, "after": 42, "who": 43, "more": 44, "new": 45, "some": 46, "most": 47, "used": 48, "can": 49, "into": 50, "two": 51, "all": 52, "when": 53, "during": 54, "there": 55, "these": 56, "may": 57, "many": 58, "than": 59, "time": 60, "between": 61, "would": 62, "only": 63, "over": 64, "while": 65, "states": 66, "about": 67, "years": 68, "world": 69, "her": 70, "later": 71, "known": 72, "no": 73, "use": 74, "war": 75, "people": 76, "however": 77, "both": 78, 
"including": 79, "united": 80, "where": 81, "made": 82, "became": 83, "him": 84, "being": 85, "city": 86, "american": 87, "under": 88, "through": 89, "century": 90, "called": 91, "early": 92, "state": 93, "since": 94, "them": 95, "system": 96, "then": 97, "three": 98, "up": 99, "government": 100, "part": 101, "number": 102, "if": 103, "out": 104, "well": 105, "often": 106, "several": 107, "because": 108, "any": 109, "work": 110, "before": 111, "i": 112, "national": 113, "she": 114, "so": 115, "against": 116, "each": 117, "could": 118, "same": 119, "year": 120, "us": 121, "film": 122, "although": 123, "until": 124, "found": 125, "second": 126, "form": 127, "according": 128, "following": 129, "example": 130, "will": 131, "around": 132, "british": 133, "include": 134, "like": 135, "name": 136, "those": 137, "different": 138, "due": 139, "did": 140, "english": 141, "among": 142, "began": 143, "major": 144, "within": 145, "another": 146, "life": 147, "large": 148, "high": 149, "based": 150, "french": 151, "series": 152, "even": 153, "language": 154, "general": 155, "group": 156, "international": 157, "much": 158, "using": 159, "population": 160, "north": 161, "power": 162, "music": 163, "south": 164, "modern": 165, "set": 166, "four": 167, "end": 168, "country": 169, "period": 170, "common": 171, "political": 172, "public": 173, "area": 174, "university": 175, "military": 176, "million": 177, "own": 178, "led": 179, "german": 180, "members": 181, "now": 182, "death": 183, "what": 184, "\u2013": 185, "1": 186, "church": 187, "history": 188, "very": 189, "party": 190, "de": 191, "still": 192, "john": 193, "great": 194, "considered": 195, "said": 196, "law": 197, "european": 198, "small": 199, "book": 200, "order": 201, "published": 202, "king": 203, "do": 204, "late": 205, "day": 206, "development": 207, "family": 208, "support": 209, "president": 210, "water": 211, "important": 212, "various": 213, "along": 214, "without": 215, "central": 216, "categories": 217, "developed": 218, "though": 219, "school": 220, "countries": 221, "control": 222, "east": 223, "human": 224, "army": 225, "west": 226, "took": 227, "place": 228, "long": 229, "term": 230, "wrote": 231, "home": 232, "included": 233, "become": 234, "times": 235, "game": 236, "established": 237, "main": 238, "given": 239, "way": 240, "local": 241, "island": 242, "theory": 243, "last": 244, "union": 245, "house": 246, "usually": 247, "age": 248, "similar": 249, "europe": 250, "held": 251, "make": 252, "force": 253, "western": 254, "back": 255, "production": 256, "ii": 257, "left": 258, "systems": 259, "less": 260, "company": 261, "air": 262, "released": 263, "popular": 264, "forces": 265, "social": 266, "roman": 267, "having": 268, "old": 269, "others": 270, "named": 271, "economic": 272, "further": 273, "groups": 274, "empire": 275, "films": 276, "original": 277, "result": 278, "region": 279, "few": 280, "thus": 281, "largest": 282, "point": 283, "role": 284, "court": 285, "case": 286, "former": 287, "described": 288, "team": 289, "march": 290, "works": 291, "written": 292, "land": 293, "five": 294, "process": 295, "service": 296, "languages": 297, "january": 298, "areas": 299, "river": 300, "produced": 301, "per": 302, "single": 303, "games": 304, "research": 305, "june": 306, "july": 307, "sometimes": 308, "december": 309, "came": 310, "2": 311, "rather": 312, "created": 313, "october": 314, "line": 315, "women": 316, "data": 317, "field": 318, "generally": 319, "does": 320, "continued": 321, "down": 322, "york": 323, "september": 324, 
"islands": 325, "received": 326, "how": 327, "should": 328, "black": 329, "france": 330, "england": 331, "played": 332, "greek": 333, "november": 334, "either": 335, "must": 336, "germany": 337, "show": 338, "april": 339, "god": 340, "best": 341, "season": 342, "total": 343, "species": 344, "see": 345, "third": 346, "kingdom": 347, "council": 348, "soviet": 349, "especially": 350, "just": 351, "science": 352, "art": 353, "study": 354, "word": 355, "10": 356, "member": 357, "august": 358, "every": 359, "free": 360, "days": 361, "light": 362, "instead": 363, "won": 364, "body": 365, "act": 366, "sea": 367, "trade": 368, "son": 369, "space": 370, "throughout": 371, "men": 372, "died": 373, "children": 374, "society": 375, "near": 376, "foreign": 377, "we": 378, "london": 379, "significant": 380, "information": 381, "version": 382, "built": 383, "energy": 384, "take": 385, "possible": 386, "northern": 387, "standard": 388, "ancient": 389, "christian": 390, "white": 391, "next": 392, "final": 393, "natural": 394, "despite": 395, "himself": 396, "addition": 397, "again": 398, "league": 399, "introduced": 400, "bc": 401, "design": 402, "upon": 403, "man": 404, "making": 405, "never": 406, "rights": 407, "movement": 408, "right": 409, "position": 410, "least": 411, "eastern": 412, "february": 413, "india": 414, "seen": 415, "traditional": 416, "battle": 417, "southern": 418, "change": 419, "education": 420, "parts": 421, "religious": 422, "terms": 423, "play": 424, "formed": 425, "followed": 426, "almost": 427, "america": 428, "influence": 429, "china": 430, "republic": 431, "once": 432, "together": 433, "3": 434, "father": 435, "culture": 436, "royal": 437, "across": 438, "evidence": 439, "television": 440, "six": 441, "chinese": 442, "civil": 443, "higher": 444, "forms": 445, "little": 446, "off": 447, "20": 448, "elements": 449, "certain": 450, "middle": 451, "office": 452, "community": 453, "level": 454, "red": 455, "means": 456, "range": 457, "available": 458, "type": 459, "increased": 460, "lost": 461, "album": 462, "official": 463, "side": 464, "15": 465, "program": 466, "particularly": 467, "numbers": 468, "short": 469, "remained": 470, "young": 471, "itself": 472, "born": 473, "above": 474, "band": 475, "computer": 476, "lower": 477, "special": 478, "present": 479, "nations": 480, "record": 481, "model": 482, "associated": 483, "head": 484, "rule": 485, "thought": 486, "2010": 487, "earth": 488, "particular": 489, "eventually": 490, "low": 491, "latin": 492, "center": 493, "rate": 494, "japanese": 495, "jewish": 496, "college": 497, "good": 498, "services": 499, "words": 500, "minister": 501, "capital": 502, "whose": 503, "2011": 504, "2020": 505, "writers": 506, "character": 507, "leading": 508, "you": 509, "story": 510, "believed": 511, "4": 512, "cities": 513, "5": 514, "announced": 515, "referred": 516, "2021": 517, "allowed": 518, "taken": 519, "located": 520, "building": 521, "went": 522, "typically": 523, "structure": 524, "2022": 525, "moved": 526, "beginning": 527, "africa": 528, "male": 529, "months": 530, "market": 531, "spanish": 532, "meaning": 533, "provided": 534, "source": 535, "food": 536, "12": 537, "writing": 538, "nature": 539, "industry": 540, "st": 541, "living": 542, "project": 543, "2023": 544, "function": 545, "live": 546, "italian": 547, "half": 548, "able": 549, "cases": 550, "effect": 551, "gave": 552, "provide": 553, "style": 554, "current": 555, "appeared": 556, "required": 557, "top": 558, "2008": 559, "served": 560, "radio": 561, "health": 562, "saw": 563, 
"started": 564, "value": 565, "title": 566, "related": 567, "election": 568, "economy": 569, "2012": 570, "aircraft": 571, "books": 572, "town": 573, "includes": 574, "strong": 575, "william": 576, "companies": 577, "lead": 578, "open": 579, "stated": 580, "average": 581, "2000": 582, "network": 583, "events": 584, "emperor": 585, "today": 586, "c": 587, "players": 588, "majority": 589, "far": 590, "outside": 591, "policy": 592, "full": 593, "view": 594, "2009": 595, "mass": 596, "complex": 597, "30": 598, "return": 599, "returned": 600, "sent": 601, "increase": 602, "2007": 603, "independent": 604, "working": 605, "person": 606, "practice": 607, "limited": 608, "rock": 609, "2019": 610, "legal": 611, "2015": 612, "russian": 613, "brought": 614, "founded": 615, "caused": 616, "features": 617, "close": 618, "individual": 619, "private": 620, "technology": 621, "characters": 622, "earlier": 623, "reported": 624, "size": 625, "indian": 626, "james": 627, "whether": 628, "business": 629, "award": 630, "might": 631, "catholic": 632, "2014": 633, "designed": 634, "class": 635, "cultural": 636, "material": 637, "2016": 638, "commonly": 639, "changes": 640, "primary": 641, "action": 642, "recorded": 643, "proposed": 644, "growth": 645, "2017": 646, "therefore": 647, "studies": 648, "types": 649, "schools": 650, "widely": 651, "japan": 652, "specific": 653, "larger": 654, "too": 655, "2013": 656, "2018": 657, "prime": 658, "cause": 659, "code": 660, "themselves": 661, "subject": 662, "mostly": 663, "african": 664, "2006": 665, "charles": 666, "interest": 667, "historical": 668, "surface": 669, "club": 670, "territory": 671, "run": 672, "11": 673, "video": 674, "base": 675, "25": 676, "media": 677, "seven": 678, "uses": 679, "physical": 680, "software": 681, "effects": 682, "students": 683, "canada": 684, "success": 685, "greater": 686, "parliament": 687, "originally": 688, "performance": 689, "names": 690, "help": 691, "away": 692, "always": 693, "defined": 694, "list": 695, "future": 696, "100": 697, "produce": 698, "likely": 699, "italy": 700, "approximately": 701, "sound": 702, "federal": 703, "county": 704, "replaced": 705, "6": 706, "david": 707, "billion": 708, "song": 709, "recent": 710, "coast": 711, "key": 712, "involved": 713, "added": 714, "george": 715, "release": 716, "mother": 717, "b": 718, "elected": 719, "via": 720, "eg": 721, "personal": 722, "below": 723, "conditions": 724, "regions": 725, "security": 726, "construction": 727, "2005": 728, "sources": 729, "numerous": 730, "concept": 731, "britain": 732, "come": 733, "division": 734, "records": 735, "attack": 736, "soon": 737, "19th": 738, "idea": 739, "killed": 740, "access": 741, "uk": 742, "longer": 743, "largely": 744, "hand": 745, "fact": 746, "successful": 747, "supported": 748, "star": 749, "remains": 750, "units": 751, "lines": 752, "real": 753, "park": 754, "love": 755, "site": 756, "my": 757, "directly": 758, "create": 759, "results": 760, "scholars": 761, "multiple": 762, "adopted": 763, "metal": 764, "already": 765, "henry": 766, "classical": 767, "association": 768, "reached": 769, "commercial": 770, "put": 771, "method": 772, "variety": 773, "whom": 774, "20th": 775, "independence": 776, "problems": 777, "towards": 778, "direct": 779, "football": 780, "native": 781, "wife": 782, "sold": 783, "need": 784, "la": 785, "tradition": 786, "points": 787, "initially": 788, "8": 789, "era": 790, "claimed": 791, "organization": 792, "medical": 793, "authority": 794, "18": 795, "shows": 796, "products": 797, "scientific": 798, 
"16": 799, "asia": 800, "14": 801, "financial": 802, "relationship": 803, "separate": 804, "centre": 805, "smaller": 806, "worked": 807, "laws": 808, "additional": 809, "performed": 810, "oil": 811, "relations": 812, "letter": 813, "estimated": 814, "fire": 815, "global": 816, "lake": 817, "highest": 818, "artists": 819, "leader": 820, "australia": 821, "peoples": 822, "musical": 823, "cells": 824, "problem": 825, "operations": 826, "report": 827, "discovered": 828, "7": 829, "met": 830, "2001": 831, "career": 832, "event": 833, "response": 834, "status": 835, "centuries": 836, "previous": 837, "methods": 838, "knowledge": 839, "active": 840, "compared": 841, "nearly": 842, "primarily": 843, "2024": 844, "levels": 845, "jews": 846, "pressure": 847, "robert": 848, "highly": 849, "complete": 850, "california": 851, "married": 852, "examples": 853, "gas": 854, "rules": 855, "allow": 856, "2004": 857, "gold": 858, "street": 859, "religion": 860, "campaign": 861, "basis": 862, "true": 863, "treaty": 864, "player": 865, "stage": 866, "placed": 867, "13": 868, "novel": 869, "bank": 870, "give": 871, "cell": 872, "ten": 873, "constitution": 874, "mainly": 875, "committee": 876, "joined": 877, "revolution": 878, "changed": 879, "influenced": 880, "parties": 881, "road": 882, "internet": 883, "agreement": 884, "argued": 885, "divided": 886, "museum": 887, "memory": 888, "better": 889, "academy": 890, "cannot": 891, "worlds": 892, "board": 893, "front": 894, "station": 895, "teams": 896, "philosophy": 897, "spain": 898, "heavy": 899, "individuals": 900, "unit": 901, "percent": 902, "basic": 903, "experience": 904, "our": 905, "congress": 906, "police": 907, "entire": 908, "training": 909, "literature": 910, "israel": 911, "rest": 912, "irish": 913, "grand": 914, "chemical": 915, "arts": 916, "suggested": 917, "money": 918, "troops": 919, "50": 920, "ever": 921, "24": 922, "female": 923, "songs": 924, "paul": 925, "text": 926, "yet": 927, "lack": 928, "relatively": 929, "color": 930, "taking": 931, "appointed": 932, "night": 933, "deaths": 934, "x": 935, "past": 936, "letters": 937, "blue": 938, "latter": 939, "eight": 940, "contains": 941, "shown": 942, "previously": 943, "analysis": 944, "2003": 945, "ended": 946, "animals": 947, "fiction": 948, "ground": 949, "account": 950, "turn": 951, "signed": 952, "marriage": 953, "go": 954, "issues": 955, "potential": 956, "resulting": 957, "whole": 958, "enough": 959, "summer": 960, "opened": 961, "appear": 962, "noted": 963, "decided": 964, "rome": 965, "peace": 966, "attempt": 967, "forced": 968, "issue": 969, "director": 970, "imperial": 971, "daughter": 972, "done": 973, "plan": 974, "paris": 975, "department": 976, "completed": 977, "prior": 978, "1980s": 979, "climate": 980, "temperature": 981, "famous": 982, "test": 983, "ireland": 984, "nuclear": 985, "regional": 986, "accepted": 987, "sense": 988, "thomas": 989, "collection": 990, "1970s": 991, "programs": 992, "powers": 993, "operation": 994, "administration": 995, "russia": 996, "property": 997, "17": 998, "1999": 999, "v": 1000, "green": 1001, "blood": 1002, "origin": 1003, "applied": 1004, "stories": 1005, "treatment": 1006, "ad": 1007, "intended": 1008, "approach": 1009, "length": 1010, "rise": 1011, "passed": 1012, "birth": 1013, "ships": 1014, "move": 1015, "speed": 1016, "functions": 1017, "objects": 1018, "wide": 1019, "hall": 1020, "hours": 1021, "activity": 1022, "difficult": 1023, "probably": 1024, "child": 1025, "date": 1026, "amount": 1027, "2002": 1028, "matter": 1029, "article": 
1030, "district": 1031, "brother": 1032, "behind": 1033, "ideas": 1034, "chief": 1035, "earliest": 1036, "navy": 1037, "exist": 1038, "degree": 1039, "machine": 1040, "opposition": 1041, "industrial": 1042, "democratic": 1043, "loss": 1044, "derived": 1045, "product": 1046, "canadian": 1047, "find": 1048, "creation": 1049, "reduced": 1050, "presence": 1051, "failed": 1052, "becoming": 1053, "simple": 1054, "properties": 1055, "makes": 1056, "m": 1057, "dutch": 1058, "get": 1059, "needed": 1060, "finally": 1061, "provides": 1062, "reference": 1063, "lived": 1064, "institute": 1065, "ability": 1066, "assembly": 1067, "annual": 1068, "me": 1069, "carried": 1070, "21": 1071, "humans": 1072, "notable": 1073, "contemporary": 1074, "declared": 1075, "ones": 1076, "represented": 1077, "playing": 1078, "cost": 1079, "composed": 1080, "appears": 1081, "washington": 1082, "1960s": 1083, "extended": 1084, "leaders": 1085, "frequently": 1086, "kings": 1087, "ie": 1088, "9": 1089, "transport": 1090, "elections": 1091, "armed": 1092, "resulted": 1093, "holy": 1094, "port": 1095, "border": 1096, "start": 1097, "increasing": 1098, "dna": 1099, "feature": 1100, "plants": 1101, "disease": 1102, "combined": 1103, "carbon": 1104, "featured": 1105, "element": 1106, "identified": 1107, "iron": 1108, "starting": 1109, "agreed": 1110, "older": 1111, "refer": 1112, "responsible": 1113, "existence": 1114, "stars": 1115, "necessary": 1116, "operating": 1117, "fall": 1118, "1998": 1119, "location": 1120, "commission": 1121, "materials": 1122, "louis": 1123, "1990s": 1124, "news": 1125, "san": 1126, "regular": 1127, "ice": 1128, "dead": 1129, "read": 1130, "except": 1131, "greatest": 1132, "mission": 1133, "internal": 1134, "1997": 1135, "conflict": 1136, "activities": 1137, "22": 1138, "spread": 1139, "e": 1140, "command": 1141, "opposed": 1142, "section": 1143, "offered": 1144, "professional": 1145, "province": 1146, "airport": 1147, "critical": 1148, "initial": 1149, "40": 1150, "reason": 1151, "efforts": 1152, "festival": 1153, "polish": 1154, "iii": 1155, "remain": 1156, "buildings": 1157, "places": 1158, "claims": 1159, "quickly": 1160, "respectively": 1161, "flight": 1162, "soldiers": 1163, "contrast": 1164, "fourth": 1165, "alternative": 1166, "launched": 1167, "occur": 1168, "management": 1169, "models": 1170, "mexico": 1171, "decision": 1172, "big": 1173, "cup": 1174, "woman": 1175, "plant": 1176, "consists": 1177, "prominent": 1178, "remaining": 1179, "definition": 1180, "presented": 1181, "exchange": 1182, "simply": 1183, "image": 1184, "asked": 1185, "applications": 1186, "claim": 1187, "currently": 1188, "clear": 1189, "wars": 1190, "paper": 1191, "workers": 1192, "believe": 1193, "australian": 1194, "risk": 1195, "continue": 1196, "1990": 1197, "egypt": 1198, "tour": 1199, "environment": 1200, "engineering": 1201, "engine": 1202, "digital": 1203, "channel": 1204, "techniques": 1205, "upper": 1206, "1996": 1207, "entered": 1208, "author": 1209, "poland": 1210, "urban": 1211, "muslim": 1212, "sports": 1213, "wanted": 1214, "values": 1215, "speech": 1216, "directed": 1217, "occurred": 1218, "sexual": 1219, "defeated": 1220, "meeting": 1221, "issued": 1222, "electric": 1223, "1992": 1224, "jesus": 1225, "allowing": 1226, "islamic": 1227, "allows": 1228, "contain": 1229, "mary": 1230, "freedom": 1231, "23": 1232, "1991": 1233, "expressed": 1234, "attacks": 1235, "object": 1236, "course": 1237, "defense": 1238, "acid": 1239, "communities": 1240, "library": 1241, "figure": 1242, "positive": 1243, "expected": 
1244, "quality": 1245, "beyond": 1246, "scale": 1247, "alexander": 1248, "poor": 1249, "magazine": 1250, "race": 1251, "governor": 1252, "unlike": 1253, "richard": 1254, "observed": 1255, "here": 1256, "subsequently": 1257, "minor": 1258, "month": 1259, "growing": 1260, "historian": 1261, "edition": 1262, "turned": 1263, "treaties": 1264, "regarded": 1265, "things": 1266, "sun": 1267, "19": 1268, "organizations": 1269, "versions": 1270, "charge": 1271, "fully": 1272, "families": 1273, "spent": 1274, "structures": 1275, "focus": 1276, "moon": 1277, "medieval": 1278, "conference": 1279, "governments": 1280, "churches": 1281, "fields": 1282, "convention": 1283, "ocean": 1284, "lord": 1285, "1995": 1286, "dynasty": 1287, "mark": 1288, "hold": 1289, "effective": 1290, "d": 1291, "20thcentury": 1292, "institutions": 1293, "distance": 1294, "1994": 1295, "reign": 1296, "orthodox": 1297, "win": 1298, "subsequent": 1299, "recognized": 1300, "helped": 1301, "victory": 1302, "inspired": 1303, "ethnic": 1304, "distinct": 1305, "told": 1306, "formation": 1307, "share": 1308, "ways": 1309, "27": 1310, "n": 1311, "28": 1312, "ship": 1313, "standards": 1314, "impact": 1315, "formal": 1316, "expansion": 1317, "labour": 1318, "critics": 1319, "26": 1320, "direction": 1321, "los": 1322, "attempted": 1323, "prevent": 1324, "f": 1325, "figures": 1326, "notes": 1327, "bands": 1328, "address": 1329, "protection": 1330, "press": 1331, "appearance": 1332, "marked": 1333, "weapons": 1334, "officially": 1335, "instance": 1336, "serve": 1337, "resources": 1338, "content": 1339, "leaving": 1340, "gods": 1341, "friend": 1342, "countrys": 1343, "golden": 1344, "develop": 1345, "negative": 1346, "nation": 1347, "j": 1348, "refused": 1349, "valley": 1350, "showed": 1351, "equal": 1352, "motion": 1353, "factors": 1354, "vote": 1355, "decades": 1356, "stone": 1357, "refers": 1358, "acts": 1359, "heart": 1360, "prince": 1361, "citizens": 1362, "reaction": 1363, "call": 1364, "arrived": 1365, "removed": 1366, "literary": 1367, "grew": 1368, "bill": 1369, "s": 1370, "faith": 1371, "unique": 1372, "sector": 1373, "car": 1374, "income": 1375, "square": 1376, "saint": 1377, "winter": 1378, "1993": 1379, "gained": 1380, "animal": 1381, "writer": 1382, "table": 1383, "double": 1384, "friends": 1385, "invasion": 1386, "distribution": 1387, "communist": 1388, "executive": 1389, "sought": 1390, "giving": 1391, "mean": 1392, "ordered": 1393, "territories": 1394, "overall": 1395, "staff": 1396, "completely": 1397, "increasingly": 1398, "nine": 1399, "justice": 1400, "expanded": 1401, "christianity": 1402, "historians": 1403, "powerful": 1404, "awarded": 1405, "1989": 1406, "specifically": 1407, "foundation": 1408, "politics": 1409, "g": 1410, "americans": 1411, "keep": 1412, "containing": 1413, "hit": 1414, "peter": 1415, "p": 1416, "supreme": 1417, "studio": 1418, "immediately": 1419, "sites": 1420, "advanced": 1421, "inside": 1422, "takes": 1423, "competition": 1424, "notably": 1425, "railway": 1426, "actions": 1427, "actually": 1428, "normal": 1429, "cross": 1430, "theatre": 1431, "h": 1432, "secretary": 1433, "michael": 1434, "pacific": 1435, "r": 1436, "deal": 1437, "rates": 1438, "attention": 1439, "question": 1440, "apollo": 1441, "users": 1442, "significantly": 1443, "understanding": 1444, "student": 1445, "running": 1446, "spoken": 1447, "principle": 1448, "occurs": 1449, "weeks": 1450, "saying": 1451, "application": 1452, "write": 1453, "fish": 1454, "mentioned": 1455, "domestic": 1456, "pope": 1457, "leadership": 1458, 
"rejected": 1459, "raised": 1460, "cold": 1461, "possibly": 1462, "resistance": 1463, "creating": 1464, "extensive": 1465, "equipment": 1466, "whereas": 1467, "theories": 1468, "face": 1469, "hydrogen": 1470, "liberal": 1471, "worldwide": 1472, "oldest": 1473, "relative": 1474, "awards": 1475, "recently": 1476, "depending": 1477, "formula": 1478, "bay": 1479, "joseph": 1480, "identity": 1481, "planned": 1482, "cut": 1483, "brown": 1484, "tax": 1485, "determined": 1486, "plays": 1487, "branch": 1488, "describes": 1489, "authors": 1490, "von": 1491, "windows": 1492, "generation": 1493, "sets": 1494, "crisis": 1495, "mathematics": 1496, "chicago": 1497, "medicine": 1498, "moving": 1499, "hard": 1500, "situation": 1501, "differences": 1502, "cycle": 1503, "processes": 1504, "queen": 1505, "goal": 1506, "belief": 1507, "arab": 1508, "travel": 1509, "volume": 1510, "studied": 1511, "perhaps": 1512, "ultimately": 1513, "tried": 1514, "follows": 1515, "reduce": 1516, "require": 1517, "plans": 1518, "scotland": 1519, "policies": 1520, "kept": 1521, "difference": 1522, "importance": 1523, "stations": 1524, "scientists": 1525, "destroyed": 1526, "devices": 1527, "cover": 1528, "phase": 1529, "texts": 1530, "greece": 1531, "heat": 1532, "context": 1533, "census": 1534, "closed": 1535, "labor": 1536, "granted": 1537, "purpose": 1538, "shared": 1539, "mountains": 1540, "connected": 1541, "indigenous": 1542, "aid": 1543, "equivalent": 1544, "programming": 1545, "arms": 1546, "fell": 1547, "temple": 1548, "intelligence": 1549, "dance": 1550, "bce": 1551, "martin": 1552, "existing": 1553, "meant": 1554, "settlement": 1555, "gives": 1556, "something": 1557, "conservative": 1558, "christ": 1559, "say": 1560, "shot": 1561, "controlled": 1562, "avoid": 1563, "ruled": 1564, "mind": 1565, "architecture": 1566, "regarding": 1567, "deep": 1568, "instruments": 1569, "attempts": 1570, "causes": 1571, "represent": 1572, "electronic": 1573, "communication": 1574, "reach": 1575, "presidential": 1576, "review": 1577, "core": 1578, "etc": 1579, "tv": 1580, "projects": 1581, "proved": 1582, "behavior": 1583, "prize": 1584, "officers": 1585, "price": 1586, "comes": 1587, "actors": 1588, "care": 1589, "says": 1590, "closely": 1591, "1986": 1592, "achieved": 1593, "week": 1594, "flow": 1595, "shortly": 1596, "describe": 1597, "learning": 1598, "universe": 1599, "solution": 1600, "bodies": 1601, "bridge": 1602, "widespread": 1603, "1984": 1604, "conducted": 1605, "views": 1606, "universal": 1607, "toward": 1608, "parents": 1609, "1945": 1610, "reform": 1611, "felt": 1612, "opening": 1613, "kind": 1614, "1980": 1615, "reasons": 1616, "influential": 1617, "environmental": 1618, "fighting": 1619, "christians": 1620, "going": 1621, "captured": 1622, "supply": 1623, "fuel": 1624, "suggests": 1625, "1979": 1626, "31": 1627, "daily": 1628, "29": 1629, "winning": 1630, "1975": 1631, "academic": 1632, "portuguese": 1633, "1985": 1634, "match": 1635, "200": 1636, "crew": 1637, "offer": 1638, "reports": 1639, "nor": 1640, "gdp": 1641, "angeles": 1642, "principles": 1643, "developing": 1644, "capacity": 1645, "providing": 1646, "visited": 1647, "sciences": 1648, "authorities": 1649, "historically": 1650, "constant": 1651, "serious": 1652, "computers": 1653, "episode": 1654, "unknown": 1655, "pass": 1656, "combination": 1657, "van": 1658, "1950s": 1659, "mountain": 1660, "heavily": 1661, "championship": 1662, "weight": 1663, "articles": 1664, "traditionally": 1665, "mathematical": 1666, "pay": 1667, "alongside": 1668, "failure": 1669, 
"contact": 1670, "smith": 1671, "thousands": 1672, "towns": 1673, "round": 1674, "agricultural": 1675, "leave": 1676, "brothers": 1677, "scottish": 1678, "naval": 1679, "defeat": 1680, "physics": 1681, "1970": 1682, "listed": 1683, "effort": 1684, "discovery": 1685, "know": 1686, "citys": 1687, "technical": 1688, "scene": 1689, "colonial": 1690, "solar": 1691, "eu": 1692, "classes": 1693, "dark": 1694, "introduction": 1695, "suffered": 1696, "secondary": 1697, "fifth": 1698, "births": 1699, "requires": 1700, "alliance": 1701, "similarly": 1702, "finished": 1703, "external": 1704, "practices": 1705, "novels": 1706, "particles": 1707, "organized": 1708, "shape": 1709, "room": 1710, "attended": 1711, "hebrew": 1712, "genetic": 1713, "ages": 1714, "edward": 1715, "residents": 1716, "le": 1717, "route": 1718, "t": 1719, "alone": 1720, "build": 1721, "paid": 1722, "statement": 1723, "artist": 1724, "affected": 1725, "secret": 1726, "1983": 1727, "tree": 1728, "online": 1729, "60": 1730, "owned": 1731, "mixed": 1732, "1988": 1733, "courts": 1734, "ranked": 1735, "1968": 1736, "emerged": 1737, "receive": 1738, "positions": 1739, "arabic": 1740, "logic": 1741, "oxygen": 1742, "mobile": 1743, "professor": 1744, "persons": 1745, "contained": 1746, "maintain": 1747, "components": 1748, "acquired": 1749, "maintained": 1750, "host": 1751, "defence": 1752, "moral": 1753, "traditions": 1754, "guitar": 1755, "compounds": 1756, "consider": 1757, "officials": 1758, "becomes": 1759, "minutes": 1760, "target": 1761, "combat": 1762, "village": 1763, "entirely": 1764, "maximum": 1765, "lands": 1766, "fight": 1767, "rivers": 1768, "rare": 1769, "damage": 1770, "agriculture": 1771, "popularity": 1772, "contributed": 1773, "spirit": 1774, "goods": 1775, "roughly": 1776, "symbol": 1777, "voice": 1778, "choice": 1779, "1987": 1780, "aspects": 1781, "typical": 1782, "meet": 1783, "sequence": 1784, "bring": 1785, "carry": 1786, "dedicated": 1787, "easily": 1788, "perform": 1789, "violence": 1790, "constructed": 1791, "publication": 1792, "1969": 1793, "1982": 1794, "ottoman": 1795, "houses": 1796, "jerusalem": 1797, "atlantic": 1798, "christmas": 1799, "evolution": 1800, "banks": 1801, "cast": 1802, "display": 1803, "operated": 1804, "wall": 1805, "18th": 1806, "broadcast": 1807, "cancer": 1808, "slightly": 1809, "1000": 1810, "investment": 1811, "condition": 1812, "senate": 1813, "am": 1814, "trial": 1815, "zone": 1816, "wave": 1817, "republican": 1818, "1972": 1819, "settled": 1820, "k": 1821, "1971": 1822, "descent": 1823, "concluded": 1824, "bible": 1825, "sales": 1826, "comedy": 1827, "permanent": 1828, "hot": 1829, "employed": 1830, "younger": 1831, "hospital": 1832, "atoms": 1833, "orders": 1834, "track": 1835, "frequency": 1836, "confirmed": 1837, "clubs": 1838, "contract": 1839, "ball": 1840, "persian": 1841, "magnetic": 1842, "output": 1843, "device": 1844, "technique": 1845, "causing": 1846, "stable": 1847, "apple": 1848, "forest": 1849, "1974": 1850, "factor": 1851, "bbc": 1852, "electron": 1853, "note": 1854, "signal": 1855, "netherlands": 1856, "asian": 1857, "runs": 1858, "drug": 1859, "measure": 1860, "surrounding": 1861, "sons": 1862, "actual": 1863, "w": 1864, "purposes": 1865, "occupied": 1866, "audience": 1867, "marine": 1868, "otherwise": 1869, "duke": 1870, "spring": 1871, "demand": 1872, "reading": 1873, "post": 1874, "1981": 1875, "sister": 1876, "obtained": 1877, "revealed": 1878, "translation": 1879, "unable": 1880, "improved": 1881, "ibn": 1882, "philosophers": 1883, "rail": 1884, "crime": 
1885, "measures": 1886, "recording": 1887, "fleet": 1888, "molecules": 1889, "joint": 1890, "columbia": 1891, "sign": 1892, "affairs": 1893, "1967": 1894, "follow": 1895, "1976": 1896, "wood": 1897, "brain": 1898, "additionally": 1899, "producing": 1900, "decline": 1901, "1973": 1902, "approved": 1903, "jersey": 1904, "safety": 1905, "fundamental": 1906, "movements": 1907, "nazi": 1908, "split": 1909, "crown": 1910, "populations": 1911, "mental": 1912, "coming": 1913, "silver": 1914, "greatly": 1915, "sides": 1916, "lives": 1917, "expression": 1918, "temperatures": 1919, "vehicles": 1920, "radiation": 1921, "strength": 1922, "setting": 1923, "supporting": 1924, "movie": 1925, "debate": 1926, "al": 1927, "covered": 1928, "accounts": 1929, "seats": 1930, "managed": 1931, "painting": 1932, "protect": 1933, "transfer": 1934, "steel": 1935, "succeeded": 1936, "concepts": 1937, "rapid": 1938, "1978": 1939, "writings": 1940, "calendar": 1941, "womens": 1942, "ran": 1943, "composition": 1944, "images": 1945, "connection": 1946, "el": 1947, "ago": 1948, "visit": 1949, "finland": 1950, "hands": 1951, "forward": 1952, "search": 1953, "hill": 1954, "personnel": 1955, "ministry": 1956, "instrument": 1957, "titled": 1958, "quantum": 1959, "advantage": 1960, "dominant": 1961, "tribes": 1962, "establishment": 1963, "establish": 1964, "0": 1965, "teaching": 1966, "your": 1967, "extent": 1968, "broke": 1969, "networks": 1970, "useful": 1971, "peninsula": 1972, "attributed": 1973, "file": 1974, "argues": 1975, "islam": 1976, "why": 1977, "roles": 1978, "constitutional": 1979, "pieces": 1980, "producer": 1981, "experienced": 1982, "cars": 1983, "musicians": 1984, "script": 1985, "chosen": 1986, "electrons": 1987, "drive": 1988, "southeast": 1989, "quite": 1990, "master": 1991, "dates": 1992, "afghanistan": 1993, "principal": 1994, "severe": 1995, "determine": 1996, "sir": 1997, "rose": 1998, "focused": 1999, "UNKNOWN": 2000} \ No newline at end of file diff --git a/nlp_class2/logistic.py b/nlp_class2/logistic.py index 352c2f57..eda2357e 100644 --- a/nlp_class2/logistic.py +++ b/nlp_class2/logistic.py @@ -47,7 +47,7 @@ # train a logistic model - W = np.random.randn(V, V) / np.sqrt(V) + W = np.random.randn(V, V)/np.sqrt(V) losses = [] epochs = 1 @@ -56,7 +56,7 @@ def softmax(a): a = a - a.max() exp_a = np.exp(a) - return exp_a / exp_a.sum(axis=1, keepdims=True) + return exp_a/exp_a.sum(axis=1, keepdims=True) # what is the loss if we set W = log(bigram_probs)? 
W_bigram = np.log(bigram_probs) @@ -85,19 +85,19 @@ def softmax(a): W = W - lr * inputs.T.dot(predictions - targets) # keep track of the loss - loss = -np.sum(targets * np.log(predictions)) / (n - 1) + loss = -np.sum(targets*np.log(predictions))/(n - 1) losses.append(loss) # keep track of the bigram loss # only do it for the first epoch to avoid redundancy if epoch == 0: bigram_predictions = softmax(inputs.dot(W_bigram)) - bigram_loss = -np.sum(targets * np.log(bigram_predictions)) / (n - 1) + bigram_loss = -np.sum(targets*np.log(bigram_predictions))/(n - 1) bigram_losses.append(bigram_loss) - if j % 10 == 0: - print("epoch:", epoch, "sentence: %s/%s" % (j, len(sentences)), "loss:", loss) + if j%10 == 0: + print(f"epoch: {epoch}, sentence: {j}/{len(sentences)}, loss: {loss}") j += 1 print("Elapsed time training:", datetime.now() - t0) @@ -114,8 +114,8 @@ def smoothed_loss(x, decay=0.99): y = np.zeros(len(x)) last = 0 for t in range(len(x)): - z = decay * last + (1 - decay) * x[t] - y[t] = z / (1 - decay ** (t + 1)) + z = decay*last + (1 - decay)*x[t] + y[t] = z/(1 - decay**(t + 1)) last = z return y diff --git a/nlp_class2/ner_tf.py b/nlp_class2/ner_tf.py index 7f8fa2c1..6c2e7a53 100644 --- a/nlp_class2/ner_tf.py +++ b/nlp_class2/ner_tf.py @@ -13,16 +13,15 @@ import os import sys sys.path.append(os.path.abspath('..')) -from pos_baseline import get_data +#from pos_baseline import get_data from sklearn.utils import shuffle from util import init_weight from datetime import datetime -from sklearn.metrics import f1_score - -from tensorflow.contrib.rnn import static_rnn as get_rnn_output -from tensorflow.contrib.rnn import BasicRNNCell, GRUCell - +#from sklearn.metrics import f1_score +from tensorflow.keras.layers import GRUCell, RNN #type: ignore +if tf.__version__.startswith('2'): + tf.compat.v1.disable_eager_execution() def get_data(split_sequences=False): word2idx = {} @@ -33,7 +32,7 @@ def get_data(split_sequences=False): Ytrain = [] currentX = [] currentY = [] - for line in open('ner.txt'): + for line in open('ner.txt', encoding='utf-8'): line = line.rstrip() if line: r = line.split() @@ -95,16 +94,16 @@ def flatten(l): # pad sequences Xtrain = tf.keras.preprocessing.sequence.pad_sequences(Xtrain, maxlen=sequence_length) Ytrain = tf.keras.preprocessing.sequence.pad_sequences(Ytrain, maxlen=sequence_length) -Xtest = tf.keras.preprocessing.sequence.pad_sequences(Xtest, maxlen=sequence_length) -Ytest = tf.keras.preprocessing.sequence.pad_sequences(Ytest, maxlen=sequence_length) +Xtest = tf.keras.preprocessing.sequence.pad_sequences(Xtest, maxlen=sequence_length) +Ytest = tf.keras.preprocessing.sequence.pad_sequences(Ytest, maxlen=sequence_length) print("Xtrain.shape:", Xtrain.shape) print("Ytrain.shape:", Ytrain.shape) # inputs -inputs = tf.placeholder(tf.int32, shape=(None, sequence_length)) -targets = tf.placeholder(tf.int32, shape=(None, sequence_length)) +inputs = tf.compat.v1.placeholder(tf.int32, shape=(None, sequence_length)) +targets = tf.compat.v1.placeholder(tf.int32, shape=(None, sequence_length)) num_samples = tf.shape(inputs)[0] # useful for later # embedding @@ -119,19 +118,18 @@ def flatten(l): tfWo = tf.Variable(Wo) tfbo = tf.Variable(bo) -# make the rnn unit -rnn_unit = GRUCell(num_units=hidden_layer_size, activation=tf.nn.relu) - +rnn_unit = RNN(GRUCell( + units=hidden_layer_size, activation=tf.nn.relu), return_sequences=True, return_state=True) # get the output x = tf.nn.embedding_lookup(tfWe, inputs) # converts x from a tensor of shape N x T x D # into a list of length T, 
where each element is a tensor of shape N x D -x = tf.unstack(x, sequence_length, 1) +#x = tf.unstack(x, sequence_length, 1) # get the rnn output -outputs, states = get_rnn_output(rnn_unit, x, dtype=tf.float32) +outputs, states = rnn_unit(x) # outputs are now of size (T, N, M) @@ -151,14 +149,14 @@ def flatten(l): labels=labels_flat ) ) -train_op = tf.train.AdamOptimizer(learning_rate).minimize(cost_op) +train_op = tf.compat.v1.train.AdamOptimizer(learning_rate).minimize(cost_op) # init stuff -sess = tf.InteractiveSession() -init = tf.global_variables_initializer() +sess = tf.compat.v1.InteractiveSession() +init = tf.compat.v1.global_variables_initializer() sess.run(init) diff --git a/nlp_class2/neural_network.py b/nlp_class2/neural_network.py index d44c6f52..4f5148a3 100644 --- a/nlp_class2/neural_network.py +++ b/nlp_class2/neural_network.py @@ -48,8 +48,8 @@ # train a shallow neural network model D = 100 - W1 = np.random.randn(V, D) / np.sqrt(V) - W2 = np.random.randn(D, V) / np.sqrt(D) + W1 = np.random.randn(V, D)/np.sqrt(V) + W2 = np.random.randn(D, V)/np.sqrt(D) losses = [] epochs = 1 @@ -58,7 +58,7 @@ def softmax(a): a = a - a.max() exp_a = np.exp(a) - return exp_a / exp_a.sum(axis=1, keepdims=True) + return exp_a/exp_a.sum(axis=1, keepdims=True) # what is the loss if we set W = log(bigram_probs)? W_bigram = np.log(bigram_probs) @@ -84,24 +84,24 @@ def softmax(a): predictions = softmax(hidden.dot(W2)) # do a gradient descent step - W2 = W2 - lr * hidden.T.dot(predictions - targets) - dhidden = (predictions - targets).dot(W2.T) * (1 - hidden * hidden) - W1 = W1 - lr * inputs.T.dot(dhidden) + W2 = W2 - lr*hidden.T.dot(predictions - targets) + dhidden = (predictions - targets).dot(W2.T)*(1 - hidden*hidden) + W1 = W1 - lr*inputs.T.dot(dhidden) # keep track of the loss - loss = -np.sum(targets * np.log(predictions)) / (n - 1) + loss = -np.sum(targets*np.log(predictions))/(n - 1) losses.append(loss) # keep track of the bigram loss # only do it for the first epoch to avoid redundancy if epoch == 0: bigram_predictions = softmax(inputs.dot(W_bigram)) - bigram_loss = -np.sum(targets * np.log(bigram_predictions)) / (n - 1) + bigram_loss = -np.sum(targets*np.log(bigram_predictions))/(n - 1) bigram_losses.append(bigram_loss) if j % 10 == 0: - print("epoch:", epoch, "sentence: %s/%s" % (j, len(sentences)), "loss:", loss) + print(f"epoch: {epoch}, sentence: {j}/{len(sentences)}, loss: {loss}") j += 1 print("Elapsed time training:", datetime.now() - t0) @@ -118,8 +118,8 @@ def smoothed_loss(x, decay=0.99): y = np.zeros(len(x)) last = 0 for t in range(len(x)): - z = decay * last + (1 - decay) * x[t] - y[t] = z / (1 - decay ** (t + 1)) + z = decay*last + (1 - decay)*x[t] + y[t] = z/(1 - decay**(t + 1)) last = z return y diff --git a/nlp_class2/neural_network2.py b/nlp_class2/neural_network2.py index 159dc571..c9df7f11 100644 --- a/nlp_class2/neural_network2.py +++ b/nlp_class2/neural_network2.py @@ -48,8 +48,8 @@ # train a shallow neural network model D = 100 - W1 = np.random.randn(V, D) / np.sqrt(V) - W2 = np.random.randn(D, V) / np.sqrt(D) + W1 = np.random.randn(V, D)/np.sqrt(V) + W2 = np.random.randn(D, V)/np.sqrt(D) losses = [] epochs = 1 @@ -58,7 +58,7 @@ def softmax(a): a = a - a.max() exp_a = np.exp(a) - return exp_a / exp_a.sum(axis=1, keepdims=True) + return exp_a/exp_a.sum(axis=1, keepdims=True) # what is the loss if we set W = log(bigram_probs)? 
W_bigram = np.log(bigram_probs) @@ -82,7 +82,7 @@ def softmax(a): predictions = softmax(hidden.dot(W2)) # keep track of the loss - loss = -np.sum(np.log(predictions[np.arange(n - 1), targets])) / (n - 1) + loss = -np.sum(np.log(predictions[np.arange(n - 1), targets]))/(n - 1) losses.append(loss) # do a gradient descent step @@ -90,14 +90,14 @@ def softmax(a): # we don't want to make a copy because it would be slow doutput = predictions # N x V doutput[np.arange(n - 1), targets] -= 1 - W2 = W2 - lr * hidden.T.dot(doutput) # (D x N) (N x V) - dhidden = doutput.dot(W2.T) * (1 - hidden * hidden) # (N x V) (V x D) * (N x D) + W2 = W2 - lr*hidden.T.dot(doutput) # (D x N) (N x V) + dhidden = doutput.dot(W2.T)*(1 - hidden*hidden) # (N x V) (V x D) * (N x D) # # for reference: # # original: W1 = W1 - lr * inputs.T.dot(dhidden) # VxN NxD --> VxD # fastest way W1_copy = W1.copy() - np.subtract.at(W1, inputs, lr * dhidden) + np.subtract.at(W1, inputs, lr*dhidden) # vs this # W1_test = W1_copy.copy() @@ -118,12 +118,12 @@ def softmax(a): # only do it for the first epoch to avoid redundancy if epoch == 0: bigram_predictions = softmax(W_bigram[inputs]) - bigram_loss = -np.sum(np.log(bigram_predictions[np.arange(n - 1), targets])) / (n - 1) + bigram_loss = -np.sum(np.log(bigram_predictions[np.arange(n - 1), targets]))/(n - 1) bigram_losses.append(bigram_loss) if j % 100 == 0: - print("epoch:", epoch, "sentence: %s/%s" % (j, len(sentences)), "loss:", loss) + print(f"epoch: {epoch}, sentence: {j}/{len(sentences)}, loss: {loss}") j += 1 @@ -141,8 +141,8 @@ def smoothed_loss(x, decay=0.99): y = np.zeros(len(x)) last = 0 for t in range(len(x)): - z = decay * last + (1 - decay) * x[t] - y[t] = z / (1 - decay ** (t + 1)) + z = decay*last + (1 - decay)*x[t] + y[t] = z / (1 - decay**(t + 1)) last = z return y diff --git a/nlp_class2/pmi.py b/nlp_class2/pmi.py index b321e91f..941517cb 100644 --- a/nlp_class2/pmi.py +++ b/nlp_class2/pmi.py @@ -49,7 +49,7 @@ def remove_punctuation_3(s): num_lines = 0 num_tokens = 0 for f in files: - for line in open(f): + for line in open(f, encoding='utf-8'): # don't count headers, structured data, lists, etc... if line and line[0] not in ('[', '*', '-', '|', '=', '{', '}'): num_lines += 1 @@ -112,7 +112,7 @@ def remove_punctuation_3(s): k = 0 # for line in open('../large_files/text8'): for f in files: - for line in open(f): + for line in open(f, encoding='utf-8'): # don't count headers, structured data, lists, etc... 
if line and line[0] not in ('[', '*', '-', '|', '=', '{', '}'): line_as_idx = [] @@ -153,7 +153,7 @@ def remove_punctuation_3(s): # PMI(w, c) = #(w, c) / #(w) / p(c) # pmi = wc_counts / wc_counts.sum(axis=1) / c_probs # works only if numpy arrays -pmi = wc_counts.multiply(1.0 / wc_counts.sum(axis=1) / c_probs).tocsr() +pmi = wc_counts.multiply(1.0/wc_counts.sum(axis=1)/c_probs).tocsr() # this operation changes it to a coo_matrix # which doesn't have functions we need, e.g log1p() # so convert it back to a csr @@ -172,9 +172,9 @@ def remove_punctuation_3(s): # initialize weights -W = np.random.randn(V, D) / np.sqrt(V + D) +W = np.random.randn(V, D)/np.sqrt(V + D) b = np.zeros(V) -U = np.random.randn(V, D) / np.sqrt(V + D) +U = np.random.randn(V, D)/np.sqrt(V + D) c = np.zeros(V) mu = logX.mean() @@ -220,7 +220,7 @@ def remove_punctuation_3(s): W = np.linalg.solve(matrix, vector).T # vectorized update b - b = (logX - W.dot(U.T) - c.reshape(1, V) - mu).sum(axis=1) / V + b = (logX - W.dot(U.T) - c.reshape(1, V) - mu).sum(axis=1)/V # vectorized update U matrix = reg*np.eye(D) + W.T.dot(W) @@ -228,7 +228,7 @@ def remove_punctuation_3(s): U = np.linalg.solve(matrix, vector).T # vectorized update c - c = (logX - W.dot(U.T) - b.reshape(V, 1) - mu).sum(axis=0) / V + c = (logX - W.dot(U.T) - b.reshape(V, 1) - mu).sum(axis=0)/V print("train duration:", datetime.now() - t0) @@ -259,6 +259,9 @@ def remove_punctuation_3(s): # set word embedding matrix # W = (W + U) / 2 +vec = np.asarray(vec) +W = np.asarray(W) + distances = pairwise_distances(vec.reshape(1, D), W, metric='cosine').reshape(V) idx = distances.argsort()[:10] @@ -266,7 +269,9 @@ def remove_punctuation_3(s): for i in idx: print(top_words[i], distances[i]) -print("dist to queen:", cos_dist(W[word2idx['queen']], vec)) +queen_vector = np.squeeze(W[word2idx['queen']]) +vec = np.squeeze(vec) +print("dist to queen:", cos_dist(queen_vector, vec)) diff --git a/nlp_class2/pmi_counts_2000.npz b/nlp_class2/pmi_counts_2000.npz new file mode 100644 index 00000000..d626d8d6 Binary files /dev/null and b/nlp_class2/pmi_counts_2000.npz differ diff --git a/nlp_class2/pos_hmm.py b/nlp_class2/pos_hmm.py index e3065cd2..0e3345ab 100644 --- a/nlp_class2/pos_hmm.py +++ b/nlp_class2/pos_hmm.py @@ -15,7 +15,7 @@ sys.path.append(os.path.abspath('..')) from hmm_class.hmmd_scaled import HMM -from pos_baseline import get_data +#from pos_baseline import get_data from sklearn.utils import shuffle from datetime import datetime from sklearn.metrics import f1_score @@ -28,7 +28,7 @@ def accuracy(T, Y): for t, y in zip(T, Y): n_correct += np.sum(t == y) n_total += len(y) - return float(n_correct) / n_total + return float(n_correct)/n_total def total_f1_score(T, Y): @@ -41,6 +41,78 @@ def total_f1_score(T, Y): # def flatten(l): # return [item for sublist in l for item in sublist] +def get_data(split_sequences=False): + if not os.path.exists('chunking'): + print("Please create a folder in your local directory called 'chunking'") + print("train.txt and test.txt should be stored in there.") + print("Please check the comments to get the download link.") + exit() + elif not os.path.exists('chunking/train.txt'): + print("train.txt is not in chunking/train.txt") + print("Please check the comments to get the download link.") + exit() + elif not os.path.exists('chunking/test.txt'): + print("test.txt is not in chunking/test.txt") + print("Please check the comments to get the download link.") + exit() + + word2idx = {} + tag2idx = {} + word_idx = 0 + tag_idx = 0 + Xtrain = [] + Ytrain = 
[] + currentX = [] + currentY = [] + for line in open('chunking/train.txt', encoding='utf-8'): + line = line.rstrip() + if line: + r = line.split() + word, tag, _ = r + if word not in word2idx: + word2idx[word] = word_idx + word_idx += 1 + currentX.append(word2idx[word]) + + if tag not in tag2idx: + tag2idx[tag] = tag_idx + tag_idx += 1 + currentY.append(tag2idx[tag]) + elif split_sequences: + Xtrain.append(currentX) + Ytrain.append(currentY) + currentX = [] + currentY = [] + + if not split_sequences: + Xtrain = currentX + Ytrain = currentY + + # load and score test data + Xtest = [] + Ytest = [] + currentX = [] + currentY = [] + for line in open('chunking/test.txt', encoding='utf-8'): + line = line.rstrip() + if line: + r = line.split() + word, tag, _ = r + if word in word2idx: + currentX.append(word2idx[word]) + else: + currentX.append(word_idx) # use this as unknown + currentY.append(tag2idx[tag]) + elif split_sequences: + Xtest.append(currentX) + Ytest.append(currentY) + currentX = [] + currentY = [] + if not split_sequences: + Xtest = currentX + Ytest = currentY + + return Xtrain, Ytrain, Xtest, Ytest, word2idx def main(smoothing=1e-1): # X = words, Y = POS tags diff --git a/nlp_class2/pos_ner_keras.py b/nlp_class2/pos_ner_keras.py index 7a1335e1..9c64609e 100644 --- a/nlp_class2/pos_ner_keras.py +++ b/nlp_class2/pos_ner_keras.py @@ -9,20 +9,21 @@ import numpy as np import matplotlib.pyplot as plt +import tensorflow as tf import os import sys sys.path.append(os.path.abspath('..')) -from pos_baseline import get_data +#from pos_baseline import get_data from sklearn.utils import shuffle -from util import init_weight +#from util import init_weight from datetime import datetime -from sklearn.metrics import f1_score +#from sklearn.metrics import f1_score -from keras.models import Model -from keras.layers import Input, Dense, Embedding, LSTM, GRU -from keras.preprocessing.sequence import pad_sequences -from keras.preprocessing.text import Tokenizer -from keras.optimizers import Adam +from tensorflow.keras.models import Model #type: ignore +from tensorflow.keras.layers import Input, Dense, Embedding, GRU, LSTM, SimpleRNN #type: ignore +from tensorflow.keras.preprocessing.sequence import pad_sequences #type: ignore +from tensorflow.keras.preprocessing.text import Tokenizer #type: ignore +from tensorflow.keras.optimizers import Adam #type: ignore MAX_VOCAB_SIZE = 20000 @@ -30,73 +31,12 @@ -def get_data_pos(split_sequences=False): - if not os.path.exists('chunking'): - print("Please create a folder in your local directory called 'chunking'") - print("train.txt and test.txt should be stored in there.") - print("Please check the comments to get the download link.") - exit() - elif not os.path.exists('chunking/train.txt'): - print("train.txt is not in chunking/train.txt") - print("Please check the comments to get the download link.") - exit() - elif not os.path.exists('chunking/test.txt'): - print("test.txt is not in chunking/test.txt") - print("Please check the comments to get the download link.") - exit() - - Xtrain = [] - Ytrain = [] - currentX = [] - currentY = [] - for line in open('chunking/train.txt'): - line = line.rstrip() - if line: - r = line.split() - word, tag, _ = r - currentX.append(word) - - currentY.append(tag) - elif split_sequences: - Xtrain.append(currentX) - Ytrain.append(currentY) - currentX = [] - currentY = [] - - if not split_sequences: - Xtrain = currentX - Ytrain = currentY - - # load and score test data - Xtest = [] - Ytest = [] - currentX = [] - currentY = [] - for line 
in open('chunking/test.txt'): - line = line.rstrip() - if line: - r = line.split() - word, tag, _ = r - currentX.append(word) - currentY.append(tag) - elif split_sequences: - Xtest.append(currentX) - Ytest.append(currentY) - currentX = [] - currentY = [] - if not split_sequences: - Xtest = currentX - Ytest = currentY - - return Xtrain, Ytrain, Xtest, Ytest - - def get_data_ner(split_sequences=False): Xtrain = [] Ytrain = [] currentX = [] currentY = [] - for line in open('ner.txt'): + for line in open('ner.txt', encoding='utf-8'): line = line.rstrip() if line: r = line.split() @@ -138,7 +78,7 @@ def get_data_ner(split_sequences=False): # get word -> integer mapping word2idx = tokenizer.word_index -print('Found %s unique tokens.' % len(word2idx)) +print(f'Found {len(word2idx)} unique tokens.') vocab_size = min(MAX_VOCAB_SIZE, len(word2idx) + 1) @@ -150,7 +90,7 @@ def get_data_ner(split_sequences=False): # get tag -> integer mapping tag2idx = tokenizer2.word_index -print('Found %s unique tags.' % len(tag2idx)) +print(f'Found {len(tag2idx)} unique tags.') num_tags = min(MAX_TAGS, len(tag2idx) + 1) @@ -189,34 +129,32 @@ def get_data_ner(split_sequences=False): # build the model input_ = Input(shape=(sequence_length,)) x = Embedding(vocab_size, embedding_dim)(input_) -x = GRU(hidden_layer_size, return_sequences=True)(x) +x = SimpleRNN(hidden_layer_size, return_sequences=True)(x) output = Dense(num_tags, activation='softmax')(x) model = Model(input_, output) model.compile( loss='categorical_crossentropy', - optimizer=Adam(lr=1e-2), + optimizer=Adam(learning_rate=1e-2), metrics=['accuracy'] ) print('Training model...') -r = model.fit( - Xtrain, - Ytrain_onehot, - batch_size=batch_size, - epochs=epochs, - validation_data=(Xtest, Ytest_onehot) -) +r = model.fit(Xtrain, + Ytrain_onehot, + batch_size=batch_size, + epochs=epochs, + validation_data=(Xtest, Ytest_onehot)) -# plot some data +# plot loss plt.plot(r.history['loss'], label='loss') plt.plot(r.history['val_loss'], label='val_loss') plt.legend() plt.show() -# accuracies +# plot accuracy plt.plot(r.history['accuracy'], label='acc') plt.plot(r.history['val_accuracy'], label='val_acc') plt.legend() diff --git a/nlp_class2/pos_tf.py b/nlp_class2/pos_tf.py index 974453b6..c4d1724c 100644 --- a/nlp_class2/pos_tf.py +++ b/nlp_class2/pos_tf.py @@ -13,15 +13,15 @@ import os import sys sys.path.append(os.path.abspath('..')) -from pos_baseline import get_data +#from pos_baseline import get_data from sklearn.utils import shuffle from util import init_weight from datetime import datetime -from sklearn.metrics import f1_score - -from tensorflow.contrib.rnn import static_rnn as get_rnn_output -from tensorflow.contrib.rnn import BasicRNNCell, GRUCell +#from sklearn.metrics import f1_score +from tensorflow.keras.layers import GRUCell, RNN #type: ignore +if tf.__version__.startswith('2'): + tf.compat.v1.disable_eager_execution() def get_data(split_sequences=False): @@ -47,7 +47,7 @@ def get_data(split_sequences=False): Ytrain = [] currentX = [] currentY = [] - for line in open('chunking/train.txt'): + for line in open('chunking/train.txt', encoding='utf-8'): line = line.rstrip() if line: r = line.split() @@ -76,7 +76,7 @@ def get_data(split_sequences=False): Ytest = [] currentX = [] currentY = [] - for line in open('chunking/test.txt'): + for line in open('chunking/test.txt', encoding='utf-8'): line = line.rstrip() if line: r = line.split() @@ -110,7 +110,7 @@ def flatten(l): # training config -epochs = 20 +epochs = 200 learning_rate = 1e-2 mu = 0.99 batch_size 
= 32 @@ -131,8 +131,8 @@ def flatten(l): # inputs -inputs = tf.placeholder(tf.int32, shape=(None, sequence_length)) -targets = tf.placeholder(tf.int32, shape=(None, sequence_length)) +inputs = tf.compat.v1.placeholder(tf.int32, shape=(None, sequence_length)) +targets = tf.compat.v1.placeholder(tf.int32, shape=(None, sequence_length)) num_samples = tf.shape(inputs)[0] # useful for later # embedding @@ -148,7 +148,8 @@ def flatten(l): tfbo = tf.Variable(bo) # make the rnn unit -rnn_unit = GRUCell(num_units=hidden_layer_size, activation=tf.nn.relu) +rnn_unit = RNN(GRUCell( + units=hidden_layer_size, activation=tf.nn.relu), return_sequences=True, return_state=True) # get the output @@ -156,10 +157,10 @@ def flatten(l): # converts x from a tensor of shape N x T x M # into a list of length T, where each element is a tensor of shape N x M -x = tf.unstack(x, sequence_length, 1) +#x = tf.unstack(x, sequence_length, 1) # get the rnn output -outputs, states = get_rnn_output(rnn_unit, x, dtype=tf.float32) +outputs, states = rnn_unit(x) # outputs are now of size (T, N, M) @@ -179,14 +180,14 @@ def flatten(l): labels=labels_flat ) ) -train_op = tf.train.AdamOptimizer(learning_rate).minimize(cost_op) +train_op = tf.compat.v1.train.AdamOptimizer(learning_rate).minimize(cost_op) # init stuff -sess = tf.InteractiveSession() -init = tf.global_variables_initializer() +sess = tf.compat.v1.InteractiveSession() +init = tf.compat.v1.global_variables_initializer() sess.run(init) @@ -222,8 +223,7 @@ def flatten(l): # print stuff out periodically if j % 10 == 0: sys.stdout.write( - "j/N: %d/%d correct rate so far: %f, cost so far: %f\r" % - (j, n_batches, float(n_correct)/n_total, cost) + f"j/N: {j}/{n_batches} correct rate so far: {float(n_correct)/n_total}, cost so far: {cost}\r" ) sys.stdout.flush() @@ -236,13 +236,13 @@ def flatten(l): pii = pi[yi > 0] n_test_correct += np.sum(yii == pii) n_test_total += len(yii) - test_acc = float(n_test_correct) / n_test_total + test_acc = float(n_test_correct)/n_test_total print( - "i:", i, "cost:", "%.4f" % cost, - "train acc:", "%.4f" % (float(n_correct)/n_total), - "test acc:", "%.4f" % test_acc, - "time for epoch:", (datetime.now() - t0) + f'''i: {i}, cost: {cost:.4f}, + train acc: {float(n_correct)/n_total:.4f}, + test acc: {test_acc:.4f}, + time for epoch: {(datetime.now() - t0)}''' ) costs.append(cost) diff --git a/nlp_class2/rntn_tensorflow.py b/nlp_class2/rntn_tensorflow.py index 77b563f2..f3022da1 100644 --- a/nlp_class2/rntn_tensorflow.py +++ b/nlp_class2/rntn_tensorflow.py @@ -47,9 +47,9 @@ def __init__(self, V, D, K, activation): We = init_weight(V, D) # quadratic terms - W11 = np.random.randn(D, D, D) / np.sqrt(3*D) - W22 = np.random.randn(D, D, D) / np.sqrt(3*D) - W12 = np.random.randn(D, D, D) / np.sqrt(3*D) + W11 = np.random.randn(D, D, D)/np.sqrt(3*D) + W22 = np.random.randn(D, D, D)/np.sqrt(3*D) + W12 = np.random.randn(D, D, D)/np.sqrt(3*D) # linear terms W1 = init_weight(D, D) diff --git a/nlp_class2/rntn_tensorflow_rnn.py b/nlp_class2/rntn_tensorflow_rnn.py index 816ff4a2..a47d40aa 100644 --- a/nlp_class2/rntn_tensorflow_rnn.py +++ b/nlp_class2/rntn_tensorflow_rnn.py @@ -13,7 +13,7 @@ import tensorflow as tf from sklearn.utils import shuffle -from util import init_weight, get_ptb_data, display_tree +from util import init_weight, get_ptb_data from datetime import datetime from sklearn.metrics import f1_score @@ -191,7 +191,7 @@ def condition(hiddens, n): it += 1 if it % 10 == 0: sys.stdout.write( - "j/N: %d/%d correct rate so far: %f, cost so far: %f\r" % 
+ "j/N: %d/%d correct rate so far: %.4f, cost so far: %.4f\r" % (it, N, float(n_correct)/n_total, cost) ) sys.stdout.flush() @@ -212,10 +212,10 @@ def condition(hiddens, n): print( - "i:", i, "cost:", cost, - "train acc:", float(n_correct)/n_total, - "test acc:", float(n_test_correct)/n_test_total, - "time for epoch:", (datetime.now() - t0) + "i: ",i, "cost: %.4f", cost, + "train acc: %.4f", float(n_correct)/n_total, + "test acc: %.4f", float(n_test_correct)/n_test_total, + "time for epoch: ",(datetime.now() - t0) ) costs.append(cost) diff --git a/nlp_class2/tfidf_tsne.py b/nlp_class2/tfidf_tsne.py index 55bd4ce5..329ef46a 100644 --- a/nlp_class2/tfidf_tsne.py +++ b/nlp_class2/tfidf_tsne.py @@ -20,7 +20,7 @@ import sys sys.path.append(os.path.abspath('..')) from rnn_class.util import get_wikipedia_data -from rnn_class.brown import get_sentences_with_word2idx_limit_vocab, get_sentences_with_word2idx +#from rnn_class.brown import get_sentences_with_word2idx_limit_vocab, get_sentences_with_word2idx from util import find_analogies from sklearn.feature_extraction.text import TfidfTransformer diff --git a/nlp_class2/util.py b/nlp_class2/util.py index f2a79888..620192ba 100644 --- a/nlp_class2/util.py +++ b/nlp_class2/util.py @@ -182,7 +182,7 @@ def get_ptb_data(): test = [] # train set first - for line in open('../large_files/trees/train.txt'): + for line in open('../large_files/trees/train.txt', encoding='utf-8'): line = line.rstrip() if line: t = str2tree(line, word2idx) @@ -194,7 +194,7 @@ def get_ptb_data(): # break # test set - for line in open('../large_files/trees/test.txt'): + for line in open('../large_files/trees/test.txt', encoding='utf-8'): line = line.rstrip() if line: t = str2tree(line, word2idx) diff --git a/nlp_class2/visualize_countries.py b/nlp_class2/visualize_countries.py index 9d0a44e8..456ff0e5 100644 --- a/nlp_class2/visualize_countries.py +++ b/nlp_class2/visualize_countries.py @@ -31,7 +31,7 @@ def main(we_file='glove_model_50.npz', w2i_file='glove_word2idx_50.json'): Z = Z[idx] plt.scatter(Z[:,0], Z[:,1]) for i in range(len(words)): - plt.annotate(s=words[i], xy=(Z[i,0], Z[i,1])) + plt.annotate(text=words[i], xy=(Z[i,0], Z[i,1])) plt.show() diff --git a/nlp_class2/word2vec.py b/nlp_class2/word2vec.py index ba92e68c..e8989fb6 100644 --- a/nlp_class2/word2vec.py +++ b/nlp_class2/word2vec.py @@ -49,7 +49,7 @@ def get_wiki(): files = glob('../large_files/enwiki*.txt') all_word_counts = {} for f in files: - for line in open(f): + for line in open(f, encoding='utf-8'): if line and line[0] not in '[*-|=\{\}': s = remove_punctuation(line).lower().split() if len(s) > 1: @@ -68,7 +68,7 @@ def get_wiki(): sents = [] for f in files: - for line in open(f): + for line in open(f, encoding='utf-8'): if line and line[0] not in '[*-|=\{\}': s = remove_punctuation(line).lower().split() if len(s) > 1: @@ -100,7 +100,7 @@ def train_model(savedir): # learning rate decay - learning_rate_delta = (learning_rate - final_learning_rate) / epochs + learning_rate_delta = (learning_rate - final_learning_rate)/epochs # params @@ -122,7 +122,7 @@ def train_model(savedir): # for subsampling each sentence threshold = 1e-5 - p_drop = 1 - np.sqrt(threshold / p_neg) + p_drop = 1 - np.sqrt(threshold/p_neg) # train the model @@ -137,9 +137,7 @@ def train_model(savedir): t0 = datetime.now() for sentence in sentences: # keep only certain words based on p_neg - sentence = [w for w in sentence \ - if np.random.random() < (1 - p_drop[w]) - ] + sentence = [w for w in sentence if np.random.random()<(1 - p_drop[w])] 
if len(sentence) < 2: continue @@ -170,14 +168,14 @@ counter += 1 if counter % 100 == 0: - sys.stdout.write("processed %s / %s\r" % (counter, len(sentences))) + sys.stdout.write(f"processed {counter}/{len(sentences)}\r") sys.stdout.flush() # break # print stuff so we don't stare at a blank screen dt = datetime.now() - t0 - print("epoch complete:", epoch, "cost:", cost, "dt:", dt) + print(f"epoch complete: {epoch}, cost: {cost}, dt: {dt}") # save the cost costs.append(cost) @@ -195,10 +193,10 @@ if not os.path.exists(savedir): os.mkdir(savedir) - with open('%s/word2idx.json' % savedir, 'w') as f: + with open(f'{savedir}/word2idx.json', 'w') as f: json.dump(word2idx, f) - np.savez('%s/weights.npz' % savedir, W, V) + np.savez(f'{savedir}/weights.npz', W, V) # return the model return word2idx, W, V @@ -220,7 +218,7 @@ def get_negative_sampling_distribution(sentences, vocab_size): p_neg = word_freq**0.75 # normalize it - p_neg = p_neg / p_neg.sum() + p_neg = p_neg/p_neg.sum() assert(np.all(p_neg > 0)) return p_neg @@ -259,12 +257,12 @@ def sgd(input_, targets, label, learning_rate, W, V): W[input_] -= learning_rate*gW # D # return cost (binary cross entropy) - cost = label * np.log(prob + 1e-10) + (1 - label) * np.log(1 - prob + 1e-10) + cost = label*np.log(prob + 1e-10) + (1 - label)*np.log(1 - prob + 1e-10) return cost.sum() def load_model(savedir): - with open('%s/word2idx.json' % savedir) as f: + with open(f'{savedir}/word2idx.json') as f: word2idx = json.load(f) npz = np.load('%s/weights.npz' % savedir) W = npz['arr_0'] @@ -277,7 +275,7 @@ def analogy(pos1, neg1, pos2, neg2, word2idx, idx2word, W): V, D = W.shape # don't actually use pos2 in calculation, just print what's expected - print("testing: %s - %s = %s - %s" % (pos1, neg1, pos2, neg2)) + print(f"testing: {pos1} - {neg1} = {pos2} - {neg2}") for w in (pos1, neg1, pos2, neg2): if w not in word2idx: print("Sorry, %s not in word2idx" % w) @@ -303,12 +301,12 @@ def analogy(pos1, neg1, pos2, neg2, word2idx, idx2word, W): break # print("best_idx:", best_idx) - print("got: %s - %s = %s - %s" % (pos1, neg1, idx2word[best_idx], neg2)) + print(f"got: {pos1} - {neg1} = {idx2word[best_idx]} - {neg2}") print("closest 10:") for i in idx: print(idx2word[i], distances[i]) - print("dist to %s:" % pos2, cos_dist(p2, vec)) + print(f"dist to {pos2}: {cos_dist(p2, vec)}") def test_model(word2idx, W, V): diff --git a/nlp_class2/word2vec_tf.py b/nlp_class2/word2vec_tf.py index d272b003..2d53ab22 100644 --- a/nlp_class2/word2vec_tf.py +++ b/nlp_class2/word2vec_tf.py @@ -47,23 +47,23 @@ def download_text8(dst): pass -def get_text8(): - # download the data if it is not yet in the right place - path = '../large_files/text8' - if not os.path.exists(path): - download_text8(path) - - words = open(path).read() - word2idx = {} - sents = [[]] - count = 0 - for word in words.split(): - if word not in word2idx: - word2idx[word] = count - count += 1 - sents[0].append(word2idx[word]) - print("count:", count) - return sents, word2idx +# def get_text8(): +# # download the data if it is not yet in the right place +# path = '../large_files/text8' +# if not os.path.exists(path): +# download_text8(path) + +# words = open(path).read() +# word2idx = {} +# sents = [[]] +# count = 0 +# for word in words.split(): +# if word not in word2idx: +# word2idx[word] = count +# count += 1 +# sents[0].append(word2idx[word]) +# print("count:", count) +# return sents, word2idx def get_wiki(): @@ -71,7 +71,7 @@ def get_wiki(): files = 
glob('../large_files/enwiki*.txt') all_word_counts = {} for f in files: - for line in open(f): + for line in open(f, encoding='utf-8'): if line and line[0] not in '[*-|=\{\}': s = remove_punctuation(line).lower().split() if len(s) > 1: @@ -90,7 +90,7 @@ def get_wiki(): sents = [] for f in files: - for line in open(f): + for line in open(f, encoding='utf-8'): if line and line[0] not in '[*-|=\{\}': s = remove_punctuation(line).lower().split() if len(s) > 1: @@ -122,7 +122,7 @@ def train_model(savedir): D = 50 # word embedding size # learning rate decay - learning_rate_delta = (learning_rate - final_learning_rate) / epochs + learning_rate_delta = (learning_rate - final_learning_rate)/epochs # distribution for drawing negative samples p_neg = get_negative_sampling_distribution(sentences) @@ -202,7 +202,7 @@ def dot(A, B): # for subsampling each sentence threshold = 1e-5 - p_drop = 1 - np.sqrt(threshold / p_neg) + p_drop = 1 - np.sqrt(threshold/p_neg) # train the model @@ -221,9 +221,7 @@ def dot(A, B): for sentence in sentences: # keep only certain words based on p_neg - sentence = [w for w in sentence \ - if np.random.random() < (1 - p_drop[w]) - ] + sentence = [w for w in sentence if np.random.random() < (1 - p_drop[w])] if len(sentence) < 2: continue @@ -282,14 +280,14 @@ def dot(A, B): counter += 1 if counter % 100 == 0: - sys.stdout.write("processed %s / %s\r" % (counter, len(sentences))) + sys.stdout.write(f"processed {counter}/{len(sentences)}\r") sys.stdout.flush() # break # print stuff so we don't stare at a blank screen dt = datetime.now() - t0 - print("epoch complete:", epoch, "cost:", cost, "dt:", dt) + print(f"epoch complete: {epoch}, cost: {cost}, dt: {dt}") # save the cost costs.append(cost) @@ -310,10 +308,10 @@ def dot(A, B): if not os.path.exists(savedir): os.mkdir(savedir) - with open('%s/word2idx.json' % savedir, 'w') as f: + with open(f'{savedir}/word2idx.json', 'w') as f: json.dump(word2idx, f) - np.savez('%s/weights.npz' % savedir, W, V) + np.savez(f'{savedir}/weights.npz', W, V) # return the model return word2idx, W, V @@ -341,7 +339,7 @@ def get_negative_sampling_distribution(sentences): p_neg[j] = word_freq[j]**0.75 # normalize it - p_neg = p_neg / p_neg.sum() + p_neg = p_neg/p_neg.sum() assert(np.all(p_neg > 0)) return p_neg @@ -366,9 +364,9 @@ def get_context(pos, sentence, window_size): def load_model(savedir): - with open('%s/word2idx.json' % savedir) as f: + with open(f'{savedir}/word2idx.json') as f: word2idx = json.load(f) - npz = np.load('%s/weights.npz' % savedir) + npz = np.load(f'{savedir}/weights.npz') W = npz['arr_0'] V = npz['arr_1'] return word2idx, W, V @@ -379,10 +377,10 @@ def analogy(pos1, neg1, pos2, neg2, word2idx, idx2word, W): V, D = W.shape # don't actually use pos2 in calculation, just print what's expected - print("testing: %s - %s = %s - %s" % (pos1, neg1, pos2, neg2)) + print(f"testing: {pos1} - {neg1} = {pos2} - {neg2}") for w in (pos1, neg1, pos2, neg2): if w not in word2idx: - print("Sorry, %s not in word2idx" % w) + print(f"Sorry, {w} not in word2idx") return p1 = W[word2idx[pos1]] @@ -403,12 +401,12 @@ def analogy(pos1, neg1, pos2, neg2, word2idx, idx2word, W): best_idx = i break - print("got: %s - %s = %s - %s" % (pos1, neg1, idx2word[idx[0]], neg2)) + print(f"got: {pos1} - {neg1} = {idx2word[idx[0]]} - {neg2}" ) print("closest 10:") for i in idx: print(idx2word[i], distances[i]) - print("dist to %s:" % pos2, cos_dist(p2, vec)) + print(f"dist to {pos2}: {cos_dist(p2, vec)}") def test_model(word2idx, W, V): diff --git 
a/recommenders/autorec.py b/recommenders/autorec.py index fa0bd415..9d044099 100644 --- a/recommenders/autorec.py +++ b/recommenders/autorec.py @@ -5,17 +5,17 @@ # Note: you may need to update your version of future # sudo pip install -U future -import numpy as np -import pandas as pd +#import numpy as np +#import pandas as pd import matplotlib.pyplot as plt from sklearn.utils import shuffle -from scipy.sparse import save_npz, load_npz +from scipy.sparse import load_npz -import keras.backend as K -from keras.models import Model -from keras.layers import Input, Dropout, Dense -from keras.regularizers import l2 -from keras.optimizers import SGD +import tensorflow.keras.backend as K #type:ignore +from tensorflow.keras.models import Model #type:ignore +from tensorflow.keras.layers import Input, Dropout, Dense #type:ignore +from tensorflow.keras.regularizers import l2 #type:ignore +from tensorflow.keras.optimizers import SGD #type:ignore # config batch_size = 128 @@ -23,8 +23,8 @@ reg = 0.0001 # reg = 0 -A = load_npz("Atrain.npz") -A_test = load_npz("Atest.npz") +A = load_npz(".\\large_files\\movielens-20m-dataset\\Atrain.npz") +A_test = load_npz(".\\large_files\\movielens-20m-dataset\\Atest.npz") mask = (A > 0) * 1.0 mask_test = (A_test > 0) * 1.0 @@ -56,6 +56,8 @@ def custom_loss(y_true, y_pred): mask = K.cast(K.not_equal(y_true, 0), dtype='float32') + y_true = K.cast(y_true, dtype='float32') + y_pred = K.cast(y_pred, dtype='float32') diff = y_pred - y_true sqdiff = diff * diff * mask sse = K.sum(K.sum(sqdiff)) @@ -96,7 +98,7 @@ def test_generator(A, M, A_test, M_test): model = Model(i, x) model.compile( loss=custom_loss, - optimizer=SGD(lr=0.08, momentum=0.9), + optimizer=SGD(learning_rate=0.08, momentum=0.9), # optimizer='adam', metrics=[custom_loss], ) diff --git a/recommenders/itembased.py b/recommenders/itembased.py index f87f9481..ff5619a8 100644 --- a/recommenders/itembased.py +++ b/recommenders/itembased.py @@ -1,37 +1,37 @@ # https://udemy.com/recommender-systems # https://deeplearningcourses.com/recommender-systems from __future__ import print_function, division -from builtins import range, input +from builtins import range#, input # Note: you may need to update your version of future # sudo pip install -U future import pickle import numpy as np -import pandas as pd -import matplotlib.pyplot as plt +#import pandas as pd +#import matplotlib.pyplot as plt from sklearn.utils import shuffle -from datetime import datetime +#from datetime import datetime from sortedcontainers import SortedList # load in the data import os -if not os.path.exists('user2movie.json') or \ - not os.path.exists('movie2user.json') or \ - not os.path.exists('usermovie2rating.json') or \ - not os.path.exists('usermovie2rating_test.json'): +if not os.path.exists('.\\large_files\\movielens-20m-dataset\\user2movie.json') or \ + not os.path.exists('.\\large_files\\movielens-20m-dataset\\movie2user.json') or \ + not os.path.exists('.\\large_files\\movielens-20m-dataset\\usermovie2rating.json') or \ + not os.path.exists('.\\large_files\\movielens-20m-dataset\\usermovie2rating_test.json'): import preprocess2dict -with open('user2movie.json', 'rb') as f: +with open('.\\large_files\\movielens-20m-dataset\\user2movie.json', 'rb') as f: user2movie = pickle.load(f) -with open('movie2user.json', 'rb') as f: +with open('.\\large_files\\movielens-20m-dataset\\movie2user.json', 'rb') as f: movie2user = pickle.load(f) -with open('usermovie2rating.json', 'rb') as f: +with 
open('.\\large_files\\movielens-20m-dataset\\usermovie2rating.json', 'rb') as f: usermovie2rating = pickle.load(f) -with open('usermovie2rating_test.json', 'rb') as f: +with open('.\\large_files\\movielens-20m-dataset\\usermovie2rating_test.json', 'rb') as f: usermovie2rating_test = pickle.load(f) diff --git a/recommenders/mf2.py b/recommenders/mf2.py index 62b599c6..bcf54b54 100644 --- a/recommenders/mf2.py +++ b/recommenders/mf2.py @@ -1,13 +1,13 @@ # https://udemy.com/recommender-systems # https://deeplearningcourses.com/recommender-systems from __future__ import print_function, division -from builtins import range, input +from builtins import range#, input # Note: you may need to update your version of future # sudo pip install -U future import pickle import numpy as np -import pandas as pd +#import pandas as pd import matplotlib.pyplot as plt from sklearn.utils import shuffle from datetime import datetime @@ -15,23 +15,23 @@ # load in the data import os -if not os.path.exists('user2movie.json') or \ - not os.path.exists('movie2user.json') or \ - not os.path.exists('usermovie2rating.json') or \ - not os.path.exists('usermovie2rating_test.json'): +if not os.path.exists('.\\large_files\\movielens-20m-dataset\\user2movie.json') or \ + not os.path.exists('.\\large_files\\movielens-20m-dataset\\movie2user.json') or \ + not os.path.exists('.\\large_files\\movielens-20m-dataset\\usermovie2rating.json') or \ + not os.path.exists('.\\large_files\\movielens-20m-dataset\\usermovie2rating_test.json'): import preprocess2dict -with open('user2movie.json', 'rb') as f: +with open('.\\large_files\\movielens-20m-dataset\\user2movie.json', 'rb') as f: user2movie = pickle.load(f) -with open('movie2user.json', 'rb') as f: +with open('.\\large_files\\movielens-20m-dataset\\movie2user.json', 'rb') as f: movie2user = pickle.load(f) -with open('usermovie2rating.json', 'rb') as f: +with open('.\\large_files\\movielens-20m-dataset\\usermovie2rating.json', 'rb') as f: usermovie2rating = pickle.load(f) -with open('usermovie2rating_test.json', 'rb') as f: +with open('.\\large_files\\movielens-20m-dataset\\usermovie2rating_test.json', 'rb') as f: usermovie2rating_test = pickle.load(f) diff --git a/recommenders/mf_keras.py b/recommenders/mf_keras.py index efc3315b..5f8ea4ad 100644 --- a/recommenders/mf_keras.py +++ b/recommenders/mf_keras.py @@ -1,23 +1,23 @@ # https://udemy.com/recommender-systems # https://deeplearningcourses.com/recommender-systems from __future__ import print_function, division -from builtins import range, input +#from builtins import range, input # Note: you may need to update your version of future # sudo pip install -U future -import pickle -import numpy as np +#import pickle +#import numpy as np import pandas as pd import matplotlib.pyplot as plt from sklearn.utils import shuffle -from keras.models import Model -from keras.layers import Input, Embedding, Dot, Add, Flatten -from keras.regularizers import l2 -from keras.optimizers import SGD, Adam +from tensorflow.keras.models import Model #type:ignore +from tensorflow.keras.layers import Input, Embedding, Dot, Add, Flatten #type:ignore +from tensorflow.keras.regularizers import l2 #type:ignore +from tensorflow.keras.optimizers import SGD #type:ignore # load in the data -df = pd.read_csv('../large_files/movielens-20m-dataset/edited_rating.csv') +df = pd.read_csv('.\\large_files\\movielens-20m-dataset\\edited_rating.csv') N = df.userId.max() + 1 # number of users M = df.movie_idx.max() + 1 # number of movies @@ -71,7 +71,7 @@ loss='mse', # 
optimizer='adam', # optimizer=Adam(lr=0.01), - optimizer=SGD(lr=0.08, momentum=0.9), + optimizer=SGD(learning_rate=0.08, momentum=0.9), metrics=['mse'], ) diff --git a/recommenders/mf_keras_deep.py b/recommenders/mf_keras_deep.py index f3888a7a..b22c4abb 100644 --- a/recommenders/mf_keras_deep.py +++ b/recommenders/mf_keras_deep.py @@ -1,24 +1,24 @@ # https://udemy.com/recommender-systems # https://deeplearningcourses.com/recommender-systems from __future__ import print_function, division -from builtins import range, input +#from builtins import range, input # Note: you may need to update your version of future # sudo pip install -U future -import pickle -import numpy as np +#import pickle +#import numpy as np import pandas as pd import matplotlib.pyplot as plt from sklearn.utils import shuffle -from keras.models import Model -from keras.layers import Input, Embedding, Flatten, Dense, Concatenate -from keras.layers import Dropout, BatchNormalization, Activation -from keras.regularizers import l2 -from keras.optimizers import SGD, Adam +from tensorflow.keras.models import Model # type:ignore +from tensorflow.keras.layers import Input, Embedding, Flatten, Dense, Concatenate # type:ignore +from tensorflow.keras.layers import Dropout, BatchNormalization, Activation # type:ignore +#from tensorflow.keras.regularizers import l2 +from tensorflow.keras.optimizers import SGD#, Adam # type:ignore # load in the data -df = pd.read_csv('../large_files/movielens-20m-dataset/edited_rating.csv') +df = pd.read_csv('.\\large_files\\movielens-20m-dataset\\edited_rating.csv') N = df.userId.max() + 1 # number of users M = df.movie_idx.max() + 1 # number of movies @@ -47,12 +47,12 @@ # the neural network x = Dense(400)(x) -# x = BatchNormalization()(x) +x = BatchNormalization()(x) +x = Activation('relu')(x) +x = Dropout(0.5)(x) +x = Dense(100)(x) +x = BatchNormalization()(x) x = Activation('relu')(x) -# x = Dropout(0.5)(x) -# x = Dense(100)(x) -# x = BatchNormalization()(x) -# x = Activation('relu')(x) x = Dense(1)(x) model = Model(inputs=[u, m], outputs=x) @@ -60,7 +60,7 @@ loss='mse', # optimizer='adam', # optimizer=Adam(lr=0.01), - optimizer=SGD(lr=0.08, momentum=0.9), + optimizer=SGD(learning_rate=0.08, momentum=0.9), metrics=['mse'], ) diff --git a/recommenders/preprocess.py b/recommenders/preprocess.py index 72585460..9e8d19ef 100644 --- a/recommenders/preprocess.py +++ b/recommenders/preprocess.py @@ -1,14 +1,14 @@ # https://udemy.com/recommender-systems # https://deeplearningcourses.com/recommender-systems from __future__ import print_function, division -from builtins import range, input +#from builtins import range, input # Note: you may need to update your version of future # sudo pip install -U future import pandas as pd # https://www.kaggle.com/grouplens/movielens-20m-dataset -df = pd.read_csv('../large_files/movielens-20m-dataset/rating.csv') +df = pd.read_csv('.\\large_files\\movielens-20m-dataset\\rating.csv') @@ -34,8 +34,9 @@ # add them to the data frame # takes awhile -df['movie_idx'] = df.apply(lambda row: movie2idx[row.movieId], axis=1) +#df['movie_idx'] = df.apply(lambda row: movie2idx[row.movieId], axis=1) +df['movie_idx'] = df.movieId.map(movie2idx) df = df.drop(columns=['timestamp']) -df.to_csv('../large_files/movielens-20m-dataset/edited_rating.csv', index=False) \ No newline at end of file +df.to_csv('.\\large_files\\movielens-20m-dataset\\edited_rating.csv', index=False) \ No newline at end of file diff --git a/recommenders/preprocess2dict.py b/recommenders/preprocess2dict.py index 
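The preprocess.py hunk above swaps a row-wise DataFrame.apply for Series.map, which performs the same movieId-to-index lookup vectorized. A tiny self-contained sketch with made-up ids:

import pandas as pd

df = pd.DataFrame({'movieId': [10, 20, 20, 99], 'rating': [4.0, 3.5, 5.0, 2.0]})
movie2idx = {mid: i for i, mid in enumerate(df.movieId.unique())}  # 10->0, 20->1, 99->2

# old (slow): df['movie_idx'] = df.apply(lambda row: movie2idx[row.movieId], axis=1)
df['movie_idx'] = df.movieId.map(movie2idx)   # vectorized dictionary lookup
print(df)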
2ed5d8b7..e019cde4 100644 --- a/recommenders/preprocess2dict.py +++ b/recommenders/preprocess2dict.py @@ -1,19 +1,19 @@ # https://udemy.com/recommender-systems # https://deeplearningcourses.com/recommender-systems from __future__ import print_function, division -from builtins import range, input +#from builtins import range, input # Note: you may need to update your version of future # sudo pip install -U future import pickle -import numpy as np +#import numpy as np import pandas as pd -import matplotlib.pyplot as plt +#import matplotlib.pyplot as plt from sklearn.utils import shuffle # load in the data # https://www.kaggle.com/grouplens/movielens-20m-dataset -df = pd.read_csv('../large_files/movielens-20m-dataset/very_small_rating.csv') +df = pd.read_csv('.\\large_files\\movielens-20m-dataset\\small_rating.csv') N = df.userId.max() + 1 # number of users M = df.movie_idx.max() + 1 # number of movies @@ -25,58 +25,62 @@ df_test = df.iloc[cutoff:] # a dictionary to tell us which users have rated which movies -user2movie = {} +user2movie = df_train.groupby('userId').movie_idx.agg(list).to_dict() # a dicationary to tell us which movies have been rated by which users -movie2user = {} +movie2user = df_train.groupby('movie_idx').userId.agg(list).to_dict() # a dictionary to look up ratings -usermovie2rating = {} -print("Calling: update_user2movie_and_movie2user") -count = 0 -def update_user2movie_and_movie2user(row): - global count - count += 1 - if count % 100000 == 0: - print("processed: %.3f" % (float(count)/cutoff)) - - i = int(row.userId) - j = int(row.movie_idx) - if i not in user2movie: - user2movie[i] = [j] - else: - user2movie[i].append(j) - - if j not in movie2user: - movie2user[j] = [i] - else: - movie2user[j].append(i) - - usermovie2rating[(i,j)] = row.rating -df_train.apply(update_user2movie_and_movie2user, axis=1) +user_movie_keys = zip(df_train.userId, df_train.movie_idx) +usermovie2rating = pd.Series(df_train.rating.values, index=user_movie_keys).to_dict() + +# print("Calling: update_user2movie_and_movie2user") +# count = 0 +# def update_user2movie_and_movie2user(row): +# global count +# count += 1 +# if count % 100000 == 0: +# print("processed: %.3f" % (float(count)/cutoff)) + +# i = int(row.userId) +# j = int(row.movie_idx) +# if i not in user2movie: +# user2movie[i] = [j] +# else: +# user2movie[i].append(j) + +# if j not in movie2user: +# movie2user[j] = [i] +# else: +# movie2user[j].append(i) + +# usermovie2rating[(i,j)] = row.rating +#df_train.apply(update_user2movie_and_movie2user, axis=1) # test ratings dictionary -usermovie2rating_test = {} -print("Calling: update_usermovie2rating_test") -count = 0 -def update_usermovie2rating_test(row): - global count - count += 1 - if count % 100000 == 0: - print("processed: %.3f" % (float(count)/len(df_test))) - - i = int(row.userId) - j = int(row.movie_idx) - usermovie2rating_test[(i,j)] = row.rating -df_test.apply(update_usermovie2rating_test, axis=1) +user_movie_keys_test = zip(df_test.userId, df_test.movie_idx) +usermovie2rating_test = pd.Series(df_test.rating.values, index=user_movie_keys_test).to_dict() + +# print("Calling: update_usermovie2rating_test") +# count = 0 +# def update_usermovie2rating_test(row): +# global count +# count += 1 +# if count % 100000 == 0: +# print("processed: %.3f" % (float(count)/len(df_test))) + +# i = int(row.userId) +# j = int(row.movie_idx) +# usermovie2rating_test[(i,j)] = row.rating +# df_test.apply(update_usermovie2rating_test, axis=1) # note: these are not really JSONs -with 
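The preprocess2dict.py hunk above replaces the row-by-row dictionary builders with groupby/zip one-liners. A sketch of the same construction on a made-up five-row frame (the values in the comments follow from that toy data, not from MovieLens):

import pandas as pd

df_train = pd.DataFrame({
    'userId':    [0, 0, 1, 1, 2],
    'movie_idx': [0, 1, 0, 2, 1],
    'rating':    [5.0, 3.0, 4.0, 2.0, 4.5],
})

# which movies each user rated, and which users rated each movie
user2movie = df_train.groupby('userId').movie_idx.agg(list).to_dict()
movie2user = df_train.groupby('movie_idx').userId.agg(list).to_dict()

# (user, movie) -> rating lookup
keys = list(zip(df_train.userId, df_train.movie_idx))   # (user, movie) pairs
usermovie2rating = pd.Series(df_train.rating.values, index=keys).to_dict()

print(user2movie)                 # user 0 -> [0, 1], user 1 -> [0, 2], user 2 -> [1]
print(usermovie2rating[(0, 1)])   # 3.0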
open('user2movie.json', 'wb') as f: +with open('.\\large_files\\movielens-20m-dataset\\user2movie.json', 'wb') as f: pickle.dump(user2movie, f) -with open('movie2user.json', 'wb') as f: +with open('.\\large_files\\movielens-20m-dataset\\movie2user.json', 'wb') as f: pickle.dump(movie2user, f) -with open('usermovie2rating.json', 'wb') as f: +with open('.\\large_files\\movielens-20m-dataset\\usermovie2rating.json', 'wb') as f: pickle.dump(usermovie2rating, f) -with open('usermovie2rating_test.json', 'wb') as f: +with open('.\\large_files\\movielens-20m-dataset\\usermovie2rating_test.json', 'wb') as f: pickle.dump(usermovie2rating_test, f) diff --git a/recommenders/preprocess2sparse.py b/recommenders/preprocess2sparse.py index 864de56d..fdc525a7 100644 --- a/recommenders/preprocess2sparse.py +++ b/recommenders/preprocess2sparse.py @@ -5,14 +5,14 @@ # Note: you may need to update your version of future # sudo pip install -U future -import numpy as np +#import numpy as np import pandas as pd -import matplotlib.pyplot as plt +#import matplotlib.pyplot as plt from sklearn.utils import shuffle -from scipy.sparse import lil_matrix, csr_matrix, save_npz, load_npz +from scipy.sparse import lil_matrix, save_npz # load in the data -df = pd.read_csv('../large_files/movielens-20m-dataset/edited_rating.csv') +df = pd.read_csv('.\\large_files\\movielens-20m-dataset\\edited_rating.csv') # df = pd.read_csv('../large_files/movielens-20m-dataset/small_rating.csv') N = df.userId.max() + 1 # number of users @@ -41,7 +41,7 @@ def update_train(row): # mask, to tell us which entries exist and which do not A = A.tocsr() mask = (A > 0) -save_npz("Atrain.npz", A) +save_npz(".\\large_files\\movielens-20m-dataset\\Atrain.npz", A) # test ratings dictionary A_test = lil_matrix((N, M)) @@ -59,4 +59,4 @@ def update_test(row): df_test.apply(update_test, axis=1) A_test = A_test.tocsr() mask_test = (A_test > 0) -save_npz("Atest.npz", A_test) +save_npz(".\\large_files\\movielens-20m-dataset\\Atest.npz", A_test) diff --git a/recommenders/preprocess_shrink.py b/recommenders/preprocess_shrink.py index 665a80e6..e7aa5b87 100644 --- a/recommenders/preprocess_shrink.py +++ b/recommenders/preprocess_shrink.py @@ -1,18 +1,18 @@ # https://udemy.com/recommender-systems # https://deeplearningcourses.com/recommender-systems from __future__ import print_function, division -from builtins import range, input +#from builtins import range, input # Note: you may need to update your version of future # sudo pip install -U future -import pickle -import numpy as np +#import pickle +#import numpy as np import pandas as pd from collections import Counter # load in the data # https://www.kaggle.com/grouplens/movielens-20m-dataset -df = pd.read_csv('../large_files/movielens-20m-dataset/edited_rating.csv') +df = pd.read_csv('.\\large_files\\movielens-20m-dataset\\edited_rating.csv') print("original dataframe size:", len(df)) N = df.userId.max() + 1 # number of users @@ -25,8 +25,8 @@ n = 10000 m = 2000 -user_ids = [u for u, c in user_ids_count.most_common(n)] -movie_ids = [m for m, c in movie_ids_count.most_common(m)] +user_ids = [u for u, _ in user_ids_count.most_common(n)] +movie_ids = [m for m, _ in movie_ids_count.most_common(m)] # make a copy, otherwise ids won't be overwritten df_small = df[df.userId.isin(user_ids) & df.movie_idx.isin(movie_ids)].copy() @@ -55,4 +55,4 @@ print("max movie id:", df_small.movie_idx.max()) print("small dataframe size:", len(df_small)) -df_small.to_csv('../large_files/movielens-20m-dataset/small_rating.csv', 
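The preprocess2sparse.py hunk above builds the rating matrices as lil_matrix, converts them to CSR, and writes them with save_npz. A minimal sketch of that flow with toy sizes and a hypothetical filename:

from scipy.sparse import lil_matrix, save_npz, load_npz

N, M = 3, 4                                          # users x movies (toy sizes)
ratings = [(0, 1, 5.0), (1, 0, 3.0), (2, 3, 4.5)]    # (user, movie, rating)

A = lil_matrix((N, M))
for i, j, r in ratings:
    A[i, j] = r                  # lil_matrix supports cheap incremental writes

A = A.tocsr()                    # CSR is better for arithmetic and slicing
save_npz("Atrain_toy.npz", A)    # hypothetical filename, not the repo's path

B = load_npz("Atrain_toy.npz")
mask = (B > 0) * 1.0             # 1.0 where a rating exists, 0 elsewhere
print(mask.toarray())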
index=False) +df_small.to_csv('.\\large_files\\movielens-20m-dataset\\small_rating.csv', index=False) diff --git a/recommenders/rbm_tf_k_faster.py b/recommenders/rbm_tf_k_faster.py index 9a1a242a..71b9666b 100644 --- a/recommenders/rbm_tf_k_faster.py +++ b/recommenders/rbm_tf_k_faster.py @@ -10,8 +10,8 @@ import matplotlib.pyplot as plt from sklearn.utils import shuffle -import pandas as pd -from scipy.sparse import lil_matrix, csr_matrix, save_npz, load_npz +#import pandas as pd +from scipy.sparse import load_npz from datetime import datetime if tf.__version__.startswith('2'): @@ -33,6 +33,7 @@ def dot2(H, W): class RBM(object): def __init__(self, D, M, K): + super().__init__() self.D = D # input feature size self.M = M # hidden size self.K = K # number of ratings @@ -115,7 +116,7 @@ def build(self, D, M, K): self.session.run(initop) def fit(self, X, X_test, epochs=10, batch_sz=256, show_fig=True): - N, D = X.shape + N, _ = X.shape n_batches = N // batch_sz @@ -134,7 +135,7 @@ def fit(self, X, X_test, epochs=10, batch_sz=256, show_fig=True): ) if j % 100 == 0: - print("j / n_batches:", j, "/", n_batches, "cost:", c) + print(f"j / n_batches: {j}/{n_batches}", "cost: ",c) print("duration:", datetime.now() - t0) # calculate the true train and test cost @@ -209,10 +210,10 @@ def get_sse(self, X, Xt): def main(): - A = load_npz("Atrain.npz") - A_test = load_npz("Atest.npz") + A = load_npz(".\\large_files\\movielens-20m-dataset\\Atrain.npz") + A_test = load_npz(".\\large_files\\movielens-20m-dataset\\Atest.npz") - N, M = A.shape + _, M = A.shape rbm = RBM(M, 50, 10) rbm.fit(A, A_test) diff --git a/recommenders/spark.py b/recommenders/spark.py index 23ea365a..33154899 100644 --- a/recommenders/spark.py +++ b/recommenders/spark.py @@ -9,11 +9,14 @@ # tmp = p.take(5) # print(tmp) -from pyspark.mllib.recommendation import ALS, MatrixFactorizationModel, Rating -import os +from pyspark.mllib.recommendation import ALS, Rating +from pyspark import SparkContext +#import os # load in the data -data = sc.textFile("../large_files/movielens-20m-dataset/small_rating.csv") +sc = SparkContext('local', 'random') +data = sc.textFile(".\\large_files\\movielens-20m-dataset\\small_rating.csv") +#'/mnt/c/Users/Saif/Downloads/personal/Udemy_labs/nlp/machine_learning_examples/large_files/movielens-20m-dataset//small_ratings.csv' # filter out header header = data.first() #extract header diff --git a/recommenders/spark2.py b/recommenders/spark2.py index 5879269d..8310c69b 100644 --- a/recommenders/spark2.py +++ b/recommenders/spark2.py @@ -7,7 +7,7 @@ # tmp = p.take(5) # print(tmp) -from pyspark.mllib.recommendation import ALS, MatrixFactorizationModel, Rating +from pyspark.mllib.recommendation import ALS, Rating from pyspark import SparkContext # increase memory @@ -18,8 +18,8 @@ # load in the data -# data = sc.textFile("../large_files/movielens-20m-dataset/small_rating.csv") -data = sc.textFile("../large_files/movielens-20m-dataset/rating.csv.gz") +data = sc.textFile("/mnt/c/Users/Saif/Downloads/personal/Udemy_labs/nlp/machine_learning_examples/large_files/movielens-20m-dataset/rating.csv") +#data = sc.textFile(".\\large_files\\movielens-20m-dataset\\rating.csv.gz") # filter out header header = data.first() #extract header diff --git a/recommenders/tfidf.py b/recommenders/tfidf.py index a6078ec3..7c380205 100644 --- a/recommenders/tfidf.py +++ b/recommenders/tfidf.py @@ -2,12 +2,12 @@ import json from sklearn.feature_extraction.text import TfidfVectorizer -from sklearn.metrics.pairwise import cosine_similarity, 
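The spark.py hunk above now creates its own SparkContext and reads the ratings CSV directly; the training step is not shown in the hunk, so the sketch below is an assumption about how the parsed Rating RDD would typically be fed to MLlib's ALS. It needs a working Spark/Java installation to run, and the rank and iteration counts are arbitrary.

from pyspark import SparkContext
from pyspark.mllib.recommendation import ALS, Rating

sc = SparkContext('local', 'als-sketch')

# in the script these lines come from sc.textFile(<ratings csv>) with the header removed
lines = sc.parallelize(["0,1,5.0", "0,2,3.0", "1,1,4.0", "2,3,2.5"])
ratings = lines.map(lambda l: l.split(',')) \
               .map(lambda t: Rating(int(t[0]), int(t[1]), float(t[2])))

model = ALS.train(ratings, rank=10, iterations=5)
print(model.predict(0, 2))       # predicted rating for (user 0, movie 2)

sc.stop()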
euclidean_distances +from sklearn.metrics.pairwise import cosine_similarity # get the data from: https://www.kaggle.com/tmdb/tmdb-movie-metadata # load in the data -df = pd.read_csv('../large_files/tmdb_5000_movies.csv') +df = pd.read_csv('.\\large_files\\tmdb_5000_movies.csv') # convert the relevant data for each movie into a single string @@ -59,7 +59,7 @@ def recommend(title): recommended_idx = (-scores).argsort()[1:6] # return the titles of the recommendations - return df['title'].iloc[recommended_idx] + return df['title'].iloc[recommended_idx].values print("\nRecommendations for 'Scream 3':") diff --git a/recommenders/userbased.py b/recommenders/userbased.py index b512a722..07e84489 100644 --- a/recommenders/userbased.py +++ b/recommenders/userbased.py @@ -1,44 +1,44 @@ # https://udemy.com/recommender-systems # https://deeplearningcourses.com/recommender-systems from __future__ import print_function, division -from builtins import range, input +from builtins import range#, input # Note: you may need to update your version of future # sudo pip install -U future import pickle import numpy as np -import pandas as pd -import matplotlib.pyplot as plt -from sklearn.utils import shuffle -from datetime import datetime +#import pandas as pd +#import matplotlib.pyplot as plt +#from sklearn.utils import shuffle +#from datetime import datetime from sortedcontainers import SortedList # load in the data import os -if not os.path.exists('user2movie.json') or \ - not os.path.exists('movie2user.json') or \ - not os.path.exists('usermovie2rating.json') or \ - not os.path.exists('usermovie2rating_test.json'): +if not os.path.exists('.\\large_files\\movielens-20m-dataset\\user2movie.json') or \ + not os.path.exists('.\\large_files\\movielens-20m-dataset\\movie2user.json') or \ + not os.path.exists('.\\large_files\\movielens-20m-dataset\\usermovie2rating.json') or \ + not os.path.exists('.\\large_files\\movielens-20m-dataset\\usermovie2rating_test.json'): import preprocess2dict -with open('user2movie.json', 'rb') as f: +with open('.\\large_files\\movielens-20m-dataset\\user2movie.json', 'rb') as f: user2movie = pickle.load(f) -with open('movie2user.json', 'rb') as f: +with open('.\\large_files\\movielens-20m-dataset\\movie2user.json', 'rb') as f: movie2user = pickle.load(f) -with open('usermovie2rating.json', 'rb') as f: +with open('.\\large_files\\movielens-20m-dataset\\usermovie2rating.json', 'rb') as f: usermovie2rating = pickle.load(f) -with open('usermovie2rating_test.json', 'rb') as f: +with open('.\\large_files\\movielens-20m-dataset\\usermovie2rating_test.json', 'rb') as f: usermovie2rating_test = pickle.load(f) N = np.max(list(user2movie.keys())) + 1 # the test set may contain movies the train set doesn't have data on m1 = np.max(list(movie2user.keys())) -m2 = np.max([m for (u, m), r in usermovie2rating_test.items()]) +m2 = np.max([m for (_, m), _ in usermovie2rating_test.items()]) M = max(m1, m2) + 1 print("N:", N, "M:", M) diff --git a/rnn_class/util.py b/rnn_class/util.py index 54801efa..5aa98a66 100644 --- a/rnn_class/util.py +++ b/rnn_class/util.py @@ -118,7 +118,7 @@ def get_wikipedia_data(n_files, n_vocab, by_paragraph=False): for f in input_files: print("reading:", f) - for line in open(prefix + f): + for line in open(prefix + f, encoding='utf-8'): line = line.strip() # don't count headers, structured data, lists, etc... if line and line[0] not in ('[', '*', '-', '|', '=', '{', '}'):
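The tfidf.py hunk above drops the unused euclidean_distances import and returns .values from recommend() so the titles print as a plain array. A self-contained sketch of the same TF-IDF plus cosine-similarity lookup, using three made-up "movies" instead of the TMDB file:

import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

df = pd.DataFrame({
    'title':  ['Scream', 'Scream 2', 'Toy Story'],
    'string': ['horror slasher ghostface',
               'horror slasher sequel ghostface',
               'animation toys pixar'],
})

tfidf = TfidfVectorizer()
X = tfidf.fit_transform(df['string'])            # (n_movies, n_terms) sparse matrix

query = X[0]                                     # row for 'Scream'
scores = cosine_similarity(query, X).flatten()   # similarity to every movie
recommended_idx = (-scores).argsort()[1:3]       # skip the movie itself, take the next best
print(df['title'].iloc[recommended_idx].values)  # ['Scream 2' 'Toy Story']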