diff --git a/deep_learning4e.py b/deep_learning4e.py
index 64aa49e90..734a9307c 100644
--- a/deep_learning4e.py
+++ b/deep_learning4e.py
@@ -9,14 +9,14 @@ from keras.preprocessing import sequence
 
 from utils4e import (Sigmoid, dot_product, softmax1D, conv1D, gaussian_kernel, element_wise_product, vector_add,
-                     random_weights, scalar_vector_product, matrix_multiplication, map_vector, mse_loss)
+                     random_weights, scalar_vector_product, matrix_multiplication, map_vector, mean_squared_error_loss)
 
 
 class Node:
     """
     A node in a computational graph contains the pointer to all its parents.
-    :param val: value of current node.
-    :param parents: a container of all parents of current node.
+    :param val: value of current node
+    :param parents: a container of all parents of current node
     """
 
     def __init__(self, val=None, parents=None):
@@ -55,40 +55,40 @@ def forward(self, inputs):
         raise NotImplementedError
 
 
-class OutputLayer(Layer):
-    """1D softmax output layer in 19.3.2"""
+class InputLayer(Layer):
+    """1D input layer. Layer size is the same as input vector size."""
 
     def __init__(self, size=3):
         super().__init__(size)
 
     def forward(self, inputs):
+        """Take each value of the inputs to each unit in the layer."""
         assert len(self.nodes) == len(inputs)
-        res = softmax1D(inputs)
-        for node, val in zip(self.nodes, res):
-            node.val = val
-        return res
+        for node, inp in zip(self.nodes, inputs):
+            node.val = inp
+        return inputs
 
 
-class InputLayer(Layer):
-    """1D input layer. Layer size is the same as input vector size."""
+class OutputLayer(Layer):
+    """1D softmax output layer in 19.3.2."""
 
     def __init__(self, size=3):
         super().__init__(size)
 
     def forward(self, inputs):
-        """Take each value of the inputs to each unit in the layer."""
         assert len(self.nodes) == len(inputs)
-        for node, inp in zip(self.nodes, inputs):
-            node.val = inp
-        return inputs
+        res = softmax1D(inputs)
+        for node, val in zip(self.nodes, res):
+            node.val = val
+        return res
 
 
 class DenseLayer(Layer):
     """
     1D dense layer in a neural network.
-    :param in_size: input vector size, int.
-    :param out_size: output vector size, int.
-    :param activation: activation function, Activation object.
+    :param in_size: (int) input vector size
+    :param out_size: (int) output vector size
+    :param activation: (Activation object) activation function
     """
 
     def __init__(self, in_size=3, out_size=3, activation=None):
@@ -124,7 +124,7 @@ def __init__(self, size=3, kernel_size=3):
             node.weights = gaussian_kernel(kernel_size)
 
     def forward(self, features):
-        # each node in layer takes a channel in the features.
+        # each node in layer takes a channel in the features
         assert len(self.nodes) == len(features)
         res = []
         # compute the convolution output of each channel, store it in node.val
@@ -154,7 +154,8 @@ def forward(self, features):
         for i in range(len(self.nodes)):
             feature = features[i]
             # get the max value in a kernel_size * kernel_size area
-            out = [max(feature[i:i + self.kernel_size]) for i in range(len(feature) - self.kernel_size + 1)]
+            out = [max(feature[i:i + self.kernel_size])
+                   for i in range(len(feature) - self.kernel_size + 1)]
             res.append(out)
             self.nodes[i].val = out
         return res
@@ -270,13 +271,13 @@ def adam(dataset, net, loss, epochs=1000, rho=(0.9, 0.999), delta=1 / 10 ** 8,
 
 def BackPropagation(inputs, targets, theta, net, loss):
     """
-    The back-propagation algorithm for multilayer networks in only one epoch, to calculate gradients of theta
-    :param inputs: a batch of inputs in an array. Each input is an iterable object.
-    :param targets: a batch of targets in an array. Each target is an iterable object.
-    :param theta: parameters to be updated.
-    :param net: a list of predefined layer objects representing their linear sequence.
-    :param loss: a predefined loss function taking array of inputs and targets.
-    :return: gradients of theta, loss of the input batch.
+    The back-propagation algorithm for multilayer networks in only one epoch, to calculate gradients of theta.
+    :param inputs: a batch of inputs in an array. Each input is an iterable object
+    :param targets: a batch of targets in an array. Each target is an iterable object
+    :param theta: parameters to be updated
+    :param net: a list of predefined layer objects representing their linear sequence
+    :param loss: a predefined loss function taking array of inputs and targets
+    :return: gradients of theta, loss of the input batch
     """
     assert len(inputs) == len(targets)
 
@@ -325,9 +326,9 @@ def BackPropagation(inputs, targets, theta, net, loss):
 class BatchNormalizationLayer(Layer):
     """Batch normalization layer."""
 
-    def __init__(self, size, epsilon=0.001):
+    def __init__(self, size, eps=0.001):
         super().__init__(size)
-        self.epsilon = epsilon
+        self.eps = eps
         # self.weights = [beta, gamma]
         self.weights = [0, 0]
         self.inputs = None
@@ -341,7 +342,7 @@ def forward(self, inputs):
         res = []
         # get normalized value of each input
         for i in range(len(self.nodes)):
-            val = [(inputs[i] - mu) * self.weights[0] / np.sqrt(self.epsilon + stderr ** 2) + self.weights[1]]
+            val = [(inputs[i] - mu) * self.weights[0] / np.sqrt(self.eps + stderr ** 2) + self.weights[1]]
             res.append(val)
             self.nodes[i].val = val
         return res
@@ -375,7 +376,7 @@ def NeuralNetLearner(dataset, hidden_layer_sizes=None, learning_rate=0.01, epoch
     raw_net.append(DenseLayer(hidden_input_size, output_size))
 
     # update parameters of the network
-    learned_net = optimizer(dataset, raw_net, mse_loss, epochs, l_rate=learning_rate,
+    learned_net = optimizer(dataset, raw_net, mean_squared_error_loss, epochs, l_rate=learning_rate,
                             batch_size=batch_size, verbose=verbose)
 
     def predict(example):
@@ -394,7 +395,7 @@ def predict(example):
     return predict
 
 
-def PerceptronLearner(dataset, learning_rate=0.01, epochs=100, verbose=None):
+def PerceptronLearner(dataset, learning_rate=0.01, epochs=100, optimizer=gradient_descent, batch_size=1, verbose=None):
     """
     Simple perceptron neural network.
     """
@@ -405,7 +406,8 @@ def PerceptronLearner(dataset, learning_rate=0.01, epochs=100, verbose=None):
     raw_net = [InputLayer(input_size), DenseLayer(input_size, output_size)]
 
     # update the network
-    learned_net = gradient_descent(dataset, raw_net, mse_loss, epochs, l_rate=learning_rate, verbose=verbose)
+    learned_net = optimizer(dataset, raw_net, mean_squared_error_loss, epochs, l_rate=learning_rate,
+                            batch_size=batch_size, verbose=verbose)
 
     def predict(example):
         layer_out = learned_net[1].forward(example)
@@ -419,7 +421,7 @@ def SimpleRNNLearner(train_data, val_data, epochs=2):
     RNN example for text sentimental analysis.
     :param train_data: a tuple of (training data, targets)
         Training data: ndarray taking training examples, while each example is coded by embedding
-        Targets: ndarray taking targets of each example. Each target is mapped to an integer.
+        Targets: ndarray taking targets of each example. Each target is mapped to an integer
     :param val_data: a tuple of (validation data, targets)
     :param epochs: number of epochs
     :return: a keras model
diff --git a/learning.py b/learning.py
index 99ef8abc2..764392c7d 100644
--- a/learning.py
+++ b/learning.py
@@ -968,7 +968,7 @@ def ada_boost(dataset, L, K):
         h.append(h_k)
         error = sum(weight for example, weight in zip(examples, w) if example[target] != h_k(example))
         # avoid divide-by-0 from either 0% or 100% error rates
-        error = clip(error, eps, 1 - eps)
+        error = np.clip(error, eps, 1 - eps)
         for j, example in enumerate(examples):
             if example[target] == h_k(example):
                 w[j] *= error / (1 - error)
diff --git a/learning4e.py b/learning4e.py
index f581b9ec1..7dba31cfa 100644
--- a/learning4e.py
+++ b/learning4e.py
@@ -742,7 +742,7 @@ def ada_boost(dataset, L, K):
         h.append(h_k)
         error = sum(weight for example, weight in zip(examples, w) if example[target] != h_k(example))
         # avoid divide-by-0 from either 0% or 100% error rates
-        error = clip(error, eps, 1 - eps)
+        error = np.clip(error, eps, 1 - eps)
         for j, example in enumerate(examples):
             if example[target] == h_k(example):
                 w[j] *= error / (1 - error)
diff --git a/tests/test_deep_learning4e.py b/tests/test_deep_learning4e.py
index ed8979a0a..305c2e65c 100644
--- a/tests/test_deep_learning4e.py
+++ b/tests/test_deep_learning4e.py
@@ -11,8 +11,8 @@ def test_neural_net():
     iris = DataSet(name='iris')
     classes = ['setosa', 'versicolor', 'virginica']
     iris.classes_to_numbers(classes)
-    nnl_adam = NeuralNetLearner(iris, [4], learning_rate=0.001, epochs=200, optimizer=adam)
     nnl_gd = NeuralNetLearner(iris, [4], learning_rate=0.15, epochs=100, optimizer=gradient_descent)
+    nnl_adam = NeuralNetLearner(iris, [4], learning_rate=0.001, epochs=200, optimizer=adam)
     tests = [([5.0, 3.1, 0.9, 0.1], 0),
              ([5.1, 3.5, 1.0, 0.0], 0),
              ([4.9, 3.3, 1.1, 0.1], 0),
@@ -22,25 +22,28 @@ def test_neural_net():
              ([7.5, 4.1, 6.2, 2.3], 2),
              ([7.3, 4.0, 6.1, 2.4], 2),
              ([7.0, 3.3, 6.1, 2.5], 2)]
-    assert grade_learner(nnl_adam, tests) >= 1 / 3
     assert grade_learner(nnl_gd, tests) >= 1 / 3
-    assert err_ratio(nnl_adam, iris) < 0.21
     assert err_ratio(nnl_gd, iris) < 0.21
+    assert grade_learner(nnl_adam, tests) >= 1 / 3
+    assert err_ratio(nnl_adam, iris) < 0.21
 
 
 def test_perceptron():
     iris = DataSet(name='iris')
     classes = ['setosa', 'versicolor', 'virginica']
     iris.classes_to_numbers(classes)
-    pl = PerceptronLearner(iris, learning_rate=0.01, epochs=100)
+    pl_gd = PerceptronLearner(iris, learning_rate=0.01, epochs=100, optimizer=gradient_descent)
+    pl_adam = PerceptronLearner(iris, learning_rate=0.01, epochs=100, optimizer=adam)
     tests = [([5, 3, 1, 0.1], 0),
              ([5, 3.5, 1, 0], 0),
              ([6, 3, 4, 1.1], 1),
              ([6, 2, 3.5, 1], 1),
              ([7.5, 4, 6, 2], 2),
              ([7, 3, 6, 2.5], 2)]
-    assert grade_learner(pl, tests) > 1 / 2
-    assert err_ratio(pl, iris) < 0.4
+    assert grade_learner(pl_gd, tests) > 1 / 2
+    assert err_ratio(pl_gd, iris) < 0.4
+    assert grade_learner(pl_adam, tests) > 1 / 2
+    assert err_ratio(pl_adam, iris) < 0.4
 
 
 def test_rnn():
diff --git a/tests/test_utils.py b/tests/test_utils.py
index 31b5848f0..6c2a50808 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -173,10 +173,6 @@ def test_normalize():
     assert normalize([1, 2, 1]) == [0.25, 0.5, 0.25]
 
 
-def test_clip():
-    assert [clip(x, 0, 1) for x in [-1, 0.5, 10]] == [0, 0.5, 1]
-
-
 def test_gaussian():
     assert gaussian(1, 0.5, 0.7) == 0.6664492057835993
     assert gaussian(5, 2, 4.5) == 0.19333405840142462
@@ -201,10 +197,6 @@ def test_distance_squared():
     assert distance_squared((1, 2), (5, 5)) == 25.0
 
 
-def test_vector_clip():
-    assert vector_clip((-1, 10), (0, 0), (9, 9)) == (0, 9)
-
-
 def test_turn_heading():
     assert turn_heading((0, 1), 1) == (-1, 0)
     assert turn_heading((0, 1), -1) == (1, 0)
diff --git a/utils.py b/utils.py
index 4bf29a9a3..fd683d34a 100644
--- a/utils.py
+++ b/utils.py
@@ -233,8 +233,20 @@ def euclidean_distance(x, y):
     return np.sqrt(sum((_x - _y) ** 2 for _x, _y in zip(x, y)))
 
 
+def manhattan_distance(x, y):
+    return sum(abs(_x - _y) for _x, _y in zip(x, y))
+
+
+def hamming_distance(x, y):
+    return sum(_x != _y for _x, _y in zip(x, y))
+
+
 def cross_entropy_loss(x, y):
-    return (-1.0 / len(x)) * sum(x * np.log(y) + (1 - x) * np.log(1 - y) for x, y in zip(x, y))
+    return (-1.0 / len(x)) * sum(_x * np.log(_y) + (1 - _x) * np.log(1 - _y) for _x, _y in zip(x, y))
+
+
+def mean_squared_error_loss(x, y):
+    return (1.0 / len(x)) * sum((_x - _y) ** 2 for _x, _y in zip(x, y))
 
 
 def rms_error(x, y):
@@ -242,25 +254,17 @@ def ms_error(x, y):
-    return mean((x - y) ** 2 for x, y in zip(x, y))
+    return mean((_x - _y) ** 2 for _x, _y in zip(x, y))
 
 
 def mean_error(x, y):
-    return mean(abs(x - y) for x, y in zip(x, y))
-
-
-def manhattan_distance(x, y):
-    return sum(abs(_x - _y) for _x, _y in zip(x, y))
+    return mean(abs(_x - _y) for _x, _y in zip(x, y))
 
 
 def mean_boolean_error(x, y):
     return mean(_x != _y for _x, _y in zip(x, y))
 
 
-def hamming_distance(x, y):
-    return sum(_x != _y for _x, _y in zip(x, y))
-
-
 def normalize(dist):
     """Multiply each number by a constant such that the sum is 1.0"""
     if isinstance(dist, dict):
@@ -277,20 +281,15 @@ def random_weights(min_value, max_value, num_weights):
     return [random.uniform(min_value, max_value) for _ in range(num_weights)]
 
 
-def clip(x, lowest, highest):
-    """Return x clipped to the range [lowest..highest]."""
-    return max(lowest, min(x, highest))
+def sigmoid(x):
+    """Return activation value of x with sigmoid function."""
+    return 1 / (1 + np.exp(-x))
 
 
 def sigmoid_derivative(value):
     return value * (1 - value)
 
 
-def sigmoid(x):
-    """Return activation value of x with sigmoid function."""
-    return 1 / (1 + np.exp(-x))
-
-
 def elu(x, alpha=0.01):
     return x if x > 0 else alpha * (np.exp(x) - 1)
@@ -389,13 +388,6 @@ def distance_squared(a, b):
     return (xA - xB) ** 2 + (yA - yB) ** 2
 
 
-def vector_clip(vector, lowest, highest):
-    """Return vector, except if any element is less than the corresponding
-    value of lowest or more than the corresponding value of highest, clip to
-    those values."""
-    return type(vector)(map(clip, vector, lowest, highest))
-
-
 # ______________________________________________________________________________
 # Misc Functions
@@ -484,7 +476,6 @@ def failure_test(algorithm, tests):
     to check for correctness. On the other hand, a lot of algorithms output something
     particular on fail (for example, False, or None).
     tests is a list with each element in the form: (values, failure_output)."""
-    from statistics import mean
     return mean(int(algorithm(x) != y) for x, y in tests)
diff --git a/utils4e.py b/utils4e.py
index 1c376066e..b0fbf8df8 100644
--- a/utils4e.py
+++ b/utils4e.py
@@ -319,6 +319,14 @@ def euclidean_distance(x, y):
     return np.sqrt(sum((_x - _y) ** 2 for _x, _y in zip(x, y)))
 
 
+def manhattan_distance(x, y):
+    return sum(abs(_x - _y) for _x, _y in zip(x, y))
+
+
+def hamming_distance(x, y):
+    return sum(_x != _y for _x, _y in zip(x, y))
+
+
 def rms_error(x, y):
     return np.sqrt(ms_error(x, y))
 
@@ -331,28 +339,20 @@ def mean_error(x, y):
     return mean(abs(x - y) for x, y in zip(x, y))
 
 
-def manhattan_distance(x, y):
-    return sum(abs(_x - _y) for _x, _y in zip(x, y))
-
-
 def mean_boolean_error(x, y):
     return mean(_x != _y for _x, _y in zip(x, y))
 
 
-def hamming_distance(x, y):
-    return sum(_x != _y for _x, _y in zip(x, y))
-
-
-# 19.2 Common Loss Functions
+# loss functions
 
 
 def cross_entropy_loss(x, y):
-    """Example of cross entropy loss. x and y are 1D iterable objects."""
-    return (-1.0 / len(x)) * sum(x * np.log(y) + (1 - x) * np.log(1 - y) for x, y in zip(x, y))
+    """Cross entropy loss function. x and y are 1D iterable objects."""
+    return (-1.0 / len(x)) * sum(_x * np.log(_y) + (1 - _x) * np.log(1 - _y) for _x, _y in zip(x, y))
 
 
-def mse_loss(x, y):
-    """Example of min square loss. x and y are 1D iterable objects."""
+def mean_squared_error_loss(x, y):
+    """Mean squared error loss function. x and y are 1D iterable objects."""
     return (1.0 / len(x)) * sum((_x - _y) ** 2 for _x, _y in zip(x, y))
 
 
@@ -395,29 +395,21 @@ def gaussian_kernel_2D(size=3, sigma=0.5):
     return g / g.sum()
 
 
-# ______________________________________________________________________________
-# loss and activation functions
+# activation functions
 
 
 class Activation:
 
     def f(self, x):
-        pass
+        raise NotImplementedError
 
     def derivative(self, x):
-        pass
-
-
-def clip(x, lowest, highest):
-    """Return x clipped to the range [lowest..highest]."""
-    return max(lowest, min(x, highest))
+        raise NotImplementedError
 
 
 def softmax1D(x):
     """Return the softmax vector of input vector x."""
-    exps = [np.exp(_x) for _x in x]
-    sum_exps = sum(exps)
-    return [exp / sum_exps for exp in exps]
+    return np.exp(x) / sum(np.exp(x))
 
 
 class Sigmoid(Activation):
@@ -545,13 +537,6 @@ def distance_squared(a, b):
     return (xA - xB) ** 2 + (yA - yB) ** 2
 
 
-def vector_clip(vector, lowest, highest):
-    """Return vector, except if any element is less than the corresponding
-    value of lowest or more than the corresponding value of highest, clip to
-    those values."""
-    return type(vector)(map(clip, vector, lowest, highest))
-
-
 # ______________________________________________________________________________
 # Misc Functions
@@ -642,7 +627,6 @@ def failure_test(algorithm, tests):
     to check for correctness. On the other hand, a lot of algorithms output something
     particular on fail (for example, False, or None).
     tests is a list with each element in the form: (values, failure_output)."""
-    from statistics import mean
     return mean(int(algorithm(x) != y) for x, y in tests)
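
A quick way to exercise the new surface locally. This sketch is not part of the patch; it assumes the repo's bundled iris dataset and the keras dependency imported by deep_learning4e are available, as in the updated tests.

# Reviewer sketch: PerceptronLearner now routes through the same optimizer
# machinery as NeuralNetLearner, so both gradient_descent and adam work.
from deep_learning4e import PerceptronLearner, adam, gradient_descent
from learning4e import DataSet

iris = DataSet(name='iris')
iris.classes_to_numbers(['setosa', 'versicolor', 'virginica'])

pl_gd = PerceptronLearner(iris, learning_rate=0.01, epochs=100, optimizer=gradient_descent)
pl_adam = PerceptronLearner(iris, learning_rate=0.01, epochs=100, optimizer=adam, batch_size=1)

# each learner is a predict(example) callable returning a class index
print(pl_gd([5, 3, 1, 0.1]), pl_adam([7, 3, 6, 2.5]))  # expected classes 0 and 2, per the tests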
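
The helpers dropped from utils.py/utils4e.py have drop-in replacements, which is what ada_boost and the tests now rely on. A minimal check, assuming numpy and the renamed loss are importable:

import numpy as np
from utils4e import mean_squared_error_loss

# np.clip covers both the removed clip() and vector_clip()
assert np.clip(1.5, 0, 1) == 1
assert tuple(np.clip((-1, 10), (0, 0), (9, 9))) == (0, 9)

# mean_squared_error_loss is the renamed mse_loss
assert mean_squared_error_loss([1, 0], [0.5, 0.5]) == 0.25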