
Commit 73bc907: initial commit (0 parents)

17 files changed, +1535 -0 lines

.DS_Store (6 KB): Binary file not shown.
doc/.DS_Store (6 KB): Binary file not shown.
doc/assignment_prompt.pdf (108 KB): Binary file not shown.
@@ -0,0 +1,250 @@
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# AI Assignment 2: Neural Network"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import random  # needed by SGD for mini-batch shuffling\n",
    "\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def sigmoid(z):\n",
    "    '''The sigmoid function'''\n",
    "    return 1.0 / (1.0 + np.exp(-z))\n",
    "\n",
    "def sigmoid_prime(z):\n",
    "    '''Derivative of the sigmoid function'''\n",
    "    return sigmoid(z) * (1 - sigmoid(z))"
   ]
  },
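  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Added sanity check (illustration only, not part of the assignment code):\n",
    "# sigmoid_prime should agree with a centered finite difference of sigmoid.\n",
    "z = np.linspace(-5, 5, 11)\n",
    "h = 1e-6\n",
    "numeric = (sigmoid(z + h) - sigmoid(z - h)) / (2 * h)\n",
    "assert np.allclose(sigmoid_prime(z), numeric, atol=1e-6)"
   ]
  },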
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [],
   "source": [
    "def read_neural_net_file(nn_filename):\n",
    "    '''\n",
    "    Reads neural net initialization file\n",
    "    '''\n",
    "    with open(nn_filename) as f:\n",
    "        line = f.readline()\n",
    "        nums = [int(num) for num in line.split()]\n",
    "        Ni, Nh, No = nums\n",
    "\n",
    "        b1 = np.zeros([Nh, 1])\n",
    "        w1 = np.zeros([Nh, Ni])\n",
    "\n",
    "        for i in range(Nh):\n",
    "            line = f.readline()\n",
    "            nums = [float(num) for num in line.split()]\n",
    "            b1[i] = nums[0]\n",
    "            w1[i, :] = nums[1:]\n",
    "\n",
    "        b2 = np.zeros([No, 1])\n",
    "        w2 = np.zeros([No, Nh])\n",
    "\n",
    "        for i in range(No):\n",
    "            line = f.readline()\n",
    "            nums = [float(num) for num in line.split()]\n",
    "            b2[i] = nums[0]\n",
    "            w2[i, :] = nums[1:]\n",
    "\n",
    "    return Ni, Nh, No, b1, w1, b2, w2"
   ]
  },
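  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Added sketch of the file format implied by the parser above (assumed,\n",
    "# not given in the assignment files): first line 'Ni Nh No'; then Nh lines,\n",
    "# each a hidden bias followed by Ni input weights; then No lines, each an\n",
    "# output bias followed by Nh hidden weights.\n",
    "example = '''2 2 1\n",
    "0.1 0.5 -0.5\n",
    "-0.1 0.3 0.7\n",
    "0.2 1.0 -1.0\n",
    "'''\n",
    "with open('example_net.txt', 'w') as f:  # hypothetical file name\n",
    "    f.write(example)\n",
    "Ni, Nh, No, b1, w1, b2, w2 = read_neural_net_file('example_net.txt')\n",
    "print(w1.shape, w2.shape)  # (2, 2) (1, 2)"
   ]
  },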
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [],
   "source": [
    "def read_train_file(train_filename):\n",
    "    '''\n",
    "    Reads training file\n",
    "    '''\n",
    "    # Use the argument rather than a hard-coded path\n",
    "    with open(train_filename) as f:\n",
    "        line = f.readline()\n",
    "        nums = [int(num) for num in line.split()]\n",
    "        num_obs, Ni, No = nums\n",
    "\n",
    "        inputs = np.zeros([num_obs, Ni])\n",
    "        outputs = np.zeros([num_obs, No])\n",
    "\n",
    "        for i in range(num_obs):\n",
    "            line = f.readline()\n",
    "            nums = [float(num) for num in line.split()]\n",
    "            inputs[i, :] = nums[:Ni]\n",
    "            outputs[i, :] = nums[Ni:]\n",
    "\n",
    "    return inputs, outputs"
   ]
  },
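  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Added sketch of the training-file layout implied by the parser above\n",
    "# (assumed): first line 'num_obs Ni No'; then one observation per line,\n",
    "# Ni input values followed by No target values.\n",
    "example = '''2 2 1\n",
    "0.0 1.0 1.0\n",
    "1.0 1.0 0.0\n",
    "'''\n",
    "with open('example_train.txt', 'w') as f:  # hypothetical file name\n",
    "    f.write(example)\n",
    "inputs, outputs = read_train_file('example_train.txt')\n",
    "print(inputs.shape, outputs.shape)  # (2, 2) (2, 1)"
   ]
  },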
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "class NeuralNetwork(object):\n",
    "    def __init__(self, neural_net_filename):\n",
    "        (self.Ni, self.Nh, self.No, self.b1, self.w1,\n",
    "         self.b2, self.w2) = read_neural_net_file(neural_net_filename)\n",
    "        # Layer-by-layer lists used by the methods below\n",
    "        self.num_layers = 3\n",
    "        self.biases = [self.b1, self.b2]\n",
    "        self.weights = [self.w1, self.w2]\n",
    "\n",
    "    def feedforward(self, a):\n",
    "        \"\"\"Return the output of the network if ``a`` is input.\"\"\"\n",
    "        for b, w in zip(self.biases, self.weights):\n",
    "            a = sigmoid(np.dot(w, a) + b)\n",
    "        return a\n",
    "\n",
    "    def SGD(self, training_data, epochs, mini_batch_size, eta,\n",
    "            test_data=None):\n",
    "        \"\"\"Train the neural network using mini-batch stochastic\n",
    "        gradient descent. The ``training_data`` is a list of tuples\n",
    "        ``(x, y)`` representing the training inputs and the desired\n",
    "        outputs. The other non-optional parameters are\n",
    "        self-explanatory. If ``test_data`` is provided then the\n",
    "        network will be evaluated against the test data after each\n",
    "        epoch, and partial progress printed out. This is useful for\n",
    "        tracking progress, but slows things down substantially.\"\"\"\n",
    "        if test_data: n_test = len(test_data)\n",
    "        n = len(training_data)\n",
    "        for j in range(epochs):\n",
    "            random.shuffle(training_data)\n",
    "            mini_batches = [\n",
    "                training_data[k:k+mini_batch_size]\n",
    "                for k in range(0, n, mini_batch_size)]\n",
    "            for mini_batch in mini_batches:\n",
    "                self.update_mini_batch(mini_batch, eta)\n",
    "            if test_data:\n",
    "                print(\"Epoch {0}: {1} / {2}\".format(\n",
    "                    j, self.evaluate(test_data), n_test))\n",
    "            else:\n",
    "                print(\"Epoch {0} complete\".format(j))\n",
    "\n",
    "    def update_mini_batch(self, mini_batch, eta):\n",
    "        \"\"\"Update the network's weights and biases by applying\n",
    "        gradient descent using backpropagation to a single mini batch.\n",
    "        The ``mini_batch`` is a list of tuples ``(x, y)``, and ``eta``\n",
    "        is the learning rate.\"\"\"\n",
    "        nabla_b = [np.zeros(b.shape) for b in self.biases]\n",
    "        nabla_w = [np.zeros(w.shape) for w in self.weights]\n",
    "        for x, y in mini_batch:\n",
    "            delta_nabla_b, delta_nabla_w = self.backprop(x, y)\n",
    "            nabla_b = [nb+dnb for nb, dnb in zip(nabla_b, delta_nabla_b)]\n",
    "            nabla_w = [nw+dnw for nw, dnw in zip(nabla_w, delta_nabla_w)]\n",
    "        self.weights = [w-(eta/len(mini_batch))*nw\n",
    "                        for w, nw in zip(self.weights, nabla_w)]\n",
    "        self.biases = [b-(eta/len(mini_batch))*nb\n",
    "                       for b, nb in zip(self.biases, nabla_b)]\n",
    "\n",
    "    def backprop(self, x, y):\n",
    "        \"\"\"Return a tuple ``(nabla_b, nabla_w)`` representing the\n",
    "        gradient for the cost function C_x. ``nabla_b`` and\n",
    "        ``nabla_w`` are layer-by-layer lists of numpy arrays, similar\n",
    "        to ``self.biases`` and ``self.weights``.\"\"\"\n",
    "        nabla_b = [np.zeros(b.shape) for b in self.biases]\n",
    "        nabla_w = [np.zeros(w.shape) for w in self.weights]\n",
    "        # feedforward\n",
    "        activation = x\n",
    "        activations = [x]  # list to store all the activations, layer by layer\n",
    "        zs = []  # list to store all the z vectors, layer by layer\n",
    "        for b, w in zip(self.biases, self.weights):\n",
    "            z = np.dot(w, activation) + b\n",
    "            zs.append(z)\n",
    "            activation = sigmoid(z)\n",
    "            activations.append(activation)\n",
    "        # backward pass\n",
    "        delta = self.cost_derivative(activations[-1], y) * \\\n",
    "            sigmoid_prime(zs[-1])\n",
    "        nabla_b[-1] = delta\n",
    "        nabla_w[-1] = np.dot(delta, activations[-2].transpose())\n",
    "        # Note that the variable l in the loop below is used a little\n",
    "        # differently to the notation in Chapter 2 of the book. Here,\n",
    "        # l = 1 means the last layer of neurons, l = 2 is the\n",
    "        # second-last layer, and so on. It's a renumbering of the\n",
    "        # scheme in the book, used here to take advantage of the fact\n",
    "        # that Python can use negative indices in lists.\n",
    "        for l in range(2, self.num_layers):\n",
    "            z = zs[-l]\n",
    "            sp = sigmoid_prime(z)\n",
    "            delta = np.dot(self.weights[-l+1].transpose(), delta) * sp\n",
    "            nabla_b[-l] = delta\n",
    "            nabla_w[-l] = np.dot(delta, activations[-l-1].transpose())\n",
    "        return (nabla_b, nabla_w)\n",
    "\n",
    "    def evaluate(self, test_data):\n",
    "        \"\"\"Return the number of test inputs for which the neural\n",
    "        network outputs the correct result. Note that the neural\n",
    "        network's output is assumed to be the index of whichever\n",
    "        neuron in the final layer has the highest activation.\"\"\"\n",
    "        test_results = [(np.argmax(self.feedforward(x)), y)\n",
    "                        for (x, y) in test_data]\n",
    "        return sum(int(x == y) for (x, y) in test_results)\n",
    "\n",
    "    def cost_derivative(self, output_activations, y):\n",
    "        \"\"\"Return the vector of partial derivatives \\\\partial C_x /\n",
    "        \\\\partial a for the output activations.\"\"\"\n",
    "        return (output_activations - y)"
   ]
  },
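  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Added usage sketch (assumed workflow, not part of the assignment code):\n",
    "# SGD expects training_data as a list of (x, y) tuples of column vectors,\n",
    "# so the row-major arrays from read_train_file are repackaged first.\n",
    "# 'tests/wdbc_init.txt' is a hypothetical file name.\n",
    "net = NeuralNetwork('tests/wdbc_init.txt')\n",
    "inputs, outputs = read_train_file('tests/wdbc_train.txt')\n",
    "training_data = [(x.reshape(-1, 1), y.reshape(-1, 1))\n",
    "                 for x, y in zip(inputs, outputs)]\n",
    "net.SGD(training_data, epochs=10, mini_batch_size=10, eta=3.0)"
   ]
  },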
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}

0 commit comments