
Commit 1a193c3

Author: Ajith Thomas
Commit message: Initial commit
0 parents, commit 1a193c3

File tree

7 files changed: +438 -0 lines changed


.gitignore

+4
# Directories to be ignored
/.idea/
/notebooks/
/__pycache__/

addressing.py

+121
#!/usr/bin/python3

import tensorflow as tf
import numpy as np

from tensorflow.keras import Model
from tensorflow.keras.layers import Dense


class Addressing(Model):

    def __init__(self, memory_locations=128, memory_vector_size=20, maximum_shifts=3, reading=True):
        super(Addressing, self).__init__()

        self.memory_locations = memory_locations  # N locations
        self.memory_vector_size = memory_vector_size  # M vector size
        self.maximum_shifts = maximum_shifts
        self.reading = reading

        # Split sizes for the emitted head parameters: k_t, beta_t, g_t, s_t, gamma_t (+ e_t, a_t when writing)
        self.read_split = [self.memory_vector_size, 1, 1, self.maximum_shifts, 1]
        self.write_split = [self.memory_vector_size, 1, 1, self.maximum_shifts, 1,
                            self.memory_vector_size, self.memory_vector_size]

        if self.reading:
            self.emit_len = np.sum(self.read_split)
        else:
            self.emit_len = np.sum(self.write_split)

        self.fc_addr = Dense(units=self.emit_len, activation='sigmoid', name="emit_params", trainable=False,
                             kernel_initializer='glorot_uniform', bias_initializer='glorot_normal')

        self.k_t = None
        self.beta_t = None
        self.g_t = None
        self.s_t = None
        self.gamma_t = None

        self.e_t = None
        self.a_t = None

        # All of the below are the weights over N locations produced by the addressing mechanism
        # [Batch size, N]
        self.w_c_t = None
        self.w_g_t = None
        self.w_tilde_t = None
        self.w_t = None

    def emit_head_params(self, fc_output):

        if self.reading:
            k_t, beta_t, g_t, s_t, gamma_t = tf.split(fc_output, self.read_split, axis=-1)

            self.k_t = tf.identity(k_t)
            self.beta_t = tf.nn.softplus(beta_t)
            self.g_t = tf.nn.sigmoid(g_t)
            self.s_t = tf.nn.softmax(s_t, axis=1)
            self.gamma_t = 1.0 + tf.nn.softplus(gamma_t)

        else:
            k_t, beta_t, g_t, s_t, gamma_t, e_t, a_t = tf.split(fc_output, self.write_split, axis=-1)

            self.k_t = tf.identity(k_t)
            self.beta_t = tf.nn.softplus(beta_t)
            self.g_t = tf.nn.sigmoid(g_t)
            self.s_t = tf.nn.softmax(s_t, axis=1)
            self.gamma_t = 1.0 + tf.nn.softplus(gamma_t)

            self.e_t = tf.nn.sigmoid(e_t)
            self.a_t = tf.identity(a_t)

    @staticmethod
    def cosine_similarity(k, m):
        # k: [Batch size, 1, M], m: [Batch size, N, M] -> similarity: [Batch size, N]
        k_mag = tf.sqrt(tf.reduce_sum(tf.square(k), axis=-1))
        m_mag = tf.sqrt(tf.reduce_sum(tf.square(m), axis=-1))
        mag_prod = tf.multiply(k_mag, m_mag)
        dot = tf.squeeze(tf.keras.layers.dot([k, m], axes=(-1, -1)), axis=1)
        return tf.divide(dot, mag_prod)

    @staticmethod
    def circular_convolution(w, s):
        kernels = tf.TensorArray(dtype=s.dtype, size=s.shape[0])

        for i in range(0, s.shape[0]):
            # TensorArray.write returns the updated array; capture it so the writes are kept
            kernels = kernels.write(i, tf.roll(w, shift=i - (s.shape[0] // 2), axis=0))

        w_circ_conv = tf.transpose(kernels.stack())
        return tf.reduce_sum(w_circ_conv * s, axis=1)

    def content_addressing(self, M_t):
        k_t = tf.expand_dims(self.k_t, axis=1)
        self.w_c_t = tf.nn.softmax(self.beta_t * self.cosine_similarity(k_t, M_t), axis=1)

    def interpolation(self, w_t_prev):
        self.w_g_t = (self.g_t * self.w_c_t) + ((1 - self.g_t) * w_t_prev)

    def convolutional_shift(self):
        convolved_weights = tf.TensorArray(dtype=self.w_g_t.dtype, size=self.w_g_t.shape[0])

        for i in range(self.s_t.shape[0]):
            cc = self.circular_convolution(self.w_g_t[i], self.s_t[i])
            convolved_weights = convolved_weights.write(i, cc)

        self.w_tilde_t = convolved_weights.stack()

    def sharpening(self):
        self.w_t = tf.nn.softmax(tf.pow(self.w_tilde_t, self.gamma_t), axis=1)

    def call(self, controller_output, w_t_prev, M_t):
        # Controller outputs used for addressing
        self.emit_head_params(self.fc_addr(controller_output))

        # Addressing mechanism
        self.content_addressing(M_t)
        self.interpolation(w_t_prev)
        self.convolutional_shift()
        self.sharpening()

        if self.reading:
            return self.w_t  # The new weights over the N locations of the memory matrix
        else:
            return self.w_t, self.e_t, self.a_t
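
A quick shape sanity check for the addressing pipeline; this is a minimal sketch, and the batch size, controller width, and random tensors below are illustrative assumptions, not part of the commit:

import tensorflow as tf
from addressing import Addressing

batch, N, M = 2, 128, 20
addr = Addressing(memory_locations=N, memory_vector_size=M, maximum_shifts=3, reading=True)

controller_output = tf.random.normal([batch, 100])            # stand-in controller features
w_prev = tf.nn.softmax(tf.random.normal([batch, N]), axis=1)  # previous weights over N locations
memory = tf.random.uniform([batch, N, M])                     # stand-in memory matrix

w_t = addr(controller_output, w_prev, memory)
print(w_t.shape)  # (2, 128)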

controller.py

+26
#!/usr/bin/python3

import tensorflow as tf

from tensorflow.keras import Model
from tensorflow.keras.layers import Dense


class Controller(Model):

    def __init__(self, controller_size=100):
        super(Controller, self).__init__()

        # 3-layer feedforward controller
        self.d1 = Dense(units=controller_size, activation=tf.nn.sigmoid, name="controller_d1",
                        kernel_initializer='glorot_uniform', bias_initializer='glorot_normal')
        self.d2 = Dense(units=controller_size, activation=tf.nn.sigmoid, name="controller_d2",
                        kernel_initializer='glorot_uniform', bias_initializer='glorot_normal')
        self.d3 = Dense(units=controller_size, activation=tf.nn.sigmoid, name="controller_d3",
                        kernel_initializer='glorot_uniform', bias_initializer='glorot_normal')

    def call(self, controller_input):
        out = self.d1(controller_input)
        out = self.d2(out)
        return self.d3(out)
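
Usage is a plain feedforward mapping; a minimal sketch with illustrative shapes (the 28-feature input below is an assumption, standing in for the external input concatenated with the previous read vector):

import tensorflow as tf
from controller import Controller

ctrl = Controller(controller_size=100)
x = tf.random.normal([4, 28])  # [Batch size, Features + M] in the NTM wiring
h = ctrl(x)
print(h.shape)  # (4, 100)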

heads.py

+44
#!/usr/bin/python3

import tensorflow as tf

from tensorflow.keras import Model

from addressing import Addressing


class ReadHead(Model):

    def __init__(self, memory_locations=128, memory_vector_size=20, maximum_shifts=3):
        super(ReadHead, self).__init__()

        self.addr_mech = Addressing(memory_locations, memory_vector_size, maximum_shifts, reading=True)

    def call(self, controller_output, w_t_1, M_t):
        w_t = self.addr_mech(controller_output, w_t_1, M_t)
        r_t = tf.squeeze(tf.matmul(tf.expand_dims(w_t, axis=1), M_t), axis=1)
        return r_t, w_t


class WriteHead(Model):

    def __init__(self, memory_locations=128, memory_vector_size=20, maximum_shifts=3):
        super(WriteHead, self).__init__()

        self.memory_vector_size = memory_vector_size
        self.addr_mech = Addressing(memory_locations, memory_vector_size, maximum_shifts, reading=False)

    def call(self, controller_output, w_t_1, M_t_1):
        w_t, e_t, a_t = self.addr_mech(controller_output, w_t_1, M_t_1)
        w_t = tf.expand_dims(w_t, axis=1)

        # Erase
        e_t = tf.expand_dims(e_t, axis=1)
        M_tilde_t = tf.multiply(M_t_1, (1.0 - tf.matmul(w_t, e_t, transpose_a=True)))

        # Add
        a_t = tf.expand_dims(a_t, axis=1)
        M_t = M_tilde_t + tf.matmul(w_t, a_t, transpose_a=True)

        return M_t, tf.squeeze(e_t, axis=1), tf.squeeze(a_t, axis=1), tf.squeeze(w_t, axis=1)
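
A read/write round trip can be sketched as below; the shapes and random tensors are illustrative assumptions, not from the commit:

import tensorflow as tf
from heads import ReadHead, WriteHead

batch, N, M = 2, 128, 20
read_head = ReadHead(N, M, maximum_shifts=3)
write_head = WriteHead(N, M, maximum_shifts=3)

controller_out = tf.random.normal([batch, 100])
w_prev = tf.nn.softmax(tf.random.normal([batch, N]), axis=1)
memory = tf.random.uniform([batch, N, M])

r_t, w_r = read_head(controller_out, w_prev, memory)             # [2, 20], [2, 128]
M_t, e_t, a_t, w_w = write_head(controller_out, w_prev, memory)  # [2, 128, 20], [2, 20], [2, 20], [2, 128]
print(r_t.shape, M_t.shape)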

ntm.py

+97
#!/usr/bin/python3

import tensorflow as tf
import numpy as np

from controller import Controller
from heads import ReadHead, WriteHead

from tensorflow.keras import Model
from tensorflow.keras.layers import Dense


class NTM(Model):

    def __init__(self, controller_size=100, memory_locations=128, memory_vector_size=20, maximum_shifts=3, output_size=8):
        super(NTM, self).__init__()

        self.memory_locations = memory_locations  # N locations
        self.memory_vector_size = memory_vector_size  # M size memory vectors
        self.maximum_shifts = maximum_shifts

        self.controller = Controller(controller_size)
        self.read_head = ReadHead(self.memory_locations, self.memory_vector_size, self.maximum_shifts)
        self.write_head = WriteHead(self.memory_locations, self.memory_vector_size, self.maximum_shifts)

        self.final_fc = Dense(units=output_size, activation=tf.nn.sigmoid, name="final_fc",
                              kernel_initializer='glorot_uniform', bias_initializer='glorot_normal')

        self.stddev = 1.0 / (np.sqrt(self.memory_locations + self.memory_vector_size))

        # Bias values used to initialize the state (constants here, not learned)
        self.r_bias = tf.constant(tf.random.normal([1, self.memory_vector_size]) * 0.01)  # Bias for previous reads
        self.M_bias = tf.constant(tf.random.uniform([1, self.memory_locations, self.memory_vector_size],
                                                    minval=-self.stddev, maxval=self.stddev))  # Bias for the memory matrix

        # States of the NTM
        self.r_t_1 = None  # Previous read vector variable [Batch size, M]
        self.w_t_1 = None  # Previous weights over the memory matrix [Batch size, N]
        self.M_t = None  # The memory matrix [Batch size, N, M]

        # Extra outputs that are tracked
        self.e_t = None
        self.a_t = None

    def create_new_state(self, batch_size):  # Creates a new NTM state
        # This has to be called manually if stateful is set to True
        if self.r_t_1 is None:
            self.r_t_1 = tf.Variable(tf.tile(self.r_bias, [batch_size, 1]), trainable=False)
        else:
            self.r_t_1.assign(tf.tile(self.r_bias, [batch_size, 1]))

        if self.w_t_1 is None:
            self.w_t_1 = tf.Variable(tf.zeros([batch_size, self.memory_locations]), trainable=False)
        else:
            self.w_t_1.assign(tf.zeros([batch_size, self.memory_locations]))

        if self.M_t is None:
            self.M_t = tf.Variable(tf.tile(self.M_bias, [batch_size, 1, 1]), trainable=False)
        else:
            self.M_t.assign(tf.tile(self.M_bias, [batch_size, 1, 1]))

    def call(self, inputs, stateful=False):
        # Convert from [Batch, Timesteps, Features] to [Timesteps, Batch, Features]
        inputs = tf.transpose(inputs, [1, 0, 2])
        outputs = tf.TensorArray(dtype=inputs.dtype, size=inputs.shape[0])

        if not stateful:  # A new state is created at the start of each batch unless stateful is True
            self.create_new_state(inputs.shape[1])

        for i in range(inputs.shape[0]):
            # Concatenate the input and the previous read vector [Batch, Features + M]
            controller_inputs = tf.concat([inputs[i], self.r_t_1], axis=1)
            controller_outputs = self.controller(controller_inputs)  # [Batch size, Controller size]

            r_t, w_t = self.read_head(controller_outputs, tf.identity(self.w_t_1), tf.identity(self.M_t))  # [Batch size, M], [Batch size, N]
            self.r_t_1.assign(r_t)
            self.w_t_1.assign(w_t)

            # [Batch size, N, M], [Batch size, M], [Batch size, M], [Batch size, N]
            M_t, self.e_t, self.a_t, w_t = self.write_head(controller_outputs, tf.identity(self.w_t_1), tf.identity(self.M_t))
            self.M_t.assign(M_t)
            self.w_t_1.assign(w_t)

            fc_input = tf.concat([controller_outputs, self.r_t_1], axis=1)  # [Batch size, Controller size + M]
            output_t = self.final_fc(fc_input)  # [Batch size, Output size]
            outputs = outputs.write(i, output_t)  # TensorArray.write returns the updated array; capture it

        outputs = tf.transpose(outputs.stack(), [1, 0, 2])  # [Batch size, Timesteps, Output size]
        return outputs


# ntm = NTM(controller_size=100, memory_locations=10, memory_vector_size=5, output_size=3)

# # [Batch, Timesteps, Features]
# inp = tf.Variable(tf.reshape(tf.range(0.0, 4.0, 0.1), [2, 5, 4]))
# out = ntm(inp)
# print(out)
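
For stateful use, the comment in create_new_state says the state must be reset by hand; a minimal sketch under that assumption, with the same illustrative shapes as the commented example above:

import tensorflow as tf
from ntm import NTM

ntm = NTM(controller_size=100, memory_locations=10, memory_vector_size=5, output_size=3)
inp = tf.reshape(tf.range(0.0, 4.0, 0.1), [2, 5, 4])  # [Batch, Timesteps, Features]

ntm.create_new_state(batch_size=2)  # manual reset, required when stateful=True
out_1 = ntm(inp, stateful=True)     # memory and read state carry over
out_2 = ntm(inp, stateful=True)     # continues from the state left by the first call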

seqgen.py

+51
#!/usr/bin/python3

import numpy as np


# Function to generate sequences of different lengths for the copy task
def generate_patterns(batch_size=100,
                      max_sequence=20,
                      min_sequence=1,
                      in_bits=8,
                      out_bits=8,
                      pad=1e-12,
                      low_tol=1e-12,
                      high_tol=1.0,
                      fixed_seq_len=False):

    ti = []
    to = []

    for _ in range(batch_size):

        if not fixed_seq_len:
            seq_len_row = np.random.randint(low=min_sequence, high=max_sequence + 1)
        else:
            seq_len_row = max_sequence

        pat = np.random.randint(low=0, high=2, size=(seq_len_row, out_bits))
        pat = pat.astype(np.float32)

        # Applying tolerance (so that values don't go to zero and cause NaN errors)
        pat[pat < 1] = low_tol
        pat[pat >= 1] = high_tol

        # Padding can be added if needed
        x = np.ones(((max_sequence * 2) + 2, in_bits + 2), dtype=pat.dtype) * pad  # Input pattern has two extra side tracks
        y = np.ones(((max_sequence * 2) + 2, out_bits), dtype=pat.dtype) * pad  # Side tracks are not produced

        # Creates a delayed output (target delay)
        x[1:seq_len_row + 1, 2:] = pat
        y[seq_len_row + 2:(2 * seq_len_row) + 2, :] = pat  # No side tracks needed for the output

        x[1:seq_len_row + 1, 0:2] = low_tol
        x[0, :] = low_tol
        x[0, 1] = 1.0  # Start-of-sequence marker
        x[seq_len_row + 1, :] = low_tol
        x[seq_len_row + 1, 0] = 1.0  # End-of-sequence marker

        ti.append(x)
        to.append(y)

    return np.array(ti), np.array(to)
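
Putting the pieces together, a minimal hypothetical training sketch for the copy task; the hyperparameters, optimizer, and loss below are illustrative assumptions, not part of the commit. Note that the state updates in NTM.call use Variable.assign, which is not differentiable, so gradients only flow through the within-timestep path in this setup.

import tensorflow as tf

from ntm import NTM
from seqgen import generate_patterns

ntm = NTM(controller_size=100, memory_locations=32, memory_vector_size=10, output_size=8)
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3)
bce = tf.keras.losses.BinaryCrossentropy()

for step in range(100):
    # x: [Batch, (2 * max_sequence) + 2, in_bits + 2], y: [Batch, (2 * max_sequence) + 2, out_bits]
    x, y = generate_patterns(batch_size=8, max_sequence=5, in_bits=8, out_bits=8)

    with tf.GradientTape() as tape:
        y_pred = ntm(tf.constant(x))  # [Batch, Timesteps, out_bits]
        loss = bce(y, y_pred)

    grads = tape.gradient(loss, ntm.trainable_variables)
    optimizer.apply_gradients(zip(grads, ntm.trainable_variables))

    if step % 10 == 0:
        print(f"step {step}: loss {loss.numpy():.4f}")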
