Skip to content

Commit 11fb794

Browse files
committed
Initial Import from working state using simple model and simple CNN-LSTM architecture.
1 parent 8a081b5 commit 11fb794

File tree

7 files changed

+1020
-0
lines changed

7 files changed

+1020
-0
lines changed

Diff for: README.md

+108
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
## What: Temporal Autoencoder for Predicting Video
2+
3+
## How: Tensorflow version of CNN to LSTM to uCNN
4+
5+
## Why:
6+
7+
# Inspired by papers:
8+
9+
http://www.jmlr.org/proceedings/papers/v2/sutskever07a/sutskever07a.pdf
10+
https://arxiv.org/abs/1411.4389
11+
https://arxiv.org/abs/1504.08023
12+
https://arxiv.org/abs/1506.04214 (like this paper with RNN but now with LSTM)
13+
https://arxiv.org/abs/1511.06380
14+
https://arxiv.org/abs/1511.05440
15+
https://arxiv.org/abs/1605.08104
16+
http://file.scirp.org/pdf/AM20100400007_46529567.pdf
17+
https://arxiv.org/abs/1607.03597
18+
19+
20+
# Uses parts of (or inspired by) the following repos:
21+
22+
https://github.com/tensorflow/models/blob/master/real_nvp/real_nvp_utils.py
23+
https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/rnn/python/ops/core_rnn_cell_impl.py
24+
https://github.com/machrisaa/tensorflow-vgg
25+
https://github.com/loliverhennigh/
26+
https://coxlab.github.io/prednet/
27+
https://github.com/tensorflow/models/tree/master/video_prediction
28+
https://github.com/yoonkim/lstm-char-cnn
29+
https://github.com/anayebi/keras-extra
30+
https://github.com/tgjeon/TensorFlow-Tutorials-for-Time-Series
31+
https://github.com/jtoy/awesome-tensorflow
32+
https://github.com/aymericdamien/TensorFlow-Examples
33+
34+
# Inspired by the following articles:
35+
36+
http://spectrum.ieee.org/automaton/robotics/artificial-intelligence/deep-learning-ai-listens-to-machines-for-signs-of-trouble?adbsc=social_20170124_69611636&adbid=823956941219053569&adbpl=tw&adbpr=740238495952736256
37+
38+
http://www.theverge.com/2016/8/4/12369494/descartes-artificial-intelligence-crop-predictions-usda
39+
40+
https://devblogs.nvidia.com/parallelforall/exploring-spacenet-dataset-using-digits/
41+
42+
# And inspired, to a lesser extent, by the following papers:
43+
44+
https://arxiv.org/abs/1508.01211
45+
https://arxiv.org/abs/1507.08750
46+
https://arxiv.org/abs/1505.00295
47+
http://www.ijcsi.org/papers/IJCSI-8-4-1-139-148.pdf
48+
http://cs231n.stanford.edu/reports2016/223_Report.pdf
49+
50+
# Program Requirements:
51+
52+
* Python, TensorFlow, and related packages
53+
54+
* OpenCV
55+
56+
# How to run:
57+
58+
python main.py
59+
60+
Then check the result by making a video that compares the model with the prediction:
61+
62+
sh mergemov.sh
63+
64+
smplayer out_all.mp4
65+
or
66+
smplayer out_all2_fast.mp4
67+
68+
69+
# Parameters:
70+
71+
1) In main.py:
72+
73+
* In main(), continuetrain: choose to use checkpoints (if exist) or not.
74+
75+
* Choose global flags
76+
77+
2) In balls.py:
78+
79+
* number of balls num_balls
80+
* SIZE: size of ball's bounding box in pixels
81+
82+
83+
# Ideas and Future Work:
84+
85+
* Test on other models
86+
87+
* Try more filters
88+
89+
* Try more depth
90+
91+
* Train with geodesic acceleration (can't be done in python in tensorflow)
92+
93+
* Try homogeneous LSTM/CNN architecture
94+
95+
* Include depth in CNN even if not explicitly 3D data, to avoid issues
96+
with overlapping pixel space causing diffusion
97+
98+
* Estimate the velocity field in RGB, so that at collisions the most likely
98+
predicted state is not an average with no motion, which is how the L2 error
99+
treats two equally possible states.
101+
102+
* Use entropy generation rate to train attention where can best predict.
103+
104+
* Try rotation, faces, and ultimately real video.
105+
106+
107+
108+

Diff for: clstm.py

+143
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,143 @@
1+
2+
import tensorflow as tf
3+
4+
class CRNNCell(object):
    """Abstract base class for convolutional RNN cells.

    Subclasses implement ``__call__``, ``state_size`` and ``output_size``,
    and must set ``self.shape`` (height, width) and ``self.features`` (depth
    of the cell), which ``set_zero_state`` reads.
    """

    def __call__(self, inputs, state, scope=None):
        """Run this RNN cell on inputs, starting from the given state."""
        raise NotImplementedError("Abstract method")

    @property
    def state_size(self):
        """Sizes of states used by the cell."""
        raise NotImplementedError("Abstract method")

    @property
    def output_size(self):
        """Integer or TensorShape: size of outputs produced by cell."""
        raise NotImplementedError("Abstract method")

    def set_zero_state(self, batch_size, dtype):
        """Return a zero-filled state tensor.

        Args:
          batch_size: int, float, or unit Tensor representing batch size.
          dtype: data type for the state. ``None`` falls back to
            ``tf.float32``, the type that was produced before this argument
            was honoured.

        Returns:
          Tensor with shape
          ``[batch_size, shape[0], shape[1], features * 2]`` filled with
          zeros.
        """
        if dtype is None:
            dtype = tf.float32
        height, width = self.shape[0], self.shape[1]
        # The last axis is twice `features` wide; clstm splits such a state
        # into two equal halves (c and h) along axis 3.
        return tf.zeros([batch_size, height, width, self.features * 2],
                        dtype=dtype)
38+
39+
class clstm(CRNNCell):
    """CNN LSTM network's single cell.

    The gates are computed with a 2-D convolution over the concatenation of
    the input and the previous hidden state instead of a matrix multiply.
    Adapted from:
    https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/rnn/python/ops/core_rnn_cell_impl.py
    """

    def __init__(self, shape, filter_size, features, forget_bias=1.0, input_size=None,
                 state_is_tuple=False, activation=tf.nn.tanh):
        """Initialize the basic CLSTM cell.

        Args:
          shape: int tuple of the height and width of the cell.
          filter_size: int tuple of the height and width of the filter.
          features: int of the depth of the cell.
          forget_bias: float, the bias added to the forget gate before its
            sigmoid.
          input_size: Deprecated; a warning is logged when it is given.
          state_is_tuple: If True, accepted and returned states are 2-tuples
            of the `c_state` and `m_state`. If False, they are concatenated
            along axis 3. Soon deprecated.
          activation: Activation function of inner states.
        """
        if input_size is not None:
            # The module never imported `logging`; use TensorFlow's logger,
            # which is reachable through the existing `tf` import.
            tf.logging.warn("%s: Input_size parameter is deprecated.", self)
        self.shape = shape
        self.filter_size = filter_size
        self.features = features
        self._forget_bias = forget_bias
        self._state_is_tuple = state_is_tuple
        self._activation = activation

    @property
    def state_size(self):
        """Depth of the state: a (c, h) tuple or the concatenated depth.

        NOTE(review): this reports channel depth only; the spatial extent
        lives in ``self.shape``. Confirm against callers that consume it.
        """
        # `_num_units` was never assigned on this class; `features` is the
        # per-state depth set in __init__.
        if self._state_is_tuple:
            return tf.nn.rnn_cell.LSTMStateTuple(self.features, self.features)
        return 2 * self.features

    @property
    def output_size(self):
        """Depth of the output produced by the cell."""
        return self.features

    def __call__(self, inputs, state, scope=None):
        """Run one step of the convolutional LSTM.

        Args:
          inputs: 4D input Tensor for this step.
          state: previous state; a (c, h) tuple when `state_is_tuple` is
            True, otherwise one Tensor holding c and h concatenated on axis 3.
          scope: optional variable scope name; defaults to the class name.

        Returns:
          A pair ``(new_h, new_state)`` where `new_state` has the same layout
          as `state`.
        """
        with tf.variable_scope(scope or type(self).__name__):
            if self._state_is_tuple:
                c, h = state
            else:
                c, h = tf.split(3, 2, state)

            # Parameters of gates are concatenated into one convolution for
            # efficiency.
            concat = _convolve_linear([inputs, h], self.filter_size,
                                      self.features * 4, True)

            # i = input_gate, j = new_input, f = forget_gate, o = output_gate
            i, j, f, o = tf.split(3, 4, concat)

            new_c = (c * tf.nn.sigmoid(f + self._forget_bias) +
                     tf.nn.sigmoid(i) * self._activation(j))
            new_h = self._activation(new_c) * tf.nn.sigmoid(o)

            if self._state_is_tuple:
                new_state = tf.nn.rnn_cell.LSTMStateTuple(new_c, new_h)
            else:
                new_state = tf.concat(3, [new_c, new_h])
            return new_h, new_state
99+
100+
def _convolve_linear(args, filter_size, features, bias, bias_start=0.0, scope=None):
101+
"""convolution:
102+
Args:
103+
args: 4D Tensor or list of 4D, batch x n, Tensors.
104+
filter_size: int tuple of filter with height and width.
105+
features: int, as number of features.
106+
bias_start: starting value to initialize bias; 0 by default.
107+
scope: VariableScope for created subgraph; defaults to "Linear".
108+
Returns:
109+
4D Tensor with shape [batch h w features]
110+
Raises:
111+
ValueError: if some of arguments have unspecified or wrong shape.
112+
"""
113+
114+
# Calculate total size of arguments on dimension 1.
115+
total_arg_size_depth = 0
116+
shapes = [a.get_shape().as_list() for a in args]
117+
for shape in shapes:
118+
if len(shape) != 4:
119+
raise ValueError("Linear needs 4D arguments: %s" % str(shapes))
120+
if not shape[3]:
121+
raise ValueError("Linear needs shape[4] of arguments: %s" % str(shapes))
122+
else:
123+
total_arg_size_depth += shape[3]
124+
125+
dtype = [a.dtype for a in args][0]
126+
127+
# Computation
128+
with tf.variable_scope(scope or "Conv"):
129+
mat = tf.get_variable(
130+
"Mat", [filter_size[0], filter_size[1], total_arg_size_depth, features], dtype=dtype)
131+
if len(args) == 1:
132+
res = tf.nn.conv2d(args[0], mat, strides=[1, 1, 1, 1], padding='SAME')
133+
else:
134+
res = tf.nn.conv2d(tf.concat(3, args), mat, strides=[1, 1, 1, 1], padding='SAME')
135+
if not bias:
136+
return res
137+
bias_term = tf.get_variable(
138+
"Bias", [features],
139+
dtype=dtype,
140+
initializer=tf.constant_initializer(
141+
bias_start, dtype=dtype))
142+
return res + bias_term
143+

Diff for: layers.py

+126
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,126 @@
1+
2+
#######################################################
3+
#
4+
# Setup CNN, dCNN, and FC layers
5+
# Code adapted from:
6+
# https://github.com/tensorflow/models/blob/master/real_nvp/real_nvp_utils.py
7+
# https://github.com/machrisaa/tensorflow-vgg
8+
# And from Tensorflow CIFAR10 example.
9+
#
10+
#######################################################
11+
12+
13+
import tensorflow as tf
14+
import numpy as np
15+
16+
# Handle to the parsed flag values; the layer builders in this module read
# FLAGS.weight_decay and FLAGS.weights_init.
FLAGS = tf.app.flags.FLAGS

# L2 weight-decay coefficient, passed as `wd` to _variable_with_weight_decay
# by cnn2d_layer, dcnn2d_layer and fc_layer.
tf.app.flags.DEFINE_float('weight_decay', 0.0005,
                          """ """)

# Used by fc_layer both as the stddev of the weight initializer and as the
# constant initial value of the biases.
tf.app.flags.DEFINE_float('weights_init', .1,
                          """initial weights for fc layers""")
23+
24+
def _activation_summary(x):
    """Register summaries describing an activation tensor.

    Two summaries are created, both tagged with the name of the op that
    produced `x`: a histogram of the activations and a scalar holding the
    fraction of zero entries (sparsity).

    Args:
      x: Tensor

    Returns:
      nothing
    """
    op_name = x.op.name
    tf.histogram_summary(op_name + '/activations', x)
    zero_share = tf.nn.zero_fraction(x)
    tf.scalar_summary(op_name + '/sparsity', zero_share)
38+
39+
def _variable_on_cpu(name, shape, initializer):
    """Create (or fetch) a variable pinned to the '/cpu:0' device.

    Args:
      name: name of the variable
      shape: list of ints
      initializer: initializer for Variable

    Returns:
      Variable Tensor
    """
    with tf.device('/cpu:0'):
        return tf.get_variable(name, shape, initializer=initializer)
53+
54+
55+
def _variable_with_weight_decay(name, shape, stddev, wd):
    """Create a truncated-normal initialized variable, optionally decayed.

    When `wd` is truthy, the variable's L2 loss scaled by `wd` is added to
    the 'losses' collection under the op name 'weight_loss'.

    Args:
      name: name of the variable
      shape: list of ints
      stddev: standard deviation of a truncated Gaussian
      wd: add L2Loss weight decay multiplied by this float. If None, weight
          decay is not added for this Variable.

    Returns:
      Variable Tensor
    """
    initializer = tf.truncated_normal_initializer(stddev=stddev)
    var = _variable_on_cpu(name, shape, initializer)
    if not wd:
        return var

    penalty = tf.mul(tf.nn.l2_loss(var), wd, name='weight_loss')
    penalty.set_shape([])
    tf.add_to_collection('losses', penalty)
    return var
78+
79+
def cnn2d_layer(inputs, kernel, stride, features, idx, linear=False):
    """Build a 2-D convolution layer with bias and optional ELU.

    Args:
      inputs: 4D Tensor; its static size on axis 3 gives the input channels.
      kernel: int, height and width of the square filter.
      stride: int, stride applied on both spatial axes.
      features: int, number of output channels.
      idx: value formatted into the scope/op name '<idx>_cnn'.
      linear: if True, return the biased convolution without the ELU.

    Returns:
      The layer output Tensor.
    """
    layer_name = '{0}_cnn'.format(idx)
    with tf.variable_scope(layer_name):
        in_depth = inputs.get_shape()[3]  # rgb
        filter_shape = [kernel, kernel, in_depth, features]

        weights = _variable_with_weight_decay(
            'weights', shape=filter_shape, stddev=0.01, wd=FLAGS.weight_decay)
        biases = _variable_on_cpu(
            'biases', [features], tf.constant_initializer(0.01))

        conv = tf.nn.conv2d(
            inputs, weights, strides=[1, stride, stride, 1], padding='SAME')
        pre_activation = tf.nn.bias_add(conv, biases)
        if linear:
            return pre_activation
        return tf.nn.elu(pre_activation, name=layer_name)
92+
93+
def dcnn2d_layer(inputs, kernel, stride, features, idx, linear=False):
    """Build a transposed 2-D convolution layer with bias and optional ELU.

    The output's spatial size is the input's height and width multiplied by
    `stride`.

    Args:
      inputs: 4D Tensor; its static size on axis 3 gives the input channels.
      kernel: int, height and width of the square filter.
      stride: int, upsampling stride applied on both spatial axes.
      features: int, number of output channels.
      idx: value formatted into the scope name '<idx>_trans_cnn' and the
        activation op name '<idx>_dcnn'.
      linear: if True, return the biased result without the ELU.

    Returns:
      The layer output Tensor.
    """
    with tf.variable_scope('{0}_trans_cnn'.format(idx)):
        input_channels = inputs.get_shape()[3]  # rgb

        # conv2d_transpose filters are laid out [h, w, out_channels, in_channels].
        weights = _variable_with_weight_decay(
            'weights', shape=[kernel, kernel, features, input_channels],
            stddev=0.01, wd=FLAGS.weight_decay)
        biases = _variable_on_cpu(
            'biases', [features], tf.constant_initializer(0.01))

        # Read the dynamic shape once instead of creating a new shape op for
        # every dimension.
        in_shape = tf.shape(inputs)
        batch_size = in_shape[0]
        output_shape = tf.pack(
            [batch_size, in_shape[1] * stride, in_shape[2] * stride, features])

        dcnn = tf.nn.conv2d_transpose(
            inputs, weights, output_shape,
            strides=[1, stride, stride, 1], padding='SAME')
        dcnn_biased = tf.nn.bias_add(dcnn, biases)
        if linear:
            return dcnn_biased
        return tf.nn.elu(dcnn_biased, name='{0}_dcnn'.format(idx))
107+
108+
109+
def fc_layer(inputs, hiddens, idx, flat=False, linear=False):
    """Build a fully connected layer with bias and optional ELU.

    Args:
      inputs: input Tensor; 4D when `flat` is True, otherwise its static
        size on axis 1 is taken as the input width.
      hiddens: int, number of output units.
      idx: value used in the scope name '<idx>_fc' and the output op name.
      flat: if True, reshape `inputs` to [-1, dim1*dim2*dim3] first.
      linear: if True, return the affine result without the ELU.

    Returns:
      The layer output Tensor.
    """
    with tf.variable_scope('{0}_fc'.format(idx)):
        static_shape = inputs.get_shape().as_list()
        if flat:
            dim = static_shape[1] * static_shape[2] * static_shape[3]
            rows = tf.reshape(inputs, [-1, dim])
        else:
            dim = static_shape[1]
            rows = inputs

        weights = _variable_with_weight_decay(
            'weights', shape=[dim, hiddens],
            stddev=FLAGS.weights_init, wd=FLAGS.weight_decay)
        biases = _variable_on_cpu(
            'biases', [hiddens], tf.constant_initializer(FLAGS.weights_init))

        product = tf.matmul(rows, weights)
        out_name = str(idx) + '_fc'
        if linear:
            return tf.add(product, biases, name=out_name)
        return tf.nn.elu(tf.add(product, biases), name=out_name)
126+

0 commit comments

Comments
 (0)