pseudotensor
diff --git a/‎README.md
Lines changed: 108 additions & 0 deletions b/‎README.md
Lines changed: 108 additions & 0 deletions
diff --git a/‎clstm.py
Lines changed: 143 additions & 0 deletions b/‎clstm.py
Lines changed: 143 additions & 0 deletions
diff --git a/‎layers.py
Lines changed: 126 additions & 0 deletions b/‎layers.py
Lines changed: 126 additions & 0 deletions
@@ -0,0 +1,108 @@
+## What: Temporal Autoencoder for Predicting Video
+
+## How: Tensorflow version of CNN to LSTM to uCNN
+
+## Why:
+
+# Inspired by papers:
+
+http://www.jmlr.org/proceedings/papers/v2/sutskever07a/sutskever07a.pdf
+https://arxiv.org/abs/1411.4389
+https://arxiv.org/abs/1504.08023
+https://arxiv.org/abs/1506.04214 (like this paper with RNN but now with LSTM)
+https://arxiv.org/abs/1511.06380
+https://arxiv.org/abs/1511.05440
+https://arxiv.org/abs/1605.08104
+http://file.scirp.org/pdf/AM20100400007_46529567.pdf
+https://arxiv.org/abs/1607.03597
+
+
+# Uses parts of (or inspired by) the following repos:
+
+https://github.com/tensorflow/models/blob/master/real_nvp/real_nvp_utils.py
+https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/rnn/python/ops/core_rnn_cell_impl.py
+https://github.com/machrisaa/tensorflow-vgg
+https://github.com/loliverhennigh/
+https://coxlab.github.io/prednet/
+https://github.com/tensorflow/models/tree/master/video_prediction
+https://github.com/yoonkim/lstm-char-cnn
+https://github.com/anayebi/keras-extra
+https://github.com/tgjeon/TensorFlow-Tutorials-for-Time-Series
+https://github.com/jtoy/awesome-tensorflow
+https://github.com/aymericdamien/TensorFlow-Examples
+
+# Inspired by the following articles:
+
+http://spectrum.ieee.org/automaton/robotics/artificial-intelligence/deep-learning-ai-listens-to-machines-for-signs-of-trouble?adbsc=social_20170124_69611636&adbid=823956941219053569&adbpl=tw&adbpr=740238495952736256
+
+http://www.theverge.com/2016/8/4/12369494/descartes-artificial-intelligence-crop-predictions-usda
+
+https://devblogs.nvidia.com/parallelforall/exploring-spacenet-dataset-using-digits/
+
+# And inspired to a lesser extent by the following papers:
+
+https://arxiv.org/abs/1508.01211
+https://arxiv.org/abs/1507.08750
+https://arxiv.org/abs/1505.00295
+http://www.ijcsi.org/papers/IJCSI-8-4-1-139-148.pdf
+http://cs231n.stanford.edu/reports2016/223_Report.pdf
+
+# Program Requirements:
+
+* Tensorflow and related software, such as Python
+
+* OpenCV
+
+# How to run:
+
+python main.py
+
+Then check the result by making a video of the model vs. the prediction:
+
+sh mergemov.sh
+
+smplayer out_all.mp4
+or
+smplayer out_all2_fast.mp4
+
+
+# Parameters:
+
+1) In main.py:
+
+* In main(), continuetrain: choose whether or not to use checkpoints (if they exist).
+
+* Choose global flags
+
+2) In balls.py:
+
+* num_balls: number of balls
+* SIZE: size of ball's bounding box in pixels
+
+
+# Ideas and Future Work:
+
+* Test on other models
+
+* Try more filters
+
+* Try more depth
+
+* Train with geodesic acceleration (can't be done in python in tensorflow)
+
+* Try homogeneous LSTM/CNN architecture
+
+* Include depth in CNN even if not explicitly 3D data, to avoid issues
+  with overlapping pixel space causing diffusion
+
+* Estimate the velocity field in RGB, so that the most likely state after a
+  collision is not an average amounting to no motion, which is how the L2
+  error treats two possible states.
+
+* Use the entropy generation rate to train attention on where the model can best predict.
+
+* Try rotation, faces, and ultimately real video.
+
+
+
+
@@ -0,0 +1,143 @@
+
+import tensorflow as tf
+
+class CRNNCell(object):
+  """CRNN cell.
+  """
+
+  def __call__(self, inputs, state, scope=None):
+    """Run this RNN cell on inputs, starting from the inputted state.
+    """
+    raise NotImplementedError("Abstract method")
+
+  @property
+  def state_size(self):
+    """sizes of states used by cell.
+    """
+    raise NotImplementedError("Abstract method")
+
+  @property
+  def output_size(self):
+    """Integer or TensorShape: size of outputs produced by cell."""
+    raise NotImplementedError("Abstract method")
+
+  def set_zero_state(self, batch_size, dtype):
+    """Return zero-filled state tensor(s).
+    Args:
+      batch_size: int, float, or unit Tensor representing batch size.
+      dtype: data type for the state.
+    Returns:
+      tensor with shape '[batch_size x shape[0] x shape[1] x features]
+      filled with zeros
+    """
+    
+    shape = self.shape 
+    features = self.features
+    zeros = tf.zeros([batch_size, shape[0], shape[1], features * 2]) 
+    return zeros
+
+class clstm(CRNNCell):
+  """CNN LSTM network's single cell.
+  """
+
+# https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/rnn/python/ops/core_rnn_cell_impl.py
+
+  def __init__(self, shape, filter_size, features, forget_bias=1.0, input_size=None,
+               state_is_tuple=False, activation=tf.nn.tanh):
+    """Initialize the basic CLSTM cell.
+    Args:
+      shape: int tuple of the height and width of the cell
+      filter_size: int tuple of the height and width of the filter
+      features: int of the depth of the cell 
+      forget_bias: float, the bias added to forget gates (see above).
+      input_size: Deprecated.
+      state_is_tuple: If True, accepted and returned states are 2-tuples of
+        the `c_state` and `m_state`.  If False, they are concatenated
+        along the column axis.  Soon deprecated.
+      activation: Activation function of inner states.
+    """
+    if input_size is not None:
+      logging.warn("%s: Input_size parameter is deprecated.", self)
+    self.shape = shape 
+    self.filter_size = filter_size
+    self.features = features 
+    self._forget_bias = forget_bias
+    self._state_is_tuple = state_is_tuple
+    self._activation = activation
+
+  @property
+  def state_size(self):
+    return (LSTMStateTuple(self._num_units, self._num_units)
+            if self._state_is_tuple else 2 * self._num_units)
+
+  @property
+  def output_size(self):
+    return self._num_units
+
+  def __call__(self, inputs, state, scope=None):
+    """Long short-term memory cell (LSTM)."""
+    with tf.variable_scope(scope or type(self).__name__):
+      # Parameters of gates are concatenated into one multiply for efficiency.
+      if self._state_is_tuple:
+        c, h = state
+      else:
+        c, h = tf.split(3, 2, state)
+      concat = _convolve_linear([inputs, h], self.filter_size, self.features * 4, True)
+
+      # i = input_gate, j = new_input, f = forget_gate, o = output_gate
+      i, j, f, o = tf.split(3, 4, concat)
+
+      new_c = (c * tf.nn.sigmoid(f + self._forget_bias) + tf.nn.sigmoid(i) *
+               self._activation(j))
+      new_h = self._activation(new_c) * tf.nn.sigmoid(o)
+
+      if self._state_is_tuple:
+        new_state = LSTMStateTuple(new_c, new_h)
+      else:
+        new_state = tf.concat(3, [new_c, new_h])
+      return new_h, new_state
+
+def _convolve_linear(args, filter_size, features, bias, bias_start=0.0, scope=None):
+  """convolution:
+  Args:
+    args: 4D Tensor or list of 4D, batch x n, Tensors.
+    filter_size: int tuple of filter with height and width.
+    features: int, as number of features.
+    bias_start: starting value to initialize bias; 0 by default.
+    scope: VariableScope for created subgraph; defaults to "Linear".
+  Returns:
+    4D Tensor with shape [batch h w features]
+  Raises:
+    ValueError: if some of arguments have unspecified or wrong shape.
+  """
+
+  # Calculate total size of arguments on dimension 1.
+  total_arg_size_depth = 0
+  shapes = [a.get_shape().as_list() for a in args]
+  for shape in shapes:
+    if len(shape) != 4:
+      raise ValueError("Linear needs 4D arguments: %s" % str(shapes))
+    if not shape[3]:
+      raise ValueError("Linear needs shape[4] of arguments: %s" % str(shapes))
+    else:
+      total_arg_size_depth += shape[3]
+
+  dtype = [a.dtype for a in args][0]
+
+  # Computation
+  with tf.variable_scope(scope or "Conv"):
+    mat = tf.get_variable(
+        "Mat", [filter_size[0], filter_size[1], total_arg_size_depth, features], dtype=dtype)
+    if len(args) == 1:
+      res = tf.nn.conv2d(args[0], mat, strides=[1, 1, 1, 1], padding='SAME')
+    else:
+      res = tf.nn.conv2d(tf.concat(3, args), mat, strides=[1, 1, 1, 1], padding='SAME')
+    if not bias:
+      return res
+    bias_term = tf.get_variable(
+        "Bias", [features],
+        dtype=dtype,
+        initializer=tf.constant_initializer(
+            bias_start, dtype=dtype))
+  return res + bias_term
+
@@ -0,0 +1,126 @@
+
+#######################################################
+#
+# Setup CNN, dCNN, and FC layers
+# Code adapted from:
+#  https://github.com/tensorflow/models/blob/master/real_nvp/real_nvp_utils.py
+#  https://github.com/machrisaa/tensorflow-vgg
+#  And from Tensorflow CIFAR10 example.
+#
+#######################################################
+
+
+import tensorflow as tf
+import numpy as np
+
+# Shared command-line flag container; the layer builders below read
+# FLAGS.weight_decay and FLAGS.weights_init from it.
+FLAGS = tf.app.flags.FLAGS
+
+# Passed as `wd` to _variable_with_weight_decay by cnn2d_layer,
+# dcnn2d_layer and fc_layer.
+tf.app.flags.DEFINE_float('weight_decay', 0.0005,
+                          """ """)
+
+# Used by fc_layer as the stddev of its weights initializer and as the
+# constant initial value of its biases.
+tf.app.flags.DEFINE_float('weights_init', .1,
+                            """initial weights for fc layers""")
+
+def _activation_summary(x):
+  """Helper to create summaries for activations.
+
+  Creates a summary that provides a histogram of activations.
+  Creates a summary that measure the sparsity of activations.
+
+  Args:
+    x: Tensor
+  Returns:
+    nothing
+  """
+  tensor_name = x.op.name
+  tf.histogram_summary(tensor_name + '/activations', x)
+  tf.scalar_summary(tensor_name + '/sparsity', tf.nn.zero_fraction(x))
+
+def _variable_on_cpu(name, shape, initializer):
+  """Helper to create a Variable stored on CPU memory.
+
+  Args:
+    name: name of the variable
+    shape: list of ints
+    initializer: initializer for Variable
+
+  Returns:
+    Variable Tensor
+  """
+  with tf.device('/cpu:0'):
+    var = tf.get_variable(name, shape, initializer=initializer)
+  return var
+
+
+def _variable_with_weight_decay(name, shape, stddev, wd):
+  """Helper to create an initialized Variable with weight decay.
+
+  Note that the Variable is initialized with a truncated normal distribution.
+  A weight decay is added only if one is specified.
+
+  Args:
+    name: name of the variable
+    shape: list of ints
+    stddev: standard deviation of a truncated Gaussian
+    wd: add L2Loss weight decay multiplied by this float. If None, weight
+        decay is not added for this Variable.
+
+  Returns:
+    Variable Tensor
+  """
+  var = _variable_on_cpu(name, shape,
+                         tf.truncated_normal_initializer(stddev=stddev))
+  if wd:
+    weight_decay = tf.mul(tf.nn.l2_loss(var), wd, name='weight_loss')
+    weight_decay.set_shape([])
+    tf.add_to_collection('losses', weight_decay)
+  return var
+
+def cnn2d_layer(inputs, kernel, stride, features, idx, linear = False):
+  with tf.variable_scope('{0}_cnn'.format(idx)) as scope:
+    input_channels = inputs.get_shape()[3] # rgb
+
+    weights = _variable_with_weight_decay('weights', shape=[kernel,kernel,input_channels,features],stddev=0.01, wd=FLAGS.weight_decay)
+    biases = _variable_on_cpu('biases',[features],tf.constant_initializer(0.01))
+
+    cnn = tf.nn.conv2d(inputs, weights, strides=[1, stride, stride, 1], padding='SAME')
+    cnn_biased = tf.nn.bias_add(cnn, biases)
+    if linear:
+      return cnn_biased
+    cnn_rect = tf.nn.elu(cnn_biased,name='{0}_cnn'.format(idx))
+    return cnn_rect
+
+def dcnn2d_layer(inputs, kernel, stride, features, idx, linear = False):
+  with tf.variable_scope('{0}_trans_cnn'.format(idx)) as scope:
+    input_channels = inputs.get_shape()[3] # rgb
+    
+    weights = _variable_with_weight_decay('weights', shape=[kernel,kernel,features,input_channels], stddev=0.01, wd=FLAGS.weight_decay)
+    biases = _variable_on_cpu('biases',[features],tf.constant_initializer(0.01))
+    batch_size = tf.shape(inputs)[0]
+    output_shape = tf.pack([tf.shape(inputs)[0], tf.shape(inputs)[1]*stride, tf.shape(inputs)[2]*stride, features]) 
+    dcnn = tf.nn.conv2d_transpose(inputs, weights, output_shape, strides=[1,stride,stride,1], padding='SAME')
+    dcnn_biased = tf.nn.bias_add(dcnn, biases)
+    if linear:
+      return dcnn_biased
+    dcnn_rect = tf.nn.elu(dcnn_biased,name='{0}_dcnn'.format(idx))
+    return dcnn_rect
+     
+
+def fc_layer(inputs, hiddens, idx, flat = False, linear = False):
+  with tf.variable_scope('{0}_fc'.format(idx)) as scope:
+    input_shape = inputs.get_shape().as_list()
+    if flat:
+      dim = input_shape[1]*input_shape[2]*input_shape[3]
+      inputs_processed = tf.reshape(inputs, [-1,dim])
+    else:
+      dim = input_shape[1]
+      inputs_processed = inputs
+    
+    weights = _variable_with_weight_decay('weights', shape=[dim,hiddens],stddev=FLAGS.weights_init, wd=FLAGS.weight_decay)
+    biases = _variable_on_cpu('biases', [hiddens], tf.constant_initializer(FLAGS.weights_init))
+    if linear:
+      return tf.add(tf.matmul(inputs_processed,weights),biases,name=str(idx)+'_fc')
+  
+    ip = tf.add(tf.matmul(inputs_processed,weights),biases)
+    return tf.nn.elu(ip,name=str(idx)+'_fc')
+