
Commit e019207

minor changes

12 files changed, +16505 -0 lines changed

RecordReaderAll.py

Lines changed: 120 additions & 0 deletions
@@ -0,0 +1,120 @@
import tensorflow as tf
import numpy as np
import threading
import PIL.Image as Image
from functools import partial
from multiprocessing import Pool
import cv2

import sys
import os

sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from modules import *


HEIGHT=192
WIDTH=256
NUM_PLANES = 20
NUM_THREADS = 4


class RecordReaderAll():
    def __init__(self):
        return

    def getBatch(self, filename_queue, numOutputPlanes = 20, batchSize = 16, min_after_dequeue = 1000, random=True, getLocal=False, getSegmentation=False, test=True):
        reader = tf.TFRecordReader()
        _, serialized_example = reader.read(filename_queue)

        features = tf.parse_single_example(
            serialized_example,
            # Defaults are not specified since all keys are required.
            features={
                #'height': tf.FixedLenFeature([], tf.int64),
                #'width': tf.FixedLenFeature([], tf.int64),
                'image_raw': tf.FixedLenFeature([], tf.string),
                'image_path': tf.FixedLenFeature([], tf.string),
                'num_planes': tf.FixedLenFeature([], tf.int64),
                'plane': tf.FixedLenFeature([NUM_PLANES * 3], tf.float32),
                #'plane_relation': tf.FixedLenFeature([NUM_PLANES * NUM_PLANES], tf.float32),
                'segmentation_raw': tf.FixedLenFeature([], tf.string),
                'depth': tf.FixedLenFeature([HEIGHT * WIDTH], tf.float32),
                'normal': tf.FixedLenFeature([HEIGHT * WIDTH * 3], tf.float32),
                'semantics_raw': tf.FixedLenFeature([], tf.string),
                'boundary_raw': tf.FixedLenFeature([], tf.string),
                'info': tf.FixedLenFeature([4 * 4 + 4], tf.float32),
            })

        # Decode the raw image bytes into a float image in [-0.5, 0.5]
        # with shape [HEIGHT, WIDTH, 3].
        image = tf.decode_raw(features['image_raw'], tf.uint8)
        image = tf.cast(image, tf.float32) * (1. / 255) - 0.5
        image = tf.reshape(image, [HEIGHT, WIDTH, 3])

        depth = features['depth']
        depth = tf.reshape(depth, [HEIGHT, WIDTH, 1])

        normal = features['normal']
        normal = tf.reshape(normal, [HEIGHT, WIDTH, 3])

        normal = tf.nn.l2_normalize(normal, dim=2)

        #normal = tf.stack([normal[:, :, 1], normal[:, :, 0], normal[:, :, 2]], axis=2)

        semantics = tf.decode_raw(features['semantics_raw'], tf.uint8)
        semantics = tf.cast(tf.reshape(semantics, [HEIGHT, WIDTH]), tf.int32)

        numPlanes = tf.minimum(tf.cast(features['num_planes'], tf.int32), numOutputPlanes)

        numPlanesOri = numPlanes
        numPlanes = tf.maximum(numPlanes, 1)

        planes = features['plane']
        planes = tf.reshape(planes, [NUM_PLANES, 3])
        planes = tf.slice(planes, [0, 0], [numPlanes, 3])

        #shuffle_inds = tf.one_hot(tf.random_shuffle(tf.range(numPlanes)), depth = numPlanes)
        shuffle_inds = tf.one_hot(tf.range(numPlanes), numPlanes)

        planes = tf.transpose(tf.matmul(tf.transpose(planes), shuffle_inds))
        planes = tf.reshape(planes, [numPlanes, 3])
        planes = tf.concat([planes, tf.zeros([numOutputPlanes - numPlanes, 3])], axis=0)
        planes = tf.reshape(planes, [numOutputPlanes, 3])

        boundary = tf.decode_raw(features['boundary_raw'], tf.uint8)
        boundary = tf.cast(tf.reshape(boundary, (HEIGHT, WIDTH, 2)), tf.float32)

        #boundary = tf.decode_raw(features['boundary_raw'], tf.float64)
        #boundary = tf.cast(tf.reshape(boundary, (HEIGHT, WIDTH, 3)), tf.float32)
        #boundary = tf.slice(boundary, [0, 0, 0], [HEIGHT, WIDTH, 2])

        segmentation = tf.decode_raw(features['segmentation_raw'], tf.uint8)
        segmentation = tf.reshape(segmentation, [HEIGHT, WIDTH, 1])

        coef = tf.range(numPlanes)
        coef = tf.reshape(tf.matmul(tf.reshape(coef, [-1, numPlanes]), tf.cast(shuffle_inds, tf.int32)), [1, 1, numPlanes])

        plane_masks = tf.cast(tf.equal(segmentation, tf.cast(coef, tf.uint8)), tf.float32)
        plane_masks = tf.concat([plane_masks, tf.zeros([HEIGHT, WIDTH, numOutputPlanes - numPlanes])], axis=2)
        plane_masks = tf.reshape(plane_masks, [HEIGHT, WIDTH, numOutputPlanes])

        #non_plane_mask = tf.cast(tf.equal(segmentation, tf.cast(numOutputPlanes, tf.uint8)), tf.float32)
        non_plane_mask = 1 - tf.reduce_max(plane_masks, axis=2, keep_dims=True)
        #tf.cast(tf.equal(segmentation, tf.cast(numOutputPlanes, tf.uint8)), tf.float32)

        if random:
            image_inp, plane_inp, depth_gt, normal_gt, semantics_gt, plane_masks_gt, boundary_gt, num_planes_gt, non_plane_mask_gt, image_path, info = tf.train.shuffle_batch([image, planes, depth, normal, semantics, plane_masks, boundary, numPlanesOri, non_plane_mask, features['image_path'], features['info']], batch_size=batchSize, capacity=min_after_dequeue + (NUM_THREADS + 2) * batchSize, num_threads=NUM_THREADS, min_after_dequeue=min_after_dequeue)
        else:
            image_inp, plane_inp, depth_gt, normal_gt, semantics_gt, plane_masks_gt, boundary_gt, num_planes_gt, non_plane_mask_gt, image_path, info = tf.train.batch([image, planes, depth, normal, semantics, plane_masks, boundary, numPlanesOri, non_plane_mask, features['image_path'], features['info']], batch_size=batchSize, capacity=(NUM_THREADS + 2) * batchSize, num_threads=1)
            pass
        global_gt_dict = {'plane': plane_inp, 'depth': depth_gt, 'normal': normal_gt, 'semantics': semantics_gt, 'segmentation': plane_masks_gt, 'boundary': boundary_gt, 'num_planes': num_planes_gt, 'non_plane_mask': non_plane_mask_gt, 'image_path': image_path, 'info': info}
        return image_inp, global_gt_dict, {}
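
A minimal sketch of how this reader is typically driven under TF 1.x queue runners (not part of this commit; the .tfrecords path, epoch count, and batch size below are placeholders):

import tensorflow as tf
from RecordReaderAll import RecordReaderAll

# Hypothetical record file; any TFRecord written with the expected features would do.
filename_queue = tf.train.string_input_producer(['planes_train.tfrecords'], num_epochs=1)
reader = RecordReaderAll()
image_inp, global_gt_dict, _ = reader.getBatch(filename_queue, numOutputPlanes=20,
                                               batchSize=4, random=False)

with tf.Session() as sess:
    # Local variables initializer is needed for the num_epochs counter.
    sess.run([tf.global_variables_initializer(), tf.local_variables_initializer()])
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    images, gt = sess.run([image_inp, global_gt_dict])
    print(images.shape, gt['num_planes'])
    coord.request_stop()
    coord.join(threads)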

crfasrnn_layer.py

Lines changed: 136 additions & 0 deletions
@@ -0,0 +1,136 @@
"""
MIT License

Copyright (c) 2017 Sadeep Jayasumana

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
"""

import numpy as np
import tensorflow as tf
from keras.engine.topology import Layer

custom_module = tf.load_op_library('./cpp/high_dim_filter.so')
import high_dim_filter_grad  # Register gradients for the custom op


class CrfRnnLayer(Layer):
    """ Implements the CRF-RNN layer described in:
    Conditional Random Fields as Recurrent Neural Networks,
    S. Zheng, S. Jayasumana, B. Romera-Paredes, V. Vineet, Z. Su, D. Du, C. Huang and P. Torr,
    ICCV 2015
    """

    def __init__(self, image_dims, num_classes,
                 theta_alpha, theta_beta, theta_gamma,
                 num_iterations, **kwargs):
        self.image_dims = image_dims
        self.num_classes = num_classes
        self.theta_alpha = theta_alpha
        self.theta_beta = theta_beta
        self.theta_gamma = theta_gamma
        self.num_iterations = num_iterations
        self.spatial_ker_weights = None
        self.bilateral_ker_weights = None
        self.compatibility_matrix = None
        super(CrfRnnLayer, self).__init__(**kwargs)

    def build(self, input_shape):
        # # Weights of the spatial kernel
        # self.spatial_ker_weights = self.add_weight(name='spatial_ker_weights',
        #                                            shape=(self.num_classes, self.num_classes),
        #                                            initializer='uniform',
        #                                            trainable=True)

        # # Weights of the bilateral kernel
        # self.bilateral_ker_weights = self.add_weight(name='bilateral_ker_weights',
        #                                              shape=(self.num_classes, self.num_classes),
        #                                              initializer='uniform',
        #                                              trainable=True)

        # # Compatibility matrix
        # self.compatibility_matrix = self.add_weight(name='compatibility_matrix',
        #                                             shape=(self.num_classes, self.num_classes),
        #                                             initializer='uniform',
        #                                             trainable=True)

        weights = np.load('weights.npy')
        weights = [weights[0], weights[1], weights[2]]
        self.spatial_ker_weights = tf.Variable(weights[0][:self.num_classes, :self.num_classes], name='spatial_ker_weights', trainable=True)
        self.bilateral_ker_weights = tf.Variable(weights[1][:self.num_classes, :self.num_classes], name='bilateral_ker_weights', trainable=True)
        self.compatibility_matrix = tf.Variable(weights[2][:self.num_classes, :self.num_classes], name='compatibility_matrix', trainable=True)

        # self.spatial_ker_weights = tf.constant(weights[0].reshape(-1), name='spatial_ker_weights', shape=(self.num_classes, self.num_classes))
        # self.bilateral_ker_weights = tf.constant(weights[1].reshape(-1), name='bilateral_ker_weights', shape=(self.num_classes, self.num_classes))
        # self.compatibility_ker_weights = tf.constant(weights[2].reshape(-1), name='compatibility_ker_weights', shape=(self.num_classes, self.num_classes))

        super(CrfRnnLayer, self).build(input_shape)

    def call(self, inputs):
        batchSize = int(inputs[0].shape[0])
        c, h, w = self.num_classes, self.image_dims[0], self.image_dims[1]
        all_ones = np.ones((c, h, w), dtype=np.float32)

        outputs = []
        for batchIndex in xrange(batchSize):
            unaries = tf.transpose(inputs[0][batchIndex, :, :, :], perm=(2, 0, 1))
            rgb = tf.transpose(inputs[1][batchIndex, :, :, :], perm=(2, 0, 1))

            # Prepare filter normalization coefficients
            spatial_norm_vals = custom_module.high_dim_filter(all_ones, rgb, bilateral=False,
                                                              theta_gamma=self.theta_gamma)
            bilateral_norm_vals = custom_module.high_dim_filter(all_ones, rgb, bilateral=True,
                                                                theta_alpha=self.theta_alpha,
                                                                theta_beta=self.theta_beta)
            q_values = unaries

            for i in range(self.num_iterations):
                softmax_out = tf.nn.softmax(q_values, dim=0)

                # Spatial filtering
                spatial_out = custom_module.high_dim_filter(softmax_out, rgb, bilateral=False,
                                                            theta_gamma=self.theta_gamma)
                spatial_out = spatial_out / spatial_norm_vals

                # Bilateral filtering
                bilateral_out = custom_module.high_dim_filter(softmax_out, rgb, bilateral=True,
                                                              theta_alpha=self.theta_alpha,
                                                              theta_beta=self.theta_beta)
                bilateral_out = bilateral_out / bilateral_norm_vals

                # Weighting filter outputs
                message_passing = (tf.matmul(self.spatial_ker_weights,
                                             tf.reshape(spatial_out, (c, -1))) +
                                   tf.matmul(self.bilateral_ker_weights,
                                             tf.reshape(bilateral_out, (c, -1))))

                # Compatibility transform
                pairwise = tf.matmul(self.compatibility_matrix, message_passing)

                # Adding unary potentials
                pairwise = tf.reshape(pairwise, (c, h, w))
                q_values = unaries - pairwise
                continue
            outputs.append(tf.transpose(tf.reshape(q_values, (1, c, h, w)), perm=(0, 2, 3, 1)))
            continue
        outputs = tf.concat(outputs, axis=0)
        return outputs

    def compute_output_shape(self, input_shape):
        return input_shape
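
For orientation (not part of this commit), the layer expects inputs as a pair [unary class scores, rgb image], each shaped (batch, height, width, channels), and returns refined scores of the same shape as the unaries. A minimal sketch of calling it, assuming the compiled high_dim_filter op and weights.npy are present; the class count and theta/iteration values below are illustrative placeholders:

import tensorflow as tf
from crfasrnn_layer import CrfRnnLayer

# Hypothetical tensors; in practice these come from a segmentation network and the input image.
unary_scores = tf.placeholder(tf.float32, [1, 192, 256, 21])  # per-pixel class scores
rgb_image = tf.placeholder(tf.float32, [1, 192, 256, 3])      # image the bilateral filter is conditioned on

crf_layer = CrfRnnLayer(image_dims=(192, 256), num_classes=21,
                        theta_alpha=160., theta_beta=3., theta_gamma=3.,
                        num_iterations=5)
refined_scores = crf_layer([unary_scores, rgb_image])  # shape (1, 192, 256, 21)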

0 commit comments
