Skip to content

Commit e95e492

Browse files
evaluation
1 parent ec11ce8 commit e95e492

21 files changed

+1352
-151
lines changed

PlaneSetGeneration/train_planenet.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@
3232
MOVING_AVERAGE_DECAY = 0.99
3333

3434
deepSupervisionLayers=['res4b22_relu']
35-
deepSupervisionLayers=[]
35+
#deepSupervisionLayers=[]
3636

3737
def build_graph(img_inp_train, img_inp_val, plane_gt_train, plane_gt_val, validating_inp, is_training=True, numOutputPlanes=20, gpu_id = 0, useCRF= 0, suffix='forward'):
3838
if suffix == '12_22':
@@ -731,7 +731,7 @@ def test(gpu_id, dumpdir, logdir, testdir, keyname, restore, numOutputPlanes=20,
731731
ranges = np.array([urange / imageWidth * 640 / focalLength, np.ones(urange.shape), -vrange / imageHeight * 480 / focalLength]).transpose([1, 2, 0])
732732

733733

734-
for index in xrange(100):
734+
for index in xrange(10):
735735
print(('image', index))
736736
t0=time.time()
737737
#im, planes, depth, normal, boundary, s_8, p_8, b_8, s_16, p_16, b_16, s_32, p_32, b_32 = sess.run([img_inp, plane_gt, depth_gt, normal_gt, boundary_gt, s_8_gt, p_8_gt, b_8_gt, s_16_gt, p_16_gt, b_16_gt, s_32_gt, p_32_gt, b_32_gt])

code/RecordWriterRGBD.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -100,10 +100,10 @@ def writeRecordFile(tfrecords_filename, imagePaths):
100100
if __name__=='__main__':
101101
imagePaths = glob.glob('/home/chenliu/Projects/Data/NYU_RGBD/raw/train/color_*.png')
102102
imagePaths = [{'image': imagePath, 'depth': imagePath.replace('color', 'depth')} for imagePath in imagePaths]
103-
#print(len(imagePaths))
103+
print(len(imagePaths))
104104
#exit(1)
105105
random.shuffle(imagePaths)
106-
writeRecordFile('../planes_nyu_rgbd_train_temp.tfrecords', imagePaths)
106+
writeRecordFile('../planes_nyu_rgbd_train.tfrecords', imagePaths)
107107
#reader.readRecordFile()
108108

109109

code/evaluate.py

+395
Large diffs are not rendered by default.

code/kaffe/__init__.pyc

0 Bytes
Binary file not shown.

code/kaffe/caffe/__init__.pyc

0 Bytes
Binary file not shown.

code/kaffe/caffe/resolver.pyc

0 Bytes
Binary file not shown.

code/kaffe/errors.pyc

0 Bytes
Binary file not shown.

code/kaffe/graph.pyc

0 Bytes
Binary file not shown.

code/kaffe/layers.pyc

0 Bytes
Binary file not shown.

code/kaffe/shapes.pyc

0 Bytes
Binary file not shown.

code/kaffe/tensorflow/__init__.pyc

0 Bytes
Binary file not shown.

code/kaffe/tensorflow/network.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ def __init__(self, inputs, options, trainable=True, is_training=False):
5151
shape=[],
5252
name='use_dropout')
5353

54-
self.setup(True, options=options)
54+
self.setup(is_training, options=options)
5555

5656
def setup(self, is_training):
5757
'''Construct the network. '''

code/kaffe/tensorflow/network.pyc

-5 Bytes
Binary file not shown.

code/kaffe/tensorflow/transformer.pyc

0 Bytes
Binary file not shown.

code/kaffe/transformers.pyc

0 Bytes
Binary file not shown.

code/modules.py

+29
Original file line numberDiff line numberDiff line change
@@ -945,3 +945,32 @@ def depthToNormalModule(depth):
945945

946946
normal = normal / tf.norm(normal, axis=3, keep_dims=True)
947947
return normal
948+
949+
def findBoundaryModule(depth, normal, segmentation, plane_mask, max_depth_diff = 0.1, max_normal_diff = np.sqrt(2 * (1 - np.cos(np.deg2rad(20))))):
    """Derive a two-channel boundary map from depth, normal and segmentation.

    Each pixel is compared against a weighted average of its 3x3 neighbours
    (weights come from the module's `gaussian` helper with the centre tap
    removed and the rest normalised to sum to one).  A pixel is a depth
    boundary when the absolute depth response exceeds `max_depth_diff`, and a
    normal boundary when the L2 norm of the normal response exceeds
    `max_normal_diff`.

    Args:
        depth: tensor convolved with a [3, 3, 1, 1] depthwise kernel, so it
            must have a single channel on axis 3.
        normal: tensor convolved with the same kernel tiled to 3 input
            channels, so it must have 3 channels on axis 3.
        segmentation: per-plane masks with planes on axis 3; eroded with a
            3x3 window and collapsed with a max over axis 3.
        plane_mask: mask dilated with a 3x3 max-pool to form the plane region.
            # NOTE(review): presumably 1 inside planar regions -- confirm
            # against the caller.
        max_depth_diff: threshold on the absolute depth response.
        max_normal_diff: threshold on the norm of the normal response.  The
            default is the chord length between two unit vectors 20 degrees
            apart.

    Returns:
        Float tensor concatenated on axis 3: channel 0 is the "smooth"
        boundary (normal boundary without a depth boundary, restricted to the
        plane region), channel 1 is every remaining boundary pixel.
    """
    kernel_size = 3
    # Floor division keeps this an int on both Python 2 and 3; it is used as
    # an array index and as a tf.pad amount, neither of which accepts a float.
    padding = (kernel_size - 1) // 2
    center = padding

    # Kernel computing "centre value minus weighted mean of the neighbours".
    neighbor_kernel_array = gaussian(kernel_size, kernel_size)
    neighbor_kernel_array[center][center] = 0
    neighbor_kernel_array /= neighbor_kernel_array.sum()
    neighbor_kernel_array *= -1
    neighbor_kernel_array[center][center] = 1
    neighbor_kernel = tf.constant(neighbor_kernel_array.reshape(-1), shape=neighbor_kernel_array.shape, dtype=tf.float32)
    neighbor_kernel = tf.reshape(neighbor_kernel, [kernel_size, kernel_size, 1, 1])

    # VALID convolution shrinks the map by `padding` on each side; zero-pad it
    # back so the border pixels are never flagged as boundaries.
    spatial_paddings = [[0, 0], [padding, padding], [padding, padding], [0, 0]]

    # The thresholds below come from the arguments.  They used to be
    # re-assigned to hard-coded constants here, which silently discarded any
    # value supplied by the caller.
    depth_diff = tf.abs(tf.nn.depthwise_conv2d(depth, neighbor_kernel, strides=[1, 1, 1, 1], padding='VALID'))
    depth_diff = tf.pad(depth_diff, paddings = spatial_paddings)
    depth_boundary = tf.greater(depth_diff, max_depth_diff)

    normal_diff = tf.norm(tf.nn.depthwise_conv2d(normal, tf.tile(neighbor_kernel, [1, 1, 3, 1]), strides=[1, 1, 1, 1], padding='VALID'), axis=3, keep_dims=True)
    normal_diff = tf.pad(normal_diff, paddings = spatial_paddings)
    normal_boundary = tf.greater(normal_diff, max_normal_diff)

    # Dilated plane mask minus the eroded interior of every segment.
    plane_region = tf.nn.max_pool(plane_mask, ksize=[1, kernel_size, kernel_size, 1], strides=[1, 1, 1, 1], padding='SAME', name='max_pool')
    segmentation_eroded = 1 - tf.nn.max_pool(1 - segmentation, ksize=[1, 3, 3, 1], strides=[1, 1, 1, 1], padding='SAME', name='max_pool')
    plane_region -= tf.reduce_max(segmentation_eroded, axis=3, keep_dims=True)

    boundary = tf.cast(tf.logical_or(depth_boundary, normal_boundary), tf.float32)
    # Smooth boundary: the normal changes while the depth stays continuous.
    smooth_boundary = tf.cast(tf.logical_and(normal_boundary, tf.less_equal(depth_diff, max_depth_diff)), tf.float32) * plane_region
    boundary_gt = tf.concat([smooth_boundary, boundary - smooth_boundary], axis=3)
    return boundary_gt

code/tf_nndistance_so.so

48 Bytes
Binary file not shown.

code/train_planenet.py

+428-95
Large diffs are not rendered by default.

code/train_planenet_backup.py

+36-15
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
from planenet import PlaneNet
2121
from RecordReader import *
2222
from RecordReaderRGBD import *
23+
from RecordReaderScanNet import *
2324

2425
#training_flag: toggle dropout and batch normalization mode
2526
#it's true for training and false for validation, testing, prediction
@@ -29,7 +30,8 @@
2930
def build_graph(img_inp_train, img_inp_val, img_inp_rgbd_train, img_inp_rgbd_val, training_flag, options):
3031
with tf.device('/gpu:%d'%options.gpu_id):
3132
img_inp_rgbd = tf.cond(tf.equal(training_flag % 2, 0), lambda: img_inp_rgbd_train, lambda: img_inp_rgbd_val)
32-
img_inp = tf.cond(tf.less(training_flag, 2), lambda: tf.cond(tf.equal(training_flag % 2, 0), lambda: img_inp_train, lambda: img_inp_val), lambda: img_inp_rgbd)
33+
img_inp = tf.cond(tf.equal(training_flag % 2, 0), lambda: img_inp_train, lambda: img_inp_val)
34+
img_inp = tf.cond(tf.less(training_flag, 2), lambda: img_inp, lambda: img_inp_rgbd)
3335

3436
net = PlaneNet({'img_inp': img_inp}, is_training=tf.equal(training_flag % 2, 0), options=options)
3537

@@ -94,6 +96,10 @@ def build_loss_rgbd(global_pred_dict, local_pred_dict, deep_pred_dicts, global_g
9496
validDepthMask = tf.cast(tf.greater(global_gt_dict['depth'], 1e-4), tf.float32)
9597
depth_loss = tf.reduce_mean(tf.reduce_sum(tf.squared_difference(all_depths, global_gt_dict['depth']) * all_segmentations_softmax, axis=3, keep_dims=True) * validDepthMask) * 1000
9698

99+
if options.predictPixelwise == 1:
100+
depth_loss += tf.reduce_mean(tf.squared_difference(global_pred_dict['non_plane_mask'], global_gt_dict['depth']) * validDepthMask) * 1000
101+
pass
102+
97103
#non plane mask loss
98104
segmentation_loss = tf.reduce_mean(tf.slice(all_segmentations_softmax, [0, 0, 0, options.numOutputPlanes], [options.batchSize, HEIGHT, WIDTH, 1])) * 100
99105

@@ -148,7 +154,7 @@ def build_loss(global_pred_dict, local_pred_dict, deep_pred_dicts, global_gt_dic
148154

149155
plane_gt_shuffled = tf.transpose(tf.matmul(global_gt_dict['plane'], forward_map, transpose_a=True), [0, 2, 1]) / tf.maximum(num_matches, 1e-4)
150156
plane_confidence_gt = tf.cast(num_matches > 0.5, tf.float32)
151-
plane_loss += tf.reduce_mean(tf.squared_difference(pred_dict['plane'], plane_gt_shuffled) * plane_confidence_gt) * 1000
157+
plane_loss += tf.reduce_mean(tf.squared_difference(pred_dict['plane'], plane_gt_shuffled) * plane_confidence_gt) * 10000
152158

153159

154160
#all segmentations is the concatenation of plane segmentations and non plane mask
@@ -200,8 +206,13 @@ def build_loss(global_pred_dict, local_pred_dict, deep_pred_dicts, global_gt_dic
200206
validDepthMask = tf.cast(tf.greater(global_gt_dict['depth'], 1e-4), tf.float32)
201207
depth_loss = tf.reduce_mean(tf.reduce_sum(tf.squared_difference(all_depths, global_gt_dict['depth']) * all_segmentations_softmax, axis=3, keep_dims=True) * validDepthMask) * 1000
202208

203-
#normal loss for non-plane region
204-
normal_loss = tf.reduce_mean(tf.squared_difference(global_pred_dict['non_plane_normal'], global_gt_dict['normal']) * (1 - plane_mask)) * 1000
209+
if options.predictPixelwise == 1:
210+
depth_loss += tf.reduce_mean(tf.squared_difference(global_pred_dict['non_plane_mask'], global_gt_dict['depth']) * validDepthMask) * 1000
211+
normal_loss = tf.reduce_mean(tf.squared_difference(global_pred_dict['non_plane_normal'], global_gt_dict['normal']) * validDepthMask) * 1000
212+
else:
213+
#normal loss for non-plane region
214+
normal_loss = tf.reduce_mean(tf.squared_difference(global_pred_dict['non_plane_normal'], global_gt_dict['normal']) * (1 - plane_mask)) * 1000
215+
pass
205216

206217

207218
#local loss
@@ -293,14 +304,6 @@ def build_loss(global_pred_dict, local_pred_dict, deep_pred_dicts, global_gt_dic
293304
#we predict boundaries directly for post-processing purpose
294305
boundary_loss += tf.reduce_mean(tf.losses.sigmoid_cross_entropy(logits=global_pred_dict['boundary'], multi_class_labels=boundary_gt, weights=tf.maximum(global_gt_dict['boundary'] * 3, 1))) * 1000
295306

296-
297-
if options.diverseLoss:
298-
plane_diff = tf.reduce_sum(tf.pow(tf.expand_dims(global_pred_dict['plane'], 1) - tf.expand_dims(global_pred_dict['plane'], 2), 2), axis=3)
299-
plane_diff = tf.matrix_set_diag(plane_diff, tf.ones((options.batchSize, options.numOutputPlanes)))
300-
minPlaneDiff = 0.1
301-
diverse_loss += tf.reduce_mean(tf.clip_by_value(1 - plane_diff / minPlaneDiff, 0, 1)) * 10000
302-
pass
303-
304307

305308
#regularization
306309
l2_losses = tf.add_n([options.l2Weight * tf.nn.l2_loss(v) for v in tf.trainable_variables() if 'weights' in v.name])
@@ -392,7 +395,7 @@ def main(options):
392395
sess.run(init_op)
393396
if options.restore == 0:
394397
#fine-tune from DeepLab model
395-
var_to_restore = [v for v in var_to_restore if 'res5d' not in v.name and 'segmentation' not in v.name and 'plane' not in v.name and 'deep_supervision' not in v.name and 'local' not in v.name and 'boundary' not in v.name and 'degridding' not in v.name]
398+
var_to_restore = [v for v in var_to_restore if 'res5d' not in v.name and 'segmentation' not in v.name and 'plane' not in v.name and 'deep_supervision' not in v.name and 'local' not in v.name and 'boundary' not in v.name and 'degridding' not in v.name and 'res2a_branch2a' not in v.name and 'res2a_branch1' not in v.name]
396399
pretrained_model_loader = tf.train.Saver(var_to_restore)
397400
pretrained_model_loader.restore(sess,"../pretrained_models/deeplab_resnet.ckpt")
398401
elif options.restore == 1:
@@ -406,8 +409,11 @@ def main(options):
406409
loader = tf.train.Saver(var_to_restore)
407410
loader.restore(sess,"%s/checkpoint.ckpt"%(options.checkpoint_dir))
408411
sess.run(batchno.assign(1))
409-
elif options.restore == 3:
412+
elif options.restore == 3:
410413
#restore the same model from standard training
414+
if options.predictBoundary == 1:
415+
var_to_restore = [v for v in var_to_restore if 'boundary' not in v.name]
416+
pass
411417
if options.predictConfidence == 1:
412418
var_to_restore = [v for v in var_to_restore if 'confidence' not in v.name]
413419
pass
@@ -454,6 +460,7 @@ def main(options):
454460
batchType = 0
455461
pass
456462

463+
457464
_, total_loss, losses, losses_rgbd, summary_str = sess.run([train_op, loss, loss_dict, loss_dict_rgbd, summary_op], feed_dict = {training_flag: batchType})
458465
writers[batchType].add_summary(summary_str, bno)
459466
ema[batchType] = ema[batchType] * MOVING_AVERAGE_DECAY + total_loss
@@ -467,6 +474,14 @@ def main(options):
467474
pass
468475

469476
print bno,'train', ema[0] / ema_acc[0], 'val', ema[1] / ema_acc[1], 'train rgbd', ema[2] / ema_acc[2], 'val rgbd', ema[3] / ema_acc[3], 'loss', total_loss, 'time', time.time()-t0
477+
478+
if np.random.random() < 0.01:
479+
if batchType < 2:
480+
print(losses)
481+
else:
482+
print(losses_rgbd)
483+
pass
484+
pass
470485
continue
471486

472487
except tf.errors.OutOfRangeError:
@@ -1056,7 +1071,10 @@ def parse_args():
10561071
default=0, type=int)
10571072
parser.add_argument('--predictConfidence', dest='predictConfidence',
10581073
help='whether predict plane confidence or not: [0, 1]',
1059-
default=0, type=int)
1074+
default=0, type=int)
1075+
parser.add_argument('--predictPixelwise', dest='predictPixelwise',
1076+
help='whether predict pixelwise depth or not: [0, 1]',
1077+
default=0, type=int)
10601078
parser.add_argument('--fineTuningCheckpoint', dest='fineTuningCheckpoint',
10611079
help='specify the model for fine-tuning',
10621080
default='../PlaneSetGeneration/dump_planenet_diverse/train_planenet_diverse.ckpt', type=str)
@@ -1105,6 +1123,9 @@ def parse_args():
11051123
if args.predictConfidence == 1:
11061124
args.keyname += '_pc'
11071125
pass
1126+
if args.predictPixelwise == 1:
1127+
args.keyname += '_pp'
1128+
pass
11081129
if args.sameMatching == 0:
11091130
args.keyname += '_sm0'
11101131
pass

code/train_planenet_confidence.py

+9-6
Original file line numberDiff line numberDiff line change
@@ -33,8 +33,8 @@ def build_graph(img_inp_train, img_inp_val, img_inp_rgbd_train, img_inp_rgbd_val
3333
img_inp_rgbd = tf.cond(tf.equal(training_flag % 2, 0), lambda: img_inp_rgbd_train, lambda: img_inp_rgbd_val)
3434
img_inp = tf.cond(tf.less(training_flag, 2), lambda: tf.cond(tf.equal(training_flag % 2, 0), lambda: img_inp_train, lambda: img_inp_val), lambda: img_inp_rgbd)
3535

36-
#net = PlaneNet({'img_inp': img_inp}, is_training=tf.equal(training_flag % 2, 0), options=options)
37-
net = PlaneNet({'img_inp': img_inp}, is_training=tf.equal(0, 0), options=options)
36+
net = PlaneNet({'img_inp': img_inp}, is_training=tf.equal(training_flag % 2, 0), options=options)
37+
#net = PlaneNet({'img_inp': img_inp}, is_training=tf.equal(0, 0), options=options)
3838
#global predictions
3939
plane_pred = net.layers['plane_pred']
4040

@@ -707,10 +707,12 @@ def test(options):
707707
loader = tf.train.Saver(var_to_restore)
708708
loader.restore(sess, '../PlaneSetGeneration/dump_planenet_pixelwise/train_planenet_pixelwise.ckpt')
709709
else:
710+
#var_to_restore = [v for v in var_to_restore if 'res4b22_relu_non_plane' not in v.name]
710711
loader = tf.train.Saver(var_to_restore)
711712
loader.restore(sess, "%s/checkpoint.ckpt"%(options.checkpoint_dir))
713+
#loader.restore(sess, options.fineTuningCheckpoint)
712714
pass
713-
#loader.restore(sess, options.fineTuningCheckpoint)
715+
714716

715717
coord = tf.train.Coordinator()
716718
threads = tf.train.start_queue_runners(sess=sess, coord=coord)
@@ -732,7 +734,7 @@ def test(options):
732734
ranges = np.array([urange / imageWidth * 640 / focalLength, np.ones(urange.shape), -vrange / imageHeight * 480 / focalLength]).transpose([1, 2, 0])
733735

734736

735-
for index in xrange(100):
737+
for index in xrange(10):
736738
print(('image', index))
737739
t0=time.time()
738740

@@ -996,7 +998,7 @@ def test(options):
996998
planeMasks = np.array(planeMasks)
997999
#predMasks = np.array(predMasks)
9981000
evaluateDepths(predDepths, gtDepths, np.ones(planeMasks.shape, dtype=np.bool), planeMasks)
999-
print(lossSum / num)
1001+
#print(lossSum / num)
10001002
exit(1)
10011003

10021004
except tf.errors.OutOfRangeError:
@@ -1423,7 +1425,8 @@ def parse_args():
14231425
args = parser.parse_args()
14241426
args.keyname = os.path.basename(__file__).rstrip('.py')
14251427
args.keyname = args.keyname.replace('train_', '')
1426-
1428+
args.keyname = args.keyname.replace('_confidence', '')
1429+
14271430
if args.numOutputPlanes != 20:
14281431
args.keyname += '_np' + str(args.numOutputPlanes)
14291432
pass

0 commit comments

Comments
 (0)