
Commit d908b71

update NYUv2 eval
1 parent abd8fa7 commit d908b71

17 files changed, +155 −138 lines changed

Diff for: IO.py (+2 −1)

@@ -4,6 +4,7 @@
 # This source code is licensed under the license found in the
 # LICENSE file in the root directory of this source tree.
 
+import imageio
 import os
 import re
 import numpy as np
@@ -12,7 +13,7 @@
 import numpy as np
 from PIL import Image
 import sys
-import imageio
+
 
 def read(file):
     if file.endswith('.float3'): return readFloat(file)

Diff for: README.md (+39 −6)

@@ -31,17 +31,15 @@ Install packages
 
 2. Install pre-requisite common packages. Go to https://pytorch.org/get-started/locally/ and install pytorch that is compatible to your computer. We test on pytorch v1.9.0 and cudatoolkit-11.1. (The codes should work under other v1.0+ versions)
 
-```conda install pytorch==1.9.0 torchvision==0.10.0 torchaudio==0.9.0 cudatoolkit=11.3 -c pytorch -c conda-forge ```
-
 3. Install other dependencies: opencv-python and matplotlib, imageio, Pillow, augly, tensorboardX
 
 ``` pip install opencv-python, matplotlib, imageio, Pillow, augly, tensorboardX ```
 
 Download pretrained models
 
-4. Download pretrained models [<a href="https://drive.google.com/file/d/1N3UAeSR5sa7KcMJAeKU961KUNBZ6vIgi/view?usp=sharing">here</a>] (ResNet152, 246MB).
+4. Download pretrained models [<a href="https://drive.google.com/file/d/1N3UAeSR5sa7KcMJAeKU961KUNBZ6vIgi/view?usp=sharing">here</a>] (ResNet152, 246MB; illustrative model that works reasonably well on in-the-wild indoor scenes).
 
-5. Move the downloaded item under this folder, and then unzip it. You should be able to see a new folder 'ckpts' that contains the pretrained models.
+5. Unzip it under the root folder; a new folder 'ckpts' that contains the pretrained models is created.
 
 6. Run
 
@@ -87,15 +85,50 @@ Download VA (8G) first. Extract under the root folder.
 ├── gt_depth_rectify
     ├── cam0_frame0000.depth.pfm
     ......
-├── VA_left_all.txt
+├── VA_left_all.txt
 
 Run ``` bash eval.sh ``` The performances will be saved under the root folder.
 
-To visualize the predicted depth maps,
+To visualize the predicted depth maps in a minibatch:
 
 ```shell
 python execute.py --exe eval_save --log_dir='./tmp' --data_path VA --dataset VA --batch_size 1 --load_weights_folder <path to weights> --models_to_load encoder depth --width 256 --height 256 --max_depth 10 --frame_ids 0 --num_layers 152 ```
 ```
+
+To visualize the predicted depth maps for all testing data on the list:
+
+```shell
+python execute.py --exe eval_save_all --log_dir='./tmp' --data_path VA --dataset VA --batch_size 1 --load_weights_folder <path to weights> --models_to_load encoder depth --width 256 --height 256 --max_depth 10 --frame_ids 0 --num_layers 152 ```
+```
+
+Evaluation on NYUv2
+
+Prepare <a href="https://cs.nyu.edu/~silberman/datasets/nyu_depth_v2.html">NYUv2</a> data.
+
+.
+├── NYUv2
+    ├── img_val
+        ├── 00001.png
+        ......
+    ├── depth_val
+        ├── 00001.npy
+        ......
+    ......
+├── NYUv2.txt
+
+| Name | Arch | Expert | MAE | AbsRel | RMSE | acc@ 1.25 | acc@ 1.25^2 | acc@ 1.25^3 | Download |
+| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |
+| DistDepth-finetuned | ResNet152 | DPT on NYUv2 | 0.308 | 0.113 | 0.444 | 87.3 | 97.3 | 99.3 | [model](https://drive.google.com/file/d/1kLJBuMOf0xSpYq7DtxnPpBTxMwW0ylGm/view?usp=sharing) |
+| DistDepth-SimSIN | ResNet152 | DPT | 0.411 | 0.163 | 0.563 | 78.0 | 93.6 | 98.1 | [model](https://drive.google.com/file/d/1Hf_WPaBGMpPBFymCwmN8Xh1blXXZU1cd/view?usp=sharing) |
+
+Change train_filenames (dummy here) and val_filenames in execute_func.py to the NYUv2 list. Then,
+
+```shell
+python execute.py --exe eval_measure --log_dir='./tmp' --data_path NYUv2 --dataset NYUv2 --batch_size 1 --load_weights_folder <path to weights> --models_to_load encoder depth --width 256 --height 256 --max_depth 12 --frame_ids 0 --num_layers 152
+```
+
+## <div align=""> Visualization</div>
+
 ## <div align="">Depth-aware AR effects</div>
 
 Virtual object insertion:
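
The table above reports the standard monocular-depth error and accuracy metrics. As a reference, here is a minimal NumPy sketch of how such numbers are conventionally computed; it is an illustration only, not the repository's eval_measure implementation, and `pred`/`gt` are hypothetical depth arrays in metres restricted to valid pixels:

```python
import numpy as np

def depth_metrics(pred, gt):
    """Conventional depth metrics: MAE, AbsRel, RMSE and threshold accuracies."""
    mae = np.mean(np.abs(pred - gt))                   # mean absolute error (m)
    abs_rel = np.mean(np.abs(pred - gt) / gt)          # error relative to ground truth
    rmse = np.sqrt(np.mean((pred - gt) ** 2))          # root-mean-square error (m)
    ratio = np.maximum(pred / gt, gt / pred)           # per-pixel max(pred/gt, gt/pred)
    accs = [100.0 * np.mean(ratio < 1.25 ** k) for k in (1, 2, 3)]  # acc@1.25, 1.25^2, 1.25^3 in %
    return mae, abs_rel, rmse, accs

# hypothetical usage: a perfect prediction gives zero error and 100% accuracy
gt = np.random.uniform(0.5, 10.0, size=(448, 608)).astype(np.float32)
print(depth_metrics(gt.copy(), gt))
```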

Diff for: datasets/NYUv2base.py (+5 −38)

@@ -9,16 +9,12 @@
 import os
 import random
 import numpy as np
-import copy
 from PIL import Image
 
 import torch
 import torch.utils.data as data
 from torchvision import transforms
 
-import augly.image as imaugs
-
-
 def pil_loader(path):
     with open(path, 'rb') as f:
         with Image.open(f) as img:
@@ -71,8 +67,6 @@ def __init__(self,
             self.resize[i] = transforms.Resize((self.height // s, self.width // s),
                                                interpolation=self.interp)
 
-        self.load_depth = self.check_depth()
-
         self.mapping = {'l':'0', 'r':'1'}
         self.rev_mapping = {'l':'1', 'r':'0'}
 
@@ -128,32 +122,16 @@ def __getitem__(self, index):
 
         do_color_aug = self.is_train and random.random() > 0.5
         do_flip = self.is_train and random.random() > 0.5
-        do_encoding = self.is_train and random.random() > 0.5
 
         line = self.filenames[index].split()
         folder = line[0]
 
-        if len(line) == 3:
-            frame_index = int(line[1])
-        else:
-            frame_index = 0
-
-        if len(line) == 3:
-            side = line[2]
-        else:
-            side = None
-
         for i in self.frame_idxs:
-            if i == "s":
-                path = os.path.join(self.data_path, folder[:-1]+self.rev_mapping[side]+'.jpg')
+            if i == 0:
+                path = os.path.join(self.data_path, folder + '.png')
                 inputs[("color", i, -1)] = self.get_color(path, do_flip)
             else:
-                if not self.is_train:
-                    path = os.path.join(self.data_path, folder+'.jpg')
-                else:
-                    folder_idx = folder.rsplit('/',1)[0]
-                    path = os.path.join(self.data_path, folder_idx +'/'+ str(frame_index+1*i) + '.jpg')
-                inputs[("color", i, -1)] = self.get_color(path, do_flip)
+                raise NotImplementedError("NYUv2 only supports current frame")
 
         # adjusting intrinsics to match each scale in the pyramid
         for scale in range(self.num_scales):
@@ -178,7 +156,6 @@ def __getitem__(self, index):
         else:
             color_aug = (lambda x: x)
 
-
         self.preprocess(inputs, color_aug)
 
         for i in self.frame_idxs:
@@ -187,26 +164,16 @@ def __getitem__(self, index):
 
         #if it's not training (testing time), load the gt depth for calculating the error
         if not self.is_train:
-            path = os.path.join(self.data_path, folder+'.png')
+            gt_folder = f'{folder}.npy'.replace('img','depth')
+            path = os.path.join(self.data_path, gt_folder)
             depth_gt = self.get_depth(path, do_flip)
            inputs["depth_gt"] = np.expand_dims(depth_gt, 0)
             inputs["depth_gt"] = torch.from_numpy(inputs["depth_gt"].astype(np.float32))
 
-        if "s" in self.frame_idxs:
-            stereo_T = np.eye(4, dtype=np.float32)
-            baseline_sign = -1 if do_flip else 1
-            side_sign = -1 if side == "l" else 1
-            stereo_T[0, 3] = side_sign * baseline_sign * 0.1
-
-            inputs["stereo_T"] = torch.from_numpy(stereo_T)
-
         return inputs
 
     def get_color(self, folder, frame_index, side, do_flip):
         raise NotImplementedError
 
-    def check_depth(self):
-        raise NotImplementedError
-
     def get_depth(self, folder, frame_index, side, do_flip):
         raise NotImplementedError
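
After this change the NYUv2 loader only serves the current frame (frame id 0): the RGB image is `<stem>.png` under the data path and, at test time, the ground-truth depth is the matching `.npy` file obtained by swapping `img` for `depth` in the stem. A small sketch of that mapping for a hypothetical NYUv2.txt entry (the file names are illustrative):

```python
import os

data_path = "NYUv2"        # hypothetical dataset root
stem = "img_val/00001"     # one line of NYUv2.txt, no extension

rgb_path = os.path.join(data_path, stem + ".png")        # NYUv2/img_val/00001.png
gt_stem = f"{stem}.npy".replace("img", "depth")          # depth_val/00001.npy
depth_path = os.path.join(data_path, gt_stem)            # NYUv2/depth_val/00001.npy

print(rgb_path, depth_path)
```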

Diff for: datasets/NYUv2dataset.py (+6 −12)

@@ -6,12 +6,9 @@
 
 from __future__ import absolute_import, division, print_function
 
-import cv2
-import h5py
 import numpy as np
 import os
 import PIL.Image as pil
-import skimage.transform
 
 from .NYUv2base import NYUv2Base
 
@@ -24,29 +21,26 @@ def __init__(self, *args, **kwargs):
                              [0, 0, 0, 1]], dtype=np.float32)
 
         width, height = 640, 480
-        new_width, new_height = 608, 448 #448, 448
+        new_width, new_height = 608, 448
        self.left = (width - new_width)//2
         self.top = (height - new_height)//2
         self.right = (width + new_width)//2
         self.bottom = (height + new_height)//2
 
-        self.full_res_shape = (640, 480)
-
-    def check_depth(self):
-        return True
-
     def get_color(self, path, do_flip):
         color = self.loader(path)
+
         # center crop
-        #color = color.crop((self.left, self.top, self.right , self.bottom))
+        color = color.crop((self.left, self.top, self.right , self.bottom))
         if do_flip:
             color = color.transpose(pil.FLIP_LEFT_RIGHT)
         return color
 
     def get_depth(self, path, do_flip):
         depth_path = os.path.join(path)
-        depth_gt = cv2.imread(depth_path,-1)#[::2,::2]
-        depth_gt = depth_gt[16:-16, 16:-16] # the calculation is different for PIL and numpy indexing
+        depth_gt = np.load(depth_path)
+        depth_gt = depth_gt[self.top:self.bottom, self.left:self.right]
         if do_flip:
             depth_gt = np.fliplr(depth_gt)
+
         return depth_gt
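
The depth map is now loaded with np.load and center-cropped with the same bounds as the RGB image, replacing the previous fixed 16-pixel margins. A quick sketch (dummy data, shapes only) showing that PIL's `(left, top, right, bottom)` crop box and the NumPy `[top:bottom, left:right]` slice pick out the same 608×448 region:

```python
import numpy as np
from PIL import Image

width, height = 640, 480
new_width, new_height = 608, 448
left, top = (width - new_width) // 2, (height - new_height) // 2
right, bottom = (width + new_width) // 2, (height + new_height) // 2

rgb = Image.fromarray(np.zeros((height, width, 3), dtype=np.uint8))   # dummy RGB frame
depth = np.zeros((height, width), dtype=np.float32)                   # dummy depth map

rgb_cropped = rgb.crop((left, top, right, bottom))    # PIL box: (left, top, right, bottom)
depth_cropped = depth[top:bottom, left:right]         # NumPy slice: [rows, cols]

print(rgb_cropped.size)      # (608, 448) -> PIL reports (width, height)
print(depth_cropped.shape)   # (448, 608) -> NumPy reports (rows, cols)
```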

Diff for: datasets/SimSINbase.py (−8)

@@ -9,22 +9,17 @@
 import os
 import random
 import numpy as np
-import copy
 from PIL import Image
 
 import torch
 import torch.utils.data as data
 from torchvision import transforms
-import torch.nn.functional as F
-import augly.image as imaugs
-
 
 def pil_loader(path):
     with open(path, 'rb') as f:
         with Image.open(f) as img:
             return img.convert('RGB')
 
-
 class SimSINBase(data.Dataset):
     def __init__(self,
                  data_path,
@@ -198,8 +193,5 @@ def __getitem__(self, index):
     def get_color(self, folder, frame_index, side, do_flip):
         raise NotImplementedError
 
-    def check_depth(self):
-        raise NotImplementedError
-
     def get_depth(self, folder, frame_index, side, do_flip):
         raise NotImplementedError

Diff for: datasets/SimSINdataset.py (−3)

@@ -28,9 +28,6 @@ def __init__(self, *args, **kwargs):
 
         self.full_res_shape = (512, 512)
 
-    def check_depth(self):
-        return True
-
     def get_color(self, path, do_flip):
         color = self.loader(path)
         if do_flip:

Diff for: datasets/UniSINbase.py (−8)

@@ -9,15 +9,12 @@
 import os
 import random
 import numpy as np
-import copy
 from PIL import Image
 
 import torch
 import torch.utils.data as data
 from torchvision import transforms
 
-import augly.image as imaugs
-
 
 def pil_loader(path):
     with open(path, 'rb') as f:
@@ -71,8 +68,6 @@ def __init__(self,
             self.resize[i] = transforms.Resize((self.height // s, self.width // s),
                                                interpolation=self.interp)
 
-        self.load_depth = self.check_depth()
-
         self.mapping = {'l':'0', 'r':'1'}
         self.rev_mapping = {'l':'1', 'r':'0'}
 
@@ -195,8 +190,5 @@ def __getitem__(self, index):
     def get_color(self, folder, frame_index, side, do_flip):
         raise NotImplementedError
 
-    def check_depth(self):
-        raise NotImplementedError
-
     def get_depth(self, folder, frame_index, side, do_flip):
         raise NotImplementedError

Diff for: datasets/UniSINdataset.py (−8)

@@ -6,12 +6,9 @@
 
 from __future__ import absolute_import, division, print_function
 
-import cv2
-import h5py
 import numpy as np
 import os
 import PIL.Image as pil
-import skimage.transform
 
 from .UniSINbase import UniSINBase
 from IO import read
@@ -29,21 +26,16 @@ def __init__(self, *args, **kwargs):
 
         self.full_res_shape = (1280, 720)
 
-    def check_depth(self):
-        return True
-
     def get_color(self, path, do_flip):
         color = self.loader(path)
 
         if do_flip:
             color = color.transpose(pil.FLIP_LEFT_RIGHT)
-
         return color
 
     def get_depth(self, path, do_flip):
         depth_path = os.path.join(path)
         depth_gt = read(depth_path)
         if do_flip:
             depth_gt = np.fliplr(depth_gt)
-
         return depth_gt

Diff for: datasets/VAbase.py (+1 −3)

@@ -9,13 +9,11 @@
 import os
 import random
 import numpy as np
-import copy
-from PIL import Image # using pillow-simd for increased speed
+from PIL import Image
 
 import torch
 import torch.utils.data as data
 from torchvision import transforms
-import augly.image as imaugs
 
 
 def pil_loader(path):

Diff for: datasets/VAdataset.py (−4)

@@ -7,11 +7,9 @@
 from __future__ import absolute_import, division, print_function
 
 import cv2
-import h5py
 import numpy as np
 import os
 import PIL.Image as pil
-import skimage.transform
 
 from .VAbase import VABase
 
@@ -38,7 +36,6 @@ def get_color(self, path, do_flip):
 
         if do_flip:
             color = color.transpose(pil.FLIP_LEFT_RIGHT)
-
         return color
 
     def get_depth(self, path, do_flip):
@@ -47,5 +44,4 @@ def get_depth(self, path, do_flip):
         depth_gt = cv2.imread(depth_path, cv2.IMREAD_UNCHANGED)
         if do_flip:
             depth_gt = np.fliplr(depth_gt)
-
         return depth_gt

0 commit comments
