Change everything over to use PIL images for both the mask and depth get_*_image methods. Also clean up lots of dataset code.

peteflorence · peteflorence · commit ea3347ff8014 · 2018-04-13T16:51:27.000-04:00
diff --git a/dense_correspondence/correspondence_tools/correspondence_augmentation.py b/dense_correspondence/correspondence_tools/correspondence_augmentation.py
@@ -15,13 +15,14 @@
 import random
 
 
-def random_image_and_indices_mutation(image, uv_pixel_positions):
+def random_image_and_indices_mutation(images, uv_pixel_positions):
     """
-    This function takes an image and a list of pixel positions in the image, 
+    This function takes a list of images and a list of pixel positions in those images,
     and picks some subset of available mutations.
 
-    :param image: image for which to augment
-    :type  image: PIL.image.image
+    :param images: a list of images (for example the rgb, depth, and mask) for which the 
+                        **same** mutation will be applied
+    :type  images: list of PIL.Image.Image
 
     :param uv_pixel_positions: pixel locations (u, v) in the image. 
     	See doc/coordinate_conventions.md for definition of (u, v)
@@ -35,34 +36,34 @@ def random_image_and_indices_mutation(image, uv_pixel_positions):
     	Note: aim is to support both torch.LongTensor and torch.FloatTensor,
     	      and return the mutated_uv_pixel_positions with same type
 
-    :return mutated_image, mutated_uv_pixel_positions
-    	:rtype: PIL.image.image, tuple of torch Tensors
+    :return mutated_image_list, mutated_uv_pixel_positions
+    	:rtype: list of PIL.Image.Image, tuple of torch Tensors
 
     """
-    mutated_image, mutated_uv_pixel_positions = random_flip_vertical(image, uv_pixel_positions)
-    mutated_image, mutated_uv_pixel_positions = random_flip_horizontal(mutated_image, mutated_uv_pixel_positions)
-    return mutated_image, mutated_uv_pixel_positions
+    mutated_images, mutated_uv_pixel_positions = random_flip_vertical(images, uv_pixel_positions)
+    mutated_images, mutated_uv_pixel_positions = random_flip_horizontal(mutated_images, mutated_uv_pixel_positions)
+    return mutated_images, mutated_uv_pixel_positions
 
 
-def random_flip_vertical(image, uv_pixel_positions):
+def random_flip_vertical(images, uv_pixel_positions):
     """
-    Randomly flip the image and the pixel positions vertically (flip up/down)
+    Randomly flip the images and the pixel positions vertically (flip up/down)
 
     See random_image_and_indices_mutation() for documentation of args and return types.
 
     """
 
     if random.random() < 0.5:
-        return image, uv_pixel_positions  # Randomly do not apply
+        return images, uv_pixel_positions  # Randomly do not apply
 
     print "Flip vertically"
-    mutated_image = ImageOps.flip(image)
+    mutated_images = [ImageOps.flip(image) for image in images]
     v_pixel_positions = uv_pixel_positions[1]
     mutated_v_pixel_positions = image.height - v_pixel_positions
     mutated_uv_pixel_positions = (uv_pixel_positions[0], mutated_v_pixel_positions)
-    return mutated_image, mutated_uv_pixel_positions
+    return mutated_images, mutated_uv_pixel_positions
 
-def random_flip_horizontal(image, uv_pixel_positions):
+def random_flip_horizontal(images, uv_pixel_positions):
     """
     Randomly flip the image and the pixel positions horizontall (flip left/right)
 
@@ -71,11 +72,11 @@ def random_flip_horizontal(image, uv_pixel_positions):
     """
 
     if random.random() < 0.5:
-        return image, uv_pixel_positions  # Randomly do not apply
+        return images, uv_pixel_positions  # Randomly do not apply
 
     print "Flip left and right"
-    mutated_image = ImageOps.mirror(image)
+    mutated_images = [ImageOps.mirror(image) for image in images]
     u_pixel_positions = uv_pixel_positions[0]
     mutated_u_pixel_positions = image.width - u_pixel_positions
     mutated_uv_pixel_positions = (mutated_u_pixel_positions, uv_pixel_positions[1])
-    return mutated_image, mutated_uv_pixel_positions
+    return mutated_images, mutated_uv_pixel_positions
diff --git a/dense_correspondence/dataset/dense_correspondence_dataset_masked.py b/dense_correspondence/dataset/dense_correspondence_dataset_masked.py
@@ -61,26 +61,23 @@ def __getitem__(self, index):
         dtype_long = torch.LongTensor
 
         # pick a scene
-        scene_directory = self.get_random_scene_directory()
         scene_name = self.get_random_scene_name()
 
-
-
         # image a
-        img_a_idx = self.get_random_image_index(scene_name, )
-        image_a_rgb, image_a_depth, image_a_mask, image_a_pose = self.get_rgbd_mask_pose(scene_name, img_a_idx)
+        image_a_idx = self.get_random_image_index(scene_name)
+        image_a_rgb, image_a_depth, image_a_mask, image_a_pose = self.get_rgbd_mask_pose(scene_name, image_a_idx)
 
         # image b
-        img_b_idx = self.get_img_idx_with_different_pose(scene_name, image_a_pose, num_attempts=50)
+        image_b_idx = self.get_img_idx_with_different_pose(scene_name, image_a_pose, num_attempts=50)
 
-        if img_b_idx is None:
+        if image_b_idx is None:
             logging.info("no frame with sufficiently different pose found, returning")
             print "no frame with sufficiently different pose found, returning"
             return "matches", image_a_rgb, image_a_rgb, torch.zeros(1).type(dtype_long), torch.zeros(1).type(
                 dtype_long), torch.zeros(1).type(dtype_long), torch.zeros(1).type(dtype_long)
 
 
-        image_b_rgb, image_b_depth, image_b_mask, image_b_pose = self.get_rgbd_mask_pose(scene_name, img_b_idx)
+        image_b_rgb, image_b_depth, image_b_mask, image_b_pose = self.get_rgbd_mask_pose(scene_name, image_b_idx)
 
 
         num_attempts = 50000
@@ -97,7 +94,6 @@ def __getitem__(self, index):
                                                                            image_b_depth_numpy, image_b_pose, 
                                                                            num_attempts=num_attempts, img_a_mask=np.asarray(image_a_mask))
 
-
         if uv_a is None:
             print "No matches this time"
             return "matches", image_a_rgb, image_b_rgb, torch.zeros(1).type(dtype_long), torch.zeros(1).type(dtype_long), torch.zeros(1).type(dtype_long), torch.zeros(1).type(dtype_long)
@@ -152,8 +148,6 @@ def __getitem__(self, index):
         if self.tensor_transform is not None:
             image_a_rgb, image_b_rgb = self.both_to_tensor([image_a_rgb, image_b_rgb])
 
-
-
         uv_a_long = (torch.t(uv_a[0].repeat(num_non_matches_per_match, 1)).contiguous().view(-1,1), 
                      torch.t(uv_a[1].repeat(num_non_matches_per_match, 1)).contiguous().view(-1,1))
         uv_b_non_matches_long = (uv_b_non_matches[0].view(-1,1), uv_b_non_matches[1].view(-1,1) )
@@ -166,28 +160,17 @@ def __getitem__(self, index):
         uv_a_long = uv_a_long.squeeze(1)
         uv_b_non_matches_long = uv_b_non_matches_long.squeeze(1)
 
-
         return "matches", image_a_rgb, image_b_rgb, uv_a, uv_b, uv_a_long, uv_b_non_matches_long
 
-    def get_random_rgbd_with_pose(self, scene_directory):
-        rgb_filename   = self.get_random_rgb_image_filename(scene_directory)
-        depth_filename = self.get_depth_filename(rgb_filename) 
-
-        rgb   = self.get_rgb_image(rgb_filename)
-        depth = self.get_depth_image(depth_filename)
-        pose  = self.get_pose(rgb_filename)
-
-        return rgb, depth, pose
-
     def get_rgbd_mask_pose(self, scene_name, img_idx):
         """
         Returns rgb image, depth image, mask and pose.
         :param scene_name:
         :type scene_name: str
         :param img_idx:
         :type img_idx: int
-        :return:
-        :rtype:
+        :return: rgb, depth, mask, pose
+        :rtype: PIL.Image.Image, PIL.Image.Image, PIL.Image.Image, a 4x4 numpy array
         """
         rgb_file = self.get_image_filename(scene_name, img_idx, ImageType.RGB)
         rgb = self.get_rgb_image(rgb_file)
@@ -202,27 +185,6 @@ def get_rgbd_mask_pose(self, scene_name, img_idx):
 
         return rgb, depth, mask, pose
 
-    def get_random_rgbd_with_pose_and_mask(self, scene_directory):
-        rgb_filename   = self.get_random_rgb_image_filename(scene_directory)
-        depth_filename = self.get_depth_filename(rgb_filename)
-        mask_filename  = self.get_mask_filename(rgb_filename)
-
-        rgb   = self.get_rgb_image(rgb_filename)
-        depth = self.get_depth_image(depth_filename)
-        pose  = self.get_pose(rgb_filename)
-        mask  = self.get_mask_image(mask_filename)
-
-        return rgb, depth, pose, mask
-
-    def get_random_rgb_with_mask(self, scene_directory):
-        rgb_filename   = self.get_random_rgb_image_filename(scene_directory)
-        mask_filename  = self.get_mask_filename(rgb_filename)
-
-        rgb   = self.get_rgb_image(rgb_filename)
-        mask  = self.get_mask_image(mask_filename)
-
-        return rgb, mask
-
     def get_img_idx_with_different_pose(self, scene_name, pose_a, threshold=0.2, num_attempts=10):
         """
         Try to get an image with a different pose to the one passed in. If one can't be found
@@ -235,8 +197,8 @@ def get_img_idx_with_different_pose(self, scene_name, pose_a, threshold=0.2, num
         :type threshold:
         :param num_attempts:
         :type num_attempts:
-        :return:
-        :rtype:
+        :return: an index with a different-enough pose
+        :rtype: int or None
         """
 
         counter = 0
@@ -252,41 +214,11 @@ def get_img_idx_with_different_pose(self, scene_name, pose_a, threshold=0.2, num
         return None
 
 
-    def get_different_rgbd_with_pose(self, scene_directory, image_a_pose):
-        # try to get a far-enough-away pose
-        # if can't, then just return last sampled pose
-        num_attempts = 0
-        while num_attempts < 10:
-            rgb_filename   = self.get_random_rgb_image_filename(scene_directory)
-            depth_filename = self.get_depth_filename(rgb_filename)
-            pose           = self.get_pose(rgb_filename) 
-            if self.different_enough(image_a_pose, pose):
-                break
-            num_attempts += 1
-
-        rgb   = self.get_rgb_image(rgb_filename)
-        depth = self.get_depth_image(depth_filename)
-        return rgb, depth, pose
-
-    def get_different_rgbd_with_pose_and_mask(self, scene_directory, image_a_pose):
-        # try to get a far-enough-away pose
-        # if can't, then just return last sampled pose
-        num_attempts = 0
-        while num_attempts < 10:
-            rgb_filename   = self.get_random_rgb_image_filename(scene_directory)
-            depth_filename = self.get_depth_filename(rgb_filename)
-            pose           = self.get_pose(rgb_filename) 
-            mask_filename  = self.get_mask_filename(rgb_filename)
-            if self.different_enough(image_a_pose, pose):
-                break
-            num_attempts += 1
-
-        rgb   = self.get_rgb_image(rgb_filename)
-        depth = self.get_depth_image(depth_filename)
-        mask  = self.get_mask_image(mask_filename) 
-        return rgb, depth, pose, mask
-
     def get_rgb_image(self, rgb_filename):
+        """
+        :param rgb_filename: string of full path to rgb image
+        :return: PIL.Image.Image, in particular an 'RGB' PIL image
+        """
         return Image.open(rgb_filename).convert('RGB')
 
     def get_rgb_image_from_scene_name_and_idx(self, scene_name, img_idx):
@@ -300,20 +232,17 @@ def get_rgb_image_from_scene_name_and_idx(self, scene_name, img_idx):
         return self.get_rgb_image(img_filename)
 
     def get_depth_image(self, depth_filename):
-        return Image.open(depth_filename)
-
-    def get_depth_image_from_scene_name_and_idx(self, scene_name, img_idx):
         """
-        Returns a depth image given a scene_name and image index
-        :param scene_name:
-        :param img_idx: str or int
+        :param depth_filename: string of full path to depth image
         :return: PIL.Image.Image
         """
-
-        img_filename = self.get_image_filename(scene_name, img_idx, ImageType.DEPTH)
-        return self.get_depth_image(img_filename)
+        return Image.open(depth_filename)
 
     def get_mask_image(self, mask_filename):
+        """
+        :param mask_filename: string of full path to mask image
+        :return: PIL.Image.Image
+        """
         return Image.open(mask_filename)
 
     def get_mask_image_from_scene_name_and_idx(self, scene_name, img_idx):
@@ -344,23 +273,6 @@ def get_random_rgb_image_filename(self, scene_directory):
         random_rgb_image = random.choice(all_rgb_images_in_scene)
         return random_rgb_image
 
-    def get_specific_rgbd_with_pose(self, scene_name, img_index):
-        """
-        Returns a rgbd image along with the camera pose for a specific image
-        in a specific scene
-        :param scene_name:
-        :param img_index:
-        :return:
-        """
-        rgb_filename   = self.get_specific_rgb_image_filename(scene_name, img_index)
-        depth_filename = self.get_depth_filename(rgb_filename) 
-
-        rgb   = self.get_rgb_image(rgb_filename)
-        depth = self.get_depth_image(depth_filename)
-        pose  = self.get_pose(rgb_filename)
-
-        return rgb, depth, pose
-
     def get_specific_rgb_image_filname(self, scene_name, img_index):
         DeprecationWarning("use get_specific_rgb_image_filename instead")
         return self.get_specific_rgb_image_filename(scene_name, img_index)
diff --git a/dense_correspondence/evaluation/evaluation.py b/dense_correspondence/evaluation/evaluation.py