
Commit 8035aa6

Update the methods to load datasets
1 parent 5e0f9cd commit 8035aa6

11 files changed: +155 -91 lines

README.md

+27 -9
@@ -68,11 +68,15 @@ As well as the logging board on wandb https://wandb.ai/thibault-neveu/detr-tenso
 
 ## Evaluation
 
-Run the following to evaluate the model using the pre-trained weights:
+Run the following to evaluate the model using the pre-trained weights.
+- **data_dir** is your coco dataset folder
+- **img_dir** is the image folder relative to the data_dir
+- **ann_file** is the validation annotation file relative to the data_dir
 
+Check out ✍ [DETR Tensorflow - How to load a dataset.ipynb](https://github.com/Visual-Behavior/detr-tensorflow/blob/main/notebooks/How%20to%20load%20a%20dataset.ipynb) for more information about the supported datasets and their usage.
 
 ```
-python eval.py --datadir /path/to/coco
+python eval.py --data_dir /path/to/coco/dataset --img_dir val2017 --ann_file annotations/instances_val2017.json
 ```
 
 Outputs:
@@ -99,31 +103,45 @@ detr = get_detr_model(config, include_top=False, nb_class=3, weights="detr", num
 detr.summary()
 
 # Load your dataset
-train_dt, class_names = load_tfcsv_dataset("train", config.batch_size, config, augmentation=True)
+train_dt, class_names = load_tfcsv_dataset(config, config.batch_size, augmentation=True)
 
 # Setup the optimziers and the trainable variables
-optimzers = setup_optimizers(detr, config
+optimzers = setup_optimizers(detr, config)
 
 # Train the model
 training.fit(detr, train_dt, optimzers, config, epoch_nb, class_names)
 ```
-The following commands gives some examples to finetune the model on new datasets: (VOC) and (The Hard hat dataset), with a real ```batch_size``` of 8 and a virtual ```target_batch``` size (gradient aggregate) of 32. ```--log``` is used for logging the training into wandb.
+The following commands give some examples of finetuning the model on new datasets (Pascal VOC and the Hard Hat dataset), with a real ```batch_size``` of 8 and a virtual ```target_batch``` size (gradient aggregate) of 32. ```--log``` is used for logging the training into wandb.
+
+- **data_dir** is your voc dataset folder
+- **img_dir** is the image folder relative to the data_dir
+- **ann_file** is the validation annotation file relative to the data_dir
+
 ```
-python finetune_voc.py --datadir /path/to/VOCdevkit/VOC2012 --batch_size 8 --target_batch 32 --log
+python finetune_voc.py --data_dir /home/thibault/data/VOCdevkit/VOC2012 --img_dir JPEGImages --ann_dir Annotations --batch_size 8 --target_batch 32 --log
+
 ```
+- **data_dir** is the hardhat csv dataset folder
+- **img_dir** and **ann_file** are set in the training file to load the training and validation splits differently
+
+Check out ✍ [DETR Tensorflow - How to load a dataset.ipynb](https://github.com/Visual-Behavior/detr-tensorflow/blob/main/notebooks/How%20to%20load%20a%20dataset.ipynb) for more information about the supported datasets and their usage.
+
 ```
-python finetune_hardhat.py --datadir /path/to/hardhat/dataset --batch_size 8 --target_batch 32 --log
+python finetune_hardhat.py --data_dir /home/thibault/data/hardhat --batch_size 8 --target_batch 32 --log
 ```
 
 ## Training
 
 (Multi GPU training coming soon)
 
+
+- **data_dir** is the coco dataset folder
+- **img_dir** and **ann_file** are set in the training file to load the training and validation splits differently.
+
 ```
-python train_coco.py --datadir /path/to/COCO --batch_size 8 --target_batch 32 --log
+python train_coco.py --data_dir /path/to/COCO --batch_size 8 --target_batch 32 --log
 ```
 
-
 ## Inference
 
 Here is an example of running an inference with the model on your webcam.
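
Taken together, the README changes above replace the single `--datadir` flag with `--data_dir`, `--img_dir`, `--ann_file` and `--ann_dir`. A minimal sketch of the same wiring in Python, using the `DataConfig` and `load_coco_dataset` signatures introduced in the diffs below (paths are placeholders, and setting `config.data` by hand stands in for what `update_from_args` normally does):

```python
from detr_tf.training_config import TrainingConfig, DataConfig
from detr_tf.data.coco import load_coco_dataset

config = TrainingConfig()
# DataConfig joins img_dir / ann_file onto data_dir (see detr_tf/training_config.py below).
config.data = DataConfig(
    data_dir="/path/to/coco/dataset",
    img_dir="val2017",
    ann_file="annotations/instances_val2017.json",
)

# The loaders now take the config first; the split comes from the configured paths,
# not from a "train"/"val" string.
valid_dt, class_names = load_coco_dataset(config, 1, augmentation=False)
```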

detr_tf/data/coco.py

+10 -10
@@ -57,13 +57,13 @@ def get_coco_labels(coco, img_id, image_shape, augmentation):
     return bbox.astype(np.float32), t_class.astype(np.int32), crowd_bbox
 
 
-def get_coco_from_id(coco_id, coco_dir, coco, train_val, augmentation, config):
+def get_coco_from_id(coco_id, coco, augmentation, config, img_dir):
     # Load imag
     img = coco.loadImgs([coco_id])[0]
     # Load image
-    data_type = "train2017" if train_val == "train" else "val2017"
+    #data_type = "train2017" if train_val == "train" else "val2017"
     filne_name = img['file_name']
-    image_path = f"{coco_dir}/{data_type}/{filne_name}"
+    image_path = os.path.join(img_dir, filne_name) #f"{config.}/{data_type}/{filne_name}"
     image = imageio.imread(image_path)
     # Graycale to RGB if needed
     if len(image.shape) == 2: image = gray2rgb(image)
@@ -81,15 +81,15 @@ def get_coco_from_id(coco_id, coco_dir, coco, train_val, augmentation, config):
     return image, t_bbox, t_class, is_crowd
 
 
-def load_coco_dataset(train_val, batch_size, config, augmentation=False):
+def load_coco_dataset(config, batch_size, augmentation=False, ann_dir=None, ann_file=None, img_dir=None):
     """ Load a coco dataset
     """
-    # Set the coco background class on the config
-    config.background_class = 91
+    ann_dir = config.data.ann_dir if ann_dir is None else ann_dir
+    ann_file = config.data.ann_file if ann_file is None else ann_file
+    img_dir = config.data.img_dir if img_dir is None else img_dir
+
+
 
-    # Open annotation file and setup the coco object
-    data_type = "train2017" if train_val == "train" else "val2017"
-    ann_file = f"{config.datadir}/annotations/instances_{data_type}.json"
     coco = COCO(ann_file)
 
     # Extract CLASS names
@@ -112,7 +112,7 @@ def load_coco_dataset(train_val, batch_size, config, augmentation=False):
     # Retrieve img and labels
     outputs_types=(tf.float32, tf.float32, tf.int64, tf.int64)
     dataset = dataset.map(lambda idx: processing.numpy_fc(
-        idx, get_coco_from_id, outputs_types=outputs_types, coco_dir=config.datadir, coco=coco, train_val=train_val, augmentation=augmentation, config=config)
+        idx, get_coco_from_id, outputs_types=outputs_types, coco=coco, augmentation=augmentation, config=config, img_dir=img_dir)
     , num_parallel_calls=tf.data.experimental.AUTOTUNE)
     dataset = dataset.filter(lambda imgs, tbbox, tclass, iscrowd: tf.shape(tbbox)[0] > 0 and iscrowd != 1)
     dataset = dataset.map(lambda imgs, tbbox, tclass, iscrowd: (imgs, tbbox, tclass), num_parallel_calls=tf.data.experimental.AUTOTUNE)
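
Because `ann_file` and `img_dir` can now be overridden per call, one config can serve several splits. A small sketch of that (placeholder paths; note that in this version the COCO loader hands `ann_file` straight to `COCO()` and joins `img_dir` directly with each file name, so the overrides here are full paths):

```python
import os
from detr_tf.training_config import TrainingConfig
from detr_tf.data.coco import load_coco_dataset

config = TrainingConfig()
data_dir = "/path/to/coco/dataset"  # placeholder

# Explicit per-call overrides of the annotation file and image folder.
valid_dt, class_names = load_coco_dataset(
    config, 1, augmentation=False,
    img_dir=os.path.join(data_dir, "val2017"),
    ann_file=os.path.join(data_dir, "annotations", "instances_val2017.json"))
```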

detr_tf/data/tfcsv.py

+14 -5
@@ -9,9 +9,9 @@
 from .transformation import detr_transform
 from .. import bbox
 
-def load_data_from_index(index, class_names, filenames, train_val, anns, config, augmentation):
+def load_data_from_index(index, class_names, filenames, anns, config, augmentation, img_dir):
     # Open the image
-    image = imageio.imread(os.path.join(config.datadir, f"{train_val}", filenames[index]))
+    image = imageio.imread(os.path.join(config.data.data_dir, img_dir, filenames[index]))
     # Select all the annotatiom (bbox and class) on this image
     image_anns = anns[anns["filename"] == filenames[index]]
 
@@ -35,25 +35,34 @@ def load_data_from_index(index, class_names, filenames, train_val, anns, config,
     return image.astype(np.float32), t_bbox.astype(np.float32), np.expand_dims(t_class, axis=-1).astype(np.int64)
 
 
-def load_tfcsv_dataset(train_val, batch_size, config, augmentation=False, exclude=[]):
+def load_tfcsv_dataset(config, batch_size, augmentation=False, exclude=[], ann_dir=None, ann_file=None, img_dir=None):
     """ Load the hardhat dataset
     """
-    anns = pd.read_csv(os.path.join(config.datadir, f'{train_val}/_annotations.csv'))
+    ann_dir = config.data.ann_dir if ann_dir is None else ann_dir
+    ann_file = config.data.ann_file if ann_file is None else ann_file
+    img_dir = config.data.img_dir if img_dir is None else img_dir
+
+    anns = pd.read_csv(os.path.join(config.data.data_dir, ann_file))
     for name in exclude:
         anns = anns[anns["class"] != name]
 
     unique_class = anns["class"].unique()
     unique_class.sort()
+
+
+    # Set the background class to 0
+    config.background_class = 0
     class_names = ["background"] + unique_class.tolist()
 
+
     filenames = anns["filename"].unique().tolist()
     indexes = list(range(0, len(filenames)))
     shuffle(indexes)
 
     dataset = tf.data.Dataset.from_tensor_slices(indexes)
     dataset = dataset.map(lambda idx: processing.numpy_fc(
         idx, load_data_from_index,
-        class_names=class_names, filenames=filenames, train_val=train_val, anns=anns, config=config, augmentation=augmentation)
+        class_names=class_names, filenames=filenames, anns=anns, config=config, augmentation=augmentation, img_dir=img_dir)
     ,num_parallel_calls=tf.data.experimental.AUTOTUNE)
 
 
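Unlike the COCO loader, this CSV loader joins the `ann_file` and `img_dir` overrides onto `config.data.data_dir` itself, so relative values work. A short sketch mirroring the way `finetune_hardhat.py` below calls it (the data_dir path is a placeholder):

```python
from detr_tf.training_config import TrainingConfig, DataConfig
from detr_tf.data.tfcsv import load_tfcsv_dataset

config = TrainingConfig()
config.data = DataConfig(data_dir="/path/to/hardhat")  # placeholder

# Train and test splits from the same config, using relative overrides.
train_dt, class_names = load_tfcsv_dataset(
    config, config.batch_size, augmentation=True,
    ann_file="train/_annotations.csv", img_dir="train")
valid_dt, _ = load_tfcsv_dataset(
    config, 4, augmentation=False,
    ann_file="test/_annotations.csv", img_dir="test")
```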

detr_tf/data/voc.py

+28 -9
@@ -21,7 +21,7 @@
 
 def load_voc_labels(img_id, class_names, voc_dir, augmentation, config):
 
-    anno_path = os.path.join(voc_dir, 'Annotations', img_id + '.xml')
+    anno_path = os.path.join(voc_dir, config.data.ann_dir, img_id + '.xml')
     objects = ET.parse(anno_path).findall('object')
     size = ET.parse(anno_path).find('size')
     width = float(size.find("width").text)
@@ -55,10 +55,10 @@ def load_voc_labels(img_id, class_names, voc_dir, augmentation, config):
     return t_bbox, t_class
 
 
-def load_voc_from_id(img_id, class_names, voc_dir, augmentation, config):
+def load_voc_from_id(img_id, class_names, voc_dir, augmentation, config, img_dir):
     img_id = str(img_id.decode())
     # Load image
-    img_path = os.path.join(voc_dir, 'JPEGImages', img_id + '.jpg')
+    img_path = os.path.join(voc_dir, config.data.img_dir, img_id + '.jpg')
     image = imageio.imread(img_path)
     # Load labels
     t_bbox, t_class = load_voc_labels(img_id, class_names, voc_dir, augmentation, config)
@@ -76,18 +76,37 @@ def load_voc_from_id(img_id, class_names, voc_dir, augmentation, config):
     return (image, t_bbox, t_class)
 
 
-def load_voc_dataset(train_val, class_names, batch_size, config, augmentation=False):
+def load_voc_dataset(config, batch_size, augmentation=False, ann_dir=None, ann_file=None, img_dir=None):
     """
     """
+    ann_dir = config.data.ann_dir if ann_dir is None else ann_dir
+    ann_file = config.data.ann_file if ann_file is None else ann_file
+    img_dir = config.data.img_dir if img_dir is None else img_dir
+
     # Set the background class to 0
     config.background_class = 0
-    class_names = ["back"] + class_names
 
-    image_dir = os.path.join(config.datadir, 'JPEGImages')
-    anno_dir = os.path.join(config.datadir, 'Annotations')
+    image_dir = os.path.join(config.data.data_dir, img_dir)
+    anno_dir = os.path.join(config.data.data_dir, ann_dir)
+    # ids lists
+    ids = list(map(lambda x: x[:-4], os.listdir(image_dir)))
+
+    # Retrieve the class names in the dataset
+    class_names = ['back']
+    for img_id in ids:
+        anno_path = os.path.join(config.data.data_dir, anno_dir, img_id + '.xml')
+        for obj in ET.parse(anno_path).findall('object'):
+            # Open bbox and retrieve info
+            name = obj.find('name').text.lower().strip()
+            if name not in class_names:
+                try: # Faster than checking
+                    class_names[name]
+                except:
+                    class_names.append(name)
+
     ids = list(map(lambda x: x[:-4], os.listdir(image_dir)))
 
-    ids = ids[:int(len(ids) * 0.75)] if train_val == "train" else ids[int(len(ids) * 0.75):]
+    #ids = ids[:int(len(ids) * 0.75)] if train_val == "train" else ids[int(len(ids) * 0.75):]
     # Shuffle all the dataset
     shuffle(ids)
 
@@ -96,7 +115,7 @@ def load_voc_dataset(train_val, class_names, batch_size, config, augmentation=Fa
     dataset = dataset.shuffle(1000)
     # Retrieve img and labels
     dataset = dataset.map(lambda idx: processing.numpy_fc(idx, load_voc_from_id,
-        class_names=class_names, voc_dir=config.datadir, augmentation=augmentation, config=config)
+        class_names=class_names, voc_dir=config.data.data_dir, augmentation=augmentation, config=config, img_dir=img_dir)
     , num_parallel_calls=tf.data.experimental.AUTOTUNE)
     # Filter labels to be sure to keep only sample with at least one bbox
     dataset = dataset.filter(lambda imgs, tbbox, tclass: tf.shape(tbbox)[0] > 0)
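
With the `train_val` argument gone, the VOC loader now reads whatever folders the config points at and discovers the class names from the annotation files itself. A minimal sketch for a standard VOC2012 layout, assuming `config.data` is filled the way `update_from_args` does in `training_config.py` below (the data_dir path is a placeholder):

```python
from detr_tf.training_config import TrainingConfig, DataConfig
from detr_tf.data.voc import load_voc_dataset

config = TrainingConfig()
config.data = DataConfig(
    data_dir="/path/to/VOCdevkit/VOC2012",  # placeholder
    img_dir="JPEGImages",
    ann_dir="Annotations",
)

# class_names is now built from the XML annotations rather than passed in.
train_dt, class_names = load_voc_dataset(config, config.batch_size, augmentation=True)
```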

detr_tf/training_config.py

+25 -2
@@ -1,5 +1,6 @@
 import tensorflow as tf
 import argparse
+import os
 
 
 def training_config_parser():
@@ -8,7 +9,11 @@ def training_config_parser():
     parser = argparse.ArgumentParser()
 
     # Dataset info
-    parser.add_argument("--datadir", type=str, required=False, help="/path/to/the/dataset")
+    parser.add_argument("--data_dir", type=str, required=False, help="Path to the dataset directory")
+    parser.add_argument("--img_dir", type=str, required=False, help="Image directory relative to data_dir")
+    parser.add_argument("--ann_file", type=str, required=False, help="Annotation file relative to data_dir")
+    parser.add_argument("--ann_dir", type=str, required=False, help="Annotation directory relative to data_dir")
+
     parser.add_argument("--background_class", type=int, required=False, default=0, help="Default background class")
 
     # What to train
@@ -38,7 +43,8 @@ class TrainingConfig():
     def __init__(self):
 
         # Dataset info
-        self.datadir = None
+        self.data_dir, self.img_dir, self.ann_dir, self.ann_file = None, None, None, None
+        self.data = DataConfig(data_dir=None, img_dir=None, ann_file=None, ann_dir=None)
         self.background_class = 0
         self.image_size = 376, 672
 
@@ -85,8 +91,25 @@ def update_from_args(self, args):
                 getattr(self, key).assign(args[key])
             else:
                 setattr(self, key, args[key])
+
+        # Set the config on the data class
+
+
+        self.data = DataConfig(
+            data_dir=self.data_dir,
+            img_dir=self.img_dir,
+            ann_file=self.ann_file,
+            ann_dir=self.ann_dir
+        )
+
 
+class DataConfig():
 
+    def __init__(self, data_dir=None, img_dir=None, ann_file=None, ann_dir=None):
+        self.data_dir = data_dir
+        self.img_dir = os.path.join(data_dir, img_dir) if data_dir is not None and img_dir is not None else None
+        self.ann_file = os.path.join(self.data_dir, ann_file) if ann_file is not None else None
+        self.ann_dir = os.path.join(self.data_dir, ann_dir) if ann_dir is not None else None
 
 
 if __name__ == "__main__":
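
A quick sketch of how the new `DataConfig` resolves the relative pieces against `data_dir` (values are placeholders):

```python
from detr_tf.training_config import DataConfig

cfg = DataConfig(
    data_dir="/path/to/coco/dataset",
    img_dir="val2017",
    ann_file="annotations/instances_val2017.json",
)
print(cfg.img_dir)   # /path/to/coco/dataset/val2017
print(cfg.ann_file)  # /path/to/coco/dataset/annotations/instances_val2017.json
print(cfg.ann_dir)   # None, since no ann_dir was given
```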

eval.py

+1 -1
@@ -73,7 +73,7 @@ def eval_model(model, config, class_names, valid_dt):
     # Load the model with the new layers to finetune
     detr = build_model(config)
 
-    valid_dt, class_names = load_coco_dataset("val", 1, config, augmentation=None)
+    valid_dt, class_names = load_coco_dataset(config, 1, augmentation=None)
 
     # Run training
     eval_model(detr, config, class_names, valid_dt)

finetune_hardhat.py

+4 -2
@@ -49,8 +49,10 @@ def run_finetuning(config):
     detr = build_model(config)
 
     # Load the training and validation dataset and exclude the person class
-    train_dt, class_names = load_tfcsv_dataset("train", config.batch_size, config, augmentation=True, exclude=["person"])
-    valid_dt, _ = load_tfcsv_dataset("test", 4, config, augmentation=False, exclude=["person"])
+    train_dt, class_names = load_tfcsv_dataset(
+        config, config.batch_size, augmentation=True, exclude=["person"], ann_file="train/_annotations.csv", img_dir="train")
+    valid_dt, _ = load_tfcsv_dataset(
+        config, 4, augmentation=False, exclude=["person"], ann_file="test/_annotations.csv", img_dir="test")
 
     # Train/finetune the transformers only
     config.train_backbone = tf.Variable(False)

finetune_voc.py

+4 -3
@@ -67,9 +67,10 @@ def run_finetuning(config):
     # Load the model with the new layers to finetune
     detr = build_model(config)
 
-    # Load the training and validation dataset
-    train_dt, class_names = load_voc_dataset("train", VOC_CLASS_NAME, config.batch_size, config, augmentation=True)
-    valid_dt, _ = load_voc_dataset("val", VOC_CLASS_NAME, 1, config, augmentation=False)
+    # Load the training and validation dataset (for the purpose of this example we load the training set
+    # as the validation set, but in practice you should use different folders for training and validation)
+    train_dt, class_names = load_voc_dataset(config, config.batch_size, augmentation=True)
+    valid_dt, _ = load_voc_dataset(config, 1, augmentation=False)
 
     # Train/finetune the transformers only
     config.train_backbone = tf.Variable(False)

notebooks/DETR Tensorflow - How to setup a custom dataset.ipynb

+1 -1
@@ -859,7 +859,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.7.7"
+   "version": "3.8.3"
   }
  },
  "nbformat": 4,
