
Commit 8035aa6

Update the methods to load datasets
1 parent 5e0f9cd commit 8035aa6

11 files changed: +155 -91 lines

README.md

+27 -9
@@ -68,11 +68,15 @@ As well as the logging board on wandb https://wandb.ai/thibault-neveu/detr-tenso
 
 ## Evaluation
 
-Run the following to evaluate the model using the pre-trained weights:
+Run the following to evaluate the model using the pre-trained weights.
+- **data_dir** is your coco dataset folder
+- **img_dir** is the image folder relative to the data_dir
+- **ann_file** is the validation annotation file relative to the data_dir
 
+Check out ✍ [DETR Tensorflow - How to load a dataset.ipynb](https://github.com/Visual-Behavior/detr-tensorflow/blob/main/notebooks/How%20to%20load%20a%20dataset.ipynb) for more information about the supported datasets and their usage.
 
 ```
-python eval.py --datadir /path/to/coco
+python eval.py --data_dir /path/to/coco/dataset --img_dir val2017 --ann_file annotations/instances_val2017.json
 ```
 
 Outputs:
@@ -99,31 +103,45 @@ detr = get_detr_model(config, include_top=False, nb_class=3, weights="detr", num
 detr.summary()
 
 # Load your dataset
-train_dt, class_names = load_tfcsv_dataset("train", config.batch_size, config, augmentation=True)
+train_dt, class_names = load_tfcsv_dataset(config, config.batch_size, augmentation=True)
 
 # Setup the optimziers and the trainable variables
-optimzers = setup_optimizers(detr, config
+optimzers = setup_optimizers(detr, config)
 
 # Train the model
 training.fit(detr, train_dt, optimzers, config, epoch_nb, class_names)
 ```
-The following commands gives some examples to finetune the model on new datasets: (VOC) and (The Hard hat dataset), with a real ```batch_size``` of 8 and a virtual ```target_batch``` size (gradient aggregate) of 32. ```--log``` is used for logging the training into wandb.
+The following commands give some examples of finetuning the model on new datasets (Pascal VOC and the Hard Hat dataset), with a real ```batch_size``` of 8 and a virtual ```target_batch``` size (gradient aggregate) of 32. ```--log``` is used for logging the training into wandb.
+
+- **data_dir** is your voc dataset folder
+- **img_dir** is the image folder relative to the data_dir
+- **ann_file** is the validation annotation file relative to the data_dir
+
 ```
-python finetune_voc.py --datadir /path/to/VOCdevkit/VOC2012 --batch_size 8 --target_batch 32 --log
+python finetune_voc.py --data_dir /home/thibault/data/VOCdevkit/VOC2012 --img_dir JPEGImages --ann_dir Annotations --batch_size 8 --target_batch 32 --log
+
 ```
+- **data_dir** is the hardhat csv dataset folder
+- **img_dir** and **ann_file** are set in the training file to load the training and validation splits differently
+
+Check out ✍ [DETR Tensorflow - How to load a dataset.ipynb](https://github.com/Visual-Behavior/detr-tensorflow/blob/main/notebooks/How%20to%20load%20a%20dataset.ipynb) for more information about the supported datasets and their usage.
+
 ```
-python finetune_hardhat.py --datadir /path/to/hardhat/dataset --batch_size 8 --target_batch 32 --log
+python finetune_hardhat.py --data_dir /home/thibault/data/hardhat --batch_size 8 --target_batch 32 --log
 ```
 
 ## Training
 
 (Multi GPU training coming soon)
 
+
+- **data_dir** is the coco dataset folder
+- **img_dir** and **ann_file** are set in the training file to load the training and validation splits differently.
+
 ```
-python train_coco.py --datadir /path/to/COCO --batch_size 8 --target_batch 32 --log
+python train_coco.py --data_dir /path/to/COCO --batch_size 8 --target_batch 32 --log
 ```
 
-
 ## Inference
 
 Here is an example of running an inference with the model on your webcam.
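
Taken together, the README changes above replace the single `--datadir` flag with `--data_dir`, `--img_dir`, `--ann_file` and `--ann_dir`. A minimal sketch of the same wiring in Python, using the `DataConfig` and `load_coco_dataset` signatures introduced in the diffs below (paths are placeholders, and setting `config.data` by hand stands in for what `update_from_args` normally does):

```python
from detr_tf.training_config import TrainingConfig, DataConfig
from detr_tf.data.coco import load_coco_dataset

config = TrainingConfig()
# DataConfig joins img_dir / ann_file onto data_dir (see detr_tf/training_config.py below).
config.data = DataConfig(
    data_dir="/path/to/coco/dataset",
    img_dir="val2017",
    ann_file="annotations/instances_val2017.json",
)

# The loaders now take the config first; the split comes from the configured paths,
# not from a "train"/"val" string.
valid_dt, class_names = load_coco_dataset(config, 1, augmentation=False)
```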

detr_tf/data/coco.py

+10 -10
@@ -57,13 +57,13 @@ def get_coco_labels(coco, img_id, image_shape, augmentation):
     return bbox.astype(np.float32), t_class.astype(np.int32), crowd_bbox
 
 
-def get_coco_from_id(coco_id, coco_dir, coco, train_val, augmentation, config):
+def get_coco_from_id(coco_id, coco, augmentation, config, img_dir):
     # Load imag
     img = coco.loadImgs([coco_id])[0]
     # Load image
-    data_type = "train2017" if train_val == "train" else "val2017"
+    #data_type = "train2017" if train_val == "train" else "val2017"
     filne_name = img['file_name']
-    image_path = f"{coco_dir}/{data_type}/{filne_name}"
+    image_path = os.path.join(img_dir, filne_name) #f"{config.}/{data_type}/{filne_name}"
     image = imageio.imread(image_path)
     # Graycale to RGB if needed
     if len(image.shape) == 2: image = gray2rgb(image)
@@ -81,15 +81,15 @@ def get_coco_from_id(coco_id, coco_dir, coco, train_val, augmentation, config):
     return image, t_bbox, t_class, is_crowd
 
 
-def load_coco_dataset(train_val, batch_size, config, augmentation=False):
+def load_coco_dataset(config, batch_size, augmentation=False, ann_dir=None, ann_file=None, img_dir=None):
     """ Load a coco dataset
     """
-    # Set the coco background class on the config
-    config.background_class = 91
+    ann_dir = config.data.ann_dir if ann_dir is None else ann_dir
+    ann_file = config.data.ann_file if ann_file is None else ann_file
+    img_dir = config.data.img_dir if img_dir is None else img_dir
+
+
 
-    # Open annotation file and setup the coco object
-    data_type = "train2017" if train_val == "train" else "val2017"
-    ann_file = f"{config.datadir}/annotations/instances_{data_type}.json"
     coco = COCO(ann_file)
 
     # Extract CLASS names
@@ -112,7 +112,7 @@ def load_coco_dataset(train_val, batch_size, config, augmentation=False):
     # Retrieve img and labels
     outputs_types=(tf.float32, tf.float32, tf.int64, tf.int64)
     dataset = dataset.map(lambda idx: processing.numpy_fc(
-        idx, get_coco_from_id, outputs_types=outputs_types, coco_dir=config.datadir, coco=coco, train_val=train_val, augmentation=augmentation, config=config)
+        idx, get_coco_from_id, outputs_types=outputs_types, coco=coco, augmentation=augmentation, config=config, img_dir=img_dir)
     , num_parallel_calls=tf.data.experimental.AUTOTUNE)
     dataset = dataset.filter(lambda imgs, tbbox, tclass, iscrowd: tf.shape(tbbox)[0] > 0 and iscrowd != 1)
     dataset = dataset.map(lambda imgs, tbbox, tclass, iscrowd: (imgs, tbbox, tclass), num_parallel_calls=tf.data.experimental.AUTOTUNE)
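
Because `ann_file` and `img_dir` can now be overridden per call, one config can serve several splits. A small sketch of that (placeholder paths; note that in this version the COCO loader hands `ann_file` straight to `COCO()` and joins `img_dir` directly with each file name, so the overrides here are full paths):

```python
import os
from detr_tf.training_config import TrainingConfig
from detr_tf.data.coco import load_coco_dataset

config = TrainingConfig()
data_dir = "/path/to/coco/dataset"  # placeholder

# Explicit per-call overrides of the annotation file and image folder.
valid_dt, class_names = load_coco_dataset(
    config, 1, augmentation=False,
    img_dir=os.path.join(data_dir, "val2017"),
    ann_file=os.path.join(data_dir, "annotations", "instances_val2017.json"))
```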

detr_tf/data/tfcsv.py

+14 -5
@@ -9,9 +9,9 @@
 from .transformation import detr_transform
 from .. import bbox
 
-def load_data_from_index(index, class_names, filenames, train_val, anns, config, augmentation):
+def load_data_from_index(index, class_names, filenames, anns, config, augmentation, img_dir):
     # Open the image
-    image = imageio.imread(os.path.join(config.datadir, f"{train_val}", filenames[index]))
+    image = imageio.imread(os.path.join(config.data.data_dir, img_dir, filenames[index]))
     # Select all the annotatiom (bbox and class) on this image
     image_anns = anns[anns["filename"] == filenames[index]]
 
@@ -35,25 +35,34 @@ def load_data_from_index(index, class_names, filenames, train_val, anns, config,
     return image.astype(np.float32), t_bbox.astype(np.float32), np.expand_dims(t_class, axis=-1).astype(np.int64)
 
 
-def load_tfcsv_dataset(train_val, batch_size, config, augmentation=False, exclude=[]):
+def load_tfcsv_dataset(config, batch_size, augmentation=False, exclude=[], ann_dir=None, ann_file=None, img_dir=None):
     """ Load the hardhat dataset
     """
-    anns = pd.read_csv(os.path.join(config.datadir, f'{train_val}/_annotations.csv'))
+    ann_dir = config.data.ann_dir if ann_dir is None else ann_dir
+    ann_file = config.data.ann_file if ann_file is None else ann_file
+    img_dir = config.data.img_dir if img_dir is None else img_dir
+
+    anns = pd.read_csv(os.path.join(config.data.data_dir, ann_file))
     for name in exclude:
         anns = anns[anns["class"] != name]
 
     unique_class = anns["class"].unique()
     unique_class.sort()
+
+
+    # Set the background class to 0
+    config.background_class = 0
     class_names = ["background"] + unique_class.tolist()
 
+
     filenames = anns["filename"].unique().tolist()
     indexes = list(range(0, len(filenames)))
     shuffle(indexes)
 
     dataset = tf.data.Dataset.from_tensor_slices(indexes)
     dataset = dataset.map(lambda idx: processing.numpy_fc(
         idx, load_data_from_index,
-        class_names=class_names, filenames=filenames, train_val=train_val, anns=anns, config=config, augmentation=augmentation)
+        class_names=class_names, filenames=filenames, anns=anns, config=config, augmentation=augmentation, img_dir=img_dir)
     ,num_parallel_calls=tf.data.experimental.AUTOTUNE)
 
 
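Unlike the COCO loader, this CSV loader joins the `ann_file` and `img_dir` overrides onto `config.data.data_dir` itself, so relative values work. A short sketch mirroring the way `finetune_hardhat.py` below calls it (the data_dir path is a placeholder):

```python
from detr_tf.training_config import TrainingConfig, DataConfig
from detr_tf.data.tfcsv import load_tfcsv_dataset

config = TrainingConfig()
config.data = DataConfig(data_dir="/path/to/hardhat")  # placeholder

# Train and test splits from the same config, using relative overrides.
train_dt, class_names = load_tfcsv_dataset(
    config, config.batch_size, augmentation=True,
    ann_file="train/_annotations.csv", img_dir="train")
valid_dt, _ = load_tfcsv_dataset(
    config, 4, augmentation=False,
    ann_file="test/_annotations.csv", img_dir="test")
```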

detr_tf/data/voc.py

+28 -9
@@ -21,7 +21,7 @@
 
 def load_voc_labels(img_id, class_names, voc_dir, augmentation, config):
 
-    anno_path = os.path.join(voc_dir, 'Annotations', img_id + '.xml')
+    anno_path = os.path.join(voc_dir, config.data.ann_dir, img_id + '.xml')
     objects = ET.parse(anno_path).findall('object')
     size = ET.parse(anno_path).find('size')
     width = float(size.find("width").text)
@@ -55,10 +55,10 @@ def load_voc_labels(img_id, class_names, voc_dir, augmentation, config):
     return t_bbox, t_class
 
 
-def load_voc_from_id(img_id, class_names, voc_dir, augmentation, config):
+def load_voc_from_id(img_id, class_names, voc_dir, augmentation, config, img_dir):
     img_id = str(img_id.decode())
     # Load image
-    img_path = os.path.join(voc_dir, 'JPEGImages', img_id + '.jpg')
+    img_path = os.path.join(voc_dir, config.data.img_dir, img_id + '.jpg')
     image = imageio.imread(img_path)
     # Load labels
     t_bbox, t_class = load_voc_labels(img_id, class_names, voc_dir, augmentation, config)
@@ -76,18 +76,37 @@ def load_voc_from_id(img_id, class_names, voc_dir, augmentation, config):
     return (image, t_bbox, t_class)
 
 
-def load_voc_dataset(train_val, class_names, batch_size, config, augmentation=False):
+def load_voc_dataset(config, batch_size, augmentation=False, ann_dir=None, ann_file=None, img_dir=None):
     """
     """
+    ann_dir = config.data.ann_dir if ann_dir is None else ann_dir
+    ann_file = config.data.ann_file if ann_file is None else ann_file
+    img_dir = config.data.img_dir if img_dir is None else img_dir
+
     # Set the background class to 0
     config.background_class = 0
-    class_names = ["back"] + class_names
 
-    image_dir = os.path.join(config.datadir, 'JPEGImages')
-    anno_dir = os.path.join(config.datadir, 'Annotations')
+    image_dir = os.path.join(config.data.data_dir, img_dir)
+    anno_dir = os.path.join(config.data.data_dir, ann_dir)
+    # ids lists
+    ids = list(map(lambda x: x[:-4], os.listdir(image_dir)))
+
+    # Retrieve the class names in the dataset
+    class_names = ['back']
+    for img_id in ids:
+        anno_path = os.path.join(config.data.data_dir, anno_dir, img_id + '.xml')
+        for obj in ET.parse(anno_path).findall('object'):
+            # Open bbox and retrieve info
+            name = obj.find('name').text.lower().strip()
+            if name not in class_names:
+                try: # Faster than checking
+                    class_names[name]
+                except:
+                    class_names.append(name)
+
     ids = list(map(lambda x: x[:-4], os.listdir(image_dir)))
 
-    ids = ids[:int(len(ids) * 0.75)] if train_val == "train" else ids[int(len(ids) * 0.75):]
+    #ids = ids[:int(len(ids) * 0.75)] if train_val == "train" else ids[int(len(ids) * 0.75):]
     # Shuffle all the dataset
     shuffle(ids)
 
@@ -96,7 +115,7 @@ def load_voc_dataset(train_val, class_names, batch_size, config, augmentation=Fa
     dataset = dataset.shuffle(1000)
     # Retrieve img and labels
     dataset = dataset.map(lambda idx: processing.numpy_fc(idx, load_voc_from_id,
-        class_names=class_names, voc_dir=config.datadir, augmentation=augmentation, config=config)
+        class_names=class_names, voc_dir=config.data.data_dir, augmentation=augmentation, config=config, img_dir=img_dir)
     , num_parallel_calls=tf.data.experimental.AUTOTUNE)
     # Filter labels to be sure to keep only sample with at least one bbox
     dataset = dataset.filter(lambda imgs, tbbox, tclass: tf.shape(tbbox)[0] > 0)
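
With the `train_val` argument gone, the VOC loader now reads whatever folders the config points at and discovers the class names from the annotation files itself. A minimal sketch for a standard VOC2012 layout, assuming `config.data` is filled the way `update_from_args` does in `training_config.py` below (the data_dir path is a placeholder):

```python
from detr_tf.training_config import TrainingConfig, DataConfig
from detr_tf.data.voc import load_voc_dataset

config = TrainingConfig()
config.data = DataConfig(
    data_dir="/path/to/VOCdevkit/VOC2012",  # placeholder
    img_dir="JPEGImages",
    ann_dir="Annotations",
)

# class_names is now built from the XML annotations rather than passed in.
train_dt, class_names = load_voc_dataset(config, config.batch_size, augmentation=True)
```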

detr_tf/training_config.py

+25 -2
@@ -1,5 +1,6 @@
 import tensorflow as tf
 import argparse
+import os
 
 
 def training_config_parser():
@@ -8,7 +9,11 @@ def training_config_parser():
     parser = argparse.ArgumentParser()
 
     # Dataset info
-    parser.add_argument("--datadir", type=str, required=False, help="/path/to/the/dataset")
+    parser.add_argument("--data_dir", type=str, required=False, help="Path to the dataset directory")
+    parser.add_argument("--img_dir", type=str, required=False, help="Image directory relative to data_dir")
+    parser.add_argument("--ann_file", type=str, required=False, help="Annotation file relative to data_dir")
+    parser.add_argument("--ann_dir", type=str, required=False, help="Annotation directory relative to data_dir")
+
     parser.add_argument("--background_class", type=int, required=False, default=0, help="Default background class")
 
     # What to train
@@ -38,7 +43,8 @@ class TrainingConfig():
     def __init__(self):
 
         # Dataset info
-        self.datadir = None
+        self.data_dir, self.img_dir, self.ann_dir, self.ann_file = None, None, None, None
+        self.data = DataConfig(data_dir=None, img_dir=None, ann_file=None, ann_dir=None)
         self.background_class = 0
         self.image_size = 376, 672
 
@@ -85,8 +91,25 @@ def update_from_args(self, args):
                 getattr(self, key).assign(args[key])
             else:
                 setattr(self, key, args[key])
+
+        # Set the config on the data class
+
+
+        self.data = DataConfig(
+            data_dir=self.data_dir,
+            img_dir=self.img_dir,
+            ann_file=self.ann_file,
+            ann_dir=self.ann_dir
+        )
+
 
+class DataConfig():
 
+    def __init__(self, data_dir=None, img_dir=None, ann_file=None, ann_dir=None):
+        self.data_dir = data_dir
+        self.img_dir = os.path.join(data_dir, img_dir) if data_dir is not None and img_dir is not None else None
+        self.ann_file = os.path.join(self.data_dir, ann_file) if ann_file is not None else None
+        self.ann_dir = os.path.join(self.data_dir, ann_dir) if ann_dir is not None else None
 
 
 if __name__ == "__main__":
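
A quick sketch of how the new `DataConfig` resolves the relative pieces against `data_dir` (values are placeholders):

```python
from detr_tf.training_config import DataConfig

cfg = DataConfig(
    data_dir="/path/to/coco/dataset",
    img_dir="val2017",
    ann_file="annotations/instances_val2017.json",
)
print(cfg.img_dir)   # /path/to/coco/dataset/val2017
print(cfg.ann_file)  # /path/to/coco/dataset/annotations/instances_val2017.json
print(cfg.ann_dir)   # None, since no ann_dir was given
```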

eval.py

+1 -1
@@ -73,7 +73,7 @@ def eval_model(model, config, class_names, valid_dt):
     # Load the model with the new layers to finetune
     detr = build_model(config)
 
-    valid_dt, class_names = load_coco_dataset("val", 1, config, augmentation=None)
+    valid_dt, class_names = load_coco_dataset(config, 1, augmentation=None)
 
     # Run training
     eval_model(detr, config, class_names, valid_dt)

finetune_hardhat.py

+4 -2
@@ -49,8 +49,10 @@ def run_finetuning(config):
     detr = build_model(config)
 
     # Load the training and validation dataset and exclude the person class
-    train_dt, class_names = load_tfcsv_dataset("train", config.batch_size, config, augmentation=True, exclude=["person"])
-    valid_dt, _ = load_tfcsv_dataset("test", 4, config, augmentation=False, exclude=["person"])
+    train_dt, class_names = load_tfcsv_dataset(
+        config, config.batch_size, augmentation=True, exclude=["person"], ann_file="train/_annotations.csv", img_dir="train")
+    valid_dt, _ = load_tfcsv_dataset(
+        config, 4, augmentation=False, exclude=["person"], ann_file="test/_annotations.csv", img_dir="test")
 
     # Train/finetune the transformers only
     config.train_backbone = tf.Variable(False)

finetune_voc.py

+4 -3
@@ -67,9 +67,10 @@ def run_finetuning(config):
     # Load the model with the new layers to finetune
     detr = build_model(config)
 
-    # Load the training and validation dataset
-    train_dt, class_names = load_voc_dataset("train", VOC_CLASS_NAME, config.batch_size, config, augmentation=True)
-    valid_dt, _ = load_voc_dataset("val", VOC_CLASS_NAME, 1, config, augmentation=False)
+    # Load the training and validation dataset (for the purpose of this example we load the training set
+    # as the validation set, but in practice you should use different folders for training and validation)
+    train_dt, class_names = load_voc_dataset(config, config.batch_size, augmentation=True)
+    valid_dt, _ = load_voc_dataset(config, 1, augmentation=False)
 
     # Train/finetune the transformers only
     config.train_backbone = tf.Variable(False)

notebooks/DETR Tensorflow - How to setup a custom dataset.ipynb

+1 -1
@@ -859,7 +859,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.7.7"
+   "version": "3.8.3"
   }
  },
  "nbformat": 4,
