diff --git a/README.md b/README.md index 9f64852ca..9cd3e3272 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,7 @@ + + + +
@@ -246,4 +250,4 @@ It is hoped that every AI practitioner in the world will stick to the concept of
Without the guidance of Dr. Jian Sun, YOLOX would not have been released and open sourced to the community. The passing of Dr. Jian Sun is a huge loss to the CV field, and we specially add this section here in memory of and in mourning for our "captain", Dr. Sun. -It is hoped that every AI practitioner in the world will stick to the concept of "continuously innovating to expand cognitive boundaries, and extraordinary technology achieving product value", and keep moving forward. \ No newline at end of file +It is hoped that every AI practitioner in the world will stick to the concept of "continuously innovating to expand cognitive boundaries, and extraordinary technology achieving product value", and keep moving forward. diff --git a/datasets/pedestrian_coco/annotations/train_annotations.json b/datasets/pedestrian_coco/annotations/train_annotations.json new file mode 100644 index 000000000..e69de29bb diff --git a/datasets/pedestrian_coco/annotations/valid_annotations.json b/datasets/pedestrian_coco/annotations/valid_annotations.json new file mode 100644 index 000000000..e69de29bb diff --git a/datasets/pedestrian_coco/train/README.md b/datasets/pedestrian_coco/train/README.md new file mode 100644 index 000000000..7b9800885 --- /dev/null +++ b/datasets/pedestrian_coco/train/README.md @@ -0,0 +1 @@ +put the train images \ No newline at end of file diff --git a/datasets/pedestrian_coco/valid/README.md b/datasets/pedestrian_coco/valid/README.md new file mode 100644 index 000000000..657f11b45 --- /dev/null +++ b/datasets/pedestrian_coco/valid/README.md @@ -0,0 +1 @@ +put the valid images \ No newline at end of file diff --git a/datasets/pedestrian_voc/Annotations/README.md b/datasets/pedestrian_voc/Annotations/README.md new file mode 100644 index 000000000..2b9cfa086 --- /dev/null +++ b/datasets/pedestrian_voc/Annotations/README.md @@ -0,0 +1 @@ +put the train and valid annotations \ No newline at end of file diff --git a/datasets/pedestrian_voc/ImageSets/Main/train.txt b/datasets/pedestrian_voc/ImageSets/Main/train.txt new file mode 100644 index 000000000..e69de29bb diff --git a/datasets/pedestrian_voc/ImageSets/Main/valid.txt b/datasets/pedestrian_voc/ImageSets/Main/valid.txt new file mode 100644 index 000000000..e69de29bb diff --git a/datasets/pedestrian_voc/JPEGImages/README.md b/datasets/pedestrian_voc/JPEGImages/README.md new file mode 100644 index 000000000..62ac03c99 --- /dev/null +++ b/datasets/pedestrian_voc/JPEGImages/README.md @@ -0,0 +1 @@ +put the train and valid images \ No newline at end of file diff --git a/exps/example/custom/nano.py b/exps/example/custom/coco_format/yolox_nano.py similarity index 100% rename from exps/example/custom/nano.py rename to exps/example/custom/coco_format/yolox_nano.py diff --git a/exps/example/custom/yolox_s.py b/exps/example/custom/coco_format/yolox_s.py similarity index 100% rename from exps/example/custom/yolox_s.py rename to exps/example/custom/coco_format/yolox_s.py diff --git a/exps/example/custom/voc_format/yolox_voc_nano/__pycache__/yolox_voc_nano.cpython-38.pyc b/exps/example/custom/voc_format/yolox_voc_nano/__pycache__/yolox_voc_nano.cpython-38.pyc new file mode 100644 index 000000000..0b3dc1929 Binary files /dev/null and b/exps/example/custom/voc_format/yolox_voc_nano/__pycache__/yolox_voc_nano.cpython-38.pyc differ diff --git a/exps/example/custom/voc_format/yolox_voc_nano/yolox_voc_nano.py b/exps/example/custom/voc_format/yolox_voc_nano/yolox_voc_nano.py new file mode 100644 index 000000000..0fb62da78 --- /dev/null +++ b/exps/example/custom/voc_format/yolox_voc_nano/yolox_voc_nano.py @@ -0,0 +1,163 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +# Copyright (c) Megvii, Inc. and its affiliates. 
+ +import os + +import torch +import torch.distributed as dist + +from yolox.data import get_yolox_datadir +from yolox.exp import Exp as MyExp + + +class Exp(MyExp): + def __init__(self): + super(Exp, self).__init__() + self.num_classes = 20 + self.depth = 0.33 + self.width = 0.25 + self.input_size = (416, 416) + self.mosaic_scale = (0.5, 1.5) + self.random_size = (10, 20) + self.test_size = (416, 416) + self.warmup_epochs = 1 + # ---------- transform config ------------ # + #self.mosaic_prob = 1.0 + self.enable_mixup = False + #self.mixup_prob = 1.0 + #self.hsv_prob = 1.0 + #self.flip_prob = 0.5 + self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0] + + def get_model(self, sublinear=False): + + def init_yolo(M): + for m in M.modules(): + if isinstance(m, nn.BatchNorm2d): + m.eps = 1e-3 + m.momentum = 0.03 + if "model" not in self.__dict__: + from yolox.models import YOLOX, YOLOPAFPN, YOLOXHead + in_channels = [256, 512, 1024] + # NANO model use depthwise = True, which is main difference. + backbone = YOLOPAFPN(self.depth, self.width, in_channels=in_channels, depthwise=True) + head = YOLOXHead(self.num_classes, self.width, in_channels=in_channels, depthwise=True) + self.model = YOLOX(backbone, head) + + self.model.apply(init_yolo) + self.model.head.initialize_biases(1e-2) + return self.model + + def get_data_loader(self, batch_size, is_distributed, no_aug=False, cache_img=False): + from yolox.data import ( + VOCDetection, + TrainTransform, + YoloBatchSampler, + DataLoader, + InfiniteSampler, + MosaicDetection, + worker_init_reset_seed, + ) + from yolox.utils import ( + wait_for_the_master, + get_local_rank, + ) + local_rank = get_local_rank() + + with wait_for_the_master(local_rank): + dataset = VOCDetection( + data_dir=os.path.join(get_yolox_datadir(), "pedestrian_voc"), + image_sets=[('train')], + img_size=self.input_size, + preproc=TrainTransform( + max_labels=50, + flip_prob=self.flip_prob, + hsv_prob=self.hsv_prob), + cache=cache_img, + ) + + dataset = MosaicDetection( + dataset, + mosaic=not no_aug, + img_size=self.input_size, + preproc=TrainTransform( + max_labels=120, + flip_prob=self.flip_prob, + hsv_prob=self.hsv_prob), + degrees=self.degrees, + translate=self.translate, + mosaic_scale=self.mosaic_scale, + mixup_scale=self.mixup_scale, + shear=self.shear, + enable_mixup=self.enable_mixup, + mosaic_prob=self.mosaic_prob, + mixup_prob=self.mixup_prob, + ) + + self.dataset = dataset + + if is_distributed: + batch_size = batch_size // dist.get_world_size() + + sampler = InfiniteSampler( + len(self.dataset), seed=self.seed if self.seed else 0 + ) + + batch_sampler = YoloBatchSampler( + sampler=sampler, + batch_size=batch_size, + drop_last=False, + mosaic=not no_aug, + ) + + dataloader_kwargs = {"num_workers": self.data_num_workers, "pin_memory": True} + dataloader_kwargs["batch_sampler"] = batch_sampler + + # Make sure each process has different random seed, especially for 'fork' method + dataloader_kwargs["worker_init_fn"] = worker_init_reset_seed + + train_loader = DataLoader(self.dataset, **dataloader_kwargs) + + return train_loader + + def get_eval_loader(self, batch_size, is_distributed, testdev=False, legacy=False): + from yolox.data import VOCDetection, ValTransform + + valdataset = VOCDetection( + data_dir=os.path.join(get_yolox_datadir(), "pedestrian_voc"), + image_sets=[('valid')], + img_size=self.test_size, + preproc=ValTransform(legacy=legacy), + ) + + if is_distributed: + batch_size = batch_size // dist.get_world_size() + sampler = 
torch.utils.data.distributed.DistributedSampler( + valdataset, shuffle=False + ) + else: + sampler = torch.utils.data.SequentialSampler(valdataset) + + dataloader_kwargs = { + "num_workers": self.data_num_workers, + "pin_memory": True, + "sampler": sampler, + } + dataloader_kwargs["batch_size"] = batch_size + val_loader = torch.utils.data.DataLoader(valdataset, **dataloader_kwargs) + + return val_loader + + def get_evaluator(self, batch_size, is_distributed, testdev=False, legacy=False): + from yolox.evaluators import VOCEvaluator + + val_loader = self.get_eval_loader(batch_size, is_distributed, testdev, legacy) + evaluator = VOCEvaluator( + dataloader=val_loader, + img_size=self.test_size, + confthre=self.test_conf, + nmsthre=self.nmsthre, + num_classes=self.num_classes, + ) + return evaluator diff --git a/exps/example/custom/voc_format/yolox_voc_nano_adam/yolox_voc_nano_adam.py b/exps/example/custom/voc_format/yolox_voc_nano_adam/yolox_voc_nano_adam.py new file mode 100644 index 000000000..11b82f646 --- /dev/null +++ b/exps/example/custom/voc_format/yolox_voc_nano_adam/yolox_voc_nano_adam.py @@ -0,0 +1,191 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +# Copyright (c) Megvii, Inc. and its affiliates. + +import os + +import torch +import torch.distributed as dist + +from yolox.data import get_yolox_datadir +from yolox.exp import Exp as MyExp + + +class Exp(MyExp): + def __init__(self): + super(Exp, self).__init__() + self.num_classes = 20 + self.depth = 0.33 + self.width = 0.25 + self.input_size = (416, 416) + self.mosaic_scale = (0.5, 1.5) + self.random_size = (10, 20) + self.test_size = (416, 416) + self.eps = 1e-8 + self.warmup_epochs = 1 + # ---------- transform config ------------ # + #self.mosaic_prob = 1.0 + self.enable_mixup = False + #self.mixup_prob = 1.0 + #self.hsv_prob = 1.0 + #self.flip_prob = 0.5 + self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0] + + def get_model(self, sublinear=False): + + def init_yolo(M): + for m in M.modules(): + if isinstance(m, nn.BatchNorm2d): + m.eps = 1e-3 + m.momentum = 0.03 + if "model" not in self.__dict__: + from yolox.models import YOLOX, YOLOPAFPN, YOLOXHead + in_channels = [256, 512, 1024] + # NANO model use depthwise = True, which is main difference. 
+ backbone = YOLOPAFPN(self.depth, self.width, in_channels=in_channels, depthwise=True) + head = YOLOXHead(self.num_classes, self.width, in_channels=in_channels, depthwise=True) + self.model = YOLOX(backbone, head) + + self.model.apply(init_yolo) + self.model.head.initialize_biases(1e-2) + return self.model + + def get_optimizer(self, batch_size): + if "optimizer" not in self.__dict__: + if self.warmup_epochs > 0: + lr = self.warmup_lr + else: + lr = self.basic_lr_per_img * batch_size + + pg0, pg1, pg2 = [], [], [] # optimizer parameter groups + + for k, v in self.model.named_modules(): + if hasattr(v, "bias") and isinstance(v.bias, nn.Parameter): + pg2.append(v.bias) # biases + if isinstance(v, nn.BatchNorm2d) or "bn" in k: + pg0.append(v.weight) # no decay + elif hasattr(v, "weight") and isinstance(v.weight, nn.Parameter): + pg1.append(v.weight) # apply decay + + optimizer = torch.optim.Adam( + pg0, lr=lr, eps=self.eps, amsgrad=False + ) + optimizer.add_param_group( + {"params": pg1, "weight_decay": self.weight_decay} + ) # add pg1 with weight_decay + optimizer.add_param_group({"params": pg2}) + self.optimizer = optimizer + return self.optimizer + + def get_data_loader(self, batch_size, is_distributed, no_aug=False, cache_img=False): + from yolox.data import ( + VOCDetection, + TrainTransform, + YoloBatchSampler, + DataLoader, + InfiniteSampler, + MosaicDetection, + worker_init_reset_seed, + ) + from yolox.utils import ( + wait_for_the_master, + get_local_rank, + ) + local_rank = get_local_rank() + + with wait_for_the_master(local_rank): + dataset = VOCDetection( + data_dir=os.path.join(get_yolox_datadir(), "VOCdevkit"), + image_sets=[('2007', 'trainval'), ('2012', 'trainval')], + img_size=self.input_size, + preproc=TrainTransform( + max_labels=50, + flip_prob=self.flip_prob, + hsv_prob=self.hsv_prob), + cache=cache_img, + ) + + dataset = MosaicDetection( + dataset, + mosaic=not no_aug, + img_size=self.input_size, + preproc=TrainTransform( + max_labels=120, + flip_prob=self.flip_prob, + hsv_prob=self.hsv_prob), + degrees=self.degrees, + translate=self.translate, + mosaic_scale=self.mosaic_scale, + mixup_scale=self.mixup_scale, + shear=self.shear, + enable_mixup=self.enable_mixup, + mosaic_prob=self.mosaic_prob, + mixup_prob=self.mixup_prob, + ) + + self.dataset = dataset + + if is_distributed: + batch_size = batch_size // dist.get_world_size() + + sampler = InfiniteSampler( + len(self.dataset), seed=self.seed if self.seed else 0 + ) + + batch_sampler = YoloBatchSampler( + sampler=sampler, + batch_size=batch_size, + drop_last=False, + mosaic=not no_aug, + ) + + dataloader_kwargs = {"num_workers": self.data_num_workers, "pin_memory": True} + dataloader_kwargs["batch_sampler"] = batch_sampler + + # Make sure each process has different random seed, especially for 'fork' method + dataloader_kwargs["worker_init_fn"] = worker_init_reset_seed + + train_loader = DataLoader(self.dataset, **dataloader_kwargs) + + return train_loader + + def get_eval_loader(self, batch_size, is_distributed, testdev=False, legacy=False): + from yolox.data import VOCDetection, ValTransform + + valdataset = VOCDetection( + data_dir=os.path.join(get_yolox_datadir(), "VOCdevkit"), + image_sets=[('2007', 'test')], + img_size=self.test_size, + preproc=ValTransform(legacy=legacy), + ) + + if is_distributed: + batch_size = batch_size // dist.get_world_size() + sampler = torch.utils.data.distributed.DistributedSampler( + valdataset, shuffle=False + ) + else: + sampler = torch.utils.data.SequentialSampler(valdataset) + + 
dataloader_kwargs = { + "num_workers": self.data_num_workers, + "pin_memory": True, + "sampler": sampler, + } + dataloader_kwargs["batch_size"] = batch_size + val_loader = torch.utils.data.DataLoader(valdataset, **dataloader_kwargs) + + return val_loader + + def get_evaluator(self, batch_size, is_distributed, testdev=False, legacy=False): + from yolox.evaluators import VOCEvaluator + + val_loader = self.get_eval_loader(batch_size, is_distributed, testdev, legacy) + evaluator = VOCEvaluator( + dataloader=val_loader, + img_size=self.test_size, + confthre=self.test_conf, + nmsthre=self.nmsthre, + num_classes=self.num_classes, + ) + return evaluator diff --git a/exps/example/custom/voc_format/yolox_voc_s/__pycache__/yolox_voc_s.cpython-38.pyc b/exps/example/custom/voc_format/yolox_voc_s/__pycache__/yolox_voc_s.cpython-38.pyc new file mode 100644 index 000000000..cea337ed0 Binary files /dev/null and b/exps/example/custom/voc_format/yolox_voc_s/__pycache__/yolox_voc_s.cpython-38.pyc differ diff --git a/exps/example/yolox_voc/yolox_voc_s.py b/exps/example/custom/voc_format/yolox_voc_s/yolox_voc_s.py similarity index 100% rename from exps/example/yolox_voc/yolox_voc_s.py rename to exps/example/custom/voc_format/yolox_voc_s/yolox_voc_s.py diff --git a/exps/example/custom/voc_format/yolox_voc_tiny/yolox_voc_tiny.py b/exps/example/custom/voc_format/yolox_voc_tiny/yolox_voc_tiny.py new file mode 100644 index 000000000..d8ffbdac9 --- /dev/null +++ b/exps/example/custom/voc_format/yolox_voc_tiny/yolox_voc_tiny.py @@ -0,0 +1,142 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +# Copyright (c) Megvii, Inc. and its affiliates. + +import os + +import torch +import torch.distributed as dist + +from yolox.data import get_yolox_datadir +from yolox.exp import Exp as MyExp + + +class Exp(MyExp): + def __init__(self): + super(Exp, self).__init__() + self.depth = 0.33 + self.width = 0.375 + self.scale = (0.5, 1.5) + self.random_size = (10, 20) + self.test_size = (416, 416) + self.warmup_epochs = 1 + # ---------- transform config ------------ # + #self.mosaic_prob = 1.0 + self.enable_mixup = False + #self.mixup_prob = 1.0 + #self.hsv_prob = 1.0 + #self.flip_prob = 0.5 + self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0] + + def get_data_loader(self, batch_size, is_distributed, no_aug=False, cache_img=False): + from yolox.data import ( + VOCDetection, + TrainTransform, + YoloBatchSampler, + DataLoader, + InfiniteSampler, + MosaicDetection, + worker_init_reset_seed, + ) + from yolox.utils import ( + wait_for_the_master, + get_local_rank, + ) + local_rank = get_local_rank() + + with wait_for_the_master(local_rank): + dataset = VOCDetection( + data_dir=os.path.join(get_yolox_datadir(), "VOCdevkit"), + image_sets=[('2007', 'trainval'), ('2012', 'trainval')], + img_size=self.input_size, + preproc=TrainTransform( + max_labels=50, + flip_prob=self.flip_prob, + hsv_prob=self.hsv_prob), + cache=cache_img, + ) + + dataset = MosaicDetection( + dataset, + mosaic=not no_aug, + img_size=self.input_size, + preproc=TrainTransform( + max_labels=120, + flip_prob=self.flip_prob, + hsv_prob=self.hsv_prob), + degrees=self.degrees, + translate=self.translate, + mosaic_scale=self.mosaic_scale, + mixup_scale=self.mixup_scale, + shear=self.shear, + enable_mixup=self.enable_mixup, + mosaic_prob=self.mosaic_prob, + mixup_prob=self.mixup_prob, + ) + + self.dataset = dataset + + if is_distributed: + batch_size = batch_size // dist.get_world_size() + + sampler = InfiniteSampler( + len(self.dataset), seed=self.seed if 
self.seed else 0 + ) + + batch_sampler = YoloBatchSampler( + sampler=sampler, + batch_size=batch_size, + drop_last=False, + mosaic=not no_aug, + ) + + dataloader_kwargs = {"num_workers": self.data_num_workers, "pin_memory": True} + dataloader_kwargs["batch_sampler"] = batch_sampler + + # Make sure each process has different random seed, especially for 'fork' method + dataloader_kwargs["worker_init_fn"] = worker_init_reset_seed + + train_loader = DataLoader(self.dataset, **dataloader_kwargs) + + return train_loader + + def get_eval_loader(self, batch_size, is_distributed, testdev=False, legacy=False): + from yolox.data import VOCDetection, ValTransform + + valdataset = VOCDetection( + data_dir=os.path.join(get_yolox_datadir(), "VOCdevkit"), + image_sets=[('2007', 'test')], + img_size=self.test_size, + preproc=ValTransform(legacy=legacy), + ) + + if is_distributed: + batch_size = batch_size // dist.get_world_size() + sampler = torch.utils.data.distributed.DistributedSampler( + valdataset, shuffle=False + ) + else: + sampler = torch.utils.data.SequentialSampler(valdataset) + + dataloader_kwargs = { + "num_workers": self.data_num_workers, + "pin_memory": True, + "sampler": sampler, + } + dataloader_kwargs["batch_size"] = batch_size + val_loader = torch.utils.data.DataLoader(valdataset, **dataloader_kwargs) + + return val_loader + + def get_evaluator(self, batch_size, is_distributed, testdev=False, legacy=False): + from yolox.evaluators import VOCEvaluator + + val_loader = self.get_eval_loader(batch_size, is_distributed, testdev, legacy) + evaluator = VOCEvaluator( + dataloader=val_loader, + img_size=self.test_size, + confthre=self.test_conf, + nmsthre=self.nmsthre, + num_classes=self.num_classes, + ) + return evaluator diff --git a/exps/example/yolox_pedestrian/coco_format/__pycache__/nano.cpython-38.pyc b/exps/example/yolox_pedestrian/coco_format/__pycache__/nano.cpython-38.pyc new file mode 100644 index 000000000..c5a2714b6 Binary files /dev/null and b/exps/example/yolox_pedestrian/coco_format/__pycache__/nano.cpython-38.pyc differ diff --git a/exps/example/yolox_pedestrian/coco_format/yolox_nano.py b/exps/example/yolox_pedestrian/coco_format/yolox_nano.py new file mode 100644 index 000000000..90cc639ad --- /dev/null +++ b/exps/example/yolox_pedestrian/coco_format/yolox_nano.py @@ -0,0 +1,48 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +# Copyright (c) Megvii, Inc. and its affiliates. + +import os + +import torch.nn as nn + +from yolox.exp import Exp as MyExp + + +class Exp(MyExp): + def __init__(self): + super(Exp, self).__init__() + self.depth = 0.33 + self.width = 0.25 + self.input_size = (416, 416) + self.mosaic_scale = (0.5, 1.5) + self.random_size = (10, 20) + self.test_size = (416, 416) + self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0] + self.enable_mixup = False + + # Define yourself dataset path + self.data_dir = "datasets/pedestrian_coco" + self.train_ann = "train_annotations.json" + self.val_ann = "valid_annotations.json" + + self.num_classes = 1 + + def get_model(self, sublinear=False): + + def init_yolo(M): + for m in M.modules(): + if isinstance(m, nn.BatchNorm2d): + m.eps = 1e-3 + m.momentum = 0.03 + if "model" not in self.__dict__: + from yolox.models import YOLOX, YOLOPAFPN, YOLOXHead + in_channels = [256, 512, 1024] + # NANO model use depthwise = True, which is main difference. 
+ backbone = YOLOPAFPN(self.depth, self.width, in_channels=in_channels, depthwise=True) + head = YOLOXHead(self.num_classes, self.width, in_channels=in_channels, depthwise=True) + self.model = YOLOX(backbone, head) + + self.model.apply(init_yolo) + self.model.head.initialize_biases(1e-2) + return self.model diff --git a/exps/example/yolox_pedestrian/coco_format/yolox_s.py b/exps/example/yolox_pedestrian/coco_format/yolox_s.py new file mode 100644 index 000000000..97291a30d --- /dev/null +++ b/exps/example/yolox_pedestrian/coco_format/yolox_s.py @@ -0,0 +1,25 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +# Copyright (c) Megvii, Inc. and its affiliates. +import os + +from yolox.exp import Exp as MyExp + + +class Exp(MyExp): + def __init__(self): + super(Exp, self).__init__() + self.depth = 0.33 + self.width = 0.50 + self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0] + + # Define yourself dataset path + self.data_dir = "datasets/pedestrian_coco" + self.train_ann = "train_annotations.json" + self.val_ann = "valid_annotations.json" + + self.num_classes = 1 + + self.max_epoch = 10 + self.data_num_workers = 4 + self.eval_interval = 1 diff --git a/exps/example/yolox_pedestrian/voc_format/__pycache__/yolox_voc_nano.cpython-38.pyc b/exps/example/yolox_pedestrian/voc_format/__pycache__/yolox_voc_nano.cpython-38.pyc new file mode 100644 index 000000000..4472ab2e8 Binary files /dev/null and b/exps/example/yolox_pedestrian/voc_format/__pycache__/yolox_voc_nano.cpython-38.pyc differ diff --git a/exps/example/yolox_pedestrian/voc_format/yolox_voc_nano.py b/exps/example/yolox_pedestrian/voc_format/yolox_voc_nano.py new file mode 100644 index 000000000..c38679cca --- /dev/null +++ b/exps/example/yolox_pedestrian/voc_format/yolox_voc_nano.py @@ -0,0 +1,163 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +# Copyright (c) Megvii, Inc. and its affiliates. + +import os + +import torch +import torch.distributed as dist + +from yolox.data import get_yolox_datadir +from yolox.exp import Exp as MyExp + + +class Exp(MyExp): + def __init__(self): + super(Exp, self).__init__() + self.num_classes = 1 + self.depth = 0.33 + self.width = 0.25 + #self.input_size = (416, 416) + self.mosaic_scale = (0.5, 1.5) + self.random_size = (10, 20) + #self.test_size = (416, 416) + self.warmup_epochs = 1 + # ---------- transform config ------------ # + #self.mosaic_prob = 1.0 + self.enable_mixup = False + #self.mixup_prob = 1.0 + #self.hsv_prob = 1.0 + #self.flip_prob = 0.5 + self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0] + + def get_model(self, sublinear=False): + + def init_yolo(M): + for m in M.modules(): + if isinstance(m, nn.BatchNorm2d): + m.eps = 1e-3 + m.momentum = 0.03 + if "model" not in self.__dict__: + from yolox.models import YOLOX, YOLOPAFPN, YOLOXHead + in_channels = [256, 512, 1024] + # NANO model use depthwise = True, which is main difference. 
+ backbone = YOLOPAFPN(self.depth, self.width, in_channels=in_channels, depthwise=True) + head = YOLOXHead(self.num_classes, self.width, in_channels=in_channels, depthwise=True) + self.model = YOLOX(backbone, head) + + self.model.apply(init_yolo) + self.model.head.initialize_biases(1e-2) + return self.model + + def get_data_loader(self, batch_size, is_distributed, no_aug=False, cache_img=False): + from yolox.data import ( + VOCDetection, + TrainTransform, + YoloBatchSampler, + DataLoader, + InfiniteSampler, + MosaicDetection, + worker_init_reset_seed, + ) + from yolox.utils import ( + wait_for_the_master, + get_local_rank, + ) + local_rank = get_local_rank() + + with wait_for_the_master(local_rank): + dataset = VOCDetection( + data_dir=os.path.join(get_yolox_datadir(), "pedestrian_voc"), + image_sets=[('train')], + img_size=self.input_size, + preproc=TrainTransform( + max_labels=50, + flip_prob=self.flip_prob, + hsv_prob=self.hsv_prob), + cache=cache_img, + ) + + dataset = MosaicDetection( + dataset, + mosaic=not no_aug, + img_size=self.input_size, + preproc=TrainTransform( + max_labels=120, + flip_prob=self.flip_prob, + hsv_prob=self.hsv_prob), + degrees=self.degrees, + translate=self.translate, + mosaic_scale=self.mosaic_scale, + mixup_scale=self.mixup_scale, + shear=self.shear, + enable_mixup=self.enable_mixup, + mosaic_prob=self.mosaic_prob, + mixup_prob=self.mixup_prob, + ) + + self.dataset = dataset + + if is_distributed: + batch_size = batch_size // dist.get_world_size() + + sampler = InfiniteSampler( + len(self.dataset), seed=self.seed if self.seed else 0 + ) + + batch_sampler = YoloBatchSampler( + sampler=sampler, + batch_size=batch_size, + drop_last=False, + mosaic=not no_aug, + ) + + dataloader_kwargs = {"num_workers": self.data_num_workers, "pin_memory": True} + dataloader_kwargs["batch_sampler"] = batch_sampler + + # Make sure each process has different random seed, especially for 'fork' method + dataloader_kwargs["worker_init_fn"] = worker_init_reset_seed + + train_loader = DataLoader(self.dataset, **dataloader_kwargs) + + return train_loader + + def get_eval_loader(self, batch_size, is_distributed, testdev=False, legacy=False): + from yolox.data import VOCDetection, ValTransform + + valdataset = VOCDetection( + data_dir=os.path.join(get_yolox_datadir(), "pedestrian_voc"), + image_sets=[('valid')], + img_size=self.test_size, + preproc=ValTransform(legacy=legacy), + ) + + if is_distributed: + batch_size = batch_size // dist.get_world_size() + sampler = torch.utils.data.distributed.DistributedSampler( + valdataset, shuffle=False + ) + else: + sampler = torch.utils.data.SequentialSampler(valdataset) + + dataloader_kwargs = { + "num_workers": self.data_num_workers, + "pin_memory": True, + "sampler": sampler, + } + dataloader_kwargs["batch_size"] = batch_size + val_loader = torch.utils.data.DataLoader(valdataset, **dataloader_kwargs) + + return val_loader + + def get_evaluator(self, batch_size, is_distributed, testdev=False, legacy=False): + from yolox.evaluators import VOCEvaluator + + val_loader = self.get_eval_loader(batch_size, is_distributed, testdev, legacy) + evaluator = VOCEvaluator( + dataloader=val_loader, + img_size=self.test_size, + confthre=self.test_conf, + nmsthre=self.nmsthre, + num_classes=self.num_classes, + ) + return evaluator diff --git a/exps/example/yolox_pedestrian/voc_format/yolox_voc_nano_adam.py b/exps/example/yolox_pedestrian/voc_format/yolox_voc_nano_adam.py new file mode 100644 index 000000000..fb5cb1e60 --- /dev/null +++ 
b/exps/example/yolox_pedestrian/voc_format/yolox_voc_nano_adam.py @@ -0,0 +1,191 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +# Copyright (c) Megvii, Inc. and its affiliates. + +import os + +import torch +import torch.distributed as dist + +from yolox.data import get_yolox_datadir +from yolox.exp import Exp as MyExp + + +class Exp(MyExp): + def __init__(self): + super(Exp, self).__init__() + self.num_classes = 1 + self.depth = 0.33 + self.width = 0.25 + self.input_size = (416, 416) + self.mosaic_scale = (0.5, 1.5) + self.random_size = (10, 20) + self.test_size = (416, 416) + self.eps = 1e-8 + self.warmup_epochs = 1 + # ---------- transform config ------------ # + #self.mosaic_prob = 1.0 + self.enable_mixup = False + #self.mixup_prob = 1.0 + #self.hsv_prob = 1.0 + #self.flip_prob = 0.5 + self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0] + + def get_model(self, sublinear=False): + + def init_yolo(M): + for m in M.modules(): + if isinstance(m, nn.BatchNorm2d): + m.eps = 1e-3 + m.momentum = 0.03 + if "model" not in self.__dict__: + from yolox.models import YOLOX, YOLOPAFPN, YOLOXHead + in_channels = [256, 512, 1024] + # NANO model use depthwise = True, which is main difference. + backbone = YOLOPAFPN(self.depth, self.width, in_channels=in_channels, depthwise=True) + head = YOLOXHead(self.num_classes, self.width, in_channels=in_channels, depthwise=True) + self.model = YOLOX(backbone, head) + + self.model.apply(init_yolo) + self.model.head.initialize_biases(1e-2) + return self.model + + def get_optimizer(self, batch_size): + if "optimizer" not in self.__dict__: + if self.warmup_epochs > 0: + lr = self.warmup_lr + else: + lr = self.basic_lr_per_img * batch_size + + pg0, pg1, pg2 = [], [], [] # optimizer parameter groups + + for k, v in self.model.named_modules(): + if hasattr(v, "bias") and isinstance(v.bias, nn.Parameter): + pg2.append(v.bias) # biases + if isinstance(v, nn.BatchNorm2d) or "bn" in k: + pg0.append(v.weight) # no decay + elif hasattr(v, "weight") and isinstance(v.weight, nn.Parameter): + pg1.append(v.weight) # apply decay + + optimizer = torch.optim.Adam( + pg0, lr=lr, eps=self.eps, amsgrad=False + ) + optimizer.add_param_group( + {"params": pg1, "weight_decay": self.weight_decay} + ) # add pg1 with weight_decay + optimizer.add_param_group({"params": pg2}) + self.optimizer = optimizer + return self.optimizer + + def get_data_loader(self, batch_size, is_distributed, no_aug=False, cache_img=False): + from yolox.data import ( + VOCDetection, + TrainTransform, + YoloBatchSampler, + DataLoader, + InfiniteSampler, + MosaicDetection, + worker_init_reset_seed, + ) + from yolox.utils import ( + wait_for_the_master, + get_local_rank, + ) + local_rank = get_local_rank() + + with wait_for_the_master(local_rank): + dataset = VOCDetection( + data_dir=os.path.join(get_yolox_datadir(), "pedestrian_voc"), + image_sets=[('train')], + img_size=self.input_size, + preproc=TrainTransform( + max_labels=50, + flip_prob=self.flip_prob, + hsv_prob=self.hsv_prob), + cache=cache_img, + ) + + dataset = MosaicDetection( + dataset, + mosaic=not no_aug, + img_size=self.input_size, + preproc=TrainTransform( + max_labels=120, + flip_prob=self.flip_prob, + hsv_prob=self.hsv_prob), + degrees=self.degrees, + translate=self.translate, + mosaic_scale=self.mosaic_scale, + mixup_scale=self.mixup_scale, + shear=self.shear, + enable_mixup=self.enable_mixup, + mosaic_prob=self.mosaic_prob, + mixup_prob=self.mixup_prob, + ) + + self.dataset = dataset + + if is_distributed: + batch_size = 
batch_size // dist.get_world_size() + + sampler = InfiniteSampler( + len(self.dataset), seed=self.seed if self.seed else 0 + ) + + batch_sampler = YoloBatchSampler( + sampler=sampler, + batch_size=batch_size, + drop_last=False, + mosaic=not no_aug, + ) + + dataloader_kwargs = {"num_workers": self.data_num_workers, "pin_memory": True} + dataloader_kwargs["batch_sampler"] = batch_sampler + + # Make sure each process has different random seed, especially for 'fork' method + dataloader_kwargs["worker_init_fn"] = worker_init_reset_seed + + train_loader = DataLoader(self.dataset, **dataloader_kwargs) + + return train_loader + + def get_eval_loader(self, batch_size, is_distributed, testdev=False, legacy=False): + from yolox.data import VOCDetection, ValTransform + + valdataset = VOCDetection( + data_dir=os.path.join(get_yolox_datadir(), "pedestrian_voc"), + image_sets=[('valid')], + img_size=self.test_size, + preproc=ValTransform(legacy=legacy), + ) + + if is_distributed: + batch_size = batch_size // dist.get_world_size() + sampler = torch.utils.data.distributed.DistributedSampler( + valdataset, shuffle=False + ) + else: + sampler = torch.utils.data.SequentialSampler(valdataset) + + dataloader_kwargs = { + "num_workers": self.data_num_workers, + "pin_memory": True, + "sampler": sampler, + } + dataloader_kwargs["batch_size"] = batch_size + val_loader = torch.utils.data.DataLoader(valdataset, **dataloader_kwargs) + + return val_loader + + def get_evaluator(self, batch_size, is_distributed, testdev=False, legacy=False): + from yolox.evaluators import VOCEvaluator + + val_loader = self.get_eval_loader(batch_size, is_distributed, testdev, legacy) + evaluator = VOCEvaluator( + dataloader=val_loader, + img_size=self.test_size, + confthre=self.test_conf, + nmsthre=self.nmsthre, + num_classes=self.num_classes, + ) + return evaluator diff --git a/exps/example/yolox_pedestrian/voc_format/yolox_voc_s.py b/exps/example/yolox_pedestrian/voc_format/yolox_voc_s.py new file mode 100644 index 000000000..4801f559d --- /dev/null +++ b/exps/example/yolox_pedestrian/voc_format/yolox_voc_s.py @@ -0,0 +1,138 @@ +# encoding: utf-8 +import os + +import torch +import torch.distributed as dist + +from yolox.data import get_yolox_datadir +from yolox.exp import Exp as MyExp + + +class Exp(MyExp): + def __init__(self): + super(Exp, self).__init__() + self.num_classes = 1 + self.depth = 0.33 + self.width = 0.50 + self.warmup_epochs = 1 + + # ---------- transform config ------------ # + self.mosaic_prob = 1.0 + self.mixup_prob = 1.0 + self.hsv_prob = 1.0 + self.flip_prob = 0.5 + + self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0] + + def get_data_loader(self, batch_size, is_distributed, no_aug=False, cache_img=False): + from yolox.data import ( + VOCDetection, + TrainTransform, + YoloBatchSampler, + DataLoader, + InfiniteSampler, + MosaicDetection, + worker_init_reset_seed, + ) + from yolox.utils import ( + wait_for_the_master, + get_local_rank, + ) + local_rank = get_local_rank() + + with wait_for_the_master(local_rank): + dataset = VOCDetection( + data_dir=os.path.join(get_yolox_datadir(), "pedestrian_voc"), + image_sets=[('train')], + img_size=self.input_size, + preproc=TrainTransform( + max_labels=50, + flip_prob=self.flip_prob, + hsv_prob=self.hsv_prob), + cache=cache_img, + ) + + dataset = MosaicDetection( + dataset, + mosaic=not no_aug, + img_size=self.input_size, + preproc=TrainTransform( + max_labels=120, + flip_prob=self.flip_prob, + hsv_prob=self.hsv_prob), + degrees=self.degrees, + 
translate=self.translate, + mosaic_scale=self.mosaic_scale, + mixup_scale=self.mixup_scale, + shear=self.shear, + enable_mixup=self.enable_mixup, + mosaic_prob=self.mosaic_prob, + mixup_prob=self.mixup_prob, + ) + + self.dataset = dataset + + if is_distributed: + batch_size = batch_size // dist.get_world_size() + + sampler = InfiniteSampler( + len(self.dataset), seed=self.seed if self.seed else 0 + ) + + batch_sampler = YoloBatchSampler( + sampler=sampler, + batch_size=batch_size, + drop_last=False, + mosaic=not no_aug, + ) + + dataloader_kwargs = {"num_workers": self.data_num_workers, "pin_memory": True} + dataloader_kwargs["batch_sampler"] = batch_sampler + + # Make sure each process has different random seed, especially for 'fork' method + dataloader_kwargs["worker_init_fn"] = worker_init_reset_seed + + train_loader = DataLoader(self.dataset, **dataloader_kwargs) + + return train_loader + + def get_eval_loader(self, batch_size, is_distributed, testdev=False, legacy=False): + from yolox.data import VOCDetection, ValTransform + + valdataset = VOCDetection( + data_dir=os.path.join(get_yolox_datadir(), "pedestrian_voc"), + image_sets=[('valid')], + img_size=self.test_size, + preproc=ValTransform(legacy=legacy), + ) + + if is_distributed: + batch_size = batch_size // dist.get_world_size() + sampler = torch.utils.data.distributed.DistributedSampler( + valdataset, shuffle=False + ) + else: + sampler = torch.utils.data.SequentialSampler(valdataset) + + dataloader_kwargs = { + "num_workers": self.data_num_workers, + "pin_memory": True, + "sampler": sampler, + } + dataloader_kwargs["batch_size"] = batch_size + val_loader = torch.utils.data.DataLoader(valdataset, **dataloader_kwargs) + + return val_loader + + def get_evaluator(self, batch_size, is_distributed, testdev=False, legacy=False): + from yolox.evaluators import VOCEvaluator + + val_loader = self.get_eval_loader(batch_size, is_distributed, testdev, legacy) + evaluator = VOCEvaluator( + dataloader=val_loader, + img_size=self.test_size, + confthre=self.test_conf, + nmsthre=self.nmsthre, + num_classes=self.num_classes, + ) + return evaluator diff --git a/requirements.txt b/requirements.txt index 7227f09b4..46efe646f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -12,6 +12,6 @@ tabulate # verified versions # pycocotools corresponds to https://github.com/ppwwyyxx/cocoapi pycocotools>=2.0.2 -onnx==1.8.1 -onnxruntime==1.8.0 +onnx>=1.8.1 +onnxruntime>=1.8.0 onnx-simplifier==0.3.5 diff --git a/tools/demo.py b/tools/demo.py index b16598d5f..dc065ca20 100644 --- a/tools/demo.py +++ b/tools/demo.py @@ -12,7 +12,7 @@ import torch from yolox.data.data_augment import ValTransform -from yolox.data.datasets import COCO_CLASSES +from yolox.data.datasets import COCO_CLASSES, VOC_CLASSES from yolox.exp import get_exp from yolox.utils import fuse_model, get_model_info, postprocess, vis @@ -169,6 +169,29 @@ def visual(self, output, img_info, cls_conf=0.35): ratio = img_info["ratio"] img = img_info["raw_img"] if output is None: + font = cv2.FONT_HERSHEY_SIMPLEX + class_count = {} + class_AP = {} + for i in self.cls_names: + class_count[i] = 0 + class_AP[i] = 0.0 + x0 = 15 + y0 = 0 + row = 0 + for k in class_count: + if((y0+row+50)>=img.shape[0]): + x0 = x0+200 + y0 = 25 + row = 0 + else: + row = row+25 + cv2.putText(img, str(k)+": "+str(class_count[k]), (x0,y0+row), font, 0.8, (0, 255, 255), thickness=2) + if class_count[k] !=0: + class_AP[k]=class_AP[k]/class_count[k] + else: + class_AP[k]=0.0 + row = row+25 + cv2.putText(img, "AP"+": 
"+'{:.1f}%'.format(class_AP[k]), (x0,y0+row), font, 0.8, (0, 255, 255), thickness=2) return img output = output.cpu() diff --git a/tools/demo_sliding_window.py b/tools/demo_sliding_window.py new file mode 100644 index 000000000..7a0e20f00 --- /dev/null +++ b/tools/demo_sliding_window.py @@ -0,0 +1,416 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +# Copyright (c) Megvii, Inc. and its affiliates. + +import argparse +import os +import time +from loguru import logger + +import cv2 + +import torch + +from yolox.data.data_augment import preproc, sliding_window +from yolox.data.datasets import COCO_CLASSES, VOC_CLASSES +from yolox.exp import get_exp +from yolox.utils import fuse_model, get_model_info, postprocess, vis + +IMAGE_EXT = [".jpg", ".jpeg", ".webp", ".bmp", ".png"] + + +def make_parser(): + parser = argparse.ArgumentParser("YOLOX Demo!") + parser.add_argument( + "demo", default="image", help="demo type, eg. image, video and webcam" + ) + parser.add_argument("-expn", "--experiment-name", type=str, default=None) + parser.add_argument("-n", "--name", type=str, default=None, help="model name") + + parser.add_argument( + "--path", default="./assets/dog.jpg", help="path to images or video" + ) + parser.add_argument("--camid", type=int, default=0, help="webcam demo camera id") + parser.add_argument( + "--save_result", + action="store_true", + help="whether to save the inference result of image/video", + ) + + # exp file + parser.add_argument( + "-f", + "--exp_file", + default=None, + type=str, + help="please input your experiment description file", + ) + parser.add_argument("-c", "--ckpt", default=None, type=str, help="ckpt for eval") + parser.add_argument( + "--device", + default="cpu", + type=str, + help="device to run our model, can either be cpu or gpu", + ) + parser.add_argument("--conf", default=0.3, type=float, help="test conf") + parser.add_argument("--nms", default=0.3, type=float, help="test nms threshold") + parser.add_argument("--tsize", default=None, type=int, help="test img size") + parser.add_argument( + "--fp16", + dest="fp16", + default=False, + action="store_true", + help="Adopting mix precision evaluating.", + ) + """ + parser.add_argument( + "--legacy", + dest="legacy", + default=False, + action="store_true", + help="To be compatible with older versions", + ) + """ + parser.add_argument( + "--fuse", + dest="fuse", + default=False, + action="store_true", + help="Fuse conv and bn for testing.", + ) + parser.add_argument( + "--trt", + dest="trt", + default=False, + action="store_true", + help="Using TensorRT model for testing.", + ) + return parser + + +def get_image_list(path): + image_names = [] + for maindir, subdir, file_name_list in os.walk(path): + for filename in file_name_list: + apath = os.path.join(maindir, filename) + ext = os.path.splitext(apath)[1] + if ext in IMAGE_EXT: + image_names.append(apath) + return image_names + + +class Predictor(object): + def __init__( + self, + model, + exp, + cls_names=COCO_CLASSES, + trt_file=None, + decoder=None, + device="cpu", + #fp16=False, + #legacy=False, + ): + self.model = model + self.cls_names = cls_names + self.decoder = decoder + self.num_classes = exp.num_classes + self.confthre = exp.test_conf + self.nmsthre = exp.nmsthre + self.test_size = exp.test_size + self.device = device + #self.fp16 = fp16 + #self.preproc = ValTransform(legacy=legacy) + if trt_file is not None: + from torch2trt import TRTModule + + model_trt = TRTModule() + model_trt.load_state_dict(torch.load(trt_file)) + + x = torch.ones(1, 3, 
exp.test_size[0], exp.test_size[1]).cuda() + self.model(x) + self.model = model_trt + self.rgb_means = (0.485, 0.456, 0.406) + self.std = (0.229, 0.224, 0.225) + + + def inference(self, img): + img_info = {"id": 0} + if isinstance(img, str): + img_info["file_name"] = os.path.basename(img) + img = cv2.imread(img) + else: + img_info["file_name"] = None + + height, width = img.shape[:2] + img_info["height"] = height + img_info["width"] = width + img_info["raw_img"] = img + + #img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT,value=(0,0,0)) + #ratio = min(self.test_size[0] / img.shape[0], self.test_size[1] / img.shape[1]) + #img_info["ratio"] = ratio + """ + if (img.shape[0]>exp.test_size[0]): + h_r = (img.shape[0]//exp.test_size[0]+1)*exp.test_size[0]-img.shape[0] + elif(img.shape[0]exp.test_size[1]): + w_r = (img.shape[1]//exp.test_size[1]+1)*exp.test_size[1]-img.shape[1] + elif(img.shape[1]=img.shape[0]): + x0 = x0+200 + y0 = 25 + row = 0 + else: + row = row+25 + cv2.putText(img, str(k)+": "+str(class_count[k]), (x0,y0+row), font, 0.8, (0, 255, 255), thickness=2) + if class_count[k] !=0: + class_AP[k]=class_AP[k]/class_count[k] + else: + class_AP[k]=0.0 + row = row+25 + cv2.putText(img, "AP"+": "+'{:.1f}%'.format(class_AP[k]), (x0,y0+row), font, 0.8, (0, 255, 255), thickness=2) + return img + output = output.cpu() + + bboxes = output[:, 0:4] + + # preprocessing: resize + #bboxes /= ratio + + cls = output[:, 6] + scores = output[:, 4] * output[:, 5] + + vis_res = vis(img, bboxes, scores, cls, cls_conf, self.cls_names) + return vis_res + + +def image_demo(predictor, vis_folder, path, current_time, save_result): + if os.path.isdir(path): + files = get_image_list(path) + else: + files = [path] + files.sort() + for image_name in files: + outputs, img_info = predictor.inference(image_name) + result_image = predictor.visual(outputs[0], img_info, predictor.confthre) + if save_result: + save_folder = os.path.join( + vis_folder, time.strftime("%Y_%m_%d_%H_%M_%S", current_time) + ) + os.makedirs(save_folder, exist_ok=True) + save_file_name = os.path.join(save_folder, os.path.basename(image_name)) + logger.info("Saving detection result in {}".format(save_file_name)) + cv2.imwrite(save_file_name, result_image) + ch = cv2.waitKey(0) + if ch == 27 or ch == ord("q") or ch == ord("Q"): + break + + +def imageflow_demo(predictor, vis_folder, current_time, args): + cap = cv2.VideoCapture(args.path if args.demo == "video" else args.camid) + width = cap.get(cv2.CAP_PROP_FRAME_WIDTH) # float + height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT) # float + fps = cap.get(cv2.CAP_PROP_FPS) + if args.save_result: + save_folder = os.path.join( + vis_folder, time.strftime("%Y_%m_%d_%H_%M_%S", current_time) + ) + os.makedirs(save_folder, exist_ok=True) + if args.demo == "video": + save_path = os.path.join(save_folder, os.path.basename(args.path)) + else: + save_path = os.path.join(save_folder, "camera.mp4") + logger.info(f"video save_path is {save_path}") + vid_writer = cv2.VideoWriter( + save_path, cv2.VideoWriter_fourcc(*"mp4v"), fps, (int(width), int(height)) + ) + while True: + ret_val, frame = cap.read() + if ret_val: + outputs, img_info = predictor.inference(frame) + result_frame = predictor.visual(outputs[0], img_info, predictor.confthre) + if args.save_result: + vid_writer.write(result_frame) + else: + cv2.namedWindow("yolox", cv2.WINDOW_NORMAL) + cv2.imshow("yolox", result_frame) + ch = cv2.waitKey(1) + if ch == 27 or ch == ord("q") or ch == ord("Q"): + break + else: + break + + +def main(exp, 
args): + if not args.experiment_name: + args.experiment_name = exp.exp_name + + file_name = os.path.join(exp.output_dir, args.experiment_name) + os.makedirs(file_name, exist_ok=True) + + vis_folder = None + if args.save_result: + vis_folder = os.path.join(file_name, "vis_res") + os.makedirs(vis_folder, exist_ok=True) + + if args.trt: + args.device = "gpu" + + logger.info("Args: {}".format(args)) + + if args.conf is not None: + exp.test_conf = args.conf + if args.nms is not None: + exp.nmsthre = args.nms + if args.tsize is not None: + exp.test_size = (args.tsize, args.tsize) + + model = exp.get_model() + logger.info("Model Summary: {}".format(get_model_info(model, exp.test_size))) + + if args.device == "gpu": + model.cuda() + if args.fp16: + model.half() # to FP16 + model.eval() + + if not args.trt: + if args.ckpt is None: + ckpt_file = os.path.join(file_name, "best_ckpt.pth") + else: + ckpt_file = args.ckpt + logger.info("loading checkpoint") + ckpt = torch.load(ckpt_file, map_location="cpu") + # load the model state dict + model.load_state_dict(ckpt["model"]) + logger.info("loaded checkpoint done.") + + if args.fuse: + logger.info("\tFusing model...") + model = fuse_model(model) + + if args.trt: + assert not args.fuse, "TensorRT model is not support model fusing!" + trt_file = os.path.join(file_name, "model_trt.pth") + assert os.path.exists( + trt_file + ), "TensorRT model is not found!\n Run python3 tools/trt.py first!" + model.head.decode_in_inference = False + decoder = model.head.decode_outputs + logger.info("Using TensorRT to inference") + else: + trt_file = None + decoder = None + + predictor = Predictor( + model, exp, COCO_CLASSES, trt_file, decoder, + args.device#, args.fp16, args.legacy, + ) + current_time = time.localtime() + if args.demo == "image": + image_demo(predictor, vis_folder, args.path, current_time, args.save_result) + elif args.demo == "video" or args.demo == "webcam": + imageflow_demo(predictor, vis_folder, current_time, args) + + +if __name__ == "__main__": + args = make_parser().parse_args() + exp = get_exp(args.exp_file, args.name) + + main(exp, args) diff --git a/yolox/data/data_augment.py b/yolox/data/data_augment.py index 21cd7b56d..f4da7580b 100644 --- a/yolox/data/data_augment.py +++ b/yolox/data/data_augment.py @@ -157,6 +157,13 @@ def preproc(img, input_size, swap=(2, 0, 1)): padded_img = np.ascontiguousarray(padded_img, dtype=np.float32) return padded_img, r +def sliding_window(image, YstepSize, XstepSize, windowSize): + # slide a window across the image + for y in range(0, image.shape[0], YstepSize): + for x in range(0, image.shape[1], XstepSize): + # yield the current window + yield (x, y, image[y:y + windowSize[1], x:x + windowSize[0]]) + class TrainTransform: def __init__(self, max_labels=50, flip_prob=0.5, hsv_prob=1.0): diff --git a/yolox/data/datasets/__init__.py b/yolox/data/datasets/__init__.py index dee2c9f48..6ea2be14f 100644 --- a/yolox/data/datasets/__init__.py +++ b/yolox/data/datasets/__init__.py @@ -6,4 +6,5 @@ from .coco_classes import COCO_CLASSES from .datasets_wrapper import ConcatDataset, Dataset, MixConcatDataset from .mosaicdetection import MosaicDetection +from .voc_classes import VOC_CLASSES from .voc import VOCDetection diff --git a/yolox/data/datasets/coco.py b/yolox/data/datasets/coco.py index 4fbdf8836..5ead905e4 100644 --- a/yolox/data/datasets/coco.py +++ b/yolox/data/datasets/coco.py @@ -40,8 +40,8 @@ class COCODataset(Dataset): def __init__( self, data_dir=None, - json_file="instances_train2017.json", - name="train2017", + 
json_file="train_annotations.json", + name="train", img_size=(416, 416), preproc=None, cache=False, @@ -57,7 +57,7 @@ def __init__( """ super().__init__(img_size) if data_dir is None: - data_dir = os.path.join(get_yolox_datadir(), "COCO") + data_dir = os.path.join(get_yolox_datadir(), "pedestrian_coco") self.data_dir = data_dir self.json_file = json_file diff --git a/yolox/data/datasets/coco_classes.py b/yolox/data/datasets/coco_classes.py index 17f5cbe6e..760945eaf 100644 --- a/yolox/data/datasets/coco_classes.py +++ b/yolox/data/datasets/coco_classes.py @@ -2,6 +2,10 @@ # -*- coding:utf-8 -*- # Copyright (c) Megvii, Inc. and its affiliates. +COCO_CLASSES = ( + "pedestrian", +) +""" COCO_CLASSES = ( "person", "bicycle", @@ -84,3 +88,4 @@ "hair drier", "toothbrush", ) +""" diff --git a/yolox/data/datasets/voc.py b/yolox/data/datasets/voc.py index 56675a297..09e9833de 100644 --- a/yolox/data/datasets/voc.py +++ b/yolox/data/datasets/voc.py @@ -120,6 +120,13 @@ def __init__( self._imgpath = os.path.join("%s", "JPEGImages", "%s.jpg") self._classes = VOC_CLASSES self.ids = list() + for name in image_sets: + rootpath = self.root + for line in open( + os.path.join(rootpath, "ImageSets", "Main", name + ".txt") + ): + self.ids.append((rootpath, line.strip())) + """ for (year, name) in image_sets: self._year = year rootpath = os.path.join(self.root, "VOC" + year) @@ -127,7 +134,7 @@ def __init__( os.path.join(rootpath, "ImageSets", "Main", name + ".txt") ): self.ids.append((rootpath, line.strip())) - + """ self.annotations = self._load_coco_annotations() self.imgs = None if cache: @@ -279,7 +286,9 @@ def evaluate_detections(self, all_boxes, output_dir=None): def _get_voc_results_file_template(self): filename = "comp4_det_test" + "_{:s}.txt" - filedir = os.path.join(self.root, "results", "VOC" + self._year, "Main") + #filedir = os.path.join(self.root, "results", "VOC" + self._year, "Main") + filedir = os.path.join(self.root, "results") + #filedir = os.path.join(self.root, "results", "VOC" + self._year, "Main") if not os.path.exists(filedir): os.makedirs(filedir) path = os.path.join(filedir, filename) @@ -311,12 +320,14 @@ def _write_voc_results_file(self, all_boxes): ) def _do_python_eval(self, output_dir="output", iou=0.5): - rootpath = os.path.join(self.root, "VOC" + self._year) + #rootpath = os.path.join(self.root, "VOC" + self._year) + rootpath = self.root name = self.image_set[0][1] annopath = os.path.join(rootpath, "Annotations", "{:s}.xml") imagesetfile = os.path.join(rootpath, "ImageSets", "Main", name + ".txt") cachedir = os.path.join( - self.root, "annotations_cache", "VOC" + self._year, name + #self.root, "annotations_cache", "VOC" + self._year, name + self.root, "annotations_cache" ) if not os.path.exists(cachedir): os.makedirs(cachedir) diff --git a/yolox/data/datasets/voc_classes.py b/yolox/data/datasets/voc_classes.py index 89354b3fd..438c5b78b 100644 --- a/yolox/data/datasets/voc_classes.py +++ b/yolox/data/datasets/voc_classes.py @@ -3,6 +3,11 @@ # Copyright (c) Megvii, Inc. and its affiliates. 
# VOC_CLASSES = ( '__background__', # always index 0 + +VOC_CLASSES = ( + "pedestrian", +) +""" VOC_CLASSES = ( "aeroplane", "bicycle", @@ -25,3 +30,4 @@ "train", "tvmonitor", ) +""" diff --git a/yolox/exp/yolox_base.py b/yolox/exp/yolox_base.py index 6e52e6eac..c96e195d0 100644 --- a/yolox/exp/yolox_base.py +++ b/yolox/exp/yolox_base.py @@ -275,7 +275,8 @@ def get_eval_loader(self, batch_size, is_distributed, testdev=False, legacy=Fals valdataset = COCODataset( data_dir=self.data_dir, json_file=self.val_ann if not testdev else self.test_ann, - name="val2017" if not testdev else "test2017", + #name="val2017" if not testdev else "test2017", + name="valid" if not testdev else "test", img_size=self.test_size, preproc=ValTransform(legacy=legacy), ) diff --git a/yolox/utils/visualize.py b/yolox/utils/visualize.py index e714a3ee7..16aa9dee5 100644 --- a/yolox/utils/visualize.py +++ b/yolox/utils/visualize.py @@ -9,6 +9,11 @@ def vis(img, boxes, scores, cls_ids, conf=0.5, class_names=None): + class_count = {} + class_AP = {} + for j in class_names: + class_count[j] = 0 + class_AP[j] = 0 for i in range(len(boxes)): box = boxes[i] @@ -22,7 +27,7 @@ def vis(img, boxes, scores, cls_ids, conf=0.5, class_names=None): y1 = int(box[3]) color = (_COLORS[cls_id] * 255).astype(np.uint8).tolist() - text = '{}:{:.1f}%'.format(class_names[cls_id], score * 100) + text = '{:.1f}%'.format(score * 100)#'{}:{:.1f}%'.format(class_names[cls_id], score * 100) txt_color = (0, 0, 0) if np.mean(_COLORS[cls_id]) > 0.5 else (255, 255, 255) font = cv2.FONT_HERSHEY_SIMPLEX @@ -37,8 +42,28 @@ def vis(img, boxes, scores, cls_ids, conf=0.5, class_names=None): txt_bk_color, -1 ) + class_count[class_names[cls_id]] = class_count[class_names[cls_id]]+1 + class_AP[class_names[cls_id]] = class_AP[class_names[cls_id]]+float('{:.1f}'.format(score * 100)) cv2.putText(img, text, (x0, y0 + txt_size[1]), font, 0.4, txt_color, thickness=1) - + + x0 = 15 + y0 = 0 + row = 0 + for k in class_count: + if((y0+row+50)>=img.shape[0]): + x0 = x0+200 + y0 = 25 + row = 0 + else: + row = row+25 + cv2.putText(img, str(k)+": "+str(class_count[k]), (x0,y0+row), font, 0.8, (0, 255, 255), thickness=2) + if class_count[k] !=0: + class_AP[k]=class_AP[k]/class_count[k] + else: + class_AP[k]=0.0 + row = row+25 + cv2.putText(img, "AP"+": "+'{:.1f}%'.format(class_AP[k]), (x0,y0+row), font, 0.8, (0, 255, 255), thickness=2) + return img
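The experiment files and placeholder READMEs above assume the dataset layouts sketched below. Directory and file names are taken from the exp configs (data_dir, train_ann/val_ann, the VOC ImageSets lists); the comments are only a summary of how the loaders use each path:

    datasets/
      pedestrian_coco/
        annotations/
          train_annotations.json   # COCO-format annotations for the training split
          valid_annotations.json   # COCO-format annotations for the validation split
        train/                     # training images
        valid/                     # validation images
      pedestrian_voc/
        Annotations/               # one Pascal VOC .xml file per image, train and valid together
        ImageSets/
          Main/
            train.txt              # image ids of the training split, one per line
            valid.txt              # image ids of the validation split, one per line
        JPEGImages/                # all .jpg images, train and valid together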
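The VOC-format exp files that override get_model() and get_optimizer() refer to nn.BatchNorm2d and nn.Parameter, so they rely on torch.nn being available under the name nn. A sketch of the import block those overrides assume, mirroring the headers already used in these files:

    import os

    import torch
    import torch.nn as nn
    import torch.distributed as dist

    from yolox.data import get_yolox_datadir
    from yolox.exp import Exp as MyExp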
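Assuming YOLOX's usual training entry point and flags (tools/train.py with -f/-d/-b/--fp16/-c), the new pedestrian experiments would be launched roughly as follows; GPU count, batch size, and the pretrained checkpoint paths are placeholders:

    python tools/train.py -f exps/example/yolox_pedestrian/voc_format/yolox_voc_nano.py -d 1 -b 16 --fp16 -c /path/to/yolox_nano.pth
    python tools/train.py -f exps/example/yolox_pedestrian/coco_format/yolox_s.py -d 1 -b 16 --fp16 -c /path/to/yolox_s.pth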
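tools/demo_sliding_window.py keeps the same CLI as tools/demo.py (its argparse is shown above). A typical run over a folder of images, with placeholder paths, would be:

    python tools/demo_sliding_window.py image -f exps/example/yolox_pedestrian/voc_format/yolox_voc_nano.py -c YOLOX_outputs/yolox_voc_nano/best_ckpt.pth --path ./assets --conf 0.3 --nms 0.3 --tsize 416 --device gpu --save_result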
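The sliding_window helper added to yolox/data/data_augment.py is a plain generator yielding (x, y, crop) tuples. A minimal usage sketch; the window size and strides are illustrative, and run_inference stands in for whatever per-crop prediction is used (it is not part of this patch):

    import cv2

    from yolox.data.data_augment import sliding_window

    image = cv2.imread("street_scene.jpg")  # any image larger than the model input
    win_w, win_h = 416, 416                 # matches the exps' test_size of (416, 416)
    for x, y, crop in sliding_window(image, YstepSize=win_h // 2, XstepSize=win_w // 2, windowSize=(win_w, win_h)):
        if crop.shape[0] != win_h or crop.shape[1] != win_w:
            continue  # skip truncated windows at the right/bottom border
        # dets = run_inference(crop)  # hypothetical helper; boxes come back in crop
        #                             # coordinates and must be offset by (x, y) to
        #                             # map them into the full image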