
Commit 94011c6

Author: sfwang
Commit message: Add evaluation code.
Parent: 7127411

7 files changed: +170 -99 lines


lib/core/evaluate.py

Lines changed: 16 additions & 48 deletions

@@ -10,62 +10,30 @@

 import numpy as np

-from core.inference import get_max_preds
+from scipy.optimize import linear_sum_assignment

+from lib.core.inference import get_final_preds

-def calc_dists(preds, target, normalize):
+
+def calc_dists(preds, target):
     preds = preds.astype(np.float32)
     target = target.astype(np.float32)
-    dists = np.zeros((preds.shape[1], preds.shape[0]))
-    for n in range(preds.shape[0]):
-        for c in range(preds.shape[1]):
-            if target[n, c, 0] > 1 and target[n, c, 1] > 1:
-                normed_preds = preds[n, c, :] / normalize[n]
-                normed_targets = target[n, c, :] / normalize[n]
-                dists[c, n] = np.linalg.norm(normed_preds - normed_targets)
-            else:
-                dists[c, n] = -1
-    return dists

+    dists = np.sqrt(((preds.reshape(preds.shape[0], 1, preds.shape[1]) - \
+                      target.reshape(1, target.shape[0], target.shape[1])) ** 2) \
+                    .sum(axis=-1))
+
+    return dists

-def dist_acc(dists, thr=0.5):
-    ''' Return percentage below threshold while ignoring values with a -1 '''
-    dist_cal = np.not_equal(dists, -1)
-    num_dist_cal = dist_cal.sum()
-    if num_dist_cal > 0:
-        return np.less(dists[dist_cal], thr).sum() * 1.0 / num_dist_cal
-    else:
-        return -1

+def calc_tp_fp_fn(preds, target, hit_thr=2):

-def accuracy(output, target, hm_type='gaussian', thr=0.5):
-    '''
-    Calculate accuracy according to PCK,
-    but uses ground truth heatmap rather than x,y locations
-    First value to be returned is average accuracy across 'idxs',
-    followed by individual accuracies
-    '''
-    idx = list(range(output.shape[1]))
-    norm = 1.0
-    if hm_type == 'gaussian':
-        pred, _ = get_max_preds(output)
-        target, _ = get_max_preds(target)
-        h = output.shape[2]
-        w = output.shape[3]
-        norm = np.ones((pred.shape[0], 2)) * np.array([h, w]) / 10
-    dists = calc_dists(pred, target, norm)
+    dists = calc_dists(preds, target)

-    acc = np.zeros((len(idx) + 1))
-    avg_acc = 0
-    cnt = 0
+    row_ind, col_ind = linear_sum_assignment(dists)

-    for i in range(len(idx)):
-        acc[i + 1] = dist_acc(dists[idx[i]])
-        if acc[i + 1] >= 0:
-            avg_acc = avg_acc + acc[i + 1]
-            cnt += 1
+    tp = np.sum(dists[row_ind, col_ind] <= hit_thr)
+    fp = dists.shape[0] - tp
+    fn = dists.shape[1] - tp

-    avg_acc = avg_acc / cnt if cnt != 0 else 0
-    if cnt != 0:
-        acc[0] = avg_acc
-    return acc, avg_acc, cnt, pred
+    return tp, fp, fn
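
The new metric works in three steps: calc_dists broadcasts an (N, 2) array of detections against an (M, 2) array of ground-truth sources into an (N, M) distance matrix, linear_sum_assignment (the Hungarian algorithm, which also accepts rectangular matrices) finds the one-to-one matching with minimum total distance, and each matched pair within hit_thr pixels counts as a true positive. A minimal sketch on made-up toy values:

import numpy as np
from scipy.optimize import linear_sum_assignment

preds = np.array([[10., 10.], [40., 42.], [70., 5.]])   # 3 detections
target = np.array([[11., 10.], [40., 40.]])             # 2 true sources

# same broadcast as calc_dists: (3, 1, 2) - (1, 2, 2) -> (3, 2) distances
dists = np.sqrt(((preds[:, None, :] - target[None, :, :]) ** 2).sum(axis=-1))

row_ind, col_ind = linear_sum_assignment(dists)   # optimal one-to-one matching
tp = np.sum(dists[row_ind, col_ind] <= 2)         # both matches within 2 px -> 2
fp = preds.shape[0] - tp                          # leftover detections      -> 1
fn = target.shape[0] - tp                         # missed sources           -> 0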

lib/core/function.py

Lines changed: 27 additions & 3 deletions

@@ -17,8 +17,8 @@
 import torch.autograd as autograd

 from lib.core.config import get_model_name
-# from lib.core.evaluate import accuracy
-# from core.inference import get_final_preds, get_final_integral_preds
+from lib.core.evaluate import calc_tp_fp_fn
+from lib.core.inference import get_final_preds
 # from utils.transforms import flip_back
 # from utils.vis import save_debug_images
 # from utils.vis_plain_keypoint import vis_mpii_keypoints
@@ -113,6 +113,9 @@ def validate(config, val_loader, val_dataset, model, criterion, output_dir,
     # switch to evaluate mode
     model.eval()

+    all_preds = []
+    all_gts = []
+
     with torch.no_grad():
         end = time.time()
         for i, (input, target, meta) in enumerate(val_loader):
@@ -127,6 +130,13 @@ def validate(config, val_loader, val_dataset, model, criterion, output_dir,
             losses.update(loss.item(), num_images)
             # _, avg_acc, cnt, pred = accuracy(output.cpu().numpy(),
             #                                  target.cpu().numpy())
+            preds = get_final_preds(output.detach().cpu().numpy())
+            all_preds.extend(preds)
+
+            sources = meta['sources'].clone().detach().cpu().numpy()
+            valid_source_nums = meta['valid_source_num'].clone().detach().cpu().numpy()
+            for j, gt in enumerate(sources):
+                all_gts.append(gt[:valid_source_nums[j], :])

             # acc.update(avg_acc, cnt)

@@ -168,12 +178,26 @@ def validate(config, val_loader, val_dataset, model, criterion, output_dir,

         # prefix = '{}_{}'.format(os.path.join(output_dir, 'val'), i)

-        perf_indicator = losses.avg
+        # perf_indicator = losses.avg
+        total_tp = total_fp = total_fn = 0
+        for preds, target in zip(all_preds, all_gts):
+            tp, fp, fn = calc_tp_fp_fn(preds, target)
+            total_tp += tp
+            total_fp += fp
+            total_fn += fn
+
+        recall = total_tp / (total_tp + total_fn)
+        prec = total_tp / (total_tp + total_fp)
+
+        perf_indicator = 2 * prec * recall / (prec + recall)

         if writer_dict:
             writer = writer_dict['writer']
             global_steps = writer_dict['valid_global_steps']
             writer.add_scalar('valid_loss', losses.avg, global_steps)
+            writer.add_scalar('recall', recall, global_steps)
+            writer.add_scalar('precision', prec, global_steps)
+            writer.add_scalar('f_score', perf_indicator, global_steps)

             writer_dict['valid_global_steps'] = global_steps + 1
lib/core/inference.py

Lines changed: 67 additions & 0 deletions

@@ -0,0 +1,67 @@
+# ------------------------------------------------------------------------------
+# Copyright (c) Microsoft
+# Licensed under the MIT License.
+# Written by Bin Xiao ([email protected])
+# ------------------------------------------------------------------------------
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import math
+import cv2
+
+import numpy as np
+
+def get_final_preds(batch_heatmaps, score_thresh=0.5):
+    '''
+    get predictions from score maps
+    heatmaps: numpy.ndarray([batch_size, num_joints, height, width])
+    '''
+    assert isinstance(batch_heatmaps, np.ndarray), \
+        'batch_heatmaps should be numpy.ndarray'
+    assert batch_heatmaps.ndim == 4, 'batch_images should be 4-ndim'
+
+    assert batch_heatmaps.shape[1] == 1, 'batch_images must be single channel'
+
+    batch_size = batch_heatmaps.shape[0]
+    height = batch_heatmaps.shape[2]
+    width = batch_heatmaps.shape[3]
+
+    # local_max_heatmaps = np.zeros((batch_size, height, width, 1), dtype=float32)
+    batch_preds = []
+
+    for batch_idx in range(batch_size):
+        heatmaps = batch_heatmaps[batch_idx]
+        heatmaps = heatmaps.squeeze()
+
+        heatmaps_padded = cv2.copyMakeBorder(heatmaps, 1, 1, 1, 1, cv2.BORDER_REPLICATE)
+
+        local_max = np.ones(heatmaps.shape, dtype=bool)
+
+        for n_idx in range(9):
+            if n_idx == 4:
+                continue
+
+            neighbors = heatmaps_padded[
+                (n_idx % 3):(n_idx % 3 + height),
+                (n_idx // 3):(n_idx // 3 + width)
+            ]
+
+            local_max = np.logical_and(
+                local_max,
+                neighbors <= heatmaps
+            )
+
+        # heatmaps[np.logical_not(local_max)] = 0.0
+
+        # local_max_heatmaps[batch_idx] = heatmaps
+
+        rows, cols = np.where(np.logical_and(local_max, heatmaps > score_thresh))
+
+        preds = np.vstack((cols, rows)).transpose()
+
+        batch_preds.append(preds)
+
+    return batch_preds
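
get_final_preds performs non-maximum suppression by hand: it replicates the border, compares every pixel against its eight neighbours via the nine shifted views of the padded map, and keeps pixels that are local maxima with score above score_thresh, returned as (x, y) pairs. The same peak picking can be written more compactly with scipy.ndimage; the sketch below is an equivalent reformulation, not the commit's code, and assumes SciPy is available:

import numpy as np
from scipy.ndimage import maximum_filter

def find_peaks(heatmap, score_thresh=0.5):
    # mode='nearest' mirrors cv2.BORDER_REPLICATE; a pixel equal to the
    # max of its 3x3 window is >= all eight neighbours, as above
    local_max = heatmap == maximum_filter(heatmap, size=3, mode='nearest')
    rows, cols = np.where(local_max & (heatmap > score_thresh))
    return np.stack((cols, rows), axis=1)   # (x, y) pairs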

lib/dataset/SimulatedDataset.py

Lines changed: 20 additions & 17 deletions

@@ -7,12 +7,12 @@

 from torch.utils.data import Dataset

-from lib.utils.utils import get_source, evalpotential, generate_heatmaps
+from lib.utils.utils import get_source, get_mesh, evalpotential, generate_heatmaps

 logger = logging.getLogger(__name__)

 class SimulatedDataset(Dataset):
-    def __init__(self, cfg, root, image_set, is_train, transform):
+    def __init__(self, cfg, root, image_set, is_train, transform, n_sources=30):
         self.cfg = cfg
         self.is_train = is_train

@@ -23,9 +23,13 @@ def __init__(self, cfg, root, image_set, is_train, transform):

         self.transform = transform

+        self.n_sources = n_sources  # only used at test time
+
         self.patch_width = cfg.MODEL.IMAGE_SIZE[0]
         self.patch_height = cfg.MODEL.IMAGE_SIZE[1]

+        self.mesh = get_mesh(self.patch_width, self.patch_height)
+
         self.db = self._get_db()
         self.db_length = len(self.db)

@@ -34,7 +38,7 @@ def __init__(self, cfg, root, image_set, is_train, transform):
     def __getitem__(self, idx):
         the_db = copy.deepcopy(self.db[idx])

-        image = evalpotential(the_db['mesh'], the_db['sources'])
+        image = evalpotential(self.mesh, the_db['sources'])

         image = image.reshape((self.patch_height, self.patch_width)) + \
             self.cfg.MODEL.VAR_NOISE*np.random.randn(self.patch_height, self.patch_width)
@@ -53,31 +57,30 @@ def __getitem__(self, idx):
             self.cfg.MODEL.OUTPUT_SIZE[1]
         )

+        valid_source_num = unnormalized_sources.shape[0]
+
+        if valid_source_num < 64:
+            unnormalized_sources = np.concatenate(
+                (unnormalized_sources,
+                 -np.ones((64 - valid_source_num, 2))),
+                axis=0
+            )
+
         meta = {
-            'sources': unnormalized_sources
+            'sources': unnormalized_sources,
+            'valid_source_num': valid_source_num
         }

         return image, heatmap_target, meta

     def _get_db(self):
         gt_db = []
         self.db_length = int(self.cfg.TRAIN.NUM_SAMPLES) if self.is_train else int(self.cfg.TEST.NUM_SAMPLES)
-        n_sources = 30
         for i in range(self.db_length):
-            mesh, sources = get_source(self.patch_width, self.patch_height,
-                self.cfg.MODEL.DEPTH, n_sources, self.cfg.MODEL.VAR_NOISE)
-
-            # image.view(-1).repeat(3).view(self.patch_width, self.patch_height, 3)
-            # image = np.expand_dims(image, axis=-1).repeat(3, axis=-1)
-
-            # image = self.transform(image)
-
-            # sources = sources.transpose()
-
-            # heatmap_target = generate_heatmaps(sources, self.patch_height, self.patch_width)
+            n_sources = np.random.randint(1, 65) if self.is_train else self.n_sources
+            sources = get_source(self.cfg.MODEL.DEPTH, n_sources, self.cfg.MODEL.VAR_NOISE)

             gt_db.append({
-                'mesh': mesh,
                 'sources': sources
             })
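
The fixed-size padding exists because DataLoader's default collate function stacks each sample's 'sources' array into one batch tensor, which requires a common shape; the cap of 64 matches the training-time draw np.random.randint(1, 65). valid_source_num records how many rows are real so consumers can strip the -1 padding again, which is exactly what validate does. A sketch of that round trip, with shapes assumed from the diff:

sources = meta['sources'].numpy()          # (batch_size, 64, 2), padded with -1
valid = meta['valid_source_num'].numpy()   # (batch_size,)
per_sample_gts = [s[:n] for s, n in zip(sources, valid)]   # padding removed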

lib/utils/utils.py

Lines changed: 6 additions & 2 deletions

@@ -120,7 +120,7 @@ def evalpotential(sites_locations, sources):
     # recordings[i] = sum(sources[0,:]/dis_sources);
     return recordings

-def get_source(x_mesh, y_mesh, depth, n_sources, var_noise):
+def get_mesh(x_mesh, y_mesh):

     d = x_mesh*y_mesh
     x = np.linspace(0, 1, x_mesh)
@@ -129,12 +129,16 @@ def get_source(x_mesh, y_mesh, depth, n_sources, var_noise):
     Xsim = np.reshape(Xsim, [d])
     Ysim = np.reshape(Ysim, [d])
     mesh = np.array([Xsim,Ysim,np.zeros(d)]);
+    return mesh
+
+def get_source(depth, n_sources, var_noise):
+
     sources = np.random.rand(4, n_sources);
     sources[3, :] = depth
     sources[0, :] = 2*np.floor(2*sources[0, :])-1;
     # image = evalpotential(mesh, sources);
     # image = image.reshape((y_mesh, x_mesh)) + var_noise*np.random.randn(y_mesh, x_mesh)
-    return mesh, sources
+    return sources

 def generate_heatmaps(keypoints, im_height, im_width):
     heatmaps = np.zeros((1, int(im_height), int(im_width)), dtype=np.float32)
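
Splitting get_mesh out of get_source lets SimulatedDataset build the electrode mesh once in __init__ instead of once per sample; get_source now returns only the random 4 x n_sources matrix (var_noise is no longer used inside it and survives only in the commented-out lines). A usage sketch; the reading of the four rows is an assumption based on how get_source fills them and on the commented-out line in evalpotential:

mesh = get_mesh(64, 64)        # (3, 4096): flattened x/y grid on [0, 1], z = 0
sources = get_source(depth=0.1, n_sources=30, var_noise=0.01)
# sources[0, :]   -> random +/-1 sign/amplitude per source
# sources[1:3, :] -> uniform values in [0, 1), presumably source positions
# sources[3, :]   -> the constant depth
image = evalpotential(mesh, sources).reshape((64, 64))   # assuming a 64x64 patch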

source_detection/train.py

Lines changed: 2 additions & 2 deletions

@@ -163,7 +163,7 @@ def main():
         pin_memory=True
     )

-    best_perf = 1000.0
+    best_perf = 0.0
     best_model = False
     for epoch in range(config.TRAIN.BEGIN_EPOCH, config.TRAIN.END_EPOCH):
         lr_scheduler.step()
@@ -178,7 +178,7 @@ def main():
                                 criterion, final_output_dir, tb_log_dir,
                                 writer_dict)

-        if perf_indicator < best_perf:
+        if perf_indicator > best_perf:
             best_perf = perf_indicator
             best_model = True
         else:
