in models/vision/detection/awsdet/core/evaluation/mean_ap.py [0:0]
def eval_map(det_results,
annotations,
scale_ranges=None,
iou_thr=0.5,
dataset=None,
logger=None,
nproc=4):
"""Evaluate mAP of a dataset.
Args:
det_results (list[list]): [[cls1_det, cls2_det, ...], ...].
The outer list indicates images, and the inner list indicates
per-class detected bboxes.
annotations (list[dict]): Ground truth annotations where each item of
the list indicates an image. Keys of annotations are:
- "bboxes": numpy array of shape (n, 4)
- "labels": numpy array of shape (n, )
- "bboxes_ignore" (optional): numpy array of shape (k, 4)
- "labels_ignore" (optional): numpy array of shape (k, )
scale_ranges (list[tuple] | None): Range of scales to be evaluated,
in the format [(min1, max1), (min2, max2), ...]. A range of
(32, 64) means the area range between (32**2, 64**2).
Default: None.
iou_thr (float): IoU threshold to be considered as matched.
Default: 0.5.
dataset (list[str] | str | None): Dataset name or dataset classes,
there are minor differences in metrics for different datsets, e.g.
"voc07", "imagenet_det", etc. Default: None.
logger (logging.Logger | str | None): The way to print the mAP
summary. See `awsdet.utils.print_log()` for details. Default: None.
nproc (int): Processes used for computing TP and FP.
Default: 4.
Returns:
tuple: (mAP, [dict, dict, ...])
"""
assert len(det_results) == len(annotations)
num_imgs = len(det_results)
num_scales = len(scale_ranges) if scale_ranges is not None else 1
num_classes = len(det_results[0]) # positive class num
area_ranges = ([(rg[0]**2, rg[1]**2) for rg in scale_ranges]
if scale_ranges is not None else None)
pool = Pool(nproc)
eval_results = []
for i in range(num_classes):
# get gt and det bboxes of this class
cls_dets, cls_gts, cls_gts_ignore = get_cls_results(
det_results, annotations, i)
# choose proper function according to datasets to compute tp and fp
if dataset in ['det', 'vid']:
tpfp_func = tpfp_imagenet
else:
tpfp_func = tpfp_default
# compute tp and fp for each image with multiple processes
tpfp = pool.starmap(
tpfp_func,
zip(cls_dets, cls_gts, cls_gts_ignore,
[iou_thr for _ in range(num_imgs)],
[area_ranges for _ in range(num_imgs)]))
tp, fp = tuple(zip(*tpfp))
# calculate gt number of each scale
# ignored gts or gts beyond the specific scale are not counted
num_gts = np.zeros(num_scales, dtype=int)
for j, bbox in enumerate(cls_gts):
if area_ranges is None:
num_gts[0] += bbox.shape[0]
else:
gt_areas = (bbox[:, 2] - bbox[:, 0]) * (
bbox[:, 3] - bbox[:, 1])
for k, (min_area, max_area) in enumerate(area_ranges):
num_gts[k] += np.sum((gt_areas >= min_area)
& (gt_areas < max_area))
# sort all det bboxes by score, also sort tp and fp
cls_dets = np.vstack(cls_dets)
num_dets = cls_dets.shape[0]
sort_inds = np.argsort(-cls_dets[:, -1])
tp = np.hstack(tp)[:, sort_inds]
fp = np.hstack(fp)[:, sort_inds]
# calculate recall and precision with tp and fp
tp = np.cumsum(tp, axis=1)
fp = np.cumsum(fp, axis=1)
eps = np.finfo(np.float32).eps
recalls = tp / np.maximum(num_gts[:, np.newaxis], eps)
precisions = tp / np.maximum((tp + fp), eps)
# calculate AP
if scale_ranges is None:
recalls = recalls[0, :]
precisions = precisions[0, :]
num_gts = num_gts.item()
mode = 'area' if dataset != 'voc07' else '11points'
ap = average_precision(recalls, precisions, mode)
eval_results.append({
'num_gts': num_gts,
'num_dets': num_dets,
'recall': recalls,
'precision': precisions,
'ap': ap
})
if scale_ranges is not None:
# shape (num_classes, num_scales)
all_ap = np.vstack([cls_result['ap'] for cls_result in eval_results])
all_num_gts = np.vstack(
[cls_result['num_gts'] for cls_result in eval_results])
mean_ap = []
for i in range(num_scales):
if np.any(all_num_gts[:, i] > 0):
mean_ap.append(all_ap[all_num_gts[:, i] > 0, i].mean())
else:
mean_ap.append(0.0)
else:
aps = []
for cls_result in eval_results:
if cls_result['num_gts'] > 0:
aps.append(cls_result['ap'])
mean_ap = np.array(aps).mean().item() if aps else 0.0
print_map_summary(
mean_ap, eval_results, dataset, area_ranges, logger=logger)
return mean_ap, eval_results