# mlebench/competitions/siim-covid19-detection/grade.py
from typing import List, Tuple

import numpy as np
from sklearn.metrics import average_precision_score


def calculate_map(submission_preds: List[List[Tuple]], answers_preds: List[List[Tuple]]) -> float:
"""
Calculate mean Average Precision (mAP) for object detection.
> The challenge uses the standard PASCAL VOC 2010 mean Average Precision (mAP) at IoU > 0.5.
Note that the linked document describes VOC 2012, which differs in some minor ways
(e.g. there is no concept of "difficult" classes in VOC 2010).
The P/R curve and AP calculations remain the same.
(www.kaggle.com/competitions/siim-covid19-detection/overview/evaluation)
Some choices made here that were not explicitly specified in the challenge description:
1. Treating "none" or "negative" as prediction classes of their own, instead of as the non-positive class
- Justification: Treating them as their own classes is implied by the data format:
- Study level - train_study_level.csv has 4 binary classes including the "negative" class instead of 3
- Image level - We're asked to predict "none" with full bounding boxes instead of withholding a prediction
Also, 3rd place winner says "It probably treats the six classes equally", where the six classes are
"negative", "typical", "indeterminate", "atypical", "none" and "opacity".
(https://www.kaggle.com/competitions/siim-covid19-detection/discussion/240363)
2. Rules for populating y_pairs (see comments below), in particular the (0, 0) case
- Justification: The general rules follow the descriptions of the PASCAL VOC 2010 mAP documented online.
The only custom addition is handling the edge case of (0, 0) false negatives, which was necessary because
if we don't include (0, 0) pairs, both the sample submission and gold submission end up with all values of
y_true being 1, so the AP is undefined.
Behavior of our implementation is consistent with this comment from the organizers:
https://www.kaggle.com/competitions/siim-covid19-detection/discussion/248467#1362916
"""
    aps = []
    # Group predictions by class - the general idea is to calculate AP for each class
    # separately and then average the per-class APs to get mAP
    classes = sorted(set(pred[0] for preds in answers_preds for pred in preds))
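    # For this challenge, classes should come out as the six labels noted in the docstring,
    # i.e. ["atypical", "indeterminate", "negative", "none", "opacity", "typical"] after sorting
    # (assuming the class names appear verbatim in the answer tuples).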
    for cls in classes:
        # We will populate y_pairs with (y_true, y_score) pairs of:
        #   (1, confidence) for every predicted box that matches a ground truth box
        #   (0, confidence) for every predicted box that does not match a ground truth box
        #   (1, 0) for every ground truth box that is not matched to a predicted box
        #   (0, 0) when there are neither predicted nor ground truth boxes
        y_pairs = []  # List of (y_true, y_score) pairs
        # Gather all predictions and ground truth boxes related to this class from all samples
        for img_preds, img_gts in zip(submission_preds, answers_preds):
            y_pairs_ = []
            # Get ground truth boxes for this class
            gt_boxes = [gt[2:] for gt in img_gts if gt[0] == cls]
            # Match predictions greedily in order of descending confidence, so that
            # higher-confidence predictions get first claim on ground truth boxes.
            # sorted() is used instead of an in-place sort to avoid mutating the caller's lists.
            img_preds_sorted = sorted(img_preds, key=lambda x: x[1], reverse=True)
            matched_gt = [False] * len(gt_boxes)  # Initialize all ground truths as unmatched
            for pred in img_preds_sorted:
                if pred[0] != cls:
                    continue
                # Find the best matching ground truth box above the 0.5 IoU threshold
                best_iou = 0.0
                best_gt_idx = -1
                for i, gt in enumerate(gt_boxes):
                    if matched_gt[i]:  # Don't reuse matched ground truths
                        continue
                    iou = calculate_iou(pred[2:], gt)
                    if iou > best_iou and iou > 0.5:
                        best_iou = iou
                        best_gt_idx = i
                pred_confidence = pred[1]
                if best_gt_idx != -1:
                    y_pairs_.append((1, pred_confidence))  # True positive
                    matched_gt[best_gt_idx] = True
                else:
                    y_pairs_.append((0, pred_confidence))  # False positive
            # Add false negatives for unmatched ground truths
            y_pairs_.extend([(1, 0)] * matched_gt.count(False))
            if len(y_pairs_) == 0:
                # A true negative: no predictions and no ground truths for this class
                y_pairs_.append((0, 0))
            y_pairs.extend(y_pairs_)
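        # Note: sklearn's average_precision_score (used below) computes AP directly from the
        # ranked scores. The (1, 0) false-negative pairs count toward the total number of
        # positives (the recall denominator) while ranking below every scored prediction,
        # which is how unmatched ground truths should affect the P/R curve.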
        y_true = [pair[0] for pair in y_pairs]
        y_scores = [pair[1] for pair in y_pairs]
        if len(y_true) > 0:
            assert not all(
                y == 1 for y in y_true
            ), "y_true is all 1s; this shouldn't happen and will result in undefined AP"
            ap = average_precision_score(y_true, y_scores)
            aps.append(ap)
        else:
            raise ValueError(f"Unexpected error: y_true is empty for class {cls}")
    # Calculate mAP as the mean of per-class APs
    return np.mean(aps) if aps else 0.0
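

# calculate_iou is referenced above but defined elsewhere in grade.py. The sketch below is a
# minimal stand-in, assuming boxes are axis-aligned (x1, y1, x2, y2) tuples; the real helper
# may differ in details such as coordinate conventions or edge handling.
def calculate_iou(box_a: Tuple, box_b: Tuple) -> float:
    """Intersection-over-union of two axis-aligned (x1, y1, x2, y2) boxes."""
    inter_x1 = max(box_a[0], box_b[0])
    inter_y1 = max(box_a[1], box_b[1])
    inter_x2 = min(box_a[2], box_b[2])
    inter_y2 = min(box_a[3], box_b[3])
    inter = max(0, inter_x2 - inter_x1) * max(0, inter_y2 - inter_y1)
    area_a = (box_a[2] - box_a[0]) * (box_a[3] - box_a[1])
    area_b = (box_b[2] - box_b[0]) * (box_b[3] - box_b[1])
    union = area_a + area_b - inter
    return inter / union if union > 0 else 0.0


# Hypothetical usage, assuming tuples of the form (class, confidence, x1, y1, x2, y2)
# (ground truth tuples carry a placeholder confidence at index 1):
if __name__ == "__main__":
    submission = [[("opacity", 0.9, 0, 0, 10, 10), ("opacity", 0.4, 50, 50, 60, 60)]]
    answers = [[("opacity", 1.0, 1, 1, 10, 10)]]
    print(calculate_map(submission, answers))  # 1.0: the single match ranks above the false positive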