in downstream/semseg/datasets/evaluation/scannet_benchmark_utils/scripts/evaluate_semantic_instance.py [0:0]
def evaluate_matches(self, matches):
    """Compute average precision (AP) per distance setting, class and overlap.

    Reads ``self.CLASS_LABELS``, ``self.overlaps``, ``self.min_region_sizes``,
    ``self.distance_threshes`` and ``self.distance_confs`` (the last three are
    zipped together, one evaluation setting per triple).

    Args:
        matches: mapping ``key -> {'gt': {...}, 'pred': {...}}`` where both
            inner dicts map every label in ``self.CLASS_LABELS`` to a list of
            instance dicts. The keys read here are:

            * ground truth: ``instance_id``, ``vert_count``, ``med_dist``,
              ``dist_conf`` and ``matched_pred`` (a list of dicts with
              ``filename``, ``intersection``, ``vert_count``, ``confidence``).
            * prediction: ``filename``, ``vert_count``, ``confidence``,
              ``void_intersection`` and ``matched_gt`` (a list of dicts with
              ``intersection``, ``vert_count``, ``instance_id``, ``med_dist``,
              ``dist_conf``).

    Returns:
        ``numpy.ndarray`` of floats with shape
        ``(len(self.distance_threshes), len(self.CLASS_LABELS),
        len(self.overlaps))``. An entry is ``nan`` when the class has no
        (filtered) ground truth at all, and ``0.0`` when it has ground truth
        but no predictions.

    Raises:
        KeyError: if a label is missing from a ``'gt'``/``'pred'`` dict, or if
            a ``matched_pred`` entry names a ``filename`` that does not occur
            among the predictions in ``matches``.
    """
    # results: distance setting x class x overlap
    # (builtin float: the np.float alias was removed from NumPy)
    ap = np.zeros(
        (len(self.distance_threshes), len(self.CLASS_LABELS), len(self.overlaps)),
        float,
    )
    settings = zip(self.min_region_sizes, self.distance_threshes, self.distance_confs)
    for di, (min_region_size, distance_thresh, distance_conf) in enumerate(settings):
        for oi, overlap_th in enumerate(self.overlaps):
            # Reset the greedy-assignment bookkeeping for this setting: a
            # prediction that became the first match of a ground truth is
            # skipped for every ground truth examined afterwards.
            pred_visited = {}
            for m in matches:
                for label_name in self.CLASS_LABELS:
                    for p in matches[m]['pred'][label_name]:
                        if 'filename' in p:
                            pred_visited[p['filename']] = False

            for li, label_name in enumerate(self.CLASS_LABELS):
                y_true = np.empty(0)
                y_score = np.empty(0)
                hard_false_negatives = 0
                has_gt = False
                has_pred = False

                for m in matches:
                    pred_instances = matches[m]['pred'][label_name]
                    # filter groups in ground truth (and instances that fail
                    # the size / distance criteria of this setting)
                    gt_instances = [
                        gt for gt in matches[m]['gt'][label_name]
                        if gt['instance_id'] >= 1000
                        and gt['vert_count'] >= min_region_size
                        and gt['med_dist'] <= distance_thresh
                        and gt['dist_conf'] >= distance_conf
                    ]
                    if gt_instances:
                        has_gt = True
                    if pred_instances:
                        has_pred = True

                    cur_true = np.ones(len(gt_instances))
                    cur_score = np.ones(len(gt_instances)) * (-float("inf"))
                    # builtin bool: the np.bool alias was removed from NumPy
                    cur_match = np.zeros(len(gt_instances), dtype=bool)

                    # collect matches
                    for gti, gt in enumerate(gt_instances):
                        found_match = False
                        for pred in gt['matched_pred']:
                            # greedy assignments
                            if pred_visited[pred['filename']]:
                                continue
                            union = gt['vert_count'] + pred['vert_count'] - pred['intersection']
                            overlap = float(pred['intersection']) / union
                            if overlap > overlap_th:
                                confidence = pred['confidence']
                                if cur_match[gti]:
                                    # this gt already has a prediction: the one
                                    # with the lower score is automatically a
                                    # false positive
                                    max_score = max(cur_score[gti], confidence)
                                    min_score = min(cur_score[gti], confidence)
                                    cur_score[gti] = max_score
                                    # append false positive
                                    cur_true = np.append(cur_true, 0)
                                    cur_score = np.append(cur_score, min_score)
                                    cur_match = np.append(cur_match, True)
                                else:
                                    # otherwise set score
                                    found_match = True
                                    cur_match[gti] = True
                                    cur_score[gti] = confidence
                                    pred_visited[pred['filename']] = True
                        if not found_match:
                            hard_false_negatives += 1

                    # remove non-matched ground truth instances
                    cur_true = cur_true[cur_match]
                    cur_score = cur_score[cur_match]

                    # collect non-matched predictions as false positive
                    for pred in pred_instances:
                        found_gt = False
                        for gt in pred['matched_gt']:
                            union = gt['vert_count'] + pred['vert_count'] - gt['intersection']
                            overlap = float(gt['intersection']) / union
                            if overlap > overlap_th:
                                found_gt = True
                                break
                        if not found_gt:
                            # count how much of the prediction lies on regions
                            # that are not evaluated (void, groups, filtered gt)
                            num_ignore = pred['void_intersection']
                            for gt in pred['matched_gt']:
                                # group?
                                if gt['instance_id'] < 1000:
                                    num_ignore += gt['intersection']
                                # small ground truth instances
                                if (gt['vert_count'] < min_region_size
                                        or gt['med_dist'] > distance_thresh
                                        or gt['dist_conf'] < distance_conf):
                                    num_ignore += gt['intersection']
                            proportion_ignore = float(num_ignore) / pred['vert_count']
                            # if not ignored append false positive
                            if proportion_ignore <= overlap_th:
                                cur_true = np.append(cur_true, 0)
                                cur_score = np.append(cur_score, pred["confidence"])

                    # append to overall results
                    y_true = np.append(y_true, cur_true)
                    y_score = np.append(y_score, cur_score)

                # compute average precision
                if has_gt and has_pred:
                    # compute precision recall curve first
                    # sorting and cumsum
                    score_arg_sort = np.argsort(y_score)
                    y_score_sorted = y_score[score_arg_sort]
                    y_true_sorted = y_true[score_arg_sort]
                    y_true_sorted_cumsum = np.cumsum(y_true_sorted)

                    # unique thresholds
                    _, unique_indices = np.unique(y_score_sorted, return_index=True)
                    num_prec_recall = len(unique_indices) + 1

                    # prepare precision recall
                    num_examples = len(y_score_sorted)
                    # no collected examples at all -> no true examples
                    if len(y_true_sorted_cumsum) > 0:
                        num_true_examples = y_true_sorted_cumsum[-1]
                    else:
                        num_true_examples = 0
                    precision = np.zeros(num_prec_recall)
                    recall = np.zeros(num_prec_recall)

                    # deal with the first point: with a trailing 0 appended,
                    # index -1 (used when idx_scores == 0) reads a cumsum of 0
                    y_true_sorted_cumsum = np.append(y_true_sorted_cumsum, 0)
                    # deal with remaining
                    for idx_res, idx_scores in enumerate(unique_indices):
                        cumsum = y_true_sorted_cumsum[idx_scores - 1]
                        tp = num_true_examples - cumsum
                        fp = num_examples - idx_scores - tp
                        fn = cumsum + hard_false_negatives
                        precision[idx_res] = float(tp) / (tp + fp)
                        recall[idx_res] = float(tp) / (tp + fn)

                    # first point in curve is artificial
                    precision[-1] = 1.
                    recall[-1] = 0.

                    # compute average of precision-recall curve
                    recall_for_conv = np.copy(recall)
                    recall_for_conv = np.append(recall_for_conv[0], recall_for_conv)
                    recall_for_conv = np.append(recall_for_conv, 0.)

                    stepWidths = np.convolve(recall_for_conv, [-0.5, 0, 0.5], 'valid')
                    # integrate is now simply a dot product
                    ap_current = np.dot(precision, stepWidths)
                elif has_gt:
                    ap_current = 0.0
                else:
                    ap_current = float('nan')
                ap[di, li, oi] = ap_current
    return ap