def calculate_average_precision()

in mapillary_vistas/evaluation/instance_specific_instance_level.py [0:0]


# module-level imports used below; numpy, sklearn, itertools and os are the
# usual external packages, while progress and plot_precision_recall are
# helpers defined elsewhere in this package
import itertools
import os

import numpy as np
import sklearn.metrics


def calculate_average_precision(instance_specific_instance_information, labels, args):
    """
    Using the instance specific information, calculate the average precision
    over all images for each label.
    """

    # we ignore ground truths smaller than 100 pixels.
    min_size = 100
    # calculate AP for minimal overlap from 50% to 95% in 5% steps
    thresholds = np.arange(.5, 1, .05).tolist()
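    # note: the first element of np.arange(.5, 1, .05) is exactly 0.5, so the
    # `threshold == .5` comparisons further down match it reliably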
    label_ids = []
    for index, label in enumerate(labels):
        if not label['evaluate']:
            continue
        if not label['instances']:
            continue

        label_ids += [index]

    precisions = {}
    precisions_50 = {}

    iterator = itertools.product(thresholds, label_ids)
    # loop count for progress bar
    iteration_count = len(thresholds) * len(label_ids)
    for threshold, label_id in progress(iterator, total=iteration_count):
        # The metric is calculated independently for every label.
        # In the inner loop we go over each image and check for instances of
        # the current label, then loop over all instances of that label in the
        # current image. To find the best match at the current overlap
        # threshold, we also iterate over every ground truth/prediction
        # combination, which adds another two loops.

        current_overlap_infos = []
        missed_ground_truths = 0
        found_ground_truths = 0
        found_predictions = 0
        for image_information in instance_specific_instance_information:
            # this image does not contain the current label
            if label_id not in image_information:
                continue

            image_information = image_information[label_id]
            ground_truth_ids = []
            for ground_truth_id in image_information['ground_truths'].keys():
                ground_truth_size = image_information['ground_truths'][ground_truth_id]['size']

                # check if the current ground truth object is big enough to matter
                if ground_truth_size >= min_size:
                    ground_truth_ids += [ground_truth_id]

            prediction_ids = image_information['predictions'].keys()

            # keep track of instance count
            found_ground_truths += len(ground_truth_ids)
            found_predictions += len(image_information['predictions'])

            # keep track of assigned predictions to determine false positives
            assigned_prediction_ids = []
            for ground_truth_id in ground_truth_ids:
                ground_truth_size = image_information['ground_truths'][ground_truth_id]['size']
                # if we do not have any overlaps for the current ground truth,
                # then it's definitely a false negative.
                if ground_truth_id not in image_information['ground_truth_overlaps']:
                    missed_ground_truths += 1
                    continue

                current_result = {}
                # for each ground truth instance, check whether any overlapping
                # prediction overlaps enough
                for prediction_id in image_information['ground_truth_overlaps'][ground_truth_id].keys():
                    overlap_information = image_information['ground_truth_overlaps'][ground_truth_id][prediction_id]
                    iou = overlap_information['iou']
                    confidence = image_information['predictions'][prediction_id]['confidence']
                    if iou < threshold:
                        continue

                    # keep track of assignments in current_result:
                    # 'assigned' stores whether a prediction is chosen for the
                    # current ground truth; if another prediction matches with
                    # a higher confidence, 'assigned' is reset to False
                    is_best_match = True
                    for matching_prediction_id in current_result.keys():
                        if current_result[matching_prediction_id]['confidence'] < confidence:
                            current_result[matching_prediction_id]['assigned'] = False
                        else:
                            is_best_match = False

                    current_result[prediction_id] = {
                        'assigned': is_best_match,
                        'confidence': confidence,
                    }
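                # example: if prediction A (iou 0.6, confidence 0.7) is seen
                # first and prediction B (iou 0.8, confidence 0.9) second, A is
                # inserted with assigned=True and demoted when B arrives; B
                # becomes the match and A is later counted as a false positive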

                # there are overlaps, but none reach the current threshold
                if len(current_result) == 0:
                    missed_ground_truths += 1

                # record (assigned, confidence) for every prediction matched to
                # this ground truth, and remember it as assigned
                for prediction_id in prediction_ids:
                    if prediction_id in current_result:
                        result = current_result[prediction_id]
                        current_overlap_infos.append((result['assigned'], result['confidence']))
                        assigned_prediction_ids.append(prediction_id)

            # check whether each unassigned prediction was missed for a good
            # reason (ignore regions, too-small ground truths) or whether it
            # counts as a false positive
            for prediction_id in prediction_ids:
                if prediction_id not in assigned_prediction_ids:
                    ignored_pixels = image_information['predictions'][prediction_id]['ignore_pixel_count']
                    prediction_size = image_information['predictions'][prediction_id]['size']

                    if prediction_id in image_information['prediction_overlaps']:
                        for ground_truth_id in image_information['prediction_overlaps'][prediction_id].keys():
                            ground_truth_size = image_information['ground_truths'][ground_truth_id]['size']
                            if ground_truth_size < min_size:
                                ignored_pixels += image_information['prediction_overlaps'][prediction_id][ground_truth_id]['intersection']

                    ignored_part = ignored_pixels / float(prediction_size)
                    if ignored_part < threshold:
                        current_overlap_infos.append((False, image_information['predictions'][prediction_id]['confidence']))
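                    # e.g. a 1000 px prediction with 800 px falling on ignore
                    # regions or sub-minimum ground truths has ignored_part
                    # 0.8, so at threshold 0.5 it is skipped rather than
                    # counted as a false positive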

        if label_id not in precisions:
            precisions[label_id] = []

        if label_id not in precisions_50:
            precisions_50[label_id] = []

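        # nothing was measured at this threshold: if the label occurred at all
        # (as a prediction or a ground truth), record an AP of zero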
        if len(current_overlap_infos) == 0:
            if found_predictions > 0 or found_ground_truths > 0:
                precisions[label_id] += [0.0]
                if threshold == .5:
                    precisions_50[label_id] += [0.0]
            continue

        # sort matches ascending by confidence
        current_overlap_infos.sort(key=lambda entry: entry[1])

        precision = []
        recall = []

        _, indices = np.unique([entry[1] for entry in current_overlap_infos], return_index=True)
        indices = list(indices)

        positives = sum([int(entry[0]) for entry in current_overlap_infos])

        # indices always points to the first occurrence of each unique
        # confidence. At each cutoff we discard only the entries strictly
        # below it, so the cumulative sum is shifted by one element.
        true_positives_up_to = [0] + list(np.cumsum([entry[0] for entry in current_overlap_infos]))
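        # e.g. assigned flags [0, 1, 1] give true_positives_up_to [0, 0, 1, 2]:
        # position `index` counts only the true positives strictly before it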

        for index in indices:
            true_positives = positives - true_positives_up_to[index]
            false_positives = len(current_overlap_infos) - index - true_positives
            false_negatives = missed_ground_truths + true_positives_up_to[index]

            if true_positives + false_positives == 0:
                current_precision = 0
            else:
                current_precision = true_positives / float(true_positives + false_positives)

            if true_positives + false_negatives == 0:
                current_recall = 0
            else:
                current_recall = true_positives / float(true_positives + false_negatives)

            precision += [current_precision]
            recall += [current_recall]

        # insert "if you don't do anything, you cannot do it wrong" point
        precision.append(1)
        recall.append(0)

        # recall needs to be sorted ascending for auc
        precision.reverse()
        recall.reverse()

        # AP is the area under curve (auc) of recall-precision
        average_precision = sklearn.metrics.auc(recall, precision)

        if args.plot:
            label_name = labels[label_id]['name']
            plot_precision_recall(
                precision,
                recall,
                average_precision,
                threshold,
                label_name,
                os.path.join(args.plot_dir, '{}_{:d}.{}'.format(label_name, int(threshold * 100), args.plot_extension))
            )

        precisions[label_id] += [average_precision]
        if threshold == .5:
            precisions_50[label_id] += [average_precision]

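    # average the per-threshold APs for each label; labels that never occurred
    # as a prediction or a ground truth keep an empty list and become NaN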
    for label_id in precisions.keys():
        if len(precisions[label_id]) > 0:
            precisions[label_id] = np.average(precisions[label_id])
        else:
            precisions[label_id] = float('nan')

    for label_id in precisions_50.keys():
        if len(precisions_50[label_id]) > 0:
            precisions_50[label_id] = np.average(precisions_50[label_id])
        else:
            precisions_50[label_id] = float('nan')

    return precisions, precisions_50
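
To make the precision/recall accumulation above concrete, here is a minimal,
self-contained sketch of the same computation on hand-made data. The
overlap_infos pairs and the missed count are invented stand-ins for
current_overlap_infos and missed_ground_truths; the rest mirrors the function
body step by step.

import numpy as np
import sklearn.metrics

# invented toy data: (assigned, confidence) pairs plus one unmatched ground truth
overlap_infos = [(True, 0.9), (False, 0.8), (True, 0.8), (False, 0.3)]
missed = 1

# sort ascending by confidence, as in the function above
overlap_infos.sort(key=lambda entry: entry[1])
_, indices = np.unique([entry[1] for entry in overlap_infos], return_index=True)
positives = sum(int(entry[0]) for entry in overlap_infos)
true_positives_up_to = [0] + list(np.cumsum([entry[0] for entry in overlap_infos]))

precision, recall = [], []
for index in indices:
    # keep every detection with confidence >= the cutoff at `index`
    tp = positives - true_positives_up_to[index]
    fp = len(overlap_infos) - index - tp
    fn = missed + true_positives_up_to[index]
    precision.append(tp / float(tp + fp) if tp + fp else 0)
    recall.append(tp / float(tp + fn) if tp + fn else 0)

precision.append(1)  # the "do nothing" point: precision 1 at recall 0
recall.append(0)
precision.reverse()
recall.reverse()

print(sklearn.metrics.auc(recall, precision))  # ~0.61 for this toy data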