def evaluateMaskMatches()

in maskrcnn_benchmark/data/datasets/evaluation/cityscapes/eval_instances.py [0:0]


def _maskIou(intersection, gtPixelCount, predPixelCount):
    """Return the intersection-over-union of two masks from their pixel counts."""
    return float(intersection) / (gtPixelCount + predPixelCount - intersection)


def _collectImageVectors(img, labelName, minRegionSize, overlapTh):
    """Collect the per-image contributions to y_true / y_score for one label.

    Returns a tuple ``(haveGt, havePred, trueLabels, scores, hardFns)`` where
    ``trueLabels`` and ``scores`` are parallel Python lists ordered as:
    matched ground truth instances, then duplicate matches (false positives),
    then unmatched, non-ignored predictions (false positives).
    ``hardFns`` counts ground truth instances without any matching prediction.
    """
    predInstances = img["prediction"][labelName]
    # ground truth instances smaller than minRegionSize are not evaluated
    gtInstances = [
        gt for gt in img["groundTruth"][labelName] if gt["pixelCount"] >= minRegionSize
    ]

    matchedScores = []
    falsePositiveScores = []
    hardFns = 0

    # collect matches
    for gt in gtInstances:
        bestScore = None
        for pred in gt["matchedPred"]:
            overlap = _maskIou(
                pred["maskIntersection"], gt["pixelCount"], pred["pixelCount"]
            )
            if overlap > overlapTh:
                confidence = pred["confidence"]
                if bestScore is None:
                    bestScore = confidence
                else:
                    # if we already had a prediction for this ground truth, the
                    # prediction with the lower score is automatically a false positive
                    falsePositiveScores.append(min(bestScore, confidence))
                    bestScore = max(bestScore, confidence)
        if bestScore is None:
            hardFns += 1
        else:
            matchedScores.append(bestScore)

    # collect non-matched predictions as false positives
    for pred in predInstances:
        matchedGts = pred["matchedGt"]
        foundGt = any(
            _maskIou(gt["maskIntersection"], gt["pixelCount"], pred["pixelCount"])
            > overlapTh
            for gt in matchedGts
        )
        if foundGt:
            continue

        # pixels shared with ground truth instances that are too small to be evaluated
        nbIgnorePixels = sum(
            gt["maskIntersection"]
            for gt in matchedGts
            if gt["pixelCount"] < minRegionSize
        )
        if pred["pixelCount"] <= 0:
            proportionIgnore = 0
        else:
            proportionIgnore = float(nbIgnorePixels) / pred["pixelCount"]

        # a prediction that mostly covers ignored regions is dropped,
        # everything else counts as a false positive
        if proportionIgnore <= overlapTh:
            falsePositiveScores.append(pred["confidence"])

    trueLabels = [1.0] * len(matchedScores) + [0.0] * len(falsePositiveScores)
    scores = matchedScores + falsePositiveScores
    return bool(gtInstances), bool(predInstances), trueLabels, scores, hardFns


def _averagePrecision(y_true, y_score, hardFns):
    """Integrate the precision-recall curve defined by y_true / y_score.

    ``hardFns`` is the number of ground truth instances that never appear in
    the vectors (no matching prediction); they only lower the recall.
    Returns 0.0 when the vectors are empty, i.e. nothing was ever recalled.
    """
    if y_score.size == 0:
        return 0.0

    # sorting and cumsum
    scoreArgSort = np.argsort(y_score)
    yScoreSorted = y_score[scoreArgSort]
    yTrueSortedCumsum = np.cumsum(y_true[scoreArgSort])

    # first index of every unique score, i.e. of every threshold
    _, uniqueIndices = np.unique(yScoreSorted, return_index=True)

    nbExamples = len(yScoreSorted)
    nbTrueExamples = yTrueSortedCumsum[-1]

    # one extra slot for the artificial first point of the curve
    precision = np.zeros(len(uniqueIndices) + 1)
    recall = np.zeros(len(uniqueIndices) + 1)

    # append a zero to the cumsum so that an index of -1 (the lowest
    # threshold) reads "no true examples below this threshold"
    yTrueSortedCumsum = np.append(yTrueSortedCumsum, 0)

    cumSum = yTrueSortedCumsum[uniqueIndices - 1]
    tp = nbTrueExamples - cumSum
    fp = nbExamples - uniqueIndices - tp
    fn = cumSum + hardFns
    precision[:-1] = tp / (tp + fp)
    recall[:-1] = tp / (tp + fn)

    # first point in curve is artificial
    precision[-1] = 1.0
    recall[-1] = 0.0

    # step-wise integration: the width of each step is half the recall
    # difference between its neighbours, computed via a convolution with
    # manually padded boundaries
    recallForConv = np.append(recall[0], recall)
    recallForConv = np.append(recallForConv, 0.0)
    stepWidths = np.convolve(recallForConv, [-0.5, 0, 0.5], "valid")

    # integration is now simply a dot product
    return np.dot(precision, stepWidths)


def evaluateMaskMatches(matches, args):
    """Compute the average precision for every region size, label and overlap.

    For each combination two vectors are built over all images:
    y_true is 1 where the ground truth says true and 0 otherwise, y_score is
    the confidence of the prediction.

        gt instance with matched prediction -> y_true 1, y_score confidence
        gt instance w/o  matched prediction -> counted as hard false negative
        false positive prediction           -> y_true 0, y_score confidence

    The matching only makes sense for an overlap threshold >= 0.5, since only
    then a single prediction can either be ignored or matched, but never both,
    and can never match two ground truth instances.

    Parameters:
        matches: iterable of per-image dicts with the keys "prediction" and
            "groundTruth", each mapping a label name to a list of instances.
            Ground truth instances provide "pixelCount" and "matchedPred";
            predictions provide "pixelCount", "confidence" and "matchedGt";
            matched entries additionally carry "maskIntersection".
        args: object with the attributes ``overlaps`` (overlap thresholds),
            ``minRegionSizes`` (minimum ground truth pixel counts) and
            ``instLabels`` (label names).

    Returns:
        Float array of shape (len(minRegionSizes), len(instLabels), len(overlaps)).
        An entry is NaN if the label has no ground truth instance at all and
        0.0 if it has ground truth but nothing was recalled.
    """
    overlaps = args.overlaps
    minRegionSizes = args.minRegionSizes
    instLabels = args.instLabels

    # dimensions: region size, class, overlap
    ap = np.zeros((len(minRegionSizes), len(instLabels), len(overlaps)), float)

    for dI, minRegionSize in enumerate(minRegionSizes):
        for oI, overlapTh in enumerate(overlaps):
            for lI, labelName in enumerate(instLabels):
                trueLabels = []
                scores = []
                # count hard false negatives
                hardFns = 0
                # found at least one gt and predicted instance?
                haveGt = False
                havePred = False

                for img in matches:
                    imgHasGt, imgHasPred, imgTrue, imgScores, imgHardFns = (
                        _collectImageVectors(img, labelName, minRegionSize, overlapTh)
                    )
                    haveGt = haveGt or imgHasGt
                    havePred = havePred or imgHasPred
                    trueLabels.extend(imgTrue)
                    scores.extend(imgScores)
                    hardFns += imgHardFns

                if haveGt and havePred:
                    apCurrent = _averagePrecision(
                        np.asarray(trueLabels, dtype=float),
                        np.asarray(scores, dtype=float),
                        hardFns,
                    )
                elif haveGt:
                    apCurrent = 0.0
                else:
                    apCurrent = float("nan")
                ap[dI, lI, oI] = apCurrent

    return ap