def evaluation()

in bugbug/models/regressor.py [0:0]


    def evaluation(self) -> None:
        """Evaluate the model on recent commits and print candidate risk bands.

        Commits pushed within the last ``EVALUATION_MONTHS`` months (excluding
        backed-out and wpt-sync commits) are grouped by bug ID. Each bug gets
        the maximum value of column 1 of ``self.classify(..., probabilities=True)``
        across its commits (presumably the "regressor" class probability --
        confirm against ``classify``), paired with whether the bug ID appears
        in any bug's ``regressed_by`` list.

        The method then prints, to stdout, cumulative regression rates that
        help choose the probability thresholds separating three risk bands
        (below-average, average and above-average risk). It returns nothing.
        """
        # chain.from_iterable flattens in linear time; sum(lists, []) is quadratic.
        bug_regressors = set(
            itertools.chain.from_iterable(
                bug["regressed_by"] for bug in bugzilla.get_bugs()
            )
        )

        # Use the past EVALUATION_MONTHS months of data (make sure it is not
        # also used for training!). The cutoff is loop-invariant, so compute
        # it once.
        cutoff_date = datetime.utcnow() - relativedelta(months=EVALUATION_MONTHS)

        commits = []

        for commit_data in repository.get_commits():
            if commit_data["backedoutby"]:
                continue

            if repository.is_wptsync(commit_data):
                continue

            push_date = dateutil.parser.parse(commit_data["pushdate"])
            if push_date < cutoff_date:
                continue

            commits.append(commit_data)

        logger.info("%d commits in the evaluation set", len(commits))
        bugs_num = len({commit["bug_id"] for commit in commits})
        logger.info("%d bugs in the evaluation set", bugs_num)

        # Sort commits by bug ID, so we can use itertools.groupby to group them by bug ID.
        commits.sort(key=lambda x: x["bug_id"])

        # One (risk probability, is_regressor) pair per bug.
        results = []
        for bug_id, commit_iter in itertools.groupby(commits, lambda x: x["bug_id"]):
            probs = self.classify(list(commit_iter), probabilities=True)
            results.append((max(probs[:, 1]), bug_id in bug_regressors))

        # Without any result the average rate below would divide by zero.
        if not results:
            logger.warning("No commits in the evaluation set, nothing to evaluate")
            return

        # Let's define the risk bands relatively to average risk.
        # On average, around 1 out of 8 (13%) patches cause regressions.
        # Risk band 1 - around 1 out of 15 (7%) patches within this risk band cause regressions.
        # Risk band 2 - around 1 out of 7 (15%) patches within this risk band cause regressions.
        # Risk band 3 - around 1 out of 3 (35%) patches within this risk band cause regressions.

        # Step 1. Calculate % of patches which cause regressions.
        total_landings = len(results)
        total_regressions = sum(is_reg for _, is_reg in results)
        average_regression_rate = total_regressions / total_landings

        logger.info("Average risk is %0.2f", average_regression_rate)

        # Minimum number of patches a band must contain for its rate to be considered.
        MIN_SAMPLE = 200

        # Regression-rate limits of the bands, with a 1% tolerance:
        # band 1 stays below half the average, band 3 stays above double the average.
        band1_max_rate = (average_regression_rate / 2) + 0.01
        band3_min_rate = (average_regression_rate * 2) - 0.01

        # Step 2. Define risk band 1 (half the average risk).
        # Walk from the lowest risk upwards, tracking the cumulative regression rate.
        max_band1_prob = 1.0
        total_landings = 0
        total_regressions = 0
        results.sort(key=lambda x: x[0])
        for prob, is_reg in results:
            total_landings += 1
            if is_reg:
                total_regressions += 1

            if total_landings < MIN_SAMPLE:
                continue

            print(
                f"{total_regressions} out of {total_landings} patches with risk lower than {prob} caused regressions ({total_regressions / total_landings})"
            )

            # No need to go further, since we are interested in half the average risk.
            if total_regressions / total_landings >= band1_max_rate:
                max_band1_prob = prob
                break

        print("\n\n")

        # Step 3. Define risk band 3 (double the average risk).
        # Walk from the highest risk downwards, tracking the cumulative regression rate.
        min_band3_prob = 0.0
        total_landings = 0
        total_regressions = 0
        results.sort(key=lambda x: x[0], reverse=True)
        for prob, is_reg in results:
            total_landings += 1
            if is_reg:
                total_regressions += 1

            if total_landings < MIN_SAMPLE:
                continue

            print(
                f"{total_regressions} out of {total_landings} patches with risk higher than {prob} caused regressions ({total_regressions / total_landings})"
            )

            # No need to go further, since we are interested in double the average risk.
            if total_regressions / total_landings <= band3_min_rate:
                min_band3_prob = prob
                break

        print("\n\n")

        # Step 4. Define risk band 2 (average risk).
        # Try every [prob_start, prob_end] window around the band 1 / band 3
        # boundaries (in 0.01 steps) and print those whose regression rate
        # lies strictly between the band 1 and band 3 limits.
        for prob_start in np.arange(max_band1_prob / 2, max_band1_prob + 0.02, 0.01):
            for prob_end in np.arange(min_band3_prob - 0.02, 0.99, 0.01):
                total_landings = 0
                total_regressions = 0
                for prob, is_reg in results:
                    if prob < prob_start or prob > prob_end:
                        continue

                    total_landings += 1
                    if is_reg:
                        total_regressions += 1

                if total_landings < MIN_SAMPLE:
                    continue

                # Skip windows whose rate belongs to band 1 or band 3.
                regression_rate = total_regressions / total_landings
                if not (band1_max_rate < regression_rate < band3_min_rate):
                    continue

                print(
                    f"{total_regressions} out of {total_landings} patches with risk between {prob_start} and {prob_end} caused regressions ({total_regressions / total_landings})"
                )