in bugbug/models/regressor.py [0:0]
def evaluation(self) -> None:
    """Print candidate probability thresholds for three risk bands.

    Recent commits (pushed within the last ``EVALUATION_MONTHS`` months, not
    backed out, not wpt-sync) are grouped by bug ID and classified with
    ``self.classify``. Each bug is scored with the highest value found in
    column 1 of the returned probabilities and labelled as a regressor when
    its ID appears in the ``regressed_by`` field of any known bug.

    The (score, label) pairs are then scanned to find:

    * band 1 - the low-score prefix whose regression rate stays below
      roughly half of the average rate;
    * band 3 - the high-score prefix whose regression rate stays above
      roughly double the average rate;
    * band 2 - score ranges in between whose regression rate lies between
      those two limits.

    Results are reported through ``logger`` and ``print``; nothing is
    returned. If the evaluation set is empty, a warning is logged and the
    method returns early.
    """
    # IDs of all bugs that are listed as the cause of some regression.
    # chain.from_iterable flattens in linear time (sum(..., []) is quadratic).
    bug_regressors = set(
        itertools.chain.from_iterable(
            bug["regressed_by"] for bug in bugzilla.get_bugs()
        )
    )

    # Only the most recent EVALUATION_MONTHS months are evaluated (make sure
    # this data is not also used for training!). Computed once so every
    # commit is compared against the same cutoff.
    # NOTE(review): utcnow() is timezone-naive; presumably "pushdate" parses
    # to a naive UTC datetime as well - confirm before making this aware.
    oldest_push_date = datetime.utcnow() - relativedelta(months=EVALUATION_MONTHS)

    commits = []
    for commit_data in repository.get_commits():
        if commit_data["backedoutby"]:
            continue

        if repository.is_wptsync(commit_data):
            continue

        push_date = dateutil.parser.parse(commit_data["pushdate"])
        if push_date < oldest_push_date:
            continue

        commits.append(commit_data)

    logger.info("%d commits in the evaluation set", len(commits))
    bugs_num = len({commit["bug_id"] for commit in commits})
    logger.info("%d bugs in the evaluation set", bugs_num)

    # itertools.groupby only merges adjacent items, so sort by bug ID first.
    commits.sort(key=lambda x: x["bug_id"])

    # One (score, is_regressor) pair per bug.
    results = []
    for bug_id, commit_iter in itertools.groupby(commits, lambda x: x["bug_id"]):
        probs = self.classify(list(commit_iter), probabilities=True)
        # Presumably column 1 holds the probability of the positive class;
        # a bug is as risky as its riskiest commit.
        results.append((max(probs[:, 1]), bug_id in bug_regressors))

    if not results:
        # Without this guard the average below divides by zero.
        logger.warning("The evaluation set is empty, no risk bands computed")
        return

    # Let's define the risk bands relatively to average risk.
    # On average, around 1 out of 8 (13%) patches cause regressions.
    # Risk band 1 - around 1 out of 15 (7%) patches within this risk band cause regressions.
    # Risk band 2 - around 1 out of 7 (15%) patches within this risk band cause regressions.
    # Risk band 3 - around 1 out of 3 (35%) patches within this risk band cause regressions.

    # Step 1. Calculate % of patches which cause regressions.
    total_landings = len(results)
    total_regressions = sum(is_reg for _, is_reg in results)
    average_regression_rate = total_regressions / total_landings

    logger.info("Average risk is %0.2f", average_regression_rate)

    # Rates computed on fewer samples than this are considered too noisy.
    MIN_SAMPLE = 200

    # Regression-rate limits separating the bands (with a 1% margin).
    band1_rate_limit = (average_regression_rate / 2) + 0.01
    band3_rate_limit = (average_regression_rate * 2) - 0.01

    # Step 2. Define risk band 1 (half than average risk).
    # Walk from the lowest score upwards until the cumulative regression
    # rate reaches the band 1 limit.
    max_band1_prob = 1.0
    total_landings = 0
    total_regressions = 0
    results.sort(key=lambda x: x[0])
    for prob, is_reg in results:
        total_landings += 1
        if is_reg:
            total_regressions += 1

        if total_landings < MIN_SAMPLE:
            continue

        print(
            f"{total_regressions} out of {total_landings} patches with risk lower than {prob} caused regressions ({total_regressions / total_landings})"
        )

        # No need to go further, since we are interested in half than average risk.
        if total_regressions / total_landings >= band1_rate_limit:
            max_band1_prob = prob
            break

    print("\n\n")

    # Step 3. Define risk band 3 (double than average risk).
    # Walk from the highest score downwards until the cumulative regression
    # rate drops to the band 3 limit.
    min_band3_prob = 0.0
    total_landings = 0
    total_regressions = 0
    results.sort(key=lambda x: x[0], reverse=True)
    for prob, is_reg in results:
        total_landings += 1
        if is_reg:
            total_regressions += 1

        if total_landings < MIN_SAMPLE:
            continue

        print(
            f"{total_regressions} out of {total_landings} patches with risk higher than {prob} caused regressions ({total_regressions / total_landings})"
        )

        # No need to go further, since we are interested in double than average risk.
        if total_regressions / total_landings <= band3_rate_limit:
            min_band3_prob = prob
            break

    print("\n\n")

    # Step 4. Define risk band 2 (average risk).
    # Try every [prob_start, prob_end] range around the band 1 / band 3
    # boundaries, in 0.01 steps. Only counts are needed here, so the order
    # of `results` does not matter.
    for prob_start in np.arange(max_band1_prob / 2, max_band1_prob + 0.02, 0.01):
        for prob_end in np.arange(min_band3_prob - 0.02, 0.99, 0.01):
            total_landings = 0
            total_regressions = 0
            for prob, is_reg in results:
                if prob < prob_start or prob > prob_end:
                    continue

                total_landings += 1
                if is_reg:
                    total_regressions += 1

            if total_landings < MIN_SAMPLE:
                continue

            # Skip ranges whose regression rate belongs to band 1 (too low)
            # or band 3 (too high). This must be an "or" of two tests: the
            # chained form `low > rate > high` can never hold when low < high.
            regression_rate = total_regressions / total_landings
            if (
                regression_rate < band1_rate_limit
                or regression_rate > band3_rate_limit
            ):
                continue

            print(
                f"{total_regressions} out of {total_landings} patches with risk between {prob_start} and {prob_end} caused regressions ({regression_rate})"
            )