in scripts/regressor_finder.py [0:0]
def evaluate(bug_introducing_commits):
    """Measure how well the SZZ results match the known regressors.

    The known regressors come from the "regressed_by" field of the bugs
    database. For every bug with that field set, the commits that fixed the
    bug are looked up, and the regressors SZZ reported for those fix commits
    are compared against the commits attached to the "regressed_by" bugs.

    Args:
        bug_introducing_commits: iterable of mappings, each with a
            "bug_fixing_rev" key and a "bug_introducing_rev" key. Entries
            whose "bug_introducing_rev" is falsy are ignored.

    Returns:
        None. The resulting counts are only reported through ``logger``.

    Raises:
        AssertionError: if the commits or the bugs database could not be
            downloaded.
    """
    # The downloads are deliberately NOT wrapped in an `assert` statement:
    # asserts are stripped under `python -O`, which would silently skip the
    # download itself. AssertionError is kept so the failure type is unchanged.
    logger.info("Downloading commits database...")
    if not db.download(repository.COMMITS_DB):
        raise AssertionError("Failed to download the commits database")

    logger.info("Downloading bugs database...")
    if not db.download(bugzilla.BUGS_DB):
        raise AssertionError("Failed to download the bugs database")

    logger.info("Building bug -> commits map...")
    bug_to_commits_map = defaultdict(list)
    for commit in tqdm(repository.get_commits()):
        bug_to_commits_map[commit["bug_id"]].append(commit["node"])

    logger.info("Loading known regressors using regressed-by information...")
    known_regressors = {}
    for bug in tqdm(bugzilla.get_bugs()):
        if bug["regressed_by"]:
            known_regressors[bug["id"]] = bug["regressed_by"]
    logger.info("Loaded %d known regressors", len(known_regressors))

    # Map each fix commit to the regressors SZZ reported for it.
    fix_to_regressors_map = defaultdict(list)
    for bug_introducing_commit in bug_introducing_commits:
        introducing_rev = bug_introducing_commit["bug_introducing_rev"]
        if not introducing_rev:
            continue

        fix_to_regressors_map[bug_introducing_commit["bug_fixing_rev"]].append(
            introducing_rev
        )

    logger.info("%d fixes linked to regressors", len(fix_to_regressors_map))
    logger.info(
        "%d regressors linked to fixes",
        sum(len(regressors) for regressors in fix_to_regressors_map.values()),
    )

    logger.info("Measuring how many known regressors SZZ was able to find correctly...")
    all_regressors = 0
    perfect_regressors = 0
    found_regressors = 0
    misassigned_regressors = 0
    for bug_id, regressor_bugs in tqdm(known_regressors.items()):
        # Get all commits which fixed the bug. `.get` is used on the
        # defaultdicts throughout this loop so that lookups never insert
        # empty entries as a side effect.
        fix_commits = bug_to_commits_map.get(bug_id, [])
        if not fix_commits:
            continue

        # Skip bug/regressor when we didn't analyze the commits to fix the bug (as
        # certainly we can't have found the regressor in this case).
        if not any(fix_commit in fix_to_regressors_map for fix_commit in fix_commits):
            continue

        # Get all commits linked to the regressor bugs. A set gives constant
        # time membership checks below.
        # NOTE(review): this relies on revisions being hashable; commit nodes
        # are already used as dict keys above, "bug_introducing_rev" values are
        # presumably the same kind of identifier - confirm.
        regressor_commits = set()
        for regressor_bug in regressor_bugs:
            regressor_commits.update(bug_to_commits_map.get(regressor_bug, ()))

        if not regressor_commits:
            continue

        # found_good: at least one reported regressor is a known one.
        # found_bad: at least one reported regressor is not a known one.
        found_good = False
        found_bad = False
        for fix_commit in fix_commits:
            for candidate in fix_to_regressors_map.get(fix_commit, ()):
                if candidate in regressor_commits:
                    found_good = True
                else:
                    found_bad = True

        all_regressors += 1

        if found_good and not found_bad:
            perfect_regressors += 1
        if found_good:
            found_regressors += 1
        if found_bad:
            misassigned_regressors += 1

    logger.info(
        "Perfectly found %d regressors out of %d", perfect_regressors, all_regressors
    )
    logger.info("Found %d regressors out of %d", found_regressors, all_regressors)
    logger.info(
        "Misassigned %d regressors out of %d", misassigned_regressors, all_regressors
    )