scripts/past_bugs_by_unit.py (189 lines of code) (raw):

# -*- coding: utf-8 -*-
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.

import argparse
import json
import logging
from collections import defaultdict

from tqdm import tqdm

from bugbug import bugzilla, db, repository
from bugbug.models.regressor import BUG_FIXING_COMMITS_DB
from bugbug.utils import zstd_compress

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Prefixes of the generated files, in the order they are written. Each prefix
# yields one "data/<prefix>_by_<dimension>.json" file per dimension plus one
# "data/<prefix>_by_function.json" file.
_PAST_BUG_KINDS = (
    "past_regressions",
    "past_fixed_bugs",
    "past_regression_blocked_bugs",
    "past_fixed_bug_blocked_bugs",
)


def _download_or_fail(path) -> None:
    """Download the database at *path* and fail loudly if that did not work.

    The call to ``db.download`` is deliberately kept out of an ``assert``
    statement: asserts are removed when Python runs with ``-O``, which would
    silently skip the download itself.

    Raises:
        AssertionError: if ``db.download`` returns a falsy value. This is the
            same exception type an ``assert`` on the result would raise.
    """
    if not db.download(path):
        raise AssertionError(f"Failed to download {path!r}")


def _dump_and_compress(path: str, data) -> None:
    """Serialize *data* as JSON to *path*, then zstd-compress that file."""
    with open(path, "w") as f:
        json.dump(data, f)
    # Compress only once the file has been closed, so all data is flushed.
    zstd_compress(path)


class PastBugsCollector:
    """Collect the past bugs linked to files, directories, components and functions."""

    def __init__(self) -> None:
        """Download the commits, bugs and commit classification databases."""
        logger.info("Downloading commits database...")
        _download_or_fail(repository.COMMITS_DB)

        logger.info("Downloading bugs database...")
        _download_or_fail(bugzilla.BUGS_DB)

        logger.info("Download commit classifications...")
        _download_or_fail(BUG_FIXING_COMMITS_DB)

    def go(self) -> None:
        """Build the past-bug maps and write them out as compressed JSON.

        For every commit that has not been backed out and whose bug is known,
        the bug IDs of interest are attached to each file, directory,
        component and function listed in the commit. Four kinds of bug lists
        are tracked (see ``_PAST_BUG_KINDS``); each is written to
        ``data/<kind>_by_<unit>.json`` and then compressed with
        ``zstd_compress``.
        """
        logger.info(
            "Generate map of bug ID -> bug data for all bugs which were defects"
        )
        # Stream the classifications straight into a set: only the revisions
        # are needed, so there is no reason to hold the full list in memory.
        # NOTE(review): "d" / "r" presumably stand for defect / regression
        # fixes -- confirm against the regressor model.
        bug_fixing_commits_nodes = {
            bug_fixing_commit["rev"]
            for bug_fixing_commit in db.read(BUG_FIXING_COMMITS_DB)
            if bug_fixing_commit["type"] in ("d", "r")
        }

        logger.info("%d bug-fixing commits to analyze", len(bug_fixing_commits_nodes))

        all_bug_ids = {commit["bug_id"] for commit in repository.get_commits()}

        # Only keep the bugs that are referenced by at least one commit.
        bug_map = {
            bug["id"]: bug for bug in bugzilla.get_bugs() if bug["id"] in all_bug_ids
        }

        logger.info(
            "Generate a map from files/functions to the bugs which were fixed/introduced by touching them"
        )

        # TODO: Support "moving" past bugs between files when they are renamed and between functions when they are
        # moved across files.

        by_dimensions = ["file", "directory", "component"]

        def dimension_to_field(dimension: str) -> str:
            """Return the commit field that lists the units of *dimension*."""
            return f"{dimension}s" if dimension != "directory" else "directories"

        # kind -> dimension -> path -> bug IDs (duplicates are removed later).
        past_bugs_by: dict[str, dict[str, dict[str, list[int]]]] = {
            kind: defaultdict(lambda: defaultdict(list)) for kind in _PAST_BUG_KINDS
        }
        # kind -> path -> function name -> bug IDs.
        past_bugs_by_function: dict[str, dict[str, dict[str, list[int]]]] = {
            kind: defaultdict(lambda: defaultdict(list)) for kind in _PAST_BUG_KINDS
        }

        def record(kind: str, commit: dict, bug_ids: list[int]) -> None:
            """Attach *bug_ids* to every unit and function touched by *commit*."""
            for dimension in by_dimensions:
                for path in commit[dimension_to_field(dimension)]:
                    past_bugs_by[kind][dimension][path].extend(bug_ids)

            for path, f_group in commit["functions"].items():
                for f in f_group:
                    past_bugs_by_function[kind][path][f["name"]].extend(bug_ids)

        for commit in tqdm(repository.get_commits()):
            if commit["bug_id"] not in bug_map:
                continue

            if commit["backedoutby"]:
                continue

            bug = bug_map[commit["bug_id"]]

            has_regressions = bool(bug["regressions"])
            is_bug_fix = commit["node"] in bug_fixing_commits_nodes
            if not has_regressions and not is_bug_fix:
                continue

            # The blocked bugs depend only on the bug, not on the path being
            # visited, so compute them once per commit instead of once per
            # file/directory/component/function.
            # NOTE(review): assumes find_blocked_by is a pure function of its
            # arguments; the result is copied into a list so it can be reused.
            blocked_bugs = list(bugzilla.find_blocked_by(bug_map, bug))

            if has_regressions:
                regressions = [
                    bug_id for bug_id in bug["regressions"] if bug_id in bug_map
                ]
                record("past_regressions", commit, regressions)
                record("past_regression_blocked_bugs", commit, blocked_bugs)

            if is_bug_fix:
                record("past_fixed_bugs", commit, [bug["id"]])
                record("past_fixed_bug_blocked_bugs", commit, blocked_bugs)

        def _transform(bug_ids: list[int]) -> list[dict]:
            """Turn bug IDs into summary dicts, dropping repeated IDs.

            The first occurrence of each ID determines its position in the
            result.
            """
            results = []
            # dict.fromkeys removes duplicates while keeping first-seen order.
            for bug_id in dict.fromkeys(bug_ids):
                bug = bug_map[bug_id]
                results.append(
                    {
                        "id": bug_id,
                        "summary": bug["summary"],
                        "component": "{}::{}".format(bug["product"], bug["component"]),
                    }
                )
            return results

        def past_bug_ids_to_summaries(
            past_bugs_by: dict[str, list[int]],
        ) -> dict[str, list[dict]]:
            """Map each path to the summaries of its bugs."""
            return {path: _transform(bug_ids) for path, bug_ids in past_bugs_by.items()}

        def past_function_bug_ids_to_summaries(
            past_bugs: dict[str, dict[str, list[int]]],
        ) -> dict[str, dict[str, list[dict]]]:
            """Map each path and function name to the summaries of its bugs."""
            return {
                path: {
                    func: _transform(bug_ids) for func, bug_ids in funcs_bugs.items()
                }
                for path, funcs_bugs in past_bugs.items()
            }

        for dimension in by_dimensions:
            for kind in _PAST_BUG_KINDS:
                _dump_and_compress(
                    f"data/{kind}_by_{dimension}.json",
                    past_bug_ids_to_summaries(past_bugs_by[kind][dimension]),
                )

        for kind in _PAST_BUG_KINDS:
            _dump_and_compress(
                f"data/{kind}_by_function.json",
                past_function_bug_ids_to_summaries(past_bugs_by_function[kind]),
            )


def main() -> None:
    """Parse the command line (no options besides --help) and run the collector."""
    description = "Find past bugs linked to given units of source code"
    parser = argparse.ArgumentParser(description=description)
    parser.parse_args()

    past_bugs_collector = PastBugsCollector()
    past_bugs_collector.go()


if __name__ == "__main__":
    main()