def get_bugbug_labels()

in bugbug/models/defect.py [0:0]


    def get_bugbug_labels(self, kind="bug") -> dict[int, Any]:
        """Build the bug id -> label mapping for the requested classification.

        Labels come from the stored label sets (``labels.get_labels``) and are
        then augmented with what can be derived from the bugs returned by
        ``bugzilla.get_bugs()`` (keywords, ``regressed_by``, keyword history
        and the ``type`` field).

        Args:
            kind: One of ``"bug"``, ``"regression"`` or
                ``"defect_enhancement_task"``.

        Returns:
            A dict keyed by integer bug id. Values are ``1``/``0`` for the
            ``"bug"`` and ``"regression"`` kinds, and one of ``"defect"``,
            ``"enhancement"`` or ``"task"`` for ``"defect_enhancement_task"``.
            Only bugs seen in ``bugzilla.get_bugs()`` are included.

        Raises:
            AssertionError: If ``kind`` or a stored category has an unexpected
                value.
        """
        assert kind in ["bug", "regression", "defect_enhancement_task"]

        want_bug = kind == "bug"
        want_regression = kind == "regression"
        want_type = kind == "defect_enhancement_task"
        binary_target = want_bug or want_regression

        def _type_was_changed(bug):
            # True as soon as any history entry records a change of 'type'.
            return any(
                change["field_name"] == "type"
                for entry in bug["history"]
                for change in entry["changes"]
            )

        def _lists_regression(keyword_csv):
            # The history stores keywords as a comma-separated string.
            return "regression" in [k.strip() for k in keyword_csv.split(",")]

        classes: dict[int, Any] = {}

        # Step 1: the bug / not-a-bug label set.
        for bug_id, category in labels.get_labels("bug_nobug"):
            assert category in ["True", "False"], f"unexpected category {category}"
            if want_bug:
                classes[int(bug_id)] = int(category == "True")
            elif want_regression:
                # Something that is not a bug cannot be a regression.
                if category == "False":
                    classes[int(bug_id)] = 0
            elif want_type and category == "True":
                classes[int(bug_id)] = "defect"

        # Step 2: the regression / bug / not-a-bug label set.
        for bug_id, category in labels.get_labels("regression_bug_nobug"):
            assert category in [
                "nobug",
                "bug_unknown_regression",
                "bug_no_regression",
                "regression",
            ], f"unexpected category {category}"
            if want_bug:
                classes[int(bug_id)] = 0 if category == "nobug" else 1
            elif want_regression:
                # Bugs with unknown regression status give no usable label.
                if category != "bug_unknown_regression":
                    classes[int(bug_id)] = 1 if category == "regression" else 0
            elif want_type and category != "nobug":
                classes[int(bug_id)] = "defect"

        # Step 3: the defect / enhancement / task label sets. An entry of the
        # "_p" set is only used when none of the "_e", "_s" and "_h" sets
        # disagrees with it.
        labels_e = dict(labels.get_labels("defect_enhancement_task_e"))
        labels_p = dict(labels.get_labels("defect_enhancement_task_p"))
        labels_s = dict(labels.get_labels("defect_enhancement_task_s"))
        labels_h = dict(labels.get_labels("defect_enhancement_task_h"))
        other_sets = (labels_e, labels_s, labels_h)

        agreed_labels = (
            (bug_id, category)
            for bug_id, category in labels_p.items()
            if all(
                bug_id not in other or other[bug_id] == category
                for other in other_sets
            )
        )

        for bug_id, category in itertools.chain(
            labels.get_labels("defect_enhancement_task"), agreed_labels
        ):
            assert category in ["defect", "enhancement", "task"]
            if want_bug:
                classes[int(bug_id)] = 1 if category == "defect" else 0
            elif want_regression:
                if category in ["enhancement", "task"]:
                    classes[int(bug_id)] = 0
            elif want_type:
                classes[int(bug_id)] = category

        # Step 4: augment labels by using bugs marked as 'regression' or
        # 'feature', as they are basically labelled, and also use the bug
        # type field.
        seen_bug_ids = set()
        for bug in bugzilla.get_bugs():
            bug_id = int(bug["id"])
            seen_bug_ids.add(bug_id)

            keywords = bug["keywords"]

            # Ignore meta bugs, their types are not consistently set.
            if "meta" in keywords:
                classes.pop(bug_id, None)
                continue

            # Labels set in the previous steps take precedence.
            if bug_id in classes:
                continue

            marked_as_regression = (
                len(bug["regressed_by"]) > 0
                or "regression" in keywords
                or "talos-regression" in keywords
                or bug.get("cf_has_regression_range") == "yes"
            )

            if marked_as_regression:
                classes[bug_id] = 1 if binary_target else "defect"
            elif "feature" in keywords:
                classes[bug_id] = 0 if binary_target else "enhancement"
            elif want_regression:
                # Replay the keyword history; the latest matching change wins.
                for entry in bug["history"]:
                    for change in entry["changes"]:
                        if change["field_name"] != "keywords":
                            continue
                        if _lists_regression(change["removed"]):
                            classes[bug_id] = 0
                        elif _lists_regression(change["added"]):
                            classes[bug_id] = 1

            # We can use the type as a label for all bugs after the migration
            # (https://bugzilla.mozilla.org/show_bug.cgi?id=1524738), if they
            # are not defects. Otherwise the type is only usable when it has
            # been modified: bugs are filed by default as 'defect', so an
            # untouched 'defect' type could be a mistake.
            if bug_id > 1540807 and bug["type"] != "defect":
                can_use_type = True
                can_use_defect_type = False
            else:
                can_use_type = can_use_defect_type = _type_was_changed(bug)

            if not can_use_type:
                continue

            bug_type = bug["type"]
            if bug_type in ("enhancement", "task"):
                if binary_target:
                    classes[bug_id] = 0
                elif want_type:
                    classes[bug_id] = bug_type
            elif bug_type == "defect" and can_use_defect_type:
                # A 'defect' type says nothing about being a regression.
                if want_bug:
                    classes[bug_id] = 1
                elif want_type:
                    classes[bug_id] = "defect"

        # Remove labels which belong to bugs for which we have no data.
        return {
            labelled_id: label
            for labelled_id, label in classes.items()
            if labelled_id in seen_bug_ids
        }