def transform()

in bugbug/repository.py [0:0]


def transform(hg: hglib.client, repo_dir: str, commit: Commit) -> Commit:
    """Fill ``commit`` with per-file statistics and code metrics from its patch.

    After calling ``hg_modified_files(hg, commit)``, the commit is returned
    untouched when it is ignored, backs out other commits, or has no bug ID.
    Otherwise the patch of the commit is exported and parsed, and for every
    file in it the function updates the test / source-code / other counters
    (files modified, lines added, lines deleted), the set of file types, the
    file-size aggregates and, for source files other than IDL/IPDL/WebIDL,
    the code metrics returned by the code analysis server.

    Args:
        hg: hglib client used to export the patch and read file contents.
        repo_dir: Repository directory; joined with each patch path before
            the file is read through ``hg.cat``.
        commit: The commit to enrich; it is mutated in place.

    Returns:
        The same ``commit`` object that was passed in.

    Raises:
        Exception: Whatever ``rs_parsepatch.get_lines`` raises is logged and
            re-raised.
        hglib.error.CommandError: When ``hg.cat`` fails for a reason other
            than the file not existing in the requested revision.
    """
    hg_modified_files(hg, commit)

    if commit.ignored or len(commit.backsout) > 0 or commit.bug_id is None:
        return commit

    assert code_analysis_server is not None

    source_code_sizes = []
    other_sizes = []
    test_sizes = []
    metrics_file_count = 0

    # Both revision identifiers are loop-invariant, so build them once.
    node_rev = commit.node.encode("ascii")
    # "<node>^" is the Mercurial revset for the first parent of <node>: the
    # state of the repository *before* this commit was applied.
    parent_rev = node_rev + b"^"

    patch = hg.export(revs=[node_rev], git=True)
    try:
        patch_data = rs_parsepatch.get_lines(patch)
    except Exception:
        logger.error(f"Exception while analyzing {commit.node}")
        raise

    for stats in patch_data:
        path = stats["filename"]

        if stats["binary"]:
            if not is_test(path):
                commit.types.add("binary")
            continue

        # `size` stays None when the file was deleted by the commit or cannot
        # be found in the commit's revision; such files are left out of the
        # size aggregates and of the metrics.
        size = None
        after = None
        hg_path = None
        if not stats["deleted"]:
            hg_path = os.path.join(repo_dir, path).encode("utf-8")
            try:
                after = hg.cat([hg_path], rev=node_rev)
                # The size of a file is its number of newline characters.
                size = after.count(b"\n")
            except hglib.error.CommandError as e:
                if b"no such file in rev" not in e.err:
                    raise

        type_ = get_type(path)

        if is_test(path):
            commit.test_files_modified_num += 1

            commit.test_added += len(stats["added_lines"])
            commit.test_deleted += len(stats["deleted_lines"])

            if size is not None:
                test_sizes.append(size)

            # We don't have a 'test' equivalent of types, as most tests are JS,
            # so this wouldn't add useful information.
        elif type_ in SOURCE_CODE_TYPES_TO_EXT:
            commit.source_code_files_modified_num += 1

            commit.source_code_added += len(stats["added_lines"])
            commit.source_code_deleted += len(stats["deleted_lines"])

            if size is not None:
                source_code_sizes.append(size)

                if type_ != "IDL/IPDL/WebIDL":
                    after_metrics = code_analysis_server.metrics(
                        path, after, unit=False
                    )
                    if after_metrics.get("spaces"):
                        metrics_file_count += 1

                        before_metrics = {}
                        if not stats["new"]:
                            try:
                                # Read the pre-commit contents from the parent
                                # revision. Reading them from the commit itself
                                # would just return `after` again, and the
                                # deleted line numbers (which refer to the old
                                # file) would be matched against the new file.
                                before = hg.cat([hg_path], rev=parent_rev)

                                before_metrics = code_analysis_server.metrics(
                                    path, before, unit=False
                                )
                            except hglib.error.CommandError as e:
                                # The path may not exist in the parent; in that
                                # case the "before" metrics stay empty.
                                if b"no such file in rev" not in e.err:
                                    raise

                        set_commit_metrics(
                            commit,
                            path,
                            stats["deleted_lines"],
                            stats["added_lines"],
                            before_metrics,
                            after_metrics,
                        )

                    # Replace type with "Objective-C/C++" if rust-code-analysis detected this is an Objective-C/C++ file.
                    if (
                        type_ == "C/C++"
                        and after_metrics.get("language") == "obj-c/c++"
                    ):
                        type_ = "Objective-C/C++"

            commit.types.add(type_)
        else:
            commit.other_files_modified_num += 1

            commit.other_added += len(stats["added_lines"])
            commit.other_deleted += len(stats["deleted_lines"])

            if size is not None:
                other_sizes.append(size)

            if type_:
                commit.types.add(type_)

    # Size aggregates; every aggregate is 0 when no file of that kind was sized.
    commit.total_source_code_file_size = sum(source_code_sizes)
    commit.average_source_code_file_size = (
        commit.total_source_code_file_size / len(source_code_sizes)
        if source_code_sizes
        else 0
    )
    commit.maximum_source_code_file_size = max(source_code_sizes, default=0)
    commit.minimum_source_code_file_size = min(source_code_sizes, default=0)

    commit.total_other_file_size = sum(other_sizes)
    commit.average_other_file_size = (
        commit.total_other_file_size / len(other_sizes) if other_sizes else 0
    )
    commit.maximum_other_file_size = max(other_sizes, default=0)
    commit.minimum_other_file_size = min(other_sizes, default=0)

    commit.total_test_file_size = sum(test_sizes)
    commit.average_test_file_size = (
        commit.total_test_file_size / len(test_sizes) if test_sizes else 0
    )
    commit.maximum_test_file_size = max(test_sizes, default=0)
    commit.minimum_test_file_size = min(test_sizes, default=0)

    if metrics_file_count:
        # Average each metric over the files for which metrics were collected.
        for metric in METRIC_NAMES:
            commit.metrics[f"{metric}_avg"] = (
                commit.metrics[f"{metric}_total"] / metrics_file_count
            )
    else:
        # these values are initialized with sys.maxsize (because we take the min)
        # if no files, then reset them to 0 (it'd be stupid to have min > max)
        for metric in METRIC_NAMES:
            commit.metrics[f"{metric}_min"] = 0

    return commit