def main()

in variance-analysis/run_variance_analysis.py
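# Imports needed by this function. The standard-library and third-party
# modules are certain from the calls below; the local helpers
# (variance_analysis_parser, find_task_group_id, get_task_data_paths,
# artifact_downloader, PerftestNotebook, run_variance_analysis) are imported
# from elsewhere in this repository, so their module paths are not shown here.
import pathlib
import shutil

import numpy as np
import yaml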


def main():
    args = variance_analysis_parser().parse_args()
    overwrite = args.overwrite
    prefix = args.prefix
    tests = args.tests

    output_dir = pathlib.Path(args.output).resolve()
    output_dir.mkdir(parents=True, exist_ok=True)

    # Load the analysis configuration (YAML)
    with pathlib.Path(args.config).open() as f:
        config = yaml.safe_load(f)

    # Get the task group IDs for the revisions
    base_revision_ids = find_task_group_id(
        args.base_revision, args.base_branch, search_crons=args.search_crons
    )
    new_revision_ids = find_task_group_id(
        args.new_revision, args.new_branch, search_crons=args.search_crons
    )

    base_task_dirs = [pathlib.Path(output_dir, revid) for revid in base_revision_ids]
    new_task_dirs = [pathlib.Path(output_dir, revid) for revid in new_revision_ids]
    if overwrite:
        for task_dir in base_task_dirs + new_task_dirs:
            if task_dir.exists():
                print("Removing existing task group folder: %s" % str(task_dir))
                shutil.rmtree(str(task_dir))

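    # Helper that returns the artifact directories already on disk for the
    # first revision in `rev_ids` that has any. (Defined for reuse but not
    # called elsewhere in this function.)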
    def _search_for_paths(rev_ids):
        found_paths = []
        for rev_id in rev_ids:
            if found_paths:
                break
            # Get the paths to the directory holding the artifacts
            found_paths = list(
                get_task_data_paths(
                    rev_id, str(output_dir), artifact="perfherder-data"
                ).values()
            )
        return found_paths

    # Set up the perftest notebook
    custom_transform = config.get("custom_transform")
    config["file_groups"] = {}
    file_group = {
        "task_group_id": None,
        "path": None,
        "artifact": "perfherder",
        "run_number": None,
    }

    # Download the artifacts for the base revision
    base_results = []
    for base_revision_id in base_revision_ids:
        artifact_downloader(
            base_revision_id,
            output_dir=str(output_dir),
            test_suites=tests,
            platform=args.platform,
            artifact_to_get=["perfherder-data"],
            unzip_artifact=False,
            download_failures=True,
            ingest_continue=args.skip_download,
        )

        # Standardize the data
        file_group["task_group_id"] = base_revision_id
        file_group["path"] = str(pathlib.Path(output_dir).resolve())

        config["file_groups"] = {"new": file_group}
        config["output"] = str(
            pathlib.Path(output_dir, f"{prefix}base-ptnb-data-{base_revision_id}.json")
        )

        ptnb = PerftestNotebook(
            {"new": file_group},
            config,
            custom_transform=custom_transform,
            sort_files=args.sort_files,
        )
        base_results.append(ptnb.process(True))

    # Download the artifacts for the new revision
    new_results = []
    for new_revision_id in new_revision_ids:
        artifact_downloader(
            new_revision_id,
            output_dir=str(output_dir),
            test_suites=tests,
            platform=args.new_platform or args.platform,
            artifact_to_get=["perfherder-data"],
            unzip_artifact=False,
            download_failures=True,
            ingest_continue=args.skip_download,
        )

        # Standardize the data
        file_group["task_group_id"] = new_revision_id
        file_group["path"] = str(pathlib.Path(output_dir).resolve())

        config["file_groups"] = {"base": file_group}
        config["output"] = str(
            pathlib.Path(output_dir, f"{prefix}new-ptnb-data-{new_revision_id}.json")
        )

        ptnb = PerftestNotebook(
            {"base": file_group},
            config,
            custom_transform=custom_transform,
            sort_files=args.sort_files,
        )
        new_results.append(ptnb.process(True))

    # All of the requested perfherder-data has now been downloaded and
    # standardized. base_results/new_results each hold one formatted JSON blob
    # per task group; combine them into a single set of `base`/`new` results.
    # This handles gathering tasks from multiple cron runs.
    results = {"base": [], "new": []}
    inds = {"base": {}, "new": {}}
    counts = {"base": 0, "new": 0}
    for blob in new_results + base_results:
        for res in blob:
            grouping = res["name"]
            subtest = res["subtest"]
            if subtest not in inds[grouping]:
                inds[grouping][subtest] = counts[grouping]
                results[grouping].append(res)
                counts[grouping] += 1
                continue

            existing_res = results[grouping][inds[grouping][subtest]]
            existing_res["data"].extend(res["data"])
            # Offset the new x-axis values by the last existing value so the
            # runs are concatenated; absolute x-axis positions lose some of
            # their meaning as a result.
            existing_res["xaxis"].extend(
                list(np.asarray(res["xaxis"]) + existing_res["xaxis"][-1])
            )
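            # For example, merging an xaxis of [0, 1, 2] into an existing run
            # whose xaxis is [0, 1, 2] yields [0, 1, 2, 2, 3, 4]: the new
            # values are shifted by the last existing value, which duplicates
            # it at the seam.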

    # Analyze the data
    run_variance_analysis(
        results["base"] + results["new"],
        tests,
        args.platform,
        groupings=["new", "base"],
    )
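
# Example invocation (a sketch, not verified against variance_analysis_parser();
# the flag names are assumed from the argument attributes used above):
#
#   python run_variance_analysis.py \
#       --config variance_config.yml \
#       --output ./variance-output \
#       --base-revision <base-rev> --base-branch autoland \
#       --new-revision <new-rev> --new-branch autoland \
#       --platform test-linux1804-64-shippable/opt \
#       --search-crons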