in variance-analysis/run_variance_analysis.py [0:0]
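# Imports this function relies on; the project-local helpers
# (variance_analysis_parser, find_task_group_id, get_task_data_paths,
# artifact_downloader, PerftestNotebook, run_variance_analysis) are assumed
# to be defined in or imported by the rest of this file
import pathlib
import shutil

import numpy as np
import yaml
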
def main():
    args = variance_analysis_parser().parse_args()
    overwrite = args.overwrite
    prefix = args.prefix
    tests = args.tests

    output_dir = pathlib.Path(args.output).resolve()
    output_dir.mkdir(parents=True, exist_ok=True)

    with pathlib.Path(args.config).open() as f:
        config = yaml.safe_load(f)
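
    # Illustrative sketch of the expected YAML config (an assumption based on
    # the keys used below; only `custom_transform` is read directly, while
    # `file_groups` and `output` are overwritten before each run):
    #
    #   custom_transform: my_transform_module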

    # Get the task group IDs for the revisions
    base_revision_ids = find_task_group_id(
        args.base_revision, args.base_branch, search_crons=args.search_crons
    )
    new_revision_ids = find_task_group_id(
        args.new_revision, args.new_branch, search_crons=args.search_crons
    )

    base_task_dirs = [pathlib.Path(output_dir, revid) for revid in base_revision_ids]
    new_task_dirs = [pathlib.Path(output_dir, revid) for revid in new_revision_ids]

    if overwrite:
        for task_dir in base_task_dirs + new_task_dirs:
            if task_dir.exists():
                print(f"Removing existing task group folder: {task_dir}")
                shutil.rmtree(str(task_dir))

    def _search_for_paths(rev_ids):
        # Return the artifact directories for the first revision that has any
        found_paths = []
        for rev_id in rev_ids:
            if found_paths:
                break
            # Get the paths to the directory holding the artifacts
            found_paths = list(
                get_task_data_paths(
                    rev_id, str(output_dir), artifact="perfherder-data"
                ).values()
            )
        return found_paths
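
    # Hypothetical usage of the helper above (it is never called within this
    # function as excerpted):
    #   base_paths = _search_for_paths(base_revision_ids)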

    # Setup the perftest notebook
    custom_transform = config.get("custom_transform", None)
    config["file_groups"] = {}

    # Template for each revision's file group; the fields below are filled in
    # per revision before being handed to PerftestNotebook
    file_group = {
        "task_group_id": None,
        "path": None,
        "artifact": "perfherder",
        "run_number": None,
    }
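
    # After the loops below fill it in, a group looks roughly like:
    #   {"task_group_id": "<revision's task group id>",
    #    "path": "/path/to/output", "artifact": "perfherder", "run_number": None}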

    # Download the artifacts for the base revision
    base_results = []
    for base_revision_id in base_revision_ids:
        artifact_downloader(
            base_revision_id,
            output_dir=str(output_dir),
            test_suites=tests,
            platform=args.platform,
            artifact_to_get=["perfherder-data"],
            unzip_artifact=False,
            download_failures=True,
            ingest_continue=args.skip_download,
        )

        # Standardize the data, grouped under "base" so it can be
        # distinguished from the new revision's results in the merge below
        file_group["task_group_id"] = base_revision_id
        file_group["path"] = str(output_dir)
        config["file_groups"] = {"base": file_group}
        config["output"] = str(
            pathlib.Path(output_dir, f"{prefix}base-ptnb-data-{base_revision_id}.json")
        )

        ptnb = PerftestNotebook(
            {"base": file_group},
            config,
            custom_transform=custom_transform,
            sort_files=args.sort_files,
        )
        base_results.append(ptnb.process(True))
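
    # Each entry appended above holds one revision's standardized results: a
    # list of result dicts that (as the merge step further below assumes)
    # carry "name", "subtest", "data", and "xaxis" fields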

    # Download the artifacts for the new revision
    new_results = []
    for new_revision_id in new_revision_ids:
        artifact_downloader(
            new_revision_id,
            output_dir=str(output_dir),
            test_suites=tests,
            platform=args.new_platform or args.platform,
            artifact_to_get=["perfherder-data"],
            unzip_artifact=False,
            download_failures=True,
            ingest_continue=args.skip_download,
        )

        # Standardize the data, grouped under "new"
        file_group["task_group_id"] = new_revision_id
        file_group["path"] = str(output_dir)
        config["file_groups"] = {"new": file_group}
        config["output"] = str(
            pathlib.Path(output_dir, f"{prefix}new-ptnb-data-{new_revision_id}.json")
        )

        ptnb = PerftestNotebook(
            {"new": file_group},
            config,
            custom_transform=custom_transform,
            sort_files=args.sort_files,
        )
        new_results.append(ptnb.process(True))

    # Now we have all of the perfherder-data requested and it's been
    # standardized. Combine the standardized data within each of the
    # `new`/`base` groups into a single structure. This handles gathering
    # tasks from crons, where one revision can span multiple task groups.
    results = {"base": [], "new": []}
    inds = {"base": {}, "new": {}}
    counts = {"base": 0, "new": 0}
    for blob in new_results + base_results:
        for res in blob:
            grouping = res["name"]
            subtest = res["subtest"]

            if subtest not in inds[grouping]:
                # First occurrence of this subtest in this grouping: record
                # where it lives so later occurrences can be merged into it
                inds[grouping][subtest] = counts[grouping]
                results[grouping].append(res)
                counts[grouping] += 1
                continue

            # Merge repeated occurrences of a subtest into the existing entry
            existing_res = results[grouping][inds[grouping][subtest]]
            existing_res["data"].extend(res["data"])

            # Offset the incoming xaxis so it continues from the existing one;
            # the xaxis slightly loses its meaning with this change
            existing_res["xaxis"].extend(
                list(np.asarray(res["xaxis"]) + existing_res["xaxis"][-1])
            )
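            # For example, if the existing xaxis ends at 2 and the incoming
            # one is [0, 1, 2], the merged xaxis gains [2, 3, 4]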

    # Analyze the data
    run_variance_analysis(
        results["base"] + results["new"],
        tests,
        args.platform,
        groupings=["new", "base"],
    )
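

# Standard entry point; the hypothetical invocation below assumes
# variance_analysis_parser() exposes flags matching the attribute names used
# above (e.g. --config, --output, --base-revision, --new-revision); check the
# parser for the exact option names:
#   python run_variance_analysis.py --config config.yml --output artifacts \
#       --base-revision <rev> --new-revision <rev>
if __name__ == "__main__":
    main()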