in scripts/shadow_scheduler_stats.py [0:0]
def plot_graphs(granularity: str) -> None:
    """Plot weekly shadow scheduler statistics for the given granularity.

    Three graphs are handed to ``plot_graph``, each resampled to weekly means:

    * the average number of items every scheduler scheduled per push;
    * among pushes with at least one regression, the share of pushes where
      each scheduler caught at least one of them;
    * the share of regressions each scheduler caught per push.

    Args:
        granularity: ``"group"`` selects ``PUSH_DATA_GROUP_DB``; any other
            value selects ``PUSH_DATA_CONFIG_GROUP_DB``. The value is also
            embedded in the graph titles and output file names.

    Raises:
        AssertionError: If the push data DB could not be downloaded.
    """
    push_data_db = (
        test_scheduling.PUSH_DATA_GROUP_DB
        if granularity == "group"
        else test_scheduling.PUSH_DATA_CONFIG_GROUP_DB
    )

    # The download must not live inside an `assert` statement: asserts are
    # stripped under `python -O`, which would silently skip the download.
    # AssertionError is raised explicitly so the failure type is unchanged.
    if not db.download(push_data_db):
        raise AssertionError(f"Failed to download {push_data_db}")

    # Map the first revision of every push to its set of regressions.
    regressions_by_rev = {
        revisions[0]: get_regressions(
            granularity, likely_regressions, possible_regressions
        )
        for revisions, _, _, possible_regressions, likely_regressions in db.read(
            push_data_db
        )
    }

    scheduled_data = []
    caught_data = []

    for scheduler_stat in db.read(SHADOW_SCHEDULER_STATS_DB):
        schedulers = scheduler_stat["schedulers"]
        if not schedulers:
            continue

        # Only pushes for which regression data is available can be analyzed.
        if scheduler_stat["id"] not in regressions_by_rev:
            continue

        regressions = regressions_by_rev[scheduler_stat["id"]]
        date = datetime.utcfromtimestamp(scheduler_stat["date"])

        scheduled_row: dict[str, Any] = {"date": date}
        caught_row: dict[str, Any] = {
            "date": date,
            "regressions": len(regressions),
        }

        for scheduler in schedulers:
            # Computed once and shared by both rows.
            # NOTE(review): assumes get_scheduled is deterministic and
            # side-effect free — confirm.
            scheduled = get_scheduled(granularity, scheduler)
            scheduled_row[scheduler["name"]] = len(scheduled)
            caught_row[scheduler["name"]] = len(regressions & scheduled)

        scheduled_data.append(scheduled_row)
        caught_data.append(caught_row)

    if not scheduled_data:
        # Without any row the frames would have no "date" column and building
        # the index would fail with an opaque KeyError.
        import logging

        logging.getLogger(__name__).warning(
            "No shadow scheduler stats match the available push data; "
            "nothing to plot"
        )
        return

    scheduled_df = DataFrame(scheduled_data).set_index("date")
    caught_df = DataFrame(caught_data).set_index("date")

    plot_graph(
        scheduled_df.resample("W").mean(),
        f"Average number of scheduled {granularity}s",
        f"average_{granularity}_scheduled.svg",
    )

    # Restrict to regressing pushes and clip counts to 0/1 so that the weekly
    # mean becomes the fraction of pushes with at least one regression caught.
    caught_at_least_one_df = (
        caught_df[caught_df.regressions > 0]
        .drop(columns=["regressions"])
        .clip(0, 1)
        .resample("W")
        .mean()
    )
    plot_graph(
        caught_at_least_one_df,
        "Percentage of regressing pushes where we caught at least one regression",
        f"percentage_{granularity}_caught_at_least_one.svg",
    )

    # Pushes without regressions yield 0/0 = NaN here, which `mean` skips.
    caught_ratio_df = (
        caught_df.drop(columns=["regressions"])
        .div(caught_df.regressions, axis=0)
        .resample("W")
        .mean()
    )
    plot_graph(
        caught_ratio_df,
        "Percentage of regressions we caught",
        f"percentage_{granularity}_caught.svg",
    )