def summarize()

in pageload-summary/summarize_old.py [0:0]
123 lines of code
27 McCabe index (conditional complexity)

def _test_means_at(tests, time):
    """Return ``{test: mean}`` for every test that has samples at ``time``.

    Tests with no entry for ``time``, or with an empty entry, are left out so
    that both the geomean and the ratio series treat missing data the same way.
    """
    means = {}
    for test, info in tests.items():
        # NOTE(review): the truthiness check assumes each entry is a plain
        # sequence of numbers (the original ratio pass made the same
        # assumption) -- confirm against organize_data().
        values = info["values"].get(time)
        if values:
            means[test] = np.mean(values)
    return means


def _fill_missing_ratios(ratios):
    """Replace NaN entries with the nearest usable ratio.

    NaNs are forward-filled with the last non-NaN value; NaNs that precede the
    first non-NaN value are back-filled with it. If every entry is NaN the
    series is returned unchanged.
    """
    filled = []
    last_good = np.nan
    for ratio in ratios:
        if np.isnan(ratio):
            ratio = last_good
        else:
            last_good = ratio
        filled.append(ratio)

    # After the forward fill only leading entries can still be NaN.
    first_good = next((r for r in filled if not np.isnan(r)), None)
    if first_good is None:
        return filled
    return [first_good if np.isnan(r) else r for r in filled]


def _min_max_scale(series):
    """Linearly rescale ``series`` so its minimum maps to 0 and maximum to 1.

    An empty series is returned as-is, and a series whose values are all equal
    maps to zeros instead of dividing by zero.
    """
    series = np.asarray(series, dtype=float)
    if series.size == 0:
        return series
    low = np.min(series)
    span = np.max(series) - low
    if span == 0:
        return np.zeros_like(series)
    return (series - low) / span


def summarize(data, platforms):
    """Summarize per-test values into per-app time series and plot them.

    ``data`` and ``platforms`` are handed to ``organize_data``; its result is
    walked as ``platform -> variant -> app -> test -> info``, where
    ``info["values"]`` maps a push time to the samples recorded for that push.

    For every app, and for every push time seen in any of its tests, two
    values are computed from the per-test sample means:

    * the geometric mean of the means of all tests that have data at that
      time, and
    * the geometric mean of each test's mean divided by that test's baseline
      (the mean at the first push time where the test has data). Push times
      that yield no ratio are filled from neighbouring push times.

    One figure per app is shown with both series min-max normalized. Note that
    ``plt.show()`` is called inside the loop, exactly as before.

    Returns:
        dict: ``summary[platform][variant][app]`` with the keys ``"tests"``
        (test names), ``"values-gmean"`` (normalized geomean series),
        ``"values-ratio"`` (filled, un-normalized ratio series) and
        ``"times"`` (sorted push times). Previously the summary was built but
        never returned.
    """
    import logging

    logger = logging.getLogger(__name__)
    org_data = organize_data(data, platforms)

    summary = {}
    for platform, variants in org_data.items():
        for variant, apps in variants.items():
            for app, tests in apps.items():
                # Union of the push times of all tests, in ascending order.
                push_times = sorted(
                    {time for info in tests.values() for time in info["values"]}
                )
                if not push_times:
                    logger.warning(
                        "No push times for %s/%s/%s; skipping", platform, variant, app
                    )
                    continue
                logger.debug(
                    "%s/%s/%s push times: %s", platform, variant, app, push_times
                )

                gmeans = []
                ratios = []
                baselines = {}  # test -> mean at the first push with data
                for time in push_times:
                    means = _test_means_at(tests, time)

                    missing = [test for test in tests if test not in means]
                    if missing:
                        logger.warning(
                            "Tests with no data at push time %s "
                            "(geomean uses the remaining tests): %s",
                            time,
                            missing,
                        )

                    gmeans.append(
                        gmean(np.asarray(list(means.values()))) if means else np.nan
                    )

                    # A test's first data point only establishes its baseline;
                    # it contributes a ratio from its second data point on.
                    step_ratios = []
                    for test, mean in means.items():
                        if test in baselines:
                            step_ratios.append(mean / baselines[test])
                        else:
                            baselines[test] = mean
                    ratios.append(gmean(step_ratios) if step_ratios else np.nan)

                all_ratios = np.asarray(_fill_missing_ratios(ratios))
                sorted_summary = _min_max_scale(gmeans)
                times = np.asarray(push_times)
                logger.debug(
                    "%s/%s/%s normalized geomean: %s",
                    platform,
                    variant,
                    app,
                    sorted_summary,
                )

                plt.figure()
                plt.title(platform)
                plt.plot(_min_max_scale(all_ratios), label="Ratios geomean")
                plt.plot(sorted_summary, label="Geomean")
                plt.legend()
                plt.show()

                summary.setdefault(platform, {}).setdefault(variant, {})[app] = {
                    "tests": list(tests),
                    "values-gmean": sorted_summary,
                    "values-ratio": all_ratios,
                    "times": times,
                }

    return summary