def summarize()

in pageload-summary/summarize_testing.py [0:0]


def _min_max_normalize(values):
    """Rescale a 1-D numpy array to the [0, 1] range.

    An empty array is returned unchanged and a constant array maps to all
    zeros, so the caller never hits ``min()`` on an empty sequence or a
    0/0 division.
    """
    if values.size == 0:
        return values
    lowest, highest = min(values), max(values)
    spread = highest - lowest
    if spread == 0:
        return np.zeros_like(values)
    return (values - lowest) / spread


def summarize(data, platforms):
    """Build, print and plot a geometric-mean summary for each test group.

    The result of ``organize_data(data, platforms)`` is traversed as a nested
    mapping ``platform -> app -> variant -> pageload type -> tests``, where
    every test entry carries a ``"values"`` mapping from push time to a list
    of measurements. For each innermost group the push times are bucketed
    with ``temporal_aggregation``; each bucket is reduced to the geometric
    mean of the per-test means; the resulting series is printed, plotted and
    stored in the local ``summary`` dict under
    ``summary[platform][variant][app][pl_type]``.

    Args:
        data: Raw data forwarded to ``organize_data``.
        platforms: Platform selection forwarded to ``organize_data``.

    Side effects:
        Prints diagnostics to stdout and calls ``plt.show()`` once per group.
    """
    # Imported once per call instead of once per innermost loop iteration.
    import json

    # NOTE(review): `summary` is filled in below, but no `return` is visible
    # in the reviewed section -- confirm the function hands it back.
    org_data = organize_data(data, platforms)

    summary = {}
    for platform, apps in org_data.items():

        for app, variants in apps.items():

            for variant, pl_types in variants.items():
                # A variant recorded as the string "None" is reported as "e10s".
                variant_label = variant if variant != "None" else "e10s"

                for pl_type, tests in pl_types.items():

                    # Collect every distinct push time seen by any test.
                    all_push_times = []
                    for info in tests.values():
                        print(info)
                        all_push_times.extend(list(info["values"].keys()))
                    all_push_times = list(set(all_push_times))

                    # Bucket the push times; 24 is the aggregation window
                    # (presumably hours -- confirm in temporal_aggregation).
                    all_push_times = temporal_aggregation(all_push_times, 24)

                    print(all_push_times)

                    # One summary value per bucket of push times.
                    summarized_vals = []
                    tests_per_val = {}
                    # Last push time at which each test reported values.
                    prev_test_times = {}
                    for c, times in enumerate(sorted(all_push_times)):
                        if len(times) == 0:
                            # An empty bucket has no last time and no values;
                            # skipping avoids an IndexError on times[-1].
                            continue

                        vals = {}
                        for time in times:
                            # NOTE(review): these lists are reset for every
                            # push time, so the record stored after this loop
                            # only reflects the last time of the bucket.
                            testsc = []  # tests without data at this time
                            testsg = []  # tests with data at this time
                            for test, info in tests.items():
                                if time not in info["values"]:
                                    testsc.append(test)
                                    continue

                                # First sighting: compare the test to itself.
                                previous = prev_test_times.setdefault(test, time)
                                current_values = info["values"][time]

                                vals.setdefault(test, []).extend(current_values)
                                testsg.append(
                                    (
                                        test,
                                        time,
                                        previous,
                                        np.mean(current_values),
                                        np.mean(info["values"][previous]),
                                    )
                                )

                                prev_test_times[test] = time

                            if testsc:
                                print(
                                    f"Tests which failed and prevent a summary at time {time}:",
                                    testsc,
                                )

                        # Mean per test first, then geometric mean across tests.
                        vals = [np.mean(v) for v in vals.values()]
                        summarized_vals.append((times[-1], gmean(np.asarray(vals))))

                        tests_per_val[str(c)] = {
                            "good": testsg,
                            "bad": testsc,
                            "vals": vals,
                        }

                    print("hereeee")
                    print(json.dumps(tests_per_val, indent=4))

                    plt.title(platform + f"\n{app}-{pl_type}-{variant_label}")

                    # Sort once by push time, then split into two arrays.
                    ordered = sorted(summarized_vals, key=lambda pair: pair[0])
                    gmeans = np.asarray([value for _, value in ordered])
                    push_times = np.asarray([stamp for stamp, _ in ordered])

                    sorted_summary = _min_max_normalize(gmeans)
                    print(sorted_summary)
                    print(platform)
                    print(variant_label)

                    plt.plot(gmeans, label="Geomean")

                    plt.legend()
                    plt.show()

                    summary.setdefault(platform, {}).setdefault(
                        variant_label, {}
                    ).setdefault(app, {})[pl_type] = {
                        "tests": list(tests.keys()),
                        "values-gmean": gmeans,
                        "times": push_times,
                    }