def aggregate_data()

in mozetl/hardware_report/summarize_json.py [0:0]


def aggregate_data(processed_data):
    """Return per-dimension occurrence counts for the processed records.

    Args:
        processed_data: Object exposing ``aggregate(zero_value, seq_op, comb_op)``
            (presumably a Spark RDD -- confirm against the caller). Every
            record is indexed by each dimension name listed below, so a
            record lacking one of them makes that lookup fail.

    Returns:
        The result of ``processed_data.aggregate``: the accumulator built by
        the helpers below, i.e. a dict mapping ``(dimension_name, value)``
        tuples to the number of records that reported that value.
    """
    # The dimensions over which we want to aggregate the different values.
    # Defined once here instead of being rebuilt for every single record.
    keys_to_aggregate = (
        "browser_arch",
        "cpu_cores",
        "cpu_cores_speed",
        "cpu_vendor",
        "cpu_speed",
        "gfx0_vendor_name",
        "gfx0_model",
        "resolution",
        "memory_gb",
        "os",
        "os_arch",
        "has_flash",
    )

    def seq(acc, v):
        """Fold one record ``v`` into the partial counts ``acc`` (in place)."""
        for key_name in keys_to_aggregate:
            # We want to know how many users have a particular configuration
            # (e.g. using a particular cpu vendor). For each dimension of
            # interest, build a key as (hw, value) and count its occurrences
            # among the user base.
            acc_key = (key_name, v[key_name])
            acc[acc_key] = acc.get(acc_key, 0) + 1

        return acc

    def cmb(v1, v2):
        """Merge two partial count dictionaries into a new dictionary."""
        # Copy the first partial result so neither input is mutated, then add
        # the second one's counts on top of it.
        merged = dict(v1)
        for key, count in v2.items():
            merged[key] = merged.get(key, 0) + count
        return merged

    return processed_data.aggregate({}, seq, cmb)