def collapse_buckets()

in mozetl/hardware_report/summarize_json.py [0:0]


def collapse_buckets(aggregated_data, count_threshold):
    """Collapse uncommon configurations in generic groups to preserve privacy.

    This takes the dictionary of aggregated results from |aggregate_data| and
    collapses entries whose value does not exceed |count_threshold| into a
    generic bucket. Collapsing happens in two passes:

    1. Small groups are merged into a coarser bucket: resolutions are rounded
       to the nearest hundred ("~WxH"), OS entries are merged per OS name
       ("<os>-Other") and everything else goes to "Other".
    2. Coarser buckets that are still too small, as well as every "Other"
       bucket, are merged into the "Other" bucket of their key type.

    Key types listed in EXCLUSION_LIST are never collapsed, with the exception
    of the "0x0" resolution, which is always reported as "Other".

    Args:
        aggregated_data: The object containing aggregated data: a dictionary
            whose keys are (key type, value) pairs and whose values are
            counts.
        count_threshold: Groups (or "configurations") whose count is not
            strictly greater than this value are collapsed in a generic
            bucket.

    Returns:
        A new dictionary, keyed like |aggregated_data|, holding the collapsed
        counts. The input dictionary is not modified.

    """
    collapsed_groups = {}
    for key, count in aggregated_data.items():
        key_type = key[0]

        if key_type == "resolution" and key[1] == "0x0":
            # If the resolution is 0x0 (see bug 1324014), put that into the
            # "Other" bucket.
            bucket = ("resolution", "Other")
        elif count > count_threshold or key_type in EXCLUSION_LIST:
            # Don't clump this group into a generic bucket if it has enough
            # users in it (or if its key type must never be collapsed).
            bucket = key
        elif key_type == "resolution":
            # Let's try to group similar resolutions together: extract the
            # resolution and round both sides to the nearest hundred.
            width, height = key[1].split("x")
            width = int(round(int(width), -2))
            height = int(round(int(height), -2))
            bucket = (key_type, "~{}x{}".format(width, height))
        elif key_type == "os":
            # Keep the OS name and drop the version. |partition| also copes
            # with values that have no "-" separator instead of raising.
            os_name = key[1].partition("-")[0]
            bucket = (key_type, os_name + "-Other")
        else:
            # We don't have enough data for this particular
            # group/configuration: aggregate it in the "Other" bucket.
            bucket = (key_type, "Other")

        # Always accumulate: a generated bucket may share its key with an
        # entry that was already present in the input (e.g. a pre-existing
        # "Other" value), and plain assignment would drop counts depending on
        # the iteration order.
        collapsed_groups[bucket] = collapsed_groups.get(bucket, 0) + count

    # The previous grouping might have created additional groups. Let's check
    # again.
    final_groups = {}
    for key, count in collapsed_groups.items():
        key_type = key[0]

        # Don't clump this group into the "Other" bucket if it has enough
        # users in it. "Other" buckets themselves are always re-merged.
        has_enough_users = count > count_threshold and key[1] != "Other"
        if has_enough_users or key_type in EXCLUSION_LIST:
            bucket = key
        else:
            # We don't have enough data for this particular
            # group/configuration: aggregate it in the "Other" bucket.
            bucket = (key_type, "Other")

        final_groups[bucket] = final_groups.get(bucket, 0) + count

    return final_groups