in mozetl/hardware_report/summarize_json.py [0:0]
def _similar_group_name(key_type, value):
    """Return the name of the coarser bucket an uncommon |value| belongs to.

    Resolutions ("<width>x<height>") are rounded to the nearest hundred and
    prefixed with "~" (e.g. "~1400x800"). Operating systems ("<name>-<version>")
    are grouped by the text before the first "-" (e.g. "Windows_NT-Other").
    Every other key type, and any value that does not match the expected
    format, maps to the generic "Other" bucket.
    """
    if key_type == "resolution":
        try:
            width, height = value.split("x")
            # Round to the nearest hundred.
            width = int(round(int(width), -2))
            height = int(round(int(height), -2))
        except ValueError:
            # Not a "<int>x<int>" string: there is nothing similar to group
            # it with, so use the generic bucket instead of aborting.
            return "Other"
        return "~" + str(width) + "x" + str(height)

    if key_type == "os":
        os_name, separator, _ = value.partition("-")
        # Without a "-" there is no OS family to group under.
        if separator:
            return os_name + "-" + "Other"

    return "Other"


def collapse_buckets(aggregated_data, count_threshold):
    """Collapse uncommon configurations in generic groups to preserve privacy.

    This takes the dictionary of aggregated results from |aggregate_data| and
    collapses entries whose value is not greater than |count_threshold| in a
    generic bucket. Key types listed in EXCLUSION_LIST are never collapsed.

    Args:
        aggregated_data: The object containing aggregated data. Keys are
            indexable as (key type, value); values are counts.
        count_threshold: Groups (or "configurations") whose count does not
            exceed this value are collapsed in a generic bucket.

    Returns:
        A new dictionary mapping (key type, group name) to the summed counts.
        The input dictionary is not modified.
    """
    collapsed_groups = {}
    for k, v in aggregated_data.items():
        key_type = k[0]

        if key_type == "resolution" and k[1] == "0x0":
            # If the resolution is 0x0 (see bug 1324014), put that into the
            # "Other" bucket.
            group_key = ("resolution", "Other")
        elif v > count_threshold or key_type in EXCLUSION_LIST:
            # Don't clump this group into the "Other" bucket if it has enough
            # users in it.
            group_key = k
        else:
            # We don't have enough data for this particular configuration:
            # try to group it with similar ones, or with "Other".
            group_key = (key_type, _similar_group_name(key_type, k[1]))

        # Always accumulate: the input may already contain a key (such as
        # (key_type, "Other")) that earlier entries were folded into, and a
        # plain assignment would silently discard those counts.
        collapsed_groups[group_key] = collapsed_groups.get(group_key, 0) + v

    # The previous grouping might have created additional groups that are
    # still too small. Let's check again.
    final_groups = {}
    for k, v in collapsed_groups.items():
        if (v > count_threshold and k[1] != "Other") or k[0] in EXCLUSION_LIST:
            group_key = k
        else:
            # Still not enough data: aggregate it with the "Other" bucket.
            group_key = (k[0], "Other")
        final_groups[group_key] = final_groups.get(group_key, 0) + v

    return final_groups