in sql/moz-fx-data-shared-prod/telemetry_derived/clients_daily_histogram_aggregates_v1.sql.py [0:0]
def get_histogram_probes_and_buckets(histogram_type, processes_to_output):
    """Return the histogram probes to aggregate together with their buckets.

    Candidate probes are read from the ``payload`` record of the
    ``telemetry_stable.main_v5`` table schema: the ``histogram_type`` field
    directly under ``payload`` is attributed to the ``parent`` process, and
    the same field under ``payload.processes.content`` / ``.gpu`` to those
    processes. The candidates are then intersected with the ``histogram/``
    entries of the probe info service and stripped of ETL-excluded probes.

    Args:
        histogram_type: Name of the schema field that holds the histograms.
            It is compared against ``"histograms"`` to tell non-keyed from
            keyed probes (presumably the other value in use is
            ``"keyed_histograms"`` -- confirm against callers).
        processes_to_output: Container of process names to keep, or ``None``
            to keep every process found in the schema.

    Returns:
        ``None`` if the schema contains no ``histogram_type`` field at all.
        Otherwise a dict with two keys:

        * ``"probes"``: probe name -> ``{"processes": set, "type": kind}``.
          The set can be empty when none of the probe's processes are in
          ``processes_to_output``.
        * ``"buckets"``: probe name -> ``{"n_buckets", "min", "max"}`` as
          integers.
    """
    project = "moz-fx-data-shared-prod"
    client = bigquery.Client(project)
    table = client.get_table("telemetry_stable.main_v5")
    main_summary_schema = [field.to_api_repr() for field in table.schema]

    # Collect every schema record holding histograms of the requested type,
    # tagged with the process it belongs to.
    histograms_field = []
    for field in main_summary_schema:
        if field["name"] != "payload":
            continue

        for payload_field in field["fields"]:
            if payload_field["name"] == histogram_type:
                histograms_field.append(
                    {"histograms": payload_field, "process": "parent"}
                )
            elif payload_field["name"] == "processes":
                for processes_field in payload_field["fields"]:
                    if processes_field["name"] not in ("content", "gpu"):
                        continue

                    for type_field in processes_field["fields"]:
                        if type_field["name"] == histogram_type:
                            histograms_field.append(
                                {
                                    "histograms": type_field,
                                    "process": processes_field["name"],
                                }
                            )
                            # At most one matching field per process.
                            break

    if not histograms_field:
        # Kept for backward compatibility: callers get None, not an empty
        # result, when the schema has no such histogram field.
        return None

    # Map each histogram name to the set of requested processes recording it.
    # A histogram is registered even when none of its processes is requested,
    # in which case its set stays empty.
    main_summary_histograms = {}
    for histograms_and_process in histograms_field:
        process = histograms_and_process["process"]
        for histogram in histograms_and_process["histograms"].get("fields", {}):
            if "name" not in histogram:
                continue

            processes = main_summary_histograms.setdefault(histogram["name"], set())
            if processes_to_output is None or process in processes_to_output:
                processes.add(process)

    # Only hold the HTTP response open for as long as it takes to read it.
    with urllib.request.urlopen(PROBE_INFO_SERVICE) as url:
        data = json.loads(url.read())

    excluded_probes = probe_filters.get_etl_excluded_probes_quickfix("desktop")

    # Normalise probe info keys once ("histogram/A.B" -> "a_b") and keep the
    # matching entry alongside, preserving the service's ordering.
    histogram_entries = [
        (key.replace("histogram/", "").replace(".", "_").lower(), entry)
        for key, entry in data.items()
        if key.startswith("histogram/")
    ]
    histogram_probes = {probe for probe, _ in histogram_entries}

    relevant_probes = {
        histogram: {"processes": processes}
        for histogram, processes in main_summary_histograms.items()
        if histogram in histogram_probes and histogram not in excluded_probes
    }

    bucket_details = {}
    # True when non-keyed histograms were requested.
    wants_unkeyed = histogram_type == "histograms"

    for probe, entry in histogram_entries:
        # Prefer the nightly definition, then beta, then release.
        history = entry["history"]
        if "nightly" in history:
            channel = "nightly"
        elif "beta" in history:
            channel = "beta"
        else:
            channel = "release"

        data_details = history[channel][0]["details"]

        # Some keyed GPU metrics aren't correctly flagged as type
        # "keyed_histograms", so we filter those out here.
        if (
            processes_to_output is None or "gpu" in processes_to_output
        ) and data_details["keyed"] == wants_unkeyed:
            relevant_probes.pop(probe, None)
            continue

        if probe not in relevant_probes:
            continue

        relevant_probes[probe]["type"] = data_details["kind"]

        # NOTE: some probes, (e.g. POPUP_NOTIFICATION_MAINACTION_TRIGGERED_MS)
        # have values in the probe info service like 80 * 25 for the value of
        # n_buckets. So they do need to be evaluated as expressions.
        #
        # SECURITY NOTE(review): eval() is executed on strings downloaded from
        # the probe info service, so a compromised or spoofed response could
        # run arbitrary code. Left unchanged to preserve which expressions are
        # accepted; consider a restricted arithmetic evaluator (e.g. built on
        # the ast module) once the set of expression forms is confirmed.
        bucket_details[probe] = {
            "n_buckets": int(eval(str(data_details["n_buckets"]))),
            "min": int(eval(str(data_details["low"]))),
            "max": int(eval(str(data_details["high"]))),
        }

    return {"probes": relevant_probes, "buckets": bucket_details}