in glam/api/views.py [0:0]
def get_firefox_aggregations_from_bq(bqClient, request, req_data):
channel = req_data["channel"]
aggregation_level = req_data["aggregation_level"]
# Whether to pull aggregations by version or build_id.
if aggregation_level == "version":
build_id_filter = 'AND build_id = "*"'
# counts = _get_firefox_counts(channel, os, versions, by_build=False)
shas = {}
else:
build_id_filter = 'AND build_id != "*"'
# counts = _get_firefox_counts(channel, os, versions, by_build=True)
shas = _get_firefox_shas(channel)
labels_cache = caches["probe-labels"]
if labels_cache.get("__labels__") is None:
Probe.populate_labels_cache()
query_parameters = [
bigquery.ScalarQueryParameter("metric", "STRING", req_data["metric"]),
bigquery.ScalarQueryParameter("os", "STRING", req_data["os"]),
bigquery.ScalarQueryParameter(
"num_versions", "INT64", req_data["num_versions"]
),
]
process_filter = ""
if "process" in req_data:
query_parameters.append(
bigquery.ScalarQueryParameter("process", "STRING", req_data["process"])
)
process_filter = "AND process = @process"
table = f"glam_desktop_{channel}_aggregates"
query = f"""
WITH versions AS (
SELECT
ARRAY_AGG(DISTINCT version
ORDER BY
version DESC
LIMIT
@num_versions) AS selected_versions
FROM
`{GLAM_BQ_PROD_PROJECT}.glam_etl.{table}`
WHERE
metric = @metric
)
SELECT
* EXCEPT(selected_versions)
FROM
`{GLAM_BQ_PROD_PROJECT}.glam_etl.{table}`,
versions
WHERE
metric = @metric
AND os = @os
{process_filter}
{build_id_filter}
AND version IN UNNEST(versions.selected_versions)
"""
job_config = bigquery.QueryJobConfig(query_parameters=query_parameters)
with bqClient as client:
query_job = client.query(query, job_config=job_config)
response = []
for row in query_job:
data = {
"version": row.version,
"os": row.os,
"build_id": row.build_id,
"revision": shas.get(row.build_id, ""),
"process": row.process,
"metric": row.metric,
"metric_key": row.metric_key,
"metric_type": row.metric_type,
"total_users": (
int(row.total_users) if row.total_users else None
), # Casting, otherwise this BIGNUMERIC column is read as a string
"sample_count": (
int(row.total_sample) if row.total_sample else None
), # Casting, otherwise this BIGNUMERIC column is read as a string
"histogram": row.histogram and orjson.loads(row.histogram) or "",
"non_norm_histogram": row.non_norm_histogram
and orjson.loads(row.non_norm_histogram)
or "",
"percentiles": row.percentiles and orjson.loads(row.percentiles) or "",
"non_norm_percentiles": row.non_norm_percentiles
and orjson.loads(row.non_norm_percentiles)
or "",
}
if row.client_agg_type:
if row.metric_type == "boolean":
data["client_agg_type"] = "boolean-histogram"
else:
data["client_agg_type"] = row.client_agg_type
# Get the total distinct client IDs for this set of dimensions.
# data["total_addressable_market"] = counts.get(f"{row.version}-{row.build_id}")
response.append(data)
if response:
_log_probe_query(request)
return response