def get_firefox_aggregations_from_bq()

in glam/api/views.py [0:0]


def get_firefox_aggregations_from_bq(bqClient, request, req_data):
    channel = req_data["channel"]
    aggregation_level = req_data["aggregation_level"]
    # Whether to pull aggregations by version or build_id.
    if aggregation_level == "version":
        build_id_filter = 'AND build_id = "*"'
        # counts = _get_firefox_counts(channel, os, versions, by_build=False)
        shas = {}
    else:
        build_id_filter = 'AND build_id != "*"'
        # counts = _get_firefox_counts(channel, os, versions, by_build=True)
        shas = _get_firefox_shas(channel)

    labels_cache = caches["probe-labels"]
    if labels_cache.get("__labels__") is None:
        Probe.populate_labels_cache()

    query_parameters = [
        bigquery.ScalarQueryParameter("metric", "STRING", req_data["metric"]),
        bigquery.ScalarQueryParameter("os", "STRING", req_data["os"]),
        bigquery.ScalarQueryParameter(
            "num_versions", "INT64", req_data["num_versions"]
        ),
    ]

    process_filter = ""
    if "process" in req_data:
        query_parameters.append(
            bigquery.ScalarQueryParameter("process", "STRING", req_data["process"])
        )
        process_filter = "AND process = @process"

    table = f"glam_desktop_{channel}_aggregates"
    query = f"""
        WITH versions AS (
            SELECT
                ARRAY_AGG(DISTINCT version
                ORDER BY
                version DESC
                LIMIT
                @num_versions) AS selected_versions
            FROM
                `{GLAM_BQ_PROD_PROJECT}.glam_etl.{table}`
            WHERE
                metric = @metric
            )
            SELECT
            * EXCEPT(selected_versions)
            FROM
                `{GLAM_BQ_PROD_PROJECT}.glam_etl.{table}`,
                versions
            WHERE
                metric = @metric
                AND os = @os
                {process_filter}
                {build_id_filter}
                AND version IN UNNEST(versions.selected_versions)
    """

    job_config = bigquery.QueryJobConfig(query_parameters=query_parameters)
    with bqClient as client:
        query_job = client.query(query, job_config=job_config)

    response = []

    for row in query_job:

        data = {
            "version": row.version,
            "os": row.os,
            "build_id": row.build_id,
            "revision": shas.get(row.build_id, ""),
            "process": row.process,
            "metric": row.metric,
            "metric_key": row.metric_key,
            "metric_type": row.metric_type,
            "total_users": (
                int(row.total_users) if row.total_users else None
            ),  # Casting, otherwise this BIGNUMERIC column is read as a string
            "sample_count": (
                int(row.total_sample) if row.total_sample else None
            ),  # Casting, otherwise this BIGNUMERIC column is read as a string
            "histogram": row.histogram and orjson.loads(row.histogram) or "",
            "non_norm_histogram": row.non_norm_histogram
            and orjson.loads(row.non_norm_histogram)
            or "",
            "percentiles": row.percentiles and orjson.loads(row.percentiles) or "",
            "non_norm_percentiles": row.non_norm_percentiles
            and orjson.loads(row.non_norm_percentiles)
            or "",
        }
        if row.client_agg_type:
            if row.metric_type == "boolean":
                data["client_agg_type"] = "boolean-histogram"
            else:
                data["client_agg_type"] = row.client_agg_type

        # Get the total distinct client IDs for this set of dimensions.
        # data["total_addressable_market"] = counts.get(f"{row.version}-{row.build_id}")

        response.append(data)

    if response:
        _log_probe_query(request)

    return response