def _get_fx_most_used_probes()

in glam/api/views.py [0:0]


def _get_fx_most_used_probes(days=30, limit=9):
    """Retrieve most used probes from BQ

    :param int days: The amount of days to consider for recency
    :param int limit: The max amount of probes to retrieve
    """

    dimensions = []
    today = datetime.now()
    min_date = today - timedelta(days=days)
    dimensions.append(Q(timestamp__gte=min_date))
    dimensions.append(Q(timestamp__lte=today))

    result = UsageInstrumentation.objects.filter(*dimensions)
    most_used_probes = (
        result.values("probe_name")
        .annotate(total=Count("*"))
        .order_by(
            "-total",
        )
    )
    probe_names = [f'{p["probe_name"]}' for p in most_used_probes]

    legacy_table_name = (
        f"`{GLAM_BQ_PROD_PROJECT}.glam_etl.glam_desktop_nightly_aggregates`"
    )

    query = f"""
        WITH fx_metrics AS (
            SELECT
                metric,
                metric_key,
                metric_type,
                version,
                os,
                build_id,
                histogram
            FROM
                {legacy_table_name}),
        selected_version AS (
            SELECT
                ARRAY_AGG(DISTINCT version
                    ORDER BY version DESC
                    LIMIT 2)[SAFE_OFFSET(1)] # Second most recent version has more data
                AS v
            FROM
                fx_metrics
            WHERE
                version < 1000) # Avoids the 1024 version
        SELECT
            distinct metric,
            ARRAY_AGG(histogram)[SAFE_OFFSET(0)] AS histogram
        FROM
            fx_metrics,
            selected_version
        WHERE
            build_id='*'
            AND os='*'
            AND metric_key=''
            AND metric_type NOT IN ('boolean',
                'histogram-boolean',
                'scalar')
            AND metric IN UNNEST(@probe_names)
            AND version = selected_version.v
        GROUP BY metric
        LIMIT {limit}"""

    job_config = bigquery.QueryJobConfig(
        query_parameters=[
            bigquery.ArrayQueryParameter("probe_names", "STRING", probe_names),
        ]
    )

    with bigquery.Client() as client:
        aggs = client.query(query, job_config=job_config)
    return aggs