def agg_search_data()

in mozetl/search/aggregates.py [0:0]


def agg_search_data(main_summary, grouping_cols, agg_functions):
    """Explode, augment, aggregate, and pivot search counts by search type.

    The input is first passed through ``explode_search_counts`` and then
    ``add_derived_columns``.  The resulting dataset keeps the columns of
    main_summary and additionally carries:

        engine: A key in the search_counts field representing a search engine.
                e.g. 'hoolie'
        source: A key in the search_counts field representing a search source
                e.g. 'urlbar'
        addon_version: The version of the followon-search@mozilla.com addon

    That dataset is aggregated per ``grouping_cols`` plus ``type`` and then
    pivoted on ``type`` so that each search type becomes its own column
    holding the summed ``count``:

        organic, tagged-sap, tagged-follow-on, sap, unknown, ad-click,
        search-with-ads

    where, among others:

        tagged-sap: Sum of all searches with partner codes from an SAP
        tagged-follow-on: Sum of all searches with partner codes from a
                downstream query
        sap: Sum of all searches originating from a direct user interaction
                with the Firefox UI

    The hyphenated columns tagged-sap, tagged-follow-on, ad-click and
    search-with-ads are also exposed under underscore names (tagged_sap,
    tagged_follow_on, ad_click, search_with_ads).

    Args:
        main_summary: Dataset handed to ``explode_search_counts``.
        grouping_cols: List of column names to group by.  It is concatenated
            with a list, so it must itself be a list.
        agg_functions: List of aggregate expressions computed alongside the
            summed count.  It is concatenated with a list, so it must itself
            be a list.

    Returns:
        The pivoted dataset: the grouping and aggregate columns, one column
        per search type, and the four underscore alias columns.
    """

    search_rows = add_derived_columns(explode_search_counts(main_summary))

    # One aggregation pass per (grouping columns, search type) combination.
    # NOTE(review): `sum` must be the Spark SQL aggregate imported by the
    # module, not the builtin -- the builtin cannot sum a string.
    group_keys = grouping_cols + ["type"]
    aggregations = agg_functions + [sum("count").alias("count")]
    per_type = search_rows.groupBy(group_keys).agg(*aggregations)

    # Every column except the pivot key and the pivoted value identifies an
    # output row; this includes the outputs of `agg_functions`.
    row_keys = [name for name in per_type.columns if name not in ("type", "count")]

    # Listing the pivot values explicitly fixes the set of output columns.
    search_types = [
        "organic",
        "tagged-sap",
        "tagged-follow-on",
        "sap",
        "unknown",
        "ad-click",
        "search-with-ads",
    ]
    wide = per_type.groupBy(row_keys).pivot("type", search_types).sum("count")

    # Add convenience columns with underscores instead of hyphens.
    # This makes the table easier to query from Presto.
    underscore_aliases = (
        ("tagged_sap", "tagged-sap"),
        ("tagged_follow_on", "tagged-follow-on"),
        ("ad_click", "ad-click"),
        ("search_with_ads", "search-with-ads"),
    )
    for alias_name, hyphenated_name in underscore_aliases:
        wide = wide.withColumn(alias_name, col(hyphenated_name))

    return wide