in mozetl/search/aggregates.py [0:0]
def agg_search_data(main_summary, grouping_cols, agg_functions):
    """Explode, augment, aggregate, and pivot search counts.

    The input is first passed through ``explode_search_counts`` and then
    ``add_derived_columns``. The resulting dataset has the same columns as
    main_summary with the addition of the following:

        engine: A key in the search_counts field representing a search
            engine, e.g. 'hoolie'
        source: A key in the search_counts field representing a search
            source, e.g. 'urlbar'
        tagged-sap: Sum of all searches with partner codes from an SAP
        tagged-follow-on: Sum of all searches with partner codes from a
            downstream query
        sap: Sum of all searches originating from a direct user interaction
            with the Firefox UI
        addon_version: The version of the followon-search@mozilla.com addon

    Rows are summed per ``grouping_cols`` plus the ``type`` column, and the
    result is pivoted so that each search type ('organic', 'tagged-sap',
    'tagged-follow-on', 'sap', 'unknown', 'ad-click', 'search-with-ads')
    becomes its own column holding the summed ``count``.

    :param main_summary: dataset handed to ``explode_search_counts``
        (presumably a Spark DataFrame -- confirm against callers)
    :param grouping_cols: list of columns to group by; ``"type"`` is
        appended to it for the first aggregation
    :param agg_functions: list of aggregate expressions evaluated alongside
        the ``count`` sum
    :return: the pivoted dataset, including underscore-named copies of the
        hyphenated search type columns
    """
    # Values of the "type" column that become columns after the pivot.
    search_types = [
        "organic",
        "tagged-sap",
        "tagged-follow-on",
        "sap",
        "unknown",
        "ad-click",
        "search-with-ads",
    ]
    # (alias, source column) pairs: underscore-named duplicates of the
    # hyphenated columns, which are easier to query from Presto.
    underscore_aliases = (
        ("tagged_sap", "tagged-sap"),
        ("tagged_follow_on", "tagged-follow-on"),
        ("ad_click", "ad-click"),
        ("search_with_ads", "search-with-ads"),
    )

    searches = add_derived_columns(explode_search_counts(main_summary))

    # First pass: run every aggregation once per (grouping_cols, type).
    grouped_by_type = searches.groupBy(grouping_cols + ["type"])
    aggregations = agg_functions + [sum("count").alias("count")]
    totals = grouped_by_type.agg(*aggregations)

    # Second pass: everything except "type" and "count" is a pivot key, so
    # the extra aggregate columns are carried through unchanged.
    pivot_keys = [name for name in totals.columns if name not in ("type", "count")]
    result = totals.groupBy(pivot_keys).pivot("type", search_types).sum("count")

    for alias_name, source_name in underscore_aliases:
        result = result.withColumn(alias_name, col(source_name))
    return result