in mozetl/search/aggregates.py [0:0]
def add_derived_columns(exploded_search_counts):
    """Return the dataset with ``type`` and ``addon_version`` columns added.

    type: Classification of the ``source`` column. It is 'sap' when the
        source is listed in SEARCH_SOURCE_WHITELIST or is null. Otherwise
        it is decided by the first matching source prefix and is one of
        'tagged-sap', 'tagged-follow-on', 'organic', 'ad-click' or
        'search-with-ads'. A source matching no prefix yields 'unknown'.
    addon_version: String produced by applying get_search_addon_version to
        the ``active_addons`` column (the version of the
        followon-search@mozilla addon, or None).
    """
    source = col("source")

    # Ordered (prefix, label) pairs: the first prefix that matches wins.
    prefix_labels = [
        ("in-content:sap:", "tagged-sap"),
        ("in-content:sap-follow-on:", "tagged-follow-on"),
        ("in-content:organic:", "organic"),
        ("sap:", "tagged-sap"),
        ("follow-on:", "tagged-follow-on"),
        ("ad-click:", "ad-click"),
        ("search-with-ads:", "search-with-ads"),
    ]

    # Assemble the chain from the innermost fallback outwards, so that the
    # first pair in the list ends up as the outermost (first-tested) branch.
    by_prefix = "unknown"
    for prefix, label in reversed(prefix_labels):
        by_prefix = when(source.startswith(prefix), label).otherwise(by_prefix)

    # isin() does not match a null source, so nulls get their own branch
    # before the prefix rules are consulted.
    null_or_prefix = when(source.isNull(), "sap").otherwise(by_prefix)
    type_expr = when(source.isin(SEARCH_SOURCE_WHITELIST), "sap").otherwise(
        null_or_prefix
    )

    addon_version_udf = udf(get_search_addon_version, StringType())

    with_type = exploded_search_counts.withColumn("type", type_expr)
    return with_type.withColumn(
        "addon_version", addon_version_udf("active_addons")
    )