def add_derived_columns()

in mozetl/search/aggregates.py [0:0]


def add_derived_columns(exploded_search_counts):
    """Return the dataset with ``type`` and ``addon_version`` columns added.

    type:           Search category derived from the ``source`` column.
                    'sap' when the source is listed in
                    SEARCH_SOURCE_WHITELIST or is null; otherwise the label
                    of the first matching source prefix ('tagged-sap',
                    'tagged-follow-on', 'organic', 'ad-click' or
                    'search-with-ads'), or 'unknown' when no prefix matches.
    addon_version:  String result of applying get_search_addon_version to
                    the ``active_addons`` column (presumably the version of
                    the followon-search@mozilla addon, or None -- confirm
                    against that helper).
    """
    # (source prefix, resulting type). The first matching prefix wins, so
    # the order of this table is significant.
    prefix_to_type = (
        ("in-content:sap:", "tagged-sap"),
        ("in-content:sap-follow-on:", "tagged-follow-on"),
        ("in-content:organic:", "organic"),
        ("sap:", "tagged-sap"),
        ("follow-on:", "tagged-follow-on"),
        ("ad-click:", "ad-click"),
        ("search-with-ads:", "search-with-ads"),
    )

    source = col("source")

    # Build the nested when/otherwise chain from the innermost fallback
    # outwards, so the first table entry ends up as the outermost test.
    by_prefix = "unknown"
    for prefix, search_type in reversed(prefix_to_type):
        by_prefix = when(
            source.startswith(prefix), search_type
        ).otherwise(by_prefix)

    # Null sources are reported as plain "sap", same as whitelisted ones.
    null_or_prefix = when(source.isNull(), "sap").otherwise(by_prefix)
    type_column = when(
        source.isin(SEARCH_SOURCE_WHITELIST), "sap"
    ).otherwise(null_or_prefix)

    addon_version_udf = udf(get_search_addon_version, StringType())

    with_type = exploded_search_counts.withColumn("type", type_column)
    return with_type.withColumn(
        "addon_version", addon_version_udf("active_addons")
    )