in mozetl/addon_aggregates/addon_aggregates.py [0:0]
def add_addon_columns(df):
    """
    Attach 0/1 indicator columns describing the add-on/theme held by
    each record. The columns written are
        is_self_install
        is_shield_addon
        is_foreign_install
        is_system
        is_web_extension
    Each one is 1 when its condition holds and 0 otherwise.

    is_system and is_web_extension share their names with the input
    columns they are derived from, so the boolean inputs are expected
    to be replaced by their 0/1 form in the result.

    :param df: SparkDF, exploded on active_addons, each record
               maps to a single add-on
    :return df with the above columns added
    """

    def as_indicator(condition):
        # 1 where the condition is true, 0 in every other case.
        return fun.when(condition, 1).otherwise(0)

    addon_id = df.addon_id

    # A self-installed add-on has an id, is neither a system add-on nor a
    # foreign install, and its id matches none of the excluded patterns.
    self_installed = addon_id.isNotNull() & ~df.is_system & ~df.foreign_install
    for excluded_pattern in ("%mozilla%", "%cliqz%", "%@unified-urlbar%"):
        self_installed = self_installed & ~addon_id.like(excluded_pattern)

    # Order matters: it fixes the position of the newly appended columns.
    indicator_columns = [
        ("is_self_install", as_indicator(self_installed)),
        ("is_shield_addon", as_indicator(addon_id.like("%@shield.mozilla%"))),
        ("is_foreign_install", as_indicator(df.foreign_install)),
        ("is_system", as_indicator(df.is_system)),
        ("is_web_extension", as_indicator(df.is_web_extension)),
    ]

    addons_expanded = df
    for column_name, column_expr in indicator_columns:
        addons_expanded = addons_expanded.withColumn(column_name, column_expr)
    return addons_expanded