in mozetl/taar/taar_ensemble.py [0:0]
def get_df(spark, date_from):
    """Select the latest ``clients_daily`` row per client for TAAR features.

    Rows are kept only when ``active_addons`` is non-null and holds more
    than 2 and fewer than 100 entries, the channel is ``release``, the
    application is ``Firefox`` and ``submission_date_s3`` is at or after
    ``date_from``. The surviving rows are ranked per ``client_id`` by
    ``submission_date_s3`` descending and only the first-ranked row of
    each client is returned; the helper ranking column is dropped again.

    :param spark: object providing ``sql()``; presumably a SparkSession
        (TODO confirm against the caller).
    :param date_from: lower bound for ``submission_date_s3``. It is
        interpolated into the SQL predicate verbatim, without quoting.
        NOTE(review): this looks like it expects a digits-only value such
        as ``YYYYMMDD`` -- confirm against the caller.
    :return: the filtered and projected result with the columns
        ``client_id``, ``active_addons``, ``geo_city``,
        ``subsession_length``, ``locale``, ``os``, ``bookmark_count``,
        ``tab_open_count``, ``total_uri`` and ``unique_tlds``.
    """
    # Row-level predicates, applied one after another in this order.
    row_filters = [
        "active_addons IS NOT null",
        "size(active_addons) > 2",
        "size(active_addons) < 100",
        "channel = 'release'",
        "app_name = 'Firefox'",
        "submission_date_s3 >= {}".format(date_from),
    ]

    # Output columns; ``rn`` ranks each client's rows, newest first, so
    # that ``rn = 1`` picks the most recent submission per client.
    projection = [
        "client_id as client_id",
        "active_addons as active_addons",
        "city as geo_city",
        "subsession_hours_sum as subsession_length",
        "locale as locale",
        "os as os",
        "row_number() OVER (PARTITION BY client_id ORDER BY submission_date_s3 desc) as rn",
        "places_bookmarks_count_mean AS bookmark_count",
        "scalar_parent_browser_engagement_tab_open_event_count_sum AS tab_open_count",
        "scalar_parent_browser_engagement_total_uri_count_sum AS total_uri",
        "scalar_parent_browser_engagement_unique_domains_count_max AS unique_tlds",
    ]

    frame = spark.sql("SELECT * FROM clients_daily")
    for predicate in row_filters:
        frame = frame.where(predicate)

    latest_per_client = frame.selectExpr(*projection).where("rn = 1")
    return latest_per_client.drop("rn")