def get_addons_per_client()

in mozetl/taar/taar_similarity.py [0:0]


def get_addons_per_client(users_df, addon_whitelist, minimum_addons_count):
    """Build a DataFrame with one row per client and its valid add-on GUIDs.

    An add-on is kept only when it is a whitelisted, enabled, user-installed,
    non-system add-on of type ``"extension"``.

    Args:
        users_df: Spark DataFrame whose rows expose ``client_id`` and
            ``active_addons``. Each element of ``active_addons`` must
            support ``element["addon_id"]`` as well as the attributes
            ``is_system``, ``app_disabled``, ``type``, ``user_disabled``
            and ``foreign_install``.
        addon_whitelist: Iterable of (hashable) add-on GUIDs that are
            allowed to appear in the output.
        minimum_addons_count: Threshold on the number of valid add-ons.
            The comparison is strict: a client is kept only when it has
            *more than* this many valid add-ons.

    Returns:
        A DataFrame with the columns ``client_id`` and ``addon_ids``, where
        ``addon_ids`` is the list of valid add-on GUIDs for that client.
    """
    # Snapshot the whitelist once. The membership test below runs for every
    # add-on of every client, so it should be a hash lookup regardless of
    # the container type the caller handed in (e.g. a plain list).
    allowed_guids = frozenset(addon_whitelist)

    def is_valid_addon(guid, addon):
        return not (
            addon.is_system
            or addon.app_disabled
            or addon.type != "extension"
            or addon.user_disabled
            or addon.foreign_install
            or guid not in allowed_guids
        )

    # Un-nest each client's add-on list into a flat list of GUIDs, dropping
    # the undesired add-ons on the way.
    #
    # Each element of a client's 'active_addons' list is a record carrying
    # its own GUID under the 'addon_id' key, next to the metadata flags
    # inspected by is_valid_addon().
    def valid_guids(row):
        # A null 'active_addons' value is treated as "no add-ons" so that a
        # single incomplete row cannot fail the whole job.
        return [
            addon["addon_id"]
            for addon in (row["active_addons"] or ())
            if is_valid_addon(addon["addon_id"], addon)
        ]

    client_addons = users_df.rdd.map(
        lambda row: (row["client_id"], valid_guids(row))
    )
    # Strictly greater-than: clients with exactly `minimum_addons_count`
    # valid add-ons are dropped.
    frequent_clients = client_addons.filter(
        lambda pair: len(pair[1]) > minimum_addons_count
    )
    return frequent_clients.toDF(["client_id", "addon_ids"])