def get_new_signatures()

in crashclouseau/datacollector.py [0:0]


def get_new_signatures(product, channel, date):
    """Collect the signatures that newly crash in the builds around ``date``.

    For every build id returned by ``get_builds`` a SuperSearch query is
    issued, aggregating crashes by signature and, per signature, by build id
    and install-time cardinality. The per-signature numbers are then handed
    to ``utils.get_new_crashing_bids``, which decides which build ids (if
    any) are kept for that signature. On nightly the ``shift`` passed to it
    comes from ``config.get_ndays()``, otherwise it is 1.

    Returns an empty dict when no build id is found. Otherwise returns a dict
    mapping each retained signature to a dict with the keys ``"bids"``,
    ``"protos"`` and ``"installs"``; these are passed to ``get_proto_big`` /
    ``get_proto_small`` (and ``get_uuids_fennec`` for Fennec) before being
    returned -- presumably those helpers fill them in, verify there.

    Raises ``Exception`` when SuperSearch reports errors in its response.
    """

    facets_limit = config.get_limit_facets()
    buildids, search_date = get_builds(product, channel, date)
    if not buildids:
        logger.warning("No buildids for {}-{}.".format(product, channel))
        return {}

    # Template copied for every signature: one entry per build day, with
    # every known build of that day starting at a count of zero.
    template = {}
    for raw_bid in buildids:
        build_date = utils.get_build_date(raw_bid)
        build_day = datetime(build_date.year, build_date.month, build_date.day)
        day_entry = template.setdefault(
            build_day, {"installs": {}, "bids": {}, "count": 0}
        )
        day_entry["bids"][build_date] = 0

    logger.info("Get crash numbers for {}-{}: started.".format(product, channel))

    def handler(template, json, data):
        # Accumulate one SuperSearch response into ``data`` (keyed by signature).
        if json["errors"]:
            raise Exception(
                "Error in json data from SuperSearch: {}".format(json["errors"])
            )
        signature_facets = json["facets"]["signature"]
        if not signature_facets:
            return
        for sgn_facet in signature_facets:
            installs = sgn_facet["facets"]["cardinality_install_time"]["value"]
            signature = sgn_facet["term"]
            # Only the first build_id bucket is read for each signature.
            first_build = sgn_facet["facets"]["build_id"][0]
            crash_count = first_build["count"]
            build_date = utils.get_build_date(first_build["term"])
            build_day = datetime(build_date.year, build_date.month, build_date.day)
            if signature not in data:
                data[signature] = copy.deepcopy(template)
            per_day = data[signature][build_day]
            per_day["count"] += crash_count
            per_day["bids"][build_date] = crash_count
            # A reported cardinality of zero is stored as one install.
            if installs == 0:
                installs = 1
            per_day["installs"][build_date] = installs
        del json

    params = {
        "product": product,
        "release_channel": utils.get_search_channel(channel),
        "date": search_date,
        "build_id": "",
        "_aggs.signature": ["build_id", "_cardinality.install_time"],
        "_results_number": 0,
        "_facets": "release_channel",
        "_facets_size": facets_limit,
    }

    numbers_by_sgn = {}
    bound_handler = functools.partial(handler, template)
    # One query per build id, each awaited before the next one is sent.
    for raw_bid in buildids:
        params["build_id"] = raw_bid
        query = socorro.SuperSearch(
            params=params, handler=bound_handler, handlerdata=numbers_by_sgn
        )
        query.wait()

    if channel == "nightly":
        shift = config.get_ndays()
    else:
        shift = 1
    threshold = config.get_threshold("installs", product, channel)
    big_data = {}
    small_data = {}

    for signature, numbers in numbers_by_sgn.items():
        new_bids, is_big = utils.get_new_crashing_bids(numbers, shift, threshold)
        if not new_bids:
            # Nothing retained for this signature: release its numbers.
            numbers_by_sgn[signature] = None
            continue
        info = {
            "bids": new_bids,
            "protos": {b: [] for b in new_bids},
            "installs": {b: 0 for b in new_bids},
        }
        if is_big:
            big_data[signature] = info
        else:
            small_data[signature] = info

    del numbers_by_sgn

    logger.info("Get crash numbers for {}-{}: finished.".format(product, channel))
    if big_data:
        get_proto_big(product, big_data, search_date, channel)

    if small_data:
        get_proto_small(product, small_data, search_date, channel)

    # Merge both groups into a single result (small_data is reused for it).
    small_data.update(big_data)

    if product == "Fennec":
        # Java crashes don't have any proto-signature...
        get_uuids_fennec(small_data, search_date, channel)

    return small_data