def get_proto_small()

in crashclouseau/datacollector.py [0:0]


def get_proto_small(product, signatures, search_date, channel):
    """Collect proto-signatures for signatures having few crashes.

    The uuid has to be aggregated on the proto-signature, so instead of
    issuing one query per signature, several signatures are sent in a single
    SuperSearch query (5 at a time, per build id). This is possible because,
    for a given signature, card(proto) <= card(crashes).

    `signatures` is updated in place by the response handler; for every
    signature `sgn` and build id `bid` found in the results:
      - `signatures[sgn]["protos"][bid]` receives dicts with the keys
        "proto", "count" and "uuid", as long as that list holds fewer
        entries than the configured "protos" threshold;
      - `signatures[sgn]["installs"][bid]` receives the install_time
        cardinality, with 0 replaced by 1.

    Nothing is returned.
    """
    start_msg = "Get proto-signatures (small) for {}-{}: started.".format(
        product, channel
    )
    logger.info(start_msg)

    def handler(bid, threshold, json, data):
        # `bid` and `threshold` are bound with functools.partial below;
        # `json` is the decoded response and `data` is presumably the
        # `handlerdata` given to the Query (i.e. `signatures`).
        all_facets = json["facets"]
        proto_buckets = all_facets["proto_signature"]
        if not proto_buckets:
            # No proto-signature at all: leave `data` untouched.
            return

        for bucket in proto_buckets:
            nested = bucket["facets"]
            signature = nested["signature"][0]["term"]
            collected = data[signature]["protos"][bid]
            # Keep at most `threshold` proto-signatures per (signature, bid).
            if len(collected) < threshold:
                collected.append(
                    {
                        "proto": bucket["term"],
                        "count": bucket["count"],
                        "uuid": nested["uuid"][0]["term"],
                    }
                )

        for bucket in all_facets["signature"]:
            signature = bucket["term"]
            installs = bucket["facets"]["cardinality_install_time"]["value"]
            # A cardinality of 0 is stored as 1.
            if installs == 0:
                installs = 1
            data[signature]["installs"][bid] = installs

    limit = config.get_limit_facets()
    threshold = config.get_threshold("protos", product, channel)
    # Template shared by every query; "build_id" and "signature" are filled
    # in on a private copy for each build id / chunk of signatures.
    base_params = {
        "product": product,
        "release_channel": utils.get_search_channel(channel),
        "date": search_date,
        "build_id": "",
        "signature": "",
        "_aggs.proto_signature": ["uuid", "signature"],
        "_aggs.signature": "_cardinality.install_time",
        "_results_number": 0,
        "_facets": "release_channel",
        "_facets_size": limit,
    }

    for bid, bid_signatures in utils.get_sgns_by_bids(signatures).items():
        bid_params = copy.deepcopy(base_params)
        bid_params["build_id"] = utils.get_buildid(bid)
        on_response = functools.partial(handler, bid, threshold)

        queries = []
        for chunk in Connection.chunks(bid_signatures, 5):
            # Each query owns its parameters: never share them between chunks.
            chunk_params = copy.deepcopy(bid_params)
            chunk_params["signature"] = ["=" + name for name in chunk]
            query = Query(
                socorro.SuperSearch.URL,
                params=chunk_params,
                handler=on_response,
                handlerdata=signatures,
            )
            queries.append(query)

        # Block until all the queries for this build id have been handled.
        socorro.SuperSearch(queries=queries).wait()

    end_msg = "Get proto-signatures (small) for {}-{}: finished.".format(
        product, channel
    )
    logger.info(end_msg)