in crashclouseau/datacollector.py [0:0]
def get_new_signatures(product, channel, date):
    """Return the signatures that started crashing in recent builds.

    Crash counts and install cardinalities are fetched from SuperSearch,
    one query per build id returned by ``get_builds``, and grouped per
    signature and per build day. ``utils.get_new_crashing_bids`` then
    selects the build ids to keep for each signature: on nightly it is
    given ``config.get_ndays()`` as shift (so that only signatures with no
    crashes in the last few days are collected), on any other channel it is
    given 1.

    Returns a dict mapping each retained signature to a dict with the keys
    ``"bids"``, ``"protos"`` and ``"installs"``, after the proto-signature
    helpers (and ``get_uuids_fennec`` for Fennec) have been run on it.
    An empty dict is returned when no build id is available.

    Raises ``Exception`` from the SuperSearch handler when the response
    carries a non-empty ``"errors"`` entry.
    """
    facets_limit = config.get_limit_facets()
    bids, search_date = get_builds(product, channel, date)
    if not bids:
        logger.warning("No buildids for {}-{}.".format(product, channel))
        return {}

    # Per-day skeleton, deep-copied for every signature seen: each known
    # build starts with a crash count of zero.
    template = {}
    for raw_bid in bids:
        build_date = utils.get_build_date(raw_bid)
        day = datetime(build_date.year, build_date.month, build_date.day)
        per_day = template.setdefault(
            day, {"installs": {}, "bids": {}, "count": 0}
        )
        per_day["bids"][build_date] = 0

    logger.info("Get crash numbers for {}-{}: started.".format(product, channel))

    def handler(json, data):
        """Fold one SuperSearch response into ``data`` (keyed by signature)."""
        if json["errors"]:
            raise Exception(
                "Error in json data from SuperSearch: {}".format(json["errors"])
            )
        signature_facets = json["facets"]["signature"]
        if not signature_facets:
            return
        for entry in signature_facets:
            installs = entry["facets"]["cardinality_install_time"]["value"]
            sgn = entry["term"]
            # Only the first build_id bucket is read; every query is
            # restricted to a single build id through params["build_id"].
            first_bucket = entry["facets"]["build_id"][0]
            count = first_bucket["count"]
            build_date = utils.get_build_date(first_bucket["term"])
            day = datetime(build_date.year, build_date.month, build_date.day)
            if sgn not in data:
                data[sgn] = copy.deepcopy(template)
            stats = data[sgn][day]
            stats["count"] += count
            stats["bids"][build_date] = count
            # A reported cardinality of 0 is stored as 1.
            stats["installs"][build_date] = installs if installs != 0 else 1

    params = {
        "product": product,
        "release_channel": utils.get_search_channel(channel),
        "date": search_date,
        "build_id": "",
        "_aggs.signature": ["build_id", "_cardinality.install_time"],
        "_results_number": 0,
        "_facets": "release_channel",
        "_facets_size": facets_limit,
    }
    per_signature = {}

    # One query per build id, each awaited before the next one is sent, so
    # the shared params dict can be updated in place.
    for build_id in bids:
        params["build_id"] = build_id
        socorro.SuperSearch(
            params=params, handler=handler, handlerdata=per_signature
        ).wait()

    if channel == "nightly":
        shift = config.get_ndays()
    else:
        shift = 1
    threshold = config.get_threshold("installs", product, channel)

    big_data = {}
    small_data = {}
    for sgn, numbers in per_signature.items():
        new_bids, is_big = utils.get_new_crashing_bids(numbers, shift, threshold)
        if not new_bids:
            # Nothing retained for this signature: drop its numbers.
            per_signature[sgn] = None
            continue
        bucket = big_data if is_big else small_data
        bucket[sgn] = {
            "bids": new_bids,
            "protos": {b: [] for b in new_bids},
            "installs": dict.fromkeys(new_bids, 0),
        }

    # The raw numbers are no longer needed once signatures are classified.
    del per_signature

    logger.info("Get crash numbers for {}-{}: finished.".format(product, channel))

    if big_data:
        get_proto_big(product, big_data, search_date, channel)
    if small_data:
        get_proto_small(product, small_data, search_date, channel)

    # Merge both groups into the single mapping handed back to the caller.
    small_data.update(big_data)

    if product == "Fennec":
        # Java crashes carry no proto-signature, so they need a separate
        # pass.
        get_uuids_fennec(small_data, search_date, channel)

    return small_data