crashclouseau/update.py (149 lines of code) (raw):
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
from datetime import datetime
from dateutil.relativedelta import relativedelta
from libmozdata import utils as lmdutils
import pytz
from .logger import logger
from .pushlog import pushlog
from . import datacollector as dc
from . import buildhub, config, inspector, models, utils, worker, patch
def put_build(buildid, product, channel, version, node=None):
    """Store one build in the database.

    The incoming ``buildid`` is converted with ``utils.get_build_date``.
    When ``node`` is falsy, the changeset is looked up through the data
    collector using the converted buildid, the channel and the product.
    """
    build_date = utils.get_build_date(buildid)
    # Only query the data collector when the caller gave no changeset.
    changeset = node or dc.get_changeset(build_date, channel, product)
    node_id = models.Node.get_id(changeset, channel)
    models.Build.put_build(build_date, node_id, product, channel, version)
def put_filelog(channel, start_date=None, end_date=None):
    """Fetch the pushlog for a channel and store it in the database.

    Args:
        channel: channel whose pushlog is retrieved.
        start_date: start of the pushlog window. When falsy, it is set to
            one second after ``models.Node.get_max_date(channel)``.
        end_date: end of the pushlog window. When falsy, the current UTC
            time (timezone-aware) is used.

    Returns:
        The ``end_date`` that was used for the query.
    """
    if not end_date:
        # datetime.utcnow() is deprecated since Python 3.12; request an
        # aware UTC datetime directly instead of localizing a naive one.
        end_date = datetime.now(pytz.utc)
    if not start_date:
        start_date = models.Node.get_max_date(channel)
        # Presumably avoids fetching the most recent stored push again.
        start_date += relativedelta(seconds=1)

    logger.info(
        "Get pushlog data for {} ({} to {}): started".format(
            channel, start_date, end_date
        )
    )
    data = pushlog(start_date, end_date, channel=channel)
    logger.info("Get pushlog data: retrieved")
    # The value returned by Changeset.add was never used here.
    models.Changeset.add(data, end_date, channel)
    logger.info("Get pushlog data: finished.")

    return end_date
def put_report(uuid, buildid, channel, product, chgset):
    """Inspect one crash report and record its stacks in the database.

    The crash is fetched through ``inspector.get_crash``; when that returns
    ``None`` nothing is stored. Otherwise the changesets returned by
    ``models.Changeset.to_analyze`` are parsed and saved, the non-Java and
    Java stacks are stored unless their hash is already known, and finally
    the UUID gets its stack hashes and is flagged as analyzed.
    """
    if channel != "nightly":
        mindate = models.Build.get_pushdate_before(buildid, channel, product)
        mindate += relativedelta(seconds=1)
    else:
        mindate = buildid - relativedelta(days=config.get_ndays())

    # Handed to the inspector and read back afterwards; presumably the
    # inspector fills it with the changesets it found interesting.
    touched = set()
    crash = inspector.get_crash(
        uuid, buildid, channel, mindate, chgset, models.Changeset.find, touched
    )
    if crash is None:
        # 'json_dump' is not in crash data
        return

    pending = models.Changeset.to_analyze(chgsets=touched, channel=channel)
    for nodeid, node in pending:
        parsed = patch.parse(node, channel=channel)
        models.Changeset.add_analyzis(parsed, nodeid, channel)

    # The report stays "useless" unless at least one new stack is stored.
    useless = True
    hashes = {"nonjava": "", "java": ""}
    for kind, is_java in (("nonjava", False), ("java", True)):
        stack = crash.get(kind)
        if not stack:
            continue
        stack_hash = stack["hash"]
        hashes[kind] = stack_hash
        if models.UUID.is_stackhash_existing(
            stack_hash, buildid, channel, product, is_java
        ):
            continue
        models.CrashStack.put_frames(uuid, stack, is_java, commit=True)
        useless = False

    models.UUID.add_stack_hash(uuid, hashes["nonjava"], hashes["java"])
    models.UUID.set_analyzed(uuid, useless)
def analyze_one_report(uuid=None):
    """Analyze a single pending report, then schedule the next step.

    ``models.UUID.to_analyze(uuid)`` supplies the arguments for
    ``put_report``. When it returns nothing, patch analysis is scheduled
    instead. A failure in ``put_report`` is logged and the UUID is marked
    as errored; report analysis is rescheduled in either case.
    """
    report = models.UUID.to_analyze(uuid)
    if not report:
        analyze_patches()
        return

    try:
        put_report(*report)
    except Exception as err:
        logger.error(err, exc_info=True)
        # The first element is the uuid passed to put_report.
        models.UUID.set_error(report[0])
    analyze_reports()
def analyze_reports():
    """Enqueue ``analyze_one_report`` unless the queue already holds more than one job."""
    queue = worker.get_queue()
    if len(queue) > 1:
        return
    queue.enqueue_call(func=analyze_one_report, result_ttl=0)
def analyze_one_patch():
    """Analyze a single pending patch, then schedule the next one.

    Does nothing when ``models.Changeset.to_analyze()`` yields no node.
    Parsing or storage errors are logged and do not stop the chain.
    """
    nodeid, node, channel = models.Changeset.to_analyze()
    if not node:
        return

    try:
        parsed = patch.parse(node, channel=channel)
        models.Changeset.add_analyzis(parsed, nodeid, channel)
    except Exception as err:
        logger.error(err, exc_info=True)
    analyze_patches()
def analyze_patches():
    """Enqueue ``analyze_one_patch`` unless the queue already holds more than one job."""
    queue = worker.get_queue()
    if len(queue) > 1:
        return
    queue.enqueue_call(func=analyze_one_patch, result_ttl=0)
def update_builds(date, channel, product):
    """Refresh the stored builds for a channel/product.

    When ``date`` is falsy, the second value returned by
    ``models.LastDate.get(channel)`` is used instead. Builds are requested
    from buildhub starting ``config.get_ndays()`` days before that date
    and stored only when buildhub returns something.
    """
    logger.info("Update builds for {}/{}: started.".format(channel, product))
    if not date:
        _, date = models.LastDate.get(channel)
    since = date - relativedelta(days=config.get_ndays())
    builds = buildhub.get(since, channel, prods=product)
    if builds:
        models.Build.put_data(builds)
    logger.info("Update builds: finished.")
def put_crashes(date, channel, product):
    """Fetch the new crash signatures and store them in the database.

    Args:
        date: date handed to ``dc.get_new_signatures``; when falsy, the
            current UTC time (timezone-aware) is used.
        channel: channel of the crashes.
        product: product of the crashes.

    For every signature and build id, the stats and the prototype UUIDs
    are recorded. Build ids unknown to the database are skipped and
    reported in the log once everything has been committed.
    """
    if not date:
        # datetime.utcnow() is deprecated since Python 3.12; request an
        # aware UTC datetime directly instead of localizing a naive one.
        date = datetime.now(pytz.utc)

    data = dc.get_new_signatures(product, channel, date)
    unknown_bids = set()
    for sgn, info in data.items():
        # Resolved lazily so a signature whose builds are all unknown
        # never triggers Signature.get_id.
        sgnid = None
        for bid, protos in info["protos"].items():
            bidid = models.Build.get_id(bid, channel, product)
            if bidid is None:
                unknown_bids.add(bid)
                continue
            if sgnid is None:
                sgnid = models.Signature.get_id(sgn)
            models.Stats.add(sgnid, bidid, info["bids"][bid], info["installs"][bid])
            for proto in protos:
                models.UUID.add(
                    proto["uuid"], sgnid, proto["proto"], bidid, commit=False
                )
    # UUIDs were added with commit=False; commit them all at once.
    models.commit()

    for bid in unknown_bids:
        logger.info("No buildid in db for {}/{}/{}".format(bid, product, channel))
def update(date, channel, product, analyze=True):
    """Run a full update for one date/channel/product.

    Steps, in order: store the pushlog, normalise ``date`` (when truthy)
    with ``lmdutils.get_date_ymd``, update the builds, store the crashes
    (errors are logged, not raised) and, when ``analyze`` is true,
    schedule report analysis.
    """
    logger.info("Update data: started.")
    put_filelog(channel)
    day = lmdutils.get_date_ymd(date) if date else date
    update_builds(day, channel, product)
    try:
        put_crashes(day, channel, product)
    except Exception as err:
        logger.error(err, exc_info=True)
    if analyze:
        analyze_reports()
    logger.info("Update data: finished.")
def update_in_queue(channel, product, date=None):
    """Enqueue an ``update`` job for the given channel, product and date."""
    worker.get_queue().enqueue_call(
        func=update, args=(date, channel, product), result_ttl=0
    )
def update_all(products=None, channels=None, date=None):
    """Enqueue an update for every product/channel pair.

    Args:
        products: iterable of products; when ``None``, the value of
            ``config.get_products()`` at call time is used.
        channels: iterable of channels; when ``None``, the value of
            ``config.get_channels()`` at call time is used.
        date: date forwarded to each enqueued update (``None`` lets the
            update pick its own default).
    """
    # Resolve the defaults at call time: a call in the signature would be
    # evaluated only once, when the module is imported.
    if products is None:
        products = config.get_products()
    if channels is None:
        channels = config.get_channels()

    for product in products:
        for channel in channels:
            # The date argument used to be accepted but silently dropped.
            update_in_queue(channel, product, date=date)