in crashes.py [0:0]
def processRedashDataset(dbFilename, jsonUrl, queryId, userKey, cacheValue, parameters, crashProcessMax):
    """Load a redash crash dataset, symbolicate any new crash reports, and
    fold them into the local reports/stats databases.

    Parameters:
      dbFilename      - filename of the local database of processed reports
      jsonUrl         - redash endpoint url
      queryId         - redash query id
      userKey         - redash api user key
      cacheValue      - cache control value passed through to redash
      parameters      - redash query parameters; must contain 'version'
      crashProcessMax - hard cap on the number of rows processed this run

    Returns a (reports, stats, processedCount) tuple.
    """
    totals = {
        'processed': 0,
        'skippedBadSig': 0,
        'alreadyProcessed': 0,
        'outdated': 0
    }
    # load up our database of processed crash ids
    # returns an empty dict() if no data is loaded.
    reports, stats = loadReports(dbFilename)
    if LoadLocally:
        with open(LocalJsonFile) as f:
            dataset = json.load(f)
    else:
        with Spinner("loading from redash..."):
            dataset = getRedashQueryResult(jsonUrl, queryId, userKey, cacheValue, parameters)
        print(" done.")
    crashesToProcess = len(dataset["query_result"]["data"]["rows"])
    if crashesToProcess > crashProcessMax:
        crashesToProcess = crashProcessMax
    print('%04d total reports loaded.' % crashesToProcess)
    # Precompute the set of crash ids we have already processed so the
    # per-row duplicate check below is O(1) instead of a full scan of
    # every cached report for every row (the old linear search had
    # become very slow on large databases). New ids are added to this
    # set as reports are appended, so duplicates within this run are
    # still caught.
    processedCrashIds = set()
    for sigHash in reports:
        for cachedReport in reports[sigHash]['reportList']:
            processedCrashIds.add(cachedReport['crashid'])
    for recrow in dataset["query_result"]["data"]["rows"]:
        if totals['processed'] >= crashProcessMax:
            break
        # pull some redash props out of the recrow. You can add these
        # by modifying the sql query.
        operatingSystem = recrow['normalized_os']
        operatingSystemVer = recrow['normalized_os_version']
        firefoxVer = recrow['display_version']
        buildId = recrow['build_id']
        compositor = recrow['compositor']
        arch = recrow['arch']
        oomSize = recrow['oom_size']
        devVendor = recrow['vendor']
        devGen = recrow['gen']
        devChipset = recrow['chipset']
        devDevice = recrow['device']
        drvVer = recrow['driver_version']
        drvDate = recrow['driver_date']
        clientId = recrow['client_id']
        devDesc = recrow['device_description']
        # Load the json crash payload from recrow
        props = json.loads(recrow["payload"])
        # touch up for the crash symbolication package
        props['stackTraces'] = props['stack_traces']
        crashId = props['crash_id']
        crashDate = props['crash_date']
        minidumpHash = props['minidump_sha256_hash']
        crashReason = props['metadata']['moz_crash_reason']
        crashInfo = props['stack_traces']['crash_info']
        # These telemetry fields may be missing/empty; default to False
        # and coerce to int when present (matches the stored data shape).
        startupCrash = False
        if recrow['startup_crash']:
            startupCrash = int(recrow['startup_crash'])
        fissionEnabled = False
        if recrow['fission_enabled']:
            fissionEnabled = int(recrow['fission_enabled'])
        lockdownEnabled = False
        if recrow['lockdown_enabled']:
            lockdownEnabled = int(recrow['lockdown_enabled']) == 1
        if crashReason is not None:
            crashReason = crashReason.strip('\n')
        # Ignore crashes older than 7 days
        if not checkCrashAge(crashDate):
            totals['processed'] += 1
            totals['outdated'] += 1
            progress(totals['processed'], crashesToProcess)
            continue
        # check if the crash id is processed, if so continue
        if crashId in processedCrashIds:
            totals['processed'] += 1
            totals['alreadyProcessed'] += 1
            progress(totals['processed'], crashesToProcess)
            continue
        # symbolicate and return payload result
        payload = symbolicate({ "normalized_os": operatingSystem, "payload": props })
        signature = generateSignature(payload)
        if skipProcessSignature(signature):
            totals['processed'] += 1
            totals['skippedBadSig'] += 1
            progress(totals['processed'], crashesToProcess)
            continue
        # pull stack information for the crashing thread
        try:
            crashingThreadIndex = payload['crashing_thread']
        except KeyError:
            #print("KeyError on crashing_thread for report")
            continue
        threads = payload['threads']
        try:
            frames = threads[crashingThreadIndex]['frames']
        except IndexError:
            print("IndexError while indexing crashing thread")
            continue
        except TypeError:
            print("TypeError while indexing crashing thread")
            continue
        # build up a pretty stack
        stack = processStack(frames)
        # generate a tracking hash (avoid shadowing the builtin 'hash')
        sigHash = generateSignatureHash(signature, operatingSystem, operatingSystemVer, arch, firefoxVer)
        if sigHash not in reports:
            # Set up this signature's meta data we track in the signature header.
            reports[sigHash] = {
                'signature': signature,
                'operatingsystem': [operatingSystem],
                'osversion': [operatingSystemVer],
                'firefoxver': [firefoxVer],
                'arch': [arch],
                'reportList': list()
            }
        # Update meta data we track in the report header.
        if operatingSystem not in reports[sigHash]['operatingsystem']:
            reports[sigHash]['operatingsystem'].append(operatingSystem)
        if operatingSystemVer not in reports[sigHash]['osversion']:
            reports[sigHash]['osversion'].append(operatingSystemVer)
        if firefoxVer not in reports[sigHash]['firefoxver']:
            reports[sigHash]['firefoxver'].append(firefoxVer)
        if arch not in reports[sigHash]['arch']:
            reports[sigHash]['arch'].append(arch)
        # create our report with per crash meta data
        report = {
            'clientid': clientId,
            'crashid': crashId,
            'crashdate': crashDate,
            'compositor': compositor,
            'stack': stack,
            'oomsize': oomSize,
            'type': crashInfo['type'],
            'devvendor': devVendor,
            'devgen': devGen,
            'devchipset': devChipset,
            'devdevice': devDevice,
            'devdescription': devDesc,
            'driverversion' : drvVer,
            'driverdate': drvDate,
            'minidumphash': minidumpHash,
            'crashreason': crashReason,
            'startup': startupCrash,
            'fission': fissionEnabled,
            'lockdown': lockdownEnabled,
            # Duplicated but useful if we decide to change the hashing algo
            # and need to reprocess reports.
            'operatingsystem': operatingSystem,
            'osversion': operatingSystemVer,
            'firefoxver': firefoxVer,
            'arch': arch
        }
        # save this crash in our report list, and remember its id so a
        # duplicate row later in this dataset is skipped.
        reports[sigHash]['reportList'].append(report)
        processedCrashIds.add(crashId)
        if sigHash not in stats:
            stats[sigHash] = {
                'signature': signature,
                'crashdata': {}
            }
        # check to see if stats has a date entry that matches crashDate
        if crashDate not in stats[sigHash]['crashdata']:
            stats[sigHash]['crashdata'][crashDate] = { 'crashids': [], 'clientids':[] }
        # os / os version / arch / firefox version live as nested keys
        # alongside 'crashids' and 'clientids' in the per-date record.
        if operatingSystem not in stats[sigHash]['crashdata'][crashDate]:
            stats[sigHash]['crashdata'][crashDate][operatingSystem] = {}
        if operatingSystemVer not in stats[sigHash]['crashdata'][crashDate][operatingSystem]:
            stats[sigHash]['crashdata'][crashDate][operatingSystem][operatingSystemVer] = {}
        if arch not in stats[sigHash]['crashdata'][crashDate][operatingSystem][operatingSystemVer]:
            stats[sigHash]['crashdata'][crashDate][operatingSystem][operatingSystemVer][arch] = {}
        if firefoxVer not in stats[sigHash]['crashdata'][crashDate][operatingSystem][operatingSystemVer][arch]:
            stats[sigHash]['crashdata'][crashDate][operatingSystem][operatingSystemVer][arch][firefoxVer] = { 'clientcount': 0, 'crashcount': 0 }
        if crashId not in stats[sigHash]['crashdata'][crashDate]['crashids']:
            stats[sigHash]['crashdata'][crashDate]['crashids'].append(crashId)
        stats[sigHash]['crashdata'][crashDate][operatingSystem][operatingSystemVer][arch][firefoxVer]['crashcount'] += 1
        # only count each client once per signature/date bucket
        if clientId not in stats[sigHash]['crashdata'][crashDate]['clientids']:
            stats[sigHash]['crashdata'][crashDate][operatingSystem][operatingSystemVer][arch][firefoxVer]['clientcount'] += 1
            stats[sigHash]['crashdata'][crashDate]['clientids'].append(clientId)
        totals['processed'] += 1
        progress(totals['processed'], crashesToProcess)
    print('\n')
    print('%04d - reports processed' % totals['processed'])
    print('%04d - cached results' % totals['alreadyProcessed'])
    print('%04d - reports skipped, bad signature' % totals['skippedBadSig'])
    print('%04d - reports skipped, out dated' % totals['outdated'])
    # Post processing steps
    # Purge signatures from our reports list that are outdated (based
    # on crash date and version). This keeps our crash lists current,
    # especially after a merge. Note this doesn't clear stats, just reports.
    queryFxVersion = parameters['version']
    purgeOldReports(reports, queryFxVersion)
    # purge old crash and client ids from the stats database.
    cleanupStats(reports, stats)
    # calculate unique client id counts for each signature. These are client counts
    # associated with the current redash query, and apply only to a seven day time
    # window. They are stored in the reports database and displayed in the top crash
    # reports.
    for sigHash in reports:
        uniqueClients = { rep['clientid'] for rep in reports[sigHash]['reportList'] }
        reports[sigHash]['clientcount'] = len(uniqueClients)
    return reports, stats, totals['processed']