# detectron/datasets/task_evaluation.py

import logging
import os
import pprint

from detectron.core.config import cfg
from detectron.utils.logging import send_email

logger = logging.getLogger(__name__)

def check_expected_results(results, atol=0.005, rtol=0.1):
"""Check actual results against expected results stored in
cfg.EXPECTED_RESULTS. Optionally email if the match exceeds the specified
tolerance.
Expected results should take the form of a list of expectations, each
specified by four elements: [dataset, task, metric, expected value]. For
example: [['coco_2014_minival', 'box_proposal', 'AR@1000', 0.387], ...].
The expected value may also be formatted as a list [mean, std] providing
an empirical mean and standard deviation from which a valid range is computed
using cfg.EXPECTED_RESULTS_SIGMA_TOL. For example:
[['coco_2014_minival', 'box_proposal', 'AR@1000', [0.387, 0.001]], ...]
"""
    # cfg contains a reference set of results that we want to check against
    if len(cfg.EXPECTED_RESULTS) == 0:
        return

    for dataset, task, metric, expected_val in cfg.EXPECTED_RESULTS:
        assert dataset in results, 'Dataset {} not in results'.format(dataset)
        assert task in results[dataset], 'Task {} not in results'.format(task)
        assert metric in results[dataset][task], \
            'Metric {} not in results'.format(metric)
        actual_val = results[dataset][task][metric]
        ok = False
        if isinstance(expected_val, list):
            # [mean, std] format: accept values within
            # EXPECTED_RESULTS_SIGMA_TOL standard deviations of the mean
            assert len(expected_val) == 2, (
                'Expected result must be in [mean, std] format'
            )
            mean, std = expected_val
            lo = mean - cfg.EXPECTED_RESULTS_SIGMA_TOL * std
            hi = mean + cfg.EXPECTED_RESULTS_SIGMA_TOL * std
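            # Worked example (assuming Detectron's default
            # cfg.EXPECTED_RESULTS_SIGMA_TOL of 4): mean=0.387, std=0.001
            # gives the valid range (0.383, 0.391).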
            ok = (lo < actual_val) and (actual_val < hi)
            msg = (
                '{} > {} > {} sanity check (actual vs. expected): '
                '{:.3f} vs. mean={:.4f}, std={:.4f}, range=({:.4f}, {:.4f})'
            ).format(dataset, task, metric, actual_val, mean, std, lo, hi)
        else:
            # Scalar format: accept values within a combined absolute plus
            # relative tolerance of the expected value
            err = abs(actual_val - expected_val)
            tol = atol + rtol * abs(expected_val)
            ok = (err < tol)
            msg = (
                '{} > {} > {} sanity check (actual vs. expected): '
                '{:.3f} vs. {:.3f}, err={:.3f}, tol={:.3f}'
            ).format(dataset, task, metric, actual_val, expected_val, err, tol)
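        # Note: the scalar test above has the same form as numpy.isclose,
        # which checks abs(a - b) <= atol + rtol * abs(b).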
        if not ok:
            msg = 'FAIL: ' + msg
            logger.error(msg)
            if cfg.EXPECTED_RESULTS_EMAIL != '':
                subject = 'Detectron end-to-end test failure'
                job_name = os.environ.get('DETECTRON_JOB_NAME', '<unknown>')
                job_id = os.environ.get('WORKFLOW_RUN_ID', '<unknown>')
                body = [
                    'Name:',
                    job_name,
                    'Run ID:',
                    job_id,
                    'Failure:',
                    msg,
                    'Config:',
                    pprint.pformat(cfg),
                    'Env:',
                    pprint.pformat(dict(os.environ)),
                ]
                send_email(
                    subject, '\n\n'.join(body), cfg.EXPECTED_RESULTS_EMAIL
                )
        else:
            msg = 'PASS: ' + msg
            logger.info(msg)
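

# A minimal usage sketch with hypothetical numbers. In practice,
# cfg.EXPECTED_RESULTS, cfg.EXPECTED_RESULTS_SIGMA_TOL, and
# cfg.EXPECTED_RESULTS_EMAIL are populated from a YAML config, and
# `results` is produced by the dataset evaluators; the values below are
# illustrative only, not reference results.
if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO)
    cfg.EXPECTED_RESULTS = [
        # Scalar expected value: checked against atol + rtol * |expected|
        ['coco_2014_minival', 'box_proposal', 'AR@1000', 0.387],
        # [mean, std] expected value: checked against a sigma range
        ['coco_2014_minival', 'bbox', 'AP', [0.373, 0.001]],
    ]
    results = {
        'coco_2014_minival': {
            'box_proposal': {'AR@1000': 0.389},
            'bbox': {'AP': 0.3725},
        }
    }
    check_expected_results(results)  # logs one PASS/FAIL line per expectation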