in func/train.py [0:0]
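# Module-level imports assumed at the top of func/train.py (not shown in
# this excerpt); MetricLogger, store_append_h5, _evaluate_store_logs and
# the DATASET_EVAL_CFG_KEY / RESULTS_SAVE_DIR / STR_UID_MAXLEN constants
# are defined elsewhere in the project.
import os
import subprocess
import numpy as np
import torch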
def evaluate(
train_eval_op,
data_loaders: dict,
tb_writer,
logger,
epoch: float, # Can be a partial epoch
store=True,
store_endpoint='logits',
only_run_featext=False):
"""
Args:
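train_eval_op: Callable implementing the forward pass; called as
train_eval_op(data, train_mode=False) and expected to return
(data, outputs, losses, accuracies).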
data_loaders: A dict from key (name) to a data loader. Allows
testing on multiple data loaders.
only_run_featext (bool): If True, return right after the features
are extracted, without computing the final numbers. In that case
the function never tries to sync processes, which can otherwise
lead to crashes.
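store (bool): If True, append the model outputs and per-element
losses for each batch to HDF5 files under RESULTS_SAVE_DIR.
store_endpoint (str): Only output keys starting with this prefix
are stored.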
"""
all_metric_loggers = {}
final_accuracies = {}
for data_key, data_loader in data_loaders.items():
logger.info('Running evaluation for {0}{1}'.format(
DATASET_EVAL_CFG_KEY, data_key))
header = f'[{data_key}] Test:'
metric_logger = MetricLogger(delimiter=' ',
writer=tb_writer,
stat_set='val' + data_key,
logger=logger)
all_metric_loggers[data_key] = metric_logger
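# Each data loader stores its outputs in its own subdirectory; the
# main dataset uses the empty key '', so it writes directly under
# RESULTS_SAVE_DIR.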
this_save_dir = RESULTS_SAVE_DIR + data_key + '/'
if not only_run_featext:
# Delete the previously stored output feature files: the H5 files
# get appended to, so stale results would otherwise keep growing.
# If the goal is feature extraction and appending to earlier
# results is desired, set the config to not delete the old files
# so new outputs are appended to what was already computed.
logger.info('Clearing %s/%s/*', os.getcwd(), this_save_dir)
subprocess.call(f'rm -r {this_save_dir}/*', shell=True)
for data in metric_logger.log_every(data_loader, 2, header):
with torch.no_grad():
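# Run the forward pass without gradients; the op returns the batch
# together with the raw outputs, un-reduced losses and accuracies.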
data, outputs, losses, accuracies = train_eval_op(
data, train_mode=False)
# Reduce the losses here; they are returned unreduced (per
# element) so that the raw values can be stored below.
losses_reduced = {
key: torch.mean(val)
for key, val in losses.items()
}
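# The scalar loss logged below is the sum of the mean-reduced
# individual losses.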
loss = torch.sum(torch.stack(list(losses_reduced.values())))
if store:
# Store every output whose key starts with store_endpoint, e.g.
# 'logits' as well as 'logits_regression' if present.
all_logits = {
key: outputs[key].detach().cpu().numpy()
for key in outputs if key.startswith(store_endpoint)
}
all_logits.update({'idx': data['idx'].detach().cpu().numpy()})
uid_data = np.array(data['uid'])
# If strings, convert format to work with HDF5
if uid_data.dtype.kind == 'U':
# Cast to fixed-width byte strings of length STR_UID_MAXLEN so
# they can be written to HDF5.
assert int(uid_data.dtype.str[2:]) < STR_UID_MAXLEN, (
f'UID strings must be shorter than {STR_UID_MAXLEN} '
f'characters; either shorten them or increase '
f'STR_UID_MAXLEN')
uid_data = uid_data.astype(f'S{STR_UID_MAXLEN}')
all_logits.update({'uid': uid_data})
# Also store the unreduced per-element losses for later analysis.
all_logits.update({
'loss/' + key: val.detach().cpu()
for key, val in losses.items()
})
if not only_run_featext:
# store the targets as well
all_logits.update({
'target/' + key: val.detach().cpu().numpy()
for key, val in data['target'].items()
})
# Append this batch's outputs to the HDF5 files right away,
# rather than collecting everything in a list first (as an
# earlier version did); for feature extraction, holding all
# outputs in a list would be too expensive.
all_logits.update({'epoch': np.array([epoch])})
store_append_h5(all_logits, this_save_dir)
# FIXME need to take into account that the datasets
# could have been padded in distributed setup
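# batch_size here is the nominal loader batch size; the final batch
# of an epoch can be smaller, so the meter weights are approximate.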
batch_size = data_loader.batch_size
metric_logger.update(loss=loss.item())
for acc_key, acc_val in accuracies.items():
metric_logger.meters[acc_key].update(acc_val.item(),
n=batch_size)
for loss_name, loss_val in losses_reduced.items():
metric_logger.meters[loss_name].update(loss_val.item(),
n=batch_size)
if not only_run_featext:
final_accuracies[data_key] = _evaluate_store_logs(
logger, metric_logger, accuracies.keys(), store, this_save_dir,
data_key, data_loader, epoch, losses_reduced.keys())
if only_run_featext:
# None of the rest is needed
return 0.0
# Return the accuracy on the main evaluation dataset, i.e. the one whose
# key has no prefix (the dataset_eval entry), using the accuracy metric
# that dataset marks as its primary_metric.
main_dataset_key = ''
main_metric = final_accuracies[main_dataset_key][
data_loaders[main_dataset_key].dataset.primary_metric]
return main_metric
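# Example usage (a sketch; the second loader key and variable names are
# hypothetical -- only the '' key is required, since it denotes the main
# evaluation dataset whose primary_metric is returned):
#   final_acc = evaluate(
#       train_eval_op,
#       {'': main_val_loader, '_extra': extra_val_loader},
#       tb_writer=tb_writer, logger=logger, epoch=5.0)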