in tools/train_net.py [0:0]
def _batch_to_cuda(inputs, labels, meta):
    """
    Move one mini-batch to the current GPU device.
    Args:
        inputs (list or tensor): a list of tensors (updated in place) or a
            single tensor.
        labels (dict or tensor): a dict of tensors or a single tensor.
        meta (dict): extra per-sample data, updated in place. List values
            are moved element by element and string elements are skipped.
    Returns:
        tuple: the (inputs, labels) pair living on the GPU.
    """
    if isinstance(inputs, (list,)):
        for i, pathway in enumerate(inputs):
            inputs[i] = pathway.cuda(non_blocking=True)
    else:
        inputs = inputs.cuda(non_blocking=True)

    if isinstance(labels, (dict,)):
        labels = {k: v.cuda() for k, v in labels.items()}
    else:
        labels = labels.cuda()

    for key, val in meta.items():
        if isinstance(val, (list,)):
            for i, item in enumerate(val):
                if not isinstance(item, (str,)):
                    val[i] = item.cuda(non_blocking=True)
        else:
            meta[key] = val.cuda(non_blocking=True)
    return inputs, labels


def _batch_size(inputs):
    """
    Return the number of samples in a mini-batch.
    Args:
        inputs (list or tensor): a list of tensors or a single tensor. The
            leading dimension of the (first) tensor is taken as the batch
            size.
    """
    first = inputs[0] if isinstance(inputs, (list,)) else inputs
    return first.size(0)


def _reduce_to_scalars(tensors, cfg):
    """
    Combine metric tensors across GPUs and convert them to Python numbers.
    Args:
        tensors (list): metric tensors computed on the local device.
        cfg (CfgNode): configs. `du.all_reduce` is only applied when
            cfg.NUM_GPUS > 1.
    Returns:
        list: one Python number per input tensor, in the same order.
    """
    if cfg.NUM_GPUS > 1:
        tensors = du.all_reduce(tensors)
    # Copy the values from GPU to CPU (sync point).
    return [t.item() for t in tensors]


def eval_epoch(val_loader, model, val_meter, cur_epoch, cfg, writer=None):
    """
    Evaluate the model on the val set.
    Args:
        val_loader (loader): data loader to provide validation data.
        model (model): model to evaluate the performance.
        val_meter (ValMeter): meter instance to record and calculate the metrics.
        cur_epoch (int): number of the current epoch of training.
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
        writer (TensorboardWriter, optional): TensorboardWriter object
            to write Tensorboard log.
    """
    # Evaluation mode enabled. The running stats would not be updated.
    model.eval()
    val_meter.iter_tic()
    # cfg.NUM_GPUS is 0 for CPU runs; count that as one device so the
    # reported mini-batch size never collapses to zero.
    num_devices = max(cfg.NUM_GPUS, 1)

    for cur_iter, (inputs, labels, _, meta) in enumerate(val_loader):
        if cfg.NUM_GPUS:
            # Transfer the data to the current GPU device.
            inputs, labels = _batch_to_cuda(inputs, labels, meta)
        val_meter.data_toc()

        # Gradients are never needed during evaluation. This is a no-op if
        # the caller (or a decorator) has already disabled them.
        with torch.no_grad():
            with torch.cuda.amp.autocast(enabled=cfg.SOLVER.USE_MIXED_PRECISION):
                preds = model(inputs)

        mb_size = _batch_size(inputs) * num_devices

        if isinstance(labels, (dict,)) and cfg.TRAIN.DATASET == "Epickitchens":
            # Compute the verb accuracies and combine them across the GPUs.
            verb_accs = metrics.topk_accuracies(
                preds[0], labels["verb"], (1, 5)
            )
            verb_top1_acc, verb_top5_acc = _reduce_to_scalars(
                list(verb_accs), cfg
            )

            # Compute the noun accuracies and combine them across the GPUs.
            noun_accs = metrics.topk_accuracies(
                preds[1], labels["noun"], (1, 5)
            )
            noun_top1_acc, noun_top5_acc = _reduce_to_scalars(
                list(noun_accs), cfg
            )

            # Compute the action accuracies and combine them across the GPUs.
            action_accs = metrics.multitask_topk_accuracies(
                (preds[0], preds[1]),
                (labels["verb"], labels["noun"]),
                (1, 5),
            )
            action_top1_acc, action_top5_acc = _reduce_to_scalars(
                list(action_accs), cfg
            )

            val_meter.iter_toc()
            # Update and log stats.
            val_meter.update_stats(
                (verb_top1_acc, noun_top1_acc, action_top1_acc),
                (verb_top5_acc, noun_top5_acc, action_top5_acc),
                mb_size,
            )
            # write to tensorboard format if available.
            if writer is not None:
                writer.add_scalars(
                    {
                        "Val/verb_top1_acc": verb_top1_acc,
                        "Val/verb_top5_acc": verb_top5_acc,
                        "Val/noun_top1_acc": noun_top1_acc,
                        "Val/noun_top5_acc": noun_top5_acc,
                        "Val/action_top1_acc": action_top1_acc,
                        "Val/action_top5_acc": action_top5_acc,
                    },
                    global_step=len(val_loader) * cur_epoch + cur_iter,
                )
        else:
            # Compute the errors.
            num_topks_correct = metrics.topks_correct(preds, labels, (1, 5))
            top1_err, top5_err = [
                (1.0 - x / preds.size(0)) * 100.0 for x in num_topks_correct
            ]
            # Combine the errors across the GPUs.
            top1_err, top5_err = _reduce_to_scalars([top1_err, top5_err], cfg)

            val_meter.iter_toc()
            # Update and log stats.
            val_meter.update_stats(top1_err, top5_err, mb_size)
            # write to tensorboard format if available.
            if writer is not None:
                writer.add_scalars(
                    {"Val/Top1_err": top1_err, "Val/Top5_err": top5_err},
                    global_step=len(val_loader) * cur_epoch + cur_iter,
                )

        # NOTE(review): assumes predictions are recorded for both branches
        # above -- confirm against the intended nesting.
        val_meter.update_predictions(preds, labels)

        val_meter.log_iter_stats(cur_epoch, cur_iter)
        val_meter.iter_tic()

    # Log epoch stats.
    val_meter.log_epoch_stats(cur_epoch)
    # write to tensorboard format if available.
    if writer is not None:
        all_preds = [pred.clone().detach() for pred in val_meter.all_preds]
        all_labels = [
            label.clone().detach() for label in val_meter.all_labels
        ]
        if cfg.NUM_GPUS:
            all_preds = [pred.cpu() for pred in all_preds]
            all_labels = [label.cpu() for label in all_labels]
        writer.plot_eval(
            preds=all_preds, labels=all_labels, global_step=cur_epoch
        )

    val_meter.reset()