in tools/kd/train_net.py [0:0]
def eval_epoch(val_loader, model, val_meter, cur_epoch, cfg, writer=None):
    """
    Evaluate the model on the val set for one epoch.

    Runs the model over every batch of `val_loader`, feeds the results to
    `val_meter`, optionally mirrors the metrics to Tensorboard, and resets
    the meter before returning. Nothing is returned.

    Args:
        val_loader (loader): data loader to provide validation data. Each
            batch is unpacked as `(inputs, labels, _, meta)`.
        model (model): model to evaluate the performance.
        val_meter (ValMeter): meter instance to record and calculate the
            metrics.
        cur_epoch (int): number of the current epoch of training.
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
        writer (TensorboardWriter, optional): TensorboardWriter object
            to write Tensorboard log.

    NOTE(review): gradient tracking is not disabled inside this function; it
    presumably runs under `torch.no_grad()` set up by a decorator or by the
    caller outside this block -- confirm.
    """
    # Evaluation mode enabled. The running stats would not be updated.
    model.eval()
    val_meter.iter_tic()

    for cur_iter, (inputs, labels, _, meta) in enumerate(val_loader):
        if cfg.NUM_GPUS:
            # Transfer the data to the current GPU device. Skipped when
            # NUM_GPUS is 0 so the evaluation can also run on CPU.
            if isinstance(inputs, (list,)):
                for i, clip in enumerate(inputs):
                    inputs[i] = clip.cuda(non_blocking=True)
            else:
                inputs = inputs.cuda(non_blocking=True)
            labels = labels.cuda()
            for key, val in meta.items():
                if isinstance(val, (list,)):
                    for i, item in enumerate(val):
                        val[i] = item.cuda(non_blocking=True)
                else:
                    meta[key] = val.cuda(non_blocking=True)

        if cfg.DETECTION.ENABLE:
            # Compute the predictions.
            preds = model(inputs, meta["boxes"])
            preds = preds.cpu()
            ori_boxes = meta["ori_boxes"].cpu()
            metadata = meta["metadata"].cpu()

            if cfg.NUM_GPUS > 1:
                # Concatenate along dim 0 the pieces returned by
                # du.all_gather_unaligned.
                preds = torch.cat(du.all_gather_unaligned(preds), dim=0)
                ori_boxes = torch.cat(
                    du.all_gather_unaligned(ori_boxes), dim=0
                )
                metadata = torch.cat(
                    du.all_gather_unaligned(metadata), dim=0
                )

            val_meter.iter_toc()
            # Update and log stats.
            val_meter.update_stats(
                preds.cpu(), ori_boxes.cpu(), metadata.cpu()
            )
        else:
            # The model returns a pair; only element 0 of the pair's first
            # item is evaluated (presumably a list of per-head outputs --
            # TODO confirm against the model definition).
            preds, _ = model(inputs)
            preds = preds[0]

            if cfg.DATA.MULTI_LABEL:
                if cfg.NUM_GPUS > 1:
                    preds, labels = du.all_gather([preds, labels])
            else:
                # Compute the errors. With fewer than 5 classes, top-1 is
                # used in place of top-5.
                ks = (1, 5) if cfg.MODEL.NUM_CLASSES >= 5 else (1, 1)
                num_topks_correct = metrics.topks_correct(preds, labels, ks)

                # Turn the correct counts into error percentages.
                top1_err, top5_err = [
                    (1.0 - x / preds.size(0)) * 100.0
                    for x in num_topks_correct
                ]
                # Combine the errors across the GPUs.
                if cfg.NUM_GPUS > 1:
                    top1_err, top5_err = du.all_reduce([top1_err, top5_err])

                # Copy the errors from GPU to CPU (sync point).
                top1_err, top5_err = top1_err.item(), top5_err.item()

                val_meter.iter_toc()

                # `inputs` is either a list of tensors or a single tensor
                # (see the transfer code above); read the batch dimension
                # from the right object in both cases.
                if isinstance(inputs, (list,)):
                    batch_size = inputs[0].size(0)
                else:
                    batch_size = inputs.size(0)

                # Update and log stats. max(..., 1) keeps the sample count
                # non-zero when running on CPU (NUM_GPUS == 0).
                val_meter.update_stats(
                    top1_err, top5_err, batch_size * max(cfg.NUM_GPUS, 1)
                )
                # write to tensorboard format if available.
                if writer is not None:
                    writer.add_scalars(
                        {"Val/Top1_err": top1_err, "Val/Top5_err": top5_err},
                        global_step=len(val_loader) * cur_epoch + cur_iter,
                    )

            val_meter.update_predictions(preds, labels)

        val_meter.log_iter_stats(cur_epoch, cur_iter)
        val_meter.iter_tic()

    # Log epoch stats.
    val_meter.log_epoch_stats(cur_epoch)
    # write to tensorboard format if available.
    if writer is not None:
        if cfg.DETECTION.ENABLE:
            writer.add_scalars(
                {"Val/mAP": val_meter.full_map}, global_step=cur_epoch
            )
        # NOTE(review): this also runs in detection mode and reads
        # val_meter.all_preds / val_meter.all_labels -- confirm the detection
        # meter exposes both attributes.
        all_preds_cpu = [
            pred.clone().detach().cpu() for pred in val_meter.all_preds
        ]
        all_labels_cpu = [
            label.clone().detach().cpu() for label in val_meter.all_labels
        ]
        writer.plot_eval(
            preds=all_preds_cpu, labels=all_labels_cpu, global_step=cur_epoch
        )

    val_meter.reset()