in main_moco.py [0:0]
def train(train_loader, model, optimizer, scaler, summary_writer, epoch, args):
    """Run one epoch of MoCo training.

    Args:
        train_loader: DataLoader yielding ``(images, target)`` where ``images``
            is a two-element list of augmented views (query crop, key crop).
        model: MoCo model; ``model(x1, x2, m)`` returns the contrastive loss
            (``m`` is the momentum coefficient for the key encoder).
        optimizer: optimizer over the model parameters.
        scaler: ``torch.cuda.amp.GradScaler`` for mixed-precision training.
        summary_writer: TensorBoard writer; written to on rank 0 only.
        epoch: current epoch index (0-based).
        args: namespace providing ``moco_m``, ``moco_m_cos``, ``gpu``,
            ``rank``, ``print_freq`` — presumably from argparse; confirm
            against the caller.
    """
    batch_time = AverageMeter('Time', ':6.3f')
    data_time = AverageMeter('Data', ':6.3f')
    learning_rates = AverageMeter('LR', ':.4e')
    losses = AverageMeter('Loss', ':.4e')
    progress = ProgressMeter(
        len(train_loader),
        [batch_time, data_time, learning_rates, losses],
        prefix="Epoch: [{}]".format(epoch))

    # switch to train mode
    model.train()

    end = time.time()
    iters_per_epoch = len(train_loader)
    moco_m = args.moco_m
    for i, (images, _) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        # adjust learning rate and momentum coefficient per iteration
        # (fractional epoch = epoch + i / iters_per_epoch)
        lr = adjust_learning_rate(optimizer, epoch + i / iters_per_epoch, args)
        learning_rates.update(lr)
        if args.moco_m_cos:
            moco_m = adjust_moco_momentum(epoch + i / iters_per_epoch, args)

        if args.gpu is not None:
            images[0] = images[0].cuda(args.gpu, non_blocking=True)
            images[1] = images[1].cuda(args.gpu, non_blocking=True)

        # compute output under autocast (mixed precision)
        with torch.cuda.amp.autocast(True):
            loss = model(images[0], images[1], moco_m)

        # loss.item() synchronizes with the device; call it once and reuse
        # instead of paying the sync twice per iteration.
        loss_val = loss.item()
        losses.update(loss_val, images[0].size(0))
        if args.rank == 0:
            summary_writer.add_scalar("loss", loss_val, epoch * iters_per_epoch + i)

        # compute gradient and do SGD step (scaled for mixed precision)
        optimizer.zero_grad()
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            progress.display(i)