# main/train.py — training entry point
def main():
    """Entry point: parse CLI args, configure the trainer, and run training.

    Relies on module-level names imported elsewhere in this file
    (``parse_args``, ``cfg``, ``cudnn``, ``Trainer``). Side effects: trains
    the model in place and writes one checkpoint per epoch via
    ``trainer.save_model``.
    """
    # Argument parsing and global config / logging setup.
    args = parse_args()
    cfg.set_args(args.gpu_ids, args.continue_train)
    # Fixed input sizes are assumed — let cuDNN autotune the fastest kernels.
    cudnn.benchmark = True

    trainer = Trainer()
    trainer._make_batch_generator()
    trainer._make_model()

    # Training loop: one full pass over the batch generator per epoch.
    for epoch in range(trainer.start_epoch, cfg.end_epoch):
        trainer.set_lr(epoch)
        trainer.tot_timer.tic()
        trainer.read_timer.tic()
        for itr, (inputs, targets, meta_info) in enumerate(trainer.batch_generator):
            trainer.read_timer.toc()
            trainer.gpu_timer.tic()

            # forward
            trainer.optimizer.zero_grad()
            loss = trainer.model(inputs, targets, meta_info, 'train')
            # .mean() collapses per-GPU loss vectors when the model is
            # wrapped in DataParallel — presumably; TODO confirm wrapper.
            loss = {k: v.mean() for k, v in loss.items()}

            # backward: total loss is the sum of all individual loss terms
            sum(loss.values()).backward()
            trainer.optimizer.step()
            trainer.gpu_timer.toc()

            # Progress line: epoch/itr counters, lr, timing breakdown
            # (total / gpu / read), projected hours per epoch, and each
            # loss term.
            screen = [
                'Epoch %d/%d itr %d/%d:' % (epoch, cfg.end_epoch, itr, trainer.itr_per_epoch),
                'lr: %g' % (trainer.get_lr()),
                'speed: %.2f(%.2fs r%.2f)s/itr' % (
                    trainer.tot_timer.average_time, trainer.gpu_timer.average_time, trainer.read_timer.average_time),
                '%.2fh/epoch' % (trainer.tot_timer.average_time / 3600. * trainer.itr_per_epoch),
            ]
            screen += ['%s: %.4f' % ('loss_' + k, v.detach()) for k, v in loss.items()]
            trainer.logger.info(' '.join(screen))

            trainer.tot_timer.toc()
            trainer.tot_timer.tic()
            trainer.read_timer.tic()

        # Checkpoint after every epoch so training can resume mid-run.
        trainer.save_model({
            'epoch': epoch,
            'network': trainer.model.state_dict(),
            'optimizer': trainer.optimizer.state_dict(),
        }, epoch)