in evaluate.py [0:0]
def _log_stats(stats, stats_file):
    """Print ``stats`` as one JSON line to stdout and append it to ``stats_file``."""
    line = json.dumps(stats)
    print(line)
    print(line, file=stats_file)


def _train_and_evaluate(gpu, args, stats_file):
    """Build the model, optimizer and data loaders, then run the epoch loop.

    Args:
        gpu: CUDA device index used by this process.
        args: Run configuration; see ``main_worker`` for the attributes read.
        stats_file: Open text file for JSON stats lines. Only written to when
            ``args.rank == 0``; may be ``None`` on every other rank.

    Raises:
        ValueError: If ``args.weights`` is neither ``'finetune'`` nor
            ``'freeze'`` when an epoch starts.
    """
    torch.cuda.set_device(gpu)
    torch.backends.cudnn.benchmark = True

    model = models.resnet50().cuda(gpu)
    # NOTE(review): torch.load unpickles the file; only point args.pretrained
    # (and the checkpoint directory) at files you trust.
    state_dict = torch.load(args.pretrained, map_location='cpu')
    missing_keys, unexpected_keys = model.load_state_dict(state_dict, strict=False)
    # The pretrained file must supply every tensor except the final fc layer.
    assert missing_keys == ['fc.weight', 'fc.bias'] and unexpected_keys == []
    # The fc layer is not in the pretrained file, so it is initialised here.
    model.fc.weight.data.normal_(mean=0.0, std=0.01)
    model.fc.bias.data.zero_()
    if args.weights == 'freeze':
        # Only the fc layer keeps requires_grad=True.
        model.requires_grad_(False)
        model.fc.requires_grad_(True)

    # Split parameters so the fc layer and the rest can get separate
    # learning rates.
    classifier_parameters, model_parameters = [], []
    for name, param in model.named_parameters():
        if name in {'fc.weight', 'fc.bias'}:
            classifier_parameters.append(param)
        else:
            model_parameters.append(param)
    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu])

    criterion = nn.CrossEntropyLoss().cuda(gpu)

    param_groups = [dict(params=classifier_parameters, lr=args.lr_classifier)]
    if args.weights == 'finetune':
        param_groups.append(dict(params=model_parameters, lr=args.lr_backbone))
    # The positional 0 is the optimizer-wide default lr; every group above
    # carries its own ``lr``.
    optimizer = optim.SGD(param_groups, 0, momentum=0.9, weight_decay=args.weight_decay)
    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, args.epochs)

    # automatically resume from checkpoint if it exists
    checkpoint_path = args.checkpoint_dir / 'checkpoint.pth'
    if checkpoint_path.is_file():
        # NOTE(review): the checkpoint stores ``best_acc`` as an
        # argparse.Namespace, so it needs full unpickling; recent PyTorch
        # releases may require ``weights_only=False`` here -- confirm against
        # the PyTorch version in use.
        ckpt = torch.load(checkpoint_path, map_location='cpu')
        start_epoch = ckpt['epoch']
        best_acc = ckpt['best_acc']
        model.load_state_dict(ckpt['model'])
        optimizer.load_state_dict(ckpt['optimizer'])
        scheduler.load_state_dict(ckpt['scheduler'])
    else:
        start_epoch = 0
        best_acc = argparse.Namespace(top1=0, top5=0)

    # Data loading code
    traindir = args.data / 'train'
    valdir = args.data / 'val'
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    train_dataset = datasets.ImageFolder(traindir, transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        normalize,
    ]))
    val_dataset = datasets.ImageFolder(valdir, transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        normalize,
    ]))

    if args.train_percent in {1, 10}:
        # Replace the full sample list with the files named in
        # args.train_files. Each entry is bytes; the class directory is the
        # part of the file name before the first underscore.
        train_dataset.samples = []
        for fname in args.train_files:
            fname = fname.decode().strip()
            class_name = fname.split('_')[0]
            train_dataset.samples.append(
                (traindir / class_name / fname, train_dataset.class_to_idx[class_name]))

    train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset)
    # args.batch_size is divided across processes to get the per-process size.
    kwargs = dict(batch_size=args.batch_size // args.world_size,
                  num_workers=args.workers, pin_memory=True)
    train_loader = torch.utils.data.DataLoader(train_dataset, sampler=train_sampler, **kwargs)
    # No sampler: whoever iterates val_loader sees the whole validation set.
    val_loader = torch.utils.data.DataLoader(val_dataset, **kwargs)

    start_time = time.time()
    for epoch in range(start_epoch, args.epochs):
        # train
        if args.weights == 'finetune':
            model.train()
        elif args.weights == 'freeze':
            # The network stays in eval mode while the fc layer is trained.
            model.eval()
        else:
            # An explicit raise (rather than ``assert False``) keeps this
            # check alive under ``python -O``.
            raise ValueError(
                f"args.weights must be 'finetune' or 'freeze', got {args.weights!r}")
        train_sampler.set_epoch(epoch)
        # ``step`` keeps counting across epochs instead of restarting at 0.
        for step, (images, target) in enumerate(train_loader, start=epoch * len(train_loader)):
            output = model(images.cuda(gpu, non_blocking=True))
            loss = criterion(output, target.cuda(gpu, non_blocking=True))
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            if step % args.print_freq == 0:
                # Each process contributes loss / world_size; the reduction
                # onto rank 0 then yields the mean loss over all processes.
                torch.distributed.reduce(loss.div_(args.world_size), 0)
                if args.rank == 0:
                    pg = optimizer.param_groups
                    lr_classifier = pg[0]['lr']
                    lr_backbone = pg[1]['lr'] if len(pg) == 2 else 0
                    stats = dict(epoch=epoch, step=step, lr_backbone=lr_backbone,
                                 lr_classifier=lr_classifier, loss=loss.item(),
                                 time=int(time.time() - start_time))
                    _log_stats(stats, stats_file)

        # evaluate
        model.eval()
        if args.rank == 0:
            # Validation runs on rank 0 only.
            top1 = AverageMeter('Acc@1')
            top5 = AverageMeter('Acc@5')
            with torch.no_grad():
                for images, target in val_loader:
                    output = model(images.cuda(gpu, non_blocking=True))
                    acc1, acc5 = accuracy(output, target.cuda(gpu, non_blocking=True), topk=(1, 5))
                    top1.update(acc1[0].item(), images.size(0))
                    top5.update(acc5[0].item(), images.size(0))
            best_acc.top1 = max(best_acc.top1, top1.avg)
            best_acc.top5 = max(best_acc.top5, top5.avg)
            stats = dict(epoch=epoch, acc1=top1.avg, acc5=top5.avg,
                         best_acc1=best_acc.top1, best_acc5=best_acc.top5)
            _log_stats(stats, stats_file)

        # sanity check
        if args.weights == 'freeze':
            # Every tensor present in the pretrained file must still be
            # bit-identical in the model after this epoch.
            reference_state_dict = torch.load(args.pretrained, map_location='cpu')
            model_state_dict = model.module.state_dict()
            for k in reference_state_dict:
                assert torch.equal(model_state_dict[k].cpu(), reference_state_dict[k]), k

        scheduler.step()
        if args.rank == 0:
            # ``epoch + 1`` is stored so that a resumed run starts at the
            # next epoch.
            state = dict(
                epoch=epoch + 1, best_acc=best_acc, model=model.state_dict(),
                optimizer=optimizer.state_dict(), scheduler=scheduler.state_dict())
            torch.save(state, checkpoint_path)


def main_worker(gpu, args):
    """Train and evaluate a ``models.resnet50()`` classifier in one process.

    Joins the NCCL process group, then trains the ``fc`` layer (and, when
    ``args.weights == 'finetune'``, the remaining parameters too) starting
    from the weights in ``args.pretrained``. After every epoch rank 0
    evaluates on the validation set, logs JSON stats to stdout and to
    ``<checkpoint_dir>/stats.txt``, and writes
    ``<checkpoint_dir>/checkpoint.pth``, from which a later run resumes.

    Args:
        gpu: CUDA device index for this process. It is also added to
            ``args.rank`` to form this process's rank.
        args: Run configuration. ``args.rank`` is modified in place. The
            attributes read are ``rank``, ``dist_url``, ``world_size``,
            ``checkpoint_dir``, ``pretrained``, ``weights``,
            ``lr_classifier``, ``lr_backbone``, ``weight_decay``, ``epochs``,
            ``data``, ``train_percent``, ``train_files``, ``batch_size``,
            ``workers`` and ``print_freq``.

    Raises:
        ValueError: If ``args.weights`` is neither ``'finetune'`` nor
            ``'freeze'`` when an epoch starts.
    """
    args.rank += gpu
    torch.distributed.init_process_group(
        backend='nccl', init_method=args.dist_url,
        world_size=args.world_size, rank=args.rank)

    # Only rank 0 owns the stats file; every other rank carries None.
    stats_file = None
    if args.rank == 0:
        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
    try:
        if args.rank == 0:
            print(' '.join(sys.argv))
            print(' '.join(sys.argv), file=stats_file)
        _train_and_evaluate(gpu, args, stats_file)
    finally:
        # Close the stats file on every exit path, including failures.
        if stats_file is not None:
            stats_file.close()