in eval_semisup.py [0:0]
def main():
    """Fine-tune a pretrained ResNet on a 1% / 10% labeled subset of ImageNet.

    Semi-supervised evaluation protocol: restrict the ImageNet train split to
    the official SimCLR subset file selected by ``args.labels_perc``, load
    pretrained weights from ``args.pretrained`` (falling back to random init),
    then train with a two-group SGD schedule (trunk at ``args.lr``, head at
    ``args.lr_last_layer``) under DistributedDataParallel, checkpointing each
    epoch on rank 0.

    Relies on module-level globals: ``parser``, ``best_acc`` and the helpers
    imported at the top of the file (``init_distributed_mode``,
    ``initialize_exp``, ``restart_from_checkpoint``, ``train``,
    ``validate_network``, ...).
    """
    global args, best_acc
    args = parser.parse_args()
    init_distributed_mode(args)
    fix_random_seeds(args.seed)
    logger, training_stats = initialize_exp(
        args, "epoch", "loss", "prec1", "prec5", "loss_val", "prec1_val", "prec5_val"
    )

    # ---------------- build data ----------------
    train_data_path = os.path.join(args.data_path, "train")
    train_dataset = datasets.ImageFolder(train_data_path)
    # Take either 1% or 10% of the images: fetch the official SimCLR subset
    # list and restrict the ImageFolder samples to those filenames.
    # The response is a context manager — closing it deterministically fixes
    # the leaked HTTP connection of the original code.
    subset_url = (
        "https://raw.githubusercontent.com/google-research/simclr/master/"
        "imagenet_subsets/" + str(args.labels_perc) + "percent.txt"
    )
    with urllib.request.urlopen(subset_url) as subset_file:
        # each line is e.g. "n01440764_10026.JPEG\n"; keep text before '\n'
        list_imgs = [li.decode("utf-8").split('\n')[0] for li in subset_file]
    # Filenames are "<wnid>_<id>.JPEG", so the class folder is the part
    # before the first underscore.
    train_dataset.samples = [(
        os.path.join(train_data_path, li.split('_')[0], li),
        train_dataset.class_to_idx[li.split('_')[0]]
    ) for li in list_imgs]
    val_dataset = datasets.ImageFolder(os.path.join(args.data_path, "val"))
    # NOTE(review): std[0] is 0.228 while the canonical ImageNet value is
    # 0.229. Kept as-is because the released pretrained weights were trained
    # with this constant — confirm before "fixing".
    tr_normalize = transforms.Normalize(
        mean=[0.485, 0.456, 0.406], std=[0.228, 0.224, 0.225]
    )
    train_dataset.transform = transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        tr_normalize,
    ])
    val_dataset.transform = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        tr_normalize,
    ])
    sampler = torch.utils.data.distributed.DistributedSampler(train_dataset)
    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        sampler=sampler,
        batch_size=args.batch_size,
        num_workers=args.workers,
        pin_memory=True,
    )
    val_loader = torch.utils.data.DataLoader(
        val_dataset,
        batch_size=args.batch_size,
        num_workers=args.workers,
        pin_memory=True,
    )
    logger.info("Building data done with {} images loaded.".format(len(train_dataset)))

    # ---------------- build model ----------------
    model = resnet_models.__dict__[args.arch](output_dim=1000)
    # convert batch norm layers to SyncBatchNorm for multi-GPU training
    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)

    # ---------------- load pretrained weights ----------------
    if os.path.isfile(args.pretrained):
        state_dict = torch.load(args.pretrained, map_location="cuda:" + str(args.gpu_to_work_on))
        if "state_dict" in state_dict:
            state_dict = state_dict["state_dict"]
        # remove prefix "module." left by DistributedDataParallel
        state_dict = {k.replace("module.", ""): v for k, v in state_dict.items()}
        for k, v in model.state_dict().items():
            if k not in list(state_dict):
                logger.info('key "{}" could not be found in provided state dict'.format(k))
            elif state_dict[k].shape != v.shape:
                logger.info('key "{}" is of different shape in model and provided state dict'.format(k))
                # shape mismatch: fall back to the model's own (random) weight
                state_dict[k] = v
        msg = model.load_state_dict(state_dict, strict=False)
        logger.info("Load pretrained model with msg: {}".format(msg))
    else:
        logger.info("No pretrained weights found => training from random weights")

    # ---------------- model to gpu ----------------
    model = model.cuda()
    model = nn.parallel.DistributedDataParallel(
        model,
        device_ids=[args.gpu_to_work_on],
        find_unused_parameters=True,
    )

    # ---------------- set optimizer ----------------
    # Two parameter groups: the classification head gets its own learning
    # rate (args.lr_last_layer); everything else (the trunk) uses args.lr.
    trunk_parameters = []
    head_parameters = []
    for name, param in model.named_parameters():
        if 'head' in name:
            head_parameters.append(param)
        else:
            trunk_parameters.append(param)
    optimizer = torch.optim.SGD(
        [{'params': trunk_parameters},
         {'params': head_parameters, 'lr': args.lr_last_layer}],
        lr=args.lr,
        momentum=0.9,
        weight_decay=0,
    )
    # step-decay schedule: multiply lr by gamma at each epoch in decay_epochs
    scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer, args.decay_epochs, gamma=args.gamma
    )

    # ---------------- optionally resume from a checkpoint ----------------
    to_restore = {"epoch": 0, "best_acc": (0., 0.)}
    restart_from_checkpoint(
        os.path.join(args.dump_path, "checkpoint.pth.tar"),
        run_variables=to_restore,
        state_dict=model,
        optimizer=optimizer,
        scheduler=scheduler,
    )
    start_epoch = to_restore["epoch"]
    best_acc = to_restore["best_acc"]
    cudnn.benchmark = True

    # ---------------- training loop ----------------
    for epoch in range(start_epoch, args.epochs):
        logger.info("============ Starting epoch %i ... ============" % epoch)
        # re-seed the distributed sampler so shuffling differs per epoch
        train_loader.sampler.set_epoch(epoch)

        scores = train(model, optimizer, train_loader, epoch)
        scores_val = validate_network(val_loader, model)
        training_stats.update(scores + scores_val)

        scheduler.step()

        # save checkpoint (rank 0 only, to avoid concurrent writes)
        if args.rank == 0:
            save_dict = {
                "epoch": epoch + 1,
                "state_dict": model.state_dict(),
                "optimizer": optimizer.state_dict(),
                "scheduler": scheduler.state_dict(),
                "best_acc": best_acc,
            }
            torch.save(save_dict, os.path.join(args.dump_path, "checkpoint.pth.tar"))
    logger.info("Fine-tuning with {}% of labels completed.\n"
                "Test accuracies: top-1 {acc1:.1f}, top-5 {acc5:.1f}".format(
                args.labels_perc, acc1=best_acc[0], acc5=best_acc[1]))