in main.py
def validate_zeroshot(val_loader, model, tokenizer, args):
    batch_time = AverageMeter('Time', ':6.3f')
    top1 = AverageMeter('Acc@1', ':6.2f')
    top5 = AverageMeter('Acc@5', ':6.2f')
    progress = ProgressMeter(
        len(val_loader),
        [batch_time, top1, top5],
        prefix='Test: ')

    # switch to evaluate mode
    model.eval()

    print('=> encoding captions')
    cwd = os.path.dirname(os.path.realpath(__file__))
    with open(os.path.join(cwd, 'templates.json')) as f:
        templates = json.load(f)['imagenet']

    with open(os.path.join(cwd, 'labels.json')) as f:
        labels = json.load(f)['imagenet']
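
    # The two JSON files are assumed to follow this layout (a sketch; the actual
    # prompt strings and class names live in templates.json / labels.json, not here):
    #   templates.json: {"imagenet": ["a photo of a {}.", "a bad photo of a {}.", ...]}
    #   labels.json:    {"imagenet": ["tench", "goldfish", ...]}
    # Each template carries a "{}" placeholder that is filled with a class name below.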

    with torch.no_grad():
        text_features = []
        for l in labels:
            # build the prompt ensemble for this class and encode it
            texts = [t.format(l) for t in templates]
            texts = tokenizer(texts).cuda(args.gpu, non_blocking=True)
            class_embeddings = utils.get_model(model).encode_text(texts)
            class_embeddings = class_embeddings / class_embeddings.norm(dim=-1, keepdim=True)
            # average the per-template embeddings and re-normalize to get one
            # unit-length text feature per class
            class_embeddings = class_embeddings.mean(dim=0)
            class_embeddings = class_embeddings / class_embeddings.norm(dim=-1, keepdim=True)
            text_features.append(class_embeddings)
        text_features = torch.stack(text_features, dim=0)
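
        # text_features is one unit-norm embedding per class, shape
        # [num_classes, embed_dim]; the loop below scores each image against
        # all classes with a single matrix product.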

        end = time.time()
        for i, (images, target) in enumerate(val_loader):
            images = images.cuda(args.gpu, non_blocking=True)
            target = target.cuda(args.gpu, non_blocking=True)

            # encode images
            image_features = utils.get_model(model).encode_image(images)
            image_features = image_features / image_features.norm(dim=-1, keepdim=True)

            # cosine similarity as logits
            logits_per_image = image_features @ text_features.t()

            # measure accuracy
            acc1, acc5 = accuracy(logits_per_image, target, topk=(1, 5))
            # aggregate the per-batch accuracies across distributed processes
            acc1, acc5 = utils.scaled_all_reduce([acc1, acc5])
            top1.update(acc1.item(), images.size(0))
            top5.update(acc5.item(), images.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i % args.print_freq == 0:
                progress.display(i)

    progress.synchronize()
    print('0-shot * Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f}'
          .format(top1=top1, top5=top5))
    return {'acc1': top1.avg, 'acc5': top5.avg}
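

# --- Illustrative usage (not part of the original file) ----------------------
# A minimal sketch of how validate_zeroshot might be wired up. Everything below
# is an assumption for illustration: the real script builds its model, tokenizer,
# transform, and loader in its own setup code, and SimpleTokenizer plus the
# args.data / args.batch_size / args.workers fields are taken on faith from the
# surrounding repo rather than defined here.
def _example_zeroshot_eval(model, args):
    import torchvision.datasets as datasets
    import torchvision.transforms as transforms
    from tokenizer import SimpleTokenizer  # assumed repo-local CLIP-style tokenizer

    # standard ImageNet-style preprocessing, purely for illustration
    val_transform = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ])
    val_dataset = datasets.ImageFolder(os.path.join(args.data, 'val'), val_transform)
    val_loader = torch.utils.data.DataLoader(
        val_dataset, batch_size=args.batch_size, shuffle=False,
        num_workers=args.workers, pin_memory=True, drop_last=False)

    stats = validate_zeroshot(val_loader, model, SimpleTokenizer(), args)
    print(stats)  # e.g. {'acc1': ..., 'acc5': ...}
    return stats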