in rmac_features.py [0:0]
def get_rmac_descriptors(cnn, args, video_path, aggregated=True, pca=None):
    """Extract R-MAC descriptors for the frames of one video.

    Frames are loaded through ``VideoDataset``, passed through ``cnn`` in
    batches, max-pooled over every region returned by
    ``get_rmac_region_coordinates`` and L2-normalised. Depending on
    ``aggregated`` the regional descriptors are either returned as they are
    or passed through ``pca.apply``, summed over regions and L2-normalised
    again.

    Args:
        cnn: Callable network producing a 4-D feature tensor (it is indexed
            as ``features[:, :, rows, cols]``); presumably
            (batch, channels, height, width) -- confirm against the model.
        args: Options object. Reads ``fps``, ``image_area``, ``b_s``,
            ``workers``, ``device``, ``rmac_levels`` and
            ``regional_descriptors``. NOTE: ``args.pca_dimensions`` is
            overwritten with the size of dimension 1 of the feature tensor
            on every batch.
        video_path: Forwarded unchanged to ``VideoDataset``.
        aggregated: If true, apply ``pca`` and sum the regional descriptors
            into one descriptor per frame; otherwise return the
            L2-normalised regional descriptors.
        pca: Object exposing ``apply(tensor)``; required when ``aggregated``
            is true.

    Returns:
        A NumPy array: the per-batch results concatenated along axis 0.

    Raises:
        ValueError: If ``aggregated`` is true and ``pca`` is ``None``.
            ``np.concatenate`` also raises ``ValueError`` when the data
            loader yields no batches.
    """
    # Fail before building the dataset instead of crashing with an
    # AttributeError on ``pca.apply`` after the first batch has been computed.
    if aggregated and pca is None:
        raise ValueError("pca must be provided when aggregated=True")

    image_transforms = transforms.Compose(
        [
            transforms.Resize(224),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ]
    )

    print("Create video dataset")
    dataset = VideoDataset(video_path, image_transforms, args.fps, args.image_area)

    print("Extract features")
    dataloader = DataLoader(dataset, batch_size=args.b_s, num_workers=args.workers)

    # Spread the forward pass over every visible CUDA device, if there is one.
    devices_ids = list(range(torch.cuda.device_count()))
    if devices_ids:
        print(f"Use devices: {devices_ids}")
        parallel_cnn = nn.DataParallel(cnn, device_ids=devices_ids)
    else:
        parallel_cnn = cnn

    all_rmac_descriptors = []
    with torch.no_grad():
        for x_images in dataloader:
            images = x_images.float().to(args.device)
            features = parallel_cnn(images)
            nc = features.size(1)
            # NOTE(review): this replaces any caller-supplied value, so the
            # aggregation below assumes ``pca.apply`` keeps the descriptor
            # width equal to ``nc`` -- confirm against the PCA implementation.
            args.pca_dimensions = nc

            # ``np.int`` was removed in NumPy 1.24; the builtin ``int`` is the
            # type it aliased, so the resulting dtype is unchanged.
            rmac_regions = get_rmac_region_coordinates(
                features.size(2), features.size(3), args.rmac_levels
            ).astype(int)

            # Max-pool the feature map inside each region. A region row is
            # used as (column start, row start, width, height).
            regional = []
            for region in rmac_regions:
                x, y, w, h = region[0], region[1], region[2], region[3]
                window = features[:, :, y : (h + y), x : (w + x)]
                desc = window.max(2, keepdim=True)[0].max(3, keepdim=True)[0]
                regional.append(desc.view(-1, 1, nc))
            rmac_descriptors = torch.cat(regional, 1)
            nr = rmac_descriptors.size(1)

            # L2-norm of every regional descriptor
            rmac_descriptors = rmac_descriptors.view(-1, nc)
            rmac_descriptors = rmac_descriptors / torch.sqrt(
                torch.sum(rmac_descriptors ** 2, dim=1, keepdim=True)
            )

            if args.regional_descriptors:
                rmac_descriptors = rmac_descriptors.view(-1, nr, nc)

            if not aggregated:
                all_rmac_descriptors.append(rmac_descriptors.detach().cpu().numpy())
            else:
                # PCA whitening
                rmac_descriptors = pca.apply(rmac_descriptors)
                # Sum aggregation and L2-normalization
                rmac_descriptors = torch.sum(
                    rmac_descriptors.view(-1, nr, args.pca_dimensions), 1
                )
                rmac_descriptors = rmac_descriptors / torch.sqrt(
                    torch.sum(rmac_descriptors ** 2, dim=1, keepdim=True)
                )
                all_rmac_descriptors.append(rmac_descriptors.detach().cpu().numpy())

    return np.concatenate(all_rmac_descriptors)