in video_processing/modules/aesthetic_laion.py [0:0]
def __call__(self, images):
device = next(self.parameters()).device
inputs = self.processor(images=images, return_tensors="pt")
inputs = {k: v.to(self.dtype).to(device) for k, v in inputs.items()}
embed = self.clip(**inputs)[0]
# normalize embedding
embed = embed / torch.linalg.vector_norm(embed, dim=-1, keepdim=True)
return self.mlp(embed).squeeze(1)