metric_utils.py [30:241]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
class CLIP(nn.Module):
    """Image-to-image similarity metric built on a pretrained CLIP model.

    Images are read from disk, resized, normalised and embedded with
    ``clip_model.get_image_features``; the score of a pair is the dot product
    of the two L2-normalised embeddings.
    """

    def __init__(self,
                 device,
                 clip_name='laion/CLIP-ViT-bigG-14-laion2B-39B-b160k',
                 size=224):
        """Load the CLIP model and build the preprocessing transforms.

        Args:
            device: Either a bare CUDA index (``0``, ``"1"``), which is turned
                into ``"cuda:<index>"``, or a full device spec such as
                ``"cuda:1"``, ``"cpu"`` or a ``torch.device``.
            clip_name: Checkpoint name passed to ``from_pretrained`` for the
                feature extractor and the model (an alternative that was
                noted here: 'laion/CLIP-ViT-B-32-laion2B-s34B-b79K').
            size: Side length images are resized to when read from disk.
        """
        super().__init__()
        self.size = size
        # A bare index keeps the historical "cuda:<index>" form; anything
        # else is taken as an already complete device specification.
        if isinstance(device, int) or str(device).isdigit():
            self.device = f"cuda:{device}"
        else:
            self.device = str(device)

        self.feature_extractor = CLIPFeatureExtractor.from_pretrained(
            clip_name)
        self.clip_model = CLIPModel.from_pretrained(clip_name).to(self.device)
        # NOTE: the tokenizer always comes from this fixed checkpoint,
        # independent of `clip_name`; none of the methods below use it.
        self.tokenizer = CLIPTokenizer.from_pretrained(
            'openai/clip-vit-base-patch32')

        # Kept as public attributes; not used by the methods of this class.
        self.normalize = transforms.Normalize(
            mean=self.feature_extractor.image_mean,
            std=self.feature_extractor.image_std)
        self.resize = transforms.Resize(224)

        self.to_tensor = transforms.ToTensor()

        # Preprocessing applied in get_img_embeds: resize to 224x224, then
        # normalise with hard-coded per-channel mean / std.
        self.aug = T.Compose([
            T.Resize((224, 224)),
            T.Normalize((0.48145466, 0.4578275, 0.40821073),
                        (0.26862954, 0.26130258, 0.27577711)),
        ])

    # * recommend to use this function for evaluation
    @torch.no_grad()
    def score_gt(self, ref_img_path, novel_views):
        """Return the mean similarity between one reference and many views.

        Args:
            ref_img_path: Path of the reference image, either as a string or
                as a one-element list of paths.
            novel_views: Iterable of image paths to compare with the
                reference.

        Returns:
            ``np.mean`` of the per-view similarity scores (NaN when
            ``novel_views`` is empty).
        """
        # The reference embedding does not depend on the novel view, so it is
        # computed once instead of once per comparison.
        ref_feature = self.get_img_embeds(
            self._load_image_tensor(ref_img_path))
        clip_scores = [
            self.similarity(
                ref_feature,
                self.get_img_embeds(self._load_image_tensor([novel])))
            for novel in novel_views
        ]
        return np.mean(clip_scores)

    def similarity(self, image1_features: torch.Tensor,
                   image2_features: torch.Tensor) -> float:
        """Return the dot product of the first row of each feature tensor.

        Both inputs may be 1-D (``D``) or 2-D (``N x D``); only row 0 of each
        is compared. With normalised features this is the cosine similarity.
        """
        with torch.no_grad(), torch.cuda.amp.autocast():
            first = torch.atleast_2d(image1_features)[:1]  # 1 x D
            second = torch.atleast_2d(image2_features)[:1]  # 1 x D
            return torch.matmul(first, second.T).item()

    def get_img_embeds(self, img):
        """Embed one channel-first image tensor and L2-normalise the result.

        Args:
            img: Tensor indexed as (channel, height, width). If it has four
                channels the fourth one is dropped.

        Returns:
            The output of ``clip_model.get_image_features`` for a batch of
            one, divided by its norm along the last dimension.
        """
        if img.shape[0] == 4:
            img = img[:3, :, :]

        img = self.aug(img).to(self.device)
        img = img.unsqueeze(0)  # b,c,h,w

        image_z = self.clip_model.get_image_features(img)
        image_z = image_z / image_z.norm(dim=-1,
                                         keepdim=True)  # normalize features
        return image_z

    def score_from_feature(self, img1, img2):
        """Embed two image tensors and return their similarity score."""
        img1_feature = self.get_img_embeds(img1)
        img2_feature = self.get_img_embeds(img2)
        return self.similarity(img1_feature, img2_feature)

    def read_img_list(self, img_list):
        """Read images from disk as RGB arrays resized to ``self.size``.

        Args:
            img_list: Iterable of image paths, or a single path string.

        Returns:
            Array of shape ``(len(img_list), size, size, 3)`` produced by
            stacking the resized images.

        Raises:
            FileNotFoundError: If ``cv2.imread`` cannot read one of the paths.
            ValueError: If ``img_list`` is empty (raised by ``np.stack``).
        """
        if isinstance(img_list, str):
            # Iterating a bare string would yield single characters.
            img_list = [img_list]

        size = self.size
        images = []

        for img_path in img_list:
            img = cv2.imread(img_path, cv2.IMREAD_UNCHANGED)
            if img is None:
                # cv2.imread signals failure by returning None, not raising.
                raise FileNotFoundError(
                    f"cv2.imread could not read image: {img_path}")

            if img.ndim == 2:  # single-channel image, no channel axis
                img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
            elif img.shape[2] == 4:  # Handle BGRA images
                alpha = img[:, :, 3]  # Extract alpha channel
                img = cv2.cvtColor(img, cv2.COLOR_BGRA2RGB)  # BGRA -> RGB
                # Set fully transparent pixels to white
                img[alpha == 0] = [255, 255, 255]
            else:  # Handle other image formats like JPG and PNG
                img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

            img = cv2.resize(img, (size, size), interpolation=cv2.INTER_AREA)
            images.append(img)

        return np.stack(images, axis=0)

    def _load_image_tensor(self, img_path):
        """Read exactly one image and convert it with ``self.to_tensor``."""
        images = self.read_img_list(img_path)
        # Drop only the leading batch axis; this raises ValueError when more
        # than one image was supplied.
        return self.to_tensor(np.squeeze(images, axis=0))

    def score_from_path(self, img1_path, img2_path):
        """Return the similarity of two images given by path.

        Each argument is a single path string or a one-element list of paths.
        """
        img1 = self._load_image_tensor(img1_path)
        img2 = self._load_image_tensor(img2_path)
        return self.score_from_feature(img1, img2)


def numpy_to_torch(images, device=None):
    """Convert a channel-last image batch to a channel-first float tensor.

    Values are rescaled with ``x * 2 - 1`` (so [0, 1] maps to [-1, 1]) and the
    axes are reordered from (N, H, W, C) to (N, C, H, W).

    Args:
        images: 4-D numpy array indexed as (batch, height, width, channel).
        device: Target device. When ``None`` the tensor goes to CUDA if it is
            available and stays on the CPU otherwise.

    Returns:
        A ``torch.float32`` tensor on the requested device.
    """
    scaled = images * 2.0 - 1.0
    tensor = torch.from_numpy(scaled.transpose((0, 3, 1, 2))).float()
    if device is None:
        # Default keeps the historical "move to CUDA" behaviour but no longer
        # crashes on hosts without a GPU.
        device = 'cuda' if torch.cuda.is_available() else 'cpu'
    return tensor.to(device)


class LPIPSMeter:
    """Computes and accumulates LPIPS distances between image pairs on disk."""

    def __init__(self, net='alex', device=None, size=224):
        """Build the LPIPS network.

        Args:
            net: Backbone name passed to ``lpips.LPIPS`` (e.g. 'alex', 'vgg').
            device: Device for the network and its inputs. When ``None``,
                CUDA is used if available, otherwise the CPU.
            size: Side length images are resized to when read from disk.
        """
        self.size = size
        self.net = net
        self.results = []
        self.device = device if device is not None else torch.device(
            'cuda' if torch.cuda.is_available() else 'cpu')
        self.fn = lpips.LPIPS(net=net).eval().to(self.device)

    def measure(self):
        """Return ``np.mean`` of the stored results (NaN when empty)."""
        return np.mean(self.results)

    def report(self):
        """Return a one-line summary of the current mean distance."""
        return f'LPIPS ({self.net}) = {self.measure():.6f}'

    def read_img_list(self, img_list):
        """Read images from disk as float RGB arrays scaled by 1/255.

        Args:
            img_list: Iterable of image paths, or a single path string.

        Returns:
            ``float32`` array of shape ``(len(img_list), size, size, 3)``.

        Raises:
            FileNotFoundError: If ``cv2.imread`` cannot read one of the paths.
            ValueError: If ``img_list`` is empty (raised by ``np.stack``).
        """
        if isinstance(img_list, str):
            # Iterating a bare string would yield single characters.
            img_list = [img_list]

        size = self.size
        images = []

        for img_path in img_list:
            img = cv2.imread(img_path, cv2.IMREAD_UNCHANGED)
            if img is None:
                # cv2.imread signals failure by returning None, not raising.
                raise FileNotFoundError(
                    f"cv2.imread could not read image: {img_path}")

            if img.ndim == 2:  # single-channel image, no channel axis
                img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
            elif img.shape[2] == 4:  # Handle BGRA images
                alpha = img[:, :, 3]  # Extract alpha channel
                img = cv2.cvtColor(img,
                                   cv2.COLOR_BGRA2BGR)  # Convert BGRA to BGR
                img = cv2.cvtColor(img,
                                   cv2.COLOR_BGR2RGB)  # Convert BGR to RGB
                # Set fully transparent pixels to white
                img[alpha == 0] = [255, 255, 255]
            else:  # Handle other image formats like JPG and PNG
                img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

            img = cv2.resize(img, (size, size), interpolation=cv2.INTER_AREA)
            images.append(img)

        images = np.stack(images, axis=0)
        return images.astype(np.float32) / 255.0

    def _load_model_input(self, path):
        """Read one image as a (1, 3, size, size) tensor on ``self.device``.

        Values are rescaled with ``x * 2 - 1`` after the 1/255 scaling done
        by ``read_img_list``.
        """
        images = self.read_img_list([path]) * 2.0 - 1.0
        tensor = torch.from_numpy(images.transpose((0, 3, 1, 2))).float()
        # Inputs must live on the same device as the network, which is not
        # necessarily the default CUDA device.
        return tensor.to(self.device)

    # * recommend to use this function for evaluation
    @torch.no_grad()
    def score_gt(self, ref_paths, novel_paths):
        """Return the mean LPIPS distance over paired image paths.

        ``self.results`` is reset and then filled with one entry per pair.
        Pairs are formed with ``zip``, so surplus paths in the longer input
        are ignored.

        Args:
            ref_paths: Iterable of reference image paths.
            novel_paths: Iterable of image paths compared pairwise with
                ``ref_paths``.
        """
        self.results = []
        for path0, path1 in zip(ref_paths, novel_paths):
            # read_img_list already resizes to (size, size), so no further
            # interpolation is needed before calling the network.
            img0 = self._load_model_input(path0)
            img1 = self._load_model_input(path1)
            self.results.append(self.fn(img0, img1).cpu().numpy())

        return self.measure()
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -


threestudio/scripts/metric_utils.py [27:238]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
class CLIP(nn.Module):
    """Image-to-image similarity metric built on a pretrained CLIP model.

    Images are read from disk, resized, normalised and embedded with
    ``clip_model.get_image_features``; the score of a pair is the dot product
    of the two L2-normalised embeddings.
    """

    def __init__(self,
                 device,
                 clip_name='laion/CLIP-ViT-bigG-14-laion2B-39B-b160k',
                 size=224):
        """Load the CLIP model and build the preprocessing transforms.

        Args:
            device: Either a bare CUDA index (``0``, ``"1"``), which is turned
                into ``"cuda:<index>"``, or a full device spec such as
                ``"cuda:1"``, ``"cpu"`` or a ``torch.device``.
            clip_name: Checkpoint name passed to ``from_pretrained`` for the
                feature extractor and the model (an alternative that was
                noted here: 'laion/CLIP-ViT-B-32-laion2B-s34B-b79K').
            size: Side length images are resized to when read from disk.
        """
        super().__init__()
        self.size = size
        # A bare index keeps the historical "cuda:<index>" form; anything
        # else is taken as an already complete device specification.
        if isinstance(device, int) or str(device).isdigit():
            self.device = f"cuda:{device}"
        else:
            self.device = str(device)

        self.feature_extractor = CLIPFeatureExtractor.from_pretrained(
            clip_name)
        self.clip_model = CLIPModel.from_pretrained(clip_name).to(self.device)
        # NOTE: the tokenizer always comes from this fixed checkpoint,
        # independent of `clip_name`; none of the methods below use it.
        self.tokenizer = CLIPTokenizer.from_pretrained(
            'openai/clip-vit-base-patch32')

        # Kept as public attributes; not used by the methods of this class.
        self.normalize = transforms.Normalize(
            mean=self.feature_extractor.image_mean,
            std=self.feature_extractor.image_std)
        self.resize = transforms.Resize(224)

        self.to_tensor = transforms.ToTensor()

        # Preprocessing applied in get_img_embeds: resize to 224x224, then
        # normalise with hard-coded per-channel mean / std.
        self.aug = T.Compose([
            T.Resize((224, 224)),
            T.Normalize((0.48145466, 0.4578275, 0.40821073),
                        (0.26862954, 0.26130258, 0.27577711)),
        ])

    # * recommend to use this function for evaluation
    @torch.no_grad()
    def score_gt(self, ref_img_path, novel_views):
        """Return the mean similarity between one reference and many views.

        Args:
            ref_img_path: Path of the reference image, either as a string or
                as a one-element list of paths.
            novel_views: Iterable of image paths to compare with the
                reference.

        Returns:
            ``np.mean`` of the per-view similarity scores (NaN when
            ``novel_views`` is empty).
        """
        # The reference embedding does not depend on the novel view, so it is
        # computed once instead of once per comparison.
        ref_feature = self.get_img_embeds(
            self._load_image_tensor(ref_img_path))
        clip_scores = [
            self.similarity(
                ref_feature,
                self.get_img_embeds(self._load_image_tensor([novel])))
            for novel in novel_views
        ]
        return np.mean(clip_scores)

    def similarity(self, image1_features: torch.Tensor,
                   image2_features: torch.Tensor) -> float:
        """Return the dot product of the first row of each feature tensor.

        Both inputs may be 1-D (``D``) or 2-D (``N x D``); only row 0 of each
        is compared. With normalised features this is the cosine similarity.
        """
        with torch.no_grad(), torch.cuda.amp.autocast():
            first = torch.atleast_2d(image1_features)[:1]  # 1 x D
            second = torch.atleast_2d(image2_features)[:1]  # 1 x D
            return torch.matmul(first, second.T).item()

    def get_img_embeds(self, img):
        """Embed one channel-first image tensor and L2-normalise the result.

        Args:
            img: Tensor indexed as (channel, height, width). If it has four
                channels the fourth one is dropped.

        Returns:
            The output of ``clip_model.get_image_features`` for a batch of
            one, divided by its norm along the last dimension.
        """
        if img.shape[0] == 4:
            img = img[:3, :, :]

        img = self.aug(img).to(self.device)
        img = img.unsqueeze(0)  # b,c,h,w

        image_z = self.clip_model.get_image_features(img)
        image_z = image_z / image_z.norm(dim=-1,
                                         keepdim=True)  # normalize features
        return image_z

    def score_from_feature(self, img1, img2):
        """Embed two image tensors and return their similarity score."""
        img1_feature = self.get_img_embeds(img1)
        img2_feature = self.get_img_embeds(img2)
        return self.similarity(img1_feature, img2_feature)

    def read_img_list(self, img_list):
        """Read images from disk as RGB arrays resized to ``self.size``.

        Args:
            img_list: Iterable of image paths, or a single path string.

        Returns:
            Array of shape ``(len(img_list), size, size, 3)`` produced by
            stacking the resized images.

        Raises:
            FileNotFoundError: If ``cv2.imread`` cannot read one of the paths.
            ValueError: If ``img_list`` is empty (raised by ``np.stack``).
        """
        if isinstance(img_list, str):
            # Iterating a bare string would yield single characters.
            img_list = [img_list]

        size = self.size
        images = []

        for img_path in img_list:
            img = cv2.imread(img_path, cv2.IMREAD_UNCHANGED)
            if img is None:
                # cv2.imread signals failure by returning None, not raising.
                raise FileNotFoundError(
                    f"cv2.imread could not read image: {img_path}")

            if img.ndim == 2:  # single-channel image, no channel axis
                img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
            elif img.shape[2] == 4:  # Handle BGRA images
                alpha = img[:, :, 3]  # Extract alpha channel
                img = cv2.cvtColor(img, cv2.COLOR_BGRA2RGB)  # BGRA -> RGB
                # Set fully transparent pixels to white
                img[alpha == 0] = [255, 255, 255]
            else:  # Handle other image formats like JPG and PNG
                img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

            img = cv2.resize(img, (size, size), interpolation=cv2.INTER_AREA)
            images.append(img)

        return np.stack(images, axis=0)

    def _load_image_tensor(self, img_path):
        """Read exactly one image and convert it with ``self.to_tensor``."""
        images = self.read_img_list(img_path)
        # Drop only the leading batch axis; this raises ValueError when more
        # than one image was supplied.
        return self.to_tensor(np.squeeze(images, axis=0))

    def score_from_path(self, img1_path, img2_path):
        """Return the similarity of two images given by path.

        Each argument is a single path string or a one-element list of paths.
        """
        img1 = self._load_image_tensor(img1_path)
        img2 = self._load_image_tensor(img2_path)
        return self.score_from_feature(img1, img2)


def numpy_to_torch(images, device=None):
    """Convert a channel-last image batch to a channel-first float tensor.

    Values are rescaled with ``x * 2 - 1`` (so [0, 1] maps to [-1, 1]) and the
    axes are reordered from (N, H, W, C) to (N, C, H, W).

    Args:
        images: 4-D numpy array indexed as (batch, height, width, channel).
        device: Target device. When ``None`` the tensor goes to CUDA if it is
            available and stays on the CPU otherwise.

    Returns:
        A ``torch.float32`` tensor on the requested device.
    """
    scaled = images * 2.0 - 1.0
    tensor = torch.from_numpy(scaled.transpose((0, 3, 1, 2))).float()
    if device is None:
        # Default keeps the historical "move to CUDA" behaviour but no longer
        # crashes on hosts without a GPU.
        device = 'cuda' if torch.cuda.is_available() else 'cpu'
    return tensor.to(device)


class LPIPSMeter:
    """Computes and accumulates LPIPS distances between image pairs on disk."""

    def __init__(self, net='alex', device=None, size=224):
        """Build the LPIPS network.

        Args:
            net: Backbone name passed to ``lpips.LPIPS`` (e.g. 'alex', 'vgg').
            device: Device for the network and its inputs. When ``None``,
                CUDA is used if available, otherwise the CPU.
            size: Side length images are resized to when read from disk.
        """
        self.size = size
        self.net = net
        self.results = []
        self.device = device if device is not None else torch.device(
            'cuda' if torch.cuda.is_available() else 'cpu')
        self.fn = lpips.LPIPS(net=net).eval().to(self.device)

    def measure(self):
        """Return ``np.mean`` of the stored results (NaN when empty)."""
        return np.mean(self.results)

    def report(self):
        """Return a one-line summary of the current mean distance."""
        return f'LPIPS ({self.net}) = {self.measure():.6f}'

    def read_img_list(self, img_list):
        """Read images from disk as float RGB arrays scaled by 1/255.

        Args:
            img_list: Iterable of image paths, or a single path string.

        Returns:
            ``float32`` array of shape ``(len(img_list), size, size, 3)``.

        Raises:
            FileNotFoundError: If ``cv2.imread`` cannot read one of the paths.
            ValueError: If ``img_list`` is empty (raised by ``np.stack``).
        """
        if isinstance(img_list, str):
            # Iterating a bare string would yield single characters.
            img_list = [img_list]

        size = self.size
        images = []

        for img_path in img_list:
            img = cv2.imread(img_path, cv2.IMREAD_UNCHANGED)
            if img is None:
                # cv2.imread signals failure by returning None, not raising.
                raise FileNotFoundError(
                    f"cv2.imread could not read image: {img_path}")

            if img.ndim == 2:  # single-channel image, no channel axis
                img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
            elif img.shape[2] == 4:  # Handle BGRA images
                alpha = img[:, :, 3]  # Extract alpha channel
                img = cv2.cvtColor(img,
                                   cv2.COLOR_BGRA2BGR)  # Convert BGRA to BGR
                img = cv2.cvtColor(img,
                                   cv2.COLOR_BGR2RGB)  # Convert BGR to RGB
                # Set fully transparent pixels to white
                img[alpha == 0] = [255, 255, 255]
            else:  # Handle other image formats like JPG and PNG
                img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

            img = cv2.resize(img, (size, size), interpolation=cv2.INTER_AREA)
            images.append(img)

        images = np.stack(images, axis=0)
        return images.astype(np.float32) / 255.0

    def _load_model_input(self, path):
        """Read one image as a (1, 3, size, size) tensor on ``self.device``.

        Values are rescaled with ``x * 2 - 1`` after the 1/255 scaling done
        by ``read_img_list``.
        """
        images = self.read_img_list([path]) * 2.0 - 1.0
        tensor = torch.from_numpy(images.transpose((0, 3, 1, 2))).float()
        # Inputs must live on the same device as the network, which is not
        # necessarily the default CUDA device.
        return tensor.to(self.device)

    # * recommend to use this function for evaluation
    @torch.no_grad()
    def score_gt(self, ref_paths, novel_paths):
        """Return the mean LPIPS distance over paired image paths.

        ``self.results`` is reset and then filled with one entry per pair.
        Pairs are formed with ``zip``, so surplus paths in the longer input
        are ignored.

        Args:
            ref_paths: Iterable of reference image paths.
            novel_paths: Iterable of image paths compared pairwise with
                ``ref_paths``.
        """
        self.results = []
        for path0, path1 in zip(ref_paths, novel_paths):
            # read_img_list already resizes to (size, size), so no further
            # interpolation is needed before calling the network.
            img0 = self._load_model_input(path0)
            img1 = self._load_model_input(path1)
            self.results.append(self.fn(img0, img1).cpu().numpy())

        return self.measure()
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -