maga_transformer/models/minicpmv/minicpmv.py [64:107]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    @torch.inference_mode()
    def mm_embedding(self, url: str, mm_type: MMUrlType, **kwargs):
        """Return the embedding for the multimodal input at ``url``, with caching.

        The result is looked up in ``vit_emb_cache_`` under ``url``. On a
        miss the data is fetched with ``get_bytes_io_from_url``, decoded by
        ``_mm_preprocess``, embedded by ``mm_process`` while holding
        ``mm_lock``, and then stored in the cache under the same key.

        Args:
            url: Location of the input; also used as the cache key.
            mm_type: Input kind, forwarded to ``_mm_preprocess`` and
                ``mm_process``.
            **kwargs: Forwarded unchanged to ``mm_process``.

        Returns:
            ``(features, None)`` where ``features`` is the cached or freshly
            computed embedding. When ``g_parallel_info.tp_rank > 0`` an empty
            ``torch.Tensor`` is returned instead (a bare tensor, not a tuple).
        """
        target_dtype = self._data_type

        # NOTE(review): presumably only rank 0 computes embeddings; this
        # branch returns a bare tensor rather than a 2-tuple -- confirm
        # that callers expect the differing shape.
        if g_parallel_info.tp_rank > 0:
            return torch.Tensor([])

        embedding = vit_emb_cache_.check_cache(url)
        if embedding is not None:
            return (embedding, None)

        # Cache miss: fetch and decode outside the lock, embed inside it.
        raw_data = get_bytes_io_from_url(url)
        model_input = self._mm_preprocess(raw_data, mm_type)
        with mm_lock:
            embedding = self.mm_process(model_input, mm_type=mm_type, **kwargs)

        # A list of per-item tensors is stacked and cast to the model dtype;
        # any other return value is cached as-is.
        if isinstance(embedding, list):
            stacked = torch.stack(embedding)
            embedding = stacked.to(target_dtype).contiguous()

        vit_emb_cache_.insert_cache(url, embedding)
        return (embedding, None)
        
    def _mm_preprocess(self, data, type, **kwargs):
        """Decode fetched multimodal data into the form ``mm_process`` consumes.

        Args:
            data: Object handed to ``Image.open`` or ``encode_video``; in
                ``mm_embedding`` this is the result of
                ``get_bytes_io_from_url``.
            type: ``MMUrlType`` selecting the decoder. (The name shadows the
                builtin but is kept because it is part of the signature.)
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            For ``MMUrlType.IMAGE``, the image opened with ``Image.open`` and
            converted to ``"RGB"``. For ``MMUrlType.VIDEO``, whatever
            ``encode_video`` returns. ``None`` for any other type.
        """
        if type == MMUrlType.IMAGE:
            image = Image.open(data)
            return image.convert("RGB")
        if type == MMUrlType.VIDEO:
            return encode_video(data)
        # NOTE(review): other types (e.g. MMUrlType.DEFAULT) fall through and
        # yield None -- confirm this is intended by the callers.
        return None

    @torch.inference_mode()
    def mm_process(self, mm_input, **kwargs):
        """Validate ``mm_input`` against its declared type and embed it.

        Args:
            mm_input: A single item or a list of items to embed.
            **kwargs: Only ``mm_type`` (an ``MMUrlType``) is read.

        Returns:
            The result of ``self.image_embedding`` called with a list: a
            single item is wrapped in a one-element list, a list is passed
            through unchanged.

        Raises:
            Exception: If ``mm_type`` is IMAGE but a list was given, if it is
                VIDEO but a non-list was given, or if ``mm_type`` is not one
                of DEFAULT, IMAGE or VIDEO.
        """
        mm_type = kwargs.get("mm_type")
        is_sequence = isinstance(mm_input, list)

        if mm_type == MMUrlType.DEFAULT:
            # DEFAULT accepts either shape.
            batch = mm_input if is_sequence else [mm_input]
        elif mm_type == MMUrlType.IMAGE:
            if is_sequence:
                raise Exception("expect single image input, but get a list")
            batch = [mm_input]
        elif mm_type == MMUrlType.VIDEO:
            if not is_sequence:
                raise Exception("expect video input, but get a single image")
            batch = mm_input
        else:
            raise Exception("unknown mm url type")

        return self.image_embedding(batch)
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -


maga_transformer/models/minicpmv_embedding/minicpmv_embedding.py [122:165]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    @torch.inference_mode()
    def mm_embedding(self, url: str, mm_type: MMUrlType, **kwargs):
        """Compute, or fetch from cache, the embedding of the input at ``url``.

        ``vit_emb_cache_`` is consulted first using ``url`` as the key. If
        nothing is cached, the input is retrieved via
        ``get_bytes_io_from_url``, converted by ``_mm_preprocess``, run
        through ``mm_process`` inside ``mm_lock``, and the outcome is
        inserted into the cache.

        Args:
            url: Where the input lives; doubles as the cache key.
            mm_type: Kind of input, passed on to ``_mm_preprocess`` and
                ``mm_process``.
            **kwargs: Extra options passed straight through to
                ``mm_process``.

        Returns:
            A 2-tuple ``(features, None)``. If ``g_parallel_info.tp_rank`` is
            greater than zero, an empty ``torch.Tensor`` is returned instead
            of a tuple.
        """
        model_dtype = self._data_type

        # NOTE(review): ranks above 0 get a bare empty tensor while rank 0
        # gets a tuple -- verify that callers handle both shapes.
        if g_parallel_info.tp_rank > 0:
            return torch.Tensor([])

        result = vit_emb_cache_.check_cache(url)
        if result is not None:
            return (result, None)

        # Nothing cached: retrieve and preprocess, then embed under the lock.
        fetched = get_bytes_io_from_url(url)
        prepared = self._mm_preprocess(fetched, mm_type)
        with mm_lock:
            result = self.mm_process(prepared, mm_type=mm_type, **kwargs)

        # Only list results are stacked and cast; other values are stored
        # unchanged.
        if isinstance(result, list):
            result = torch.stack(result).to(model_dtype).contiguous()

        vit_emb_cache_.insert_cache(url, result)
        return (result, None)
        
    def _mm_preprocess(self, data, type, **kwargs):
        """Turn fetched input data into an object ready for ``mm_process``.

        Args:
            data: Passed to ``Image.open`` or ``encode_video``; supplied by
                ``mm_embedding`` from ``get_bytes_io_from_url``.
            type: ``MMUrlType`` deciding how ``data`` is decoded. (Shadows
                the builtin; retained as part of the existing signature.)
            **kwargs: Ignored; present for interface compatibility.

        Returns:
            An RGB-converted image from ``Image.open`` for
            ``MMUrlType.IMAGE``, the output of ``encode_video`` for
            ``MMUrlType.VIDEO``, and ``None`` otherwise.
        """
        if type == MMUrlType.IMAGE:
            opened = Image.open(data)
            return opened.convert("RGB")
        if type == MMUrlType.VIDEO:
            return encode_video(data)
        # NOTE(review): any other type, including MMUrlType.DEFAULT, results
        # in None -- check whether callers rely on that.
        return None

    @torch.inference_mode()
    def mm_process(self, mm_input, **kwargs):
        """Check that ``mm_input`` matches ``mm_type`` and compute its embedding.

        Args:
            mm_input: One item, or a list of items, to embed.
            **kwargs: ``mm_type`` (an ``MMUrlType``) is the only key used.

        Returns:
            Whatever ``self.image_embedding`` returns for the input as a
            list: lists are forwarded as-is, single items are wrapped in a
            one-element list.

        Raises:
            Exception: For IMAGE with a list input, for VIDEO with a non-list
                input, and for any ``mm_type`` other than DEFAULT, IMAGE or
                VIDEO.
        """
        mm_type = kwargs.get("mm_type")
        got_list = isinstance(mm_input, list)

        if mm_type == MMUrlType.DEFAULT:
            # No shape constraint for DEFAULT: normalise to a list.
            items = mm_input if got_list else [mm_input]
        elif mm_type == MMUrlType.IMAGE:
            if got_list:
                raise Exception("expect single image input, but get a list")
            items = [mm_input]
        elif mm_type == MMUrlType.VIDEO:
            if not got_list:
                raise Exception("expect video input, but get a single image")
            items = mm_input
        else:
            raise Exception("unknown mm url type")

        return self.image_embedding(items)
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -