def run_from_mlflow()

in sdk/python/foundation-models/healthcare-ai/medimageinsight/classification_demo/MedImageInsight.py [0:0]
47 lines of code
18 McCabe index (conditional complexity)

    def run_from_mlflow(self, image=None, text=None, params=None):
        """
        Run inference with the MLflow model.

        Every named item is sent to ``self.mlflow_model.predict`` as a one-row
        DataFrame with an ``image`` and a ``text`` column; a modality that was
        not supplied for an item is passed as an empty string.

        Parameters:
        - image (str): The path to the image data.
        - text (str): The path to the text data.
        - params (dict): Additional parameters for prediction.
            - image_standardization_jpeg_compression_ratio (int): The JPEG compression ratio for the model input, default: 75.
            - image_standardization_image_size (int): The image size for MedImageInsight model input, default: 512.

        Returns:
        - embeddings_dict (dict): A dictionary where each key is the name,
        and the value is another dictionary containing 'image_feature' and/or 'text_feature'.
        - scaling_factor (np.ndarray or None): The first 'scaling_factor' entry of the
        last prediction, or None when the prediction has no such entry or when
        no items were found to process.

        Raises:
        - ValueError: If neither 'image' nor 'text' is provided.
        - AssertionError: If both are provided and their item names differ.
        """
        # Guard clause: nothing below can run without at least one modality.
        if image is None and text is None:
            raise ValueError("At least one of 'image' or 'text' must be provided.")

        if params is None:
            params = {}

        embeddings_dict = {}
        data_dict = {}

        # Collect image data into a dictionary
        if image is not None:
            # Assuming get_files_path returns a dictionary {name: {'file': image_data, 'index': index}}
            images_data = get_files_path(image)
            for name, data in images_data.items():
                data_dict.setdefault(name, {})["image"] = data["file"]

        # Collect text data into a dictionary
        if text is not None:
            # Assuming get_text returns a dictionary {name: {'text': text_data, 'index': index}}
            texts_data = get_text(text)
            for name, data in texts_data.items():
                data_dict.setdefault(name, {})["text"] = data["text"]

        # Ensure that image and text names match if both are provided
        if image is not None and text is not None:
            # Raised explicitly (instead of using an `assert` statement) so the
            # check is not stripped under `python -O`; the exception type and
            # message are the same as before.
            if set(images_data.keys()) != set(texts_data.keys()):
                raise AssertionError("Image and text names do not match")
            print("--------Start Generating Image and Text Features--------")
        elif image is not None:
            print("--------Start Generating Image Features--------")
        else:
            print("--------Start Generating Text Features--------")

        # Stays None when there is nothing to process, so the scaling-factor
        # lookup after the loop cannot hit an unbound variable.
        result = None

        # Process each item in data_dict
        for name, data in tqdm(data_dict.items(), total=len(data_dict)):
            df = pd.DataFrame(
                {"image": [data.get("image", "")], "text": [data.get("text", "")]}
            )
            result = self.mlflow_model.predict(df, params=params)

            features = {}
            if "image_features" in result:
                features["image_feature"] = np.array(result["image_features"][0])
            if "text_features" in result:
                features["text_feature"] = np.array(result["text_features"][0])
            embeddings_dict[name] = features

        # The scaling factor is read from the last prediction only.
        if result is not None and "scaling_factor" in result:
            scaling_factor = np.array(result["scaling_factor"][0])
        else:
            scaling_factor = None

        if image is not None:
            print("--------Finished All Image Features Generation!!--------")
        if text is not None:
            print("--------Finished All Text Features Prediction!!--------")

        return embeddings_dict, scaling_factor