def prep_product_desc()

in modules/python/src/datapreprocessing/datacleaner.py [0:0]


    def prep_product_desc(self, df: pd.DataFrame) -> pd.DataFrame:
        """
        Prepares the product description by performing NLP preprocessing using spaCy.

        Each non-missing description is lower-cased and run through the
        ``en_core_web_sm`` pipeline. The result keeps one lemma per alphabetic,
        non-stop-word token, de-duplicated in first-occurrence order and joined
        with single spaces.

        The 'description' column is overwritten on the DataFrame that was
        passed in; the same object is returned.

        Args:
            df (pd.DataFrame): The input DataFrame containing the 'description' column.

        Returns:
            pd.DataFrame: The DataFrame with the preprocessed 'description' column.
            Missing descriptions, and descriptions that could not be processed,
            become ``None``.
        """
        model_name = "en_core_web_sm"
        try:
            model = spacy.load(model_name)
        except OSError:
            # spaCy raises OSError when the model package is not installed.
            # Download only in that case instead of on every call.
            spacy.cli.download(model_name)
            model = spacy.load(model_name)

        def parse_nlp_description(description: str) -> "str | None":
            # Missing values stay missing; there is nothing to lemmatize.
            if pd.isna(description):
                return None
            try:
                doc = model(description.lower())
                seen = set()  # O(1) duplicate check
                lemmas = []  # preserves first-occurrence order
                for token in doc:
                    lemma = token.lemma_
                    if lemma in seen or token.is_stop or not token.is_alpha:
                        continue
                    seen.add(lemma)
                    lemmas.append(lemma)
                return " ".join(lemmas)
            except Exception as exc:
                # Best effort: one bad row (e.g. a non-string value) must not
                # abort the whole column; log it and leave the row empty.
                self.logger.error(
                    f"Unable to preprocess description with spaCy: {exc}"
                )
                return None

        df["description"] = df["description"].apply(parse_nlp_description)
        return df