def prep_cat()

in modules/python/src/datapreprocessing/datacleaner.py [0:0]


    def prep_cat(self, df: pd.DataFrame) -> pd.DataFrame:
        """
        Split the 'product_category_tree' column into one column per category level.

        Every value of the column is first passed through ``self.reformat`` and
        the result is split on the ">>" separator. Each piece is stripped of
        surrounding whitespace and stored in columns named ``c0_name``,
        ``c1_name``, ... - as many as the deepest tree in ``df`` requires.
        Cells that have no category at that level stay missing (None/NaN).

        The input DataFrame is not modified.

        Args:
            df (pd.DataFrame): The input DataFrame containing the 'product_category_tree' column.

        Returns:
            pd.DataFrame: A new DataFrame with every column of ``df`` except
            'product_category_tree', followed by the category level columns.
        """

        def strip_if_text(value):
            # Padding cells are None and a missing tree is NaN. NaN is truthy,
            # so a plain truthiness check would call .strip() on a float.
            return value.strip() if isinstance(value, str) else value

        # Work on a separate Series so the caller's DataFrame is left untouched.
        category_tree = df["product_category_tree"].apply(self.reformat)
        levels = category_tree.str.split(">>", expand=True)
        # One column per level; the count is set by the deepest tree found.
        levels.columns = [f"c{i}_name" for i in range(levels.shape[1])]
        for col in levels.columns:
            levels[col] = levels[col].apply(strip_if_text)
        # Append the level columns side by side (axis=1) to the remaining columns.
        remaining = df.drop("product_category_tree", axis=1)
        return pd.concat([remaining, levels], axis=1)