def update_dataframe()

in modules/python/src/datapreprocessing/dataprep.py [0:0]


    def update_dataframe(self) -> pd.DataFrame:
        """
        Restrict the DataFrame to the required columns and drop rows with nulls.

        Reads ``self.required_cols`` (columns to keep) and
        ``self.filter_null_cols`` (columns checked for null values), then
        rebinds ``self.df`` to the filtered result. The frame that was in
        ``self.df`` before the call is not modified. The shape is logged at
        INFO level before and after the null filter.

        Returns:
            pd.DataFrame: The updated Pandas DataFrame (the same object that
            is now stored in ``self.df``).

        Raises:
            KeyError: Raised by pandas if a label in ``self.required_cols`` is
                not a column of ``self.df``, or if a label in
                ``self.filter_null_cols`` is not among the selected columns.
        """
        self.df = self.df[self.required_cols]
        # Logged after the column selection, so the column count shown here
        # already reflects ``required_cols``; the row count is still untouched.
        self.logger.info(f"Original dataset shape: '{self.df.shape}'")
        # Drop rows with null values in specified columns. The result is
        # reassigned rather than using ``inplace=True``: the column selection
        # above yields a derived frame, and mutating it in place can trigger
        # pandas' SettingWithCopyWarning while the source frame is still alive.
        self.df = self.df.dropna(subset=self.filter_null_cols)
        self.logger.info(f"After dropping null values: '{self.df.shape}'")
        return self.df