in modules/python/src/datapreprocessing/dataprep.py [0:0]
def update_dataframe(self) -> pd.DataFrame:
    """Keep only the required columns and drop rows with nulls in key columns.

    Reads ``self.required_cols`` (columns to keep) and
    ``self.filter_null_cols`` (columns in which a null value disqualifies
    a row), logs the frame shape before and after the null filter via
    ``self.logger``, and rebinds ``self.df`` to the result.

    The null filter is applied with a non-inplace ``dropna`` on the column
    selection. Mutating a column selection in place can emit
    ``SettingWithCopyWarning`` on pandas versions without Copy-on-Write
    when the source frame is still referenced elsewhere. ``self.df`` is
    rebound only after both steps succeed, so a failure (for example a
    ``KeyError`` for a missing column) leaves it untouched.

    Returns:
        pd.DataFrame: The updated DataFrame, which is also stored in
        ``self.df``.
    """
    selected = self.df[self.required_cols]
    # Shape is reported after column selection, so the column count
    # reflects ``required_cols`` rather than the full input frame.
    self.logger.info(f"Original dataset shape: '{selected.shape}'")

    # Drop rows with null values in the specified columns; this returns a
    # new frame instead of mutating the selection in place.
    cleaned = selected.dropna(subset=self.filter_null_cols)
    self.logger.info(f"After dropping null values: '{cleaned.shape}'")

    self.df = cleaned
    return self.df