in src/mlmax/monitoring.py [0:0]
def read_data(input_data_path):
    """Read a CSV file, clean it, and integer-encode the class labels.

    The frame is restricted to the module-level ``columns``; rows with
    missing values and duplicate rows are dropped; every value listed in the
    module-level ``class_labels`` is replaced by its index in that list.
    The per-class row counts of ``target_col`` are logged.

    Args:
        input_data_path: Path (or any source accepted by ``pd.read_csv``)
            of the CSV file to load.

    Returns:
        The cleaned ``pandas.DataFrame``.
    """
    logger.info(f"Reading input data from {input_data_path}")
    raw = pd.read_csv(input_data_path)

    # Keep only the configured columns, then drop incomplete and repeated rows.
    df = pd.DataFrame(data=raw, columns=columns)
    df = df.dropna().drop_duplicates()

    # Map each class label to its position in `class_labels` (applied to the
    # whole frame, not only the target column).
    label_codes = list(range(len(class_labels)))
    df = df.replace(class_labels, label_codes)

    # minlength=2 guarantees two bins even when a class is absent (or the
    # frame is empty); without it the two-name unpacking raises ValueError
    # whenever no positive example survives cleaning.
    # NOTE(review): assumes the target column is integer-typed after the
    # replacement above; confirm against the pandas version in use.
    negative_examples, positive_examples = np.bincount(
        df[target_col], minlength=2
    )
    logger.info(
        f"Data after cleaning: {df.shape}, {positive_examples} positive examples, "
        f"{negative_examples} negative examples"
    )
    return df