in src/mlmax/preprocessing.py [0:0]
def read_data(input_data_path):
    """Load a CSV file, clean it, and encode class labels as integers.

    Steps, in order:

    1. Read the CSV at ``input_data_path``.
    2. Rebuild the frame with the module-level ``columns`` list as its
       column set.
    3. Drop rows containing missing values, then drop duplicate rows.
    4. Replace every occurrence of ``class_labels[i]`` with the integer ``i``.
       The replacement is applied to the whole frame, not only to the
       target column.
    5. Print the resulting shape and the per-class counts of ``target_col``.

    ``columns``, ``class_labels`` and ``target_col`` are module-level names
    defined outside this function.

    Args:
        input_data_path: Path (or any source accepted by ``pd.read_csv``)
            of the CSV file to load.

    Returns:
        The cleaned ``pandas.DataFrame``.

    Raises:
        ValueError: If ``target_col`` holds more than two distinct encoded
            classes, because the summary unpacks exactly two counts.
    """
    print(f"Reading input data from {input_data_path}")
    df = pd.read_csv(input_data_path)
    df = pd.DataFrame(data=df, columns=columns)
    df = df.dropna().drop_duplicates()
    df = df.replace(class_labels, list(range(len(class_labels))))

    # np.bincount only returns max(label) + 1 bins, so data that contains
    # nothing but class 0 would yield a single count and break the two-way
    # unpack below. minlength=2 guarantees both bins exist (missing class -> 0).
    negative_examples, positive_examples = np.bincount(
        df[target_col], minlength=2
    )
    print(
        f"Data after cleaning: {df.shape}, {positive_examples} positive examples, "
        f"{negative_examples} negative examples"
    )
    return df