def read_data()

in src/mlmax/preprocessing.py [0:0]

13 lines of code
1 McCabe index (conditional complexity)


def read_data(input_data_path):
    print(f"Reading input data from {input_data_path}")
    df = pd.read_csv(input_data_path)
    df = pd.DataFrame(data=df, columns=columns)
    df.dropna(inplace=True)
    df.drop_duplicates(inplace=True)
    df.replace(class_labels, list(range(len(class_labels))), inplace=True)
    negative_examples, positive_examples = np.bincount(df[target_col])
    print(
        f"Data after cleaning: {df.shape}, {positive_examples} positive examples, "
        f"{negative_examples} negative examples"
    )
    return df