def load_data()

in Project-AutoML/automl/data.py [0:0]


def load_data(data_path, label_column, test_size=0.25, random_state=1):
    """Load train and test features/labels from a directory or a single CSV.

    Two input layouts are supported:

    * ``data_path`` is a directory: it must contain ``train.csv`` and
      ``test.csv``; each file is loaded with ``load_csv_data``.
    * ``data_path`` ends with ``.csv``: the file is loaded and split by
      ``load_split_csv_data`` using ``test_size`` and ``random_state``.

    Args:
        data_path: Directory or ``.csv`` file path (``str`` or
            ``os.PathLike``).
        label_column: Label column identifier, forwarded unchanged to the
            CSV loading helpers.
        test_size: Forwarded to ``load_split_csv_data``; only used for the
            single-file layout.
        random_state: Forwarded to ``load_split_csv_data``; only used for
            the single-file layout.

    Returns:
        The tuple ``(train_x, train_y, test_x, test_y)`` as produced by the
        loading helpers.

    Raises:
        AssertionError: ``data_path`` is a directory but ``train.csv`` or
            ``test.csv`` is missing from it.
        Exception: ``data_path`` is neither a directory nor a ``.csv`` path.
    """
    # Normalise path-like objects once so the ``.endswith`` check below also
    # works for ``pathlib.Path`` inputs; plain strings pass through unchanged.
    data_path = os.fspath(data_path)

    if os.path.isdir(data_path):
        train_path = os.path.join(data_path, "train.csv")
        test_path = os.path.join(data_path, "test.csv")
        # Raised explicitly rather than via ``assert`` so the check is not
        # stripped when Python runs with ``-O``; the exception type is kept
        # as AssertionError for callers that already handle it.
        if not (os.path.exists(train_path) and os.path.exists(test_path)):
            raise AssertionError(PATH_ERROR_MESSAGE)

        logger.info(f"load train data from {train_path}")
        logger.info(f"load test data from {test_path}")
        train_x, train_y = load_csv_data(train_path, label_column)
        test_x, test_y = load_csv_data(test_path, label_column)

    elif data_path.endswith(".csv"):
        logger.info(f"load data from {data_path}")
        logger.info("split data to train set and test set")
        train_x, train_y, test_x, test_y = load_split_csv_data(
            data_path, label_column, test_size=test_size, random_state=random_state
        )

    else:
        raise Exception(PATH_ERROR_MESSAGE)

    return train_x, train_y, test_x, test_y