in Project-BasicAlgorithm/core/data.py [0:0]
def load_data(data_path, label_column, test_size=0.25, random_state=1):
    """Load train/test features and labels from a directory or a single CSV.

    Two layouts are supported:

    * ``data_path`` is a directory: it must contain ``train.csv`` and
      ``test.csv``; each file is loaded separately with ``load_csv_data``.
    * ``data_path`` ends with ``.csv``: the file is loaded and split into
      train and test parts by ``load_split_csv_data``.

    Args:
        data_path: Directory or ``.csv`` file path (``str`` or
            ``os.PathLike``).
        label_column: Label column identifier, passed through unchanged to
            the CSV loaders.
        test_size: Forwarded to ``load_split_csv_data``; only used when
            ``data_path`` is a single CSV file.
        random_state: Forwarded to ``load_split_csv_data``; only used when
            ``data_path`` is a single CSV file.

    Returns:
        The tuple ``(train_x, train_y, test_x, test_y)`` exactly as produced
        by the underlying loaders.

    Raises:
        AssertionError: ``data_path`` is a directory but ``train.csv`` or
            ``test.csv`` is missing from it.
        Exception: ``data_path`` is neither a directory nor a ``.csv`` path.
    """
    # Normalise path-like objects (e.g. pathlib.Path) to a plain string so
    # the suffix check below works for them just like the os.path calls do.
    data_path = os.fspath(data_path)

    if os.path.isdir(data_path):
        train_path = os.path.join(data_path, "train.csv")
        test_path = os.path.join(data_path, "test.csv")
        # Raise explicitly instead of using an ``assert`` statement: asserts
        # are removed under ``python -O``, which would let a missing file
        # slip through to the loader. AssertionError is kept as the type so
        # existing callers that catch it keep working.
        if not (os.path.exists(train_path) and os.path.exists(test_path)):
            raise AssertionError(PATH_ERROR_MESSAGE)
        print(f"load train data from {train_path}")
        print(f"load test data from {test_path}")
        train_x, train_y = load_csv_data(train_path, label_column)
        test_x, test_y = load_csv_data(test_path, label_column)
    elif data_path.endswith(".csv"):
        print(f"load data from {data_path}")
        print("split data to train set and test set")
        train_x, train_y, test_x, test_y = load_split_csv_data(
            data_path, label_column, test_size=test_size, random_state=random_state
        )
    else:
        raise Exception(PATH_ERROR_MESSAGE)
    return train_x, train_y, test_x, test_y