def process_data()

in quickstarts/microsoft.machinelearningservices/machine-learning-job-create-sweep-job/scripts/main.py [0:0]


def process_data(df):
    # split dataframe into X and y
    X = df.drop(["species"], axis=1)
    y = df["species"]

    # encode label
    enc = LabelEncoder()
    y = enc.fit_transform(y)

    # train/test split
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    # return splits and encoder
    return X_train, X_test, y_train, y_test, enc