in src/data_prep_intent.py [0:0]
def prepare_train_test_datasets(self, df, *, test_size=0.05, random_state=42,
                                label_column='label'):
    """Split a labelled dataframe into train and validation datasets.

    The rows of ``df`` are divided with ``train_test_split``, stratified on
    ``df[label_column]``, and each part is converted with
    ``Dataset.from_pandas``.

    Args:
        df: Dataframe to split. It must contain the column named by
            ``label_column``.
        test_size: Size of the validation part, passed through to
            ``train_test_split``. Defaults to 0.05.
        random_state: Seed passed through to ``train_test_split`` so that
            repeated calls on the same data give the same split.
            Defaults to 42.
        label_column: Name of the column used for stratification.
            Defaults to ``'label'``.

    Returns:
        A ``DatasetDict`` with two entries: ``'train'`` and ``'validation'``.

    Raises:
        KeyError: If ``label_column`` is not a column of ``df``.
    """
    train_df, val_df = train_test_split(
        df,
        test_size=test_size,
        random_state=random_state,
        stratify=df[label_column],
    )
    # preserve_index=False: do not carry the (shuffled) dataframe index over
    # into the converted datasets.
    return DatasetDict({
        'train': Dataset.from_pandas(train_df, preserve_index=False),
        'validation': Dataset.from_pandas(val_df, preserve_index=False),
    })