in ml/dataset_transformer.py [0:0]
def test_example():
    """Run ``transform_rlhf_dataset`` on a two-row sample and print a summary.

    A small DataFrame with ``chosen`` and ``rejected`` text columns plus
    ``metadata``, ``timestamp`` and ``id`` columns is passed to
    ``transform_rlhf_dataset`` with ``metadata`` and ``id`` listed in
    ``drop_cols``. The function then prints the input shape, the output
    shape, the head of the output, and the value counts of the output's
    ``label`` column.

    Nothing is asserted or returned; the printed output is meant for
    manual inspection.
    """
    sample_columns = {
        'chosen': ['This is a good response', 'Another good one'],
        'rejected': ['This is a bad response', 'Another bad one'],
        'metadata': ['meta1', 'meta2'],
        'timestamp': ['2024-01-01', '2024-01-02'],
        'id': [1, 2],
    }
    source_frame = pd.DataFrame(sample_columns)

    # Only these two columns are requested for dropping; 'timestamp' is
    # not listed.
    columns_to_drop = ['metadata', 'id']
    result_frame = transform_rlhf_dataset(
        source_frame,
        chosen_col='chosen',
        rejected_col='rejected',
        drop_cols=columns_to_drop,
    )

    # The input shape is read after the transform call, as in the original.
    print("Original shape:", source_frame.shape)
    print("\nTransformed shape:", result_frame.shape)

    print("\nTransformation sample:")
    print(result_frame.head())

    # The result is expected to expose a 'label' column, since it is read here.
    print("\nLabel distribution:")
    label_counts = result_frame['label'].value_counts()
    print(label_counts)