in distilvit/curate_gpt.py [0:0]
def drop_duplicates_in_split(split):
    """Return the distinct image records of ``split`` as a ``Dataset``.

    ``split`` is loaded into a pandas DataFrame and narrowed to the
    ``image_id``, ``image_path`` and ``coco_url`` columns; every other
    column is discarded. Rows that repeat the same combination of those
    three values are collapsed to their first occurrence, and the index
    is renumbered from zero before the frame is handed to
    ``Dataset.from_pandas``.

    Args:
        split: Any input accepted by ``pd.DataFrame(...)`` that yields the
            three columns listed above (presumably one split of an image
            captioning dataset -- confirm against the caller).

    Returns:
        The object produced by ``Dataset.from_pandas`` for the
        de-duplicated three-column frame.

    Raises:
        KeyError: If any of the three expected columns is missing.
    """
    identity_columns = ["image_id", "image_path", "coco_url"]

    frame = pd.DataFrame(split)
    unique_rows = frame[identity_columns].drop_duplicates()

    # Dropping rows leaves gaps in the index; renumber so the converted
    # dataset does not carry the stale row labels along.
    unique_rows = unique_rows.reset_index(drop=True)

    return Dataset.from_pandas(unique_rows)