def drop_duplicates_in_split()

in distilvit/curate_gpt.py [0:0]


def drop_duplicates_in_split(split):
    """Return a Dataset of the distinct image records found in ``split``.

    ``split`` is loaded into a pandas DataFrame and narrowed to the
    ``image_id``, ``image_path`` and ``coco_url`` columns. Rows that repeat
    across all three columns are dropped (pandas keeps the first occurrence
    by default), and the index is renumbered from zero before the frame is
    handed to ``Dataset.from_pandas``.
    """
    columns = ["image_id", "image_path", "coco_url"]
    frame = pd.DataFrame(split)
    unique_rows = frame[columns].drop_duplicates()
    # Renumber so the surviving rows carry a contiguous 0..n-1 index.
    unique_rows = unique_rows.reset_index(drop=True)
    return Dataset.from_pandas(unique_rows)