def cleanup()

in scripts/decontaminate.py [0:0]


    def cleanup(dataset: Dataset) -> Dataset:
        """Drop rows flagged as contaminated, then drop the flag columns.

        Every column whose name starts with ``contaminated_`` is treated as a
        per-row flag. For each such column, rows with a truthy flag are filtered
        out; the number of rows removed by that column (counting only rows that
        earlier columns had not already removed) is printed. Finally all flag
        columns are removed and a size summary is printed.

        Args:
            dataset: Dataset to clean. Must support ``len()``, ``column_names``,
                ``filter(fn, num_proc=...)`` and ``remove_columns(names)``
                (presumably a Hugging Face ``datasets.Dataset`` -- confirm
                against the file's imports).

        Returns:
            The filtered dataset without any ``contaminated_*`` columns.
        """
        prefix = "contaminated_"
        initial_size = len(dataset)
        contamination_cols = [col for col in dataset.column_names if col.startswith(prefix)]

        for col in contamination_cols:
            size_prior = len(dataset)
            # The lambda is invoked inside this iteration, so closing over the
            # loop variable `col` is safe here.
            dataset = dataset.filter(lambda x: not x[col], num_proc=8)
            removed = size_prior - len(dataset)
            if removed > 0:
                # Slice off only the leading prefix; str.replace would also strip
                # any later occurrence of the prefix text inside the name.
                print(f"Removed {removed} samples from '{col[len(prefix):]}'")

        dataset = dataset.remove_columns(contamination_cols)
        print(f"Initial size: {initial_size}, Final size: {len(dataset)}")
        return dataset