def _process()

in ultravox/tools/ds_tool/ds_tool.py [0:0]


    def _process(self, ds_chunk: datasets.Dataset) -> datasets.Dataset:
        """Run the configured task over one chunk and drop incomplete samples.

        The chunk is handed to ``self.args.task.map_split`` together with the
        worker count, writer batch size and excluded fields taken from
        ``self.args``. When ``self.args.check_empty_columns`` names at least
        one column, the mapped result is additionally filtered so that only
        samples whose value is not ``None`` in every listed column remain.

        Args:
            ds_chunk: The dataset chunk to process.

        Returns:
            The result of ``map_split``, filtered when
            ``check_empty_columns`` is non-empty and returned as-is otherwise.
        """
        ds_mapped = self.args.task.map_split(
            ds_chunk,
            self.args.num_workers,
            self.args.writer_batch_size,
            self.args.exclude_fields,
        )

        check_empty_columns = self.args.check_empty_columns
        # Truthiness covers both an empty sequence and None, so an unset
        # option skips filtering instead of failing inside len().
        if not check_empty_columns:
            return ds_mapped

        # Only None counts as "empty" here; other falsy values such as ""
        # or 0 are kept.
        return ds_mapped.filter(
            lambda sample: all(
                sample[column] is not None for column in check_empty_columns
            ),
            num_proc=self.args.num_workers,
            writer_batch_size=self.args.writer_batch_size,
        )