in ultravox/tools/mds_tool.py [0:0]
def _convert_worker(self, task_args: _ProcessArgs) -> int:
    """Write one index range of processed samples through an MDS writer.

    Opens a ``streaming.MDSWriter`` on ``task_args.out`` with
    ``task_args.columns`` and writes every sample yielded by
    ``self._process_batch(task_args.start_idx, task_args.end_idx)``.

    Args:
        task_args: Supplies the writer target (``out``), the column spec
            (``columns``) and the index range (``start_idx``, ``end_idx``)
            handed to ``_process_batch``.

    Returns:
        The number of samples written by this call.
    """
    written = 0
    with streaming.MDSWriter(
        out=task_args.out,
        columns=task_args.columns,
    ) as mds_writer:
        samples = self._process_batch(task_args.start_idx, task_args.end_idx)
        for written, sample in enumerate(samples, start=1):
            mds_writer.write(sample)
            # Progress is reported only by the call whose range starts at
            # index 0, and only on every 1000th sample it writes.
            if task_args.start_idx != 0 or written % 1000 != 0:
                continue
            # The local count is scaled by num_groups — presumably an
            # estimate of the total across all groups; confirm with caller.
            estimated_total = written * self._args.num_groups
            logging.info(f"Processed {estimated_total} samples...")
    return written