in ultravox/tools/ds_tool/ds_tool.py [0:0]
def _upload(self, ds_chunk_processed: datasets.Dataset, data_dir: str, split_name):
    """Convert a processed chunk to a chunked dataset and push it to the hub.

    Args:
        ds_chunk_processed: Processed dataset chunk; it is passed through
            ``chunked_dataset.convert_to_chunked_dataset`` before upload.
        data_dir: Forwarded to ``push_to_hub`` as ``data_dir`` and echoed
            in the progress message.
        split_name: Forwarded to ``push_to_hub`` as ``split``.

    Raises:
        AssertionError: If ``self.args.upload_name`` is not a ``str``.
    """
    print(f"Uploading chunk to hub: {data_dir}")
    chunked: chunked_dataset.ChunkedDataset = (
        chunked_dataset.convert_to_chunked_dataset(ds_chunk_processed)
    )

    push_kwargs: Dict[str, Any] = dict(
        config_name=self.args.upload_subset,
        # An explicitly configured token wins; otherwise use HF_TOKEN from
        # the environment (which may itself be unset, yielding None).
        token=self.args.token or os.environ.get("HF_TOKEN"),
        private=self.args.private,
        data_dir=data_dir,
        num_shards=self.args.num_shards,
        split=split_name,
    )

    repo_name = self.args.upload_name
    # Guard against a missing/non-string repo name before pushing.
    assert isinstance(repo_name, str)
    chunked.push_to_hub(repo_name, **push_kwargs)