in ultravox/tools/ds_tool/ds_tool.py [0:0]
def _store_sample_as_files(self, sample, temp_dir: str, exclude_fields: Set[str]):
    """Write one sample to ``temp_dir`` as a WAV file plus a text file.

    Two files are produced, both named after ``self.get_id(sample)``:

    * ``<id>.wav`` -- the audio found under ``self.audio_column_name``,
      resampled to ``self.sample_rate`` when its own rate differs, stored
      as 16-bit PCM WAV.
    * ``<id>.txt`` -- the result of rendering ``self.template`` against the
      sample via ``apply_jinja_template``.

    Args:
        sample: Mapping holding the audio entry (a mapping with ``"array"``
            and ``"sampling_rate"`` keys) and whatever fields the template
            reads. It is not modified by this method.
        temp_dir: Directory the two files are written into.
        exclude_fields: Passed through to ``apply_jinja_template``.
    """
    sample_id = self.get_id(sample)

    audio = sample[self.audio_column_name]
    # Keep the (possibly resampled) waveform in a local instead of writing it
    # back into ``audio``: overwriting ``audio["array"]`` would leave the
    # caller's sample with a stale ``"sampling_rate"`` next to new data.
    waveform = audio["array"]
    if audio["sampling_rate"] != self.sample_rate:
        waveform = librosa.resample(
            waveform,
            orig_sr=audio["sampling_rate"],
            target_sr=self.sample_rate,
        )

    # Resampling happens before the file is opened so a failure there does
    # not leave an empty .wav behind.
    audio_path = os.path.join(temp_dir, f"{sample_id}.wav")
    with open(audio_path, "wb") as f:
        # The header rate must be the rate the data was resampled to; a
        # hard-coded 16000 is wrong whenever self.sample_rate differs.
        sf.write(f, waveform, self.sample_rate, format="WAV", subtype="PCM_16")

    text_path = os.path.join(temp_dir, f"{sample_id}.txt")
    text = apply_jinja_template(self.template, sample, exclude_fields)
    with open(text_path, "w") as f:
        f.write(text)