in ultravox/tools/ds_tool/ds_tool.py [0:0]
def _retrieve_timestamps(self, sample, temp_dir: str):
    """Populate the sample's timestamp column from its TextGrid file.

    The file is looked up at ``<temp_dir>/<id>.TextGrid``, where ``<id>`` is
    whatever ``self.get_id(sample)`` returns.

    Args:
        sample: Mapping-like record; it is modified in place.
        temp_dir: Directory expected to contain the ``.TextGrid`` files.

    Returns:
        The same ``sample`` object. Its ``self.timestamp_column_name`` entry is
        ``None`` when no TextGrid file exists for it; otherwise it is a list of
        ``{"start", "end", "text"}`` dicts, one per entry of the "words" tier,
        in tier order.
    """
    grid_file = os.path.join(temp_dir, f"{self.get_id(sample)}.TextGrid")

    # Stays None when there is no TextGrid file for this sample.
    word_timings = None
    if os.path.exists(grid_file):
        # NOTE(review): the positional False is presumably praatio's
        # includeEmptyIntervals flag -- confirm against the installed version.
        grid = textgrid.openTextgrid(grid_file, False)
        word_timings = []
        for interval in grid.getTier("words").entries:
            word_timings.append(
                {
                    "start": interval.start,
                    "end": interval.end,
                    "text": interval.label,
                }
            )

    sample[self.timestamp_column_name] = word_timings
    return sample