def _retrieve_timestamps()

in ultravox/tools/ds_tool/ds_tool.py [0:0]


    def _retrieve_timestamps(self, sample, temp_dir: str):
        # find the timestamps for the audio and populate the timestamps column
        sample_id = self.get_id(sample)
        text_path = os.path.join(temp_dir, f"{sample_id}.TextGrid")
        if not os.path.exists(text_path):
            sample[self.timestamp_column_name] = None
            return sample

        tg = textgrid.openTextgrid(text_path, False)
        timestamps = tg.getTier("words").entries
        sample[self.timestamp_column_name] = [
            {"start": entry.start, "end": entry.end, "text": entry.label}
            for entry in timestamps
        ]
        return sample