in ultravox/data/datasets.py [0:0]
def _get_sample(self, row) -> Optional[data_sample.VoiceSample]:
    """Build a voice sample for ``row`` from the configured Jinja templates.

    The user, assistant and transcript templates from ``self._config`` are
    rendered (in that order) with the entries of ``row`` exposed as template
    variables, alongside ``text_proc``. The user template additionally
    receives ``self._config.user_template_args``.

    When ``self._args.include_audio`` is false, every occurrence of
    ``types.AUDIO_PLACEHOLDER`` in the rendered user content is replaced by
    the rendered transcript wrapped in double quotes.

    Args:
        row: Mapping with string keys; it is unpacked as keyword arguments
            into each template render and handed to ``self._get_audio``.

    Returns:
        Whatever ``self._make_sample`` returns for the rendered messages,
        the audio obtained from ``self._config.audio_field`` and the
        rendered transcript.

    Raises:
        ValueError: If any template fails to compile or render
            (``jinja2.TemplateError``); the original error is chained.
    """
    cfg = self._config
    assert cfg.user_template is not None
    assert cfg.user_template_args is not None
    assert cfg.assistant_template is not None
    assert cfg.transcript_template is not None

    def render(template_source, extra_context):
        # StrictUndefined turns a reference to a missing variable into a
        # template error instead of silently rendering an empty string.
        template = jinja2.Template(
            template_source, undefined=jinja2.StrictUndefined
        )
        return template.render(**row, text_proc=text_proc, **extra_context)

    try:
        user_content = render(cfg.user_template, cfg.user_template_args)
        assistant_content = render(cfg.assistant_template, {})
        transcript = render(cfg.transcript_template, {})
    except jinja2.TemplateError as e:
        # Dump the templates and the available keys so a mismatch between
        # the two can be spotted from the output.
        print(f"Error rendering template: {e}")
        print(f"user_template: {cfg.user_template}")
        print(f"assistant_template: {cfg.assistant_template}")
        print(f"transcript_template: {cfg.transcript_template}")
        print(f"sample keys: {list(row.keys())}")
        raise ValueError(
            "Template rendering failed. Make sure all keys in the template exist in the sample."
        ) from e

    if not self._args.include_audio:
        # Text-only mode: put the quoted transcript where the audio
        # placeholder was.
        quoted_transcript = f'"{transcript}"'
        user_content = user_content.replace(
            types.AUDIO_PLACEHOLDER, quoted_transcript
        )

    return self._make_sample(
        _get_messages(user_content, assistant_content),
        self._get_audio(row, cfg.audio_field),
        audio_transcript=transcript,
    )