in ultravox/tools/data_tool.py [0:0]
def main(args: argparse.Namespace):
    """Print samples from the requested datasets; optionally play or save audio.

    One dataset is created per entry of ``args.data_sets`` and the results are
    interleaved, then wrapped in ``datasets.Range`` with ``args.num_samples``
    (presumably limiting how many samples are yielded -- confirm against
    ``datasets.Range``).

    For every sample the last two messages are checked to be a user turn
    followed by an assistant turn, and are printed as a ``Q:`` / ``A:`` pair
    together with the sample's audio transcript.

    Args:
        args: Parsed command-line options. Fields read here: ``shuffle``,
            ``data_split``, ``seed``, ``data_sets``, ``num_samples``,
            ``play``, ``playback_rate`` and ``write``.

    Raises:
        AssertionError: If a sample has fewer than two messages, or its last
            two messages are not a user turn followed by an assistant turn.
    """
    dataset_options = datasets.VoiceDatasetArgs(
        shuffle=args.shuffle,
        split=args.data_split,
    )
    # Only override the shuffle seed when one was explicitly supplied.
    if args.seed is not None:
        dataset_options.shuffle_seed = args.seed

    sources = [datasets.create_dataset(name, dataset_options) for name in args.data_sets]
    interleaved = datasets.InterleaveDataset(sources)
    selection = datasets.Range(interleaved, args.num_samples)

    for index, sample in enumerate(selection):
        print(f"--- Sample {index} ---")

        turns = sample.messages
        assert len(turns) >= 2, f"Bad sample (messages) {len(turns)}"
        question, reply = turns[-2], turns[-1]
        assert question["role"] == "user", f"Bad sample (Q role): {turns}"
        assert reply["role"] == "assistant", f"Bad sample (A role): {turns}"

        # Show newlines as a literal "\n" so the answer stays on one line.
        answer = reply["content"].replace("\n", "\\n")
        print(f"Q: {question['content']} [\"{sample.audio_transcript}\"]")
        print(f"A: {answer}")

        if args.play:
            waveform = sample.audio
            rate = args.playback_rate
            if rate is not None:
                # Time-stretch the audio by the requested playback rate.
                waveform = librosa.effects.time_stretch(waveform, rate=rate)
            sd.play(waveform, sample.sample_rate)
            sd.wait()

        if args.write:
            # Always writes the unmodified sample audio, one file per sample
            # index, relative to the current working directory.
            wav_path = f"sample{index}.wav"
            with open(wav_path, "wb") as wav_file:
                wav_bytes = datasets.audio_to_wav(sample.audio, sample.sample_rate)
                wav_file.write(wav_bytes)