in ultravox/evaluation/string_metrics.py [0:0]
def main():
    """Command-line entry point: score a JSON file of samples with WER or BLEU.

    Command-line arguments:
        input_file: Path to a JSON file. The decoded value is iterated and
            each item is expanded as keyword arguments into
            ``eval_types.Sample``, so every item must be a mapping.
        --metric: Required; either ``wer`` or ``bleu``.
        --lang_id: Language ID, defaults to ``en``.

    The computed score is printed to stdout as ``<METRIC> Score: <score>``.
    """
    cli = argparse.ArgumentParser(
        description="Evaluate JSON files using WER and BLEU."
    )
    cli.add_argument("input_file", type=str, help="Path to the input JSON file.")
    cli.add_argument(
        "--metric",
        type=str,
        choices=["wer", "bleu"],
        required=True,
        help="Metric to compute.",
    )
    cli.add_argument(
        "--lang_id", type=str, default="en", help="Language ID (e.g., en, zh, ja)."
    )
    options = cli.parse_args()

    # Only the JSON decoding needs the file handle; samples are built after
    # the file has been closed.
    with open(options.input_file, "r", encoding="utf-8") as handle:
        records = json.load(handle)

    samples = []
    for record in records:
        samples.append(eval_types.Sample(**record))

    if options.metric == "wer":
        wer_config = {"lang_id": options.lang_id}
        outcome = wer(samples, wer_config)
    else:
        # argparse restricts --metric to "wer"/"bleu", so this branch is BLEU.
        # NOTE(review): the language ID is forwarded as the "tokenize"
        # setting; confirm that bleu() accepts plain language codes such as
        # "en" for that key.
        bleu_config = {"tokenize": options.lang_id}
        outcome = bleu(samples, bleu_config)

    metric_label = options.metric.upper()
    print(f"{metric_label} Score: {outcome.score}")