def wer()

in ultravox/evaluation/string_metrics.py [0:0]


def wer(samples: List[eval_types.Sample], args: Dict[str, Any]) -> eval_types.WerResult:
    """Score generated answers against expected answers as an error-rate percentage.

    The language is read from ``args["lang_id"]`` (lower-cased; ``"<undefined>"``
    when the key is absent) and selects how the texts are prepared:

    * ``"en"`` uses ``whisper_english.EnglishTextNormalizer``; every other
      language uses ``whisper_basic.BasicTextNormalizer``.
    * ``"ar"`` texts are passed through ``remove_diacritics`` before
      normalization.
    * ``"zh"``, ``"ja"``, ``"th"``, ``"lo"`` and ``"my"`` texts are rewritten as
      space-separated characters after normalization, so the word-level
      metric is computed over characters (CER) instead of words.

    Args:
        samples: Samples providing ``expected_answer`` (reference) and
            ``generated_answer`` (hypothesis).
        args: Metric options; only ``"lang_id"`` is read here.

    Returns:
        A ``WerResult`` whose ``score`` is the ``evaluate`` "wer" metric
        value multiplied by 100.
    """
    language = args.get("lang_id", "<undefined>").lower()  # case-insensitive match

    # Pick the text normalizer for this language.
    normalize = (
        whisper_english.EnglishTextNormalizer()
        if language == "en"
        else whisper_basic.BasicTextNormalizer()
    )

    # Ordered text transformations; each one is applied to every reference
    # and then to every hypothesis before the next transformation runs.
    stages = []
    if language == "ar":
        stages.append(remove_diacritics)
    stages.append(normalize)
    if language in ("zh", "ja", "th", "lo", "my"):
        # Joining a string's characters with spaces turns each character into
        # its own "word", so the word-level metric below yields CER.
        stages.append(" ".join)

    expected = [sample.expected_answer for sample in samples]
    generated = [sample.generated_answer for sample in samples]
    for stage in stages:
        expected = [stage(text) for text in expected]
        generated = [stage(text) for text in generated]

    # The metric operates on space-separated tokens.
    metric = evaluate.load("wer")
    error_rate = metric.compute(predictions=generated, references=expected)
    return eval_types.WerResult(score=error_rate * 100)