def translate()

in evals/translators/nllb.py [0:0]


def translate(texts, tokenizer, model, target):
    """Translate ``texts`` into the ``target`` language, ten texts per batch.

    The language code for ``target`` is taken from ``LANG_CODE_MAP`` when it
    has an entry. Otherwise it is the single entry of
    ``tokenizer.additional_special_tokens`` that starts with
    ``iso.iso3_code(target)``.

    Args:
        texts: Iterable of source strings to translate.
        tokenizer: Tokenizer that is callable on a batch of strings and
            exposes ``additional_special_tokens``, ``batch_decode`` and
            either ``lang_code_to_id`` or ``convert_tokens_to_ids``
            (presumably a Hugging Face NLLB tokenizer -- confirm with caller).
        model: Object exposing ``generate(**inputs, forced_bos_token_id=...)``.
        target: Language identifier; either a key of ``LANG_CODE_MAP`` or a
            value accepted by ``iso.iso3_code``.

    Returns:
        A list with the decoded strings of every batch, concatenated in
        batch order.

    Raises:
        AssertionError: If no special token, or more than one, matches the
            ISO 639-3 code of ``target``.
    """
    if target in LANG_CODE_MAP:
        lang_code = LANG_CODE_MAP[target]
    else:
        # Loop-invariant, so resolve the ISO 639-3 prefix once.
        iso3_prefix = iso.iso3_code(target)
        candidates = [
            token
            for token in tokenizer.additional_special_tokens
            if token.startswith(iso3_prefix)
        ]
        # Raised explicitly (rather than via ``assert``) so the checks still
        # run under ``python -O``; the exception type and messages are the
        # same as before.
        if len(candidates) > 1:
            raise AssertionError(
                "Multiple NLLB language codes found for the same language ID, need to disambiguate!"
            )
        if not candidates:
            raise AssertionError(f"Lang code for {target} was not found")
        lang_code = candidates[0]

    # Prefer the tokenizer's ``lang_code_to_id`` mapping when it exists and
    # fall back to the generic token lookup for tokenizers that no longer
    # provide it (NOTE(review): newer transformers releases appear to drop
    # this attribute from the NLLB tokenizer -- confirm against the pinned
    # version).
    lang_code_to_id = getattr(tokenizer, "lang_code_to_id", None)
    if lang_code_to_id is not None:
        forced_bos_token_id = lang_code_to_id[lang_code]
    else:
        forced_bos_token_id = tokenizer.convert_tokens_to_ids(lang_code)

    results = []
    # Materialise the batches so tqdm knows the total and can show progress.
    batches = list(toolz.partition_all(10, texts))
    for batch in tqdm(batches):
        # ``device`` is not a parameter; it is resolved from the enclosing
        # module scope.
        tokenized_src = tokenizer(batch, return_tensors="pt", padding=True).to(device)
        generated_tokens = model.generate(
            **tokenized_src, forced_bos_token_id=forced_bos_token_id
        )
        results.extend(
            tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)
        )

    return results