def ceval_adapter()

in src/lighteval/tasks/multilingual/adapters.py [0:0]


def ceval_adapter(lang: Language, formulation: Formulation, line: dict) -> MCQInput | None:
    """Convert a raw C-Eval row into a multiple-choice question input.

    C-Eval questions are expected to end with a ``____`` blank, optionally followed by
    punctuation. Some questions then list numbered statements in the format
    ``(①|②|③|④)text`` while the A-D choices only reference those numbers. In that
    case the blank is removed and each number in the choices is replaced by the statement
    text extracted from the trailing part of the question.

    Args:
        lang: Language passed to ``TranslationLiterals``; its ``question_mark`` is appended
            to the cleaned question.
        formulation: Task formulation. A ``CFFormulation`` joins multi-statement choices
            with the "NEW_LINE" variant, anything else uses "COMMA".
        line: Dataset row; the keys "question", "A", "B", "C", "D" and "answer" are read.

    Returns:
        A dict with the keys "question", "choices" and "gold_idx", or ``None`` when the
        sample is discarded (a referenced statement could not be extracted, or the final
        choices still consist only of circled numbers).

    Raises:
        ValueError: If ``line["answer"]`` is not found in ``LETTER_INDICES``.
        AssertionError: If statements were extracted but do not start at the very
            beginning of the trailing part.
    """
    circled_numbers = "①②③④⑤⑥"

    translation_literals = TranslationLiterals(lang)
    choices = [line["A"], line["B"], line["C"], line["D"]]

    # We found the new line variant to be the best for the CF formulation; for MCF it does not
    # work well because of how the possible options are presented.
    join_variant = "NEW_LINE" if isinstance(formulation, CFFormulation) else "COMMA"

    parts = line["question"].rsplit("____", maxsplit=1)
    cleaned_question = parts[0].rstrip(WHITESPACES)
    # A question without the blank yields a single part; treat the trailing part as empty
    # instead of failing with an IndexError.
    trailing_part = parts[1] if len(parts) > 1 else ""
    possible_answers_part = trailing_part.lstrip(PUNCT + WHITESPACES).rstrip()
    gold_index = LETTER_INDICES.index(line["answer"])

    # We only attempt to extract answers if the choices consist solely of circled numbers
    # (optionally joined by "和").
    answer_prefixes = [answer.replace("和", "").strip() for answer in choices]
    answer_prefixes_set = set("".join(answer_prefixes))

    # An empty set is a subset of anything, so explicitly require at least one prefix.
    maybe_extracted_answers = (
        extract_answers_from_string(possible_answers_part, list(answer_prefixes_set))
        if answer_prefixes_set and answer_prefixes_set.issubset(circled_numbers)
        else None
    )
    if maybe_extracted_answers:
        start_index, prefix_answer_map = maybe_extracted_answers
        # Here we don't expect anything to be in front of the answers from the second part
        assert start_index == 0, f"Start index is not 0: {start_index}"

        # Each choice becomes the list of statement texts its circled numbers refer to.
        choices_groups = [[prefix_answer_map.get(prefix) for prefix in prefixes] for prefixes in answer_prefixes]
        # If we failed to extract some of the answers we discard the sample
        if any(statement is None for group in choices_groups for statement in group):
            return None

        choices = [multichoice_join(mc, join_variant, translation_literals) for mc in choices_groups]
    else:
        # If the second part is not a list of answers we put it back
        cleaned_question = f"{cleaned_question} {possible_answers_part}" if possible_answers_part else cleaned_question

    # Lastly turn it into a question: drop trailing punctuation and append the question mark.
    cleaned_question = f"{cleaned_question.strip().rstrip(PUNCT)}{translation_literals.question_mark}"

    # If we still have only the numbers in the answers or we have just a single choice we discard this sample
    if set("".join(choices).replace("和", "").strip()).issubset(circled_numbers) or len(choices) <= 1:
        return None

    return {
        "question": cleaned_question,
        "choices": choices,
        "gold_idx": gold_index,
    }