lm_eval/tasks/glianorex/preprocess_glianorex.py (13 lines of code) (raw):
import datasets
def doc_to_text(doc) -> str:
    """Render a document as a multiple-choice prompt.

    The prompt is a ``Question: ...`` line, then one ``<key>. <value>`` line
    per entry of ``doc["options"]`` (in the mapping's iteration order), and
    finally a trailing ``Answer:`` cue with no newline after it.

    Args:
        doc: Mapping that provides a ``"question"`` entry and an
            ``"options"`` mapping of option key to option text.

    Returns:
        The assembled prompt string.
    """
    option_lines = [
        f"{label}. {text}\n" for label, text in doc["options"].items()
    ]
    prompt_parts = [f"Question: {doc['question']}\n", *option_lines, "Answer:"]
    return "".join(prompt_parts)
def doc_to_target(doc) -> int:
    """Return the gold answer stored under ``doc["answer_idx"]``.

    The value is passed through unchanged.

    NOTE(review): the return annotation says ``int``; confirm that the
    dataset really stores a numeric index under ``answer_idx`` rather than
    an option label.
    """
    gold_answer = doc["answer_idx"]
    return gold_answer
def filter_dataset(dataset: datasets.Dataset, lang: str) -> datasets.Dataset:
    """Keep only the rows whose ``language`` field starts with ``lang``.

    Args:
        dataset: Dataset whose rows expose a string ``"language"`` entry.
        lang: Prefix to match against each row's ``"language"`` value.

    Returns:
        The result of ``dataset.filter`` applied with the prefix predicate.
    """

    def _has_language_prefix(row) -> bool:
        # Prefix match rather than equality, so any value that merely
        # begins with ``lang`` is retained.
        return row["language"].startswith(lang)

    return dataset.filter(_has_language_prefix)
def filter_french(dataset: datasets.Dataset) -> datasets.Dataset:
    """Return the rows of ``dataset`` whose language starts with ``"fr"``."""
    return filter_dataset(dataset, lang="fr")
def filter_english(dataset: datasets.Dataset) -> datasets.Dataset:
    """Return the rows of ``dataset`` whose language starts with ``"en"``."""
    return filter_dataset(dataset, lang="en")