in ml/eval/evaluation_pipeline.py [0:0]
def evaluator_master_fn(eval_dataset: list[dict],
reward_output_filepath: str,
all_responses: str,
language: str,
new_model,
old_model="CohereForAI/aya-expanse-8b"):
# `language` is a plain string for now; later it will be an object of a FeeLLanguage class that provides
# language-specific functionality (it will also store the latest model, making functions like this one much easier to handle).
# 1. Reward score evaluation:
args = EvalArguments(bfloat16=True,
reward_output_fmt='1-0',