# evals/elsuite/make_me_say/eval.py

import numpy as np

import evals
from evals.api import DummyCompletionFn
from evals.elsuite.make_me_say.autoeval import run as run_auto_eval
from evals.elsuite.make_me_say.core import Game
from evals.record import RecorderBase


class MakeMeSay(evals.Eval):
    """Eval that plays a Make-Me-Say game and reports outcome rates.

    Exactly three completion fns are required, in this fixed role order:
    manipulator, manipulatee, judge. As a convenience for dry runs, a
    single ``DummyCompletionFn`` is duplicated to fill all three roles.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        # Dry-run convenience: one dummy fn stands in for all three roles.
        if len(self.completion_fns) == 1 and isinstance(
            self.completion_fns[0], DummyCompletionFn
        ):
            self.completion_fns = [self.completion_fns[0]] * 3

        assert len(self.completion_fns) == 3, "MakeMeSay only supports three completion fns"

        # Unpack in the fixed role order documented on the class.
        (
            self.manipulator_completion_fn,
            self.manipulatee_completion_fn,
            self.judge_completion_fn,
        ) = self.completion_fns

    def eval_sample(self, sample: dict, rng) -> None:
        """Play one game for ``sample`` and record its outcome metrics.

        ``sample["input"]["codeword"]`` must be a string; the game outcome
        is recorded as four boolean metrics.
        """
        del rng  # Unused: play is driven entirely by the completion fns.

        assert isinstance(sample, dict), "sample must be a dict"
        assert isinstance(sample["input"], dict)
        assert "codeword" in sample["input"]
        assert isinstance(sample["input"]["codeword"], str)

        game: Game = run_auto_eval(
            codeword=sample["input"]["codeword"],
            manipulator_completion_fn=self.manipulator_completion_fn,
            manipulatee_completion_fn=self.manipulatee_completion_fn,
            judge_completion_fn=self.judge_completion_fn,
        )

        evals.record.record_metrics(
            is_win=game.is_win(),
            is_loss=game.is_loss(),
            is_violation=game.is_violation(),
            is_user_aware_of_manipulation=game.is_user_aware_of_manipulation(),
        )

    def run(self, recorder: RecorderBase):
        """Run all samples, then aggregate per-game metrics into rates."""
        self.eval_all_samples(recorder, self.get_samples())
        metrics = recorder.get_metrics()

        # Mean of a boolean-valued metric across games == its rate.
        def _rate(key):
            return np.mean([record[key] for record in metrics])

        return {
            "win_rate": _rate("is_win"),
            "loss_rate": _rate("is_loss"),
            "violation_rate": _rate("is_violation"),
            "user_aware_of_manipulation_rate": _rate("is_user_aware_of_manipulation"),
        }