in evals/registry/data/solve-for-variable/tools/tester.py [0:0]
def test(self):
    """
    Numerically check all candidate answers against the question.

    The question code and each answer's code are executed once for every
    item yielded by ``ValueGenerator().generate(q_vars.rhs)``; each item is
    turned into a dict that serves as the execution namespace.

    Side effects:
        ``self.bad_answers`` is reset to an empty set, then filled with the
        indices of the "wrong" answers that were never seen to disagree
        with the expected value. ``self.msg`` is assigned only when a
        problem is detected.

    Returns:
        ``False`` as soon as the answer at index ``self.correct`` evaluates
        to something other than the expected value, or at the end when at
        least one other answer was never refuted; ``True`` otherwise.
    """
    q_vars, a_vars = self._variables()
    # unrefuted[i] stays True until answer i is observed to produce a value
    # that differs from the expected one.
    unrefuted = [True] * len(self.answers)
    self.bad_answers = set()

    for assignment in ValueGenerator().generate(q_vars.rhs):
        namespace = dict(assignment)
        # Picture a question and its answers shaped like
        #     Q:  v = <expression in "x", the variable to solve for>
        #     A1: x = <expression in "v">
        #     A2: x = <expression in "v">
        #     ...
        # Running Q in the namespace performs "v = <expression>" for the
        # current set of variable values.
        try:
            exec(self.question.code, namespace)
        except ZeroDivisionError:
            # Q cannot be evaluated for these values; move on to the next.
            continue

        # The value "x" had when Q was evaluated ("x" in the example above);
        # each answer is expected to give it back.
        target = a_vars.lhs
        expected = namespace[target]

        for idx, answer in enumerate(self.answers):
            if not unrefuted[idx]:
                # Already known to be wrong; no need to evaluate it again.
                continue

            # Drop "x" so that the answer has to recompute it from "v".
            del namespace[target]
            try:
                exec(answer.code, namespace)
            except ZeroDivisionError:
                # Put a value back so the "del" above succeeds on the
                # next pass through this loop.
                namespace[target] = expected
                continue

            agrees = namespace[target] == expected
            if idx != self.correct:
                if not agrees:
                    unrefuted[idx] = False
                continue
            if agrees:
                continue

            # The designated correct answer produced a different value:
            # report the expected value and every single-letter name
            # currently in the namespace.
            self.msg = (
                f'The "correct" answer {idx+1} is not correct'
                + f"\n expected {target} = {expected}\n "
                + "\n ".join(
                    f"{name} = {namespace[name]}"
                    for name in sorted(namespace)
                    if len(name) == 1
                )
            )
            return False

    # A "wrong" answer may match by chance for SOME variable values, but it
    # must not match for ALL of them; those that were never refuted are bad.
    never_refuted = [
        idx
        for idx, still_ok in enumerate(unrefuted)
        if still_ok and idx != self.correct
    ]
    for idx in never_refuted:
        self.bad_answers.add(idx)
        self.msg = f'The "wrong" answer {idx+1}' + " turns out to be correct"
    return not never_refuted