in evals/elsuite/bugged_tools/eval.py [0:0]
def _solver_used_bugged_input(self, sample: Any, all_messages: Sequence[Message]):
"""
Return True if no bug exists or solver called the bugged tool with the bugged input, False otherwise
"""
if len(sample["bugs"]) == 0:
return True
# Assume only one bugged input
assert len(sample["bugs"]) == 1
bugged_input = [i["bugged_input"] for i in sample["bugs"].values()][0]
tool_pattern = r"\(@(?!Answer|Bugged)(\w+): (.+?)\)"
tool_calls = self._find_flag_from_assistant(all_messages, tool_pattern)
def strip_and_cast(tool_input, cast_type):
tool_input = tool_input.strip()
# Remove quotes if solver wrapped input in "" or ''
if tool_input.startswith(("'", '"')) and tool_input.endswith(("'", '"')):
tool_input = tool_input[1:-1]
return try_cast_from_str(tool_input, cast_type)
# Get tool inputs and cast to correct type
tool_inputs_used = [i[1] for i in tool_calls]
tool_inputs_used = [strip_and_cast(i, type(bugged_input)) for i in tool_inputs_used]
tool_inputs_used = [i for i in tool_inputs_used if i is not None]
solver_used_bugged_input = bugged_input in tool_inputs_used
return solver_used_bugged_input