def _solver_used_bugged

def _solver_used_bugged_input()

in evals/elsuite/bugged_tools/eval.py [0:0]

17 lines of code
11 McCabe index (conditional complexity)


    def _solver_used_bugged_input(self, sample: Any, all_messages: Sequence[Message]):
        """
        Check whether the solver triggered the bug planted in this sample.

        Returns True when the sample has no bugs at all, or when at least one
        tool call found in the assistant messages was made with the sample's
        bugged input. Returns False otherwise.
        """
        bugs = sample["bugs"]
        if len(bugs) == 0:
            return True

        # Samples are expected to carry exactly one bugged input
        assert len(bugs) == 1
        expected_input = list(bugs.values())[0]["bugged_input"]

        # Matches "(@ToolName: input)" flags, skipping the Answer/Bugged flags
        call_pattern = r"\(@(?!Answer|Bugged)(\w+): (.+?)\)"
        matches = self._find_flag_from_assistant(all_messages, call_pattern)

        # Tool inputs arrive as text; cast them to the bugged input's type so
        # that the membership test below compares like with like
        expected_type = type(expected_input)
        quote_chars = ("'", '"')

        observed_inputs = []
        for match in matches:
            # Second capture group holds the input the solver passed
            raw_input = match[1].strip()
            # Drop surrounding quotes if the solver wrapped the input in "" or ''
            if raw_input.startswith(quote_chars) and raw_input.endswith(quote_chars):
                raw_input = raw_input[1:-1]
            cast_input = try_cast_from_str(raw_input, expected_type)
            # A None result is discarded rather than compared
            if cast_input is not None:
                observed_inputs.append(cast_input)

        return expected_input in observed_inputs