project/paperbench/paperbench/agents/aisi-basic-agent/_basic_agent_iterative.py [138:229]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    if tools is None:
        tools = []
    # Accept either a ready-made Solver or a plain list of tools; a list is
    # wrapped with use_tools() so that downstream code always sees a Solver.
    tools = tools if isinstance(tools, Solver) else use_tools(tools)

    # resolve score_value function -- fall back to the default value_to_float
    # mapping when the caller did not supply one
    score_value_fn = score_value or value_to_float()

    # submission tool
    # submission tool: echoes the final message back as its result; the agent
    # loop recognizes a call to this tool (after renaming via tool_with) as the
    # signal that the agent is done.
    # NOTE(review): inspect_ai typically parses the inner docstring (including
    # the Args section) into the tool schema shown to the model, so its wording
    # is functional text, not just documentation -- confirm before rewording.
    @tool
    def end_task() -> Tool:
        async def execute(end_message: str) -> ToolResult:
            """Signal that you are completely finished.

            Args:
              end_message (str): Final message to signal that you are finished.
            """
            return end_message

        return execute

    # solver that adds submission tool
    # solver that registers the submission tool on the task state
    @solver
    def end_task_tool() -> Solver:
        async def solve(state: TaskState, generate: Generate) -> TaskState:
            """Append the submit tool (renamed/redescribed) to state.tools."""
            submit_tool = tool_with(end_task(), submit_name, submit_description)
            state.tools.append(submit_tool)
            return state

        return solve

    # helper to extract a submitted answer
    def submission(tool_results: list[ChatMessageTool]) -> str | None:
        return next(
            (result.text for result in tool_results if result.function == submit_name),
            None,
        )

    # main agent loop
    @solver
    def basic_agent_loop() -> Solver:
        async def solve(state: TaskState, generate: Generate) -> TaskState:
            # resolve message_limit -- prefer parameter then fall back to task
            state.message_limit = message_limit or state.message_limit

            # resolve token limit
            state.token_limit = token_limit or state.token_limit

            # track attempts
            attempts = 0

            num_steps = 0
            start_time = time.time()
            model = get_model()
            setattr(model, "total_retry_time", 0)
            setattr(model, "generate", generate_patched)

            # main loop (state.completed checks message_limit and token_limit)
            while not state.completed:
                # Proactively prune messages if we're approaching 1000 messages for Claude 3.5 Sonnet on Anthropic API
                if "sonnet" in model.name.lower() and len(state.messages) > 900:
                    state.messages = prune_messages(state.messages)

                # Finish depending on step and time limits. If both are set, both must be met.
                num_steps += 1

                # NOTE log.warning is necessary here for this to appear in inspect.log, which is necessary so we can know the agents runtime
                logger.warning(
                    f"total runtime: {round(time.time() - start_time, 2)}, total productive runtime: {round(time.time() - start_time - model.total_retry_time, 2)}, total retry time: {round(model.total_retry_time, 2)}"
                )

                over_time_limit = (
                    (time.time() - start_time - model.total_retry_time > real_time_limit)
                    if real_time_limit is not None
                    else False
                )
                if real_time_limit is not None and over_time_limit:
                    state.completed = True
                    break

                # Send progress update every 5 steps
                if num_steps % 5 == 0:
                    if real_time_limit is not None:
                        elapsed_time = time.time() - start_time - model.total_retry_time
                        periodic_msg = f"Info: {format_progress_time(elapsed_time)} time elapsed out of {format_progress_time(real_time_limit)}. Remember, you only have to stop working when the time limit has been reached."
                    else:
                        elapsed_time = time.time() - start_time
                        periodic_msg = f"Info: {format_progress_time(elapsed_time)} time elapsed"
                    periodic_msg += "\n\nNote: Don't forget to git commit regularly!"
                    state.messages.append(ChatMessageUser(content=periodic_msg))

                length_finish_error = False
                prune_individual = False
                try:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -



project/paperbench/paperbench/agents/aisi-basic-agent/_basic_agent_plus.py [144:235]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    if tools is None:
        tools = []
    # Accept either a ready-made Solver or a plain list of tools; a list is
    # wrapped with use_tools() so that downstream code always sees a Solver.
    tools = tools if isinstance(tools, Solver) else use_tools(tools)

    # resolve score_value function -- fall back to the default value_to_float
    # mapping when the caller did not supply one
    score_value_fn = score_value or value_to_float()

    # submission tool
    # submission tool: echoes the final message back as its result; the agent
    # loop recognizes a call to this tool (after renaming via tool_with) as the
    # signal that the agent is done.
    # NOTE(review): inspect_ai typically parses the inner docstring (including
    # the Args section) into the tool schema shown to the model, so its wording
    # is functional text, not just documentation -- confirm before rewording.
    @tool
    def end_task() -> Tool:
        async def execute(end_message: str) -> ToolResult:
            """Signal that you are completely finished.

            Args:
              end_message (str): Final message to signal that you are finished.
            """
            return end_message

        return execute

    # solver that adds submission tool
    # solver that registers the submission tool on the task state
    @solver
    def end_task_tool() -> Solver:
        async def solve(state: TaskState, generate: Generate) -> TaskState:
            """Append the submit tool (renamed/redescribed) to state.tools."""
            submit_tool = tool_with(end_task(), submit_name, submit_description)
            state.tools.append(submit_tool)
            return state

        return solve

    # helper to extract a submitted answer
    def submission(tool_results: list[ChatMessageTool]) -> str | None:
        return next(
            (result.text for result in tool_results if result.function == submit_name),
            None,
        )

    # main agent loop
    @solver
    def basic_agent_loop() -> Solver:
        async def solve(state: TaskState, generate: Generate) -> TaskState:
            # resolve message_limit -- prefer parameter then fall back to task
            state.message_limit = message_limit or state.message_limit

            # resolve token limit
            state.token_limit = token_limit or state.token_limit

            # track attempts
            attempts = 0

            num_steps = 0
            start_time = time.time()
            model = get_model()
            setattr(model, "total_retry_time", 0)
            setattr(model, "generate", generate_patched)

            # main loop (state.completed checks message_limit and token_limit)
            while not state.completed:
                # Proactively prune messages if we're approaching 1000 messages for Claude 3.5 Sonnet on Anthropic API
                if "sonnet" in model.name.lower() and len(state.messages) > 900:
                    state.messages = prune_messages(state.messages)

                # Finish depending on step and time limits. If both are set, both must be met.
                num_steps += 1

                # NOTE log.warning is necessary here for this to appear in inspect.log, which is necessary so we can know the agents runtime
                logger.warning(
                    f"total runtime: {round(time.time() - start_time, 2)}, total productive runtime: {round(time.time() - start_time - model.total_retry_time, 2)}, total retry time: {round(model.total_retry_time, 2)}"
                )

                over_time_limit = (
                    (time.time() - start_time - model.total_retry_time > real_time_limit)
                    if real_time_limit is not None
                    else False
                )
                if real_time_limit is not None and over_time_limit:
                    state.completed = True
                    break

                # Send progress update every 5 steps
                if num_steps % 5 == 0:
                    if real_time_limit is not None:
                        elapsed_time = time.time() - start_time - model.total_retry_time
                        periodic_msg = f"Info: {format_progress_time(elapsed_time)} time elapsed out of {format_progress_time(real_time_limit)}. Remember, you only have to stop working when the time limit has been reached."
                    else:
                        elapsed_time = time.time() - start_time
                        periodic_msg = f"Info: {format_progress_time(elapsed_time)} time elapsed"
                    periodic_msg += "\n\nNote: Don't forget to git commit regularly!"
                    state.messages.append(ChatMessageUser(content=periodic_msg))

                length_finish_error = False
                prune_individual = False
                try:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -



