in evals/elsuite/bluff/strategy_solver.py [0:0]
def _solve(self, task_state: TaskState):
    """
    Produce the next bluff move for the game held in ``task_state``.

    This solver does three things that should help the model play better:
        1. Adds a strategy guide as the first message (just after the task description).
        2. Strategy guide requires a JSON response (scratchpad etc). This JSON is parsed here,
           and a raw bid is returned.
        3. After a certain number of rounds, requests the model to analyze the strategy.

    Side effects:
        * ``task_state.messages`` is replaced by whatever
          ``self.interaction_cache.load_private_interaction`` returns and is then
          extended with the model's raw response and the parsed move.
        * ``self.interaction_cache`` is updated (manually when the strategy-analysis
          exchange is injected, and via ``save_private_interaction`` at the end).

    Returns:
        ``SolverResult`` wrapping the move string. The string is the last value
        returned by ``self._parse_response`` (even if ``get_bluff_move`` rejected it
        on the final attempt), or ``"[INVALID MOVE]"`` if parsing never succeeded.
    """
    #   GENERAL NOTE.
    #   This function is pretty ugly. I'm not sure how to implement this better.
    #   We decided this is good enough.

    #   Before the first move in a game - strategy guide goes first
    strategy_msg = Message("system", strategy)
    task_state.messages.insert(0, strategy_msg)
    task_state.messages = self.interaction_cache.load_private_interaction(task_state)

    game = task_state.current_state
    if (
        self.rethink_strategy_after is not None
        and len(game.rounds) == 1 + self.rethink_strategy_after
        and len(game.rounds[-1].moves) < 2
    ):
        #   Add the "rethink your strategy" prompt.
        #   We want to add it (and an answer to it) before the last system message.
        strategy_update_msg = Message("system", strategy_update)

        #   Only ask once: skip if the prompt is already part of the conversation.
        if strategy_update_msg not in task_state.messages:
            last_system_message = task_state.messages.pop()
            task_state.messages.append(strategy_update_msg)
            response = self._generate_response(task_state)
            task_state.messages.append(Message("assistant", response))
            task_state.messages.append(last_system_message)

            #   Manually update interaction cache, since we re-order messages.
            #   The existing message objects are stored as-is: wrapping them in a
            #   new Message would put a Message (not text) in the content field and
            #   make the cached entries differ from those in task_state.messages.
            last_interaction = self.interaction_cache.last_interaction
            last_interaction_messages = last_interaction.messages[:-1] + [
                strategy_update_msg,
                Message("assistant", response),
                last_system_message,
            ]
            #   Positions (within task_state.messages) of the injected prompt and of
            #   the model's answer to it; both are recorded as private messages.
            last_interaction_private_ids = last_interaction.private_messages_ids + [
                len(task_state.messages) - 3,
                len(task_state.messages) - 2,
            ]

            self.interaction_cache.last_interaction.messages = last_interaction_messages
            self.interaction_cache.last_interaction.private_messages_ids = (
                last_interaction_private_ids
            )

    #   If this move_str is preserved, the game engine will have to deal with that
    #   (and it has some way of solving this problem)
    move_str = "[INVALID MOVE]"
    #   NOTE(review): if self.max_attempts is 0 the loop body never runs and
    #   `response` may be unbound when it is appended below - confirm callers
    #   always configure at least one attempt.
    for _ in range(self.max_attempts):
        response = self._generate_response(task_state)
        try:
            move_str = self._parse_response(response)
            #   This will raise ValueError if this is not a valid move
            get_bluff_move(move_str)
            break
        except ValueError:
            #   Unparseable or invalid move: retry with a fresh generation.
            pass

    #   Record both the raw model output and the extracted move, then persist the
    #   conversation in the interaction cache for the next call.
    task_state.messages.append(Message("assistant", response))
    task_state.messages.append(Message("assistant", move_str))
    self.interaction_cache.save_private_interaction(task_state)

    return SolverResult(move_str)