in simulation/decai/simulation/simulate_ttt_dt.py [0:0]
def evaluate_on_self(classifier, tic_tac_toe):
    """Play the classifier against itself and print a summary of the results.

    Two scenarios are reported on stdout:

    1. A single game starting from an empty board where the model (player 1)
       moves first.
    2. One game per board cell in which player -1 has already taken that cell
       and the model (player 1) moves next.

    Args:
        classifier: Object exposing ``predict(flat_board)``; its output is
            passed to ``_map_pos`` to obtain a board index.
        tic_tac_toe: Game object exposing ``width``, ``length`` and
            ``get_winner(board)``.

    Returns:
        None. All results are printed.
    """
    print("Evaluating by playing against itself.")

    def _play_out(grid, first_player):
        """Alternate moves on ``grid`` (mutated in place) until the game ends.

        Returns a ``(result, count)`` pair where ``result`` is the winning
        player (1 or -1) or the string "TIE", and ``count`` is the number of
        cells held by the player who made the final move attempt.
        """
        player = first_player
        while True:
            # Flip the board since the bot always thinks it is 1.
            view = -grid if player == -1 else grid
            predicted = classifier.predict(view.flatten())
            # NOTE(review): _map_pos is defined elsewhere in this file;
            # presumably it converts the prediction into a board index.
            cell = _map_pos(tic_tac_toe, grid, predicted)
            if grid[cell] != 0:
                # The chosen cell is already occupied: the game is scored as a tie.
                return "TIE", np.count_nonzero(grid == player)
            grid[cell] = player
            if tic_tac_toe.get_winner(grid):
                return player, np.count_nonzero(grid == player)
            # Hand the turn to the other player.
            player = -1 if player == 1 else 1

    board_shape = (tic_tac_toe.width, tic_tac_toe.length)

    # Start with empty board and let the model pick where to start.
    opening_board = np.zeros(board_shape, dtype=np.int8)
    winner, num_moves = _play_out(opening_board, 1)
    if isinstance(winner, str):
        summary = f"When model starts: {winner} in {num_moves} moves."
    elif winner == 1:
        summary = f"When model starts: WINS in {num_moves} moves."
    else:
        summary = f"When model starts: LOSES. Winner has {num_moves} moves."
    print(summary)

    # Let -1 open in every cell in turn, then have the model respond as 1.
    outcomes = []
    for start_pos in range(opening_board.size):
        fresh_board = np.zeros(board_shape, dtype=np.int8)
        fresh_board[_map_pos(tic_tac_toe, fresh_board, start_pos)] = -1
        outcomes.append(_play_out(fresh_board, 1))

    winners = Counter(result for result, _ in outcomes)
    winner_move_counts = [count for _, count in outcomes]

    print("Winners when -1 starts in each position:")
    print(json.dumps(winners, indent=2))
    print(f"Winner move counts:\n{winner_move_counts}")
    print(f"Avg # winner moves: {np.average(winner_move_counts)}")