def evaluate_on_self()

in simulation/decai/simulation/simulate_ttt_dt.py [0:0]


def evaluate_on_self(classifier, tic_tac_toe):
    """Play the classifier against itself and print a summary of the results.

    Two experiments are run and reported on stdout:

    1. A single game from an empty board in which player 1 moves first.
    2. One game per board cell, where player -1 has already been placed on
       that cell and player 1 moves next. The outcomes are tallied and the
       per-game move counts are printed together with their average.

    :param classifier: Object whose ``predict`` method is given the flattened
        board and returns a position (passed through ``_map_pos`` before use).
    :param tic_tac_toe: Game object providing ``width``, ``length`` and
        ``get_winner(board)``.
    """
    print("Evaluating by playing against itself.")

    def _empty_board():
        # Fresh all-zero grid; 0 marks an unoccupied cell.
        return np.zeros((tic_tac_toe.width, tic_tac_toe.length), dtype=np.int8)

    def _play_out(grid, player):
        """Alternate moves on ``grid`` (mutated in place) until the game ends.

        Returns ``(result, count)`` where ``result`` is the winning player, or
        the string "TIE" when the predicted cell is already occupied, and
        ``count`` is the number of cells held by the player who moved last.
        """
        while True:
            # Flip the board for player -1 since the bot always thinks it is 1.
            view = -grid if player == -1 else grid
            predicted = classifier.predict(view.flatten())
            cell = _map_pos(tic_tac_toe, grid, predicted)

            if grid[cell] != 0:
                # The model chose a taken cell: the game stops here as a tie.
                return "TIE", np.count_nonzero(grid == player)

            grid[cell] = player
            if tic_tac_toe.get_winner(grid):
                return player, np.count_nonzero(grid == player)

            # Hand the turn to the other side.
            player = -1 if player == 1 else 1

    # Start with empty board and let the model pick where to start.
    board = _empty_board()
    winner, num_moves = _play_out(board, 1)
    if isinstance(winner, str):
        summary = f"When model starts: {winner} in {num_moves} moves."
    elif winner == 1:
        summary = f"When model starts: WINS in {num_moves} moves."
    else:
        summary = f"When model starts: LOSES. Winner has {num_moves} moves."
    print(summary)

    # Let -1 open on every cell in turn, then have the model respond as 1.
    outcomes = []
    for start_pos in range(board.size):
        grid = _empty_board()
        grid[_map_pos(tic_tac_toe, grid, start_pos)] = -1
        outcomes.append(_play_out(grid, 1))

    winners = Counter(result for result, _ in outcomes)
    winner_move_counts = [count for _, count in outcomes]

    print("Winners when -1 starts in each position:")
    print(json.dumps(winners, indent=2))
    print(f"Winner move counts:\n{winner_move_counts}")
    print(f"Avg # winner moves: {np.average(winner_move_counts)}")