def run_game()

in pyhanabi/tools/run_game.py [0:0]


def run_game(agents, seed, verbose, op):
    params = {
        "players": str(len(agents)),
        "seed": str(seed),
        "bomb": str(0),
        "random_start_player": str(0),
    }
    game = hanalearn.HanabiEnv(params, -1, False)  # max_len  # verbose
    game.reset()

    hids = [agent.get_h0(1) for agent in agents]
    for h in hids:
        for k, v in h.items():
            if isinstance(agents[0], r2d2.R2D2Agent):
                h[k] = v.cuda().unsqueeze(0)  # add batch dim
            else:
                h[k] = v.cuda()

    if op:
        color_permutes = []
        inv_color_permutes = []
        for _ in range(len(agents)):
            perm = [0, 1, 2, 3, 4]
            random.shuffle(perm)
            inv_perm = np.argsort(perm).tolist()
            for i in range(5):
                assert inv_perm[perm[i]] == i
            color_permutes.append(perm)
            inv_color_permutes.append(inv_perm)

    step = 0
    moves = []
    while not game.terminated():
        # print(game.get_hle_state().to_string())
        actions = []
        new_hids = []

        for i, (agent, hid) in enumerate(zip(agents, hids)):
            # Note: argument here is (game_state, player_idx, hide_action)
            # make sure to specify the correct hide_action value
            if op:
                obs = hanalearn.observe_op(
                    game.get_hle_state(),
                    i,
                    True,
                    color_permutes[i],
                    inv_color_permutes[i],
                    False,
                    True,
                    False,
                )
            else:
                obs = hanalearn.observe(game.get_hle_state(), i, False)

            priv_s = obs["priv_s"].cuda().unsqueeze(0)
            publ_s = obs["publ_s"].cuda().unsqueeze(0)
            legal_move = obs["legal_move"].cuda().unsqueeze(0)

            action, new_hid = agent.greedy_act(priv_s, publ_s, legal_move, hid)
            if i == 0:
                actions.append([action.item()])
            else:
                actions[-1].append(action.item())
            new_hids.append(new_hid)

        hids = new_hids
        cur_player = game.get_current_player()
        move = game.get_move(actions[-1][cur_player])
        if op and move.move_type() == hanalearn.MoveType.RevealColor:
            move.set_color(inv_color_permutes[cur_player][move.color()])
        moves.append(move)

        game.step(move)
        step += 1

    return game.get_score(), moves