def log_explore_ratio()

in pyhanabi/utils.py [0:0]


def log_explore_ratio(games, expected_eps, bucket_size=10):
    """Print bucketed exploration statistics and return per-timestep factors.

    The explore counts and step counts reported by every game are summed
    element-wise across games.  For each timestep a correction factor
    ``expected_eps / observed_ratio`` is computed, where ``observed_ratio``
    is ``explore / steps`` (floored at 1e-5), and the factor is clamped to
    the range [0.5, 2].  Timesteps with no recorded steps get a neutral
    factor of 1.0.

    The summed counts are then grouped into consecutive buckets of
    ``bucket_size`` timesteps and one summary line per bucket is printed.
    Finally ``reset_count()`` is called on every game.

    Args:
        games: iterable of objects exposing ``get_explore_count()``,
            ``get_step_count()`` and ``reset_count()``.  The two getters are
            assumed to return equal-length 1-D sequences of counts indexed
            by timestep -- TODO confirm against the game implementation.
        expected_eps: exploration ratio the observed ratio is compared to.
        bucket_size: number of consecutive timesteps aggregated per printed
            bucket.  The number of timesteps must be a multiple of it.

    Returns:
        A list with one factor per timestep.

    Raises:
        ValueError: if ``games`` is empty (nothing to stack) or the number
            of timesteps is not divisible by ``bucket_size``.
    """
    # Element-wise totals over all games, one entry per timestep.
    explore = np.stack([g.get_explore_count() for g in games]).sum(0)
    step_counts = np.stack([g.get_step_count() for g in games]).sum(0)

    factor = []
    for num_explore, num_step in zip(explore, step_counts):
        if num_step == 0:
            # Nothing observed at this timestep: leave epsilon unchanged.
            factor.append(1.0)
            continue
        f = expected_eps / max(1e-5, (num_explore / num_step))
        # Clamp so a single noisy estimate cannot move epsilon too far.
        factor.append(max(0.5, min(f, 2)))
    print(">>>explore factor:", len(factor))

    # Aggregate consecutive timesteps into buckets for a compact report.
    explore = explore.reshape((-1, bucket_size)).sum(1)
    step_counts = step_counts.reshape((-1, bucket_size)).sum(1)

    print("exploration:")
    for i, (num_explore, num_step) in enumerate(zip(explore, step_counts)):
        # Buckets no game reached have zero steps; report 0% instead of
        # dividing by zero (which would warn and print nan).
        if num_step == 0:
            ratio = 0.0
        else:
            ratio = 100 * num_explore / num_step
        print(
            "\tbucket [%2d, %2d]: %5d, %5d, %2.2f%%"
            % (i * bucket_size, (i + 1) * bucket_size, num_explore, num_step, ratio)
        )

    for g in games:
        g.reset_count()

    return factor