in pyhanabi/utils.py [0:0]
def log_explore_ratio(games, expected_eps, bucket_size=10):
    """Print bucketed exploration statistics and return correction factors.

    The explore counts and step counts reported by every game are summed
    element-wise across games. For each index a factor
    ``expected_eps / observed_ratio`` is computed and clamped to
    ``[0.5, 2]``; indices with no recorded steps get a neutral ``1.0``.
    A per-bucket summary is printed, then every game's counters are reset
    via ``reset_count()``.

    Args:
        games: Re-iterable collection (it is traversed more than once) of
            objects exposing ``get_explore_count()``, ``get_step_count()``
            and ``reset_count()``. Both getters must return equally long
            1-D sequences of numbers.
            NOTE(review): the index presumably corresponds to the in-game
            timestep -- confirm against the game implementation.
        expected_eps: Target exploration ratio that the observed ratio is
            compared against.
        bucket_size: Number of consecutive indices aggregated into one
            printed bucket. The count length must be a multiple of it.
            The default of 10 reproduces the historical ``(8, 10)`` layout
            for 80-entry counters.

    Returns:
        A list with one factor per index of the summed counters.
    """
    explore = np.stack([g.get_explore_count() for g in games]).sum(0)
    step_counts = np.stack([g.get_step_count() for g in games]).sum(0)

    factor = []
    for num_explore, num_step in zip(explore, step_counts):
        if num_step == 0:
            # Nothing observed at this index: leave the rate untouched.
            factor.append(1.0)
            continue
        # The 1e-5 floor avoids dividing by zero when no exploration
        # happened at an index that was nevertheless visited.
        f = expected_eps / max(1e-5, (num_explore / num_step))
        factor.append(max(0.5, min(f, 2)))
    print(">>>explore factor:", len(factor))

    # Collapse the per-index counters into coarse buckets for logging only;
    # the returned factors stay per-index.
    explore = explore.reshape((-1, bucket_size)).sum(1)
    step_counts = step_counts.reshape((-1, bucket_size)).sum(1)
    print("exploration:")
    for i, (num_explore, num_step) in enumerate(zip(explore, step_counts)):
        # Guard empty buckets so the log shows 0.00% instead of nan/inf
        # (and NumPy does not emit a divide RuntimeWarning).
        ratio = 100 * num_explore / num_step if num_step else 0.0
        print(
            "\tbucket [%2d, %2d]: %5d, %5d, %2.2f%%"
            % (i * bucket_size, (i + 1) * bucket_size, num_explore, num_step, ratio)
        )

    for g in games:
        g.reset_count()
    return factor