in simple_game/load_strategies.py [0:0]
def _rollout_history(policy, c1, c2):
    """Follow the policy's chosen actions for the deal (c1, c2); return the history string."""
    history = "r"
    player = 1
    while True:
        card = c1 if player == 1 else c2
        entry = policy.get((player, card, history))
        if entry is None:
            # No entry for this information set: the roll-out stops here.
            return history
        # entry[1] is the action appended to the history
        # (presumably the selected/greedy action -- confirm against the policy producer).
        history += str(entry[1])
        player = 3 - player  # alternate 1 <-> 2


def _contract_reward(history, c1, c2):
    """Return the contract value if c1 + c2 reaches it, else 0."""
    if history[-1] == "r":
        # No action was ever taken.
        return 0
    # The second-to-last character is read as the contract level; this relies
    # on every action being rendered as a single character.
    level = history[-2]
    if not "1" <= level <= "9":
        # No positive bid precedes the final action (e.g. "r0" or "r00").
        # Previously this raised ValueError (int("r") / negative shift);
        # there is no contract to fulfil, so the reward is 0.
        return 0
    contract = 1 << (int(level) - 1)
    return contract if c1 + c2 >= contract else 0


def print_policy_table(policy):
    """Print a table of the action sequences a policy produces for every deal.

    Rows are indexed by player 1's card and columns by player 2's card, each
    ranging from 0 to the largest card seen for that player in ``policy``.
    Every cell shows the action history (without the leading ``"r"``)
    followed by the resulting reward in parentheses, e.g. ``"120 (2)"``.

    Args:
        policy: Mapping from ``(player, card, history)`` to an indexable value
            whose element ``[1]`` is the action to append to the history.
            ``player`` is 1 or 2 and ``history`` is a string starting with
            ``"r"``.

    Returns:
        None. The table is printed as a pandas ``DataFrame``.

    Note:
        With an empty policy a single cell for the deal (0, 0) is printed,
        with an empty history and reward 0.
    """
    # Largest card index seen for each player (index 0 -> player 1).
    max_cards = [0, 0]
    for player, card, _history in policy:
        max_cards[player - 1] = max(max_cards[player - 1], card)

    table = []
    for c1 in range(max_cards[0] + 1):
        row = {}
        for c2 in range(max_cards[1] + 1):
            history = _rollout_history(policy, c1, c2)
            reward = _contract_reward(history, c1, c2)
            row[c2] = f"{history[1:]} ({reward})"
        table.append(row)
    print(pd.DataFrame(table))