in #U57fa#U7840#U6559#U7a0b/A7-#U5f3a#U5316#U5b66#U4e60/02-#U591a#U81c2#U8d4c#U535a#U673a/src/bandit_20_base.py [0:0]
def _smooth(values, window, polyorder=3):
    """Return *values* Savitzky-Golay smoothed, or unchanged if too short."""
    # savgol_filter (default mode='interp') rejects inputs shorter than the
    # window, which happens when a run has fewer steps than the plotted slice.
    if len(values) < window:
        return values
    return ss.savgol_filter(values, window, polyorder)


def _plot_reward_window(cell, curves, start, stop, window):
    """Draw the smoothed [start, stop) slice of every reward curve in *cell*."""
    plt.subplot(cell)
    for curve in curves:
        plt.plot(_smooth(curve[start:stop], window))
    # The slice is plotted from x=0, so relabel the ticks with absolute steps.
    offsets = list(range(0, stop - start + 1, 50))
    plt.xticks(offsets, [start + offset for offset in offsets])
    plt.xlabel('steps')
    plt.ylabel('average reward')
    plt.grid()


def mp_simulate(bandits, k_arms, runs, steps, labels, title):
    """Simulate several bandits in parallel and plot a comparison figure.

    Each bandit's ``simulate(runs, steps)`` is executed in a worker process.
    The results are averaged and drawn as: early average reward (smoothed),
    two zoomed reward windows (steps 300-500 and 700-900), the optimal-action
    curve, and one bar chart of per-arm action counts per bandit.

    Args:
        bandits: Iterable of objects exposing ``simulate(runs, steps)`` that
            returns ``(rewards, best_action, actions)``.
            NOTE(review): ``rewards`` and ``best_action`` are presumably
            indexed ``[run, step]`` and ``actions`` holds per-arm counts --
            confirm against the bandit implementation.
        k_arms: Number of arms. The arm at index ``k_arms - 1`` is the one
            whose share of all actions is reported as the best-action ratio.
        runs: Forwarded to ``simulate``.
        steps: Forwarded to ``simulate``; also the divisor for the mean
            reward shown in the legend.
        labels: One legend label per bandit, in the same order as ``bandits``.
        title: Figure super-title.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    # --- run ---------------------------------------------------------------
    all_rewards = []
    all_best = []
    all_actions = []
    # The context manager guarantees the workers are released even if a
    # simulation raises.
    with mp.Pool(processes=4) as pool:
        pending = [
            pool.apply_async(bandit.simulate, args=(runs, steps))
            for bandit in bandits
        ]
        for i, job in enumerate(pending):
            # get() blocks until that bandit is done and re-raises its errors.
            rewards, best_action, actions = job.get()
            print(labels[i])
            all_rewards.append(rewards)
            all_best.append(best_action)
            all_actions.append(actions)

    # --- statistics --------------------------------------------------------
    # Axis 0 is the bandit; averaging over axis 1 leaves one curve per bandit.
    all_best_actions = np.array(all_best).mean(axis=1)
    all_mean_rewards = np.array(all_rewards).mean(axis=1)
    all_done_actions = np.array(all_actions)
    best_action_per_bandit = (
        all_done_actions[:, k_arms - 1] / all_done_actions.sum(axis=1)
    )
    mean_reward_per_bandit = all_mean_rewards.sum(axis=1) / steps

    # --- layout ------------------------------------------------------------
    n_bandits = len(all_done_actions)
    n_arms = all_done_actions.shape[1]
    # One bar-chart row per bandit, never fewer than the original 4 rows so
    # the left-hand panels keep their proportions in the usual 4-bandit case.
    n_rows = max(4, n_bandits)
    half = n_rows // 2
    quarter = half // 2
    grid = plt.GridSpec(nrows=n_rows, ncols=3)
    plt.figure(figsize=(15, 20))

    # Top-left: first 100 steps of the average reward, lightly smoothed.
    plt.subplot(grid[0:half, 0])
    for i, mean_rewards in enumerate(all_mean_rewards):
        plt.plot(
            _smooth(mean_rewards[0:100], 5),
            label=f"{labels[i]}{mean_reward_per_bandit[i]:0.4f}",
        )
    plt.xlabel('steps')
    plt.ylabel('average reward')
    plt.legend()
    plt.grid()

    # Middle column: two zoomed windows of the same reward curves.
    _plot_reward_window(grid[0:quarter, 1], all_mean_rewards, 300, 500, 15)
    _plot_reward_window(grid[quarter:half, 1], all_mean_rewards, 700, 900, 15)

    # Bottom-left: optimal-action curve of every bandit.
    plt.subplot(grid[half:n_rows, 0:2])
    for i, counts in enumerate(all_best_actions):
        plt.plot(counts, label=f"{labels[i]}{best_action_per_bandit[i]:0.3f}")
    plt.xlabel('steps')
    plt.ylabel('% optimal action')
    plt.legend()
    plt.grid()

    # Right column: how often each arm was pulled, one panel per bandit.
    arm_names = [str(arm) for arm in range(n_arms)]
    colors = [
        'tab:blue', 'tab:orange', 'tab:green', 'tab:red',
        'tab:purple', 'tab:brown', 'tab:pink', 'tab:gray',
        'tab:olive', 'tab:cyan',
    ]
    for i in range(n_bandits):
        plt.subplot(grid[i, 2])
        pulls = all_done_actions[i].tolist()
        plt.bar(arm_names, pulls, label=labels[i], color=colors[i % len(colors)])
        for arm, (name, count) in enumerate(zip(arm_names, pulls)):
            # Value labels are tilted on every arm except the last two
            # (arms 0-7 of 10 in the original layout).
            if arm < n_arms - 2:
                plt.text(name, count, str(count), ha='center', rotation=-30)
            else:
                plt.text(name, count, str(count), ha='center')
        plt.legend()

    plt.suptitle(title)
    plt.show()