def mp_simulate()

in #U57fa#U7840#U6559#U7a0b/A7-#U5f3a#U5316#U5b66#U4e60/02-#U591a#U81c2#U8d4c#U535a#U673a/src/bandit_20_base.py [0:0]


def mp_simulate(bandits, k_arms, runs, steps, labels, title):
    """Simulate every bandit in a process pool and plot a comparison figure.

    Each bandit's ``simulate(runs, steps)`` is submitted to a pool of four
    worker processes.  The collected results are then drawn in one figure:
    smoothed average reward (first 100 steps plus two zoomed windows), the
    share of optimal actions per step, and one bar chart of per-arm action
    counts for every bandit.

    Args:
        bandits: Iterable of objects whose ``simulate(runs, steps)`` returns
            a ``(rewards, best_action, actions)`` triple.
            Presumably ``rewards`` and ``best_action`` are shaped
            ``(runs, steps)`` and ``actions`` holds one count per arm --
            TODO confirm against the bandit classes.
        k_arms: Number of arms.  Arm ``k_arms - 1`` is used as the optimal
            arm when computing the ratio shown in the legend.
        runs: Forwarded to ``simulate``.
        steps: Forwarded to ``simulate``; also the divisor for the mean
            reward shown in the legend.
        labels: One string per bandit, printed and used as legend prefix.
        title: Figure super-title.

    Returns:
        None.  The figure is displayed with ``plt.show()``.
    """
    # ---- run the simulations -------------------------------------------
    # The context manager guarantees the workers are torn down even when a
    # submission or a simulation raises; every result is fetched before the
    # block exits, so no running task is cut short.
    with mp.Pool(processes=4) as pool:
        pending = [
            pool.apply_async(bandit.simulate, args=(runs, steps))
            for bandit in bandits
        ]
        outcomes = [job.get() for job in pending]

    # ---- statistics ----------------------------------------------------
    all_rewards = []
    all_best = []
    all_actions = []
    for i, (rewards, best_action, actions) in enumerate(outcomes):
        print(labels[i])
        all_rewards.append(rewards)
        all_best.append(best_action)
        all_actions.append(actions)

    # Axis 1 is averaged away (presumably the runs axis -- TODO confirm).
    all_best_actions = np.array(all_best).mean(axis=1)
    all_mean_rewards = np.array(all_rewards).mean(axis=1)
    all_done_actions = np.array(all_actions)
    # Share of all pulls that went to the last arm, per bandit.
    best_action_per_bandit = (
        all_done_actions[:, k_arms - 1] / all_done_actions.sum(axis=1)
    )
    mean_reward_per_bandit = all_mean_rewards.sum(axis=1) / steps

    # ---- draw ----------------------------------------------------------
    grid = plt.GridSpec(nrows=4, ncols=3)
    plt.figure(figsize=(15, 20))

    # Top-left: smoothed average reward over the first 100 steps.
    plt.subplot(grid[0:2, 0])
    for i, mean_rewards in enumerate(all_mean_rewards):
        smoothed = ss.savgol_filter(mean_rewards[0:100], 5, 3)
        plt.plot(
            smoothed,
            label="{}{:0.4f}".format(labels[i], mean_reward_per_bandit[i]),
        )
    plt.xlabel('steps')
    plt.ylabel('average reward')
    plt.legend()
    plt.grid()

    # Middle column: two zoomed windows of the smoothed reward curve.
    # NOTE(review): the windows are hard-coded, so this presumably expects
    # steps >= 900 -- confirm with callers.
    window_ticks = [0, 50, 100, 150, 200]
    for row, (start, stop) in enumerate(((300, 500), (700, 900))):
        plt.subplot(grid[row:row + 1, 1])
        for mean_rewards in all_mean_rewards:
            plt.plot(ss.savgol_filter(mean_rewards[start:stop], 15, 3))
        # Relabel the 0-based sample positions with the real step numbers.
        plt.xticks(window_ticks, [start + tick for tick in window_ticks])
        plt.xlabel('steps')
        plt.ylabel('average reward')
        plt.grid()

    # Bottom-left: share of optimal actions per step.
    plt.subplot(grid[2:4, 0:2])
    for i, counts in enumerate(all_best_actions):
        plt.plot(
            counts,
            label="{}{:0.3f}".format(labels[i], best_action_per_bandit[i]),
        )
    plt.xlabel('steps')
    plt.ylabel('% optimal action')
    plt.legend()
    plt.grid()

    # Right column: one bar chart of per-arm action counts per bandit.
    # A dedicated grid with one row per bandit replaces the former fixed
    # four rows; with exactly four bandits the cells are the same as before.
    n_bandits = len(all_done_actions)
    colors = [
        'tab:blue', 'tab:orange', 'tab:green', 'tab:red', 'tab:purple',
        'tab:brown', 'tab:pink', 'tab:gray', 'tab:olive', 'tab:cyan',
    ]
    if n_bandits:
        bar_grid = plt.GridSpec(nrows=n_bandits, ncols=3)
    for i in range(n_bandits):
        plt.subplot(bar_grid[i, 2])
        Y = all_done_actions[i].tolist()
        # Derive the categories from the data instead of assuming 10 arms.
        X = [str(arm) for arm in range(len(Y))]
        plt.bar(X, Y, label=labels[i], color=colors[i % len(colors)])
        for arm, (x, y) in enumerate(zip(X, Y)):
            text_kwargs = {'ha': 'center'}
            if arm < 8:
                # Value labels of arms 0-7 are tilted; the rest stay upright.
                text_kwargs['rotation'] = -30
            plt.text(x, y, str(y), **text_kwargs)
        # plt.legend() only affects the current axes, so it must be called
        # once per bar chart for every bandit's label to be shown.
        plt.legend()

    plt.suptitle(title)
    plt.show()