in #U57fa#U7840#U6559#U7a0b/A7-#U5f3a#U5316#U5b66#U4e60/02-#U591a#U81c2#U8d4c#U535a#U673a/src/bandit_28_all.py [0:0]
def staristic(k_arms, runs, steps):
bandits:kab_base.KArmBandit = []
bandits.append(KAB_Greedy(k_arms, 25))
bandits.append(KAB_E_Greedy(k_arms, 0.1))
bandits.append(KAB_Optimistic_Initial(k_arms, 0.1, 5))
bandits.append(KAB_Softmax(k_arms, 0.15, 0.8))
bandits.append(KAB_UCB(k_arms, 1))
bandits.append(KAB_Thompson(k_arms, 0.5))
# statistic
all_rewards = []
all_best = []
all_actions = []
pool = mp.Pool(processes=4)
results = []
for i, bandit in enumerate(bandits):
results.append(pool.apply_async(bandit.simulate, args=(runs,steps,)))
pool.close()
pool.join()
for i in range(len(results)):
rewards, best_action, actions = results[i].get()
all_rewards.append(rewards)
all_best.append(best_action)
all_actions.append(actions)
all_best_actions = np.array(all_best).mean(axis=1)
all_mean_rewards = np.array(all_rewards).mean(axis=1)
all_done_actions = np.array(all_actions)
best_action_per_bandit = all_done_actions[:,k_arms-1]/all_done_actions.sum(axis=1)
mean_reward_per_bandit = all_mean_rewards.sum(axis=1) / steps
features = np.zeros(shape=(len(bandits),8))
# 0-100步的平均收益
features[:,0] = all_mean_rewards[:,0:100].mean(axis=1)
# 300-500步的平均收益
features[:,1] = all_mean_rewards[:,300:500].mean(axis=1)
# 700-900步的平均收益
features[:,2] = all_mean_rewards[:,700:900].mean(axis=1)
# 1000步的平均收益
features[:,3] = mean_reward_per_bandit
# 0-100步的最佳利用率
features[:,4] = all_best_actions[:,0:100].mean(axis=1)
# 300-500步的最佳利用率
features[:,5] = all_best_actions[:,300:500].mean(axis=1)
# 700-900步的最佳利用率
features[:,6] = all_best_actions[:,700:900].mean(axis=1)
# 1000步的最佳利用率
features[:,7] = best_action_per_bandit
print(np.round(features, 3))
X = features
# X: 第一维是不同的算法,第二维是8个特征值
# 归一化, 按特征值归一化
Y = (X - np.min(X, axis=0, keepdims=True)) / (np.max(X, axis=0, keepdims=True) - np.min(X, axis=0, keepdims=True))
print("Y.shape=", Y.shape)
print(np.round(Y, 3))
# 计算权重值
Z = Y / np.sqrt(np.sum(Y * Y))
print("Z.shape=", Z.shape)
print(np.round(Z, 3))
# Z+ Z-
max_z = np.max(Z, axis=0)
min_z = np.min(Z, axis=0)
print("max_z.shape=", max_z.shape)
print(max_z)
print(min_z)
# D+, D-
d_plus = np.sqrt(np.sum(np.square(Z - max_z), axis=1))
d_minus = np.sqrt(np.sum(np.square(Z - min_z), axis=1))
print("d_plus.shape=", d_plus.shape)
print(d_plus)
print(d_minus)
C = d_minus / (d_plus + d_minus)
print("C=", C)
sort = np.argsort(C)
print("sort.shape=",sort.shape)
best_to_worst = list(reversed(sort))
print(best_to_worst)
for i in best_to_worst:
print(bandits[i].__class__.__name__)