in isoexp/mab/smab_algs.py [0:0]
def EXP3_P(T, MAB, eta=0.1, gamma=0):
    """Run an exponential-weights (EXP3-style) bandit strategy for T rounds.

    Each round a sampling distribution over the arms is built from the
    cumulative importance-weighted scores ``S`` (a softmax with rate ``eta``),
    mixed with the uniform distribution using weight ``gamma``.  During the
    first ``K = len(MAB)`` rounds the arms are played once each in index
    order; afterwards the arm is drawn from the distribution with
    ``np.random.choice``.

    Parameters
    ----------
    T : int
        Number of rounds to play.
    MAB : sequence
        The arms.  Each element must expose ``sample()`` whose result has a
        ``squeeze()`` method yielding a scalar reward.
        NOTE(review): the ``1 - X`` loss transform suggests rewards are
        expected to lie in [0, 1] -- confirm against the arm classes.
    eta : float, optional
        Learning rate applied to the cumulative scores.
    gamma : float, optional
        Weight of the uniform distribution in the sampling mixture.

    Returns
    -------
    rewards : numpy.ndarray
        Float array of shape ``(T,)`` holding the reward observed each round.
    draws : numpy.ndarray
        Float array of shape ``(T,)`` holding the index of the arm played
        each round.

    Raises
    ------
    ValueError
        If ``MAB`` is empty while ``T > 0``.
    """
    K = len(MAB)
    if T > 0 and K == 0:
        raise ValueError("MAB must contain at least one arm")

    S = np.zeros((K,))
    rewards = np.zeros((T,))
    draws = np.zeros((T,))
    arms = np.arange(K)

    for t in range(T):
        # Weights are exp(eta * S), computed relative to max(S).  Subtracting
        # the maximum leaves the normalised distribution unchanged but keeps
        # every exponent <= 0, so the weights cannot overflow to inf (which
        # would turn the probabilities into NaN on long horizons).
        weights = np.exp(eta * (S - np.max(S)))
        P = (1 - gamma) * weights / np.sum(weights) + gamma / K

        if t < K:
            # Initial pass: play every arm once, in index order.
            action = t
        else:
            action = np.random.choice(arms, p=P)

        X = 1 * MAB[action].sample().squeeze()

        # Every arm's score gains 1; the played arm is additionally charged
        # its importance-weighted loss (1 - X) / P[action].
        S = S + 1
        S[action] = S[action] - (1 - X) / P[action]

        rewards[t] = X
        draws[t] = action

    return rewards, draws