in isoexp/mab/smab_algs.py [0:0]
import numpy as np


def attacked_EXP3_IX(T, MAB, target_arm, eta=None, gamma=None, delta=0.99):
    K = len(MAB)
    losses = np.zeros((K,))
    rewards = np.zeros((T,))
    draws = np.zeros((T,))
    sum_exp = K
    exp_losses = np.ones((K,))
    arms = np.linspace(0, K - 1, K, dtype='int')
    N = np.ones((K,))   # number of observations of each arm
    S = np.zeros((K,))  # cumulative pre-attack reward of each arm
    beta = np.zeros((K,))
    attacks = np.zeros((T,))
    time_of_attacks = np.zeros((T,))
    if eta is None or gamma is None:
        # Default EXP3-IX learning rate and implicit-exploration parameter
        eta = np.sqrt(2 * np.log(K + 1) / (K * T))
        gamma = np.sqrt(2 * np.log(K + 1) / (K * T)) / 2
    for t in range(T):
        P = exp_losses / sum_exp
        if t < K:
            # Initialization: pull each arm once, without attacking
            action = t
            attack_t = 0
        else:
            # After initialization the attacker may corrupt the observed reward
            time_of_attacks[t] = 1
            action = np.random.choice(arms, p=P)
            if action != target_arm:
                # Hoeffding-style confidence widths for the empirical means
                beta = np.sqrt(np.log(np.pi ** 2 * K * N ** 2 / (3 * delta)) / (2 * N))
                # Push the chosen arm's observed reward down so it looks no better than the target arm
                attack_t = -np.maximum((S / N)[action] - (S / N)[target_arm] + beta[action] + beta[target_arm], 0)
            else:
                attack_t = 0
        attacks[t] = attack_t
        true_X = 1 * MAB[action].sample().squeeze()
        X = true_X + attack_t  # reward actually observed by the learner
        # EXP3-IX update: importance-weighted loss with implicit exploration gamma
        losses[action] = losses[action] + (1 - X) / (gamma + P[action])
        exp_losses[action] = exp_losses[action] * np.exp(-eta * (1 - X) / (gamma + P[action]))
        sum_exp = np.sum(exp_losses)
        rewards[t] = true_X
        draws[t] = action
        N[action] += 1
        S[action] += true_X
    return rewards, draws, attacks, time_of_attacks
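
A minimal usage sketch of the function above. It assumes only what the code itself requires: each entry of MAB exposes a .sample() method returning a NumPy array (so that .squeeze() works). The BernoulliArm class and the parameter values below are hypothetical stand-ins, not part of the isoexp package.

# --- usage sketch (illustrative only) ---
import numpy as np

class BernoulliArm:
    """Hypothetical arm: .sample() returns a 0/1 reward as a NumPy array."""
    def __init__(self, p):
        self.p = p

    def sample(self):
        return np.array(float(np.random.rand() < self.p))

if __name__ == "__main__":
    T = 10_000
    MAB = [BernoulliArm(p) for p in (0.3, 0.5, 0.7)]
    target_arm = 0  # suboptimal arm the attacker wants the learner to favor
    rewards, draws, attacks, time_of_attacks = attacked_EXP3_IX(T, MAB, target_arm)
    print("fraction of pulls of the target arm:", np.mean(draws == target_arm))
    print("total attack cost:", np.sum(np.abs(attacks)))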