in isoexp/mab/smab_algs.py [0:0]
def attacked_UCB1(T, MAB, target_arm, alpha=1., delta=0.99, constant_attack=False):
    """Run UCB1 for ``T`` rounds while an attacker perturbs the observed rewards.

    The learner selects arms with a UCB index computed from *perturbed*
    rewards. Whenever the selected arm differs from ``target_arm``, a
    non-positive perturbation is added to the reward before it is fed back to
    the learner. The true (unperturbed) rewards are what gets returned.

    Args:
        T (int): horizon (number of rounds).
        MAB (list): arms; each must expose ``sample()`` and, when
            ``constant_attack`` is True, a ``mean`` attribute.
        target_arm (int): index of the arm the attacker leaves untouched.
        alpha (float): multiplier of the UCB exploration bonus.
        delta (float): confidence parameter used in the attacker's
            confidence width ``beta``.
        constant_attack (bool): if True, the perturbation is
            ``-2 * max(0, mean[a] - mean[target_arm])`` using the arms' ``mean``
            attributes; otherwise it is derived from the empirical means of
            the true rewards plus the confidence widths of both arms.

    Returns:
        tuple: ``(rewards, draws, attacks, time_of_attacks)``, four float
        arrays of length ``T``:
            rewards: true reward sampled at each round.
            draws: index of the arm pulled at each round.
            attacks: perturbation added to the reward at each round (<= 0).
            time_of_attacks: 1 for rounds after initialization in which the
                pulled arm was not ``target_arm`` (even if the perturbation
                turned out to be 0), else 0.
    """
    K = len(MAB)
    rewards = np.zeros((T,))
    draws = np.zeros((T,))
    attacks = np.zeros((T,))
    time_of_attacks = np.zeros((T,))

    # NOTE(review): counts start at 1, so N[a] is (number of pulls of a) + 1.
    # This offset is preserved from the original implementation because it
    # affects every index/attack value; confirm whether it is intended.
    N = np.ones((K,))
    S = np.zeros((K,))      # sum of perturbed rewards (what the learner sees)
    S_pre = np.zeros((K,))  # sum of true rewards (what the attacker uses)

    # Initialization: pull each arm once, without any attack. Clamped to T so
    # a horizon shorter than the number of arms does not index out of bounds.
    for k in range(min(K, T)):
        r = MAB[k].sample()
        rewards[k] = r
        draws[k] = k
        S[k] += r
        S_pre[k] += r
        N[k] += 1

    for t in range(K, T):
        # Select the arm with the highest UCB index, breaking ties at random.
        ucb = S / N + alpha * np.sqrt(np.log(t + 1) / N)
        idxs = np.flatnonzero(np.isclose(ucb, ucb.max()))
        # int(...) replaces np.asscalar, which was removed from NumPy.
        a = int(np.random.choice(idxs))

        attack_t = 0
        if a != target_arm:
            time_of_attacks[t] = 1
            if constant_attack:
                attack_t = -2 * np.maximum(0, MAB[a].mean - MAB[target_arm].mean)
            else:
                # Confidence width per arm, only needed when actually attacking.
                beta = np.sqrt(np.log(np.pi ** 2 * K * N ** 2 / (3 * delta)) / (2 * N))
                means_pre = S_pre / N
                gap = means_pre[a] - means_pre[target_arm] + beta[a] + beta[target_arm]
                attack_t = -np.maximum(gap, 0)
        attacks[t] = attack_t

        r = MAB[a].sample()
        false_r = r + attack_t

        # Update quantities: the learner's statistics use the perturbed
        # reward, the attacker's statistics use the true one.
        rewards[t] = r
        draws[t] = a
        S[a] += false_r
        S_pre[a] += r
        N[a] += 1

    return rewards, draws, attacks, time_of_attacks