in isoexp/mab/smab_algs.py [0:0]
def _ftrl_distribution(estimated_losses, eta, alg):
    """Solve one FTRL step and return a probability vector over the arms.

    Minimizes ``<estimated_losses, x> + regularizer(x) / eta`` subject to
    ``sum(x) == 1`` with ``x`` positive. The regularizer is selected by
    ``alg``: ``'log_barrier'`` uses ``-sum(log(x))``, ``'inf'`` uses
    ``-2 * sum(sqrt(x))``, and any other value uses the entropy-like term
    ``sum(kl_div(x, 1)) - K``.

    If the solver raises, or yields no usable solution, the uniform
    distribution is returned instead.
    """
    K = len(estimated_losses)
    x = cp.Variable(K, pos=True)
    ones = cp.Constant(value=np.ones((K,)))
    losses = cp.Constant(value=estimated_losses)
    constraints = [cp.sum(cp.multiply(ones, x)) == 1]
    if alg == 'log_barrier':
        obj = cp.Minimize(cp.sum(cp.multiply(losses, x)) - 1/eta*cp.sum(cp.log(x)))
    elif alg == 'inf':
        obj = cp.Minimize(cp.sum(cp.multiply(losses, x)) - 2/eta*cp.sum(cp.sqrt(x)))
    else:
        obj = cp.Minimize(cp.sum(cp.multiply(losses, x)) + 1/eta*(cp.sum(cp.kl_div(x, ones)) - K))
    problem = cp.Problem(obj, constraints)
    try:
        problem.solve()
        probs = x.value
    except Exception:
        # Best-effort fallback on solver failure. Deliberately not a bare
        # ``except`` so that Ctrl-C / SystemExit still stop the simulation.
        probs = None
    # A solve that ends infeasible/unbounded leaves ``x.value`` unset; treat
    # that (and NaN/inf output) exactly like a solver exception.
    if probs is None or not np.all(np.isfinite(probs)):
        probs = np.ones((K,)) / K
    total = np.sum(probs)
    if total != 1:
        # The solver only satisfies the simplex constraint up to tolerance.
        probs = probs / total
    return probs


def attacked_FTRL(T, MAB, target_arm, eta=10, alg='exp_3', delta=0.99, constant_attack=False):
    """Simulate an FTRL bandit learner whose observed rewards are perturbed.

    During the first ``K = len(MAB)`` rounds each arm is pulled once, in
    index order, with no perturbation. Afterwards the arm is sampled from the
    FTRL distribution; whenever the sampled arm differs from ``target_arm`` a
    non-positive perturbation is added to the reward the learner observes.
    The learner's importance-weighted loss estimates are built from the
    perturbed reward, while the returned rewards are the unperturbed samples.

    Parameters
    ----------
    T : int
        Number of rounds.
    MAB : sequence
        Arms; each must provide ``sample()`` (result is ``squeeze()``-d) and,
        when ``constant_attack`` is true, a ``mean`` attribute.
    target_arm : int
        Index of the arm that is never perturbed.
    eta : float
        Learning rate; the regularizer is scaled by ``1 / eta``.
    alg : str
        ``'log_barrier'``, ``'inf'`` or anything else (entropy regularizer).
    delta : float
        Parameter of the confidence width ``beta`` used by the adaptive
        perturbation.
    constant_attack : bool
        If true, the perturbation is ``-2 * max(0, mean[action] -
        mean[target_arm])`` using the arms' ``mean`` attributes; otherwise it
        is ``-max(0, gap + beta[action] + beta[target_arm])`` where ``gap`` is
        the difference of empirical means of the unperturbed rewards.

    Returns
    -------
    rewards : ndarray of shape (T,)
        Unperturbed reward sampled at each round.
    draws : ndarray of shape (T,)
        Index of the arm pulled at each round (stored as float).
    attacks : ndarray of shape (T,)
        Perturbation added to the observed reward at each round.
    time_of_attacks : ndarray of shape (T,)
        1 at rounds where a non-target arm was sampled (after the first
        ``K`` rounds), 0 elsewhere.
    """
    K = len(MAB)
    true_S = np.zeros((K,))            # per-arm sum of unperturbed rewards
    N = np.zeros((K,))                 # per-arm pull counts
    estimated_losses = np.zeros((K,))  # importance-weighted losses seen by the learner
    rewards = np.zeros((T,))
    draws = np.zeros((T,))
    attacks = np.zeros((T,))
    time_of_attacks = np.zeros((T,))
    arms = np.arange(K)
    for t in trange(T):
        P = _ftrl_distribution(estimated_losses, eta, alg)
        attack_t = 0
        if t < K:
            # Initial sweep: pull every arm once so that N > 0 below.
            action = t
        else:
            action = np.random.choice(arms, p=P)
            if action != target_arm:
                time_of_attacks[t] = 1
                if constant_attack:
                    attack_t = - 2*np.maximum(0, MAB[action].mean - MAB[target_arm].mean)
                else:
                    beta = np.sqrt(np.log(np.pi ** 2 * K * N ** 2 / (3 * delta)) / (2 * N))
                    empirical_means = true_S / N
                    attack_t = - np.maximum(empirical_means[action] - empirical_means[target_arm]
                                            + beta[action] + beta[target_arm], 0)
        attacks[t] = attack_t
        true_X = 1*MAB[action].sample().squeeze()
        X = true_X + attack_t
        true_S[action] = true_S[action] + true_X
        # The learner only ever sees the perturbed reward X.
        estimated_losses[action] = estimated_losses[action] + (1 - X)/P[action]
        N[action] = N[action] + 1
        rewards[t] = true_X
        draws[t] = action
    return rewards, draws, attacks, time_of_attacks