def attacked_FTRL()

in isoexp/mab/smab_algs.py


import numpy as np
import cvxpy as cp
from tqdm import trange


def attacked_FTRL(T, MAB, target_arm, eta=10, alg='exp_3', delta=0.99, constant_attack=False):
    """Run FTRL for T rounds while an attacker corrupts the observed rewards
    so that the learner is steered toward pulling target_arm."""
    K = len(MAB)
    true_S = np.zeros((K,))            # cumulative true reward per arm
    true_losses = np.zeros((K,))       # importance-weighted true losses
    N = np.zeros((K,))                 # pull count per arm
    estimated_losses = np.zeros((K,))  # importance-weighted corrupted losses
    rewards = np.zeros((T,))
    draws = np.zeros((T,))
    arms = np.arange(K)
    attacks = np.zeros((T,))
    time_of_attacks = np.zeros((T,))
    for t in trange(T):
        # FTRL step: minimize the estimated cumulative loss plus a regularizer
        # over the probability simplex.
        x = cp.Variable(K, pos=True)
        ones = cp.Constant(value=np.ones((K,)))
        losses = cp.Constant(value=estimated_losses)
        constraints = [cp.sum(cp.multiply(ones, x)) == 1]
        if alg == 'log_barrier':
            # Log-barrier regularizer.
            obj = cp.Minimize(cp.sum(cp.multiply(losses, x)) - 1/eta*cp.sum(cp.log(x)))
        elif alg == 'inf':
            # 1/2-Tsallis entropy regularizer (INF).
            obj = cp.Minimize(cp.sum(cp.multiply(losses, x)) - 2/eta*cp.sum(cp.sqrt(x)))
        else:
            # Negative-entropy regularizer, i.e. the EXP3 update.
            obj = cp.Minimize(cp.sum(cp.multiply(losses, x)) + 1/eta*(cp.sum(cp.kl_div(x, ones)) - K))
        pb = cp.Problem(obj, constraints)
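        # For the default negative-entropy case the minimizer has the closed
        # form P[i] proportional to exp(-eta * estimated_losses[i]), so the
        # convex solver could be bypassed when alg='exp_3'; it is kept so that
        # all three regularizers share one code path.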
        try:
            pb.solve()
            P = x.value
            if P is None:  # solver terminated without returning a point
                P = np.ones((K,))/K
        except Exception:
            # Fall back to the uniform distribution if the solver fails.
            P = np.ones((K,))/K
        # print("\nThe optimal value is", pb.value)
        # print("A solution x is")
        # print(x.value)
        # print("A dual solution corresponding to the inequality constraints is")
        # print(pb.constraints[0].dual_value)
        # print('Probability distribution:', P)
        # Clip solver noise and renormalize so P is a valid distribution.
        P = np.maximum(P, 0)
        P = P/np.sum(P)
        if t < K:
            action = t
            attack_t = 0
        else:
            action = np.random.choice(arms, p=P)
            if action != target_arm:
                time_of_attacks[t] = 1
                beta = np.sqrt(np.log(np.pi ** 2 * K * N ** 2 / (3 * delta)) / (2 * N))
                if constant_attack:
                    attack_t = - 2*np.maximum(0, MAB[action].mean - MAB[target_arm].mean)
                else:
                    attack_t = - np.maximum((true_S / N)[action] - (true_S / N)[target_arm] + beta[action]
                                            + beta[target_arm], 0)
            else:
                attack_t = 0
        attacks[t] = attack_t
        true_X = float(MAB[action].sample().squeeze())  # true reward
        X = true_X + attack_t  # corrupted reward shown to the learner
        true_S[action] += true_X
        # Importance-weighted (inverse-propensity) loss estimates.
        true_losses[action] += (1 - true_X)/P[action]
        estimated_losses[action] += (1 - X)/P[action]
        N[action] += 1
        rewards[t] = true_X
        draws[t] = action
    return rewards, draws, attacks, time_of_attacks
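
A minimal usage sketch, assuming each entry of MAB exposes a mean attribute and a sample() method (the only two members the function touches); the BernoulliArm class below is a hypothetical stand-in, not part of isoexp:

import numpy as np

class BernoulliArm:
    # Hypothetical stand-in for an isoexp arm: exposes .mean and .sample().
    def __init__(self, mean):
        self.mean = mean

    def sample(self):
        return np.random.binomial(1, self.mean, size=(1,))

np.random.seed(0)
MAB = [BernoulliArm(m) for m in (0.9, 0.6, 0.3)]
rewards, draws, attacks, time_of_attacks = attacked_FTRL(
    T=1000, MAB=MAB, target_arm=2, eta=10, alg='exp_3', delta=0.99)
print('fraction of target pulls:', np.mean(draws == 2))
print('total attack cost:', np.abs(attacks).sum())

If the attack works as intended, the draws concentrate on the target arm even though its true mean is the smallest of the three.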