def attacked_EXP3_IX()

in isoexp/mab/smab_algs.py [0:0]


def attacked_EXP3_IX(T, MAB, target_arm, eta=None, gamma=None, delta=0.99):
    """Run an exponential-weights (EXP3-IX style) learner whose rewards are attacked.

    During the first ``K = len(MAB)`` rounds every arm is pulled once, in index
    order, and no attack is applied. From round ``K`` on, the arm is sampled
    from the current exponential-weights distribution; whenever the sampled
    arm differs from ``target_arm`` the reward shown to the learner is lowered
    by ``max(mean[action] - mean[target_arm] + beta[action] + beta[target_arm], 0)``
    where ``mean = S / N`` are running averages of the true rewards and
    ``beta`` is a confidence width depending on ``N``, ``K`` and ``delta``.

    Parameters
    ----------
    T : int
        Number of rounds to play.
    MAB : sequence
        One entry per arm. Each entry must expose ``sample()`` whose result
        supports ``.squeeze()`` (e.g. a NumPy array holding a single reward).
    target_arm : int
        Index of the arm that is never attacked.
    eta : float, optional
        Learning rate of the exponential-weights update. When ``None`` it
        defaults to ``sqrt(2*log(K + 1)/(K*T))``.
    gamma : float, optional
        Term added to the sampling probability in the denominator of the loss
        estimate. When ``None`` it defaults to ``sqrt(2*log(K + 1)/(K*T))/2``.
    delta : float, optional
        Confidence parameter used in the width ``beta``.

    Returns
    -------
    rewards : numpy.ndarray, shape (T,)
        True (un-attacked) reward collected at each round.
    draws : numpy.ndarray, shape (T,)
        Index of the arm pulled at each round (stored as floats).
    attacks : numpy.ndarray, shape (T,)
        Perturbation added to the reward at each round (always <= 0).
    time_of_attacks : numpy.ndarray, shape (T,)
        1 for every round played after the initial ``K`` pulls, 0 otherwise.
    """
    K = len(MAB)
    arms = np.arange(K)

    rewards = np.zeros((T,))
    draws = np.zeros((T,))
    attacks = np.zeros((T,))
    time_of_attacks = np.zeros((T,))

    N = np.ones((K,))   # number of observations of each arm (starts at 1, so S / N is always defined)
    S = np.zeros((K,))  # sum of the true rewards observed on each arm

    # Resolve each tuning parameter on its own so that a caller who supplies
    # only one of them does not have it silently overwritten.
    if eta is None or gamma is None:
        default_eta = np.sqrt(2*np.log(K + 1)/(K*T))
        if eta is None:
            eta = default_eta
        if gamma is None:
            gamma = default_eta/2

    # Weights are kept in the log domain: the raw products of exp(-eta*loss)
    # underflow to 0 for long horizons or large eta, which would turn the
    # sampling distribution into NaNs.
    log_weights = np.zeros((K,))

    for t in range(T):
        # Softmax with the maximum subtracted, so the largest weight is exactly 1.
        shifted = np.exp(log_weights - np.max(log_weights))
        P = shifted/np.sum(shifted)

        if t < K:
            # Initial pass: pull every arm once, un-attacked.
            action = t
            attack_t = 0
        else:
            time_of_attacks[t] = 1
            action = np.random.choice(arms, p=P)
            if action != target_arm:
                means = S/N
                beta = np.sqrt(np.log(np.pi ** 2 * K * N ** 2 / (3 * delta)) / (2*N))
                # Lower the observed reward by the gap between the pulled arm's
                # mean plus width and the target's mean minus width, if positive.
                attack_t = -np.maximum(
                    means[action] - means[target_arm] + beta[action] + beta[target_arm], 0)
            else:
                attack_t = 0
        attacks[t] = attack_t

        true_X = 1*MAB[action].sample().squeeze()
        X = true_X + attack_t  # reward as seen by the learner

        # Loss estimate (1 - X)/(gamma + P[action]) applied to the pulled arm only.
        log_weights[action] -= eta*(1 - X)/(gamma + P[action])

        # Bookkeeping uses the true reward, not the attacked one.
        rewards[t] = true_X
        draws[t] = action
        N[action] += 1
        S[action] += true_X

    return rewards, draws, attacks, time_of_attacks