def attacked_UCB1()

in isoexp/mab/smab_algs.py [0:0]


def attacked_UCB1(T, MAB, target_arm, alpha=1., delta=0.99, constant_attack=False):
    """Run UCB1 for ``T`` rounds while an attacker perturbs the observed rewards.

    The learner only ever sees the corrupted reward ``r + attack``; the
    attacker lowers the reward of every pull that is not ``target_arm`` and
    leaves pulls of ``target_arm`` untouched.

    Parameters
    ----------
    T : int
        Number of rounds to simulate.
    MAB : sequence
        The arms. Each arm must provide ``sample()`` (returns a reward) and,
        when ``constant_attack`` is true, a ``mean`` attribute.
    target_arm : int
        Index of the arm whose pulls are never attacked.
    alpha : float, optional
        Multiplier of the UCB exploration bonus.
    delta : float, optional
        Parameter of the confidence width used by the adaptive attack.
    constant_attack : bool, optional
        If true, the attack is ``-2 * max(0, mean[a] - mean[target_arm])``
        computed from the arms' ``mean`` attributes. Otherwise it is derived
        from the empirical means of the *uncorrupted* rewards plus the
        confidence widths of both arms.

    Returns
    -------
    rewards : numpy.ndarray, shape (T,)
        Uncorrupted reward sampled at each round.
    draws : numpy.ndarray, shape (T,)
        Index of the arm pulled at each round (stored as floats).
    attacks : numpy.ndarray, shape (T,)
        Perturbation added to the reward at each round (``<= 0``).
    time_of_attacks : numpy.ndarray, shape (T,)
        ``1`` at rounds where a non-target arm was pulled after the
        warm-up phase, ``0`` elsewhere.
    """
    K = len(MAB)
    rewards = np.zeros((T,))
    draws = np.zeros((T,))
    attacks = np.zeros((T,))
    time_of_attacks = np.zeros((T,))

    # NOTE(review): the counts start at 1 and are incremented on every pull,
    # so N[a] is always one more than the real number of pulls of arm a.
    # Kept as-is to preserve the numerical behaviour of the original code.
    N = np.ones((K,))      # pull counter of each arm (shared by S and S_pre)
    S = np.zeros((K,))     # sum of corrupted rewards, as seen by the learner
    S_pre = np.zeros((K,))  # sum of uncorrupted rewards, as seen by the attacker

    # Warm-up: pull every arm once, without any attack. Bounded by T so that
    # a horizon shorter than the number of arms does not index out of range.
    for k in range(min(K, T)):
        r = MAB[k].sample()
        rewards[k] = r
        draws[k] = k
        S[k] += r
        S_pre[k] += r
        N[k] += 1

    for t in range(K, T):
        # Select the arm with the highest index, breaking ties at random.
        ucb = S / N + alpha * np.sqrt(np.log(t + 1) / N)
        idxs = np.flatnonzero(np.isclose(ucb, ucb.max()))
        # int() replaces np.asscalar, which no longer exists in recent NumPy.
        a = int(np.random.choice(idxs))

        attack_t = 0
        if a != target_arm:
            time_of_attacks[t] = 1
            if constant_attack:
                attack_t = -2 * np.maximum(0, MAB[a].mean - MAB[target_arm].mean)
            else:
                # Confidence width of every arm, only needed when attacking.
                beta = np.sqrt(np.log(np.pi ** 2 * K * N ** 2 / (3 * delta)) / (2 * N))
                mean_pre = S_pre / N
                attack_t = -np.maximum(
                    mean_pre[a] - mean_pre[target_arm] + beta[a] + beta[target_arm],
                    0,
                )
        attacks[t] = attack_t

        r = MAB[a].sample()
        false_r = r + attack_t

        # Record the true reward, but feed the corrupted one to the learner.
        rewards[t] = r
        draws[t] = a
        S[a] += false_r
        S_pre[a] += r
        N[a] += 1

    return rewards, draws, attacks, time_of_attacks