def EXP3_P()

in isoexp/mab/smab_algs.py [0:0]


def EXP3_P(T, MAB, eta=0.1, gamma=0):
    """Run an exponential-weights bandit strategy for ``T`` rounds.

    Each arm keeps a cumulative score ``S``.  Every round all scores grow by
    one and the played arm is additionally charged the importance-weighted
    loss ``(1 - X) / P[action]``.  Arms are sampled from a softmax of
    ``eta * S`` mixed with the uniform distribution (weight ``gamma``).
    During the first ``K`` rounds the arms are played once each, in order.

    Parameters
    ----------
    T : int
        Number of rounds to play.
    MAB : sequence
        Arms; ``MAB[i].sample()`` must return an object exposing
        ``.squeeze()`` that yields the scalar reward.
        NOTE(review): the loss ``1 - X`` suggests rewards are expected in
        [0, 1] -- confirm against the arm implementations.
    eta : float, optional
        Learning rate of the exponential weights.
    gamma : float, optional
        Weight of the uniform exploration mixture.

    Returns
    -------
    rewards : numpy.ndarray
        Float array of shape ``(T,)`` with the reward collected each round.
    draws : numpy.ndarray
        Float array of shape ``(T,)`` with the index of the arm played each
        round.
    """
    K = len(MAB)
    S = np.zeros((K,))
    rewards = np.zeros((T,))
    draws = np.zeros((T,))
    uniform = np.ones((K,)) / K
    for t in range(T):
        # Rebuild the weights from the scores, shifted by their maximum.
        # Accumulating the weights multiplicatively makes them grow like
        # exp(eta * t); they overflow to inf and P becomes NaN on long runs.
        # The shift cancels in the normalisation, so the distribution is
        # the same one, but the largest weight is always exactly 1.
        weights = np.exp(eta * (S - np.max(S)))
        P = (1 - gamma) * weights / np.sum(weights) + gamma * uniform
        if t < K:
            # Initial sweep: play each arm once, in index order.
            action = t
        else:
            action = np.random.choice(K, p=P)
        X = 1 * MAB[action].sample().squeeze()
        # Every arm gains 1; the played arm loses its importance-weighted
        # loss estimate.
        S = S + 1
        S[action] = S[action] - (1 - X) / P[action]
        rewards[t] = X
        draws[t] = action
    return rewards, draws