def FTRL()

in isoexp/mab/smab_algs.py [0:0]


def _as_distribution(P, K):
    """Return ``P`` as a valid probability vector over ``K`` arms.

    Falls back to the uniform distribution when ``P`` is missing (``None``),
    has the wrong shape, contains non-finite entries, or has no positive mass.
    Otherwise negative entries are clipped to zero and the vector is
    renormalised so that it sums to one.
    """
    uniform = np.ones((K,)) / K
    if P is None:
        return uniform
    P = np.asarray(P, dtype=float).reshape(-1)
    if P.shape != (K,) or not np.all(np.isfinite(P)):
        return uniform
    # np.random.choice rejects negative probabilities, and numerical solvers
    # may return values that are slightly below zero.
    P = np.clip(P, 0, None)
    total = np.sum(P)
    if total <= 0:
        return uniform
    # Always renormalise: dividing by a sum that is exactly 1 is a no-op, and
    # it avoids relying on an exact floating-point equality test.
    return P / total


def FTRL(T, MAB, eta=10, alg='exp_3'):
    """Play ``T`` rounds of Follow-The-Regularized-Leader on the bandit ``MAB``.

    At every round a distribution over the arms is obtained by minimising
    ``<losses, x> + regulariser(x)`` over the probability simplex with cvxpy,
    an arm is sampled from it, and the cumulative loss estimate of the pulled
    arm is updated with the importance-weighted loss ``-X / P[action]``.

    Parameters
    ----------
    T : int
        Number of rounds to play.
    MAB : sequence
        The arms. ``len(MAB)`` gives the number of arms and each element must
        expose ``sample()`` whose result supports ``.squeeze()``
        (presumably a one-element array -- confirm against the arm classes).
    eta : float, optional
        Learning rate; the regulariser is scaled by ``1/eta``
        (``2/eta`` for ``alg='inf'``).
    alg : str, optional
        Regulariser selection:
        ``'log_barrier'`` uses ``-sum(log(x)) / eta``;
        ``'inf'`` uses ``-2 * sum(sqrt(x)) / eta``;
        any other value (including the default ``'exp_3'``) uses
        ``(sum(kl_div(x, 1)) - K) / eta``, which on the simplex equals
        ``sum(x * log(x))`` up to an additive constant.

    Returns
    -------
    rewards : numpy.ndarray, shape (T,)
        Reward observed at each round.
    draws : numpy.ndarray, shape (T,)
        Index of the arm pulled at each round (stored as floats).

    Notes
    -----
    If the solver raises, or returns no usable solution, the round falls back
    to the uniform distribution over the arms.
    """
    K = len(MAB)
    losses = np.zeros((K,))
    rewards = np.zeros((T,))
    draws = np.zeros((T,))
    arms = np.arange(K)
    # Loop-invariant constant, built once instead of at every round.
    ones = cp.Constant(value=np.ones((K,)))

    for t in trange(T):
        x = cp.Variable(K, pos=True)
        cum_losses = cp.Constant(value=losses)
        constraints = [cp.sum(cp.multiply(ones, x)) == 1]
        if alg == 'log_barrier':
            obj = cp.Minimize(cp.sum(cp.multiply(cum_losses, x)) - 1/eta*cp.sum(cp.log(x)))
        elif alg == 'inf':
            obj = cp.Minimize(cp.sum(cp.multiply(cum_losses, x)) - 2/eta*cp.sum(cp.sqrt(x)))
        else:
            obj = cp.Minimize(cp.sum(cp.multiply(cum_losses, x)) + 1/eta*(cp.sum(cp.kl_div(x, ones)) - K))
        pb = cp.Problem(obj, constraints)
        try:
            pb.solve()
            P = x.value
        except Exception:
            # Best-effort: a failed solve must not abort the whole run, but
            # KeyboardInterrupt / SystemExit are deliberately not caught.
            P = None
        # x.value is None when the solver finishes without a solution, and it
        # may hold tiny negative entries; sanitise before sampling.
        P = _as_distribution(P, K)

        action = np.random.choice(arms, p=P)
        X = 1*MAB[action].sample().squeeze()
        # Importance-weighted loss estimate for the pulled arm only.
        losses[action] -= X/P[action]
        rewards[t] = X
        draws[t] = action
    return rewards, draws