in isoexp/mab/smab_algs.py [0:0]
def FTRL(T, MAB, eta=10, alg='exp_3'):
    """Run a Follow-the-Regularized-Leader bandit strategy for ``T`` rounds.

    At every round a probability vector over the arms is obtained by solving
    a convex program with cvxpy: the inner product between the cumulative
    importance-weighted loss estimates and the probability vector, plus a
    regularizer scaled by ``1/eta``, is minimized subject to the entries
    summing to one. An arm is then sampled from that vector, its reward is
    observed, and the loss estimate of the played arm is updated with
    ``-reward / probability``.

    Parameters
    ----------
    T : int
        Number of rounds to play.
    MAB : sequence
        Arms of the bandit. Each arm must expose ``sample()`` whose result
        supports ``.squeeze()`` (presumably a numpy scalar/array holding one
        reward -- confirm against the arm classes).
    eta : float, optional
        Learning rate; the regularizer is multiplied by ``1/eta`` (``2/eta``
        for ``alg='inf'``).
    alg : str, optional
        Regularizer selection:

        * ``'log_barrier'`` -- ``-sum(log(x))``;
        * ``'inf'`` -- ``-sum(sqrt(x))``;
        * any other value (default ``'exp_3'``) -- ``sum(kl_div(x, 1)) - K``,
          which by cvxpy's definition of ``kl_div`` equals
          ``sum(x*log(x) - x)``.

    Returns
    -------
    rewards : numpy.ndarray, shape (T,)
        Reward observed at each round.
    draws : numpy.ndarray, shape (T,)
        Index of the arm played at each round (stored as floats).

    Notes
    -----
    If the solver raises or yields no usable solution (``None``, non-finite
    or all-zero values), the round falls back to the uniform distribution.
    """
    K = len(MAB)
    losses = np.zeros((K,))
    rewards = np.zeros((T,))
    draws = np.zeros((T,))
    arms = np.arange(K)
    for t in trange(T):
        x = cp.Variable(K, pos=True)
        temp_1 = cp.Constant(value=np.ones((K,)))
        temp_2 = cp.Constant(value=losses)
        constraints = [cp.sum(cp.multiply(temp_1, x)) == 1]
        if alg == 'log_barrier':
            obj = cp.Minimize(cp.sum(cp.multiply(temp_2, x)) - 1/eta*cp.sum(cp.log(x)))
        elif alg == 'inf':
            obj = cp.Minimize(cp.sum(cp.multiply(temp_2, x)) - 2/eta*cp.sum(cp.sqrt(x)))
        else:
            obj = cp.Minimize(cp.sum(cp.multiply(temp_2, x)) + 1/eta*(cp.sum(cp.kl_div(x, temp_1)) - K))
        pb = cp.Problem(obj, constraints)
        try:
            pb.solve()
            P = x.value
        except Exception:
            # Best-effort: any solver/modelling failure degrades to uniform
            # play. ``Exception`` (not a bare ``except``) keeps Ctrl-C and
            # SystemExit working during long runs.
            P = None

        if P is not None:
            # Numerical solvers can return tiny negative entries or a sum
            # slightly off 1, both of which np.random.choice rejects.
            P = np.clip(np.asarray(P, dtype=float), 0.0, None)
            total = P.sum()
            if np.isfinite(total) and total > 0:
                P = P / total
            else:
                P = None
        if P is None:
            # The solver may finish without a solution (x.value is None) or
            # with unusable values; play uniformly for this round.
            P = np.ones((K,)) / K

        action = np.random.choice(arms, p=P)
        X = 1*MAB[action].sample().squeeze()
        # Importance-weighted estimate of the loss (negated reward).
        losses[action] = losses[action] + (-X)/P[action]
        rewards[t] = X
        draws[t] = action
    return rewards, draws