in isoexp/mab/smab_algs.py [0:0]
def PHE(T, MAB, alpha=2):
    """Run the PHE index policy on a multi-armed bandit for ``T`` rounds.

    Every arm is pulled once, in index order, to initialise its statistics.
    In each later round, arm ``i`` gets the index

        (sum(Z_i) + S[i]) / ((alpha + 1) * N[i])

    where ``S[i]`` is the sum of rewards observed on arm ``i``, ``N[i]`` its
    number of pulls, and ``Z_i`` holds ``int(alpha * N[i])`` fresh
    Bernoulli(1/2) pseudo-rewards.  The arm with the largest index is
    played; ties (up to ``np.isclose`` tolerance) are broken uniformly at
    random.

    NOTE(review): the name and the perturbation scheme look like
    Perturbed-History Exploration -- confirm against the project's
    references.

    Parameters
    ----------
    T : int
        Number of rounds to play.  If ``T`` is smaller than the number of
        arms, only the first ``T`` arms are pulled (once each).
    MAB : sequence
        The arms.  Each element must provide a ``sample()`` method whose
        return value supports ``.squeeze()`` and squeezes to a scalar.
    alpha : float, optional
        Number of pseudo-rewards drawn per real observation of an arm.
        The draw count is truncated with ``int`` while the denominator uses
        ``alpha`` unrounded.

    Returns
    -------
    rewards : numpy.ndarray
        Float array of shape ``(T,)`` with the reward obtained each round.
    draws : numpy.ndarray
        Float array of shape ``(T,)`` with the index of the arm played each
        round.
    """
    K = len(MAB)
    rewards = np.zeros((T,))
    draws = np.zeros((T,))
    N = np.zeros((K,))  # number of pulls per arm
    S = np.zeros((K,))  # cumulative reward per arm
    biased_test = np.zeros((K,))  # perturbed index per arm

    # Initialisation: one pull per arm so that N[i] > 0 before any index is
    # computed.  Bounded by T so a horizon shorter than K does not write
    # past the end of the output arrays.
    for a in range(min(K, T)):
        r = 1 * MAB[a].sample().squeeze()
        rewards[a] = r
        draws[a] = a
        S[a] += r
        N[a] += 1

    for t in range(K, T):
        # The per-arm draws are kept as separate calls, in arm order, so the
        # global NumPy RNG stream is consumed the same way on every run.
        for i in range(K):
            Z = np.random.binomial(1, 1 / 2, size=int(alpha * N[i]))
            biased_test[i] = (np.sum(Z) + S[i]) / ((alpha + 1) * N[i])

        # Random tie-breaking among all (numerically) maximal indices.
        idxs = np.flatnonzero(np.isclose(biased_test, biased_test.max()))
        # int() replaces np.asscalar, which was removed from NumPy.
        a = int(np.random.choice(idxs))

        r = 1 * MAB[a].sample().squeeze()
        N[a] += 1
        S[a] += r
        rewards[t] = r
        draws[t] = a

    return rewards, draws