def Random_exploration()

in isoexp/mab/smab_algs.py [0:0]


def Random_exploration(T, MAB, alpha=2):
    """Play a multi-armed bandit for ``T`` rounds with randomized exploration.

    Every arm is first pulled once, in index order. In each later round,
    each arm ``i`` gets the score ``S[i] / N[i] + mean(Z)``, where ``Z`` is
    a fresh sample of ``int(alpha * N[i])`` Bernoulli(1/2) draws, ``S[i]`` is
    the arm's cumulative reward and ``N[i]`` its pull count. The arm with
    the highest score is pulled; ties are broken uniformly at random.

    Args:
        T: Number of rounds to play.
        MAB: Sequence of arms. Each arm must expose ``sample()`` returning an
            object whose ``squeeze()`` yields a scalar reward.
        alpha: Multiplier applied to an arm's pull count to get the number
            of Bernoulli draws used in its score perturbation.

    Returns:
        A tuple ``(rewards, draws)`` of two float arrays of shape ``(T,)``:
        the reward received and the index of the arm pulled at each round.
    """
    K = len(MAB)
    rewards = np.zeros((T,))
    draws = np.zeros((T,))
    N = np.zeros((K,))  # number of pulls per arm
    S = np.zeros((K,))  # cumulative reward per arm
    biased_test = np.zeros((K,))

    # Initialisation: pull each arm once. Capped at T so that a horizon
    # shorter than the number of arms does not index past the output arrays.
    for k in range(min(K, T)):
        r = MAB[k].sample().squeeze()
        rewards[k] = r
        draws[k] = k
        S[k] += r
        N[k] += 1

    for t in range(K, T):
        for i in range(K):
            Z = np.random.binomial(1, 1 / 2, size=int(alpha * N[i]))
            # An empty sample (e.g. very small alpha) contributes no
            # perturbation; guard explicitly instead of taking the mean of
            # an empty array, which would emit a RuntimeWarning.
            perturbation = Z.mean() if Z.size else 0.0
            biased_test[i] = perturbation + S[i] / N[i]

        # All arms whose score is (numerically) maximal; break ties randomly.
        idxs = np.flatnonzero(np.isclose(biased_test, biased_test.max()))
        # np.asscalar was removed in NumPy 1.23; int() is the portable form.
        a = int(np.random.choice(idxs))

        r = MAB[a].sample().squeeze()
        N[a] += 1
        S[a] += r
        rewards[t] = r
        draws[t] = a
    return rewards, draws