def solve_MLE()

in isoexp/linear/linearbandit.py [0:0]


    def solve_MLE(self, rewards_history, features_history):
        """Estimate the parameter vector by penalized maximum likelihood.

        The data used for the fit are ``self.features_history`` and
        ``self.rewards_history``. The ``rewards_history`` and
        ``features_history`` arguments are accepted for interface
        compatibility but are not read by this method.

        The objective maximized over ``beta`` is
        ``sum(y * (X @ beta) - A(X @ beta)) - (reg_factor / 2) * ||beta||_2``
        where ``A(z) = log(1 + exp(z))`` when ``self.model == 'bernoulli'``
        and ``A(z) = z**2 / 2`` for any other non-``None`` model.

        Returns:
            * ``np.zeros((self.d,))`` while ``self.iteration <= 1``;
            * ``None`` when ``self.model`` is ``None``;
            * otherwise ``beta.value`` as left by the cvxpy solver.
        """
        # Not enough rounds played yet: fall back to the zero estimate.
        if self.iteration <= 1:
            return np.zeros((self.d,))

        # No likelihood model configured: nothing to fit.
        if self.model is None:
            return None

        n_samples = len(self.rewards_history)
        # `1 *` is kept from the original arithmetic (e.g. boolean features
        # become integers before entering the cvxpy expression).
        X = 1 * np.array(self.features_history)
        y = np.array(self.rewards_history).reshape((n_samples,))

        beta = cp.Variable(self.d)
        lambd = cp.Parameter(nonneg=True)
        lambd.value = self.reg_factor / 2

        linear_pred = X @ beta
        if self.model == 'bernoulli':
            # log(1 + exp(z)) written as a log-sum-exp over the rows [0, z].
            log_partition = cp.log_sum_exp(
                cp.vstack([np.zeros(n_samples), linear_pred]), axis=0)
        else:
            log_partition = cp.power(linear_pred, 2) / 2

        # NOTE(review): the penalty is the un-squared L2 norm scaled by
        # reg_factor / 2 -- confirm this (rather than a squared norm) is
        # intended.
        log_likelihood = (cp.sum(cp.multiply(y, linear_pred) - log_partition)
                          - lambd * cp.norm(beta, 2))
        problem = cp.Problem(cp.Maximize(log_likelihood))
        problem.solve(verbose=False, warm_start=False, max_iters=200)
        return beta.value