# isoexp/linear/linearbandit.py
def solve_MLE(self, rewards_history, features_history):
    """Fit the reward-model parameters by L2-penalized maximum likelihood.

    Solves a convex MLE with CVXPY over the accumulated history: a logistic
    likelihood when ``self.model == 'bernoulli'``, otherwise a Gaussian
    (least-squares-style) likelihood.

    NOTE(review): the ``rewards_history`` / ``features_history`` arguments
    are ignored; the method reads ``self.rewards_history`` and
    ``self.features_history`` instead — confirm this is intentional.

    Returns:
        np.ndarray of shape (self.d,): the penalized MLE estimate, or the
        zero vector when there is not yet enough data (iteration <= 1) or
        no likelihood model is configured.  (The original code implicitly
        returned None when iteration > 1 but self.model was None; that
        inconsistent path now also returns zeros.)
    """
    # Guard clause: nothing to fit yet, or no model configured.
    if self.iteration <= 1 or self.model is None:
        return np.zeros((self.d,))

    n_samples = len(self.rewards_history)
    n_features = self.d
    # `1 *` forces a fresh numeric array copy, as in the original code.
    X = 1 * np.array(self.features_history)  # shape (n_samples, n_features)
    y = np.array(self.rewards_history).reshape((n_samples,))

    beta = cp.Variable(n_features)
    lambd = cp.Parameter(nonneg=True)
    lambd.value = self.reg_factor / 2  # L2 penalty weight

    if self.model == 'bernoulli':
        # Logistic log-likelihood: y·(Xβ) − log(1 + exp(Xβ)), written as
        # log_sum_exp([0, Xβ]) column-wise for numerical stability.
        log_likelihood = cp.sum(
            cp.multiply(y, X @ beta)
            - cp.log_sum_exp(cp.vstack([np.zeros(n_samples), X @ beta]), axis=0)
        ) - lambd * cp.norm(beta, 2)
    else:
        # Gaussian log-likelihood up to constants: y·(Xβ) − (Xβ)²/2.
        log_likelihood = cp.sum(
            cp.multiply(y, X @ beta) - cp.power(X @ beta, 2) / 2
        ) - lambd * cp.norm(beta, 2)

    # Single solve path shared by both likelihood models (was duplicated).
    problem = cp.Problem(cp.Maximize(log_likelihood))
    problem.solve(verbose=False, warm_start=False, max_iters=200)
    return beta.value