syne_tune/optimizer/schedulers/searchers/bayesopt/models/meanstd_acqfunc.py
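# Module-level imports assumed by this excerpt (they live elsewhere in the
# original file; listed here so the snippet reads standalone):
#     import itertools
#     import numpy as np
#     from typing import Optional, Tuple
#     plus SurrogateModel, SurrogateOutputModel, dictionarize_objective from
#     syne_tune's bayesopt model utilities.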
def compute_acq_with_gradient(
        self, input: np.ndarray,
        model: Optional[SurrogateOutputModel] = None) -> Tuple[float, np.ndarray]:
    if model is None:
        model = self.model
    # A single surrogate model is wrapped into a one-entry dictionary, so
    # that the code below can treat both cases uniformly
    if isinstance(model, SurrogateModel):
        model = dictionarize_objective(model)
    output_to_predictions = self._map_outputs_to_predictions(
        model, input.reshape(1, -1))
    current_bests = self._get_current_bests(model)
    # Reshape predictions to accommodate _compute_head_and_gradient. We
    # also store the original shapes, which are needed below
    shapes = dict()
    for output_name, preds_for_samples in output_to_predictions.items():
        shapes[output_name] = {
            k: v.shape for k, v in preds_for_samples[0].items()}
        for prediction in preds_for_samples:
            for k in prediction.keys():
                prediction[k] = prediction[k].reshape((-1,))
    # The MCMC average is a product over the lists coming from each model.
    # We need to accumulate head gradients w.r.t. each model, each of which
    # is a list over the MCMC samples from that model (of size 1 without
    # MCMC)
    fvals_list = []
    # For the accumulation of head gradients, we also need the position in
    # each list
    list_values = [
        list(enumerate(output_to_predictions[name]))
        for name in self.model_output_names]
    head_gradient = {
        name: [None] * len(predictions)
        for name, predictions in output_to_predictions.items()}
    # Iterate over all combinations of MCMC samples, one from each output
    for preds_and_pos in itertools.product(*list_values):
        positions, predictions = zip(*preds_and_pos)
        output_to_preds = dict(zip(self.model_output_names, predictions))
        current_best = current_bests(positions)
        head_result = self._compute_head_and_gradient(
            output_to_preds, current_best)
        fvals_list.append(head_result.hval)
        for output_name, pos in zip(self.model_output_names, positions):
            head_gradient[output_name][pos] = self._add_head_gradients(
                head_result.gradient[output_name],
                head_gradient[output_name][pos])
    # Average the head values over all product combinations
    fval = np.mean(fvals_list)
    num_total = len(fvals_list)
    # Sum up the gradients coming from each output model
    gradient = 0.0
    for output_name, output_model in model.items():
        # Reshape head gradients back to the shapes of the corresponding
        # predictions. This is required for `backward_gradient` to work
        shp = shapes[output_name]
        head_grad = [
            {k: v.reshape(shp[k]) for k, v in orig_grad.items()}
            for orig_grad in head_gradient[output_name]]
        # Gradients are computed by the model
        gradient_list = output_model.backward_gradient(input, head_grad)
        # Average over MCMC samples
        output_gradient = np.sum(gradient_list, axis=0) / num_total
        gradient += output_gradient
    return fval, gradient
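
# ---------------------------------------------------------------------------
# Illustration (not part of the original file): a minimal, runnable sketch of
# the accumulation pattern used above, with toy stand-ins for the surrogate
# predictions and for _compute_head_and_gradient. All names, shapes, and the
# head function here are hypothetical; only the product/accumulation logic
# mirrors the method.

import itertools
import numpy as np

output_names = ["objective", "cost"]
rng = np.random.default_rng(0)
# Two outputs with 2 and 3 MCMC samples; predictions already flattened
output_to_predictions = {
    name: [
        {"mean": rng.normal(size=3), "std": rng.uniform(1.0, 2.0, size=3)}
        for _ in range(num_samples)
    ]
    for name, num_samples in zip(output_names, [2, 3])
}


def toy_head_and_gradient(preds):
    # Toy head value: sum of (mean - std) over all outputs, so the head
    # gradient is +1 w.r.t. every "mean" entry and -1 w.r.t. every "std"
    hval = sum(float(np.sum(p["mean"] - p["std"])) for p in preds.values())
    gradient = {
        name: {"mean": np.ones(3), "std": -np.ones(3)} for name in preds
    }
    return hval, gradient


list_values = [
    list(enumerate(output_to_predictions[name])) for name in output_names
]
head_gradient = {
    name: [None] * len(preds) for name, preds in output_to_predictions.items()
}
fvals_list = []
for preds_and_pos in itertools.product(*list_values):
    positions, predictions = zip(*preds_and_pos)
    hval, grad = toy_head_and_gradient(dict(zip(output_names, predictions)))
    fvals_list.append(hval)
    # Accumulate head gradients per (output, MCMC sample position)
    for name, pos in zip(output_names, positions):
        prev = head_gradient[name][pos]
        head_gradient[name][pos] = {
            k: (v if prev is None else prev[k] + v)
            for k, v in grad[name].items()
        }

# 2 * 3 = 6 product combinations; the method above divides the accumulated,
# model-backpropagated gradients by this count (num_total) and averages the
# head values the same way
print(np.mean(fvals_list), len(fvals_list))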