def compute_acq_with_gradient()

in syne_tune/optimizer/schedulers/searchers/bayesopt/models/meanstd_acqfunc.py

Computes the acquisition value and its gradient at a single input, averaging over the Cartesian product of MCMC samples drawn from each output model.


    def compute_acq_with_gradient(
            self, input: np.ndarray,
            model: Optional[SurrogateOutputModel] = None
    ) -> Tuple[float, np.ndarray]:
        if model is None:
            model = self.model
        if isinstance(model, SurrogateModel):
            model = dictionarize_objective(model)
        output_to_predictions = self._map_outputs_to_predictions(
            model, input.reshape(1, -1))
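        # output_to_predictions maps each output name to a list of prediction
        # dicts, with one entry per MCMC sample of that model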
        current_bests = self._get_current_bests(model)
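        # current_bests is callable: given a tuple of MCMC sample positions,
        # it returns the current best (incumbent) value for that combination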

        # Reshape predictions to flat vectors, as expected by
        # `_compute_head_and_gradient`. We also store the original shapes,
        # which are needed below to undo the reshaping
        shapes = dict()
        for output_name, preds_for_samples in output_to_predictions.items():
            shapes[output_name] = {
                k: v.shape for k, v in preds_for_samples[0].items()}
            for prediction in preds_for_samples:
                for k in prediction.keys():
                    prediction[k] = prediction[k].reshape((-1,))

        # The MCMC average runs over the Cartesian product of the per-model
        # sample lists. We need to accumulate head gradients w.r.t. each
        # model; each accumulator is a list over the MCMC samples of that
        # model (of size 1 without MCMC)
        fvals_list = []
        # To accumulate head gradients, we also need the position of each
        # sample within its list
        list_values = [
            list(enumerate(output_to_predictions[name]))
            for name in self.model_output_names]
        head_gradient = {
            name: [None] * len(predictions)
            for name, predictions in output_to_predictions.items()}
        for preds_and_pos in itertools.product(*list_values):
            positions, predictions = zip(*preds_and_pos)
            output_to_preds = dict(zip(self.model_output_names, predictions))
            current_best = current_bests(positions)
            head_result = self._compute_head_and_gradient(
                output_to_preds, current_best)
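            # head_result provides the head value (hval) and per-output head
            # gradients (gradient)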
            fvals_list.append(head_result.hval)
            for output_name, pos in zip(self.model_output_names, positions):
                head_gradient[output_name][pos] = self._add_head_gradients(
                    head_result.gradient[output_name],
                    head_gradient[output_name][pos])

        # Sum up the gradients coming from each output model
        fval = np.mean(fvals_list)
        num_total = len(fvals_list)
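        # num_total is the number of terms in the Cartesian product, i.e. the
        # product of MCMC sample counts over all outputs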
        gradient = 0.0
        for output_name, output_model in model.items():
            # Reshape head gradients so they have the same shape as the
            # corresponding predictions. This is required for
            # `backward_gradient` to work.
            shp = shapes[output_name]
            head_grad = [
                {k: v.reshape(shp[k]) for k, v in orig_grad.items()}
                for orig_grad in head_gradient[output_name]]
            # Gradients are computed by the model
            gradient_list = output_model.backward_gradient(input, head_grad)
            # Average over MCMC samples
            output_gradient = np.sum(gradient_list, axis=0) / num_total
            gradient += output_gradient
        return fval, gradient
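
The Cartesian-product averaging above is easier to see in a stripped-down form. The sketch below is hypothetical and self-contained: `toy_head`, the output names, and the prediction values are made-up stand-ins, not Syne Tune APIs; only the enumerate/product/zip pattern mirrors the method above.

    import itertools
    import numpy as np

    # Two hypothetical output models: "objective" has 2 MCMC samples,
    # "cost" has 1
    output_to_predictions = {
        "objective": [
            {"mean": np.array([0.2]), "std": np.array([0.1])},
            {"mean": np.array([0.4]), "std": np.array([0.2])},
        ],
        "cost": [{"mean": np.array([1.0]), "std": np.array([0.3])}],
    }
    output_names = list(output_to_predictions.keys())

    def toy_head(preds):
        # Stand-in for _compute_head_and_gradient: the head value here is
        # simply the sum of predictive means across outputs
        return sum(float(p["mean"]) for p in preds.values())

    # Pair each prediction with its position in its per-output list
    list_values = [
        list(enumerate(output_to_predictions[name])) for name in output_names]
    fvals_list = []
    for preds_and_pos in itertools.product(*list_values):
        positions, predictions = zip(*preds_and_pos)
        fvals_list.append(toy_head(dict(zip(output_names, predictions))))

    # 2 * 1 = 2 product terms; the acquisition value is their mean
    fval = np.mean(fvals_list)
    print(fval)  # ((0.2 + 1.0) + (0.4 + 1.0)) / 2 = 1.3

Head gradients follow the same accumulation: the gradient of each MCMC sample is summed over every product term it participates in, and the `backward_gradient` results are finally divided by the total number of terms.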