in econml/grf/_base_grf.py [0:0]
def _predict_point_and_var(self, X, full=False, point=True, var=False, project=False, projector=None):
""" An internal private method that coordinates all prediction functionality and tries to share
as much computation between different predict methods to avoid re-computation and re-spawining of
parallel executions.
Parameters
----------
X : array-like of shape (n_samples, n_features)
The input samples. Internally, it will be converted to
``dtype=np.float64``.
full : bool, default=False
Whether to return the full estimated parameter or only the relevant part
point : bool, default=True
Whether to return the point estimate theta(x)
var : bool, default=False
        Whether to return the covariance V(theta(x)) of the point estimate
project : bool, default=False
Whether to project the point estimate using an inner product with a projector, and also
return the variance of the projection
projector : array-like of shape (n_samples, n_outputs)
        The projection vector for each sample. The point estimate theta(x) of each sample will be
        projected, and the inner product <theta(x), projector(x)> will be returned for each sample x.
        The variance information will then refer to this inner product rather than to the parameter
        theta(x).
Returns
-------
    point : array-like of shape (n_samples, x) or (n_samples,)
        The point estimate of the parameter theta(x), or its inner product with projector(x), for
        each sample x in X. Omitted if `point=False`. If `project=True`, the inner product is
        returned and the shape is (n_samples,). If `project=False` and `full=True`, then
        `x=n_outputs`; if `project=False` and `full=False`, then `x=n_relevant_outputs`.
    var : array-like of shape (n_samples, x, x) or (n_samples,)
        The covariance of the parameter theta(x), or the variance of its inner product with
        projector(x), for each sample x in X. Omitted if `var=False`. If `project=True`, the shape
        is (n_samples,). If `project=False` and `full=True`, then `x=n_outputs`; if `project=False`
        and `full=False`, then `x=n_relevant_outputs`.
"""
alpha, jac = self.predict_alpha_and_jac(X)
invjac = np.linalg.pinv(jac)
parameter = np.einsum('ijk,ik->ij', invjac, alpha)
if var:
if not self.inference:
            raise AttributeError("Inference not available. Forest was initialized with `inference=False`.")
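        # Bootstrap of little bags: trees are grouped into bags (`slices_`), and each bag
        # yields its own average moment vector at each target x. The spread of these
        # per-bag moments is what identifies the sampling variability of the estimate.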
slices = self.slices_
n_jobs, _, _ = _partition_estimators(len(slices), self.n_jobs)
moment_bags, moment_var_bags = zip(*Parallel(n_jobs=n_jobs, verbose=self.verbose, backend='threading')(
delayed(self.predict_moment_and_var)(X, parameter, slice=sl, parallel=False) for sl in slices))
moment = np.mean(moment_bags, axis=0)
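        # Between-bag covariance of the moment: E_bags[m_b m_b^T] minus the outer
        # product of the overall mean, m m^T.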
trans_moment_bags = np.moveaxis(moment_bags, 0, -1)
sq_between = np.einsum('tij,tjk->tik', trans_moment_bags,
np.transpose(trans_moment_bags, (0, 2, 1))) / len(slices)
moment_sq = np.einsum('tij,tjk->tik',
moment.reshape(moment.shape + (1,)),
moment.reshape(moment.shape[:-1] + (1, moment.shape[-1])))
var_between = sq_between - moment_sq
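        # Delta-method (sandwich) covariance: Cov(theta(x)) ~ J^{-1} Cov(moment) J^{-T}.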
pred_cov = np.einsum('ijk,ikm->ijm', invjac,
np.einsum('ijk,ikm->ijm', var_between, np.transpose(invjac, (0, 2, 1))))
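        # For a projection, the variance of <theta(x), projector(x)> is the quadratic form
        # projector(x)^T Cov(theta(x)) projector(x); otherwise report the diagonal.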
if project:
pred_var = np.einsum('ijk,ikm->ijm', projector.reshape((-1, 1, projector.shape[1])),
np.einsum('ijk,ikm->ijm', pred_cov,
projector.reshape((-1, projector.shape[1], 1))))[:, 0, 0]
else:
pred_var = np.diagonal(pred_cov, axis1=1, axis2=2)
#####################
# Variance correction
#####################
        # Subtract the average within-bag variance. This ends up being equal to the overall
        # (E_{all trees}[moment^2] - E_bags[mean_bag_moment^2]) / (sizeof(bag) - 1).
        # The subtracted term is exactly sq_between.
var_total = np.mean(moment_var_bags, axis=0)
correction = (var_total - sq_between) / (len(slices[0]) - 1)
pred_cov_correction = np.einsum('ijk,ikm->ijm', invjac,
np.einsum('ijk,ikm->ijm', correction, np.transpose(invjac, (0, 2, 1))))
if project:
pred_var_correction = np.einsum('ijk,ikm->ijm', projector.reshape((-1, 1, projector.shape[1])),
np.einsum('ijk,ikm->ijm', pred_cov_correction,
projector.reshape((-1, projector.shape[1], 1))))[:, 0, 0]
else:
pred_var_correction = np.diagonal(pred_cov_correction, axis1=1, axis2=2)
        # Objective Bayes debiasing for the diagonal entries, where we know a priori that they are
        # positive. No correction is applied to the off-diagonals, for which we have no objective
        # prior.
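        # Treating the naive estimate as approximately normal with scale `se`, and imposing a flat
        # prior on [0, inf), the debiased value is the mean of a normal truncated at zero:
        #   naive + se * phi(zstat) / Phi(zstat),  with zstat = naive / se,
        # where phi and Phi are the standard normal pdf and cdf (Phi computed via erfc below).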
naive_estimate = pred_var - pred_var_correction
se = np.maximum(pred_var, pred_var_correction) * np.sqrt(2.0 / len(slices))
zstat = naive_estimate / np.clip(se, 1e-10, np.inf)
numerator = np.exp(- (zstat**2) / 2) / np.sqrt(2.0 * np.pi)
denominator = 0.5 * erfc(-zstat / np.sqrt(2.0))
pred_var_corrected = naive_estimate + se * numerator / denominator
        # Finally, write the corrections back: debiased variances on the diagonal,
        # naive-corrected entries off the diagonal.
if project:
pred_var = pred_var_corrected
else:
pred_cov = pred_cov - pred_cov_correction
for t in range(self.n_outputs_):
pred_cov[:, t, t] = pred_var_corrected[:, t]
if project:
if point:
pred = np.sum(parameter * projector, axis=1)
if var:
return pred, pred_var
else:
return pred
else:
return pred_var
else:
n_outputs = self.n_outputs_ if full else self.n_relevant_outputs_
if point and var:
return (parameter[:, :n_outputs],
pred_cov[:, :n_outputs, :n_outputs],)
elif point:
return parameter[:, :n_outputs]
else:
return pred_cov[:, :n_outputs, :n_outputs]
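
# Usage sketch (illustrative only, not part of the method): the public predict methods are
# thin wrappers over this routine. Assuming a fitted forest `est` (hypothetical name):
#   theta = est._predict_point_and_var(X)                  # point estimate only
#   theta, cov = est._predict_point_and_var(X, var=True)   # point estimate and covariance
#   proj, proj_var = est._predict_point_and_var(
#       X, var=True, project=True,
#       projector=np.ones((X.shape[0], est.n_outputs_)))   # variance of <theta(x), 1>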