def robust_standardize()

in bayesmark/stats.py [0:0]


def robust_standardize(X, q_level=0.5):
    """Perform robust standardization of data matrix `X` over axis 0.

    Similar to :func:`sklearn:sklearn.preprocessing.robust_scale` except also does a Gaussian
    adjustment rescaling so that if Gaussian data is passed in the transformed
    data will, in large `n`, be distributed as N(0,1). See sklearn feature
    request #10139 on github.

    Parameters
    ----------
    X : :class:`numpy:numpy.ndarray` of shape (n, ...)
        Array containing elements to standardize. Must be 1D or 2D, with all
        elements finite and ``n >= 2``.
    q_level : scalar
        Must satisfy ``0 < q_level <= 1``. Inter-quantile range to use for
        scale estimation. Use values strictly less than 1 in practice:
        ``q_level == 1`` passes validation but makes the Gaussian rescale
        factor infinite, giving non-finite output.

    Returns
    -------
    X_ss : :class:`numpy:numpy.ndarray` of shape (n, ...)
        Standardized version of input `X`.
    """
    X = np.asarray(X)
    assert X.ndim in (1, 2)
    assert np.all(np.isfinite(X))
    # q_level == 0 would make the quantile width identically zero, so it is excluded.
    assert 0.0 < q_level <= 1.0
    # Need at least 2 points for a non-degenerate quantile range.
    assert X.shape[0] >= 2

    # Robust location estimate: per-column median.
    mu = np.median(X, axis=0)

    # Robust scale estimate: width of the central q_level inter-quantile
    # range, computed with a single percentile call over both quantiles.
    q0, q1 = 0.5 * (1.0 - q_level), 0.5 * (1.0 + q_level)
    lo, hi = np.percentile(X, [100.0 * q0, 100.0 * q1], axis=0)
    v = np.asarray(hi - lo)
    # Guard constant columns (zero spread) against division by zero; their
    # centered values are 0 anyway, so the substitute scale is arbitrary.
    v[v == 0.0] = 1.0

    X_ss = (X - mu) / v
    # Rescale to match the scale of N(0,1): for standard normal data the
    # q0-q1 quantile width equals ppf(q1) - ppf(q0), so this factor restores
    # unit variance (in large n) for Gaussian input.
    X_ss = X_ss * (sst.norm.ppf(q1) - sst.norm.ppf(q0))
    assert X.shape == X_ss.shape
    return X_ss