in bayesmark/stats.py [0:0]
def robust_standardize(X, q_level=0.5):
    """Perform robust standardization of data matrix `X` over axis 0.

    Similar to :func:`sklearn:sklearn.preprocessing.robust_scale` except also does a Gaussian
    adjustment rescaling so that if Gaussian data is passed in the transformed
    data will, in large `n`, be distributed as N(0,1). See sklearn feature
    request #10139 on github.

    Parameters
    ----------
    X : :class:`numpy:numpy.ndarray` of shape (n, ...)
        Array containing the elements to standardize. Requires ``n >= 2``.
    q_level : scalar
        Must be in (0, 1]. Inter-quantile range to use for scale estimation.

    Returns
    -------
    X : :class:`numpy:numpy.ndarray` of shape (n, ...)
        Standardized version of the input `X`, same shape as the input.
    """
    X = np.asarray(X)
    assert X.ndim in (1, 2)
    assert np.all(np.isfinite(X))
    assert 0.0 < q_level <= 1.0
    assert X.shape[0] >= 2

    # Robust location estimate: the per-column median.
    mu = np.median(X, axis=0)
    # Quantile levels placed symmetrically around the median so that the
    # interval [q0, q1] covers q_level of the probability mass.
    q0, q1 = 0.5 * (1.0 - q_level), 0.5 * (1.0 + q_level)
    # Compute both quantiles in a single pass over the data.
    lo, hi = np.percentile(X, [100.0 * q0, 100.0 * q1], axis=0)
    v = np.asarray(hi - lo)
    # Guard against zero scale (e.g. constant columns): leave those centered
    # but unscaled instead of dividing by zero.
    v[v == 0.0] = 1.0
    X_ss = (X - mu) / v
    # Rescale to match scale of N(0,1): for standard normal data the
    # [q0, q1] inter-quantile range is ppf(q1) - ppf(q0).
    X_ss = X_ss * (sst.norm.ppf(q1) - sst.norm.ppf(q0))
    assert X.shape == X_ss.shape
    return X_ss