def encode()

in bayesmark/space.py [0:0]


def encode(X, labels, assume_sorted=False, dtype=bool, assume_valid=False):
    """Perform one hot encoding of categorical data in :class:`numpy:numpy.ndarray` variable `X` of any dimension.

    Parameters
    ----------
    X : :class:`numpy:numpy.ndarray` of shape (...)
        Categorical values of any standard type. Vectorized to work for any dimensional `X`.
    labels : :class:`numpy:numpy.ndarray` of shape (n,)
        Complete list of all possible labels. List is flattened if it is not already 1 dimensional.
    assume_sorted : bool
        If true, assume labels is already sorted and unique. This saves the computational cost of calling
        :func:`numpy:numpy.unique`.
    dtype : type
        Desired data type of feature array. One-hot is most logically `bool`, but feature matrices are usually
        `float`.
    assume_valid : bool
        If true, assume all elements of `X` are in the list `labels`. This saves the computational cost of verifying
        `X` are in `labels`. If true and a non-label `X` occurs this routine will silently give bogus result.

    Returns
    -------
    Y : :class:`numpy:numpy.ndarray` of shape (..., n)
        One-hot encoding of `X`. Extra dimension is appended at end for the one-hot vector. It has data type `dtype`.

    Raises
    ------
    AssertionError
        If `assume_valid` is false and some element of `X` is not in `labels`.
    """
    X = np.asarray(X)
    labels = np.asarray(labels) if assume_sorted else np.unique(labels)
    check_array(labels, "labels", pre=True, ndim=1, min_size=1)

    idx = np.searchsorted(labels, X)
    # If x is not even in labels then this will fail. This is not ValueError
    # because the user explicitly asked for this using argument assume_valid.
    # np.searchsorted returns len(labels) for values that sort after every
    # label, so the lookup index is clipped to the last label: such values then
    # fail the equality check below instead of raising IndexError on the lookup.
    last = len(labels) - 1
    assert assume_valid or np.all(np.asarray(labels[np.minimum(idx, last)]) == X)

    # This is using some pro np indexing technique to vectorize across all
    # possible input dimensions for X in the same code.
    Y = np.zeros(X.shape + (len(labels),), dtype=dtype)
    # The assigned True is cast to `dtype`, e.g. 1.0 for a float feature matrix.
    Y[unravel_index(X.shape) + (idx.ravel(),)] = True
    return Y