def pca()

in dataloading.py [0:0]


def pca(data, num_dims=None, mapping=None):
    """
    Applies PCA on the specified `data` to reduce its dimensionality to
    `num_dims` dimensions, and returns the reduced data and `mapping`.

    If a `mapping` is specified as input, `num_dims` is ignored and that mapping
    is applied on the input `data`.

    Args:
        data: Either a two-dimensional tensor, or a dict whose "features"
            entry is such a tensor. The mean is taken over dimension 0, so
            rows are treated as samples and columns as feature dimensions.
        num_dims: Positive number of principal components to keep. Required
            when `mapping` is None; values larger than the number of feature
            dimensions keep all components.
        mapping: Optional dict with "mean" and "projection" entries, as
            returned by a previous call to this function.

    Returns:
        Tuple `(reduced_data, mapping)`. When `data` is a dict, that same dict
        is updated in place (its "features" entry is replaced by the reduced
        features) and returned as `reduced_data`.

    Raises:
        AssertionError: If the inputs fail any of the checks above.
    """

    # work on both data tensor and data dict:
    data_dict = isinstance(data, dict)
    if data_dict:
        assert "features" in data, "data dict does not have features field"
        original_data = data
        data = original_data["features"]
    assert data.dim() == 2, "data tensor must be two-dimensional matrix"

    # compute PCA mapping:
    if mapping is None:
        assert num_dims is not None, "must specify num_dims or mapping"
        # A value of 0 would turn the slice below into `[:, -0:]`, which keeps
        # *all* components; negative values are equally meaningless.
        assert num_dims > 0, "num_dims must be positive"
        mean = torch.mean(data, 0, keepdim=True)
        zero_mean_data = data.sub(mean)
        # Unnormalized covariance (scatter) matrix; the scale factor does not
        # affect the eigenvectors.
        covariance = torch.matmul(zero_mean_data.t(), zero_mean_data)

        # `torch.symeig` has been removed from recent PyTorch releases;
        # `torch.linalg.eigh` is its replacement. UPLO="U" matches the upper
        # triangle that `symeig` read by default. Both return eigenvalues in
        # ascending order.
        if hasattr(torch, "linalg") and hasattr(torch.linalg, "eigh"):
            _, projection = torch.linalg.eigh(covariance, UPLO="U")
        else:  # older PyTorch builds without torch.linalg.eigh
            _, projection = torch.symeig(covariance, eigenvectors=True)

        # Eigenvalues are ascending, so the trailing columns are the
        # directions of largest variance.
        num_kept = min(num_dims, projection.size(1))
        projection = projection[:, -num_kept:]
        mapping = {"mean": mean, "projection": projection}
    else:
        assert isinstance(mapping, dict), "mapping must be a dict"
        assert "mean" in mapping and "projection" in mapping, "mapping missing keys"
        if num_dims is not None:
            logging.warning("Value of num_dims is ignored when mapping is specified.")

    # apply PCA mapping:
    reduced_data = data.sub(mapping["mean"]).matmul(mapping["projection"])

    # return results:
    if data_dict:
        # Intentionally updates the caller's dict in place and returns it.
        original_data["features"] = reduced_data
        reduced_data = original_data
    return reduced_data, mapping