xformers/components/attention/feature_maps/softmax.py [180:211]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    def __init__(
        self,
        dim_features: int,
        iter_before_redraw: Optional[int],
        normalize_inputs: bool = False,
        epsilon: float = 1e-6,
        softmax_temp: float = -1,
    ):
        super().__init__(
            dim_features, iter_before_redraw, normalize_inputs, epsilon, softmax_temp
        )

        assert (
            dim_features % 2 == 0
        ), "The feature dimension needs to be even with this kernel"
        self.dim_feature_map = self.dim_features // 2

    @torch.no_grad()
    def _get_feature_map(self, dim_input: int, dim_features: int, device: torch.device):
        """
        Generate the projection matrix onto the random features

        .. note: The heads dimension needs to be taken into account, hence the per-block random matrix
        and not uniformally random.
        """

        # Get per-block random orthogonal matrices.
        # We need enough of them to project the whole input dimension, regardless of the
        # requested dimension of the features
        features = self._get_random_ortho_matrix(
            math.ceil(dim_input / dim_features),
            dim_features,
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
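
The excerpt above cuts off inside the call to _get_random_ortho_matrix, but the comment spells out the intent: one random orthogonal matrix per block, with enough blocks to cover the whole input dimension. A minimal sketch of such a draw, assuming a Gaussian-then-QR construction; the standalone function below is an illustration, not the actual xformers helper:

    import math

    import torch

    @torch.no_grad()
    def random_ortho_blocks(dim_input: int, dim_features: int, device=None) -> torch.Tensor:
        # Enough square blocks to span the whole input dimension,
        # regardless of the requested feature dimension
        num_blocks = math.ceil(dim_input / dim_features)
        gaussian = torch.randn(num_blocks, dim_features, dim_features, device=device)
        # QR of a Gaussian matrix yields an orthogonal Q, one per block
        q, _ = torch.linalg.qr(gaussian)
        return q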



xformers/components/attention/feature_maps/softmax.py [240:271]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    def __init__(
        self,
        dim_features: int,
        iter_before_redraw: Optional[int],
        normalize_inputs: bool = False,
        epsilon: float = 1e-6,
        softmax_temp: float = -1,
    ):
        super().__init__(
            dim_features, iter_before_redraw, normalize_inputs, epsilon, softmax_temp
        )

        assert (
            dim_features % 2 == 0
        ), "The feature dimension needs to be even with this kernel"
        self.dim_feature_map = self.dim_features // 2

    @torch.no_grad()
    def _get_feature_map(self, dim_input: int, dim_features: int, device: torch.device):
        """
        Generate the projection matrix onto the random features

        .. note: The heads dimension needs to be taken into account, hence the per-block random matrix
        and not uniformally random.
        """

        # Get per-block random orthogonal matrices.
        # We need enough of them to project the whole input dimension, regardless of the
        # requested dimension of the features
        features = self._get_random_ortho_matrix(
            math.ceil(dim_input / dim_features),
            dim_features,
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
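
Neither excerpt shows its class name, but the assert that dim_features is even, together with dim_feature_map = dim_features // 2, is characteristic of Performer-style positive feature maps that pair each random projection u with its negation, estimating the softmax kernel through exp(u) and exp(-u). A hedged sketch of such a feature map, assuming the hyperbolic (cosh) estimator from the Performer paper; the function name and exact scaling below are illustrative, not the library's API:

    import math

    import torch

    def hyperbolic_softmax_features(
        x: torch.Tensor, proj: torch.Tensor, epsilon: float = 1e-6
    ) -> torch.Tensor:
        # proj holds dim_features // 2 random directions; pairing exp(u) with
        # exp(-u) doubles them back up to dim_features (hence the even assert)
        u = x @ proj.transpose(-2, -1)
        # Gaussian prefactor exp(-|x|^2 / 2) keeps the estimator positive and bounded
        h = torch.exp(-0.5 * x.pow(2).sum(dim=-1, keepdim=True))
        norm = 1.0 / math.sqrt(2 * proj.shape[0])
        return norm * h * torch.cat([torch.exp(u), torch.exp(-u)], dim=-1) + epsilon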



