def get_scaled_bases()

in src/peft/tuners/randlora/layer.py


    def get_scaled_bases(self, adapter, device=None) -> tuple[torch.Tensor, torch.Tensor]:
        """
        Performs scaling on the smallest random base (randlora_A) and returns randlora_A and randlora_B in the correct
        order to fit the target layers' dimensions.

        Args:
            adapter (str):
                The name of the adapter for which the scaled bases should be returned.
            device (`torch.device`, *optional*):
                The device on which to return the bases. Defaults to the device of randlora_B.
        """

        randlora_A = self.randlora_A[adapter]
        randlora_B = self.randlora_B[adapter]
        if device is None:
            device = randlora_B.device
        dtype = randlora_B.dtype

        # In case the user wants to merge adapter weights that are in (b)float16 while on CPU, we need to cast the
        # weights to float32, perform the merge, and then cast back to (b)float16, because some CPUs have slow
        # bf16/fp16 matmuls.
        cast_to_fp32 = device.type == "cpu" and (dtype == torch.float16 or dtype == torch.bfloat16)

        randlora_lambda = self.randlora_lambda[adapter].to(device)
        randlora_gamma = self.randlora_gamma[adapter].to(device)

        if cast_to_fp32:
            randlora_A = randlora_A.float()
            randlora_B = randlora_B.float()
            randlora_lambda = randlora_lambda.float()
            randlora_gamma = randlora_gamma.float()

        # The trainable parameters are always applied to randlora_A, the smallest basis.
        min_dim, max_dim = min(self.out_features, self.in_features), max(self.out_features, self.in_features)

        # As adapted layers may have different shapes and RandLora contains a single shared pair of A and B matrices,
        # we initialize these matrices with the largest required size for each dimension.
        # During the forward pass, required submatrices are sliced out from the shared randlora_A and randlora_B.
        sliced_A = randlora_A[:, : self.num_bases, :min_dim].to(device)
        sliced_B = randlora_B[:max_dim, : self.num_bases, :].to(device)

        # Flattening the matrices over the rank and number of bases dimensions is more memory efficient
        update_B = sliced_B.flatten(start_dim=1)
        update_A = UniqueBaseGrad.apply(sliced_A, randlora_lambda, randlora_gamma).flatten(end_dim=1)

        # update_A is always applied to the smaller dimension to keep the number of trainable parameters low, so
        # determine whether update_A or update_B should be applied first based on the layer's shape.
        if min_dim == self.in_features:
            return update_A, update_B
        return update_B.T, update_A.T
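
Below is a minimal, standalone sketch of what the returned pair represents and why the ordering matters. The shapes, variable names, and the elementwise scaling (a stand-in for UniqueBaseGrad.apply, which also customizes the backward pass) are illustrative assumptions, not the library's exact initialization:

import torch

# Illustrative shapes (assumptions for this sketch):
rank, num_bases = 16, 4
in_features, out_features = 768, 3072
min_dim, max_dim = min(in_features, out_features), max(in_features, out_features)

# RandLora shares one pair of random bases across layers and slices them per layer;
# here we create them directly at the sliced sizes.
randlora_A = torch.randn(rank, num_bases, min_dim)    # smallest basis, receives the scaling
randlora_B = torch.randn(max_dim, num_bases, rank)    # largest basis, left unscaled
randlora_lambda = torch.randn(rank, num_bases)        # trainable scaling (assumed shape)
randlora_gamma = torch.randn(num_bases, min_dim)      # trainable scaling (assumed shape)

# Elementwise scaling of randlora_A as a stand-in for UniqueBaseGrad.apply, followed by the
# same flattening as in get_scaled_bases.
scaled_A = randlora_lambda[:, :, None] * randlora_A * randlora_gamma[None, :, :]
update_A = scaled_A.flatten(end_dim=1)                # (rank * num_bases, min_dim)
update_B = randlora_B.flatten(start_dim=1)            # (max_dim, num_bases * rank)

# Ordering rule from get_scaled_bases: the matrix that touches in_features comes first.
if min_dim == in_features:
    first, second = update_A, update_B
else:
    first, second = update_B.T, update_A.T

delta_weight = second @ first                         # (out_features, in_features)
print(delta_weight.shape)                             # torch.Size([3072, 768])

Whichever branch is taken, the first matrix always has in_features as its second dimension and the second matrix has out_features as its first, so a caller can either apply them to the input in sequence or multiply them to materialize the dense (out_features, in_features) delta weight.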