def get_scaled_bases()

in src/peft/tuners/randlora/bnb.py


        def get_scaled_bases(self, adapter, device=None) -> tuple[torch.Tensor, torch.Tensor]:
            """
            Performs scaling on the smallest random base (randlora_A) and returns randlora_A and randlora_B in the
            correct order to fit the target layers' dimensions.

            Args:
                adapter (str):
                    The name of the adapter for which the delta weight should be computed.
                device (torch.device, optional):
                    The device on which to place the bases. Defaults to the device of randlora_B.
            """

            randlora_A = self.randlora_A[adapter]
            randlora_B = self.randlora_B[adapter]
            if device is None:
                device = randlora_B.device
            dtype = randlora_B.dtype

            # In case the user wants to merge adapter weights that are in (b)float16 while on CPU, we need to cast
            # the weights to float32, perform the merge, and then cast back to (b)float16, because some CPUs have
            # slow bf16/fp16 matmuls.
            cast_to_fp32 = device.type == "cpu" and (dtype == torch.float16 or dtype == torch.bfloat16)

            randlora_lambda = self.randlora_lambda[adapter].to(device)
            randlora_gamma = self.randlora_gamma[adapter].to(device)

            if cast_to_fp32:
                randlora_A = randlora_A.float()
                randlora_B = randlora_B.float()
                randlora_lambda = randlora_lambda.float()
                randlora_gamma = randlora_gamma.float()

            # The trainable parameters are always applied to randlora_A, the smallest basis.
            min_dim, max_dim = min(self.out_features, self.in_features), max(self.out_features, self.in_features)

            # As adapted layers may have different shapes and RandLora contains a single shared pair of A and B matrices,
            # we initialize these matrices with the largest required size for each dimension.
            # During the forward pass, required submatrices are sliced out from the shared randlora_A and randlora_B.
            sliced_A = randlora_A[:, : self.num_bases, :min_dim].to(device)
            sliced_B = randlora_B[:max_dim, : self.num_bases, :].to(device)
            # Flattening the matrices over the rank and number of bases dimensions is more memory efficient
            update_B = sliced_B.flatten(start_dim=1)
            update_A = UniqueBaseGrad.apply(sliced_A, randlora_lambda, randlora_gamma).flatten(end_dim=1)
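            # After flattening, update_B is a 2-D matrix whose first dimension is max_dim and update_A is a 2-D
            # matrix whose last dimension is min_dim, so update_B @ update_A yields a (max_dim, min_dim) update.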
            if min_dim == self.in_features:
                return update_A, update_B

            return update_B.T, update_A.T
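
To illustrate how the returned pair is meant to be consumed, here is a minimal sketch (not part of bnb.py) that composes the two bases back into a dense update. The helper name delta_weight_sketch and its scaling argument are made up for illustration; the actual merge code in the RandLora layers applies its own per-adapter scaling.

import torch

def delta_weight_sketch(layer, adapter: str, scaling: float = 1.0) -> torch.Tensor:
    # get_scaled_bases orders the bases so that the second factor has out_features
    # rows and the first factor has in_features columns, regardless of which of the
    # two dimensions is smaller.
    update_a, update_b = layer.get_scaled_bases(adapter)
    delta = (update_b @ update_a) * scaling
    assert delta.shape == (layer.out_features, layer.in_features)
    return delta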