src/peft/tuners/randlora/bnb.py [137:225]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        def get_scaled_bases(self, adapter, device=None) -> tuple[torch.Tensor, torch.Tensor]:
            """
            Performs scaling on the smallest random base (randlora_A) and returns randlora_A and randlora_B in the
            correct order to fit the target layers' dimensions

            Args:
                adapter (str):
                    The name of the adapter for which the delta weight should be computed.
            """

            randlora_A = self.randlora_A[adapter]
            randlora_B = self.randlora_B[adapter]

            if device is None:
                device = randlora_B.device
            dtype = randlora_B.dtype

            # In case users want to merge adapter weights that are in (b)float16 while on CPU, we cast the weights to
            # float32, perform the merge, and then cast back to (b)float16, because some CPUs have slow bf16/fp16
            # matmuls.
            cast_to_fp32 = device.type == "cpu" and (dtype == torch.float16 or dtype == torch.bfloat16)

            randlora_lambda = self.randlora_lambda[adapter].to(device)
            randlora_gamma = self.randlora_gamma[adapter].to(device)
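            # randlora_lambda and randlora_gamma are the per-layer trainable parameters; they scale the shared random
            # basis randlora_A through UniqueBaseGrad below.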

            if cast_to_fp32:
                randlora_A = randlora_A.float()
                randlora_B = randlora_B.float()
                randlora_lambda = randlora_lambda.float()
                randlora_gamma = randlora_gamma.float()

            # The trainable parameters are always applied to randlora_A, the smallest basis.
            min_dim, max_dim = min(self.out_features, self.in_features), max(self.out_features, self.in_features)

            # As adapted layers may have different shapes and RandLora contains a single shared pair of A and B matrices,
            # we initialize these matrices with the largest required size for each dimension.
            # During the forward pass, required submatrices are sliced out from the shared randlora_A and randlora_B.
            sliced_A = randlora_A[:, : self.num_bases, :min_dim].to(device)
            sliced_B = randlora_B[:max_dim, : self.num_bases, :].to(device)
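            # Assuming the leading dimension of randlora_A and the trailing dimension of randlora_B hold the rank r,
            # sliced_A has shape (r, num_bases, min_dim) and sliced_B has shape (max_dim, num_bases, r).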

            # Flattening the matrices over the rank and number of bases dimensions is more memory efficient
            update_B = sliced_B.flatten(start_dim=1)
            update_A = UniqueBaseGrad.apply(sliced_A, randlora_lambda, randlora_gamma).flatten(end_dim=1)
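            # With the assumed shapes, update_B is (max_dim, num_bases * r) and update_A is (r * num_bases, min_dim).
            # The two return values below are ordered so that the first always has in_features as its trailing
            # dimension and the second has out_features as its leading dimension.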
            if min_dim == self.in_features:
                return update_A, update_B

            return update_B.T, update_A.T

        def get_delta_weight(self, adapter) -> torch.Tensor:
            """
            Compute the delta weight for the given adapter.

            Args:
                adapter (str):
                    The name of the adapter for which the delta weight should be computed.
            """

            update_B, update_A = self.get_scaled_bases(adapter)

            update = update_B @ update_A
            output_tensor = transpose(update, self.fan_in_fan_out)
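            # transpose() returns the update transposed when fan_in_fan_out is True (base layers that store their
            # weights as (in_features, out_features), e.g. Conv1D) and unchanged otherwise.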

            scaling = self.scaling[adapter]

            return output_tensor * scaling

        def forward(self, x: torch.Tensor, *args, **kwargs) -> torch.Tensor:
            """
            Perform the forward pass using the RandLora adapter.

            Args:
                x (torch.Tensor): Input tensor.

            Returns:
                torch.Tensor: Output tensor after applying the RandLora adaptation.

            Note:
                This method implements the RandLora-specific forward pass. It applies the shared projections
                (randlora_A and randlora_B) along with the per-layer trainable parameters (lambda and gamma) to compute
                the adapter output.
            """
            if self.disable_adapters:
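                # Adapters are disabled: undo any previous merge so that only the base layer's original weights are
                # used.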
                if self.merged:
                    self.unmerge()
                result = self.base_layer(x, *args, **kwargs)
            elif self.merged:
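                # The adapter weights are already merged into the base layer, so its output already contains the
                # RandLora update.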
                result = self.base_layer(x, *args, **kwargs)
            else:
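                # Active, unmerged adapters: run the quantized base layer first, then add the RandLora update to the
                # result.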
                result = self.base_layer(x, *args, **kwargs)
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -



src/peft/tuners/randlora/bnb.py [354:425]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        def get_scaled_bases(self, adapter, device=None) -> tuple[torch.Tensor, torch.Tensor]:
            """
            Performs scaling on the smallest random base (randlora_A) and returns randlora_A and randlora_B in the
            correct order to fit the target layers' dimensions

            Args:
                adapter (str):
                    The name of the adapter for which the delta weight should be computed.
            """

            randlora_A = self.randlora_A[adapter]
            randlora_B = self.randlora_B[adapter]
            if device is None:
                device = randlora_B.device
            dtype = randlora_B.dtype

            # In case users want to merge adapter weights that are in (b)float16 while on CPU, we cast the weights to
            # float32, perform the merge, and then cast back to (b)float16, because some CPUs have slow bf16/fp16
            # matmuls.
            cast_to_fp32 = device.type == "cpu" and (dtype == torch.float16 or dtype == torch.bfloat16)

            randlora_lambda = self.randlora_lambda[adapter].to(device)
            randlora_gamma = self.randlora_gamma[adapter].to(device)

            if cast_to_fp32:
                randlora_A = randlora_A.float()
                randlora_B = randlora_B.float()
                randlora_lambda = randlora_lambda.float()
                randlora_gamma = randlora_gamma.float()

            # The trainable parameters are always applied to randlora_A, the smallest basis.
            min_dim, max_dim = min(self.out_features, self.in_features), max(self.out_features, self.in_features)

            # As adapted layers may have different shapes and RandLora contains a single shared pair of A and B matrices,
            # we initialize these matrices with the largest required size for each dimension.
            # During the forward pass, required submatrices are sliced out from the shared randlora_A and randlora_B.
            sliced_A = randlora_A[:, : self.num_bases, :min_dim].to(device)
            sliced_B = randlora_B[:max_dim, : self.num_bases, :].to(device)
            # Flattening the matrices over the rank and number of bases dimensions is more memory efficient
            update_B = sliced_B.flatten(start_dim=1)
            update_A = UniqueBaseGrad.apply(sliced_A, randlora_lambda, randlora_gamma).flatten(end_dim=1)
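            # Assuming the non-sliced dimension of each shared basis is the rank r, update_B has shape
            # (max_dim, num_bases * r) and update_A has shape (r * num_bases, min_dim).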
            if min_dim == self.in_features:
                return update_A, update_B

            return update_B.T, update_A.T

        def get_delta_weight(self, adapter) -> torch.Tensor:
            """
            Compute the delta weight for the given adapter.

            Args:
                adapter (str):
                    The name of the adapter for which the delta weight should be computed.
            """
            update_B, update_A = self.get_scaled_bases(adapter)

            update = update_B @ update_A
            output_tensor = transpose(update, self.fan_in_fan_out)

            scaling = self.scaling[adapter]

            return output_tensor * scaling

        def forward(self, x: torch.Tensor, *args, **kwargs) -> torch.Tensor:
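            """
            Perform the forward pass using the RandLora adapter.
            """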
            if self.disable_adapters:
                if self.merged:
                    self.unmerge()
                result = self.base_layer(x, *args, **kwargs)
            elif self.merged:
                result = self.base_layer(x, *args, **kwargs)
            else:
                result = self.base_layer(x, *args, **kwargs)
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
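
Both excerpts cast (b)float16 weights to float32 for CPU merges because some CPUs have slow bf16/fp16 matmuls. A
minimal, self-contained sketch of that pattern, independent of the RandLora classes above (the function name and
tensors are illustrative only):

import torch

def merge_on_cpu(weight: torch.Tensor, delta: torch.Tensor) -> torch.Tensor:
    # Cast (b)float16 CPU tensors to float32 for the merge, then cast the result back,
    # mirroring the cast_to_fp32 handling described in the comments above.
    dtype = weight.dtype
    cast_to_fp32 = weight.device.type == "cpu" and dtype in (torch.float16, torch.bfloat16)
    if cast_to_fp32:
        weight, delta = weight.float(), delta.float()
    merged = weight + delta
    return merged.to(dtype) if cast_to_fp32 else merged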



