in optimum/neuron/peft/tuners/lora/layer.py [0:0]
def forward(self, x: torch.Tensor, *args: Any, **kwargs: Any) -> torch.Tensor:
    # TODO: no dtype conversion here, unlike in Linear, is that correct?
    self._check_forward_args(x, *args, **kwargs)
    adapter_names = kwargs.pop("adapter_names", None)
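    # Dispatch follows the upstream PEFT Embedding layer: base-only output when
    # adapters are disabled or already merged, per-sample routing when
    # `adapter_names` is passed, and otherwise the base output plus the
    # LoRA/DoRA deltas computed below.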
    if self.disable_adapters:
        if self.merged:
            self.unmerge()
        result = self.base_layer(x, *args, **kwargs)
    elif adapter_names is not None:
        result = self._mixed_batch_forward(x, *args, adapter_names=adapter_names, **kwargs)
    elif self.merged:
        result = self.base_layer(x, *args, **kwargs)
    else:
        result = self.base_layer(x, *args, **kwargs)
        # If sequence parallelism is enabled, we need to scatter the input to the sequence parallel region.
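        # With sequence parallelism on, the base layer's output is already
        # scattered along the sequence dimension, so `x` is scattered the same
        # way for the LoRA delta computed below to line up with `result`.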
        sequence_parallel_enabled = self.get_base_layer().sequence_parallel_enabled
        sequence_dimension = self.get_base_layer().sequence_dim
        if sequence_dimension is None:
            sequence_dimension = 0
        if sequence_parallel_enabled:
            if sequence_dimension == 0:
                x = x.transpose(0, 1).contiguous()
            x = scatter_to_sequence_parallel_region(x, sequence_dimension=sequence_dimension)
        torch_result_dtype = result.dtype
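        # Accumulate each active adapter's contribution on top of the base output.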
        for active_adapter in self.active_adapters:
            if active_adapter not in self.lora_embedding_A:
                continue
            embedding_A = self.lora_embedding_A[active_adapter].T
            embedding_B = self.lora_embedding_B[active_adapter].T
            scaling = self.scaling[active_adapter]
            if not self.use_dora[active_adapter]:
                after_A = self._embed(x, embedding_A)
                result = result + (after_A @ embedding_B) * scaling
            else:
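                # DoRA path: the magnitude vector returns a scale applied to the
                # base output plus the extra LoRA contribution.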
                mag_norm_scale, dora_result = self.lora_magnitude_vector[active_adapter](
                    x,
                    lora_A=embedding_A,
                    lora_B=embedding_B,
                    scaling=scaling,
                    base_layer=self.get_base_layer(),
                    embed_fn=self._embed,
                )
                result = mag_norm_scale * result + dora_result
        result = result.to(torch_result_dtype)
    return result
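
For intuition, here is a minimal, self-contained sketch (not optimum-neuron code) of why `x` is transposed and scattered before the LoRA delta is computed. It assumes the base embedding returns a sequence-first, sequence-scattered output when sequence parallelism is on, and it emulates `scatter_to_sequence_parallel_region` with `torch.chunk`; the sizes, `tp_size`, and `rank` are made up for illustration.

import torch
import torch.nn.functional as F

# Hypothetical sizes for illustration only.
vocab, hidden, lora_r = 100, 16, 4
batch, seq = 2, 8
tp_size, rank = 2, 0  # stand-ins for the tensor-parallel world size and this rank

x = torch.randint(0, vocab, (batch, seq))       # input ids, [batch, seq]
weight = torch.randn(vocab, hidden)             # base embedding weight
lora_A = torch.randn(vocab, lora_r)             # lora_embedding_A[adapter].T
lora_B = torch.randn(lora_r, hidden)            # lora_embedding_B[adapter].T

# Base layer with sequence parallelism (sequence_dim == 0): its output is
# sequence-first and each rank only holds a [seq // tp_size, batch, hidden] slice.
base_out = F.embedding(x, weight).transpose(0, 1)     # [seq, batch, hidden]
result = torch.chunk(base_out, tp_size, dim=0)[rank]  # emulated scatter

# LoRA path: transpose and scatter the input ids the same way so that the delta
# has the same (scattered) sequence length as `result`.
x_sp = x.transpose(0, 1).contiguous()                 # [seq, batch]
x_sp = torch.chunk(x_sp, tp_size, dim=0)[rank]        # [seq // tp_size, batch]
after_A = F.embedding(x_sp, lora_A)                   # [seq // tp_size, batch, lora_r]
delta = after_A @ lora_B                              # scaling omitted for brevity
assert delta.shape == result.shape
result = result + delta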