in optimum/exporters/neuron/utils.py [0:0]
def _load_lora_weights_to_pipeline(pipeline: "DiffusionPipeline", lora_args: Optional[LoRAAdapterArguments]):
    if lora_args is None:
        lora_args = LoRAAdapterArguments()

    if lora_args.model_ids and lora_args.weight_names:
        if len(lora_args.model_ids) == 1:
            pipeline.load_lora_weights(lora_args.model_ids[0], weight_name=lora_args.weight_names[0])
            # To trace the LoRA weights, we need PEFT to fuse the adapter directly into the model weights;
            # passing a LoRA scale to the Neuron pipeline at inference time won't work.
            pipeline.fuse_lora(lora_scale=lora_args.scales[0] if lora_args.scales else 1.0)
        elif len(lora_args.model_ids) > 1:
            if not len(lora_args.model_ids) == len(lora_args.weight_names) == len(lora_args.adapter_names):
                raise ValueError(
                    f"A weight name and an adapter name are required for each LoRA model when fusing more than one. "
                    f"You have {len(lora_args.model_ids)} LoRA models to fuse, but {len(lora_args.weight_names)} "
                    f"weight names and {len(lora_args.adapter_names)} adapter names."
                )
            # Load each adapter under its own name, then apply per-adapter scales and fuse them all at once.
            for model_id, weight_name, adapter_name in zip(
                lora_args.model_ids, lora_args.weight_names, lora_args.adapter_names
            ):
                pipeline.load_lora_weights(model_id, weight_name=weight_name, adapter_name=adapter_name)
            if lora_args.scales:
                pipeline.set_adapters(lora_args.adapter_names, adapter_weights=lora_args.scales)
            pipeline.fuse_lora()
    return pipeline
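
For context, a minimal sketch of how this helper might be called when preparing a diffusers pipeline with two LoRA adapters before tracing. It assumes LoRAAdapterArguments accepts its fields (model_ids, weight_names, adapter_names, scales) as constructor keyword arguments and that both names are importable from this module; the adapter repositories and file names are placeholders.

from diffusers import DiffusionPipeline

# Assumed import path, based on the file this helper lives in.
from optimum.exporters.neuron.utils import LoRAAdapterArguments, _load_lora_weights_to_pipeline

pipeline = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0")

# Hypothetical adapter repos and weight files, for illustration only.
lora_args = LoRAAdapterArguments(
    model_ids=["user/lora-style-a", "user/lora-style-b"],
    weight_names=["style_a.safetensors", "style_b.safetensors"],
    adapter_names=["style_a", "style_b"],
    scales=[0.8, 0.5],
)

pipeline = _load_lora_weights_to_pipeline(pipeline, lora_args)
# The adapters are now fused into the model weights, so the pipeline can be traced for Neuron.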