in src/peft/peft_model.py [0:0]
def _setup_prompt_encoder(self, adapter_name: str):
    config = self.peft_config[adapter_name]
    if not hasattr(self, "prompt_encoder"):
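        # One prompt encoder and one tensor of prompt token ids are kept per adapter name.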
        self.prompt_encoder = torch.nn.ModuleDict({})
        self.prompt_tokens = {}
    transformer_backbone = None
    for name, module in self.base_model.named_children():
        for param in module.parameters():
            param.requires_grad = False
        if isinstance(module, PreTrainedModel):
            # Make sure to freeze the Transformers model
            if transformer_backbone is None:
                transformer_backbone = module
                self.transformer_backbone_name = name
    if transformer_backbone is None:
        transformer_backbone = self.base_model
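
    # Seq2seq models have two transformer submodules (encoder and decoder), so prompt tokens are
    # created for both; all other task types use a single submodule.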
    if config.num_transformer_submodules is None:
        config.num_transformer_submodules = 2 if config.task_type == TaskType.SEQ_2_SEQ_LM else 1

    # determine the word embeddings
    word_embeddings = None
    try:
        # First, try to find the word embeddings based on the module name; this should work for models like Bert,
        # Roberta, Deberta, etc.
        word_embeddings = self.base_model.get_submodule("embeddings.word_embeddings")
    except AttributeError:
        pass

    if word_embeddings is None:
        # Word embeddings could not be determined. Next try to guess them by checking which parameter has the size
        # of the vocab.
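        # (For example, with a GPT-2 base model this heuristic matches the wte embedding weight, whose first
        # dimension equals the vocab size, so word_embeddings ends up pointing at that Embedding module.)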
        for named_param, value in list(transformer_backbone.named_parameters()):
            # For ZeRO-3, the tensor is sharded across accelerators and deepspeed modifies it to a tensor with
            # shape [0]; the actual unsharded shape is stored in the "ds_shape" attribute. Special handling is
            # needed in case the model is initialized in a deepspeed.zero.Init() context or HfDeepSpeedConfig has
            # been called before. For reference, see: https://github.com/huggingface/peft/issues/996
            deepspeed_distributed_tensor_shape = getattr(value, "ds_shape", None)

            # Handle VLM case with separate text and vision configs
            if hasattr(self.base_model.config, "get_text_config"):
                vocab_size = self.base_model.config.get_text_config().vocab_size
            # below: for older transformers versions before get_text_config was added
            elif "text_config" in self.base_model.config:
                vocab_size = self.base_model.config.text_config.vocab_size
            else:
                vocab_size = self.base_model.config.vocab_size

            if value.shape[0] == vocab_size or (
                deepspeed_distributed_tensor_shape is not None
                and deepspeed_distributed_tensor_shape[0] == vocab_size
            ):
                word_embeddings = transformer_backbone.get_submodule(named_param.replace(".weight", ""))
                break

    self.word_embeddings = word_embeddings
    model_cls = PEFT_TYPE_TO_TUNER_MAPPING[config.peft_type]

    if config.peft_type in (PeftType.PROMPT_TUNING, PeftType.MULTITASK_PROMPT_TUNING, PeftType.CPT):
        prompt_encoder = model_cls(config, self.word_embeddings)
    elif config.peft_type == PeftType.P_TUNING:
        prompt_encoder = model_cls(config)
    elif config.peft_type == PeftType.PREFIX_TUNING:
        # prefix tuning now uses Cache but that won't work with gradient checkpointing
        if any(getattr(module, "gradient_checkpointing", False) for module in self.get_base_model().modules()):
            raise ValueError("Prefix tuning does not work with gradient checkpointing.")
        prompt_encoder = model_cls(config)
    else:
        raise ValueError("Not supported")

    prompt_encoder = prompt_encoder.to(self.device)
    self.prompt_encoder.update(torch.nn.ModuleDict({adapter_name: prompt_encoder}))
    self.prompt_tokens[adapter_name] = torch.arange(
        config.num_virtual_tokens * config.num_transformer_submodules
    ).long()
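
For context, a minimal usage sketch (not part of peft_model.py) showing how this method is typically reached through the public PEFT API; the base model and hyperparameter values below are illustrative assumptions:

from transformers import AutoModelForCausalLM
from peft import PromptTuningConfig, TaskType, get_peft_model

base_model = AutoModelForCausalLM.from_pretrained("gpt2")  # illustrative base model
peft_config = PromptTuningConfig(task_type=TaskType.CAUSAL_LM, num_virtual_tokens=20)

# Wrapping the model with a prompt-learning config ends up calling _setup_prompt_encoder:
# the base model is frozen, its word embeddings are located, and a prompt encoder plus its
# prompt token ids are registered under the "default" adapter.
peft_model = get_peft_model(base_model, peft_config)
peft_model.print_trainable_parameters()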