src/lighteval/models/transformers/transformers_model.py
def _create_auto_model(self) -> transformers.PreTrainedModel:
    """
    Creates an instance of the pretrained HF model.

    Returns:
        transformers.PreTrainedModel: The created auto model instance.
    """
    model_parallel, max_memory, device_map = self.init_model_parallel(self.config.model_parallel)
    self.config.model_parallel = model_parallel
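    # The "4bit"/"8bit" dtype strings select bitsandbytes quantization rather
    # than an actual torch dtype.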
    if self.config.dtype == "4bit":
        quantization_config = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_compute_dtype=torch.float16)
    elif self.config.dtype == "8bit":
        quantization_config = BitsAndBytesConfig(load_in_8bit=True)
    else:
        quantization_config = None
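    # Resolve the configured dtype string into the torch dtype used for loading.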
    torch_dtype = _get_dtype(self.config.dtype)
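    # An optional subfolder is folded into the revision string ("revision/subfolder").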
    subfolder = self.config.subfolder
    revision = self.config.revision + (f"/{subfolder}" if subfolder is not None else "")
    pretrained_config = self.transformers_config

    kwargs = self.config.model_loading_kwargs.copy()
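    # Only inject our quantization config when the checkpoint does not already
    # ship one in its own pretrained config.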
if "quantization_config" not in pretrained_config.to_dict():
kwargs["quantization_config"] = quantization_config
    model = AutoModelForCausalLM.from_pretrained(
        self.config.model_name,
        revision=revision,
        max_memory=max_memory,
        device_map=device_map,
        torch_dtype=torch_dtype,
        trust_remote_code=self.config.trust_remote_code,
        **kwargs,
    )
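    # Device placement is presumably handled by device_map above, which is why
    # the explicit move below stays disabled.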
    # model.to(self.device)
    model.eval()
    torch.set_grad_enabled(False)
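    # Optionally compile the model in place; AttributeError is caught so that
    # torch/model combinations without Module.compile() degrade gracefully.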
    if self.config.compile:
        try:
            logger.info("Compiling the model")
            model.compile()
        except AttributeError as e:
            logger.warning("Could not compile the model because: %s", e)

    return model