# optimum_benchmark/backends/pytorch/backend.py
def load_transformers_model(self):
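    """Load the Transformers model and apply the configured post-processing steps.

    In order: quantization config resolution, weight loading (pretrained or
    randomly initialized), KV-cache selection, BetterTransformer conversion,
    eval mode, PEFT wrapping, DeepSpeed-Inference, and torch.compile.
    """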
    if self.config.deepspeed_inference and self.is_quantized:
        raise ValueError("Deepspeed-Inference is not compatible with Transformers quantization")
    # Quantization
    if self.is_quantized:
        self.logger.info("\t+ Processing AutoQuantization config")
        self.quantization_config = AutoQuantizationConfig.from_dict(
            dict(
                getattr(self.pretrained_config, "quantization_config", {}),
                **self.config.quantization_config,
            )
        )
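        # dict(base, **overrides): keys from self.config.quantization_config
        # override any quantization_config already embedded in the checkpoint's
        # pretrained_config.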
    # Model loading
    if self.config.no_weights:
        self.logger.info("\t+ Creating no weights model")
        if self.config.tp_plan is not None:
            self.create_no_weights_model_slow()
        else:
            self.create_no_weights_model_fast()
        self.logger.info("\t+ Loading model with random weights")
        self.load_transformers_model_with_no_weights()
    else:
        self.logger.info("\t+ Loading model with pretrained weights")
        self.load_transformers_model_from_pretrained()
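    # The no_weights path materializes a model with randomly initialized weights
    # instead of downloading a checkpoint; a tensor-parallel plan (tp_plan)
    # appears to require the slower creation path.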
    # KV-Cache
    if self.config.cache_implementation is not None:
        self.logger.info(f"\t+ Setting cache implementation to {self.config.cache_implementation}")
        self.pretrained_model.generation_config.cache_implementation = self.config.cache_implementation
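        # Stored on generation_config so that generate() picks it up; "static"
        # is one example value transformers accepts here.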
    # BetterTransformer
    if self.config.to_bettertransformer:
        self.logger.info("\t+ To BetterTransformer")
        self.pretrained_model.to_bettertransformer()
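        # to_bettertransformer() swaps supported layers for optimum's fastpath
        # ("BetterTransformer") implementations.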
    # Eval mode
    if self.config.eval_mode:
        self.logger.info("\t+ Enabling eval mode")
        self.pretrained_model.eval()
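        # eval() disables dropout and puts normalization layers in inference
        # mode; it does not affect gradient tracking.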
    # PEFT
    if self.config.peft_type is not None:
        self.logger.info("\t+ Applying PEFT")
        self.pretrained_model = apply_peft(self.pretrained_model, self.config.peft_type, self.config.peft_config)
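        # apply_peft is expected to wrap the model with a PEFT adapter (e.g.
        # LoRA) built from peft_type and peft_config.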
    # DeepSpeed
    if self.config.deepspeed_inference:
        self.logger.info("\t+ Initializing DeepSpeed Inference Engine")
        self.pretrained_model = deepspeed.init_inference(
            model=self.pretrained_model, config=self.config.deepspeed_inference_config
        )
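        # init_inference wraps the model in DeepSpeed's inference engine;
        # deepspeed_inference_config is passed through as the engine config
        # (e.g. dtype or tensor-parallel settings, per DeepSpeed's schema).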
    # Torch compile
    if self.config.torch_compile:
        if self.config.torch_compile_target == "model":
            self.logger.info("\t+ Using torch.compile on model")
            self.pretrained_model = torch.compile(self.pretrained_model, **self.config.torch_compile_config)
        # elif self.config.torch_compile_target == "regions":
        #     self.logger.info("\t+ Using torch.compile on regions")
        #     self.pretrained_model = compile_regions(self.pretrained_model, **self.config.torch_compile_config)
        elif self.config.torch_compile_target == "forward":
            self.logger.info("\t+ Using torch.compile on forward")
            self.pretrained_model.forward = torch.compile(
                self.pretrained_model.forward, **self.config.torch_compile_config
            )
        else:
            raise ValueError(f"Target {self.config.torch_compile_target} not supported")