def load_transformers_model()

in optimum_benchmark/backends/pytorch/backend.py
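
Loads the model through transformers, applying each optional step in order: quantization, weight loading (pretrained or randomly initialized), KV-cache selection, BetterTransformer, eval mode, PEFT, DeepSpeed-Inference, and torch.compile. Every step is gated by the backend config.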


    def load_transformers_model(self):
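        # DeepSpeed-Inference swaps modules for its own inference kernels, so it
        # cannot wrap layers already rewritten by a transformers quantization backend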
        if self.config.deepspeed_inference and self.is_quantized:
            raise ValueError("Deepspeed-Inference is not compatible with Transformers quantization")

        # Quantization
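        # merge the quantization config shipped with the checkpoint (if any)
        # with the user-provided one; user keys take precedence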
        if self.is_quantized:
            self.logger.info("\t+ Processing AutoQuantization config")
            self.quantization_config = AutoQuantizationConfig.from_dict(
                dict(
                    getattr(self.pretrained_config, "quantization_config", {}),
                    **self.config.quantization_config,
                )
            )

        # Model loading
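        # no_weights instantiates a randomly initialized model from the config alone,
        # skipping the checkpoint download (see the sketch after this listing);
        # a tensor-parallel plan requires the slower creation path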
        if self.config.no_weights:
            self.logger.info("\t+ Creating no weights model")
            if self.config.tp_plan is not None:
                self.create_no_weights_model_slow()
            else:
                self.create_no_weights_model_fast()
            self.logger.info("\t+ Loading model with random weights")
            self.load_transformers_model_with_no_weights()
        else:
            self.logger.info("\t+ Loading model with pretrained weights")
            self.load_transformers_model_from_pretrained()

        # KV-Cache
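        # e.g. 'dynamic' or 'static'; generate() uses this to pick the KV cache class,
        # and a static cache is what makes generation compatible with torch.compile's CUDA graphs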
        if self.config.cache_implementation is not None:
            self.logger.info(f"\t+ Setting cache implementation to {self.config.cache_implementation}")
            self.pretrained_model.generation_config.cache_implementation = self.config.cache_implementation

        # BetterTransformer
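        # to_bettertransformer() dispatches supported modules to optimum's
        # BetterTransformer fast paths (fused attention kernels)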
        if self.config.to_bettertransformer:
            self.logger.info("\t+ To BetterTransformer")
            self.pretrained_model.to_bettertransformer()

        # Eval mode
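        # eval() disables dropout and puts normalization layers in inference mode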
        if self.config.eval_mode:
            self.logger.info("\t+ Enabling eval mode")
            self.pretrained_model.eval()

        # PEFT
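        # wraps the model with a parameter-efficient fine-tuning adapter
        # (e.g. LoRA) of the configured type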
        if self.config.peft_type is not None:
            self.logger.info("\t+ Applying PEFT")
            self.pretrained_model = apply_peft(self.pretrained_model, self.config.peft_type, self.config.peft_config)

        # DeepSpeed
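        # init_inference returns a DeepSpeed InferenceEngine wrapping the model;
        # the config controls dtype, tensor parallelism and kernel injection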
        if self.config.deepspeed_inference:
            self.logger.info("\t+ Initializing DeepSpeed Inference Engine")
            self.pretrained_model = deepspeed.init_inference(
                model=self.pretrained_model, config=self.config.deepspeed_inference_config
            )

        # Torch compile
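        # 'model' wraps the whole nn.Module in an OptimizedModule, while 'forward'
        # only swaps the bound forward method and leaves the module type untouched
        # (see the sketch after this listing)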
        if self.config.torch_compile:
            if self.config.torch_compile_target == "model":
                self.logger.info("\t+ Using torch.compile on model")
                self.pretrained_model = torch.compile(self.pretrained_model, **self.config.torch_compile_config)
            # elif self.config.torch_compile_target == "regions":
            #     self.logger.info("\t+ Using torch.compile on regions")
            #     self.pretrained_model = compile_regions(self.pretrained_model, **self.config.torch_compile_config)
            elif self.config.torch_compile_target == "forward":
                self.logger.info("\t+ Using torch.compile on forward")
                self.pretrained_model.forward = torch.compile(
                    self.pretrained_model.forward, **self.config.torch_compile_config
                )
            else:
                raise ValueError(f"Target {self.config.torch_compile_target} not supported")
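
For context, the no_weights path boils down to instantiating the model from its config instead of from a checkpoint. A minimal sketch of that general technique, using gpt2 as a stand-in checkpoint; this is not the code behind create_no_weights_model_fast, which also has to handle quantized and tensor-parallel models:

    from transformers import AutoConfig, AutoModelForCausalLM

    # only the small config file is fetched; no checkpoint download
    config = AutoConfig.from_pretrained("gpt2")

    # weights come from transformers' default random initialization,
    # which is fine for latency/memory benchmarks
    model = AutoModelForCausalLM.from_config(config)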
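
The two torch_compile_target values differ only in what gets wrapped. A minimal sketch (model name and compile options are illustrative, not taken from this file):

    import torch
    from transformers import AutoModelForCausalLM

    model = AutoModelForCausalLM.from_pretrained("gpt2").eval()

    # target == "model": the whole nn.Module is wrapped in an OptimizedModule
    compiled_model = torch.compile(model, mode="reduce-overhead")

    # target == "forward": only the bound forward method is swapped,
    # so the object keeps its original type
    model.forward = torch.compile(model.forward, mode="reduce-overhead")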