def post_init_model()

in optimum/gptq/quantizer.py


    def post_init_model(self, model):
        """
        Post-initialization that requires device information, for example buffer initialization on device.

        Args:
            model (`nn.Module`):
                The input model
        """
        # The Exllama/Exllamav2 kernels require every module to be on a CUDA device;
        # fall back to the standard backend otherwise. The outer condition already
        # guarantees `disable_exllama` is False, so no inner re-check is needed.
        if self.bits == 4 and not self.disable_exllama:
            if get_device(model).type != "cuda" or (
                hasattr(model, "hf_device_map") and any(d in model.hf_device_map for d in ["cpu", "disk", "hpu"])
            ):
                logger.warning(
                    "Found modules on cpu/disk. Using Exllama/Exllamav2 backend requires all the modules to be on GPU. Setting `disable_exllama=True`"
                )
                self.disable_exllama = True

        # Minimal attribute holder used to mimic the quantize_config object that
        # gptq_post_init expects on the model.
        class StoreAttr(object):
            pass

        if is_gptqmodel_available():
            # gptqmodel operates on the GPTQ v2 checkpoint format, so convert v1 checkpoints first.
            model, _ = hf_convert_gptq_v1_to_v2_format(
                model, self.bits, self.quant_linear, self.checkpoint_format, self.meta
            )

        model.quantize_config = StoreAttr()
        model.quantize_config.desc_act = self.desc_act
        # Initialize device-dependent buffers (e.g. Exllama workspaces) on the quantized modules.
        model = gptq_post_init(model, use_act_order=self.desc_act)
        # With act-order and the Exllama v1 kernels, temporary buffers are sized by the
        # maximum input length, so apply a user-provided limit when one is set.
        if (
            self.desc_act
            and (not self.disable_exllama and self.exllama_version == ExllamaVersion.ONE)
            and self.max_input_length is not None
        ):
            model = exllama_set_max_input_length(model, self.max_input_length)
        return model
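
For context, a minimal sketch of how this hook might be driven after loading an already-quantized model. The checkpoint name is a hypothetical placeholder, and the quantizer arguments are illustrative rather than taken from the source:

    from transformers import AutoModelForCausalLM

    from optimum.gptq import GPTQQuantizer

    # Hypothetical 4-bit GPTQ checkpoint; replace with a real one.
    model = AutoModelForCausalLM.from_pretrained("my-org/llama-7b-gptq", device_map="auto")

    # Quantizer configured to match the checkpoint (bits, group size, act-order).
    quantizer = GPTQQuantizer(bits=4, group_size=128, desc_act=True)

    # post_init_model disables Exllama when modules sit on cpu/disk, converts the
    # checkpoint to v2 format when gptqmodel is available, and initializes buffers.
    model = quantizer.post_init_model(model)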