in vision/m4/models/custom_modules.py
def from_pretrained(cls, *model_args, is_resume=False, new_model=False, **kwargs):
"""
Use this method when loading an already pretrained vloom model - either from a checkpoint or from hub.
For creating an untrained model use `pretrained_models` instead.
"""
    # config is:
    # 1. either not passed, in which case we use the model's default config (used by tests)
    # 2. passed, in which case it's one of:
    #    2a. a `PretrainedConfig` object (a new m4 model)
    #    2b. a path to a json config (an already pretrained m4 model, usually resumed training)
config = kwargs.get("config", None)
if config is None:
config = cls.config_class.from_pretrained(*model_args, **kwargs, return_unused_kwargs=False)
elif not isinstance(config, PretrainedConfig):
# adapted from https://github.com/huggingface/transformers/blob/d0acc9537829e7d067edbb791473bbceb2ecf056/src/transformers/modeling_utils.py#L1920
assert isinstance(config, os.PathLike)
config_path = str(config)
config = cls.config_class.from_pretrained(
config_path,
return_unused_kwargs=False,
**kwargs,
)
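    # Illustrative note (an assumption about typical usage, not from the original source):
    # in case 2b `config` is typically a `pathlib.Path` to a saved `config.json`; the
    # assert above requires an `os.PathLike`, so a plain `str` path would not pass it.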
    is_untrained_vloom_model = False
    is_pretrained_vloom_model_resumed = False
    is_pretrained_vloom_model_from_hub_or_path = False
    # Three cases:
    # 1. The model has never been trained. We need a `vision_model_name` to start training from,
    #    since we never create the vision model from scratch.
    # 2. The model has been trained and is resuming. We load a randomly-initialized empty model
    #    and let deepspeed fill in the weights from the checkpoint.
    # 3. The model has been trained and saved to a path or to the hub, and carries a
    #    `vision_model_name`. We initialize the vision model from the `vision_model_name` class.
if new_model:
is_untrained_vloom_model = True
elif is_resume:
is_pretrained_vloom_model_resumed = True
else:
is_pretrained_vloom_model_from_hub_or_path = True
    # torch_dtype is crucial for keeping memory use to a minimum at load time (e.g. torch.bfloat16)
torch_dtype = kwargs.get("torch_dtype", None)
vision_model_name = config.vision_config.vision_model_name
# Create an uninitialized vision_model to insert into the main model.
vision_model_config = AutoConfig.from_pretrained(vision_model_name, trust_remote_code=True)
# Override image_size if we want to increase it compared to pretraining
if hasattr(vision_model_config, "vision_config"):
vision_model_config.vision_config.image_size = config.vision_config.image_size
else:
vision_model_config.image_size = config.vision_config.image_size
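    # For instance (illustrative, not from the original source): CLIP-style configs nest
    # their vision settings under a `vision_config` attribute, while single-tower configs
    # such as a plain ViT expose `image_size` at the top level -- hence the two branches.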
# model_with_vision_component = AutoModel.from_config(
# vision_model_config, torch_dtype=torch_dtype, trust_remote_code=True
# )
# Extracts the desired submodule if the part we want is nested (e.g. as in clip)
# kwargs["vision_model"] = vision_model_name_to_model(vision_model_name, model_with_vision_component)
    # 1. We load a trained checkpoint but we are not resuming a training run:
    #    if the model comes from the hub or from a path, the language model is loaded as well, and
    #    the uninitialized vision_model is overridden by the checkpoint's weights (i.e. idefics' weights)
if is_pretrained_vloom_model_from_hub_or_path:
model = super().from_pretrained(*model_args, **kwargs)
    # 2. We resume under deepspeed:
    #    we create an empty model and get deepspeed to load the weights from the checkpoint.
    #    Not all models have these keys, so handle the case where they don't.
elif is_pretrained_vloom_model_resumed:
_ = kwargs.pop("config", None)
model = super().from_pretrained(None, config=config, state_dict={}, **kwargs)
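        # Note (an interpretation of the comment above, not from the original source):
        # `state_dict={}` keeps from_pretrained from materializing any pretrained weights
        # here; the parameters stay randomly initialized until deepspeed restores them
        # from the checkpoint.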
    # 3. If is_untrained_vloom_model, we load the language model first, then override
    #    the uninitialized vision_model with one carrying pretrained weights from `vision_model_name`
elif is_untrained_vloom_model:
model = super().from_pretrained(*model_args, **kwargs)
cls.override_vision_model_wrapper(
model, config, vision_model_name, vision_model_config.to_dict(), torch_dtype
)
return model
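
# --- Usage sketch (illustrative addition, not part of the original file) ---
# A minimal sketch of the three call paths above, assuming a concrete subclass
# `MyVLOOMModel` that inherits this `from_pretrained`; every checkpoint name,
# path, and config object below is a placeholder, not a real artifact.
#
#   from pathlib import Path
#   import torch
#
#   # Case 3: load a fully trained checkpoint from the hub or a local path
#   model = MyVLOOMModel.from_pretrained(
#       "org/trained-vloom-checkpoint", torch_dtype=torch.bfloat16
#   )
#
#   # Case 2: resume training under deepspeed -- pass the saved config as an
#   # os.PathLike (a plain str would fail the assert above); the weights are
#   # restored by deepspeed afterwards, not by this call
#   model = MyVLOOMModel.from_pretrained(
#       config=Path("checkpoints/step_1000/config.json"), is_resume=True
#   )
#
#   # Case 1: start a brand-new training run -- the base language model comes
#   # from `model_args`, and the vision tower is then overridden with pretrained
#   # weights from `config.vision_config.vision_model_name`
#   model = MyVLOOMModel.from_pretrained(
#       "org/base-language-model", new_model=True, config=new_m4_config
#   )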