def load_tf_weights_in_imagegpt()

in src/transformers/models/imagegpt/modeling_imagegpt.py [0:0]


def load_tf_weights_in_imagegpt(model, config, imagegpt_checkpoint_path):
    """
    Load tf checkpoints in a pytorch model

    Every variable listed in the TensorFlow checkpoint is read, squeezed, and copied into the parameter of `model`
    that its "/"-separated name resolves to. Optimizer state and step counters are skipped.

    Args:
        model: PyTorch model whose parameters are overwritten in place. All variables except `wtet` are resolved
            starting from `model.transformer`; `wtet` is resolved from `model` itself (to `lm_head.weight`).
        config: Configuration object; `config.n_embd` and `config.vocab_size` are read to reshape and slice the
            attention and embedding weights.
        imagegpt_checkpoint_path: Path to the TensorFlow checkpoint (made absolute with `os.path.abspath`).

    Returns:
        The same `model` object, with its weights populated from the checkpoint.

    Raises:
        ImportError: If TensorFlow cannot be imported (an error is logged first, then the exception is re-raised).
        AssertionError: If a fully-initialized parameter's shape differs from the checkpoint array's shape. The
            exception args are `(pointer.shape, array.shape)`.
    """
    try:
        import re

        import tensorflow as tf
    except ImportError:
        logger.error(
            "Loading a TensorFlow model in PyTorch, requires TensorFlow to be installed. Please see "
            "https://www.tensorflow.org/install/ for installation instructions."
        )
        raise
    tf_path = os.path.abspath(imagegpt_checkpoint_path)
    logger.info(f"Converting TensorFlow checkpoint from {tf_path}")
    # Load weights from TF model
    init_vars = tf.train.list_variables(tf_path)
    names = []
    arrays = []

    # First pass: read every variable from the checkpoint before touching the model.
    for name, shape in init_vars:
        logger.info(f"Loading TF weight {name} with shape {shape}")
        array = tf.train.load_variable(tf_path, name)
        names.append(name)
        arrays.append(array.squeeze())

    # adam_v and adam_m are variables used in AdamWeightDecayOptimizer to calculated m and v
    # which are not required for using pretrained model
    skipped_scopes = ("adam_v", "adam_m", "AdamWeightDecayOptimizer", "AdamWeightDecayOptimizer_1", "global_step")

    # Second pass: resolve each variable name to a PyTorch parameter and copy the data in.
    for name, array in zip(names, arrays):
        name = name[6:]  # skip "model/"
        name = name.split("/")

        # The 6-character slice above is unconditional, so a top-level "global_step" variable
        # arrives here as "_step"; that is what the second check catches.
        if any(n in skipped_scopes for n in name) or name[-1] in ["_step"]:
            logger.info("Skipping {}".format("/".join(name)))
            continue

        pointer = model
        if name[-1] not in ["wtet"]:
            pointer = getattr(pointer, "transformer")

        # Walk the scope path, mapping TF scope names onto PyTorch attribute names.
        for m_name in name:
            # Scopes such as "h0" are split into an attribute name and an integer index.
            if re.fullmatch(r"[A-Za-z]+\d+", m_name):
                scope_names = re.split(r"(\d+)", m_name)
            else:
                scope_names = [m_name]

            if scope_names[0] == "w" or scope_names[0] == "g":
                pointer = getattr(pointer, "weight")
            elif scope_names[0] == "b":
                pointer = getattr(pointer, "bias")
            elif scope_names[0] == "wpe" or scope_names[0] == "wte":
                pointer = getattr(pointer, scope_names[0])
                pointer = getattr(pointer, "weight")
            elif scope_names[0] in ["q_proj", "k_proj", "v_proj"]:
                # The three separate TF projections all live in the single `c_attn` weight.
                pointer = getattr(pointer, "c_attn")
                pointer = getattr(pointer, "weight")
            elif len(name) == 3 and name[1] == "attn" and scope_names[0] == "c_proj":
                pointer = getattr(pointer, scope_names[0])
                pointer = getattr(pointer, "weight")
            elif scope_names[0] == "wtet":
                pointer = getattr(pointer, "lm_head")
                pointer = getattr(pointer, "weight")
            elif scope_names[0] == "sos":
                # The "sos" variable is stored inside the `wte` weight (see the copy step below).
                pointer = getattr(pointer, "wte")
                pointer = getattr(pointer, "weight")
            else:
                pointer = getattr(pointer, scope_names[0])
            if len(scope_names) >= 2:
                num = int(scope_names[1])
                pointer = pointer[num]

        # For these variables the array initializes only part of the pointer (or is reshaped
        # before the copy), so the sizes are not expected to match.
        partial_init = (len(name) > 1 and name[1] == "attn") or name[-1] in ("wtet", "sos", "wte")
        if not partial_init and pointer.shape != array.shape:
            # Raised explicitly rather than via `assert` so the check still runs under `python -O`;
            # the exception type and args match what the previous assert-based check produced.
            raise AssertionError(pointer.shape, array.shape)

        logger.info(f"Initialize PyTorch weight {name}")

        if name[-1] == "q_proj":
            # Query projection fills the first n_embd columns of c_attn.weight.
            pointer.data[:, : config.n_embd] = torch.from_numpy(array.reshape(config.n_embd, config.n_embd)).T
        elif name[-1] == "k_proj":
            # Key projection fills the middle n_embd columns.
            pointer.data[:, config.n_embd : 2 * config.n_embd] = torch.from_numpy(
                array.reshape(config.n_embd, config.n_embd)
            ).T
        elif name[-1] == "v_proj":
            # Value projection fills the remaining columns.
            pointer.data[:, 2 * config.n_embd :] = torch.from_numpy(array.reshape(config.n_embd, config.n_embd)).T
        elif len(name) == 3 and name[1] == "attn" and name[2] == "c_proj":
            pointer.data = torch.from_numpy(array.reshape(config.n_embd, config.n_embd))
        elif name[-1] == "wtet":
            pointer.data = torch.from_numpy(array)
        elif name[-1] == "wte":
            # All rows except the last one; the last row is written by the "sos" variable.
            pointer.data[: config.vocab_size - 1, :] = torch.from_numpy(array)
        elif name[-1] == "sos":
            pointer.data[-1] = torch.from_numpy(array)
        else:
            pointer.data = torch.from_numpy(array)

    return model