def load_tf_weights_in_big_bird()

in src/transformers/models/big_bird/modeling_big_bird.py [0:0]


def load_tf_weights_in_big_bird(model, tf_checkpoint_path, is_trivia_qa=False):
    """Load tf checkpoints in a pytorch model."""

    def load_tf_weights_bert(init_vars, tf_path):
        names = []
        tf_weights = {}

        for name, shape in init_vars:
            array = tf.train.load_variable(tf_path, name)
            name = name.replace("bert/encoder/LayerNorm", "bert/embeddings/LayerNorm")
            logger.info(f"Loading TF weight {name} with shape {shape}")
            names.append(name)
            tf_weights[name] = array

        return names, tf_weights

    def load_tf_weights_trivia_qa(init_vars):
        names = []
        tf_weights = {}

        for i, var in enumerate(init_vars):
            name_items = var.name.split("/")

            if "transformer_scaffold" in name_items[0]:
                layer_name_items = name_items[0].split("_")
                if len(layer_name_items) < 3:
                    layer_name_items += [0]

                name_items[0] = f"bert/encoder/layer_{layer_name_items[2]}"

            name = "/".join([_TRIVIA_QA_MAPPING[x] if x in _TRIVIA_QA_MAPPING else x for x in name_items])[
                :-2
            ]  # remove last :0 in variable

            if "self/attention/output" in name:
                name = name.replace("self/attention/output", "output")

            if i >= len(init_vars) - 2:
                name = name.replace("intermediate", "output")

            logger.info(f"Loading TF weight {name} with shape {var.shape}")
            array = var.value().numpy()
            names.append(name)
            tf_weights[name] = array

        return names, tf_weights

    try:
        import re

        import numpy as np
        import tensorflow as tf
    except ImportError:
        logger.error(
            "Loading a TensorFlow model in PyTorch, requires TensorFlow to be installed. Please see "
            "https://www.tensorflow.org/install/ for installation instructions."
        )
        raise
    tf_path = os.path.abspath(tf_checkpoint_path)
    logger.info(f"Converting TensorFlow checkpoint from {tf_path}")

    # Load weights from TF model
    init_vars = tf.saved_model.load(tf_path).variables if is_trivia_qa else tf.train.list_variables(tf_path)

    if len(init_vars) <= 0:
        raise ValueError("Loaded trained variables cannot be empty.")

    pt_names = list(model.state_dict().keys())

    if is_trivia_qa:
        names, tf_weights = load_tf_weights_trivia_qa(init_vars)
    else:
        names, tf_weights = load_tf_weights_bert(init_vars, tf_path)

    for txt_name in names:
        array = tf_weights[txt_name]
        name = txt_name.split("/")
        # adam_v and adam_m are variables used in AdamWeightDecayOptimizer to calculated m and v
        # which are not required for using pretrained model
        if any(
            n in ["adam_v", "adam_m", "AdamWeightDecayOptimizer", "AdamWeightDecayOptimizer_1", "global_step"]
            for n in name
        ):
            logger.info(f"Skipping {'/'.join(name)}")
            continue
        pointer = model
        pt_name = []
        for m_name in name:
            if re.fullmatch(r"[A-Za-z]+_\d+", m_name):
                scope_names = re.split(r"_(\d+)", m_name)
            else:
                scope_names = [m_name]
            if scope_names[0] == "kernel" or scope_names[0] == "gamma":
                pointer = getattr(pointer, "weight")
                pt_name.append("weight")
            elif scope_names[0] == "output_bias" or scope_names[0] == "beta":
                pointer = getattr(pointer, "bias")
                pt_name.append("bias")
            elif scope_names[0] == "output_weights":
                pointer = getattr(pointer, "weight")
                pt_name.append("weight")
            elif scope_names[0] == "squad":
                pointer = getattr(pointer, "classifier")
                pt_name.append("classifier")
            elif scope_names[0] == "transform":
                pointer = getattr(pointer, "transform")
                pt_name.append("transform")
                if ("bias" in name) or ("kernel" in name):
                    pointer = getattr(pointer, "dense")
                    pt_name.append("dense")
                elif ("beta" in name) or ("gamma" in name):
                    pointer = getattr(pointer, "LayerNorm")
                    pt_name.append("LayerNorm")
            else:
                try:
                    pointer = getattr(pointer, scope_names[0])
                    pt_name.append(f"{scope_names[0]}")
                except AttributeError:
                    logger.info(f"Skipping {m_name}")
                    continue
            if len(scope_names) >= 2:
                num = int(scope_names[1])
                pointer = pointer[num]
                pt_name.append(f"{num}")
        if m_name[-11:] == "_embeddings" or m_name == "embeddings":
            pointer = getattr(pointer, "weight")
            pt_name.append("weight")
        elif m_name == "kernel":
            array = np.transpose(array)
        try:
            if len(array.shape) > len(pointer.shape) and math.prod(array.shape) == math.prod(pointer.shape):
                # print(txt_name, array.shape)
                if (
                    txt_name.endswith("attention/self/key/kernel")
                    or txt_name.endswith("attention/self/query/kernel")
                    or txt_name.endswith("attention/self/value/kernel")
                ):
                    array = array.transpose(1, 0, 2).reshape(pointer.shape)
                elif txt_name.endswith("attention/output/dense/kernel"):
                    array = array.transpose(0, 2, 1).reshape(pointer.shape)
                else:
                    array = array.reshape(pointer.shape)

            if pointer.shape != array.shape:
                raise ValueError(
                    f"Pointer shape {pointer.shape} and array shape {array.shape} mismatched of {txt_name}."
                )
        except ValueError as e:
            e.args += (pointer.shape, array.shape)
            raise
        pt_weight_name = ".".join(pt_name)
        logger.info(f"Initialize PyTorch weight {pt_weight_name} from {txt_name}.")
        pointer.data = torch.from_numpy(array)
        tf_weights.pop(txt_name, None)
        pt_names.remove(pt_weight_name)

    logger.info(f"Weights not copied to PyTorch model: {', '.join(tf_weights.keys())}.")
    logger.info(f"Weights not initialized in PyTorch model: {', '.join(pt_names)}.")
    return model