def __init__()

in optimum/bettertransformer/models/decoder_models.py


    def __init__(self, layer: "nn.Module", config: "PretrainedConfig"):
        super().__init__(config)
        # Run the original attention class's __init__ on the meta device so
        # that no real weight tensors are allocated; the actual weights are
        # taken over from `layer` below.
        with torch.device("meta"):
            super(BetterTransformerBaseLayer, self).__init__(config)

        # Submodules and attributes to reuse directly from the original layer.
        submodules = [
            "k_proj",
            "v_proj",
            "q_proj",
            "out_proj",
            "attn_dropout",
            "resid_dropout",
            "scale_attn",
        ]
        # Attribute only for transformers>=4.28
        if hasattr(layer, "embed_positions"):
            submodules.append("embed_positions")

        # Attributes only for transformers<4.45
        if hasattr(layer, "bias"):
            submodules.append("bias")
        if hasattr(layer, "masked_bias"):
            submodules.append("masked_bias")

        # Attribute only for transformers>=4.45
        if hasattr(layer, "layer_idx"):
            submodules.append("layer_idx")

        # Copy each listed submodule/attribute over from the original layer.
        for attr in submodules:
            setattr(self, attr, getattr(layer, attr))

        self.module_mapping = None
        # Record which attributes were taken from the original layer so the
        # transformation can be reverted later (e.g. via BetterTransformer.reverse).
        self.original_layers_mapping = {submodule: submodule for submodule in submodules}

        # Query/key may be computed in float32 (e.g. with rotary embeddings);
        # downcast them to the value dtype before scaled_dot_product_attention.
        self.downcast_qk = True
        # Attention dropout probability read from the model config.
        self.dropout_prob_attn = config.attn_pdrop
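
The double super().__init__ is the notable pattern here: the first call sets up
the BetterTransformer bookkeeping, while the second runs the original attention
class's constructor under torch.device("meta"), so parameters are created with
shape/dtype metadata only and no memory is allocated. A minimal standalone
sketch of that behavior (plain torch, nothing optimum-specific):

    import torch
    import torch.nn as nn

    # Inside the meta-device context, parameter construction records only
    # shapes and dtypes; no real storage is allocated.
    with torch.device("meta"):
        layer = nn.Linear(4096, 4096)

    print(layer.weight.device)  # meta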
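
This constructor is normally not called directly; the public entry point is
BetterTransformer.transform, which walks a model and swaps each supported
attention layer for its wrapper via an __init__ like this one. A minimal usage
sketch (the GPT-J checkpoint is an illustrative choice, not mandated by this
file):

    from transformers import AutoModelForCausalLM
    from optimum.bettertransformer import BetterTransformer

    model = AutoModelForCausalLM.from_pretrained("EleutherAI/gpt-j-6b")
    # Replace supported attention layers with their BetterTransformer wrappers.
    model = BetterTransformer.transform(model)
    # The swap is reversible, using the original_layers_mapping recorded above.
    model = BetterTransformer.reverse(model)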