optimum/bettertransformer/models/encoder_models.py [1011:1037]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        # Fixed per-layer flags; the code that reads them is outside this excerpt.
        # NOTE(review): `norm_first = True` presumably selects the pre-norm
        # variant (layer norm applied before attention / feed-forward), which
        # would be consistent with the `layernorm_before` / `layernorm_after`
        # paths mapped to norm1 / norm2 below -- confirm against the forward pass.
        self.is_last_layer = False
        self.norm_first = True

        # Maps a parameter name (key) to the dotted attribute path(s) it
        # corresponds to (value). A value is either a single path string or a
        # list of paths.
        # NOTE(review): presumably the keys are parameters of this converted
        # layer and the paths are relative to the original layer -- verify
        # against the code that consumes this mapping.
        self.original_layers_mapping = {
            # Three source paths for one key, listed as query, key, value.
            # NOTE(review): presumably the three tensors are concatenated in
            # this order to form the fused input projection -- TODO confirm.
            "in_proj_weight": [
                "attention.attention.query.weight",
                "attention.attention.key.weight",
                "attention.attention.value.weight",
            ],
            "in_proj_bias": [
                "attention.attention.query.bias",
                "attention.attention.key.bias",
                "attention.attention.value.bias",
            ],
            # One-to-one entries: attention output projection, the two dense
            # layers (`intermediate.dense`, `output.dense`), and the two layer norms.
            "out_proj_weight": "attention.output.dense.weight",
            "out_proj_bias": "attention.output.dense.bias",
            "linear1_weight": "intermediate.dense.weight",
            "linear1_bias": "intermediate.dense.bias",
            "linear2_weight": "output.dense.weight",
            "linear2_bias": "output.dense.bias",
            "norm1_weight": "layernorm_before.weight",
            "norm1_bias": "layernorm_before.bias",
            "norm2_weight": "layernorm_after.weight",
            "norm2_bias": "layernorm_after.bias",
        }

        # Called last, after the flags and the mapping above have been set.
        # What it checks is not visible in this excerpt.
        self.validate_bettertransformer()
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -


optimum/bettertransformer/models/encoder_models.py [1134:1160]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        # Both flags are hard-coded here; where they are read is not shown in
        # this excerpt.
        # NOTE(review): `norm_first = True` looks like it marks a pre-norm
        # layer (normalization ahead of each sub-block), in line with the
        # `layernorm_before` / `layernorm_after` entries below -- confirm.
        self.is_last_layer = False
        self.norm_first = True

        # Lookup table from a parameter name to one dotted attribute path, or
        # to a list of such paths when several source tensors belong to one name.
        # NOTE(review): the paths presumably resolve on the original
        # (unconverted) layer -- verify against the consumer of this dict.
        self.original_layers_mapping = {
            # Query, key and value paths are grouped under a single key, in
            # that order.
            # NOTE(review): presumably merged into one projection tensor by
            # the consumer -- TODO confirm the merge order matters.
            "in_proj_weight": [
                "attention.attention.query.weight",
                "attention.attention.key.weight",
                "attention.attention.value.weight",
            ],
            "in_proj_bias": [
                "attention.attention.query.bias",
                "attention.attention.key.bias",
                "attention.attention.value.bias",
            ],
            # Remaining entries each point at exactly one source path.
            "out_proj_weight": "attention.output.dense.weight",
            "out_proj_bias": "attention.output.dense.bias",
            "linear1_weight": "intermediate.dense.weight",
            "linear1_bias": "intermediate.dense.bias",
            "linear2_weight": "output.dense.weight",
            "linear2_bias": "output.dense.bias",
            "norm1_weight": "layernorm_before.weight",
            "norm1_bias": "layernorm_before.bias",
            "norm2_weight": "layernorm_after.weight",
            "norm2_bias": "layernorm_after.bias",
        }

        # Runs only once the attributes above are in place; its behavior is
        # defined outside this excerpt.
        self.validate_bettertransformer()
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -