def __init__()

in opacus_lab/models/GPT2/model/attention.py [0:0]


    def __init__(self, dropout: float = 0.1, max_position_embeddings: int = 1024):
        super().__init__()
        self.dropout = nn.Dropout(dropout)
        # Register the causal-mask ("bias") and "masked_bias" buffers below;
        # copied from Huggingface's implementation (see causal_masking routine).
        self.mpe = max_position_embeddings  # max sequence length the mask covers
        self.register_buffer(
            "bias",
            torch.tril(torch.ones((self.mpe, self.mpe), dtype=torch.uint8)).view(
                1, 1, self.mpe, self.mpe
            ),
        )
        self.register_buffer("masked_bias", torch.tensor(-1e4))