def __init__()

in src/mlm/models/gpt2.py [0:0]


    def __init__(self, units, vocab_size, max_length, num_layers, num_heads, dropout=0.0,
                 prefix=None, params=None):
        super(GPT2Model, self).__init__(prefix=prefix, params=params)
        self._units = units
        self._max_length = max_length
        self._num_layers = num_layers
        self._num_heads = num_heads
        with self.name_scope():
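            # Learned position and token embeddings; the initializer scales (0.01 / 0.02)
            # match those used in the original GPT-2 implementation.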
            self._pos_embed = nn.Embedding(input_dim=max_length, output_dim=units,
                                           weight_initializer=mx.init.Normal(0.01),
                                           prefix='pos_embed_')
            self._embed = nn.Embedding(input_dim=vocab_size, output_dim=units, prefix='embed_',
                                       weight_initializer=mx.init.Normal(0.02))
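            # Output projection tied to the token embedding weights
            # (weight tying via params=self._embed.params).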
            self._logits_proj = nn.Dense(units=vocab_size, in_units=units, use_bias=False,
                                         flatten=False, params=self._embed.params)
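            # Per-layer containers: self-attention blocks, position-wise FFN blocks,
            # and the LayerNorms that accompany each of them.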
            self._self_attention_layers = nn.Sequential()
            self._ffn_layers = nn.HybridSequential()
            self._attn_ln = nn.HybridSequential()
            self._ffn_ln = nn.HybridSequential()
            for i in range(num_layers):
                self._self_attention_layers.add(GPT2SelfAttentionLayer(
                    units=units, num_heads=num_heads, dropout=dropout,
                    prefix='self_attn{}_'.format(i)))
                self._ffn_layers.add(GPT2FFNLayer(
                    units=units, hidden_size=units * 4, prefix='ffn{}_'.format(i)))
                self._attn_ln.add(nn.LayerNorm(prefix='attn_ln{}_'.format(i)))
                self._ffn_ln.add(nn.LayerNorm(prefix='ffn_ln{}_'.format(i)))
            # Final LayerNorm, created once after the per-layer loop; the prefix keeps
            # the last layer index so parameter names match existing checkpoints.
            self._final_ln = nn.LayerNorm(prefix='final_ln{}_'.format(num_layers - 1))
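
For orientation, a minimal instantiation sketch follows. It is not part of the file above: the import path mlm.models.gpt2 is an assumption (it depends on how src/ is packaged), and the hyperparameters roughly match the 117M (small) GPT-2 configuration.

    import mxnet as mx

    # Assumed import path; adjust to how src/mlm is packaged.
    from mlm.models.gpt2 import GPT2Model

    # Hyperparameters roughly matching the 117M GPT-2 configuration.
    model = GPT2Model(units=768, vocab_size=50257, max_length=1024,
                      num_layers=12, num_heads=12, dropout=0.0, prefix='gpt2_')

    # Initialize parameters and inspect them; the logits projection reuses the
    # token embedding weight, so no separate output matrix is allocated.
    model.initialize(ctx=mx.cpu())
    print(model.collect_params())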