def __init__()

in models.py


    def __init__(self, vocab_size, hidden_size, nb_heads, nb_layers,
                 attn_span, emb_dropout, adapt_io_params, **kwargs):
        nn.Module.__init__(self)
        # token embeddings: adaptive input/output representations when
        # enabled, otherwise a plain embedding plus a linear output projection
        self.adapt_io = adapt_io_params['adapt_io_enabled']
        if self.adapt_io:
            self.in_emb, self.out_emb = build_adaptive_io(
                vocab_size, hidden_size, **adapt_io_params)
        else:
            self.in_emb = nn.Embedding(vocab_size, hidden_size)
            self.out_emb = nn.Linear(hidden_size, vocab_size)
        # optional dropout applied to the input embeddings
        if emb_dropout > 0:
            self.emb_dropout = nn.Dropout(emb_dropout)
        else:
            self.emb_dropout = None
        # learnable position embeddings for the attention keys, of shape
        # (1, head_dim, attn_span) with head_dim = hidden_size // nb_heads
        self.key_pe = nn.Parameter(
            torch.randn(1, hidden_size // nb_heads, attn_span))

        # stack of nb_layers identical transformer layers; extra keyword
        # arguments are forwarded unchanged to every layer
        self.layers = nn.ModuleList()
        self.layers.extend(
            TransformerSeqLayer(
                hidden_size=hidden_size, nb_heads=nb_heads,
                attn_span=attn_span, **kwargs)
            for _ in range(nb_layers))
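
A minimal construction sketch follows. It is hedged: the owning class is
assumed to be called TransformerSeq (inferred from TransformerSeqLayer; the
class name is not visible in this excerpt), build_adaptive_io and
TransformerSeqLayer are assumed to live in the same models.py, and
inner_hidden_size / dropout stand in for whatever per-layer keyword
arguments TransformerSeqLayer actually accepts via **kwargs.

    import torch
    from models import TransformerSeq  # assumed class name

    # 'adapt_io_enabled' is the only adapt_io_params key this constructor
    # reads directly; with adaptive IO disabled, build_adaptive_io is never
    # called, so no other keys are required.
    model = TransformerSeq(
        vocab_size=32000,
        hidden_size=512,
        nb_heads=8,
        nb_layers=12,
        attn_span=1024,
        emb_dropout=0.1,
        adapt_io_params={'adapt_io_enabled': False},
        inner_hidden_size=2048,  # hypothetical layer kwarg, forwarded via **kwargs
        dropout=0.1,             # hypothetical layer kwarg, forwarded via **kwargs
    )

    # Input path defined by this constructor: embedding lookup, then
    # optional dropout.
    tokens = torch.randint(0, 32000, (2, 64))  # (batch, seq_len)
    h = model.in_emb(tokens)                   # (2, 64, 512)
    if model.emb_dropout is not None:
        h = model.emb_dropout(h)

Because **kwargs is passed through untouched, layer-specific options can be
added or changed without modifying this constructor's signature.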