in models.py [0:0]
def __init__(self, vocab_size, hidden_size, nb_heads, nb_layers,
             attn_span, emb_dropout, adapt_io_params, **kargs):
    nn.Module.__init__(self)
    # token embeddings: adaptive input/output embeddings, or a plain
    # embedding table plus a linear output projection
    self.adapt_io = adapt_io_params['adapt_io_enabled']
    if self.adapt_io:
        self.in_emb, self.out_emb = build_adaptive_io(
            vocab_size, hidden_size, **adapt_io_params)
    else:
        self.in_emb = nn.Embedding(vocab_size, hidden_size)
        self.out_emb = nn.Linear(hidden_size, vocab_size)
    # optional dropout applied to the token embeddings
    if emb_dropout > 0:
        self.emb_dropout = nn.Dropout(emb_dropout)
    else:
        self.emb_dropout = None
    # position embeddings: one learned key vector of size
    # hidden_size // nb_heads per relative position in the attention span
    self.key_pe = nn.Parameter(
        torch.randn(1, hidden_size // nb_heads, attn_span))
    # stack of nb_layers identical Transformer layers
    self.layers = nn.ModuleList()
    self.layers.extend(
        TransformerSeqLayer(
            hidden_size=hidden_size, nb_heads=nb_heads,
            attn_span=attn_span, **kargs)
        for _ in range(nb_layers))
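
The key_pe parameter holds one learned vector of size hidden_size // nb_heads for each of the attn_span relative positions. As a hedged illustration of why it has that shape (a sketch of the general pattern, not necessarily the exact computation in the attention module, which is outside this excerpt), per-head queries can be matrix-multiplied against it to get one positional score per (query position, relative offset) pair; the function name position_scores and the sizes below are hypothetical:

import torch

def position_scores(query, key_pe):
    # query:  (batch * nb_heads, seq_len, head_dim)
    # key_pe: (1, head_dim, attn_span), broadcast over the batch dimension
    # returns (batch * nb_heads, seq_len, attn_span): a score for each query
    # position against each relative offset within the span, which would be
    # added to the content-based attention logits before the softmax.
    return torch.matmul(query, key_pe)

# hypothetical sizes: hidden_size=256, nb_heads=4 -> head_dim=64, attn_span=512
q = torch.randn(8, 16, 64)            # (batch * nb_heads, seq_len, head_dim)
key_pe = torch.randn(1, 64, 512)      # same shape as self.key_pe above
print(position_scores(q, key_pe).shape)   # torch.Size([8, 16, 512])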