in adaptive_io.py [0:0]
def __init__(self, n_tokens, d_embed, d_proj, cutoffs, div_val=4):
    """Create one embedding table and one projection weight per cluster.

    The id range ``[0, n_tokens)`` is split at ``cutoffs`` into consecutive
    clusters.  Cluster ``i`` gets an ``nn.Embedding`` with
    ``d_embed // div_val ** i`` columns and the weight of an
    ``nn.Linear(d_emb_i, d_proj)`` as its projection parameter.

    Args:
        n_tokens: Total number of token ids; closes the last cluster.
        d_embed: Embedding width of the first cluster.
        d_proj: ``out_features`` of every per-cluster projection.
        cutoffs: Non-empty, non-decreasing sequence of cluster boundaries,
            each strictly between 0 and ``n_tokens``.  Any sequence type
            is accepted; it is copied and never mutated.
        div_val: Divisor applied once more to the embedding width for each
            successive cluster; must be greater than 1.

    Raises:
        AssertionError: If ``cutoffs`` is empty, out of range or not
            sorted, or if ``div_val`` is not greater than 1.
    """
    super(AdaptiveEmbedding, self).__init__()
    self.n_tokens = n_tokens
    self.d_embed = d_embed
    self.d_proj = d_proj

    # Copy so tuples (or other sequences) work and the caller's object is
    # never aliased.
    cutoffs = list(cutoffs)
    # Must run before min()/max(), which raise ValueError on an empty list.
    assert len(cutoffs) > 0, "cutoffs must contain at least one boundary"
    assert 0 < min(cutoffs) <= max(cutoffs) < n_tokens, (
        "every cutoff must lie strictly between 0 and n_tokens"
    )
    # Out-of-order boundaries would yield a negative cluster size below.
    assert all(lo <= hi for lo, hi in zip(cutoffs, cutoffs[1:])), (
        "cutoffs must be sorted in non-decreasing order"
    )

    self.cutoffs = cutoffs + [n_tokens]
    self.cutoff_ends = [0] + self.cutoffs
    self.div_val = div_val
    assert self.div_val > 1, "div_val must be greater than 1"

    # Square root of the projection size; presumably applied as an output
    # scale in forward() -- confirm against the rest of the class.
    self.emb_scale = d_proj ** 0.5
    self.emb_layers = nn.ModuleList()
    self.emb_projs = nn.ParameterList()

    # cutoff_ends[i] / cutoffs[i] are the start / end of cluster i.
    for i, (l_idx, r_idx) in enumerate(zip(self.cutoff_ends, self.cutoffs)):
        d_emb_i = d_embed // (div_val ** i)
        self.emb_layers.append(nn.Embedding(r_idx - l_idx, d_emb_i))
        # Only the weight is kept; nn.Linear is used for its default
        # initialisation of that weight.
        self.emb_projs.append(nn.Linear(d_emb_i, d_proj).weight)