in adaptive_io.py [0:0]
def __init__(self, n_tokens, d_embed, d_proj, cutoffs, div_val=4):
    """Create the cluster projection and the per-tier output layers.

    The token range ``[0, n_tokens)`` is split into consecutive tiers at
    the given ``cutoffs``. Tier ``i`` spans
    ``[cutoff_ends[i], cutoff_ends[i + 1])`` and works at width
    ``d_embed // div_val ** i``, so every later tier is narrower than the
    one before it.

    Args:
        n_tokens: Total number of tokens. It is appended to ``cutoffs`` as
            the end of the last tier.
        d_embed: Width of tier 0 and input width of ``cluster_proj``.
        d_proj: ``out_features`` of the temporary ``nn.Linear`` whose
            weight is stored in ``out_projs``; each stored weight
            therefore has shape ``(d_proj, d_emb_i)``.
        cutoffs: Non-empty, non-decreasing iterable of tier boundaries,
            each satisfying ``0 < c < n_tokens``. It is copied into a
            list, so tuples are accepted and the caller's object is never
            modified.
        div_val: Divisor applied to the width once per tier; must be
            greater than 1.

    Raises:
        ValueError: If ``cutoffs`` is empty.
        AssertionError: If a boundary is out of range, the boundaries
            decrease, or ``div_val <= 1`` (only while assertions are
            enabled, i.e. not under ``python -O``).
    """
    super(ProjectedAdaptiveLogSoftmax, self).__init__()
    self.n_tokens = n_tokens
    self.d_embed = d_embed
    self.d_proj = d_proj

    # Copy into a real list: ``cutoffs + [n_tokens]`` below would raise
    # TypeError for a tuple, and the copy keeps the caller's data intact.
    cutoffs = list(cutoffs)
    if not cutoffs:
        # Same exception type min() would raise, with a usable message.
        raise ValueError("cutoffs must contain at least one boundary")
    assert 0 < min(cutoffs) <= max(cutoffs) < n_tokens, \
        "every cutoff must satisfy 0 < cutoff < n_tokens"
    # A decreasing pair would yield a negative tier size further down and
    # fail inside nn.Linear; report the real cause here instead.
    assert all(lo <= hi for lo, hi in zip(cutoffs, cutoffs[1:])), \
        "cutoffs must be sorted in non-decreasing order"
    self.cutoffs = cutoffs + [n_tokens]
    self.cutoff_ends = [0] + self.cutoffs
    self.div_val = div_val
    assert self.div_val > 1, "div_val must be greater than 1"
    assert len(self.cutoffs) > 1

    # Tier 0 is the shortlist; every remaining tier is one cluster.
    self.shortlist_size = self.cutoffs[0]
    self.n_clusters = len(self.cutoffs) - 1
    self.head_size = self.shortlist_size + self.n_clusters

    # clusters parameters
    self.cluster_proj = nn.Linear(self.d_embed, self.n_clusters)
    self.out_layers = nn.ModuleList()
    self.out_projs = nn.ParameterList()

    # output layers / projections
    # cutoff_ends is cutoffs with a leading 0, so zipping the two lists
    # yields the (start, end) pair of every tier.
    for i, (l_idx, r_idx) in enumerate(zip(self.cutoff_ends, self.cutoffs)):
        d_emb_i = d_embed // (div_val ** i)
        # Only the weight of this temporary Linear is kept; its bias is
        # discarded. The construction order is left as-is so that random
        # initialisation under a fixed seed is unchanged.
        self.out_projs.append(nn.Linear(d_emb_i, d_proj).weight)
        self.out_layers.append(nn.Linear(d_emb_i, r_idx - l_idx))