in hype/graph.py [0:0]
def __init__(self, idx, objects, weights, nnegs, unigram_size=1e8):
    """Index a weighted edge list and optionally build a sampling table.

    Args:
        idx: 2-D array of shape ``(n_edges, 2)``. Each row is unpacked as
            ``(t, h)``; ``h`` indexes the per-object counts and ``t`` must
            be smaller than ``len(objects)``.
        objects: Sequence of objects; only its length is used here, and it
            is stored as ``self.objects``.
        weights: 1-D array with one weight per row of ``idx``.
        nnegs: Non-negative number stored as ``self.nnegs``; it also scales
            ``self.max_tries`` (``nnegs * self._ntries``).
        unigram_size: Number of entries drawn for ``self.unigram_table``.
            A value of ``0`` skips building the table entirely.

    Raises:
        AssertionError: If the array shapes are inconsistent, if ``nnegs``
            or ``unigram_size`` is negative, or if a ``t`` index is not
            smaller than ``len(objects)``.
    """
    assert idx.ndim == 2 and idx.shape[1] == 2
    assert weights.ndim == 1
    assert len(idx) == len(weights)
    assert nnegs >= 0
    assert unigram_size >= 0

    print('Indexing data')
    self.idx = idx
    self.nnegs = nnegs
    self.burnin = False
    self.objects = objects
    self.max_tries = self.nnegs * self._ntries

    # Every object starts with a count of one, so objects that never occur
    # as ``h`` still get a non-zero probability in the unigram table.
    # ``np.float64`` replaces the ``np.float`` alias, which was removed in
    # NumPy 1.24 (it was only ever an alias for the builtin ``float``).
    self._counts = np.ones(len(objects), dtype=np.float64)

    # Accumulate edge weights as t -> h -> summed weight; duplicate
    # (t, h) rows add up.
    edge_weights = ddict(lambda: ddict(int))
    for (t, h), weight in zip(self.idx, weights):
        self._counts[h] += weight
        edge_weights[t][h] += weight
    # Freeze the outer mapping so later lookups of unknown ``t`` raise
    # KeyError instead of silently creating entries.
    self._weights = dict(edge_weights)

    # ``default=-1`` keeps an empty edge list from raising in ``max``.
    nents = int(max(self._weights, default=-1))
    assert len(objects) > nents, 'Number of objects do no match'

    if unigram_size > 0:
        # Sampling probabilities are proportional to the dampened counts.
        c = self._counts ** self._sample_dampening
        self.unigram_table = choice(
            len(objects),
            size=int(unigram_size),
            p=(c / c.sum())
        )