in hype/graph_dataset.pyx [0:0]
def __cinit__(self, idx, objects, weights, nnegs, batch_size, num_workers,
              burnin=False, sample_dampening=0.75):
    '''
    Build a dataset for training hyperbolic embeddings.

    Rather than allocating many tensors for individual items, each
    iteration produces one whole minibatch, so a single tensor
    allocation can be filled in place.

    Args:
        idx (ndarray[ndims=2]): Co-occurrence index pairs; e.g. a row
            `[4, 19]` means item 4 co-occurs with item 19.
        objects (list[str]): Mapping from integer ID to object string.
        weights (ndarray[ndims=1]): Number of times each pair in `idx`
            co-occurred (same length as `idx`).
        nnegs (int): Number of negative samples per positive example.
        batch_size (int): Size of each minibatch.
        num_workers (int): Number of threads used to produce batches.
        burnin (bool): Burn-in sampling toggle.
            NOTE(review): exact semantics are not visible here — they
            presumably live in `_mk_weights`/batch iteration; confirm.
        sample_dampening (float): Dampening factor for sample counts
            (presumably applied in `_mk_weights` — verify there).
    '''
    # Raw co-occurrence data and vocabulary size.
    self.idx = idx
    self.objects = objects
    self.N = len(objects)

    # Negative-sampling configuration.
    self.nnegs = nnegs
    self.burnin = burnin
    self.sample_dampening = sample_dampening
    self.counts = np.zeros(self.N, dtype=np.double)

    # Batch-production configuration.
    self.num_workers = num_workers
    self.batch_size = batch_size

    # Derive per-object sampling weights; must run after the attributes
    # above are populated.
    self._mk_weights(idx, weights)

    self.max_tries = nnegs * 10
    self.neg_multiplier = 1
    # Bounded queue: producers block once `num_workers` batches pend.
    self.queue = queue.Queue(maxsize=num_workers)