in backup/baselines/ruber/train_word2vec.py [0:0]
def train(self):
self.log.write_message_logs("starting training ...")
for epoch in range(self.args.word2vec_epochs):
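# Collect minibatch losses; the list is cleared each time metrics are flushed.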
losses = []
# The embedding layer weights are learned as part of this training loop.
num_batches = len(range(0, len(self.ngrams), self.args.word2vec_batchsize))
self.log.write_message_logs("Number of batches: {}".format(num_batches))
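# Step through the n-gram examples in chunks of word2vec_batchsize.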
for minibatch in range(0, len(self.ngrams), self.args.word2vec_batchsize):
contexts, targets = zip(
*self.ngrams[minibatch : minibatch + self.args.word2vec_batchsize]
)
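# Map every word in each context window to its vocabulary index.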
context_idxs = torch.tensor(
[
[self.data.get_word_id(w) for w in context]
for context in contexts
],
dtype=torch.long,
device=self.device,
)
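# Forward pass, loss computation, backpropagation, and parameter update.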
self.optimizer.zero_grad()
log_probs = self.model(context_idxs)
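# Vocabulary indices of the target words for this minibatch.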
y = torch.tensor(
[self.data.get_word_id(target) for target in targets],
dtype=torch.long,
device=self.device,
)
loss = self.loss_function(log_probs, y)
loss.backward()
self.optimizer.step()
losses.append(loss.item())
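# Flush the averaged loss to the metric logs when the batch start index is a multiple of 1000.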
if minibatch % 1000 == 0:
metrics = {
"mode": "train",
"loss": np.mean(losses),
"epoch": epoch,
"minibatch": self.train_step,
}
self.train_step += 1
self.log.write_metric_logs(metrics)
losses = []
# End of epoch: flush any minibatch losses not yet logged.
if losses:
    metrics = {
        "mode": "train",
        "loss": np.mean(losses),
        "epoch": epoch,
        "minibatch": self.train_step,
    }
    self.train_step += 1
    self.log.write_metric_logs(metrics)
    self.losses.append(np.mean(losses))
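# Save the learned embedding weights.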
self.save_embedding()