in codes/rnn_training/train_nli_w2v.py [0:0]
def trainepoch(nli_net, train_iter, optimizer, loss_fn, epoch, params):
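    """Run one training epoch over train_iter and return the training accuracy (%).

    Side effects: puts nli_net in train mode, decays the learning rate for SGD,
    and temporarily rescales the lr inside the loop to implement gradient clipping.
    """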
print("\nTRAINING : Epoch " + str(epoch))
nli_net.train()
all_costs = []
logs = []
words_count = 0
last_time = time.time()
correct = 0.0
    # decay the learning rate (SGD only, starting from the second epoch)
optimizer.param_groups[0]["lr"] = (
optimizer.param_groups[0]["lr"] * params.decay
if epoch > 1 and "sgd" in params.optimizer
else optimizer.param_groups[0]["lr"]
)
print("Learning rate : {0}".format(optimizer.param_groups[0]["lr"]))
total_samples = 0
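    # total_samples counts examples actually seen, so the epoch-level accuracy is
    # exact even when the last batch is smaller than params.batch_size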
for i, batch in enumerate(train_iter):
# prepare batch
s1_batch, s1_len = batch.Sentence1
s2_batch, s2_len = batch.Sentence2
s1_batch, s2_batch = (
Variable(s1_batch.to(device)),
Variable(s2_batch.to(device)),
)
tgt_batch = batch.Label.to(device)
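        # torchtext-style batches are assumed here: each sentence field yields a
        # (tokens, lengths) pair with tokens shaped (seq_len, batch); Variable is a
        # legacy wrapper and effectively a no-op on PyTorch >= 0.4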
k = s1_batch.size(1) # actual batch size
total_samples += k
# model forward
output, (s1_out, s2_out) = nli_net((s1_batch, s1_len), (s2_batch, s2_len))
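        # output holds unnormalized class scores for the NLI labels; s1_out / s2_out
        # are the encoded sentence representations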
pred = output.data.max(1)[1]
correct += pred.long().eq(tgt_batch.data.long()).cpu().sum().item()
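        # correct accumulates over the whole epoch; it feeds both the periodic log
        # below and the final epoch accuracy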
# loss
loss = loss_fn(output, tgt_batch)
all_costs.append(loss.item())
words_count += s1_batch.nelement() + s2_batch.nelement()
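        # nelement() counts every position in the padded batch, so the words/s figure
        # logged below includes padding tokens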
# backward
optimizer.zero_grad()
loss.backward()
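        # note: the division by k below assumes loss_fn sums (rather than averages)
        # over the batch, so gradients are brought back to a per-example scale here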
        # rescale gradients by the batch size, then clip them by global norm
        # (clipping is off by default)
        shrink_factor = 1
        total_norm = 0
for p in nli_net.parameters():
if p.requires_grad:
p.grad.div_(k) # divide by the actual batch size
total_norm += p.grad.norm() ** 2
total_norm = np.sqrt(total_norm.item())
if total_norm > params.max_norm:
shrink_factor = params.max_norm / total_norm
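        # clipping is applied by temporarily shrinking the learning rate rather than
        # the gradients; for vanilla SGD (no momentum) this is equivalent to scaling
        # the gradients themselves by shrink_factor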
        # read the current lr from the param group (there is no external lr variable,
        # e.g. when Adam is used) and scale it down just for this update
        current_lr = optimizer.param_groups[0]["lr"]
        optimizer.param_groups[0]["lr"] = current_lr * shrink_factor
# optimizer step
optimizer.step()
optimizer.param_groups[0]["lr"] = current_lr
if len(all_costs) == 100:
logs.append(
"{0} ; loss {1} ; sentence/s {2} ; words/s {3} ; accuracy train : {4}".format(
(i) * params.batch_size,
round(np.mean(all_costs), 2),
int(len(all_costs) * params.batch_size / (time.time() - last_time)),
int(words_count * 1.0 / (time.time() - last_time)),
round(100.0 * correct / ((i + 1) * params.batch_size), 2),
)
)
print(logs[-1])
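            # reset the window counters so the next log covers only the next 100 batches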
last_time = time.time()
words_count = 0
all_costs = []
train_acc = round(100 * correct / total_samples, 2)
print("results : epoch {0} ; mean accuracy train : {1}".format(epoch, train_acc))
# ex.log_metric('train_accuracy', train_acc, step=epoch)
return train_acc
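

# Typical usage (sketch, not taken from this file): the epoch loop, optimizer, loss,
# and params object are assumed to be set up elsewhere in the training script, e.g.
#
#     for epoch in range(1, params.n_epochs + 1):
#         train_acc = trainepoch(nli_net, train_iter, optimizer, loss_fn, epoch, params)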