in infersent_comp/train_nli.py [0:0]
def trainepoch(epoch):
    print('\nTRAINING : Epoch ' + str(epoch))
    nli_net.train()
    all_costs = []
    logs = []
    words_count = 0
    last_time = time.time()
    correct = 0.

    # shuffle the data
    permutation = np.random.permutation(len(train['s1']))
    s1 = train['s1'][permutation]
    s2 = train['s2'][permutation]
    target = train['label'][permutation]
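
    # per-epoch learning-rate decay: applied only after the first epoch and
    # only when the optimizer name contains 'sgd'; otherwise the lr is unchanged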
    optimizer.param_groups[0]['lr'] = optimizer.param_groups[0]['lr'] * params.decay if epoch > 1 \
        and 'sgd' in params.optimizer else optimizer.param_groups[0]['lr']
    print('Learning rate : {0}'.format(optimizer.param_groups[0]['lr']))

    for stidx in range(0, len(s1), params.batch_size):
        # prepare batch
        s1_batch, s1_len = get_batch(s1[stidx:stidx + params.batch_size],
                                     word_vec, params.word_emb_dim)
        s2_batch, s2_len = get_batch(s2[stidx:stidx + params.batch_size],
                                     word_vec, params.word_emb_dim)
        s1_batch, s2_batch = Variable(s1_batch.to(device)), Variable(s2_batch.to(device))
        tgt_batch = Variable(torch.LongTensor(target[stidx:stidx + params.batch_size])).to(device)
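        # get_batch returns a padded batch of word vectors plus the true
        # sentence lengths; dimension 1 of the padded tensor is the batch axis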
        k = s1_batch.size(1)  # actual batch size

        # model forward
        output, (s1_out, s2_out) = nli_net((s1_batch, s1_len), (s2_batch, s2_len))
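
        # predictions = argmax over the class scores; keep a running count of
        # correct labels so training accuracy can be reported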
        pred = output.data.max(1)[1]
        correct += pred.long().eq(tgt_batch.data.long()).cpu().sum().item()
        assert len(pred) == len(s1[stidx:stidx + params.batch_size])

        # loss
        loss = loss_fn(output, tgt_batch)
        all_costs.append(loss.item())
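        # rough throughput counter: total tensor elements divided by the
        # embedding dimension gives the number of (padded) word vectors processed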
        words_count += (s1_batch.nelement() + s2_batch.nelement()) / params.word_emb_dim

        # backward
        optimizer.zero_grad()
        loss.backward()

        # gradient clipping (off by default)
        shrink_factor = 1
        total_norm = 0
        for p in nli_net.parameters():
            if p.requires_grad:
                p.grad.div_(k)  # divide by the actual batch size
                total_norm += p.grad.norm() ** 2
        total_norm = np.sqrt(total_norm.item())
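
        # clip by temporarily shrinking the learning rate for this step instead
        # of rescaling the gradients in place; the original lr is restored below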
        if total_norm > params.max_norm:
            shrink_factor = params.max_norm / total_norm
        current_lr = optimizer.param_groups[0]['lr']  # current lr (no external "lr", for adam)
        optimizer.param_groups[0]['lr'] = current_lr * shrink_factor  # just for update

        # optimizer step
        optimizer.step()
        optimizer.param_groups[0]['lr'] = current_lr
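
        # report running loss, throughput and accuracy every 100 mini-batches,
        # then reset the accumulators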
        if len(all_costs) == 100:
            logs.append('{0} ; loss {1} ; sentence/s {2} ; words/s {3} ; accuracy train : {4}'.format(
                stidx, round(np.mean(all_costs), 2),
                int(len(all_costs) * params.batch_size / (time.time() - last_time)),
                int(words_count * 1.0 / (time.time() - last_time)),
                round(100. * correct / (stidx + k), 2)))
            print(logs[-1])
            last_time = time.time()
            words_count = 0
            all_costs = []
    train_acc = round(100 * correct / len(s1), 2)
    print('results : epoch {0} ; mean accuracy train : {1}'
          .format(epoch, train_acc))
    ex.log_metric('train_accuracy', train_acc, step=epoch)
    return train_acc
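

# ---------------------------------------------------------------------------
# Usage sketch (illustrative, not part of the original file): trainepoch() is
# normally driven by an outer epoch loop in the same script. The evaluate()
# helper, stop_training flag and params.n_epochs used below are assumptions
# about the surrounding code, named here only to show how the returned
# train_acc fits into that loop.
#
#   epoch = 1
#   while not stop_training and epoch <= params.n_epochs:
#       train_acc = trainepoch(epoch)
#       eval_acc = evaluate(epoch, 'valid')  # hypothetical validation pass
#       epoch += 1
# ---------------------------------------------------------------------------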