in trainer.py
import math
import random


def train_iteration(model, optimizer, scheduler, data, nb_batches_per_iter,
                    block_size, eval_only, train_pos, h_cache, batch_split):
    """Run a single training (or evaluation) iteration over several batches."""
    if eval_only:
        model.eval()
    else:
        model.train()

    nb_batches_per_iter_max = nb_batches_per_iter
    if eval_only:
        # evaluate on fewer batches during training for speed-up
        nb_batches_per_iter_max = max(1, nb_batches_per_iter // 10)
        nb_batches_per_iter_max = min(nb_batches_per_iter_max,
                                      math.ceil(data.size(1) / block_size))

    loss_all = 0
    actual_nb_batches_per_iter = 0
    for _ in range(nb_batches_per_iter_max):
        actual_nb_batches_per_iter += 1
        # input block and its target block, shifted right by one token
        X = data[:, train_pos: train_pos + block_size].contiguous()
        Y = data[:, train_pos + 1: train_pos + block_size + 1].contiguous()

        loss, h_cache = _train_batch(
            model=model,
            optimizer=optimizer,
            scheduler=scheduler,
            X=X, Y=Y,
            h_cache=h_cache,
            eval_only=eval_only,
            batch_split=batch_split)
        loss_all += loss
        train_pos += block_size
        if train_pos >= data.size(1) - block_size:
            # reached the end; randomize the offset to reduce overfitting
            train_pos = random.randrange(block_size)
            # reset the hidden-state cache
            for h in h_cache:
                h.fill_(0)

    loss_all = loss_all / actual_nb_batches_per_iter
    return loss_all, train_pos, h_cache
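
The helper _train_batch is defined elsewhere in trainer.py and is not shown here. The following is a rough sketch only, assuming a model that takes (input, cache) and returns (logits, new cache) and a cross-entropy objective; the gradient-accumulation path controlled by batch_split is omitted, and the real implementation may differ.

import torch
import torch.nn.functional as F


def _train_batch(model, optimizer, scheduler, X, Y, h_cache,
                 eval_only, batch_split):
    # forward pass; the cache carries hidden states across blocks
    # (ASSUMPTION: the model's call signature is model(X, h_cache))
    out, h_cache = model(X, h_cache)
    loss = F.cross_entropy(out.view(-1, out.size(-1)), Y.view(-1))
    if not eval_only:
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if scheduler is not None:
            scheduler.step()
    # detach the cache so gradients do not flow across block boundaries
    h_cache = [h.detach() for h in h_cache]
    return loss.item(), h_cache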
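
A hypothetical driver loop, showing how the returned train_pos and h_cache are threaded back into the next call. The names train_data, nb_iter, and the initial h_cache are assumptions standing in for the surrounding setup code.

train_pos = 0
for iter_no in range(nb_iter):
    loss, train_pos, h_cache = train_iteration(
        model, optimizer, scheduler, train_data, nb_batches_per_iter,
        block_size, eval_only=False, train_pos=train_pos,
        h_cache=h_cache, batch_split=batch_split)
    print('iter {}: loss {:.3f}'.format(iter_no, loss))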