in utils.py [0:0]
def log_iter(self, iter_no, nb_batches_per_iter, loss_train, loss_val,
             elapsed, model):
    """Log and print the metrics gathered for one training iteration.

    Records the step count, the train/validation losses converted to the
    unit selected by ``self.data_unit`` and, when adaptive span is enabled
    on the model's first layer, the mean of the per-layer average spans and
    the largest per-layer maximum span. Every value is sent to
    ``self._log`` and the whole summary is printed as one tab-separated
    line.

    Args:
        iter_no: Index of the iteration; the reported step is
            ``(iter_no + 1) * nb_batches_per_iter``.
        nb_batches_per_iter: Number of batches per iteration.
        loss_train: Training loss. Presumably a mean cross-entropy in
            nats (it is divided by ln 2 or exponentiated) -- confirm
            against the caller.
        loss_val: Validation loss, treated the same way as ``loss_train``.
        elapsed: Value printed under ``ms/batch``; not sent to
            ``self._log``.
        model: Object exposing ``layers`` either directly or through a
            ``module`` attribute (as data-parallel wrappers do). Each
            layer must provide ``attn.attn.adapt_span_enabled`` and, when
            that flag is set on the first layer,
            ``attn.attn.adaptive_span``.
    """
    step = (iter_no + 1) * nb_batches_per_iter
    self._log(title='step', value=step)
    msg = 'steps: {}'.format(step)

    if self.data_unit == 'bpc':
        # Dividing by ln 2 turns a natural-log quantity into base 2.
        train_bpc = float(loss_train / math.log(2))
        val_bpc = float(loss_val / math.log(2))
        msg += '\ttrain: {:.3f}bpc\tval: {:.3f}bpc'.format(train_bpc, val_bpc)
        self._log(title='train_bpc', value=train_bpc)
        self._log(title='val_bpc', value=val_bpc)
    else:
        # Any unit other than 'bpc' is reported as exp(loss).
        train_ppl = math.exp(loss_train)
        val_ppl = math.exp(loss_val)
        msg += '\ttrain: {:.2f}ppl\tval: {:.2f}ppl'.format(train_ppl, val_ppl)
        self._log(title='train_ppl', value=train_ppl)
        self._log(title='val_ppl', value=val_ppl)
    msg += '\tms/batch: {:.1f}'.format(elapsed)

    # Use the wrapped model when a `.module` attribute exists; otherwise
    # fall back to the model itself so unwrapped models can be logged too.
    layers = getattr(model, 'module', model).layers

    # Only the first layer's flag is consulted; the statistics are then
    # collected from every layer.
    if layers[0].attn.attn.adapt_span_enabled:
        span_stats = [
            (layer.attn.attn.adaptive_span.get_current_avg_span(),
             layer.attn.attn.adaptive_span.get_current_max_span())
            for layer in layers
        ]
        avg_spans, max_spans = zip(*span_stats)
        span_avg = float(sum(avg_spans)) / len(avg_spans)
        span_max = float(max(max_spans))
        self._log('span_avg', span_avg)
        self._log('span_max', span_max)
        msg += "\tspan_avg: {:.0f}\tspan_max: {:.0f}".format(span_avg, span_max)

    print(msg)