in train.py [0:0]
def get_losses(logits, labels, mask=None):
    """Compute per-position softmax cross-entropy loss and its aggregate.

    Args:
        logits: float tensor of shape [n, t, nx] — presumably
            (batch, time, vocab); TODO confirm against callers.
        labels: int tensor of shape [ln, t] with target class ids; its
            time dimension must equal the logits' time dimension.
        mask: optional [n, t] mask; may be boolean (0/1) or scalar
            (weighted). When given, the aggregate loss is the weighted
            mean over mask values instead of a plain mean.

    Returns:
        A tuple (aggregate_loss, losses) where `losses` is the [n, t]
        per-position loss tensor and `aggregate_loss` is a scalar.

    Raises:
        ValueError: if the labels' time dimension does not match the
            logits' time dimension.
    """
    with tf.name_scope('loss'):
        n, t, nx = shape_list(logits)
        ln, lt = shape_list(labels)
        # Explicit check instead of `assert` so it is not stripped
        # when Python runs with optimizations enabled (-O).
        if lt != t:
            raise ValueError(
                'labels time dim (%s) does not match logits time dim (%s)'
                % (lt, t))
        labels = tf.reshape(labels, [-1])
        logits = tf.reshape(logits, [-1, nx])
        if H.float16 and logits.shape[-1].value <= 65536 and logits.dtype == tf.float16:
            # much faster fused fp16 implementation that also saves memory;
            # taken only when the vocab axis is <= 65536 (the guard above).
            losses = bs.softmax_cross_entropy(logits=logits, labels=labels)
        else:
            # Upcast to fp32: the unfused TF op is run in full precision.
            logits = tf.cast(logits, tf.float32)
            losses = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=logits, labels=labels)
        losses = tf.reshape(losses, [n, t])
        if mask is not None:
            # mask can be either a boolean or a scalar (weighted) mask.
            # NOTE(review): the division is unguarded — an all-zero mask
            # produces NaN; confirm callers never pass an empty mask.
            return (tf.reduce_sum(losses * mask) / tf.reduce_sum(mask)), losses
        return tf.reduce_mean(losses), losses