in train.py [0:0]
def residual_dropout(x, train, key, pdrop=None):
    """Apply dropout to ``x`` while training, reusing a cached mask per key.

    Args:
        x: Input to drop out. Must expose ``x.shape.as_list()`` when dropout
            is actually applied (presumably a TensorFlow tensor with a
            static shape -- TODO confirm against callers).
        train: Truthy to enable dropout; when falsy ``x`` is returned as is.
        key: String prefix identifying this dropout site. The stringified
            shape of ``x`` is appended to form the ``H.dropout_cache`` key.
        pdrop: Drop probability. ``None`` (the default) falls back to
            ``H.resid_pdrop``. An explicit ``0.0`` disables dropout for this
            call instead of silently falling back to the global default.

    Returns:
        The first element returned by ``bs.dropout`` when dropout is applied,
        otherwise ``x`` unchanged.
    """
    # Test against None, not truthiness: 0.0 is a legitimate explicit value.
    resid_pdrop = pdrop if pdrop is not None else H.resid_pdrop
    if train and resid_pdrop > 0.0:
        # The shape is only used to disambiguate cache entries, so the same
        # logical key applied to differently shaped inputs gets its own slot.
        cache_key = key + str(x.shape.as_list())
        # bs.dropout returns a pair; the second element is stored back under
        # the cache key and handed in again as ``mask`` on the next call
        # (``None`` on first use). NOTE(review): presumably bs.dropout draws
        # a fresh mask when ``mask`` is None -- confirm against its docs.
        x, H.dropout_cache[cache_key] = bs.dropout(
            x,
            keep_prob=1.0 - resid_pdrop,
            mask=H.dropout_cache.get(cache_key),
            mask_shape=None,
        )
    return x