in adaptive_io.py [0:0]
def compute_dummy_loss(in_emb, out_emb):
    """Return a zero-valued term that reads one entry from each adaptive in/out tensor.

    A single scalar is taken from:

    * ``weight[0, 0]`` of every item in ``in_emb.emb_layers``,
    * ``[0, 0]`` of every item in ``in_emb.emb_projs``,
    * ``[0, 0]`` of every item in ``out_emb.out_projs``,
    * ``weight[0, 0]`` and ``bias[0]`` of every item in ``out_emb.out_layers``.

    The scalars are added together and the total is multiplied by ``0``, so
    the returned value is numerically zero (for finite entries) while still
    being computed from every one of those entries.

    Workaround for using the adaptive output/input modules with distributed
    code.
    NOTE(review): presumably this keeps every listed tensor in the autograd
    graph on each step, as distributed wrappers tend to require -- confirm
    against the training loop that consumes this value.
    """
    # Input side: one element per embedding table, one per projection.
    emb_weight_term = sum(layer.weight[0, 0] for layer in in_emb.emb_layers)
    emb_proj_term = sum(proj[0, 0] for proj in in_emb.emb_projs)

    # Output side: one element per projection, plus weight and bias of each layer.
    out_proj_term = sum(proj[0, 0] for proj in out_emb.out_projs)
    out_weight_term = sum(layer.weight[0, 0] for layer in out_emb.out_layers)
    out_bias_term = sum(layer.bias[0] for layer in out_emb.out_layers)

    # Same left-to-right accumulation order as a single chained expression.
    touched = (
        emb_weight_term
        + emb_proj_term
        + out_proj_term
        + out_weight_term
        + out_bias_term
    )

    # Scaling by zero removes any numeric contribution from the entries.
    return 0 * touched