in distributed/rpc/rnn/main.py [0:0]
import torch
import torch.distributed.autograd as dist_autograd
import torch.optim as optim
from torch.distributed.optim import DistributedOptimizer

import rnn  # local module that defines RNNModel


def _run_trainer():
    r"""
    The trainer creates a distributed RNNModel and a DistributedOptimizer.
    It then trains the model on random input data.
    """
    batch = 5
    ntoken = 7
    ninp = 2
    nhid = 3
    nindices = 6
    nlayers = 4
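    # initial hidden state: two (nlayers, nindices, nhid) tensors, presumably
    # the (h, c) pair an LSTM-style RNN expects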
    hidden = (
        torch.randn(nlayers, nindices, nhid),
        torch.randn(nlayers, nindices, nhid)
    )
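    # build the model; 'ps' names the remote worker (the parameter server)
    # that owns the remote parts of RNNModel, which is defined in the local
    # rnn module (not shown here)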
    model = rnn.RNNModel('ps', ntoken, ninp, nhid, nlayers)
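    # DistributedOptimizer needs RRefs to every parameter it should update;
    # model.parameter_rrefs() gathers them across workers, and the optimizer
    # then runs a local optim.SGD on whichever worker owns each parameter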
    # setup distributed optimizer
    opt = DistributedOptimizer(
        optim.SGD,
        model.parameter_rrefs(),
        lr=0.05,
    )
    criterion = torch.nn.CrossEntropyLoss()
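
    # produce a handful of dummy batches: uninitialized LongTensors taken
    # modulo ntoken / nindices so the values stay inside valid index ranges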
    def get_next_batch():
        for _ in range(5):
            data = torch.LongTensor(batch, nindices) % ntoken
            target = torch.LongTensor(batch, ntoken) % nindices
            yield data, target

    # train for 10 iterations
    for epoch in range(10):
        for data, target in get_next_batch():
            # create a distributed autograd context for this iteration
            with dist_autograd.context() as context_id:
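                # detach the hidden state in place so this iteration's
                # backward pass stops here instead of reaching into graphs
                # built in earlier iterations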
                hidden[0].detach_()
                hidden[1].detach_()
                output, hidden = model(data, hidden)
                loss = criterion(output, target)
                # run distributed backward pass
                dist_autograd.backward(context_id, [loss])
                # run distributed optimizer
                opt.step(context_id)
                # not necessary to zero grads as each iteration creates a
                # different distributed autograd context which hosts
                # different grads
        print("Training epoch {}".format(epoch))