src/losses.py
from typing import Tuple

import torch
import torch.distributed


def convert_to_distributed_tensor(tensor: torch.Tensor) -> Tuple[torch.Tensor, str]:
    """
    For some backends, such as NCCL, communication only works if the
    tensor is on the GPU. This helper moves the tensor to the required
    device and returns it along with a tag for the original device, so
    the caller can move it back after the collective completes.
    """
    orig_device = 'cpu' if not tensor.is_cuda else 'gpu'
    if (
        torch.distributed.is_available()
        # get_backend() raises if no default process group is initialized
        and torch.distributed.is_initialized()
        and torch.distributed.get_backend() == torch.distributed.Backend.NCCL
        and not tensor.is_cuda
    ):
        tensor = tensor.cuda()
    return (tensor, orig_device)
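

# A minimal usage sketch, not part of the original file: `convert_to_normal_tensor`
# and `all_reduce_mean` are assumed names, shown only to illustrate the intended
# round-trip around a collective. Both assume a process group is already initialized.
def convert_to_normal_tensor(tensor: torch.Tensor, orig_device: str) -> torch.Tensor:
    # Inverse of convert_to_distributed_tensor: restore the original device.
    if tensor.is_cuda and orig_device == 'cpu':
        tensor = tensor.cpu()
    return tensor


def all_reduce_mean(tensor: torch.Tensor) -> torch.Tensor:
    # Move to the device the backend requires, run the collective,
    # then move the result back to where the tensor started.
    tensor, orig_device = convert_to_distributed_tensor(tensor)
    torch.distributed.all_reduce(tensor, op=torch.distributed.ReduceOp.SUM)
    tensor = tensor / torch.distributed.get_world_size()
    return convert_to_normal_tensor(tensor, orig_device)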