in src/transformers/utils/model_utils.py [0:0]
import torch

# Apex AMP is optional; fall back gracefully if it is not installed.
try:
    from apex import amp
except ImportError:
    amp = None

# ModelEma keeps an exponential-moving-average copy of the weights. Assumed to
# come from timm (timm.utils.ModelEma) or an equivalent local implementation
# with the same (model, decay, device, resume) signature.
from timm.utils import ModelEma


def deploy_model(model, optimizer, cfg):
    """
    Deploy the model to GPU(s) for single-GPU, DataParallel, or
    DistributedDataParallel (DDP) training.

    Optionally initializes NVIDIA Apex AMP and an exponential moving average
    (EMA) copy of the model. Returns the wrapped model, the optimizer, and the
    EMA model (None if EMA is disabled).
    """
    if cfg.DDP_CONFIG.DISTRIBUTED:
        # DDP: place the model on this process's GPU (or on the default device).
        if cfg.DDP_CONFIG.GPU is not None:
            torch.cuda.set_device(cfg.DDP_CONFIG.GPU)
            model = model.cuda(cfg.DDP_CONFIG.GPU)
        else:
            model = model.cuda()
    elif cfg.DDP_CONFIG.GPU is not None:
        # Single-GPU training on the specified device.
        torch.cuda.set_device(cfg.DDP_CONFIG.GPU)
        model = model.cuda(cfg.DDP_CONFIG.GPU)
    else:
        # DataParallel will divide and allocate batch_size to all available GPUs.
        model = torch.nn.DataParallel(model).cuda()
    # Important: AMP initialization must come after moving the model to CUDA.
    if cfg.CONFIG.TRAIN.USE_AMP and amp is not None:
        amp_level = cfg.CONFIG.TRAIN.AMP_LEVEL
        if amp_level not in ("O0", "O1", "O2"):
            print("Warning: unrecognized AMP level {}, falling back to O0".format(amp_level))
            amp_level = "O0"
        model, optimizer = amp.initialize(model, optimizer, opt_level=amp_level)
    # Important: EMA must be created after cuda(), the DataParallel wrapper, and
    # AMP, but before SyncBN conversion and the DDP wrapper.
    model_ema = None
    if cfg.CONFIG.TRAIN.USE_EMA:
        model_ema = ModelEma(model,
                             decay=cfg.CONFIG.TRAIN.EMA_DECAY,
                             device='cpu' if cfg.CONFIG.TRAIN.EMA_FORCE_CPU else '',
                             resume='')
    if cfg.DDP_CONFIG.DISTRIBUTED:
        if cfg.DDP_CONFIG.GPU is not None:
            model = torch.nn.parallel.DistributedDataParallel(model,
                                                              device_ids=[cfg.DDP_CONFIG.GPU],
                                                              find_unused_parameters=False)
        else:
            model = torch.nn.parallel.DistributedDataParallel(model, find_unused_parameters=False)

    return model, optimizer, model_ema
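

# Usage sketch: how deploy_model might be wired into a training script. This is
# an illustrative example under assumptions, not code from the original file:
# the SimpleNamespace cfg below only mirrors the fields deploy_model reads, and
# the ResNet-18 model and SGD optimizer are placeholders. Running it requires a
# CUDA device, since deploy_model calls .cuda().
if __name__ == "__main__":
    from types import SimpleNamespace

    import torchvision

    # Minimal config with the DDP_CONFIG / CONFIG.TRAIN fields used above
    # (values are assumptions for the demo).
    cfg = SimpleNamespace(
        DDP_CONFIG=SimpleNamespace(DISTRIBUTED=False, GPU=0),
        CONFIG=SimpleNamespace(
            TRAIN=SimpleNamespace(
                USE_AMP=False, AMP_LEVEL="O1",
                USE_EMA=True, EMA_DECAY=0.9999, EMA_FORCE_CPU=False,
            )
        ),
    )

    net = torchvision.models.resnet18()
    opt = torch.optim.SGD(net.parameters(), lr=0.01)

    # Non-distributed, single-GPU path: model moved to GPU 0, EMA copy created.
    net, opt, net_ema = deploy_model(net, opt, cfg)
    print(type(net).__name__, type(net_ema).__name__)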