def deploy_model()

in src/transformers/utils/model_utils.py


import torch

try:
    from apex import amp  # optional NVIDIA Apex; amp stays None when it is not installed
except ImportError:
    amp = None

# Assumption: ModelEma is a timm-style EMA wrapper taking (model, decay, device, resume);
# the import below is illustrative and may differ in the actual repository.
from timm.utils import ModelEma


def deploy_model(model, optimizer, cfg):
    """
    Deploy model to multiple GPUs for DDP training.
    """
    if cfg.DDP_CONFIG.DISTRIBUTED:
        # For DDP, pin this process to its assigned GPU before moving the model
        if cfg.DDP_CONFIG.GPU is not None:
            torch.cuda.set_device(cfg.DDP_CONFIG.GPU)
            model.cuda(cfg.DDP_CONFIG.GPU)
        else:
            model.cuda()
    elif cfg.DDP_CONFIG.GPU is not None:
        torch.cuda.set_device(cfg.DDP_CONFIG.GPU)
        model = model.cuda(cfg.DDP_CONFIG.GPU)
    else:
        # DataParallel will divide and allocate batch_size to all available GPUs
        model = torch.nn.DataParallel(model).cuda()

    # Important: AMP must be initialized after the model has been moved to GPU
    if cfg.CONFIG.TRAIN.USE_AMP and amp is not None:
        amp_level = cfg.CONFIG.TRAIN.AMP_LEVEL
        if amp_level not in ["O0", "O1", "O2"]:
            print("Warning: unrecognized AMP opt_level {}, falling back to O0".format(amp_level))
            amp_level = "O0"
        model, optimizer = amp.initialize(model, optimizer, opt_level=amp_level)

    # Important: EMA should be after cuda(), DP wrapper, and AMP but before SyncBN and DDP wrapper
    model_ema = None
    if cfg.CONFIG.TRAIN.USE_EMA:
        model_ema = ModelEma(model,
                             decay=cfg.CONFIG.TRAIN.EMA_DECAY,
                             device='cpu' if cfg.CONFIG.TRAIN.EMA_FORCE_CPU else '',
                             resume='')

    if cfg.DDP_CONFIG.DISTRIBUTED:
        if cfg.DDP_CONFIG.GPU is not None:
            model = torch.nn.parallel.DistributedDataParallel(model,
                                                              device_ids=[cfg.DDP_CONFIG.GPU],
                                                              find_unused_parameters=False)
        else:
            model = torch.nn.parallel.DistributedDataParallel(model, find_unused_parameters=False)

    return model, optimizer, model_ema
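

For reference, here is a minimal single-GPU usage sketch. The driver code below is hypothetical: the SimpleNamespace config simply mirrors the cfg fields that deploy_model() reads, and the torchvision ResNet-18 and SGD optimizer are stand-ins for whatever model and optimizer the training script actually builds.

from types import SimpleNamespace

import torch
import torchvision

# Hypothetical config mirroring the fields used by deploy_model(); a real run
# would read these from the project's configuration files instead.
cfg = SimpleNamespace(
    DDP_CONFIG=SimpleNamespace(DISTRIBUTED=False, GPU=0),
    CONFIG=SimpleNamespace(
        TRAIN=SimpleNamespace(
            USE_AMP=False, AMP_LEVEL="O1",
            USE_EMA=False, EMA_DECAY=0.9999, EMA_FORCE_CPU=False,
        )
    ),
)

model = torchvision.models.resnet18()  # placeholder model
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

# Non-distributed, single-GPU path: the model is moved to GPU 0, AMP and EMA
# are skipped, and model_ema comes back as None. Requires a CUDA device.
model, optimizer, model_ema = deploy_model(model, optimizer, cfg)

In the distributed case, the process group must already be initialized (e.g. via torch.distributed.init_process_group) before calling deploy_model, since the DistributedDataParallel wrapper at the end requires it.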