def start_ddp_workers()

in dora/executor.py [0:0]


def start_ddp_workers(package, main, argv):
    """Spawn one `dora run` worker subprocess per CUDA device, then exit.

    Every worker is started as `<python> -m dora -P <package> run -- <argv>`
    with `RANK` and `WORLD_SIZE` added to a copy of the current environment.
    The rank 0 worker inherits the standard streams of this process; every
    other rank gets `stdin` detached and its `stdout`/`stderr` written to
    `worker_<rank>.log` inside the experiment folder.

    Args:
        package: value forwarded to the `-P` option of the `dora` command.
        main: object providing `get_xp(argv)`; the returned XP must expose
            `folder` and `rendezvous_file` path-like attributes.
        argv: command line arguments forwarded to each worker after `--`.

    Raises:
        SystemExit: always. The status is 1 when `torch` reports no CUDA
            device, otherwise `int(manager.failed)` once the
            `ChildrenManager` context has exited.
    """
    import torch as th

    world_size = th.cuda.device_count()
    if not world_size:
        fatal(
            "DDP is only available on GPU. Make sure GPUs are properly configured with cuda.")
        sys.exit(1)

    xp = main.get_xp(argv)
    xp.folder.mkdir(exist_ok=True, parents=True)
    # Remove any existing rendezvous file before the workers start
    # (presumably a leftover from a previous run -- confirm with the XP code).
    if xp.rendezvous_file.exists():
        xp.rendezvous_file.unlink()
    log(f"Starting {world_size} worker processes for DDP.")

    # The command line is the same for every rank; only the environment differs.
    command = [sys.executable, "-m", "dora", "-P", package, "run", "--"]
    command += argv

    with ChildrenManager() as manager:
        for rank in range(world_size):
            env = dict(os.environ)
            env['RANK'] = str(rank)
            env['WORLD_SIZE'] = str(world_size)

            if rank == 0:
                # Rank 0 keeps the terminal: default stdin/stdout/stderr.
                manager.add(sp.Popen(command, env=env))
                continue

            # The child gets its own copy of the log file descriptor when it
            # is spawned, so the parent's handle is closed right away (also
            # when `Popen` raises) instead of leaking one open file per rank.
            with open(xp.folder / f'worker_{rank}.log', 'w') as log_file:
                worker = sp.Popen(
                    command, env=env,
                    stdin=sp.DEVNULL, stdout=log_file, stderr=sp.STDOUT)
            manager.add(worker)
    sys.exit(int(manager.failed))