in src/train.py [0:0]
def main():
    """Parse arguments, configure (optional) distributed mode, and train.

    Distributed mode is enabled only when the ``WORLD_SIZE`` environment
    variable is set and its integer value is greater than 1. In that case
    the process rank is read from the ``RANK`` environment variable and the
    process group is initialised with ``args.dist_backend`` and
    ``args.dist_url``. Otherwise the process runs as rank 0.

    The following attributes are set on ``args`` before it is handed to
    ``Trainer``: ``distributed``, ``rank`` and ``is_master`` (true only for
    rank 0).

    Raises:
        KeyError: if distributed mode is selected but ``RANK`` is not set.
        ValueError: if ``WORLD_SIZE`` or ``RANK`` is not a valid integer.
    """
    args = parser.parse_args()

    world_size = os.environ.get('WORLD_SIZE')
    args.distributed = world_size is not None and int(world_size) > 1

    if not args.distributed:
        # Single-process run: this process is trivially the master.
        args.rank = 0
        args.is_master = True
    else:
        rank = int(os.environ['RANK'])
        args.is_master = rank == 0
        args.rank = rank
        print('Before init_process_group')
        dist.init_process_group(
            backend=args.dist_backend,
            init_method=args.dist_url,
        )

    Trainer(args).train()