in horovod/runner/js_run.py [0:0]
def generate_jsrun_rankfile(settings, path=None):
    """
    Generates rankfile to use with jsrun.
    It splits the cores among the processes, which leads to best performance according to experiments.

    Args:
        settings: Settings for running jsrun.
                  Note: settings.num_proc and settings.hosts must not be None.
                  settings.hosts is a comma-separated list of 'host:slots' entries.
        path: Optional path of the rankfile.
              Note: this file will be overwritten.
              When None, a new temporary file is created and left on disk for the caller.

    Returns:
        Path of the generated rankfile.

    Raises:
        ValueError: If a host entry asks for more slots than there are GPUs per host, or if
                    the listed hosts cannot provide settings.num_proc slots in total.
    """
    num_cpus = lsf.LSFUtils.get_num_cores() * lsf.LSFUtils.get_num_threads()
    # Queried once and reused for both the CPU split and the per-host slot check.
    gpus_per_host = lsf.LSFUtils.get_num_gpus()
    cpu_per_gpu = num_cpus // gpus_per_host

    host_list = (x.split(':') for x in settings.hosts.split(','))

    # Verify and truncate host list if necessary
    validated_list = []
    remaining_slots = settings.num_proc
    for host, slots in host_list:
        slots = int(slots)
        if slots > gpus_per_host:
            raise ValueError('Invalid host input, slot count for host \'{host}:{slots}\' is greater '
                             'than number of GPUs per host \'{gpus}\'.'.format(
                                 host=host, slots=slots, gpus=gpus_per_host))
        # Only take as many slots from this host as are still required.
        needed_slots = min(slots, remaining_slots)
        validated_list.append((host, needed_slots))
        remaining_slots -= needed_slots
        if remaining_slots == 0:
            break
    if remaining_slots != 0:
        raise ValueError('Not enough slots on the hosts to fulfill the {slots} requested.'.format(
            slots=settings.num_proc))

    # Generate rankfile
    if path is None:
        # tempfile.mktemp() only returns a name, leaving a window in which another process
        # can create that path first. NamedTemporaryFile creates the file atomically;
        # delete=False keeps it on disk after it is closed so the returned path stays valid.
        rankfile = tempfile.NamedTemporaryFile(mode='w', delete=False)
        path = rankfile.name
    else:
        rankfile = open(path, 'w')

    with rankfile as tmp:
        tmp.write('overlapping_rs: allow\n')
        tmp.write('cpu_index_using: logical\n')
        # Rank numbers keep increasing across hosts.
        rank = 0
        for host, slots in validated_list:
            # CPU indices restart at 0 on every host.
            cpu_val = 0
            tmp.write('\n')
            for _ in range(slots):
                # Each rank gets its own contiguous range of cpu_per_gpu CPUs.
                tmp.write('rank: {rank}: {{ hostname: {host}; cpu: {{{scpu}-{ecpu}}} ; gpu: * ; mem: * }}\n'.format(
                    rank=rank,
                    host=host,
                    scpu=cpu_val,
                    ecpu=cpu_val + cpu_per_gpu - 1
                ))
                rank += 1
                cpu_val += cpu_per_gpu
    return path