def get_common_interfaces()

in horovod/runner/driver/driver_service.py [0:0]


def get_common_interfaces(settings, all_host_names, remote_host_names=None, fn_cache=None):
    '''
    Work out which network interfaces should be used across the given hosts.

    The outcome depends on the environment and the hosts involved:

    * On an LSF cluster no discovery is performed and ``None`` is returned.
    * If every host is local, the interfaces carrying the IPv4 address
      127.0.0.1 are collected (restricted to ``settings.nics`` when set).
    * If at least one host is remote, ``settings.nics`` is returned when it is
      set; otherwise the interfaces are probed through ``_driver_fn``.

    :param settings: the object that contains the setting for running horovod
    :type settings: horovod.runner.common.util.settings.Settings
    :param all_host_names: list of the host names
    :type all_host_names: list(string)
    :param remote_host_names: list of the remote host names; when ``None`` it is
        derived from ``all_host_names`` via ``network.filter_local_addresses``.
    :type remote_host_names: list(string)
    :param fn_cache: Cache storing the results of checks performed by horovod;
        forwarded unchanged to ``_driver_fn``.
    :type fn_cache: horovod.runner.util.cache.Cache
    :return: ``None`` on LSF, otherwise the collection of interface names chosen
        by one of the branches described above.
    :raises ValueError: if all hosts are local and no (permitted) interface has
        the address 127.0.0.1.
    '''
    # Interface discovery slows the job start considerably on LSF clusters, so
    # it is skipped there altogether.
    if lsf.LSFUtils.using_lsf():
        return None

    if remote_host_names is None:
        remote_host_names = network.filter_local_addresses(all_host_names)

    if len(remote_host_names) == 0:
        # Every host is local: pick the interfaces bound to 127.0.0.1.
        if settings.verbose >= 2:
            print('All hosts are local, finding the interfaces '
                  'with address 127.0.0.1')

        loopback_nics = {
            iface_name
            for iface_name, iface_addrs in net_if_addrs().items()
            # Honour an explicit NIC restriction, if one was configured.
            if (not settings.nics or iface_name in settings.nics)
            and any(entry.family == AF_INET and entry.address == '127.0.0.1'
                    for entry in iface_addrs)
        }

        if not loopback_nics:
            raise ValueError('No interface is found for address 127.0.0.1.')

        if settings.verbose >= 2:
            print('Local interface found ' + ' '.join(loopback_nics))
        return loopback_nics

    # At least one remote host from here on.
    if settings.nics:
        # Explicitly requested interfaces win. All the workers must have at
        # least one of those interfaces available.
        return settings.nics

    # Probe for the common, routed interfaces on all hosts (remote and local)
    # so they can be handed to NCCL. The probe is expected to find at least
    # one interface, otherwise it raises an exception.
    if settings.verbose >= 2:
        print('Testing interfaces on all the hosts.')

    local_host_names = set(all_host_names).difference(remote_host_names)
    common_nics = _driver_fn(all_host_names, local_host_names, settings, fn_cache=fn_cache)

    if settings.verbose >= 2:
        print('Interfaces on all the hosts were successfully checked.')
        print('Common interface found: ' + ' '.join(common_nics))

    return common_nics