def _build_mpi_command()

in Vehicle Routing Problem/common/sagemaker_rl/mpi_launcher.py [0:0]


    def _build_mpi_command(self):
        """Build the ``mpirun`` command line that launches ``_MPI_SCRIPT``.

        Reads ``self.env.hosts``, ``self.env.network_interface_name``,
        ``self.env.to_env_vars()``, ``self.process_per_host`` and
        ``self.instance_type``. Prints two diagnostic lines to stdout
        (host layout and selected network interface).

        Returns:
            str: The complete command as one string: the ``mpirun`` options,
            one ``-x`` export per forwarded environment variable, and finally
            the path held in ``_MPI_SCRIPT``.
        """
        num_hosts = len(self.env.hosts)
        num_processes = self.process_per_host * num_hosts

        # By default, use one process per GPU, or one process per node (if training with CPU).
        if self.process_per_host == 1:
            host_list = self.env.hosts
        else:
            host_list = [host + ':{}'.format(self.process_per_host) for host in self.env.hosts]

        print("Env Hosts: {} Hosts: {} process_per_hosts: {} num_processes: {}".format(self.env.hosts, host_list,
                                                                                       self.process_per_host,
                                                                                       num_processes))
        credential_vars = ['AWS_ACCESS_KEY_ID', 'AWS_SECRET_ACCESS_KEY', 'AWS_SESSION_TOKEN']

        # The environment's interface name is always read first; local mode then overrides it.
        interface_name = self.env.network_interface_name
        if self.instance_type == "local":
            interface_name = "eth0"

        print('network interface name:' + interface_name + " " + str(self.instance_type))

        # Fragments are concatenated verbatim (each carries its own leading space),
        # so the resulting string keeps the exact spacing of the historical command.
        command_parts = [
            'mpirun --host {}'.format(",".join(host_list)),
            " -np {} ".format(num_processes),
            " --allow-run-as-root",
            " --display-map",
            " --tag-output",
            " -mca btl_tcp_if_include {}".format(interface_name),
            " -mca oob_tcp_if_include {}".format(interface_name),
            " -x NCCL_SOCKET_IFNAME={}".format(interface_name),
            " --mca plm_rsh_no_tree_spawn 1",
            " -mca orte_abort_on_non_zero_status 1",
            " -x NCCL_MIN_NRINGS=8 -x NCCL_DEBUG=INFO",
            " -x LD_LIBRARY_PATH -x PATH",
            " -x LD_PRELOAD={}".format(_CHANGE_HOSTNAME_LIBRARY),
        ]

        # Credentials are forwarded by name only, and only when set in this
        # process, so their values never appear in the command string.
        command_parts.extend(" -x {}".format(v) for v in credential_vars if v in os.environ)

        for name, value in self.env.to_env_vars().items():
            # Escape backslashes and double quotes so a value such as a JSON
            # document cannot close the surrounding double-quoted argument early.
            # Values without either character are emitted exactly as before.
            # NOTE(review): assumes the caller parses this string with a shell or
            # shlex-style splitter - confirm against the call site.
            escaped_value = str(value).replace('\\', '\\\\').replace('"', '\\"')
            command_parts.append(' -x {}="{}"'.format(name, escaped_value))

        command_parts.append(" {}".format(_MPI_SCRIPT))

        return "".join(command_parts)