def _build_mpi_command()

in common/sagemaker_rl/mpi_launcher.py [0:0]


    def _build_mpi_command(self):
        """Assemble the ``mpirun`` command line that launches the MPI script.

        The command is built from:

        * the hosts in ``self.env.hosts`` (each suffixed with
          ``:<process_per_host>`` unless ``process_per_host`` is 1),
        * the total process count (``process_per_host * number of hosts``),
        * the network interface name (``self.env.network_interface_name``,
          replaced by ``"eth0"`` when ``self.instance_type == "local"``),
        * a fixed set of MCA / NCCL / library-path options,
        * the names of AWS credential variables present in ``os.environ``,
        * every ``name="value"`` pair returned by ``self.env.to_env_vars()``,
        * and finally the script path held in ``_MPI_SCRIPT``.

        Two diagnostic lines describing the hosts and the chosen network
        interface are printed as a side effect.

        Returns:
            str: The complete command line as a single string.
        """
        hosts = self.env.hosts
        slots = self.process_per_host
        num_processes = slots * len(hosts)

        # By default, use one process per GPU, or one process per node (if training with CPU).
        if slots == 1:
            host_list = hosts
        else:
            # Append the slot count to each host entry ("host:N").
            host_list = [host + ":" + str(slots) for host in hosts]

        summary = "Env Hosts: {} Hosts: {} process_per_hosts: {} num_processes: {}"
        print(summary.format(hosts, host_list, slots, num_processes))

        # Read the configured interface first; local mode overrides it afterwards.
        interface_name = self.env.network_interface_name
        if self.instance_type == "local":
            interface_name = "eth0"

        print("network interface name:" + interface_name + " " + str(self.instance_type))

        # Fragments are joined with no separator, so each one carries its own
        # leading space. The trailing space after "-np N" is kept so the
        # resulting string matches the historical output exactly.
        fragments = [
            "mpirun --host {}".format(",".join(host_list)),
            " -np {} ".format(num_processes),
            " --allow-run-as-root",
            " --display-map",
            " --tag-output",
            " -mca btl_tcp_if_include {}".format(interface_name),
            " -mca oob_tcp_if_include {}".format(interface_name),
            " -x NCCL_SOCKET_IFNAME={}".format(interface_name),
            " --mca plm_rsh_no_tree_spawn 1",
            " -mca orte_abort_on_non_zero_status 1",
            " -x NCCL_MIN_NRINGS=8 -x NCCL_DEBUG=INFO",
            " -x LD_LIBRARY_PATH -x PATH",
            " -x LD_PRELOAD={}".format(_CHANGE_HOSTNAME_LIBRARY),
        ]

        # Credentials are passed by name only, and only when they are set in
        # the current environment; their values are never written into the
        # command string here.
        credential_vars = ("AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY", "AWS_SESSION_TOKEN")
        fragments.extend(" -x {}".format(var) for var in credential_vars if var in os.environ)

        # Training-environment variables are passed with explicit, double-quoted values.
        fragments.extend(
            ' -x {}="{}"'.format(key, val) for key, val in self.env.to_env_vars().items()
        )

        fragments.append(" {}".format(_MPI_SCRIPT))

        return "".join(fragments)