def _setup()

in src/sagemaker_training/torch_distributed.py [0:0]


    def _setup(self):
        """Export the EFA/NCCL environment variables used for a torchrun launch.

        Writes directly into ``os.environ`` of the current process:

        * ``FI_PROVIDER=efa`` when the instance type is listed in
          ``SM_EFA_NCCL_INSTANCES``.
        * ``FI_EFA_USE_DEVICE_RDMA=1`` and ``RDMAV_FORK_SAFE=1`` when the
          instance type is listed in ``SM_EFA_RDMA_INSTANCES``.
        * ``NCCL_SOCKET_IFNAME`` (stringified network interface name) and
          ``NCCL_PROTO=simple`` unconditionally.
        """
        logger.info("Starting distributed training through torchrun")

        env = os.environ
        instance_type = self._instance_type

        # EFA settings: turn on the EFA provider for instance types that support it.
        if instance_type in SM_EFA_NCCL_INSTANCES:
            env.update(FI_PROVIDER="efa")

        # EFA RDMA for one-sided and two-sided transfer on RDMA-capable instances.
        if instance_type in SM_EFA_RDMA_INSTANCES:
            env.update(FI_EFA_USE_DEVICE_RDMA="1", RDMAV_FORK_SAFE="1")

        # NCCL settings applied regardless of instance type.
        env.update(
            NCCL_SOCKET_IFNAME=str(self._network_interface_name),
            NCCL_PROTO="simple",
        )