static int write_topo_file()

in src/nccl_ofi_rdma.cpp [371:455]


static int write_topo_file(nccl_ofi_topo_t *topo)
{
	int ret = 0;
	int topo_fd = -1;
	FILE *file = NULL;

	/**
	 * If `NCCL_TOPO_FILE` is already set, don't set it again.
	 *
	 * Note about forking behavior: in some Python applications, after calling
	 * plugin init, the process will fork(). This `NCCL_TOPO_FILE` environment
	 * variable, as well as the file descriptor it refers to, will be copied
	 * to the child process, and will continue to point to a valid topology
	 * file until the child process exits.
	 */
	if (getenv("NCCL_TOPO_FILE")) {
		NCCL_OFI_TRACE(NCCL_INIT | NCCL_NET,
			       "NCCL_TOPO_FILE environment variable is already set to %s",
			       getenv("NCCL_TOPO_FILE"));
		goto exit;
	}

	/* Create file descriptor */
	topo_fd = memfd_create("ofi_nccl_topo", 0);
	if (topo_fd == -1) {
		NCCL_OFI_WARN("Failed to create anonymous topology file. ERROR: %s",
			      strerror(errno));
		ret = -errno;
		goto exit;
	}

	/* Open file from file descriptor */
	file = fdopen(topo_fd, "w");
	if (file == NULL) {
		NCCL_OFI_WARN("Failed to open NCCL topology file using file descriptor. ERROR %s",
			      strerror(errno));
		ret = -errno;
		close(topo_fd);
		goto exit;
	}

	ret = nccl_ofi_topo_write(topo, file);
	if (ret) {
		NCCL_OFI_WARN("Failed to write NCCL topology using file descriptor. RC: %d", ret);
		goto error;
	}

	/* Flush buffered writes to file. We don't close the file here so that
	   the underlying descriptor remains open, which we will reference
	   in `NCCL_TOPO_FILE`. */
	if (fflush(file) == EOF) {
		NCCL_OFI_WARN("Unable to flush NCCL topology file. ERROR: %s",
			      strerror(errno));
		ret = -errno;
		goto error;
	}

	char filename[32];
	if (snprintf(filename, sizeof(filename), "/proc/self/fd/%d", topo_fd) < 0) {
		NCCL_OFI_WARN("Errror preparing topo file name");
		ret = -EIO;
		goto error;
	}

	/* Set topology file path environment variable `NCCL_TOPO_FILE` */
	NCCL_OFI_INFO(NCCL_INIT | NCCL_NET,
		      "Setting NCCL_TOPO_FILE environment variable to %s",
		      filename);
	if (setenv("NCCL_TOPO_FILE", filename, 1) != 0) {
		NCCL_OFI_WARN("Unable to set NCCL_TOPO_FILE. ERROR: %s",
			      strerror(errno));
		ret = -errno;
		goto error;
	}

	goto exit;

error:
	if (file) {
		fclose(file);
	}

exit:
	return ret;
}