static int write_nccl_topo_rec()

in src/nccl_ofi_topo.cpp [1514:1597]


/*
 * Recursively write the subtree rooted at `node` to `file`.
 *
 * Subtrees whose root carries no userdata are skipped entirely. For the
 * remaining nodes, an element is emitted depending on the node kind
 * (PCI bridge, leader NIC, NUMA node) and the children are then written
 * with a deeper indentation whenever an element was emitted.
 *
 * @param topo          hwloc topology, used to iterate over children
 * @param node          topology node to write
 * @param file          output stream handed to the write helpers
 * @param indent        indentation used for elements written for `node`
 * @param bridge_depth  number of PCI bridges seen on the path to `node`
 *
 * @return 0 on success, -EINVAL if a bridge node has no attribute struct,
 *         otherwise the non-zero value returned by a failing write helper
 *         or recursive call.
 */
static int write_nccl_topo_rec(hwloc_topology_t topo, hwloc_obj_t node, FILE *file, int indent, int bridge_depth)
{
	const int step = 2;
	nccl_ofi_topo_data_t *data = (nccl_ofi_topo_data_t *)node->userdata;
	bool bridge_open = false;
	bool cpu_open = false;
	int child_indent = indent;
	int rc = 0;

	/* Userdata is only attached to nodes that have a NIC or an
	 * Nvidia GPU somewhere below them; everything else is not
	 * part of the written topology. */
	if (!data) {
		return 0;
	}

	if (node->type == HWLOC_OBJ_BRIDGE) {
		if (!node->attr) {
			NCCL_OFI_WARN("Bridge is missing attribute struct");
			return -EINVAL;
		}

		/* The bridges at depth 0 and 1 form the host PCIe
		 * switch, which is not written. Every further PCIe
		 * switch consists of two bridges; an element is
		 * opened only for the first bridge of each pair. */
		if (bridge_depth >= 2 && (bridge_depth % 2) == 0) {
			rc = write_bridge_opening_tag(node, file, indent);
			if (rc != 0) {
				return rc;
			}
			bridge_open = true;
			child_indent = indent + step;
		}

		bridge_depth++;
	} else if (node->type == HWLOC_OBJ_PCI_DEVICE && data->info_list) {
		/* A PCI device that stores a NIC info list is the
		 * topology node of a leader NIC, i.e., the first NIC
		 * in that list. */
		rc = write_nic(node, file, indent);
		if (rc != 0) {
			return rc;
		}
		child_indent = indent + step;
	} else {
		/* NUMA node for which a CPU element is opened here,
		 * if any. */
		hwloc_obj_t cpu_node = NULL;

		if (node->type == HWLOC_OBJ_NUMANODE) {
			/* Prior to HWLOC 2.0, NUMA nodes are part of
			 * the normal topology tree. */
			cpu_node = node;
		} else {
			/* Since HWLOC 2.0, NUMA nodes live in the list
			 * of memory children and may "float" in a
			 * package, so none may lie on the path from
			 * the root to a PCI device that is written.
			 * Pick such a NUMA node up here. A NUMA memory
			 * child that stores userdata is on the path to
			 * a PCI device and is written by the recursion
			 * below instead. */
			cpu_node = get_numa_mem_child(node);
			if (cpu_node && cpu_node->userdata) {
				cpu_node = NULL;
			}
		}

		if (cpu_node) {
			rc = write_cpu_opening_tag(cpu_node, file, indent);
			if (rc != 0) {
				return rc;
			}
			cpu_open = true;
			child_indent = indent + step;
		}
	}

	/* Descend into all children; abort on the first failure. */
	for (hwloc_obj_t child = hwloc_get_next_child(topo, node, NULL);
	     child;
	     child = hwloc_get_next_child(topo, node, child)) {
		rc = write_nccl_topo_rec(topo, child, file, child_indent, bridge_depth);
		if (rc != 0) {
			return rc;
		}
	}

	/* Close whichever element was opened for this node, at the
	 * same indentation as its opening tag. */
	if (cpu_open) {
		return write_cpu_closing_tag(file, indent);
	}
	if (bridge_open) {
		return write_pci_closing_tag(file, indent);
	}

	return 0;
}