in src/nccl_ofi_topo.cpp [366:412]
static int count_nodes_with_accel_or_nic_in_subtree(hwloc_topology_t topo,
struct fi_info *info_list,
int *count)
{
int ret = 0;
hwloc_obj_t obj = NULL;
while ((obj = hwloc_get_next_pcidev(topo, obj))) {
bool is_accel = false;
struct fi_info *info;
ret = is_accelerator_dev(obj, &is_accel);
if (ret != 0) {
NCCL_OFI_WARN("Error while checking whether hwloc topology node is nvidia GPU");
return ret;
}
ret = get_info_for_node(obj, info_list, &info);
if (ret != 0) {
NCCL_OFI_WARN("Error while retrieving libfabric NIC info struct corresponding to hwloc topology node");
return ret;
}
if (is_accel || info) {
/* Walk towards root, set counter and increment counter each time counter is set */
hwloc_obj_t node = obj;
while (node) {
/* Skip node if this function is counting and
* if node it has already contributed to the
* counter (indicated by set user data
* pointer) */
if (count && node->userdata) break;
node->userdata = count;
if (count) ++(*count);
node = node->parent;
}
}
}
/* While counting, the function sets the user data pointer of
* the topology nodes to avoid counting nodes
* twice. Afterwards, invoke this function another time to
* clear the user data pointers. */
if (count != NULL) return count_nodes_with_accel_or_nic_in_subtree(topo, info_list, NULL);
else return 0;
}