int nccl_net_ofi_endpoint_release()

in src/nccl_ofi_net.cpp [1123:1165]


/*
 * Drop one reference on an endpoint, freeing it once the count reaches
 * zero or when the caller forces cleanup.
 *
 * @param ep            Endpoint to release; must not be NULL.
 * @param skip_lock     When true, ep->domain->domain_lock is neither taken
 *                      nor released here; the same flag is forwarded to
 *                      domain->release().
 * @param force_cleanup When true, the endpoint is freed even if its
 *                      reference count is still non-zero, and
 *                      domain->release() is not invoked.
 *
 * @return 0 on success; otherwise the non-zero value returned by
 *         ep->free_ep() or by domain->release().
 */
int nccl_net_ofi_endpoint_release(nccl_net_ofi_ep_t *ep, bool skip_lock, bool force_cleanup)
{
	assert(ep != NULL);

	/* Cache the domain now: ep must not be touched after free_ep() */
	nccl_net_ofi_domain_t *domain = ep->domain;
	const bool take_lock = !skip_lock;
	int ret = 0;

	if (take_lock) {
		nccl_net_ofi_mutex_lock(&domain->domain_lock);
	}

	ep->ref_cnt--;

	if (force_cleanup || ep->ref_cnt == 0) {
		/* Detach the endpoint from its domain before freeing it */
		domain->endpoint = NULL;

		if (force_cleanup && ep->ref_cnt != 0) {
			NCCL_OFI_INFO(NCCL_NET, "Endpoint %p still have ref count %d when released",
				      ep, ep->ref_cnt);
		}

		ret = ep->free_ep(ep);
		if (ret != 0) {
			NCCL_OFI_WARN("Freeing endpoint failed: %d", ret);
		}
	}

	if (take_lock) {
		nccl_net_ofi_mutex_unlock(&domain->domain_lock);
	}

	/* On forced cleanup the domain is handled by
	 * device->release_all_domain_and_ep(), so domain->release() is skipped
	 * to avoid domain lock issues once the domain has been freed. It is
	 * also skipped when freeing the endpoint failed. */
	if (force_cleanup || ret != 0) {
		return ret;
	}

	return domain->release(domain, skip_lock, false);
}