int nccl_ofi_freelist_add()

in src/nccl_ofi_freelist.cpp [238:366]


/*
 * Grow a freelist by one block holding up to num_entries new entries.
 *
 * When freelist->max_entry_count is non-zero, the request is clamped so
 * that num_allocated_entries never exceeds it.  One buffer is obtained
 * from nccl_net_ofi_alloc_mr_buffer() for all entries of the block,
 * optionally registered through freelist->regmr_fn, and carved into
 * entries of freelist->entry_size bytes, each starting with
 * freelist->memcheck_redzone_size bytes of redzone.
 *
 * The freelist itself (blocks, entries, num_allocated_entries) is only
 * modified once every step has succeeded, so a failure leaves it exactly
 * as it was on entry.
 *
 * @param freelist     freelist to extend
 * @param num_entries  number of entries requested for the new block
 *
 * @return 0 on success,
 *         -ENOMEM if the freelist is already at max_entry_count or a
 *         metadata allocation fails,
 *         otherwise the non-zero code returned by
 *         nccl_net_ofi_alloc_mr_buffer(), regmr_fn or entry_init_fn.
 */
int nccl_ofi_freelist_add(nccl_ofi_freelist_t *freelist,
			  size_t num_entries)
{
	int ret = 0;
	size_t allocation_count = num_entries;
	size_t block_mem_size = 0;
	size_t user_entry_size = 0;
	char *buffer = NULL;
	/* Walks through the block while entries are carved out; `buffer`
	   itself must keep pointing at the start of the allocation so the
	   error path can release it. */
	char *cursor = NULL;
	struct nccl_ofi_freelist_block_t *block = NULL;
	/* Head of the entry chain being built; published to the freelist
	   only after all entries were set up successfully. */
	nccl_ofi_freelist_elem_t *chain_head = NULL;
	char *b_end = NULL;
	char *b_end_aligned = NULL;

	if (freelist->max_entry_count > 0 &&
	    freelist->max_entry_count - freelist->num_allocated_entries < allocation_count) {
		allocation_count = freelist->max_entry_count - freelist->num_allocated_entries;
	}

	if (allocation_count == 0) {
		NCCL_OFI_WARN("freelist %p is full", freelist);
		return -ENOMEM;
	}

	/* init is expected to guarantee that entry_size is a multiple
	   of the pointer size, so each entry will be pointer aligned.
	   The block tracking structure is allocated separately (see
	   calloc() below) and does not live inside this buffer. */
	block_mem_size = freelist_buffer_mem_size_full_pages(freelist->entry_size, allocation_count);
	ret = nccl_net_ofi_alloc_mr_buffer(block_mem_size, (void **)&buffer);
	if (OFI_UNLIKELY(ret != 0)) {
		NCCL_OFI_WARN("freelist extension allocation failed (%d)", ret);
		return ret;
	}

	block = (struct nccl_ofi_freelist_block_t *)
		calloc(1, sizeof(struct nccl_ofi_freelist_block_t));
	if (block == NULL) {
		NCCL_OFI_WARN("Failed to allocate freelist block metadata");
		/* ret is 0 from the successful buffer allocation above;
		   without this the failure would be reported as success. */
		ret = -ENOMEM;
		goto error;
	}
	block->memory = buffer;
	block->memory_size = block_mem_size;
	block->next = freelist->blocks;

	/* Memcheck bookkeeping for the tail of the buffer that lies
	   beyond the last MEMCHECK_GRANULARITY boundary, i.e. the range
	   [b_end_aligned, b_end).  Both calls cover that same range. */
	b_end = (char *)((uintptr_t)buffer + block_mem_size);
	b_end_aligned = (char *)NCCL_OFI_ROUND_DOWN((uintptr_t)b_end,
							  (uintptr_t)MEMCHECK_GRANULARITY);
	nccl_net_ofi_mem_noaccess(b_end_aligned,
				  block_mem_size - (b_end_aligned - buffer));
	nccl_net_ofi_mem_undefined(b_end_aligned, b_end - b_end_aligned);

	if (freelist->regmr_fn) {
		ret = freelist->regmr_fn(freelist->regmr_opaque, buffer,
					 block_mem_size,
					 &block->mr_handle);
		if (ret != 0) {
			NCCL_OFI_WARN("freelist extension registration failed: %d", ret);
			goto error;
		}
	} else {
		block->mr_handle = NULL;
	}

	block->entries = (nccl_ofi_freelist_elem_t *)
		calloc(allocation_count, sizeof(*(block->entries)));
	if (block->entries == NULL) {
		NCCL_OFI_WARN("Failed to allocate entries");
		/* ret is 0 here (registration succeeded or was skipped). */
		ret = -ENOMEM;
		goto error;
	}

	block->num_entries = allocation_count;

	/* Size of the part of each entry that follows its redzone. */
	user_entry_size = freelist->entry_size - freelist->memcheck_redzone_size;

	/* Build the new entries as a chain in front of the current free
	   entries, without touching the freelist yet.  Entries are pushed
	   one by one, so the last entry of the block ends up at the head. */
	chain_head = freelist->entries;
	cursor = buffer;
	for (size_t i = 0 ; i < allocation_count ; ++i) {
		nccl_ofi_freelist_elem_t *entry = &block->entries[i];

		/* Add redzone before entry */
		nccl_net_ofi_mem_noaccess(cursor, freelist->memcheck_redzone_size);
		cursor += freelist->memcheck_redzone_size;

		if (freelist->have_reginfo) {
			entry->mr_handle = block->mr_handle;
		} else {
			entry->mr_handle = NULL;
		}
		entry->ptr = cursor;
		entry->next = chain_head;
		chain_head = entry;

		/* NOTE(review): the entry is marked noaccess before
		   entry_init_fn runs on it; the original order is kept
		   here -- confirm it is intended. */
		nccl_net_ofi_mem_noaccess(entry->ptr, user_entry_size);

		if (freelist->entry_init_fn) {
			ret = freelist->entry_init_fn(entry->ptr);
			if (ret != 0) {
				goto error;
			}
		}

		cursor += user_entry_size;
	}

	/* Everything succeeded: publish the block and its entries. */
	freelist->blocks = block;
	freelist->entries = chain_head;
	freelist->num_allocated_entries += allocation_count;

	/* Block structure will not be accessed until freelist is destroyed */
	nccl_net_ofi_mem_noaccess(block, sizeof(struct nccl_ofi_freelist_block_t));

	return 0;

error:
	if (block != NULL) {
		/* NOTE(review): if regmr_fn succeeded, block->mr_handle is
		   not released on this path; no deregistration callback is
		   visible here -- confirm whether one must be called. */
		free(block->entries);
		free(block);
		block = NULL;
	}
	if (buffer != NULL) {
		/* Reset memcheck guards of block memory. This step
		 * needs to be performed manually since reallocation
		 * of the same memory via mmap() is invisible to
		 * ASAN. */
		nccl_net_ofi_mem_undefined(buffer, block_mem_size);
		nccl_net_ofi_dealloc_mr_buffer(buffer, block_mem_size);
		buffer = NULL;
	}
	return ret;
}