static inline void fi_bgq_readv_internal()

in prov/bgq/include/rdma/fi_direct_rma.h [66:305]


/*
 * Append the cq "byte counter" direct-put descriptor at 'cq_desc' (a slot in
 * the payload of 'desc'), inject 'desc', and queue 'bgq_context' as pending.
 *
 * The context is initialized as an FI_RMA | FI_READ completion with a byte
 * counter of 1; the appended descriptor targets the physical address of that
 * byte counter. 'desc' has its message length and rma.ndesc bumped to account
 * for the extra descriptor before the injection fifo is advanced.
 *
 * 'bgq_context' must be non-NULL and 8 byte aligned (asserted).
 */
static inline void fi_bgq_readv_append_cq_desc (struct fi_bgq_ep * bgq_ep,
		MUHWI_Descriptor_t * desc,
		MUHWI_Descriptor_t * cq_desc,
		union fi_bgq_context * bgq_context,
		const uint64_t fifo_map,
		const int lock_required)
{
	/* initialize the completion entry */
	assert(bgq_context);
	assert(((uintptr_t)bgq_context & 0x07ull) == 0);	/* must be 8 byte aligned */
	bgq_context->flags = FI_RMA | FI_READ;
	bgq_context->len = 0;
	bgq_context->buf = NULL;
	bgq_context->byte_counter = 1;
	bgq_context->tag = 0;

	/* cnk_rc is only consumed by the assert, hence the 'unused' attribute */
	uint64_t byte_counter_paddr = 0;
	uint32_t cnk_rc __attribute__ ((unused));
	cnk_rc = fi_bgq_cnk_vaddr2paddr((void*)&bgq_context->byte_counter,
			sizeof(uint64_t), &byte_counter_paddr);
	assert(cnk_rc == 0);

	qpx_memcpy64((void*)cq_desc,
		(const void*)&bgq_ep->tx.read.cq_model);

	cq_desc->Torus_FIFO_Map = fifo_map;
	MUSPI_SetRecPayloadBaseAddressInfo(cq_desc,
		FI_BGQ_MU_BAT_ID_GLOBAL, byte_counter_paddr);

	/* account for the extra descriptor in the outer mfifo descriptor */
	desc->Message_Length += sizeof(MUHWI_Descriptor_t);
	union fi_bgq_mu_packet_hdr * hdr = (union fi_bgq_mu_packet_hdr *) &desc->PacketHeader;
	hdr->rma.ndesc += 1;

	/* the descriptor must be complete before the fifo tail is advanced */
	MUSPI_InjFifoAdvanceDesc(bgq_ep->tx.injfifo.muspi_injfifo);

	fi_bgq_cq_enqueue_pending(bgq_ep->send_cq, bgq_context, lock_required);
}

/*
 * Inject a vectored read as a single memory-fifo descriptor whose immediate
 * payload is an array of direct-put descriptors: one per iovec element,
 * optionally followed by a counter-update descriptor and/or a cq
 * byte-counter descriptor.
 *
 * The payload holds at most 8 descriptors. When the data movement
 * descriptors leave no room for the requested completion descriptor(s), the
 * data descriptors are injected first and the completion descriptor(s) are
 * sent with a single recursive call that carries no iovec (niov == 0).
 *
 *   bgq_ep          - endpoint; supplies the injection fifo, the descriptor
 *                     models, the write counter and the send cq
 *   iov, niov       - local buffers to read into; niov must be <= 8
 *                     (asserted); iov may be NULL when niov is 0
 *   bgq_target_addr - target; supplies the torus destination, the fifo map
 *                     and the reception fifo id
 *   addr            - per-element value stored in the Pa_Payload field of
 *                     each direct-put descriptor (presumably the remote
 *                     source address/offset - confirm against the target
 *                     side handling)
 *   key             - per-element key; must fit in 48 bits (asserted)
 *   bgq_context     - completion context; must be non-NULL and 8 byte
 *                     aligned whenever a cq completion is generated
 *   tx_op_flags     - only FI_COMPLETION is examined here
 *   enable_cq       - a cq completion is generated only if this is non-zero
 *                     and FI_COMPLETION is set in tx_op_flags
 *   enable_cntr     - a counter update is generated only if this is non-zero
 *                     and the endpoint has a write counter bound
 *   lock_required   - forwarded to fi_bgq_cq_enqueue_pending()
 *
 * With neither a cq nor a counter completion, niov must be > 0 (asserted).
 */
static inline void fi_bgq_readv_internal (struct fi_bgq_ep * bgq_ep,
		const struct iovec * iov,
		const size_t niov,
		const union fi_bgq_addr * bgq_target_addr,
		const uint64_t * addr,
		const uint64_t * key,
		union fi_bgq_context * bgq_context,
		const uint64_t tx_op_flags,
		const uint64_t enable_cq,
		const uint64_t enable_cntr,
		const int lock_required)
{
#ifdef FI_BGQ_TRACE
	fprintf(stderr,"fi_bgq_readv_internal starting - niov is %zu do_cntr is %d\n",niov,(enable_cntr && ( bgq_ep->write_cntr != 0)));
	fflush(stderr);
#endif
	assert(niov <= 8);

	const uint64_t do_cq = enable_cq && (tx_op_flags & FI_COMPLETION);

	struct fi_bgq_cntr * write_cntr = bgq_ep->write_cntr;
	const uint64_t do_cntr = enable_cntr && (write_cntr != NULL);

	MUHWI_Descriptor_t * model = &bgq_ep->tx.read.emulation.mfifo_model;

	const uint64_t fifo_map = fi_bgq_addr_get_fifo_map(bgq_target_addr->fi);

	/* busy-wait until a fifo slot is available .. */
	MUHWI_Descriptor_t * desc =
		fi_bgq_spi_injfifo_tail_wait(&bgq_ep->tx.injfifo);

	/* copy the descriptor model into the injection fifo */
	qpx_memcpy64((void*)desc, (const void *)model);

	/* set the target torus address and fifo map */
	desc->PacketHeader.NetworkHeader.pt2pt.Destination = fi_bgq_uid_get_destination(bgq_target_addr->uid.fi);
	desc->Torus_FIFO_Map = fifo_map;

	/* locate the payload lookaside slot */
	MUHWI_Descriptor_t * dput_desc =
		(MUHWI_Descriptor_t *)fi_bgq_spi_injfifo_immediate_payload(&bgq_ep->tx.injfifo,
			desc, &desc->Pa_Payload);

	/* payload length is 'niov' descriptors; completion descriptors
	 * appended below grow it further */
	desc->Message_Length = (niov << BGQ_MU_DESCRIPTOR_SIZE_IN_POWER_OF_2);

	desc->PacketHeader.messageUnitHeader.Packet_Types.Memory_FIFO.Rec_FIFO_Id =
		fi_bgq_addr_rec_fifo_id(bgq_target_addr->fi);

	union fi_bgq_mu_packet_hdr * hdr = (union fi_bgq_mu_packet_hdr *) &desc->PacketHeader;
	hdr->rma.ndesc = niov;

	/* TODO - how to specify multiple remote injection fifos? */

	/* second view of the same payload slots, used to reach the rma fields */
	union fi_bgq_mu_descriptor * fi_dput_desc = (union fi_bgq_mu_descriptor *) dput_desc;

	size_t i;
	for (i = 0; i < niov; ++i) {	/* on fence this loop will compile out (niov is 0) */

		qpx_memcpy64((void*)&dput_desc[i],
			(const void*)&bgq_ep->tx.read.emulation.dput_model);

		dput_desc[i].Torus_FIFO_Map = fifo_map;
		dput_desc[i].Message_Length = iov[i].iov_len;
		dput_desc[i].Pa_Payload = addr[i];

		/* determine the physical address of the destination data location */
		uint64_t iov_base_paddr = 0;
		uint32_t cnk_rc __attribute__ ((unused));
		cnk_rc = fi_bgq_cnk_vaddr2paddr(iov[i].iov_base, iov[i].iov_len, &iov_base_paddr);
		assert(cnk_rc==0);
		MUSPI_SetRecPayloadBaseAddressInfo(&dput_desc[i], FI_BGQ_MU_BAT_ID_GLOBAL, iov_base_paddr);

		assert((key[i] & 0xFFFF000000000000ul) == 0);	/* TODO - change this when key size > 48b */
		fi_dput_desc[i].rma.key_lsb = key[i];
	}

	if (do_cntr && niov < 8) {	/* likely */
#ifdef FI_BGQ_TRACE
		fprintf(stderr,"fi_bgq_readv_internal do_cntr && niov %zu < 8\n",niov);
		fflush(stderr);
#endif
		/* add the counter update direct-put descriptor to the
		 * tail of the rget/mfifo payload */

		qpx_memcpy64((void*)&dput_desc[niov],
			(const void*)&bgq_ep->tx.read.cntr_model);

		dput_desc[niov].Torus_FIFO_Map = fifo_map;
		MUSPI_SetRecPayloadBaseAddressInfo(&dput_desc[niov],
			FI_BGQ_MU_BAT_ID_GLOBAL,
			MUSPI_GetAtomicAddress(write_cntr->std.paddr, MUHWI_ATOMIC_OPCODE_STORE_ADD));

		desc->Message_Length += sizeof(MUHWI_Descriptor_t);
		hdr->rma.ndesc += 1;

		if (!do_cq) {	/* likely */

#ifdef FI_BGQ_TRACE
			fprintf(stderr,"fi_bgq_readv_internal do_cntr && niov < 8 AND (!do_cq)\n");
			fflush(stderr);
#endif
			MUSPI_InjFifoAdvanceDesc(bgq_ep->tx.injfifo.muspi_injfifo);

		} else if (niov < 7) {

			/* room for both completions: add the cq update
			 * direct-put descriptor to the tail of the rget/mfifo
			 * payload (after the cntr update), inject, and queue
			 * the context as pending */
			fi_bgq_readv_append_cq_desc(bgq_ep, desc,
				&dput_desc[niov+1], bgq_context,
				fifo_map, lock_required);

		} else {

			/* the rget/mfifo payload is full - inject the data
			 * movement descriptors, then inject the counter
			 * completion descriptor */
			MUSPI_InjFifoAdvanceDesc(bgq_ep->tx.injfifo.muspi_injfifo);

			/* be lazy and do a single recursive call */
			fi_bgq_readv_internal(bgq_ep,
				NULL, 0,		/* no iovec array */
				bgq_target_addr,
				NULL, NULL,		/* no addr array, no key array */
				bgq_context, tx_op_flags,
				1,			/* enable cq */
				0,			/* disable cntr */
				lock_required);
		}

	} else if (do_cntr) {	/* unlikely */

		/* the rget/mfifo payload is full - inject the data
		 * movement descriptors, then inject any counter or cq
		 * completion descriptor(s) via a recursive call */
		MUSPI_InjFifoAdvanceDesc(bgq_ep->tx.injfifo.muspi_injfifo);

		fi_bgq_readv_internal(bgq_ep,
			NULL, 0,		/* no iovec array */
			bgq_target_addr,
			NULL, NULL,		/* no addr array, no key array */
			bgq_context, tx_op_flags,
			do_cq,
			1,			/* enable cntr */
			lock_required);

	} else if (do_cq && niov < 8) {

		/* no cntr completion
		 *
		 * add the cq byte counter decrement direct-put
		 * descriptor to the tail of the rget/mfifo payload,
		 * inject, and queue the context as pending */
		fi_bgq_readv_append_cq_desc(bgq_ep, desc,
			&dput_desc[niov], bgq_context,
			fifo_map, lock_required);

	} else if (do_cq) {

		/* the rget/mfifo payload is full - inject the data
		 * movement descriptors, then inject the cq completion
		 * descriptor via a recursive call */
		MUSPI_InjFifoAdvanceDesc(bgq_ep->tx.injfifo.muspi_injfifo);

		fi_bgq_readv_internal(bgq_ep,
			NULL, 0,		/* no iovec array */
			bgq_target_addr,
			NULL, NULL,		/* no addr array, no key array */
			bgq_context, tx_op_flags,
			1,	/* enable cq */
			0,	/* disable cntr */
			lock_required);

	} else {
		/* no cntr and no cq? very unlikely, if not invalid */

		/* if there are no completion operations then there *must* be
		 * at least one data movement operations */
		assert(niov > 0);

		MUSPI_InjFifoAdvanceDesc(bgq_ep->tx.injfifo.muspi_injfifo);
	}
}