in prov/bgq/src/fi_bgq_domain.c [108:230]
static int fi_bgq_mu_init(struct fi_bgq_domain *bgq_domain,
		struct fi_info *info)
{
	int rc;

	rc = fi_bgq_node_mu_lock_init(&bgq_domain->fabric->node, &bgq_domain->mu.lock);
	if (rc) {
		goto err;
	}
	l2atomic_lock_acquire(&bgq_domain->mu.lock);

	const uint32_t ppn = Kernel_ProcessCount();
	const uint32_t tcoord = Kernel_MyTcoord();

	const uint32_t subgroup_total =
		BGQ_MU_NUM_REC_FIFO_SUBGROUPS * (BGQ_MU_NUM_REC_FIFO_GROUPS-1);	/* do not consider 17th core subgroups */
	const uint32_t subgroups_per_process = subgroup_total / ppn;
	const uint32_t subgroup_offset = subgroups_per_process * tcoord;

	const uint32_t recfifo_total =
		BGQ_MU_NUM_REC_FIFOS_PER_GROUP * (BGQ_MU_NUM_REC_FIFO_GROUPS-1);	/* do not mess with 17th core group recfifos */

	/*
	 * Create four mu reception fifos in each of the subgroups "owned" by
	 * this process.
	 */
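	/*
	 * One contiguous, 32-byte-aligned buffer provides the storage behind
	 * every reception fifo owned by this process; it is registered with
	 * the kernel as a single memory region.
	 */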
	uint8_t * memptr;
	size_t nbytes = FI_BGQ_MU_RECFIFO_BYTES * BGQ_MU_NUM_REC_FIFOS_PER_SUBGROUP * subgroups_per_process;
	rc = posix_memalign((void**)&memptr, 32, nbytes);
	if (rc) goto err;

	Kernel_MemoryRegion_t mregion;
	rc = Kernel_CreateMemoryRegion(&mregion, (void*)memptr, nbytes);
	if (rc) goto err;

	bgq_domain->rfifo_mem = (void*)memptr;
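	/* reset the domain receive-side bookkeeping before the fifos allocated
	 * below are installed */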
	bgq_domain->rx.max = 0;
	bgq_domain->rx.count = 0;

	uint32_t n;
	for (n = 0; n < recfifo_total; ++n) {
		bgq_domain->rx.rfifo[n] = NULL;
	}
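	/* each process claims an equal share of the 64 non-17th-core subgroups;
	 * the expression below evaluates to 64/ppn for the power-of-two process
	 * counts (1 .. 64) supported on BG/Q */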
	const uint32_t subgroups_to_allocate_per_process =
		ppn == 64 ? 1 :
		ppn == 32 ? 2 :
		ppn == 16 ? 4 :
		ppn ==  8 ? 8 :
		ppn ==  4 ? 16 :
		ppn ==  2 ? 32 : 64;
	for (n = 0; n < subgroups_to_allocate_per_process; ++n) {

		const uint32_t requested_subgroup = subgroup_offset + n;

		uint32_t free_fifo_num;
		uint32_t free_fifo_ids[BGQ_MU_NUM_REC_FIFOS_PER_SUBGROUP];
		rc = Kernel_QueryRecFifos(requested_subgroup, &free_fifo_num, free_fifo_ids);
		if (rc) goto err;
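		/* this process owns the entire subgroup, so all four reception
		 * fifos (ids 0 .. 3) must still be unallocated */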
		if (free_fifo_num < 4) goto err;
		if (free_fifo_ids[0] != 0) goto err;
		if (free_fifo_ids[1] != 1) goto err;
		if (free_fifo_ids[2] != 2) goto err;
		if (free_fifo_ids[3] != 3) goto err;
		Kernel_RecFifoAttributes_t rfifo_attrs[4];
		memset((void*)&rfifo_attrs[0], 0, sizeof(Kernel_RecFifoAttributes_t)*4);
		rc = Kernel_AllocateRecFifos(requested_subgroup,
			&bgq_domain->rfifo_subgroup[requested_subgroup],
			4, free_fifo_ids, rfifo_attrs);
		if (rc) goto err;
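		/* carve the aligned buffer into FI_BGQ_MU_RECFIFO_BYTES slices;
		 * each fifo is initialized with its slice's offset into the
		 * registered memory region */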
		uint32_t i;
		for (i = 0; i < BGQ_MU_NUM_REC_FIFOS_PER_SUBGROUP; ++i) {
			rc = Kernel_RecFifoInit(&bgq_domain->rfifo_subgroup[requested_subgroup],
				i,
				&mregion,
				((uint64_t)memptr) - (uint64_t)mregion.BaseVa,
				FI_BGQ_MU_RECFIFO_BYTES - 1);
			if (rc) goto err;

			memptr += FI_BGQ_MU_RECFIFO_BYTES;
		}
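		/* enable the four fifos just initialized; the bit for the
		 * subgroup's first fifo is computed from the high-order end of
		 * the group's fifo enable mask */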
		uint64_t shift = (BGQ_MU_NUM_REC_FIFOS_PER_GROUP-1) -
			((requested_subgroup&3)*BGQ_MU_NUM_FIFO_SUBGROUPS);
		rc = Kernel_RecFifoEnable(requested_subgroup>>2, 0x01ULL << shift);
		if (rc) goto err;
		rc = Kernel_RecFifoEnable(requested_subgroup>>2, 0x01ULL << (shift-1));
		if (rc) goto err;
		rc = Kernel_RecFifoEnable(requested_subgroup>>2, 0x01ULL << (shift-2));
		if (rc) goto err;
		rc = Kernel_RecFifoEnable(requested_subgroup>>2, 0x01ULL << (shift-3));
		if (rc) goto err;
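		/* record the subgroup's fifos in the domain-wide receive table,
		 * indexed by global reception fifo id */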
		for (i = 0; i < BGQ_MU_NUM_REC_FIFOS_PER_SUBGROUP; ++i) {
			bgq_domain->rx.rfifo[requested_subgroup*BGQ_MU_NUM_REC_FIFOS_PER_SUBGROUP+i] =
				&bgq_domain->rfifo_subgroup[requested_subgroup]._recfifos[i];
		}

		bgq_domain->rx.max += 4;	/* initialized 4 mu reception fifos, 1 mu reception fifo is used in each fi rx ctx */
	}
	bgq_domain->tx.count = 0;

	/* initialize the mu gi barrier */
	bgq_domain->gi.leader_tcoord = bgq_domain->fabric->node.leader_tcoord;
	bgq_domain->gi.is_leader = bgq_domain->fabric->node.is_leader;
	if (bgq_domain->gi.is_leader) {
		rc = MUSPI_GIBarrierInit(&bgq_domain->gi.barrier, 0);
		assert(rc==0);
	}

	bgq_domain->subgroups_per_process = 64 / Kernel_ProcessCount();

	l2atomic_lock_release(&bgq_domain->mu.lock);
	/* global barrier after mu initialization is complete: all processes on
	 * the node synchronize locally, the node leader then enters the
	 * network-wide MU GI barrier, and a second local barrier releases the
	 * non-leader processes once the leader has returned */
	l2atomic_barrier_enter(&bgq_domain->fabric->node.barrier);
	if (bgq_domain->gi.is_leader) {
		rc = MUSPI_GIBarrierEnterAndWait(&bgq_domain->gi.barrier);
		assert(rc==0);
	}
	l2atomic_barrier_enter(&bgq_domain->fabric->node.barrier);

	return 0;
err:
	if (l2atomic_lock_isbusy(&bgq_domain->mu.lock)) {
		l2atomic_lock_release(&bgq_domain->mu.lock);
	}
	return -1;
}