prov/bgq/include/rdma/bgq/fi_bgq_l2atomic.h

/*
 * Copyright (C) 2016 by Argonne National Laboratory.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#ifndef _FI_PROV_BGQ_L2ATOMIC_H_
#define _FI_PROV_BGQ_L2ATOMIC_H_

#include <unistd.h>

#include "rdma/bgq/fi_bgq_hwi.h"
#include "rdma/bgq/fi_bgq_spi.h"

/*
 * l2atomic lock access structure
 */
struct l2atomic_lock {
	uintptr_t	ticket_l2vaddr;
	uintptr_t	serving_l2vaddr;
};

/*
 * l2atomic lock data structure
 */
struct l2atomic_lock_data {
	volatile uint64_t	ticket;
	volatile uint64_t	serving;
} __attribute((aligned(32)));

static inline
void l2atomic_lock_initialize (struct l2atomic_lock * lock,
		struct l2atomic_lock_data * data)
{
	uint32_t cnk_rc __attribute__ ((unused));
	cnk_rc = Kernel_L2AtomicsAllocate((void*)data,
			sizeof(struct l2atomic_lock_data));
	assert(0==cnk_rc);

	lock->ticket_l2vaddr = (uintptr_t)&data->ticket;
	lock->serving_l2vaddr = (uintptr_t)&data->serving;

	L2_AtomicStore((volatile uint64_t *)lock->ticket_l2vaddr, 0);
	L2_AtomicStore((volatile uint64_t *)lock->serving_l2vaddr, 0);
};

static inline
uint64_t l2atomic_lock_acquire (struct l2atomic_lock * lock)
{
	const uint64_t ticket =
		L2_AtomicLoadIncrement((volatile uint64_t *)lock->ticket_l2vaddr);
	while (L2_AtomicLoad((volatile uint64_t *)lock->serving_l2vaddr) != ticket);
	return ticket;
};

static inline
void l2atomic_lock_release (struct l2atomic_lock * lock)
{
	L2_AtomicStoreAdd((volatile uint64_t *)lock->serving_l2vaddr, 1);
};

static inline
uint64_t l2atomic_lock_depth (struct l2atomic_lock * lock)
{
	return L2_AtomicLoad((volatile uint64_t *)lock->ticket_l2vaddr) -
		L2_AtomicLoad((volatile uint64_t *)lock->serving_l2vaddr);
};

static inline
uint64_t l2atomic_lock_isbusy (struct l2atomic_lock * lock)
{
	return (l2atomic_lock_depth(lock) != 0);
};
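/*
 * Usage sketch for the ticket lock above (illustrative only; where the
 * 'lock_data' storage lives, e.g. in process-shared memory, is an
 * assumption and is not prescribed by this header):
 *
 *   static struct l2atomic_lock_data lock_data;    // must be L2-atomic capable memory
 *   struct l2atomic_lock lock;
 *
 *   l2atomic_lock_initialize(&lock, &lock_data);   // one-time setup
 *
 *   uint64_t ticket = l2atomic_lock_acquire(&lock); // spins until 'serving' reaches our ticket
 *   // ... critical section ...
 *   l2atomic_lock_release(&lock);                   // advance 'serving' to admit the next ticket
 */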
#define L2ATOMIC_FIFO_CONSUMER_CONTENTION_LEVEL_NONE	(0)
#define L2ATOMIC_FIFO_CONSUMER_CONTENTION_LEVEL_LOW	(1)
#define L2ATOMIC_FIFO_CONSUMER_CONTENTION_LEVEL_HIGH	(2)

#ifndef L2ATOMIC_FIFO_CONSUMER_CONTENTION_LEVEL
#define L2ATOMIC_FIFO_CONSUMER_CONTENTION_LEVEL (L2ATOMIC_FIFO_CONSUMER_CONTENTION_LEVEL_HIGH)
#endif

#define L2ATOMIC_FIFO_CONSUMER_ALGORITHM_L2BOUNDED	(0)
#define L2ATOMIC_FIFO_CONSUMER_ALGORITHM_STBOUNDED	(1)

#ifndef L2ATOMIC_FIFO_CONSUMER_ALGORITHM
#define L2ATOMIC_FIFO_CONSUMER_ALGORITHM (L2ATOMIC_FIFO_CONSUMER_ALGORITHM_L2BOUNDED)
#endif

// Change this default to L2ATOMIC_FIFO_CONSUMER_MULTIPLE ??
#ifndef L2ATOMIC_FIFO_CONSUMER_SINGLE
#ifndef L2ATOMIC_FIFO_CONSUMER_MULTIPLE
#define L2ATOMIC_FIFO_CONSUMER_SINGLE
#endif
#endif

#define L2ATOMIC_FIFO_PRODUCER_ALGORITHM_L2BOUNDED	(0)
#define L2ATOMIC_FIFO_PRODUCER_ALGORITHM_RESERVE	(1)

#ifndef L2ATOMIC_FIFO_PRODUCER_ALGORITHM
#define L2ATOMIC_FIFO_PRODUCER_ALGORITHM (L2ATOMIC_FIFO_PRODUCER_ALGORITHM_L2BOUNDED)
#endif

#define L2_CACHE_LINE_COUNT_UINT64 (L2_CACHE_LINE_SIZE >> 2)

// Change this default to L2ATOMIC_FIFO_MSYNC_PRODUCER ??
#ifndef L2ATOMIC_FIFO_MSYNC_CONSUMER
#ifndef L2ATOMIC_FIFO_MSYNC_PRODUCER
#define L2ATOMIC_FIFO_MSYNC_PRODUCER
#endif
#endif

// Change this default to L2ATOMIC_FIFO_PRODUCER_STORE_FAST ??
#ifndef L2ATOMIC_FIFO_PRODUCER_STORE_FAST
#ifndef L2ATOMIC_FIFO_PRODUCER_STORE_ATOMIC
#define L2ATOMIC_FIFO_PRODUCER_STORE_ATOMIC
#endif
#endif

#ifndef L2ATOMIC_FIFO_CONSUMER_CLEAR_FAST
#ifndef L2ATOMIC_FIFO_CONSUMER_CLEAR_ATOMIC
#define L2ATOMIC_FIFO_CONSUMER_CLEAR_ATOMIC
#endif
#endif

#ifndef L2ATOMIC_FIFO_BOUNDS_FAST
#ifndef L2ATOMIC_FIFO_BOUNDS_ATOMIC
#define L2ATOMIC_FIFO_BOUNDS_ATOMIC
#endif
#endif

/*
 * l2atomic fifo access structures
 */
struct l2atomic_fifo_consumer {
	uint64_t	head;
	uintptr_t	bounds_l2vaddr;
	uint64_t	mask;
	uintptr_t	packet_base_l2vaddr;
};

struct l2atomic_fifo_producer {
	uint64_t	mask;
	uintptr_t	packet_base_l2vaddr;
	uintptr_t	tail_l2vaddr;
#if L2ATOMIC_FIFO_PRODUCER_ALGORITHM == L2ATOMIC_FIFO_PRODUCER_ALGORITHM_RESERVE
	uintptr_t	bounds_l2vaddr;
#ifdef L2ATOMIC_FIFO_PRODUCER_ALGORITHM_RESERVE_CACHEBOUNDS
	uint64_t	local_bounds;
#endif
#endif
} __attribute__((__aligned__(L2_CACHE_LINE_SIZE)));

struct l2atomic_fifo {
	struct l2atomic_fifo_consumer	consumer;
	struct l2atomic_fifo_producer	producer;
};

/*
 * l2atomic fifo data structure
 */
#if L2ATOMIC_FIFO_PRODUCER_ALGORITHM == L2ATOMIC_FIFO_PRODUCER_ALGORITHM_L2BOUNDED
struct l2atomic_fifo_data {
	volatile uint64_t	tail;
	volatile uint64_t	bounds;
	volatile uint64_t	packet[0];
} __attribute((aligned(32)));
#elif L2ATOMIC_FIFO_PRODUCER_ALGORITHM == L2ATOMIC_FIFO_PRODUCER_ALGORITHM_RESERVE
struct l2atomic_fifo_data {
	volatile uint64_t	tail __attribute__((__aligned__(L2_CACHE_LINE_SIZE)));		/* producer rw, consumer na */
	uint64_t		pad0[(L2_CACHE_LINE_SIZE-sizeof(uint64_t))/sizeof(uint64_t)];
	volatile uint64_t	bounds __attribute__((__aligned__(L2_CACHE_LINE_SIZE)));	/* producer ro, consumer rw */
	uint64_t		pad1[(L2_CACHE_LINE_SIZE-sizeof(uint64_t))/sizeof(uint64_t)];
	volatile uint64_t	packet[0];							/* producer wo, consumer rw */
	uint64_t		pad2[L2_CACHE_LINE_COUNT_UINT64<<1];
} __attribute__((__aligned__(L2_CACHE_LINE_SIZE)));
#endif

static inline
void l2atomic_fifo_enable (struct l2atomic_fifo_consumer * consumer,
		struct l2atomic_fifo_producer * producer)
{
	uint64_t npackets = producer->mask+1;

	unsigned n;
	for (n = 0; n < npackets; ++n)
		L2_AtomicStore(&((uint64_t*)producer->packet_base_l2vaddr)[n], 0);

	consumer->head = 0;
	L2_AtomicStore((void *)producer->tail_l2vaddr, 0);
	L2_AtomicStore((void *)consumer->bounds_l2vaddr,
		npackets-L2_CACHE_LINE_COUNT_UINT64-1);
}

static inline
void l2atomic_fifo_disable (struct l2atomic_fifo_consumer * consumer,
		struct l2atomic_fifo_producer * producer)
{
	L2_AtomicStore((void *)consumer->bounds_l2vaddr, 0);
}
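/*
 * FIFO protocol notes (derived from the code in this header, not a
 * normative description): each packet slot holds a 64-bit word whose
 * most significant bit marks the slot as valid and whose low 63 bits
 * carry the payload.  The producer claims a slot by atomically
 * incrementing 'tail', the consumer drains slots in order starting at
 * its private 'head', and 'bounds' is advanced by the consumer to
 * return freed slots to the producer.
 */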
static inline
void l2atomic_fifo_initialize (struct l2atomic_fifo_consumer * consumer,
		struct l2atomic_fifo_producer * producer,
		struct l2atomic_fifo_data * data,
		uint64_t npackets)
{
	assert(consumer);
	assert(producer);
	assert(data);
	assert(((uintptr_t)data & 0x01F) == 0);	/* 32 byte aligned */

#ifdef L2ATOMIC_FIFO_NPACKETS
	npackets = L2ATOMIC_FIFO_NPACKETS;
#endif
	assert(npackets >= (1 << 8));		/* 256 -- see consume16() */
	assert((npackets == (1 << 3)) ||	/* 8 */
		(npackets == (1 << 4)) ||	/* 16 */
		(npackets == (1 << 5)) ||	/* 32 */
		(npackets == (1 << 6)) ||	/* 64 */
		(npackets == (1 << 7)) ||	/* 128 */
		(npackets == (1 << 8)) ||	/* 256 */
		(npackets == (1 << 9)) ||	/* 512 */
		(npackets == (1 << 10)) ||	/* 1k */
		(npackets == (1 << 11)) ||	/* 2k */
		(npackets == (1 << 12)) ||	/* 4k */
		(npackets == (1 << 13)) ||	/* 8k */
		(npackets == (1 << 14)) ||	/* 16k */
		(npackets == (1 << 15)) ||	/* 32k */
		(npackets == (1 << 16)) ||	/* 64k */
		(npackets == (1 << 17)) ||	/* 128k */
		(npackets == (1 << 18)) ||	/* 256k */
		(npackets == (1 << 19)));	/* 512k */

	consumer->mask = npackets-1;
	consumer->bounds_l2vaddr = (uintptr_t)&data->bounds;
	consumer->packet_base_l2vaddr = (uintptr_t)&data->packet[0];

	producer->mask = npackets-1;
	producer->tail_l2vaddr = (uintptr_t)&data->tail;
	producer->packet_base_l2vaddr = (uintptr_t)&data->packet[0];
#if L2ATOMIC_FIFO_PRODUCER_ALGORITHM == L2ATOMIC_FIFO_PRODUCER_ALGORITHM_RESERVE
	producer->bounds_l2vaddr = (uintptr_t)&data->bounds;
#ifdef L2ATOMIC_FIFO_PRODUCER_ALGORITHM_RESERVE_CACHEBOUNDS
	producer->local_bounds = npackets-L2_CACHE_LINE_COUNT_UINT64-1;
#endif
#endif

	uint32_t cnk_rc __attribute__ ((unused));
	cnk_rc = Kernel_L2AtomicsAllocate((void*)data,
			sizeof(struct l2atomic_fifo_data) + sizeof(uint64_t) * npackets);
	assert(0==cnk_rc);

	l2atomic_fifo_enable(consumer, producer);

	return;
}

static inline
int l2atomic_fifo_produce (struct l2atomic_fifo_producer * fifo,
		const uint64_t data)
{
#if L2ATOMIC_FIFO_PRODUCER_ALGORITHM != L2ATOMIC_FIFO_PRODUCER_ALGORITHM_L2BOUNDED
	assert(0);
#endif
	const uint64_t tail =
		L2_AtomicLoadIncrementBounded((volatile uint64_t *)fifo->tail_l2vaddr);
	if (tail != 0x8000000000000000ull) {

#ifdef L2ATOMIC_FIFO_NPACKETS
		const uint64_t mask = L2ATOMIC_FIFO_NPACKETS-1;
#else
		const uint64_t mask = fifo->mask;
#endif
		const uint64_t offset = (tail & mask) << 0x03ull;

#ifdef L2ATOMIC_FIFO_PRODUCER_STORE_FAST
		volatile uint64_t *pkt =
			(volatile uint64_t *)(fifo->packet_base_l2vaddr + offset);
		*pkt = 0x8000000000000000ull | data;
#endif
		{	/* this "l1p flush" hack is only needed to flush *writes*
			 * from a processor cache to the memory system */
			volatile uint64_t *mu_register =
				(volatile uint64_t *)(BGQ_MU_STATUS_CONTROL_REGS_START_OFFSET(0, 0) +
					0x030 - PHYMAP_PRIVILEGEDOFFSET);
			*mu_register = 0;
		}
#ifdef L2ATOMIC_FIFO_MSYNC_PRODUCER
		ppc_msync();
#endif
#ifdef L2ATOMIC_FIFO_PRODUCER_STORE_ATOMIC
		L2_AtomicStore((volatile uint64_t *)(fifo->packet_base_l2vaddr + offset),
			0x8000000000000000ull | data);
#endif
		return 0;
	}

	return -1;
}
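/*
 * Note on l2atomic_fifo_produce(): L2_AtomicLoadIncrementBounded()
 * returns 0x8000000000000000ull when 'tail' has reached the bounds
 * value maintained by the consumer, so a -1 return indicates the fifo
 * is currently full and the caller may retry; see
 * l2atomic_fifo_produce_wait() below for the spinning variant.
 */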
static inline
int l2atomic_fifo_produce_wait (struct l2atomic_fifo_producer * fifo,
		const uint64_t data)
{
#if L2ATOMIC_FIFO_PRODUCER_ALGORITHM == L2ATOMIC_FIFO_PRODUCER_ALGORITHM_L2BOUNDED
	while (0 != l2atomic_fifo_produce(fifo, data));
	return 0;
#elif L2ATOMIC_FIFO_PRODUCER_ALGORITHM == L2ATOMIC_FIFO_PRODUCER_ALGORITHM_RESERVE
	const uint64_t tail =
		L2_AtomicLoadIncrement((volatile uint64_t *)fifo->tail_l2vaddr);
	uint64_t bounds = 0;
	volatile uint64_t * const bounds_l2vaddr =
		(volatile uint64_t * const)fifo->bounds_l2vaddr;
#ifdef L2ATOMIC_FIFO_NPACKETS
	const uint64_t mask = L2ATOMIC_FIFO_NPACKETS-1;
#else
	const uint64_t mask = fifo->mask;
#endif
#ifdef L2ATOMIC_FIFO_PRODUCER_ALGORITHM_RESERVE_CACHEBOUNDS
	const uint64_t local_bounds = fifo->local_bounds;
	if (local_bounds < tail) {
		const uint64_t offset = (tail & mask) << 0x03ull;
#ifdef L2ATOMIC_FIFO_PRODUCER_STORE_FAST
		volatile uint64_t *pkt =
			(volatile uint64_t *)(fifo->packet_base_l2vaddr + offset);
		*pkt = 0x8000000000000000ull | data;
#endif
		{	/* this "l1p flush" hack is only needed to flush *writes*
			 * from a processor cache to the memory system */
			volatile uint64_t *mu_register =
				(volatile uint64_t *)(BGQ_MU_STATUS_CONTROL_REGS_START_OFFSET(0, 0) +
					0x030 - PHYMAP_PRIVILEGEDOFFSET);
			*mu_register = 0;
		}
#ifdef L2ATOMIC_FIFO_MSYNC_PRODUCER
		ppc_msync();
#endif
#ifdef L2ATOMIC_FIFO_PRODUCER_STORE_ATOMIC
		L2_AtomicStore((volatile uint64_t *)(fifo->packet_base_l2vaddr + offset),
			0x8000000000000000ull | data);
#endif
	} else {
#endif
#ifdef L2ATOMIC_FIFO_PRODUCER_CHECK_BOUNDS_FAST
		while ((bounds = *bounds_l2vaddr) < tail);
#endif
#ifdef L2ATOMIC_FIFO_PRODUCER_CHECK_BOUNDS_ATOMIC
		while ((bounds = L2_AtomicLoad((volatile uint64_t *)bounds_l2vaddr)) < tail);
#endif
		const uint64_t offset = (tail & mask) << 0x03ull;
#ifdef L2ATOMIC_FIFO_PRODUCER_STORE_FAST
		volatile uint64_t *pkt =
			(volatile uint64_t *)(fifo->packet_base_l2vaddr + offset);
		*pkt = 0x8000000000000000ull | data;
#endif
		{	/* this "l1p flush" hack is only needed to flush *writes*
			 * from a processor cache to the memory system */
			volatile uint64_t *mu_register =
				(volatile uint64_t *)(BGQ_MU_STATUS_CONTROL_REGS_START_OFFSET(0, 0) +
					0x030 - PHYMAP_PRIVILEGEDOFFSET);
			*mu_register = 0;
		}
#ifdef L2ATOMIC_FIFO_MSYNC_PRODUCER
		ppc_msync();
#endif
#ifdef L2ATOMIC_FIFO_PRODUCER_STORE_ATOMIC
		L2_AtomicStore((volatile uint64_t *)(fifo->packet_base_l2vaddr + offset),
			0x8000000000000000ull | data);
#endif
#ifdef L2ATOMIC_FIFO_PRODUCER_ALGORITHM_RESERVE_CACHEBOUNDS
		fifo->local_bounds = bounds;
	}
#endif
	return 0;
#endif
}

static inline
int l2atomic_fifo_consume (struct l2atomic_fifo_consumer * fifo,
		uint64_t * data)
{
#if L2ATOMIC_FIFO_CONSUMER_ALGORITHM == L2ATOMIC_FIFO_CONSUMER_ALGORITHM_STBOUNDED
	uint64_t * bounds_l2vaddr = (uint64_t *)fifo->bounds_l2vaddr;
	const uint64_t bounds = *bounds_l2vaddr;
#endif
	const uint64_t head = fifo->head;
#ifdef L2ATOMIC_FIFO_NPACKETS
	const uint64_t mask = L2ATOMIC_FIFO_NPACKETS-1;
#else
	const uint64_t mask = fifo->mask;
#endif
	const uint64_t offset = (head & mask) << 0x03ull;

#ifdef L2ATOMIC_FIFO_CONSUMER_CLEAR_FAST
	volatile uint64_t *pkt =
		(volatile uint64_t *)(fifo->packet_base_l2vaddr + offset);
	const uint64_t value = *pkt;
#else
#ifdef L2ATOMIC_FIFO_CONSUMER_CLEAR_ATOMIC
	const uint64_t value =
		L2_AtomicLoadClear((volatile uint64_t *)(fifo->packet_base_l2vaddr + offset));
#endif
#endif
	if (value & 0x8000000000000000ull) {
#ifdef L2ATOMIC_FIFO_CONSUMER_CLEAR_FAST
		*pkt = 0;
#endif
		*data = value & (~0x8000000000000000ull);
		fifo->head = head + 1;
#if L2ATOMIC_FIFO_CONSUMER_ALGORITHM == L2ATOMIC_FIFO_CONSUMER_ALGORITHM_L2BOUNDED
		L2_AtomicStoreAdd((volatile uint64_t *)fifo->bounds_l2vaddr, 1);
#elif L2ATOMIC_FIFO_CONSUMER_ALGORITHM == L2ATOMIC_FIFO_CONSUMER_ALGORITHM_STBOUNDED
		*bounds_l2vaddr = bounds + 1;
#endif
#ifdef L2ATOMIC_FIFO_MSYNC_CONSUMER
		ppc_msync();
#endif
		return 0;
	}

	return -1;
}
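/*
 * Usage sketch for the fifo above (illustrative only; the shared
 * allocation of 'fifo_data' and the packet count of 256 are
 * assumptions, not requirements of this header beyond the
 * power-of-two asserts in l2atomic_fifo_initialize()):
 *
 *   struct l2atomic_fifo fifo;
 *   struct l2atomic_fifo_data *fifo_data = ...;   // 32-byte aligned, with room for 256 packet slots
 *
 *   l2atomic_fifo_initialize(&fifo.consumer, &fifo.producer, fifo_data, 256);
 *
 *   // producer side: payload must fit in the low 63 bits
 *   l2atomic_fifo_produce_wait(&fifo.producer, 0x1234ull);
 *
 *   // consumer side: poll until a packet arrives
 *   uint64_t payload;
 *   while (0 != l2atomic_fifo_consume(&fifo.consumer, &payload));
 */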
static inline
unsigned l2atomic_fifo_consume16 (struct l2atomic_fifo_consumer * fifo,
		uint64_t * data)
{
#ifdef L2ATOMIC_FIFO_CONSUMER_SINGLE
	return (0 == l2atomic_fifo_consume(fifo, data));
#else
#ifdef L2ATOMIC_FIFO_CONSUMER_MULTIPLE
	const uint64_t head_counter = fifo->head;
#ifdef L2ATOMIC_FIFO_NPACKETS
	const uint64_t fifo_size = L2ATOMIC_FIFO_NPACKETS;
	const uint64_t mask = L2ATOMIC_FIFO_NPACKETS-1;
#else
	const uint64_t fifo_size = fifo->mask + 1;	/* needed below in both configurations */
	const uint64_t mask = fifo->mask;
#endif
	const uint64_t head_offset = head_counter & mask;
	const uint64_t end_offset = head_offset + L2_CACHE_LINE_COUNT_UINT64;
	const uint64_t count = L2_CACHE_LINE_COUNT_UINT64 -
		(((~mask) & end_offset) * (end_offset - fifo_size));

	volatile uint64_t *ptr =
		(volatile uint64_t *)(fifo->packet_base_l2vaddr + (head_offset << 0x03ull));

	uint64_t i, num_processed = 0;
#ifdef DO_CACHE
	uint64_t cache[L2_CACHE_LINE_COUNT_UINT64*2];
	ppc_msync();
	for (i = 0; i < count; ++i) {
		cache[i] = L2_AtomicLoad(ptr + i);
	}
#endif
	for (i = 0; i < count; ++i) {
#ifdef DO_CACHE
		if (cache[i] & 0x8000000000000000ull) {
			data[i] = cache[i] & (~0x8000000000000000ull);
			++num_processed;
			L2_AtomicStore(ptr + i, 0);
		} else {
			break;
		}
#else
		const uint64_t value = ptr[i];
		//const uint64_t value = L2_AtomicLoadClear(ptr + i);
		if (value & 0x8000000000000000ull) {
			data[i] = value & (~0x8000000000000000ull);
			++num_processed;
			ptr[i] = 0;
		} else {
			break;
		}
#endif
	}

	fifo->head += num_processed;
#ifdef L2ATOMIC_FIFO_BOUNDS_ATOMIC
	L2_AtomicStoreAdd((volatile uint64_t *)fifo->bounds_l2vaddr, num_processed);
#else
#ifdef L2ATOMIC_FIFO_BOUNDS_FAST
	uint64_t *bounds = (uint64_t *)fifo->bounds_l2vaddr;
	*bounds += num_processed;	/* credit all consumed slots, matching the atomic path */
#endif
#endif
#ifdef L2ATOMIC_FIFO_MSYNC_CONSUMER
	ppc_msync();
#endif
	return num_processed;
#endif
#endif
}

static inline
int l2atomic_fifo_drain (struct l2atomic_fifo_consumer * consumer,
		struct l2atomic_fifo_producer * producer,
		uint64_t * data)
{
	/* The fifo must be disabled before it can be drained */
	assert(0 == L2_AtomicLoad((volatile uint64_t *)consumer->bounds_l2vaddr));

	const uint64_t head = consumer->head;
	const uint64_t tail = L2_AtomicLoad((void *)producer->tail_l2vaddr);

	if (head == tail) {
		/* The fifo is empty */
		return -1;
	}

#ifdef L2ATOMIC_FIFO_NPACKETS
	const uint64_t mask = L2ATOMIC_FIFO_NPACKETS-1;
#else
	const uint64_t mask = consumer->mask;
#endif
	const uint64_t offset = (head & mask) << 0x03ull;

	/* Spin until the next packet is ready */
	uint64_t value = 0;
	volatile uint64_t *ptr =
		(volatile uint64_t *)(consumer->packet_base_l2vaddr + offset);
	while (0 == (0x8000000000000000ull & (value = L2_AtomicLoadClear(ptr))));

	*data = value & (~0x8000000000000000ull);
	consumer->head = head + 1;
#ifdef L2ATOMIC_FIFO_MSYNC_CONSUMER
	ppc_msync();
#endif
	return 0;
}

static inline
int l2atomic_fifo_peek (struct l2atomic_fifo_consumer * fifo,
		uint64_t * data)
{
	const uint64_t head = fifo->head;
	const uint64_t mask = fifo->mask;
	const uint64_t offset = (head & mask) << 0x03ull;

	const uint64_t value =
		L2_AtomicLoad((volatile uint64_t *)(fifo->packet_base_l2vaddr + offset));
	if (value & 0x8000000000000000ull) {
		*data = value & (~0x8000000000000000ull);
		return 0;
	}

	return -1;
}

static inline
void l2atomic_fifo_advance (struct l2atomic_fifo_consumer * fifo)
{
	const uint64_t head = fifo->head;
	const uint64_t mask = fifo->mask;
	const uint64_t offset = (head & mask) << 0x03ull;

	L2_AtomicLoadClear((volatile uint64_t *)(fifo->packet_base_l2vaddr + offset));
	fifo->head = head + 1;
	L2_AtomicStoreAdd((volatile uint64_t *)fifo->bounds_l2vaddr, 1);

	return;
}

static inline
unsigned l2atomic_fifo_isempty (struct l2atomic_fifo_consumer * fifo)
{
	const uint64_t head = fifo->head;
	const uint64_t mask = fifo->mask;
	const uint64_t offset = (head & mask) << 0x03ull;

	const uint64_t value =
		L2_AtomicLoad((volatile uint64_t *)(fifo->packet_base_l2vaddr + offset));
	return (value & 0x8000000000000000ull) == 0;
}
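/*
 * Note on l2atomic_fifo_peek() / l2atomic_fifo_advance() above: together
 * they form a two-phase alternative to l2atomic_fifo_consume().  peek()
 * reads the packet at the current head without clearing it; advance()
 * clears that slot, bumps the private head, and credits one slot back
 * to the producer via 'bounds'.  A caller that peeks and keeps the
 * packet must eventually advance, otherwise the slot is never reclaimed.
 */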
/*
 * l2atomic counter data structure
 */
struct l2atomic_counter_data {
	volatile uint64_t	value;
} __attribute((aligned(8)));

/*
 * l2atomic counter access structure
 */
struct l2atomic_counter {
	uintptr_t	value_l2vaddr;
};

static inline
void l2atomic_counter_initialize (struct l2atomic_counter * counter,
		struct l2atomic_counter_data * data)
{
	assert(counter);
	assert(data);
	assert(((uintptr_t)data & 0x07) == 0);	/* 8 byte aligned */

	uint32_t cnk_rc __attribute__ ((unused));
	cnk_rc = Kernel_L2AtomicsAllocate((void*)data,
			sizeof(struct l2atomic_counter_data));
	assert(0==cnk_rc);

	counter->value_l2vaddr = (uintptr_t)&data->value;
	L2_AtomicStore(&data->value, 0);
};

static inline
uint64_t l2atomic_counter_increment (struct l2atomic_counter * counter)
{
	return L2_AtomicLoadIncrement((volatile uint64_t *)(counter->value_l2vaddr));
};

static inline
uint64_t l2atomic_counter_decrement (struct l2atomic_counter * counter)
{
	return L2_AtomicLoadDecrement((volatile uint64_t *)(counter->value_l2vaddr));
};

static inline
uint64_t l2atomic_counter_get (struct l2atomic_counter * counter)
{
	return L2_AtomicLoad((volatile uint64_t *)(counter->value_l2vaddr));
}

static inline
void l2atomic_counter_set (struct l2atomic_counter * counter, uint64_t new_value)
{
	L2_AtomicStore((volatile uint64_t *)(counter->value_l2vaddr), new_value);
};

static inline
void l2atomic_counter_add (struct l2atomic_counter * counter, uint64_t add_value)
{
	L2_AtomicStoreAdd((volatile uint64_t *)(counter->value_l2vaddr), add_value);
};

/*
 * l2atomic bounded counter data structure
 */
struct l2atomic_boundedcounter_data {
	volatile uint64_t	value;
	volatile uint64_t	bounds;
} __attribute((aligned(32)));

/*
 * l2atomic bounded counter access structure
 */
struct l2atomic_boundedcounter {
	uintptr_t	value_l2vaddr;
	uintptr_t	bounds_l2vaddr;
};

static inline
void l2atomic_boundedcounter_initialize (struct l2atomic_boundedcounter * counter,
		struct l2atomic_boundedcounter_data * data,
		uint64_t initial_bounds)
{
	assert(counter);
	assert(data);
	assert(((uintptr_t)data & 0x01F) == 0);	/* 32 byte aligned */

	uint32_t cnk_rc __attribute__ ((unused));
	cnk_rc = Kernel_L2AtomicsAllocate((void*)data,
			sizeof(struct l2atomic_boundedcounter_data));
	assert(0==cnk_rc);

	counter->value_l2vaddr = (uintptr_t)&data->value;
	counter->bounds_l2vaddr = (uintptr_t)&data->bounds;
	L2_AtomicStore(&data->value, 0);
	L2_AtomicStore(&data->bounds, initial_bounds);
};

static inline
uint64_t l2atomic_boundedcounter_increment_value (struct l2atomic_boundedcounter * counter)
{
	return L2_AtomicLoadIncrementBounded((volatile uint64_t *)(counter->value_l2vaddr));
};

static inline
void l2atomic_boundedcounter_add_bounds (struct l2atomic_boundedcounter * counter,
		uint64_t add_value)
{
	L2_AtomicStoreAdd((volatile uint64_t *)(counter->bounds_l2vaddr), add_value);
};

/*
 * l2atomic barrier data structure
 */
struct l2atomic_barrier_data {
	volatile __attribute__((aligned(L1D_CACHE_LINE_SIZE))) uint64_t	start;
	uint64_t	participants;
	volatile __attribute__((aligned(L1D_CACHE_LINE_SIZE))) uint64_t	count;
} __attribute__((aligned(L1D_CACHE_LINE_SIZE)));

/*
 * l2atomic barrier access structure
 */
struct l2atomic_barrier {
	uintptr_t	start_l2vaddr;
	uintptr_t	count_l2vaddr;
	uint64_t	participants;
};

static inline
void l2atomic_barrier_initialize (struct l2atomic_barrier * barrier,
		struct l2atomic_barrier_data * data,
		uint64_t participants)
{
	assert(barrier);
	assert(data);
	assert(((uintptr_t)data & (L1D_CACHE_LINE_SIZE-1)) == 0);

	uint32_t cnk_rc __attribute__ ((unused));
	cnk_rc = Kernel_L2AtomicsAllocate((void*)data,
			sizeof(struct l2atomic_barrier_data));
	assert(0==cnk_rc);

	barrier->start_l2vaddr = (uintptr_t)&data->start;
	barrier->count_l2vaddr = (uintptr_t)&data->count;
	barrier->participants = participants;
	data->participants = participants;

	L2_AtomicStore(&data->start, 0);
	L2_AtomicStore(&data->count, 0);
}
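/*
 * Usage sketch for the barrier (illustrative only; how the
 * l2atomic_barrier_data is shared between participants is an
 * assumption, and 'is_leader', 'shared_data' and 'nparticipants' are
 * placeholder names).  One participant initializes, the others clone
 * the already-initialized data, then everyone enters; see
 * l2atomic_barrier_clone() and l2atomic_barrier_enter() below.
 *
 *   struct l2atomic_barrier barrier;
 *
 *   if (is_leader)
 *       l2atomic_barrier_initialize(&barrier, shared_data, nparticipants);
 *   else
 *       l2atomic_barrier_clone(&barrier, shared_data);   // after the leader has initialized
 *
 *   l2atomic_barrier_enter(&barrier);   // blocks until all participants arrive
 */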
static inline
void l2atomic_barrier_clone (struct l2atomic_barrier * barrier,
		struct l2atomic_barrier_data * data)
{
	assert(barrier);
	assert(data);
	assert(((uintptr_t)data & (L1D_CACHE_LINE_SIZE-1)) == 0);

	barrier->start_l2vaddr = (uintptr_t)&data->start;
	barrier->count_l2vaddr = (uintptr_t)&data->count;
	barrier->participants = data->participants;
}

static inline
void l2atomic_barrier_enter (struct l2atomic_barrier * barrier)
{
	volatile uint64_t * start_l2vaddr =
		(volatile uint64_t *)(barrier->start_l2vaddr);

	const uint64_t start = L2_AtomicLoad(start_l2vaddr);
	const uint64_t current =
		L2_AtomicLoadIncrement((volatile uint64_t *)(barrier->count_l2vaddr)) + 1;
	const uint64_t target = start + barrier->participants;

	if (current == target) {
		L2_AtomicStoreAdd(start_l2vaddr, barrier->participants);
	} else {
		while (L2_AtomicLoad(start_l2vaddr) < current);
	}
}

#endif /* _FI_PROV_BGQ_L2ATOMIC_H_ */