prov/bgq/src/fi_bgq_atomic.c (732 lines of code) (raw):
/*
* Copyright (C) 2016 by Argonne National Laboratory.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "rdma/bgq/fi_bgq.h"
#include <ofi_enosys.h>
#include <complex.h>
/*
* --------------------------- begin: rx atomics ------------------------------
*/
#define FI_BGQ_RX_ATOMIC_SPECIALIZED_MACRO_NAME(OP) \
FI_BGQ_RX_ATOMIC_SPECIALIZED_MACRO_NAME_(OP)
#define FI_BGQ_RX_ATOMIC_SPECIALIZED_MACRO_NAME_(OP) \
FI_BGQ_RX_ATOMIC_DO_ ## OP
#define FI_BGQ_RX_ATOMIC_SPECIALIZED_FUNC(OP, DT, CTYPE) \
FI_BGQ_RX_ATOMIC_SPECIALIZED_FUNC_(OP, DT, CTYPE)
#define FI_BGQ_RX_ATOMIC_SPECIALIZED_FUNC_(OP, DT, CTYPE) \
void fi_bgq_rx_atomic_ ## OP ## _ ## DT \
(void * buf, void * addr, size_t nbytes) \
{ \
FI_BGQ_RX_ATOMIC_SPECIALIZED_MACRO_NAME(OP)(buf, addr, CTYPE) \
}
#define FI_BGQ_RX_ATOMIC_SPECIALIZED_FUNC_NAME(OP, DT) \
FI_BGQ_RX_ATOMIC_SPECIALIZED_FUNC_NAME_(OP, DT)
#define FI_BGQ_RX_ATOMIC_SPECIALIZED_FUNC_NAME_(OP, DT) \
fi_bgq_rx_atomic_ ## OP ## _ ## DT
#define FI_BGQ_RX_ATOMIC_DO_MIN(buf_, addr_, ctype) \
ctype * buf__ = (ctype *)buf_; \
ctype * addr__ = (ctype *)addr_; \
const size_t count = nbytes / sizeof(ctype); \
unsigned i; \
for (i=0; i<count; ++i) \
if (buf__[i] < addr__[i]) \
addr__[i] = buf__[i]; \
#define FI_BGQ_RX_ATOMIC_DO_MAX(buf_, addr_, ctype) \
{ \
ctype * buf__ = (ctype *)buf_; \
ctype * addr__ = (ctype *)addr_; \
const size_t count = nbytes / sizeof(ctype); \
unsigned i; \
for (i=0; i<count; ++i) \
if (buf__[i] > addr__[i]) \
addr__[i] = buf__[i]; \
}
#define FI_BGQ_RX_ATOMIC_DO_SUM(buf_, addr_, ctype) \
{ \
ctype * buf__ = (ctype *)buf_; \
ctype * addr__ = (ctype *)addr_; \
const size_t count = nbytes / sizeof(ctype); \
unsigned i; \
for (i=0; i<count; ++i) \
addr__[i] += buf__[i]; \
}
#define FI_BGQ_RX_ATOMIC_DO_PROD(buf_, addr_, ctype) \
{ \
ctype * buf__ = (ctype *)buf_; \
ctype * addr__ = (ctype *)addr_; \
const size_t count = nbytes / sizeof(ctype); \
unsigned i; \
for (i=0; i<count; ++i) \
addr__[i] = addr__[i] * buf__[i]; \
}
#define FI_BGQ_RX_ATOMIC_DO_LOR(buf_, addr_, ctype) \
{ \
ctype * buf__ = (ctype *)buf_; \
ctype * addr__ = (ctype *)addr_; \
const size_t count = nbytes / sizeof(ctype); \
unsigned i; \
for (i=0; i<count; ++i) \
addr__[i] = (addr__[i] || buf__[i]); \
}
#define FI_BGQ_RX_ATOMIC_DO_LAND(buf_, addr_, ctype) \
{ \
ctype * buf__ = (ctype *)buf_; \
ctype * addr__ = (ctype *)addr_; \
const size_t count = nbytes / sizeof(ctype); \
unsigned i; \
for (i=0; i<count; ++i) \
addr__[i] = (addr__[i] && buf__[i]); \
}
#define FI_BGQ_RX_ATOMIC_DO_BOR_(buf_, addr_, ctype) \
{ \
ctype * buf__ = (ctype *)buf_; \
ctype * addr__ = (ctype *)addr_; \
const size_t count = nbytes / sizeof(ctype); \
unsigned i; \
for (i=0; i<count; ++i) \
addr__[i] |= buf__[i]; \
}
#define FI_BGQ_RX_ATOMIC_DO_BOR(buf_, addr_, ctype) \
{ \
if (sizeof(uint8_t) == sizeof(ctype)) { \
FI_BGQ_RX_ATOMIC_DO_BOR_(buf_, addr_, uint8_t); \
} else if (sizeof(uint16_t) == sizeof(ctype)) { \
FI_BGQ_RX_ATOMIC_DO_BOR_(buf_, addr_, uint16_t); \
} else if (sizeof(uint32_t) == sizeof(ctype)) { \
FI_BGQ_RX_ATOMIC_DO_BOR_(buf_, addr_, uint32_t); \
} else if (sizeof(uint64_t) == sizeof(ctype)) { \
FI_BGQ_RX_ATOMIC_DO_BOR_(buf_, addr_, uint64_t); \
} else if (16 == sizeof(ctype)) { \
FI_BGQ_RX_ATOMIC_DO_BOR_(&(((uint64_t*)buf_)[0]), &(((uint64_t*)addr_)[0]), uint64_t); \
FI_BGQ_RX_ATOMIC_DO_BOR_(&(((uint64_t*)buf_)[1]), &(((uint64_t*)addr_)[1]), uint64_t); \
} else if (32 == sizeof(ctype)) { \
FI_BGQ_RX_ATOMIC_DO_BOR_(&(((uint64_t*)buf_)[0]), &(((uint64_t*)addr_)[0]), uint64_t); \
FI_BGQ_RX_ATOMIC_DO_BOR_(&(((uint64_t*)buf_)[1]), &(((uint64_t*)addr_)[1]), uint64_t); \
FI_BGQ_RX_ATOMIC_DO_BOR_(&(((uint64_t*)buf_)[2]), &(((uint64_t*)addr_)[2]), uint64_t); \
FI_BGQ_RX_ATOMIC_DO_BOR_(&(((uint64_t*)buf_)[3]), &(((uint64_t*)addr_)[3]), uint64_t); \
} else { \
assert(0); \
} \
}
#define FI_BGQ_RX_ATOMIC_DO_BAND_(buf_, addr_, ctype) \
{ \
ctype * buf__ = (ctype *)buf_; \
ctype * addr__ = (ctype *)addr_; \
const size_t count = nbytes / sizeof(ctype); \
unsigned i; \
for (i=0; i<count; ++i) \
addr__[i] &= buf__[i]; \
}
#define FI_BGQ_RX_ATOMIC_DO_BAND(buf_, addr_, ctype) \
{ \
if (sizeof(uint8_t) == sizeof(ctype)) { \
FI_BGQ_RX_ATOMIC_DO_BAND_(buf_, addr_, uint8_t); \
} else if (sizeof(uint16_t) == sizeof(ctype)) { \
FI_BGQ_RX_ATOMIC_DO_BAND_(buf_, addr_, uint16_t); \
} else if (sizeof(uint32_t) == sizeof(ctype)) { \
FI_BGQ_RX_ATOMIC_DO_BAND_(buf_, addr_, uint32_t); \
} else if (sizeof(uint64_t) == sizeof(ctype)) { \
FI_BGQ_RX_ATOMIC_DO_BAND_(buf_, addr_, uint64_t); \
} else if (16 == sizeof(ctype)) { \
FI_BGQ_RX_ATOMIC_DO_BAND_(&(((uint64_t*)buf_)[0]), &(((uint64_t*)addr_)[0]), uint64_t); \
FI_BGQ_RX_ATOMIC_DO_BAND_(&(((uint64_t*)buf_)[1]), &(((uint64_t*)addr_)[1]), uint64_t); \
} else if (32 == sizeof(ctype)) { \
FI_BGQ_RX_ATOMIC_DO_BAND_(&(((uint64_t*)buf_)[0]), &(((uint64_t*)addr_)[0]), uint64_t); \
FI_BGQ_RX_ATOMIC_DO_BAND_(&(((uint64_t*)buf_)[1]), &(((uint64_t*)addr_)[1]), uint64_t); \
FI_BGQ_RX_ATOMIC_DO_BAND_(&(((uint64_t*)buf_)[2]), &(((uint64_t*)addr_)[2]), uint64_t); \
FI_BGQ_RX_ATOMIC_DO_BAND_(&(((uint64_t*)buf_)[3]), &(((uint64_t*)addr_)[3]), uint64_t); \
} else { \
assert(0); \
} \
}
#define FI_BGQ_RX_ATOMIC_DO_LXOR(buf_, addr_, ctype) \
{ \
ctype * buf__ = (ctype *)buf_; \
ctype * addr__ = (ctype *)addr_; \
const size_t count = nbytes / sizeof(ctype); \
unsigned i; \
for (i=0; i<count; ++i) \
addr__[i] = ((addr__[i] && !buf__[i]) || \
(!addr__[i] && buf__[i])); \
}
#define FI_BGQ_RX_ATOMIC_DO_BXOR_(buf_, addr_, ctype) \
{ \
ctype * buf__ = (ctype *)buf_; \
ctype * addr__ = (ctype *)addr_; \
const size_t count = nbytes / sizeof(ctype); \
unsigned i; \
for (i=0; i<count; ++i) \
addr__[i] = addr__[i] ^ buf__[i]; \
}
#define FI_BGQ_RX_ATOMIC_DO_BXOR(buf_, addr_, ctype) \
{ \
if (sizeof(uint8_t) == sizeof(ctype)) { \
FI_BGQ_RX_ATOMIC_DO_BXOR_(buf_, addr_, uint8_t); \
} else if (sizeof(uint16_t) == sizeof(ctype)) { \
FI_BGQ_RX_ATOMIC_DO_BXOR_(buf_, addr_, uint16_t); \
} else if (sizeof(uint32_t) == sizeof(ctype)) { \
FI_BGQ_RX_ATOMIC_DO_BXOR_(buf_, addr_, uint32_t); \
} else if (sizeof(uint64_t) == sizeof(ctype)) { \
FI_BGQ_RX_ATOMIC_DO_BXOR_(buf_, addr_, uint64_t); \
} else if (16 == sizeof(ctype)) { \
FI_BGQ_RX_ATOMIC_DO_BXOR_(&(((uint64_t*)buf_)[0]), &(((uint64_t*)addr_)[0]), uint64_t); \
FI_BGQ_RX_ATOMIC_DO_BXOR_(&(((uint64_t*)buf_)[1]), &(((uint64_t*)addr_)[1]), uint64_t); \
} else if (32 == sizeof(ctype)) { \
FI_BGQ_RX_ATOMIC_DO_BXOR_(&(((uint64_t*)buf_)[0]), &(((uint64_t*)addr_)[0]), uint64_t); \
FI_BGQ_RX_ATOMIC_DO_BXOR_(&(((uint64_t*)buf_)[1]), &(((uint64_t*)addr_)[1]), uint64_t); \
FI_BGQ_RX_ATOMIC_DO_BXOR_(&(((uint64_t*)buf_)[2]), &(((uint64_t*)addr_)[2]), uint64_t); \
FI_BGQ_RX_ATOMIC_DO_BXOR_(&(((uint64_t*)buf_)[3]), &(((uint64_t*)addr_)[3]), uint64_t); \
} else { \
assert(0); \
} \
}
#define FI_BGQ_RX_ATOMIC_DO_ATOMIC_READ(buf_, addr_, ctype) \
{ \
ctype * buf__ = (ctype *)buf_; \
ctype * addr__ = (ctype *)addr_; \
const size_t count = nbytes / sizeof(ctype); \
unsigned i; \
for (i=0; i<count; ++i) \
buf__[i] = addr__[i]; \
}
#define FI_BGQ_RX_ATOMIC_DO_ATOMIC_WRITE(buf_, addr_, ctype) \
{ \
ctype * buf__ = (ctype *)buf_; \
ctype * addr__ = (ctype *)addr_; \
const size_t count = nbytes / sizeof(ctype); \
unsigned i; \
for (i=0; i<count; ++i) \
addr__[i] = buf__[i]; \
}
#define FI_BGQ_RX_ATOMIC_DO_CSWAP(buf_, addr_, ctype) \
{ \
ctype * buf__ = (ctype *)buf_; \
ctype * addr__ = (ctype *)addr_; \
const size_t count = nbytes / sizeof(ctype); \
const ctype * compare__ = &buf__[count]; \
unsigned i; \
for (i=0; i<count; ++i) \
if (compare__[i] == addr__[i]) \
addr__[i] = buf__[i]; \
}
#define FI_BGQ_RX_ATOMIC_DO_CSWAP_NE(buf_, addr_, ctype) \
{ \
ctype * buf__ = (ctype *)buf_; \
ctype * addr__ = (ctype *)addr_; \
const size_t count = nbytes / sizeof(ctype); \
const ctype * compare__ = &buf__[count]; \
unsigned i; \
for (i=0; i<count; ++i) \
if (compare__[i] != addr__[i]) \
addr__[i] = buf__[i]; \
}
#define FI_BGQ_RX_ATOMIC_DO_CSWAP_LE(buf_, addr_, ctype) \
{ \
ctype * buf__ = (ctype *)buf_; \
ctype * addr__ = (ctype *)addr_; \
const size_t count = nbytes / sizeof(ctype); \
const ctype * compare__ = &buf__[count]; \
unsigned i; \
for (i=0; i<count; ++i) \
if (compare__[i] <= addr__[i]) \
addr__[i] = buf__[i]; \
}
#define FI_BGQ_RX_ATOMIC_DO_CSWAP_LT(buf_, addr_, ctype) \
{ \
ctype * buf__ = (ctype *)buf_; \
ctype * addr__ = (ctype *)addr_; \
const size_t count = nbytes / sizeof(ctype); \
const ctype * compare__ = &buf__[count]; \
unsigned i; \
for (i=0; i<count; ++i) \
if (compare__[i] < addr__[i]) \
addr__[i] = buf__[i]; \
}
#define FI_BGQ_RX_ATOMIC_DO_CSWAP_GE(buf_, addr_, ctype) \
{ \
ctype * buf__ = (ctype *)buf_; \
ctype * addr__ = (ctype *)addr_; \
const size_t count = nbytes / sizeof(ctype); \
const ctype * compare__ = &buf__[count]; \
unsigned i; \
for (i=0; i<count; ++i) \
if (compare__[i] >= addr__[i]) \
addr__[i] = buf__[i]; \
}
#define FI_BGQ_RX_ATOMIC_DO_CSWAP_GT(buf_, addr_, ctype) \
{ \
ctype * buf__ = (ctype *)buf_; \
ctype * addr__ = (ctype *)addr_; \
const size_t count = nbytes / sizeof(ctype); \
const ctype * compare__ = &buf__[count]; \
unsigned i; \
for (i=0; i<count; ++i) \
if (compare__[i] > addr__[i]) \
addr__[i] = buf__[i]; \
}
#define FI_BGQ_RX_ATOMIC_DO_MSWAP_(buf_, addr_, ctype) \
{ \
ctype * buf__ = (ctype *)buf_; \
ctype * addr__ = (ctype *)addr_; \
const size_t count = nbytes / sizeof(ctype); \
const ctype * compare__ = &buf__[count]; \
unsigned i; \
for (i=0; i<count; ++i) \
addr__[i] = \
(buf__[i] & compare__[i]) | \
(addr__[i] & ~compare__[i]); \
}
#define FI_BGQ_RX_ATOMIC_DO_MSWAP(buf_, addr_, ctype) \
{ \
if (sizeof(uint8_t) == sizeof(ctype)) { \
FI_BGQ_RX_ATOMIC_DO_MSWAP_(buf_, addr_, uint8_t); \
} else if (sizeof(uint16_t) == sizeof(ctype)) { \
FI_BGQ_RX_ATOMIC_DO_MSWAP_(buf_, addr_, uint16_t); \
} else if (sizeof(uint32_t) == sizeof(ctype)) { \
FI_BGQ_RX_ATOMIC_DO_MSWAP_(buf_, addr_, uint32_t); \
} else if (sizeof(uint64_t) == sizeof(ctype)) { \
FI_BGQ_RX_ATOMIC_DO_MSWAP_(buf_, addr_, uint64_t); \
} else if (16 == sizeof(ctype)) { \
FI_BGQ_RX_ATOMIC_DO_MSWAP_(&(((uint64_t*)buf_)[0]), &(((uint64_t*)addr_)[0]), uint64_t); \
FI_BGQ_RX_ATOMIC_DO_MSWAP_(&(((uint64_t*)buf_)[1]), &(((uint64_t*)addr_)[1]), uint64_t); \
} else if (32 == sizeof(ctype)) { \
FI_BGQ_RX_ATOMIC_DO_MSWAP_(&(((uint64_t*)buf_)[0]), &(((uint64_t*)addr_)[0]), uint64_t); \
FI_BGQ_RX_ATOMIC_DO_MSWAP_(&(((uint64_t*)buf_)[1]), &(((uint64_t*)addr_)[1]), uint64_t); \
FI_BGQ_RX_ATOMIC_DO_MSWAP_(&(((uint64_t*)buf_)[2]), &(((uint64_t*)addr_)[2]), uint64_t); \
FI_BGQ_RX_ATOMIC_DO_MSWAP_(&(((uint64_t*)buf_)[3]), &(((uint64_t*)addr_)[3]), uint64_t); \
} else { \
assert(0); \
} \
}
#define FI_BGQ_RX_ATOMIC_DO_NOOP(buf_, addr_, ctype) {}
#define FI_BGQ_RX_ATOMIC_SPECIALIZED_FUNCS(DT, CTYPE) \
FI_BGQ_RX_ATOMIC_SPECIALIZED_FUNC(MIN, DT, CTYPE); \
FI_BGQ_RX_ATOMIC_SPECIALIZED_FUNC(MAX, DT, CTYPE); \
FI_BGQ_RX_ATOMIC_SPECIALIZED_FUNC(SUM, DT, CTYPE); \
FI_BGQ_RX_ATOMIC_SPECIALIZED_FUNC(PROD, DT, CTYPE); \
FI_BGQ_RX_ATOMIC_SPECIALIZED_FUNC(LOR, DT, CTYPE); \
FI_BGQ_RX_ATOMIC_SPECIALIZED_FUNC(LAND, DT, CTYPE); \
FI_BGQ_RX_ATOMIC_SPECIALIZED_FUNC(BOR, DT, CTYPE); \
FI_BGQ_RX_ATOMIC_SPECIALIZED_FUNC(BAND, DT, CTYPE); \
FI_BGQ_RX_ATOMIC_SPECIALIZED_FUNC(LXOR, DT, CTYPE); \
FI_BGQ_RX_ATOMIC_SPECIALIZED_FUNC(BXOR, DT, CTYPE); \
FI_BGQ_RX_ATOMIC_SPECIALIZED_FUNC(ATOMIC_READ, DT, CTYPE); \
FI_BGQ_RX_ATOMIC_SPECIALIZED_FUNC(ATOMIC_WRITE, DT, CTYPE); \
FI_BGQ_RX_ATOMIC_SPECIALIZED_FUNC(CSWAP, DT, CTYPE); \
FI_BGQ_RX_ATOMIC_SPECIALIZED_FUNC(CSWAP_NE, DT, CTYPE); \
FI_BGQ_RX_ATOMIC_SPECIALIZED_FUNC(CSWAP_LE, DT, CTYPE); \
FI_BGQ_RX_ATOMIC_SPECIALIZED_FUNC(CSWAP_LT, DT, CTYPE); \
FI_BGQ_RX_ATOMIC_SPECIALIZED_FUNC(CSWAP_GE, DT, CTYPE); \
FI_BGQ_RX_ATOMIC_SPECIALIZED_FUNC(CSWAP_GT, DT, CTYPE); \
FI_BGQ_RX_ATOMIC_SPECIALIZED_FUNC(MSWAP, DT, CTYPE);
#define FI_BGQ_RX_ATOMIC_SPECIALIZED_FUNCS_COMPLEX(DT, CTYPE) \
FI_BGQ_RX_ATOMIC_SPECIALIZED_FUNC(SUM, DT, CTYPE); \
FI_BGQ_RX_ATOMIC_SPECIALIZED_FUNC(PROD, DT, CTYPE); \
FI_BGQ_RX_ATOMIC_SPECIALIZED_FUNC(LOR, DT, CTYPE); \
FI_BGQ_RX_ATOMIC_SPECIALIZED_FUNC(LAND, DT, CTYPE); \
FI_BGQ_RX_ATOMIC_SPECIALIZED_FUNC(BOR, DT, CTYPE); \
FI_BGQ_RX_ATOMIC_SPECIALIZED_FUNC(LXOR, DT, CTYPE); \
FI_BGQ_RX_ATOMIC_SPECIALIZED_FUNC(ATOMIC_READ, DT, CTYPE); \
FI_BGQ_RX_ATOMIC_SPECIALIZED_FUNC(ATOMIC_WRITE, DT, CTYPE); \
FI_BGQ_RX_ATOMIC_SPECIALIZED_FUNC(CSWAP, DT, CTYPE);
void fi_bgq_rx_atomic_NOOP (void * addr, void * buf, size_t nbytes) {}
FI_BGQ_RX_ATOMIC_SPECIALIZED_FUNCS(INT8, int8_t)
FI_BGQ_RX_ATOMIC_SPECIALIZED_FUNCS(UINT8, uint8_t)
FI_BGQ_RX_ATOMIC_SPECIALIZED_FUNCS(INT16, int16_t)
FI_BGQ_RX_ATOMIC_SPECIALIZED_FUNCS(UINT16, uint16_t)
FI_BGQ_RX_ATOMIC_SPECIALIZED_FUNCS(INT32, int32_t)
FI_BGQ_RX_ATOMIC_SPECIALIZED_FUNCS(UINT32, uint32_t)
FI_BGQ_RX_ATOMIC_SPECIALIZED_FUNCS(INT64, int64_t)
FI_BGQ_RX_ATOMIC_SPECIALIZED_FUNCS(UINT64, uint64_t)
FI_BGQ_RX_ATOMIC_SPECIALIZED_FUNCS(FLOAT, float)
FI_BGQ_RX_ATOMIC_SPECIALIZED_FUNCS(DOUBLE, double)
FI_BGQ_RX_ATOMIC_SPECIALIZED_FUNCS_COMPLEX(FLOAT_COMPLEX, complex float)
FI_BGQ_RX_ATOMIC_SPECIALIZED_FUNCS_COMPLEX(DOUBLE_COMPLEX, complex double)
FI_BGQ_RX_ATOMIC_SPECIALIZED_FUNCS(LONG_DOUBLE, long double)
FI_BGQ_RX_ATOMIC_SPECIALIZED_FUNCS_COMPLEX(LONG_DOUBLE_COMPLEX, complex long double)
#define FI_BGQ_RX_ATOMIC_DISPATCH_FUNC_NAMES(DT) \
FI_BGQ_RX_ATOMIC_SPECIALIZED_FUNC_NAME(MIN, DT), \
FI_BGQ_RX_ATOMIC_SPECIALIZED_FUNC_NAME(MAX, DT), \
FI_BGQ_RX_ATOMIC_SPECIALIZED_FUNC_NAME(SUM, DT), \
FI_BGQ_RX_ATOMIC_SPECIALIZED_FUNC_NAME(PROD, DT), \
FI_BGQ_RX_ATOMIC_SPECIALIZED_FUNC_NAME(LOR, DT), \
FI_BGQ_RX_ATOMIC_SPECIALIZED_FUNC_NAME(LAND, DT), \
FI_BGQ_RX_ATOMIC_SPECIALIZED_FUNC_NAME(BOR, DT), \
FI_BGQ_RX_ATOMIC_SPECIALIZED_FUNC_NAME(BAND, DT), \
FI_BGQ_RX_ATOMIC_SPECIALIZED_FUNC_NAME(LXOR, DT), \
FI_BGQ_RX_ATOMIC_SPECIALIZED_FUNC_NAME(BXOR, DT), \
FI_BGQ_RX_ATOMIC_SPECIALIZED_FUNC_NAME(ATOMIC_READ, DT), \
FI_BGQ_RX_ATOMIC_SPECIALIZED_FUNC_NAME(ATOMIC_WRITE, DT), \
FI_BGQ_RX_ATOMIC_SPECIALIZED_FUNC_NAME(CSWAP, DT), \
FI_BGQ_RX_ATOMIC_SPECIALIZED_FUNC_NAME(CSWAP_NE, DT), \
FI_BGQ_RX_ATOMIC_SPECIALIZED_FUNC_NAME(CSWAP_LE, DT), \
FI_BGQ_RX_ATOMIC_SPECIALIZED_FUNC_NAME(CSWAP_LT, DT), \
FI_BGQ_RX_ATOMIC_SPECIALIZED_FUNC_NAME(CSWAP_GE, DT), \
FI_BGQ_RX_ATOMIC_SPECIALIZED_FUNC_NAME(CSWAP_GT, DT), \
FI_BGQ_RX_ATOMIC_SPECIALIZED_FUNC_NAME(MSWAP, DT)
#define FI_BGQ_RX_ATOMIC_DISPATCH_FUNC_NAMES_COMPLEX(DT) \
fi_bgq_rx_atomic_NOOP, \
fi_bgq_rx_atomic_NOOP, \
FI_BGQ_RX_ATOMIC_SPECIALIZED_FUNC_NAME(SUM, DT), \
FI_BGQ_RX_ATOMIC_SPECIALIZED_FUNC_NAME(PROD, DT), \
FI_BGQ_RX_ATOMIC_SPECIALIZED_FUNC_NAME(LOR, DT), \
FI_BGQ_RX_ATOMIC_SPECIALIZED_FUNC_NAME(LAND, DT), \
FI_BGQ_RX_ATOMIC_SPECIALIZED_FUNC_NAME(BOR, DT), \
FI_BGQ_RX_ATOMIC_SPECIALIZED_FUNC_NAME(BAND, DT), \
FI_BGQ_RX_ATOMIC_SPECIALIZED_FUNC_NAME(LXOR, DT), \
FI_BGQ_RX_ATOMIC_SPECIALIZED_FUNC_NAME(BXOR, DT), \
FI_BGQ_RX_ATOMIC_SPECIALIZED_FUNC_NAME(ATOMIC_READ, DT), \
FI_BGQ_RX_ATOMIC_SPECIALIZED_FUNC_NAME(ATOMIC_WRITE, DT), \
FI_BGQ_RX_ATOMIC_SPECIALIZED_FUNC_NAME(CSWAP, DT), \
fi_bgq_rx_atomic_NOOP, \
fi_bgq_rx_atomic_NOOP, \
fi_bgq_rx_atomic_NOOP, \
fi_bgq_rx_atomic_NOOP, \
fi_bgq_rx_atomic_NOOP, \
FI_BGQ_RX_ATOMIC_SPECIALIZED_FUNC_NAME(MSWAP, DT)
void
fi_bgq_rx_atomic_dispatch (void * buf, void * addr, size_t nbytes,
enum fi_datatype dt, enum fi_op op)
{
static void (*fi_bgq_rx_atomic_dispatch_table[FI_DATATYPE_LAST][FI_ATOMIC_OP_LAST])(void*, void*, size_t) =
{
{ FI_BGQ_RX_ATOMIC_DISPATCH_FUNC_NAMES(INT8) },
{ FI_BGQ_RX_ATOMIC_DISPATCH_FUNC_NAMES(UINT8) },
{ FI_BGQ_RX_ATOMIC_DISPATCH_FUNC_NAMES(INT16) },
{ FI_BGQ_RX_ATOMIC_DISPATCH_FUNC_NAMES(UINT16) },
{ FI_BGQ_RX_ATOMIC_DISPATCH_FUNC_NAMES(INT32) },
{ FI_BGQ_RX_ATOMIC_DISPATCH_FUNC_NAMES(UINT32) },
{ FI_BGQ_RX_ATOMIC_DISPATCH_FUNC_NAMES(INT64) },
{ FI_BGQ_RX_ATOMIC_DISPATCH_FUNC_NAMES(UINT64) },
{ FI_BGQ_RX_ATOMIC_DISPATCH_FUNC_NAMES(FLOAT) },
{ FI_BGQ_RX_ATOMIC_DISPATCH_FUNC_NAMES(DOUBLE) },
{ FI_BGQ_RX_ATOMIC_DISPATCH_FUNC_NAMES_COMPLEX(FLOAT) },
{ FI_BGQ_RX_ATOMIC_DISPATCH_FUNC_NAMES_COMPLEX(DOUBLE) },
{ FI_BGQ_RX_ATOMIC_DISPATCH_FUNC_NAMES(LONG_DOUBLE) },
{ FI_BGQ_RX_ATOMIC_DISPATCH_FUNC_NAMES_COMPLEX(LONG_DOUBLE) }
};
fi_bgq_rx_atomic_dispatch_table[dt][op](buf, addr, nbytes);
}
/*
* --------------------------- end: rx atomics --------------------------------
*/
ssize_t fi_bgq_atomic(struct fid_ep *ep,
const void *buf, size_t count, void *desc,
fi_addr_t dst_addr, uint64_t addr,
uint64_t key, enum fi_datatype datatype,
enum fi_op op, void* context)
{
int lock_required = 0;
struct fi_bgq_ep * bgq_ep;
bgq_ep = container_of(ep, struct fi_bgq_ep, ep_fid);
switch (bgq_ep->threading) {
case FI_THREAD_ENDPOINT:
case FI_THREAD_DOMAIN:
case FI_THREAD_COMPLETION:
lock_required = 0;
break;
case FI_THREAD_FID:
case FI_THREAD_UNSPEC:
case FI_THREAD_SAFE:
lock_required = 1;
break;
default:
return -FI_EINVAL;
}
return fi_bgq_atomic_generic(ep, buf, count, dst_addr,
addr, key, datatype, op, context,
lock_required);
}
ssize_t fi_bgq_fetch_atomic(struct fid_ep *ep,
const void *buf, size_t count,
void *desc,
void *result, void *result_desc,
fi_addr_t dest_addr, uint64_t addr,
uint64_t key, enum fi_datatype datatype,
enum fi_op op, void *context)
{
int lock_required = 0;
struct fi_bgq_ep * bgq_ep;
bgq_ep = container_of(ep, struct fi_bgq_ep, ep_fid);
switch (bgq_ep->threading) {
case FI_THREAD_ENDPOINT:
case FI_THREAD_DOMAIN:
case FI_THREAD_COMPLETION:
lock_required = 0;
break;
case FI_THREAD_FID:
case FI_THREAD_UNSPEC:
case FI_THREAD_SAFE:
lock_required = 1;
break;
default:
return -FI_EINVAL;
}
return fi_bgq_fetch_atomic_generic(ep,
buf, count, desc,
result, result_desc, dest_addr, addr,
key, datatype, op, context,
lock_required);
}
ssize_t fi_bgq_compare_atomic(struct fid_ep *ep,
const void *buf, size_t count, void *desc,
const void *compare, void *compare_desc,
void *result, void *result_desc,
fi_addr_t dest_addr, uint64_t addr,
uint64_t key, enum fi_datatype datatype,
enum fi_op op, void *context)
{
int lock_required = 0;
struct fi_bgq_ep * bgq_ep;
bgq_ep = container_of(ep, struct fi_bgq_ep, ep_fid);
switch (bgq_ep->threading) {
case FI_THREAD_ENDPOINT:
case FI_THREAD_DOMAIN:
case FI_THREAD_COMPLETION:
lock_required = 0;
break;
case FI_THREAD_FID:
case FI_THREAD_UNSPEC:
case FI_THREAD_SAFE:
lock_required = 1;
break;
default:
return -FI_EINVAL;
}
return fi_bgq_compare_atomic_generic(ep,
buf, count, desc,
compare, compare_desc,
result, result_desc, dest_addr, addr,
key, datatype, op, context,
lock_required);
}
ssize_t fi_bgq_inject_atomic(struct fid_ep *ep,
const void *buf, size_t count,
fi_addr_t dest_addr, uint64_t addr, uint64_t key,
enum fi_datatype datatype, enum fi_op op)
{
int lock_required = 0;
struct fi_bgq_ep * bgq_ep;
bgq_ep = container_of(ep, struct fi_bgq_ep, ep_fid);
switch (bgq_ep->threading) {
case FI_THREAD_ENDPOINT:
case FI_THREAD_DOMAIN:
case FI_THREAD_COMPLETION:
lock_required = 0;
break;
case FI_THREAD_FID:
case FI_THREAD_UNSPEC:
case FI_THREAD_SAFE:
lock_required = 1;
break;
default:
return -FI_EINVAL;
}
return fi_bgq_inject_atomic_generic(ep,
buf, count,
dest_addr, addr,
key, datatype, op,
lock_required);
}
ssize_t fi_bgq_atomicv(struct fid_ep *ep,
const struct fi_ioc *iov, void **desc, size_t count,
uint64_t addr, uint64_t key,
enum fi_datatype datatype, enum fi_op op, void *context)
{
errno = FI_ENOSYS;
return -errno;
}
ssize_t fi_bgq_atomic_writemsg(struct fid_ep *ep,
const struct fi_msg_atomic *msg, uint64_t flags)
{
struct fi_bgq_ep * bgq_ep;
bgq_ep = container_of(ep, struct fi_bgq_ep, ep_fid);
switch (bgq_ep->threading) {
case FI_THREAD_ENDPOINT:
case FI_THREAD_DOMAIN:
case FI_THREAD_COMPLETION:
return fi_bgq_atomic_writemsg_generic(ep, msg, flags,
0);
case FI_THREAD_FID:
case FI_THREAD_UNSPEC:
case FI_THREAD_SAFE:
return fi_bgq_atomic_writemsg_generic(ep, msg, flags,
1);
}
errno = FI_EINVAL;
return -errno;
}
ssize_t fi_bgq_atomic_readwritemsg(struct fid_ep *ep,
const struct fi_msg_atomic *msg,
struct fi_ioc *resultv,
void **result_desc, size_t result_count,
uint64_t flags)
{
struct fi_bgq_ep * bgq_ep;
bgq_ep = container_of(ep, struct fi_bgq_ep, ep_fid);
switch (bgq_ep->threading) {
case FI_THREAD_ENDPOINT:
case FI_THREAD_DOMAIN:
case FI_THREAD_COMPLETION:
return fi_bgq_atomic_readwritemsg_generic(ep, msg,
resultv, result_count, flags,
0);
case FI_THREAD_FID:
case FI_THREAD_UNSPEC:
case FI_THREAD_SAFE:
return fi_bgq_atomic_readwritemsg_generic(ep, msg,
resultv, result_count, flags,
1);
}
errno = FI_EINVAL;
return -errno;
}
ssize_t fi_bgq_atomic_compwritemsg(struct fid_ep *ep,
const struct fi_msg_atomic *msg,
const struct fi_ioc *comparev,
void **compare_desc, size_t compare_count,
struct fi_ioc *resultv, void **result_desc,
size_t result_count,
uint64_t flags)
{
struct fi_bgq_ep * bgq_ep;
bgq_ep = container_of(ep, struct fi_bgq_ep, ep_fid);
switch (bgq_ep->threading) {
case FI_THREAD_ENDPOINT:
case FI_THREAD_DOMAIN:
case FI_THREAD_COMPLETION:
return fi_bgq_atomic_compwritemsg_generic(ep, msg,
comparev, compare_count,
resultv, result_count,
flags, 0);
case FI_THREAD_FID:
case FI_THREAD_UNSPEC:
case FI_THREAD_SAFE:
return fi_bgq_atomic_compwritemsg_generic(ep, msg,
comparev, compare_count,
resultv, result_count,
flags, 1);
}
errno = FI_EINVAL;
return -errno;
}
int fi_bgq_atomic_writevalid(struct fid_ep *ep, enum fi_datatype datatype,
enum fi_op op, size_t *count)
{
static size_t sizeofdt[FI_DATATYPE_LAST] = {
sizeof(int8_t), /* FI_INT8 */
sizeof(uint8_t), /* FI_UINT8 */
sizeof(int16_t), /* FI_INT16 */
sizeof(uint16_t), /* FI_UINT16 */
sizeof(int32_t), /* FI_INT32 */
sizeof(uint32_t), /* FI_UINT32 */
sizeof(int64_t), /* FI_INT64 */
sizeof(uint64_t), /* FI_UINT64 */
sizeof(float), /* FI_FLOAT */
sizeof(double), /* FI_DOUBLE */
sizeof(complex float), /* FI_FLOAT_COMPLEX */
sizeof(complex double), /* FI_DOUBLE_COMPLEX */
sizeof(long double), /* FI_LONG_DOUBLE */
sizeof(complex long double) /* FI_LONG_DOUBLE_COMPLEX */
};
if ((op > FI_ATOMIC_WRITE) || (datatype >= FI_DATATYPE_LAST)) {
*count = 0;
errno = FI_EOPNOTSUPP;
return -errno;
}
*count = sizeof(union fi_bgq_mu_packet_payload) / sizeofdt[datatype];
return 0;
}
int fi_bgq_atomic_readwritevalid(struct fid_ep *ep,
enum fi_datatype datatype,
enum fi_op op, size_t *count)
{
static size_t sizeofdt[FI_DATATYPE_LAST] = {
sizeof(int8_t), /* FI_INT8 */
sizeof(uint8_t), /* FI_UINT8 */
sizeof(int16_t), /* FI_INT16 */
sizeof(uint16_t), /* FI_UINT16 */
sizeof(int32_t), /* FI_INT32 */
sizeof(uint32_t), /* FI_UINT32 */
sizeof(int64_t), /* FI_INT64 */
sizeof(uint64_t), /* FI_UINT64 */
sizeof(float), /* FI_FLOAT */
sizeof(double), /* FI_DOUBLE */
sizeof(complex float), /* FI_FLOAT_COMPLEX */
sizeof(complex double), /* FI_DOUBLE_COMPLEX */
sizeof(long double), /* FI_LONG_DOUBLE */
sizeof(complex long double) /* FI_LONG_DOUBLE_COMPLEX */
};
if ((op > FI_ATOMIC_WRITE) || (datatype >= FI_DATATYPE_LAST)) {
*count = 0;
errno = FI_EOPNOTSUPP;
return -errno;
}
*count = (sizeof(union fi_bgq_mu_packet_payload) - sizeof(struct fi_bgq_mu_fetch_metadata)) / sizeofdt[datatype];
return 0;
}
int fi_bgq_atomic_compwritevalid(struct fid_ep *ep,
enum fi_datatype datatype,
enum fi_op op, size_t *count)
{
static size_t sizeofdt[FI_DATATYPE_LAST] = {
sizeof(int8_t), /* FI_INT8 */
sizeof(uint8_t), /* FI_UINT8 */
sizeof(int16_t), /* FI_INT16 */
sizeof(uint16_t), /* FI_UINT16 */
sizeof(int32_t), /* FI_INT32 */
sizeof(uint32_t), /* FI_UINT32 */
sizeof(int64_t), /* FI_INT64 */
sizeof(uint64_t), /* FI_UINT64 */
sizeof(float), /* FI_FLOAT */
sizeof(double), /* FI_DOUBLE */
sizeof(complex float), /* FI_FLOAT_COMPLEX */
sizeof(complex double), /* FI_DOUBLE_COMPLEX */
sizeof(long double), /* FI_LONG_DOUBLE */
sizeof(complex long double) /* FI_LONG_DOUBLE_COMPLEX */
};
if ((op < FI_CSWAP) || (op >= FI_ATOMIC_OP_LAST) || (datatype >= FI_DATATYPE_LAST)) {
*count = 0;
errno = FI_EOPNOTSUPP;
return -errno;
}
*count = (sizeof(union fi_bgq_mu_packet_payload) / 2) / sizeofdt[datatype];
return 0;
}
static struct fi_ops_atomic fi_bgq_ops_atomic = {
.size = sizeof(struct fi_ops_atomic),
.write = fi_no_atomic_write,
.writev = fi_no_atomic_writev,
.writemsg = fi_bgq_atomic_writemsg,
.inject = fi_no_atomic_inject,
.readwrite = fi_no_atomic_readwrite,
.readwritev = fi_no_atomic_readwritev,
.readwritemsg = fi_bgq_atomic_readwritemsg,
.compwrite = fi_no_atomic_compwrite,
.compwritev = fi_no_atomic_compwritev,
.compwritemsg = fi_bgq_atomic_compwritemsg,
.writevalid = fi_bgq_atomic_writevalid,
.readwritevalid = fi_bgq_atomic_readwritevalid,
.compwritevalid = fi_bgq_atomic_compwritevalid
};
int fi_bgq_init_atomic_ops(struct fi_bgq_ep *bgq_ep, struct fi_info *info)
{
if (!info || !bgq_ep)
goto err;
if (info->caps & FI_ATOMICS ||
(info->tx_attr &&
(info->tx_attr->caps & FI_ATOMICS))) {
bgq_ep->ep_fid.atomic = &fi_bgq_ops_atomic;
}
return 0;
err:
errno = FI_EINVAL;
return -errno;
}
int fi_bgq_enable_atomic_ops(struct fi_bgq_ep *bgq_ep)
{
if (!bgq_ep || !bgq_ep->domain)
goto err;
if (!bgq_ep->ep_fid.atomic) {
/* atomic ops not enabled on this endpoint */
return 0;
}
/* fill in atomic formats */
return 0;
err:
errno = FI_EINVAL;
return -errno;
}
int fi_bgq_finalize_atomic_ops(struct fi_bgq_ep *bgq_ep)
{
return 0;
}