// include/ylt/util/atomic_shared_ptr.hpp
#pragma once
#include <atomic>
#include <cassert>
#include <climits>
#include <memory>
#include <shared_mutex>
#include <thread>
#include <type_traits>
#include <vector>
// Adapted from folly:
// https://github.com/facebook/folly/blob/master/folly/concurrency/detail/AtomicSharedPtr-detail.h
// https://github.com/facebook/folly/blob/master/folly/PackedSyncPtr.h
// https://github.com/facebook/folly/blob/master/folly/concurrency/AtomicSharedPtr.h
// folly's lock-free atomic shared_ptr is used on x86_64 with libstdc++;
// other architectures/standard libraries fall back to a std::shared_ptr
// protected by a std::shared_mutex.
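//
// How the lock-free implementation works (high-level sketch):
//  - The atomic state is a single 8-byte PackedSyncPtr: the lower 48 bits
//    hold the address of the shared_ptr control block, the upper 16 bits
//    hold a "local" reader count.
//  - store() pre-adds EXTERNAL_OFFSET to the control block's use count, so
//    readers in load() can take a reference by bumping only the packed
//    local count with a single compare_exchange on the 8-byte word.
//  - When the stored pointer is replaced, release_external() gives back the
//    part of EXTERNAL_OFFSET that was never handed out to readers.
//
// Minimal usage sketch (Config is a hypothetical user type):
//
//   ylt::util::atomic_shared_ptr<Config> current;
//   current.store(std::make_shared<Config>());          // publish
//   std::shared_ptr<Config> snapshot = current.load();  // concurrent read
//   current.compare_exchange_strong(snapshot, std::make_shared<Config>());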
namespace ylt::util {
#if (defined(_M_X64) || defined(__x86_64__) || defined(__amd64)) && \
    defined(__GLIBCXX__)
// This implementation is specific to libstdc++, now accepting
// diffs for other libraries.
// Specifically, this adds support for two things:
// 1) incrementing/decrementing the shared count by more than 1 at a time
// 2) Getting the thing the shared_ptr points to, which may be different from
// the aliased pointer.
namespace detail {
class shared_ptr_internals {
public:
template <typename T, typename... Args>
static std::shared_ptr<T> make_ptr(Args &&...args) {
return std::make_shared<T>(std::forward<Args>(args)...);
}
typedef std::__shared_count<std::_S_atomic> shared_count;
typedef std::_Sp_counted_base<std::_S_atomic> counted_base;
template <typename T>
using CountedPtr = std::shared_ptr<T>;
template <typename T>
static counted_base *get_counted_base(const std::shared_ptr<T> &bar);
static void inc_shared_count(counted_base *base, long count);
template <typename T>
static void release_shared(counted_base *base, long count);
template <typename T>
static T *get_shared_ptr(counted_base *base);
template <typename T>
static T *release_ptr(std::shared_ptr<T> &p);
template <typename T>
static std::shared_ptr<T> get_shared_ptr_from_counted_base(counted_base *base,
bool inc = true);
private:
/* Accessors for private members using explicit template instantiation */
struct access_shared_ptr {
typedef shared_count std::__shared_ptr<const void, std::_S_atomic>::*type;
friend type fieldPtr(access_shared_ptr);
};
struct access_base {
typedef counted_base *shared_count::*type;
friend type fieldPtr(access_base);
};
struct access_use_count {
typedef _Atomic_word counted_base::*type;
friend type fieldPtr(access_use_count);
};
struct access_weak_count {
typedef _Atomic_word counted_base::*type;
friend type fieldPtr(access_weak_count);
};
struct access_counted_ptr_ptr {
typedef const void
*std::_Sp_counted_ptr<const void *, std::_S_atomic>::*type;
friend type fieldPtr(access_counted_ptr_ptr);
};
struct access_shared_ptr_ptr {
typedef const void *std::__shared_ptr<const void, std::_S_atomic>::*type;
friend type fieldPtr(access_shared_ptr_ptr);
};
struct access_refcount {
typedef shared_count std::__shared_ptr<const void, std::_S_atomic>::*type;
friend type fieldPtr(access_refcount);
};
template <typename Tag, typename Tag::type M>
struct Rob {
friend typename Tag::type fieldPtr(Tag) { return M; }
};
};
template struct shared_ptr_internals::Rob<
shared_ptr_internals::access_shared_ptr,
&std::__shared_ptr<const void, std::_S_atomic>::_M_refcount>;
template struct shared_ptr_internals::Rob<
shared_ptr_internals::access_base,
&shared_ptr_internals::shared_count::_M_pi>;
template struct shared_ptr_internals::Rob<
shared_ptr_internals::access_use_count,
&shared_ptr_internals::counted_base::_M_use_count>;
template struct shared_ptr_internals::Rob<
shared_ptr_internals::access_weak_count,
&shared_ptr_internals::counted_base::_M_weak_count>;
template struct shared_ptr_internals::Rob<
shared_ptr_internals::access_counted_ptr_ptr,
&std::_Sp_counted_ptr<const void *, std::_S_atomic>::_M_ptr>;
template struct shared_ptr_internals::Rob<
shared_ptr_internals::access_shared_ptr_ptr,
&std::__shared_ptr<const void, std::_S_atomic>::_M_ptr>;
template struct shared_ptr_internals::Rob<
shared_ptr_internals::access_refcount,
&std::__shared_ptr<const void, std::_S_atomic>::_M_refcount>;
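// The Rob/fieldPtr machinery above is the classic explicit-instantiation
// trick for reaching private members: an explicit instantiation may name
// private members, and each instantiation defines a friend fieldPtr()
// overload that returns the corresponding pointer-to-member. The accessors
// below use those pointers to read and write libstdc++'s shared_ptr
// internals (_M_ptr, _M_pi, _M_use_count, ...).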
template <typename T>
inline shared_ptr_internals::counted_base *
shared_ptr_internals::get_counted_base(const std::shared_ptr<T> &bar) {
// reinterpret_pointer_cast<const void>
// Not quite C++ legal, but explicit template instantiation access to
// private members requires full type name (i.e. shared_ptr<const void>, not
// shared_ptr<T>)
const std::shared_ptr<const void> &ptr(
reinterpret_cast<const std::shared_ptr<const void> &>(bar));
return (ptr.*fieldPtr(access_shared_ptr{})).*fieldPtr(access_base{});
}
inline void shared_ptr_internals::inc_shared_count(counted_base *base,
long count) {
// Check that we don't exceed the maximum number of atomic_shared_ptrs.
// Consider lowering EXTERNAL_OFFSET if this assert fires.
assert(base->_M_get_use_count() + count < INT_MAX);
__gnu_cxx::__atomic_add_dispatch(&(base->*fieldPtr(access_use_count{})),
count);
}
template <typename T>
inline void shared_ptr_internals::release_shared(counted_base *base,
long count) {
// If count == 1, this is equivalent to base->_M_release()
auto &a = base->*fieldPtr(access_use_count{});
if (__gnu_cxx::__exchange_and_add_dispatch(&a, -count) == count) {
base->_M_dispose();
auto &b = base->*fieldPtr(access_weak_count{});
if (__gnu_cxx::__exchange_and_add_dispatch(&b, -1) == 1) {
base->_M_destroy();
}
}
}
template <typename T>
inline T *shared_ptr_internals::get_shared_ptr(counted_base *base) {
// See if this was a make_shared allocation
auto inplace = base->_M_get_deleter(typeid(std::_Sp_make_shared_tag));
if (inplace) {
return (T *)inplace;
}
// Could also be a _Sp_counted_deleter, but the layout is the same
using derived_type = std::_Sp_counted_ptr<const void *, std::_S_atomic>;
auto ptr = reinterpret_cast<derived_type *>(base);
return (T *)(ptr->*fieldPtr(access_counted_ptr_ptr{}));
}
template <typename T>
inline T *shared_ptr_internals::release_ptr(std::shared_ptr<T> &p) {
auto res = p.get();
std::shared_ptr<const void> &ptr(
reinterpret_cast<std::shared_ptr<const void> &>(p));
ptr.*fieldPtr(access_shared_ptr_ptr{}) = nullptr;
(ptr.*fieldPtr(access_refcount{})).*fieldPtr(access_base{}) = nullptr;
return res;
}
template <typename T>
inline std::shared_ptr<T>
shared_ptr_internals::get_shared_ptr_from_counted_base(counted_base *base,
bool inc) {
if (!base) {
return nullptr;
}
std::shared_ptr<const void> newp;
if (inc) {
inc_shared_count(base, 1);
}
newp.*fieldPtr(access_shared_ptr_ptr{}) =
get_shared_ptr<const void>(base); // _M_ptr
(newp.*fieldPtr(access_refcount{})).*fieldPtr(access_base{}) = base;
// reinterpret_pointer_cast<T>
auto res = reinterpret_cast<std::shared_ptr<T> *>(&newp);
return std::move(*res);
}
template <class T>
class PackedSyncPtr {
// This just allows using this class even with T=void. Attempting
// to use the operator* or operator[] on a PackedSyncPtr<void> will
// still properly result in a compile error.
typedef typename std::add_lvalue_reference<T>::type reference;
public:
/*
* If you default construct one of these, you must call this init()
* function before using it.
*
* (We are avoiding a constructor to ensure gcc allows us to put
* this class in packed structures.)
*/
void init(T *initialPtr = nullptr, uint16_t initialExtra = 0) {
auto intPtr = reinterpret_cast<uintptr_t>(initialPtr);
assert(!(intPtr >> 48));
data_ = intPtr;
setExtra(initialExtra);
}
/*
* Sets a new pointer. You must hold the lock when calling this
* function, or else be able to guarantee no other threads could be
* using this PackedSyncPtr<>.
*/
void set(T *t) {
auto intPtr = reinterpret_cast<uintptr_t>(t);
auto shiftedExtra = uintptr_t(extra()) << 48;
assert(!(intPtr >> 48));
data_ = (intPtr | shiftedExtra);
}
/*
* Get the pointer.
*
* You can call any of these without holding the lock, with the
* normal types of behavior you'll get on x64 from reading a pointer
* without locking.
*/
T *get() const { return reinterpret_cast<T *>(data_ & (-1ull >> 16)); }
T *operator->() const { return get(); }
reference operator*() const { return *get(); }
reference operator[](std::ptrdiff_t i) const { return get()[i]; }
/*
* Access extra data stored in unused bytes of the pointer.
*
* It is ok to call this without holding the lock.
*/
uint16_t extra() const { return data_ >> 48; }
/*
* Don't try to put anything into this that has the high bit set:
* that's what we're using for the mutex.
*
* Don't call this without holding the lock.
*/
void setExtra(uint16_t extra) {
assert(!(extra & 0x8000));
auto ptr = data_ & (-1ull >> 16);
data_ = ((uintptr_t(extra) << 48) | ptr);
}
private:
uintptr_t data_;
};
static_assert(std::is_standard_layout_v<PackedSyncPtr<void>> &&
std::is_trivial_v<PackedSyncPtr<void>>,
"PackedSyncPtr must be kept a POD type.");
static_assert(sizeof(PackedSyncPtr<void>) == 8,
"PackedSyncPtr should be only 8 bytes---something is "
"messed up");
} // namespace detail
template <typename T, typename CountedDetail = detail::shared_ptr_internals>
class atomic_shared_ptr {
using SharedPtr = typename CountedDetail::template CountedPtr<T>;
using BasePtr = typename CountedDetail::counted_base;
using PackedPtr = detail::PackedSyncPtr<BasePtr>;
public:
atomic_shared_ptr() noexcept { init(); }
explicit atomic_shared_ptr(SharedPtr foo) /* noexcept */
: atomic_shared_ptr() {
store(std::move(foo));
}
atomic_shared_ptr(const atomic_shared_ptr<T> &) = delete;
~atomic_shared_ptr() { store(SharedPtr(nullptr)); }
void operator=(SharedPtr desired) /* noexcept */ {
store(std::move(desired));
}
void operator=(const atomic_shared_ptr<T> &) = delete;
bool is_lock_free() const noexcept {
// lock free unless more than EXTERNAL_OFFSET threads are
// contending and they all get unlucky and scheduled out during
// load().
//
// TODO: Could use a lock-free external map to fix this
// corner case.
return true;
}
SharedPtr load(
std::memory_order order = std::memory_order_seq_cst) const noexcept {
auto local = takeOwnedBase(order);
return get_shared_ptr(local, false);
}
/* implicit */ operator SharedPtr() const { return load(); }
void store(SharedPtr n, std::memory_order order =
std::memory_order_seq_cst) /* noexcept */ {
auto newptr = get_newptr(std::move(n));
auto old = ptr_.exchange(newptr, order);
release_external(old);
}
SharedPtr exchange(
SharedPtr n,
std::memory_order order = std::memory_order_seq_cst) /* noexcept */ {
auto newptr = get_newptr(std::move(n));
auto old = ptr_.exchange(newptr, order);
SharedPtr old_ptr;
if (old.get()) {
old_ptr = get_shared_ptr(old);
release_external(old);
}
return old_ptr;
}
bool compare_exchange_weak(
SharedPtr &expected, const SharedPtr &n,
std::memory_order mo = std::memory_order_seq_cst) noexcept {
return compare_exchange_weak(expected, n, mo, mo);
}
bool compare_exchange_weak(SharedPtr &expected, const SharedPtr &n,
std::memory_order success,
std::memory_order failure) /* noexcept */ {
auto newptr = get_newptr(n);
PackedPtr oldptr, expectedptr;
oldptr = takeOwnedBase(success);
if (!owners_eq(oldptr, CountedDetail::get_counted_base(expected))) {
expected = get_shared_ptr(oldptr, false);
release_external(newptr);
return false;
}
expectedptr = oldptr; // Need oldptr to release if failed
if (ptr_.compare_exchange_weak(expectedptr, newptr, success, failure)) {
if (oldptr.get()) {
release_external(oldptr, -1);
}
return true;
}
else {
if (oldptr.get()) {
expected = get_shared_ptr(oldptr, false);
}
else {
expected = SharedPtr(nullptr);
}
release_external(newptr);
return false;
}
}
bool compare_exchange_weak(
SharedPtr &expected, SharedPtr &&desired,
std::memory_order mo = std::memory_order_seq_cst) noexcept {
return compare_exchange_weak(expected, desired, mo, mo);
}
bool compare_exchange_weak(SharedPtr &expected, SharedPtr &&desired,
std::memory_order success,
std::memory_order failure) /* noexcept */ {
return compare_exchange_weak(expected, desired, success, failure);
}
bool compare_exchange_strong(
SharedPtr &expected, const SharedPtr &n,
std::memory_order mo = std::memory_order_seq_cst) noexcept {
return compare_exchange_strong(expected, n, mo, mo);
}
bool compare_exchange_strong(SharedPtr &expected, const SharedPtr &n,
std::memory_order success,
std::memory_order failure) /* noexcept */ {
auto local_expected = expected;
do {
if (compare_exchange_weak(expected, n, success, failure)) {
return true;
}
} while (local_expected == expected);
return false;
}
bool compare_exchange_strong(
SharedPtr &expected, SharedPtr &&desired,
std::memory_order mo = std::memory_order_seq_cst) noexcept {
return compare_exchange_strong(expected, desired, mo, mo);
}
bool compare_exchange_strong(SharedPtr &expected, SharedPtr &&desired,
std::memory_order success,
std::memory_order failure) /* noexcept */ {
return compare_exchange_strong(expected, desired, success, failure);
}
private:
// Matches packed_sync_pointer. Must be > max number of local
// counts. This is the max number of threads that can access this
// atomic_shared_ptr at once before we start blocking.
static constexpr unsigned EXTERNAL_OFFSET{0x2000};
// Bit signifying aliased constructor
static constexpr unsigned ALIASED_PTR{0x4000};
mutable std::atomic<PackedPtr> ptr_;
void add_external(BasePtr *res, int64_t c = 0) const {
assert(res);
CountedDetail::inc_shared_count(res, EXTERNAL_OFFSET + c);
}
void release_external(PackedPtr &res, int64_t c = 0) const {
if (!res.get()) {
return;
}
int64_t count = get_local_count(res) + c;
int64_t diff = EXTERNAL_OFFSET - count;
assert(diff >= 0);
CountedDetail::template release_shared<T>(res.get(), diff);
}
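// get_newptr() converts an incoming shared_ptr into a packed control-block
// pointer. A shared_ptr can be "aliased": its get() may differ from the
// object owned by its control block, e.g.
//   std::shared_ptr<Foo> owner = std::make_shared<Foo>();
//   std::shared_ptr<int> alias(owner, &owner->member);  // Foo is hypothetical
// Since only the control block is stored here, an aliased pointer is wrapped
// in an extra shared_ptr (and ALIASED_PTR is set) so the original aliased
// value can be reproduced by load().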
PackedPtr get_newptr(const SharedPtr &n) const {
BasePtr *newval;
unsigned count = 0;
if (!n) {
newval = nullptr;
}
else {
newval = CountedDetail::get_counted_base(n);
if (n.get() != CountedDetail::template get_shared_ptr<T>(newval)) {
// This is an aliased sharedptr. Make an un-aliased one
// by wrapping in *another* shared_ptr.
auto data = CountedDetail::template make_ptr<SharedPtr>(n);
newval = CountedDetail::get_counted_base(data);
count = ALIASED_PTR;
// (add external must happen before data goes out of scope)
add_external(newval);
}
else {
add_external(newval);
}
}
PackedPtr newptr;
newptr.init(newval, count);
return newptr;
}
PackedPtr get_newptr(SharedPtr &&n) const {
BasePtr *newval;
unsigned count = 0;
if (!n) {
newval = nullptr;
}
else {
newval = CountedDetail::get_counted_base(n);
if (n.get() != CountedDetail::template get_shared_ptr<T>(newval)) {
// This is an aliased sharedptr. Make an un-aliased one
// by wrapping in *another* shared_ptr.
auto data = CountedDetail::template make_ptr<SharedPtr>(std::move(n));
newval = CountedDetail::get_counted_base(data);
count = ALIASED_PTR;
CountedDetail::release_ptr(data);
add_external(newval, -1);
}
else {
CountedDetail::release_ptr(n);
add_external(newval, -1);
}
}
PackedPtr newptr;
newptr.init(newval, count);
return newptr;
}
void init() {
PackedPtr data;
data.init();
ptr_.store(data);
}
unsigned int get_local_count(const PackedPtr &p) const {
return p.extra() & ~ALIASED_PTR;
}
// Check pointer equality considering wrapped aliased pointers.
bool owners_eq(PackedPtr &p1, BasePtr *p2) {
bool aliased1 = p1.extra() & ALIASED_PTR;
if (aliased1) {
auto p1a = CountedDetail::template get_shared_ptr_from_counted_base<T>(
p1.get(), false);
return CountedDetail::get_counted_base(p1a) == p2;
}
return p1.get() == p2;
}
SharedPtr get_shared_ptr(const PackedPtr &p, bool inc = true) const {
bool aliased = p.extra() & ALIASED_PTR;
auto res = CountedDetail::template get_shared_ptr_from_counted_base<T>(
p.get(), inc);
if (aliased) {
auto aliasedp =
CountedDetail::template get_shared_ptr_from_counted_base<SharedPtr>(
p.get());
res = *aliasedp;
}
return res;
}
/* Get a reference to the pointer, either from the local batch or
* from the global count.
*
* return is the base ptr, and the previous local count, if it is
* needed for compare_and_swap later.
*/
PackedPtr takeOwnedBase(std::memory_order order) const noexcept {
PackedPtr local, newlocal;
local = ptr_.load(std::memory_order_acquire);
while (true) {
if (!local.get()) {
return local;
}
newlocal = local;
if (get_local_count(newlocal) + 1 > EXTERNAL_OFFSET) {
// spinlock in the rare case we have more than
// EXTERNAL_OFFSET threads trying to access at once.
std::this_thread::yield();
// Force DeterministicSchedule to choose a different thread
local = ptr_.load(std::memory_order_acquire);
}
else {
newlocal.setExtra(newlocal.extra() + 1);
assert(get_local_count(newlocal) > 0);
if (ptr_.compare_exchange_weak(local, newlocal, order)) {
break;
}
}
}
// Check if we need to push a batch from local -> global
auto batchcount = EXTERNAL_OFFSET / 2;
if (get_local_count(newlocal) > batchcount) {
CountedDetail::inc_shared_count(newlocal.get(), batchcount);
putOwnedBase(newlocal.get(), batchcount, order);
}
return newlocal;
}
void putOwnedBase(BasePtr *p, unsigned int count,
std::memory_order mo) const noexcept {
PackedPtr local = ptr_.load(std::memory_order_acquire);
while (true) {
if (local.get() != p) {
break;
}
auto newlocal = local;
if (get_local_count(local) > count) {
newlocal.setExtra(local.extra() - count);
}
else {
// Otherwise it may be the same pointer, but someone else won
// the compare_exchange below, local count was already made
// global. We decrement the global count directly instead of
// the local one.
break;
}
if (ptr_.compare_exchange_weak(local, newlocal, mo)) {
return;
}
}
CountedDetail::template release_shared<T>(p, count);
}
};
#else
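// Fallback for other architectures / standard libraries: a std::shared_ptr
// guarded by a std::shared_mutex. The memory_order parameters are accepted
// for interface compatibility; the ordering is provided by the mutex.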
template <typename T>
class atomic_shared_ptr {
public:
atomic_shared_ptr() noexcept {}
explicit atomic_shared_ptr(std::shared_ptr<T> foo) /* noexcept */
: atomic_shared_ptr() {
store(std::move(foo));
}
atomic_shared_ptr(const atomic_shared_ptr<T> &) = delete;
~atomic_shared_ptr() { store(std::shared_ptr<T>(nullptr)); }
void operator=(std::shared_ptr<T> desired) /* noexcept */ {
store(std::move(desired));
}
void operator=(const atomic_shared_ptr<T> &) = delete;
bool is_lock_free() const noexcept { return false; }
std::shared_ptr<T> load(
std::memory_order order = std::memory_order_seq_cst) const noexcept {
std::shared_lock lock{m};
return ptr;
}
/* implicit */ operator std::shared_ptr<T>() const { return load(); }
void store(
std::shared_ptr<T> n,
std::memory_order order = std::memory_order_seq_cst) /* noexcept */ {
std::unique_lock lock{m};
ptr = std::move(n);
}
private:
mutable std::shared_mutex m;
std::shared_ptr<T> ptr;
};
#endif
} // namespace ylt::util