cachelib/shm/SysVShmSegment.cpp (261 lines of code) (raw):
/*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "cachelib/shm/SysVShmSegment.h"
#include <folly/hash/Hash.h>
#include <folly/logging/xlog.h>
#include <sys/mman.h>
#include <sys/shm.h>
#include "cachelib/common/Utils.h"
namespace facebook {
namespace cachelib {
constexpr static uint32_t kHugePageSize = 1 << 21; // 2MB
constexpr static uint32_t kModeRWFlags = 0666; // octal rw for ugo
constexpr static uint32_t kModeRFlags = 0444; // octal ro for ugo
namespace detail {
int shmGetImpl(key_t key, size_t size, int flags) {
if (key == IPC_PRIVATE) {
util::throwSystemError(EINVAL, "Cannot create private segment");
}
const int shmid = shmget(key, size, flags);
if (shmid != kInvalidShmId) {
return shmid;
}
switch (errno) {
case EACCES:
case EINVAL:
case ENOSPC:
case ENOMEM:
case ENFILE:
util::throwSystemError(
errno,
folly::sformat(
"Failed to create segment with key {}, size {}, flags {}",
key,
size,
flags));
break;
case ENOENT:
if (!(flags & IPC_CREAT)) { // trying to attach
util::throwSystemError(errno);
} else {
util::throwSystemError(errno, "Invalid errno");
}
break;
case EEXIST:
if (flags & (IPC_CREAT | IPC_EXCL)) { // trying to create
util::throwSystemError(errno);
} else {
XDCHECK(false);
util::throwSystemError(errno, "Invalid errno");
}
break;
case EPERM:
#ifdef __linux__
if (flags & SHM_HUGETLB) {
util::throwSystemError(errno);
break;
}
#endif
XDCHECK(false);
util::throwSystemError(errno, "Invalid errno");
default:
XDCHECK(false);
util::throwSystemError(errno, "Invalid errno");
}
return kInvalidShmId;
}
bool isAddrAligned(const void* const addr) {
return reinterpret_cast<uintptr_t>(addr) % SHMLBA == 0;
}
void* shmAttachImpl(int shmid, const void* addr, int shmFlag) {
if (!isAddrAligned(addr) && (shmFlag & SHM_RND)) {
util::throwSystemError(EINVAL, "address to attach must be page aligned");
}
void* retAddr = shmat(shmid, addr, shmFlag);
if (retAddr != reinterpret_cast<void*>(-1)) {
return retAddr;
}
switch (errno) {
case EACCES:
case EIDRM:
case EINVAL:
case ENOMEM:
util::throwSystemError(
errno,
folly::sformat(
"Failed to attach segment with key {} at addr {}, flags {}",
shmid,
addr,
shmFlag));
break;
default:
XDCHECK(false);
util::throwSystemError(errno, "Invalid errno");
}
return nullptr;
}
void shmDtImpl(const void* addr) {
if (!isAddrAligned(addr)) {
util::throwSystemError(EINVAL, "Address not aligned");
}
const int ret = shmdt(addr);
if (ret == 0) {
return;
}
if (errno == EINVAL) {
util::throwSystemError(errno, "Address not attached");
}
}
void shmCtlImpl(int shmid, int cmd, shmid_ds* buf) {
if (cmd != IPC_STAT && cmd != IPC_RMID && cmd != IPC_SET && cmd != SHM_LOCK) {
util::throwSystemError(EINVAL, "Unsupported shmctl operation");
}
const int ret = shmctl(shmid, cmd, buf);
if (ret == 0) {
return;
}
switch (errno) {
// EOVERFLOW and EACCES make sense only for stat
case EOVERFLOW:
case EACCES:
XDCHECK_EQ(cmd, IPC_STAT);
if (cmd != IPC_STAT) {
util::throwSystemError(errno, "Invalid errno");
}
case EFAULT:
util::throwSystemError(errno);
break;
case EINVAL:
case EIDRM:
// EINVAL can only mean invalid shmid in our case.
util::throwSystemError(EIDRM, "Segment does not exist");
break;
case EPERM:
if (cmd != IPC_RMID && cmd != IPC_SET) {
XDCHECK(false);
util::throwSystemError(errno, "Invalid errno");
}
util::throwSystemError(errno);
break;
case ENOMEM:
XDCHECK_EQ(cmd, SHM_LOCK);
if (cmd != SHM_LOCK) {
util::throwSystemError(errno, "Invalid errno");
}
util::throwSystemError(errno, "Could not lock memory");
break;
default:
XDCHECK(false);
util::throwSystemError(errno, "Invalid errno");
}
}
} // namespace detail
void ensureSizeforHugePage(size_t size) {
if (size % kHugePageSize) {
util::throwSystemError(EINVAL, "Page not aligned to Huge page size");
}
}
int SysVShmSegment::createNewSegment(key_t key,
size_t size,
const ShmSegmentOpts& opts) {
size = detail::getPageAlignedSize(size, opts.pageSize);
int extraFlags = 0;
// size is required and expected to be non zero, page aligned
XDCHECK(detail::isPageAlignedSize(size, opts.pageSize));
#ifndef SHM_HUGE_2MB
#define SHM_HUGE_2MB (21 << MAP_HUGE_SHIFT)
#endif
#ifndef SHM_HUGE_1GB
#define SHM_HUGE_1GB (30 << MAP_HUGE_SHIFT)
#endif
if (opts.pageSize == PageSizeT::TWO_MB) {
extraFlags |= SHM_HUGETLB | SHM_HUGE_2MB;
} else if (opts.pageSize == PageSizeT::ONE_GB) {
extraFlags |= SHM_HUGETLB | SHM_HUGE_1GB;
}
const int flags = IPC_CREAT | IPC_EXCL | kModeRWFlags | extraFlags;
return detail::shmGetImpl(key, size, flags);
}
int SysVShmSegment::attachToExisting(key_t key, const ShmSegmentOpts& opts) {
// size is optional for attach. using anything smaller than existing will
// attach.
const int flags = opts.readOnly ? kModeRFlags : kModeRWFlags;
return detail::shmGetImpl(key, 0, flags);
}
SysVShmSegment::SysVShmSegment(ShmAttachT,
const std::string& name,
ShmSegmentOpts opts)
: ShmBase(std::move(opts), name),
key_(createKeyForName(name)),
shmid_(attachToExisting(key_, opts_)) {
markActive();
createReferenceMapping();
XDCHECK(isActive());
}
SysVShmSegment::SysVShmSegment(ShmNewT,
const std::string& name,
size_t size,
ShmSegmentOpts opts)
: ShmBase(std::move(opts), name),
key_(createKeyForName(name)),
shmid_(createNewSegment(key_, size, opts_)) {
markActive();
createReferenceMapping();
XDCHECK(isActive());
}
void* SysVShmSegment::mapAddress(void* addr) const {
if (!isActive()) {
util::throwSystemError(EINVAL, "Attaching to invalid segment");
}
if (!detail::isPageAlignedAddr(addr, opts_.pageSize)) {
util::throwSystemError(EINVAL, "Unaligned address");
}
int shmFlags = 0; // 0 means RW in shmat
// If users pass in an address, they must make sure that address is unused.
if (addr != nullptr) {
shmFlags |= SHM_REMAP;
}
if (opts_.readOnly) {
shmFlags |= SHM_RDONLY;
}
void* retAddr = detail::shmAttachImpl(shmid_, addr, shmFlags);
XDCHECK(retAddr == addr || addr == nullptr);
return retAddr;
}
void SysVShmSegment::unMap(void* addr) const { detail::shmDtImpl(addr); }
void SysVShmSegment::markForRemoval() {
if (isMarkedForRemoval()) {
return;
}
detail::shmCtlImpl(shmid_, IPC_RMID, nullptr);
markForRemove();
}
size_t SysVShmSegment::getSize() const {
shmid_ds buf = {};
detail::shmCtlImpl(shmid_, IPC_STAT, &buf);
return buf.shm_segsz;
}
bool SysVShmSegment::removeByName(const std::string& name) {
try {
auto key = createKeyForName(name);
const int shmid = detail::shmGetImpl(key, 0, kModeRWFlags);
detail::shmCtlImpl(shmid, IPC_RMID, nullptr);
return true;
} catch (const std::system_error& e) {
if (e.code().value() != ENOENT) {
throw;
}
return false;
}
}
key_t SysVShmSegment::createKeyForName(const std::string& name) noexcept {
// we dont need ftok.
static_assert(std::is_same<KeyType, key_t>::value,
"key type is incompatible");
// key_t is an int
return folly::hash::fnv32(name);
}
void SysVShmSegment::createReferenceMapping() {
referenceMapping_ = detail::shmAttachImpl(shmid_, nullptr, SHM_RDONLY);
const int rv = mprotect(referenceMapping_, getSize(), PROT_NONE);
if (rv != 0) {
util::throwSystemError(errno, "Failure to mprotect reference mapping");
}
}
void SysVShmSegment::deleteReferenceMapping() const {
if (referenceMapping_ != nullptr) {
detail::shmDtImpl(referenceMapping_);
}
}
} // namespace cachelib
} // namespace facebook