cachelib/shm/PosixShmSegment.cpp (274 lines of code) (raw):
/*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "cachelib/shm/PosixShmSegment.h"
#include <fcntl.h>
#include <folly/logging/xlog.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
#include "cachelib/common/Utils.h"
namespace facebook {
namespace cachelib {
// Permission bits handed to shm_open(): read/write for user, group and others
// (subject to the process umask).
static constexpr mode_t kRWMode = 0666;

// Short alias for the POSIX `struct stat` record filled in by fstat().
using stat_t = struct stat;
namespace detail {
// Thin wrapper around shm_open() that converts failures into exceptions.
//
// @param name   name of the shared memory object to open
// @param flags  open flags forwarded to shm_open() (O_CREAT, O_RDWR, ...)
// @return       the file descriptor returned by shm_open() on success
//
// On failure the errno value is reported through util::throwSystemError.
// Errnos that are not expected here additionally trip a debug check.
int shmOpenImpl(const char* name, int flags) {
  const int fd = shm_open(name, flags, kRWMode);
  if (fd == -1) {
    // Snapshot errno once so every branch below reports the same value.
    const int err = errno;
    switch (err) {
    case EEXIST:
    case EMFILE:
    case ENFILE:
    case EACCES:
      util::throwSystemError(err);
      break;
    case ENAMETOOLONG:
    case EINVAL:
      util::throwSystemError(err, "Invalid segment name");
      break;
    case ENOENT:
      if (flags & O_CREAT) {
        XDCHECK(false);
        // FIXME: posix says that ENOENT is reported only when O_CREAT is
        // not set. However, it seems to be reported even when O_CREAT was
        // set and the parent of the path name does not exist.
        util::throwSystemError(err, "Invalid errno");
      } else {
        util::throwSystemError(err);
      }
      break;
    default:
      XDCHECK(false);
      util::throwSystemError(err, "Invalid errno");
    }
    return kInvalidFD;
  }
  return fd;
}
// Thin wrapper around shm_unlink() that converts failures into exceptions.
//
// @param name  name of the shared memory object to unlink
//
// On failure the errno value is reported through util::throwSystemError;
// ENOENT and EACCES are reported as-is, name related errors carry an
// "Invalid segment name" message, and anything else trips a debug check.
void unlinkImpl(const char* const name) {
  if (shm_unlink(name) == 0) {
    return;
  }

  const int err = errno;
  if (err == ENOENT || err == EACCES) {
    util::throwSystemError(err);
  } else if (err == ENAMETOOLONG || err == EINVAL) {
    util::throwSystemError(err, "Invalid segment name");
  } else {
    XDCHECK(false);
    util::throwSystemError(err, "Invalid errno");
  }
}
// Thin wrapper around ftruncate() that converts failures into exceptions.
//
// @param fd    file descriptor to resize
// @param size  new length in bytes
//
// EBADF and EINVAL are reported as-is through util::throwSystemError; any
// other errno trips a debug check and is reported as "Invalid errno".
void ftruncateImpl(int fd, size_t size) {
  if (ftruncate(fd, size) == 0) {
    return;
  }

  const int err = errno;
  if (err == EBADF || err == EINVAL) {
    util::throwSystemError(err);
  } else {
    XDCHECK(false);
    util::throwSystemError(err, "Invalid errno");
  }
}
// Thin wrapper around fstat() that converts failures into exceptions.
//
// @param fd   file descriptor to inspect
// @param buf  output record filled in by fstat() on success
//
// EBADF, ENOMEM and EOVERFLOW are reported as-is through
// util::throwSystemError; any other errno trips a debug check and is
// reported as "Invalid errno".
void fstatImpl(int fd, stat_t* buf) {
  if (fstat(fd, buf) == 0) {
    return;
  }

  const int err = errno;
  const bool expected = err == EBADF || err == ENOMEM || err == EOVERFLOW;
  if (expected) {
    util::throwSystemError(err);
  } else {
    XDCHECK(false);
    util::throwSystemError(err, "Invalid errno");
  }
}
// Thin wrapper around mmap() that converts failures into exceptions.
//
// All parameters are forwarded unchanged to mmap().
//
// @return  the mapped address on success
//
// On failure the errno value is reported through util::throwSystemError,
// with one exception: EACCES/EAGAIN while MAP_LOCKED was requested is
// reported as ENOMEM.
// NOTE(review): folding EACCES into ENOMEM under MAP_LOCKED mirrors the
// existing behavior; confirm that this is intended and not only meant for
// EAGAIN.
void* mmapImpl(
    void* addr, size_t length, int prot, int flags, int fd, off_t offset) {
  void* const mapped = mmap(addr, length, prot, flags, fd, offset);
  if (mapped != MAP_FAILED) {
    return mapped;
  }

  const int err = errno;

  // Handle the MAP_LOCKED special case up front instead of relying on a
  // case label falling through into the generic group below.
  const bool lockedFailure =
      (err == EACCES || err == EAGAIN) && (flags & MAP_LOCKED) != 0;
  if (lockedFailure) {
    util::throwSystemError(ENOMEM);
    return nullptr;
  }

  switch (err) {
  case EACCES:
  case EAGAIN:
  case EBADF:
  case EINVAL:
  case ENFILE:
  case ENODEV:
  case ENOMEM:
  case EPERM:
  case ETXTBSY:
  case EOVERFLOW:
    util::throwSystemError(err);
    break;
  default:
    XDCHECK(false);
    util::throwSystemError(err, "Invalid errno");
  }
  return nullptr;
}
// Thin wrapper around munmap() that converts failures into exceptions.
//
// @param addr    start of the mapping to remove
// @param length  length in bytes of the mapping to remove
//
// EINVAL is reported as-is through util::throwSystemError. Any other errno
// trips a debug check and is reported with the "Invalid errno" message, but
// still carries the real errno value so that the actual cause of the failure
// is not masked as EINVAL.
void munmapImpl(void* addr, size_t length) {
  if (munmap(addr, length) == 0) {
    return;
  }

  // Snapshot errno so the value reported is the one munmap() produced.
  const int err = errno;
  if (err == EINVAL) {
    util::throwSystemError(err);
  } else {
    XDCHECK(false);
    util::throwSystemError(err, "Invalid errno");
  }
}
} // namespace detail
// Attaches to an already existing segment.
//
// @param name  segment name; normalized through createKeyForName()
// @param opts  segment options; opts.readOnly selects how the segment is
//              opened (see getExisting())
//
// Opening the segment or creating the reference mapping can fail, in which
// case the error raised by the detail:: helpers propagates to the caller.
PosixShmSegment::PosixShmSegment(ShmAttachT,
                                 const std::string& name,
                                 ShmSegmentOpts opts)
    : ShmBase(std::move(opts), createKeyForName(name)),
      fd_(getExisting(getName(), opts_)) {
  XDCHECK_NE(fd_, kInvalidFD);
  try {
    markActive();
    createReferenceMapping();
  } catch (...) {
    // The destructor does not run for an object whose constructor throws,
    // so the descriptor opened in the initializer list has to be released
    // here or it would leak. Errors from close() are deliberately ignored;
    // the original exception is the one worth reporting.
    close(fd_);
    throw;
  }
}
// Creates a brand new segment and sizes it.
//
// @param name  segment name; normalized through createKeyForName()
// @param size  requested size in bytes; resize() rounds it to the page size
//              configured in opts
// @param opts  segment options
//
// Creating, resizing or mapping the segment can fail, in which case the
// error raised by the detail:: helpers propagates to the caller.
PosixShmSegment::PosixShmSegment(ShmNewT,
                                 const std::string& name,
                                 size_t size,
                                 ShmSegmentOpts opts)
    : ShmBase(std::move(opts), createKeyForName(name)),
      fd_(createNewSegment(getName())) {
  try {
    markActive();
    resize(size);
    XDCHECK(isActive());
    XDCHECK_NE(fd_, kInvalidFD);
    // this ensures that the segment lives while the object lives.
    createReferenceMapping();
  } catch (...) {
    // The destructor does not run for an object whose constructor throws,
    // so the descriptor opened in the initializer list has to be released
    // here or it would leak. Errors from close() are deliberately ignored;
    // the original exception is the one worth reporting.
    // NOTE(review): the name created by shm_open() is not unlinked on this
    // path; confirm whether callers are expected to clean it up.
    close(fd_);
    throw;
  }
}
// Releases the reference mapping and closes the file descriptor.
//
// Never throws: a failure to unmap is logged and otherwise ignored so that
// the descriptor is still closed, and close() is called directly rather than
// through a throwing wrapper.
PosixShmSegment::~PosixShmSegment() {
  try {
    // delete the reference mapping so the segment can be deleted if its
    // marked to be.
    deleteReferenceMapping();
  } catch (const std::system_error& e) {
    // Nothing can be propagated out of a destructor; record the failure
    // instead of dropping it silently.
    XLOGF(ERR,
          "Failed to delete reference mapping for shm segment {}: {}",
          getName(),
          e.what());
  }

  // need to close the fd without throwing any exceptions. so we call close
  // directly.
  if (fd_ == kInvalidFD) {
    return;
  }
  if (close(fd_) != 0) {
    // Capture errno before logging so the checks below see the value that
    // close() produced.
    const int err = errno;
    XLOGF(ERR,
          "Failed to close fd for shm segment {}: errno {}",
          getName(),
          err);
    XDCHECK_NE(err, EIO);
    XDCHECK_NE(err, EINTR);
    XDCHECK_EQ(err, EBADF);
    XDCHECK(!err);
  }
}
// Creates a new shared memory object with the given name and opens it for
// reading and writing.
//
// @param name  name passed to shm_open()
// @return      file descriptor of the newly created object
//
// O_EXCL is part of the open flags, so an already existing object with the
// same name is reported as an error by detail::shmOpenImpl().
int PosixShmSegment::createNewSegment(const std::string& name) {
  return detail::shmOpenImpl(name.c_str(), O_RDWR | O_CREAT | O_EXCL);
}
// Opens an existing shared memory object.
//
// @param name  name passed to shm_open()
// @param opts  segment options; opts.readOnly selects O_RDONLY, otherwise
//              the object is opened O_RDWR
// @return      file descriptor of the opened object
int PosixShmSegment::getExisting(const std::string& name,
                                 const ShmSegmentOpts& opts) {
  int openFlags = O_RDWR;
  if (opts.readOnly) {
    openFlags = O_RDONLY;
  }
  return detail::shmOpenImpl(name.c_str(), openFlags);
}
// Unlinks the segment's name and records that the segment is marked for
// removal. Only valid while the segment is active; calling it in any other
// state trips a debug check and otherwise does nothing.
void PosixShmSegment::markForRemoval() {
  if (!isActive()) {
    XDCHECK(false);
    return;
  }

  // The fd stays open after the unlink, so it can still be used (e.g. for
  // ftruncate) once the segment is marked for removal. The fd does not get
  // recycled until this object is actually destroyed.
  removeByName(getName());
  markForRemove();
}
// Unlinks the shared memory object with the given name.
//
// @param segmentName  segment name; normalized through createKeyForName()
// @return  true if the object was unlinked by this call, false if no object
//          with that name existed
//
// Any std::system_error other than ENOENT raised by the unlink is rethrown.
bool PosixShmSegment::removeByName(const std::string& segmentName) {
  const std::string key = createKeyForName(segmentName);
  try {
    detail::unlinkImpl(key.c_str());
  } catch (const std::system_error& ex) {
    // unlink is opaque unlike sys-V api where its through the shmid. Hence
    // if someone has already unlinked it for us, we just let it pass.
    if (ex.code().value() == ENOENT) {
      return false;
    }
    throw;
  }
  return true;
}
size_t PosixShmSegment::getSize() const {
if (isActive() || isMarkedForRemoval()) {
stat_t buf = {};
detail::fstatImpl(fd_, &buf);
return buf.st_size;
} else {
throw std::runtime_error(folly::sformat(
"Trying to get size of segment with name {} in an invalid state",
getName()));
}
return 0;
}
// Resizes the segment via ftruncate() on its fd.
//
// @param size  requested size in bytes; it is first passed through
//              detail::getPageAlignedSize() with the configured page size
//
// @throws std::runtime_error if the segment is neither active nor marked
//         for removal (this also trips a debug check)
void PosixShmSegment::resize(size_t size) const {
  const size_t alignedSize = detail::getPageAlignedSize(size, opts_.pageSize);
  XDCHECK(isActive() || isMarkedForRemoval());

  if (!isActive() && !isMarkedForRemoval()) {
    throw std::runtime_error(folly::sformat(
        "Trying to resize segment with name {} in an invalid state",
        getName()));
  }

  XDCHECK_NE(fd_, kInvalidFD);
  detail::ftruncateImpl(fd_, alignedSize);
}
void* PosixShmSegment::mapAddress(void* addr) const {
size_t size = getSize();
if (!detail::isPageAlignedSize(size, opts_.pageSize) ||
!detail::isPageAlignedAddr(addr, opts_.pageSize)) {
util::throwSystemError(EINVAL, "Address/size not aligned");
}
#ifndef MAP_HUGE_2MB
#define MAP_HUGE_2MB (21 << MAP_HUGE_SHIFT)
#endif
#ifndef MAP_HUGE_1GB
#define MAP_HUGE_1GB (30 << MAP_HUGE_SHIFT)
#endif
int flags = MAP_SHARED;
if (opts_.pageSize == PageSizeT::TWO_MB) {
flags |= MAP_HUGETLB | MAP_HUGE_2MB;
} else if (opts_.pageSize == PageSizeT::ONE_GB) {
flags |= MAP_HUGETLB | MAP_HUGE_1GB;
}
// If users pass in an address, they must make sure that address is unused.
if (addr != nullptr) {
flags |= MAP_FIXED;
}
const int prot = opts_.readOnly ? PROT_READ : PROT_WRITE | PROT_READ;
void* retAddr = detail::mmapImpl(addr, size, prot, flags, fd_, 0);
// if there was hint for mapping, then fail if we cannot respect this
// because we want to be specific about mapping to exactly that address.
if (retAddr != nullptr && addr != nullptr && retAddr != addr) {
util::throwSystemError(EINVAL, "Address already mapped");
}
XDCHECK(retAddr == addr || addr == nullptr);
return retAddr;
}
// Removes a mapping of this segment.
//
// @param addr  start address of the mapping; the length that is unmapped is
//              the segment's current size as returned by getSize()
void PosixShmSegment::unMap(void* addr) const {
  const size_t mappedLength = getSize();
  detail::munmapImpl(addr, mappedLength);
}
// Builds the key used for the shared memory object from a segment name.
//
// @param name  segment name
// @return      `name` itself when it already begins with '/', otherwise
//              `name` prefixed with a single '/'. An empty name yields "/".
std::string PosixShmSegment::createKeyForName(
    const std::string& name) noexcept {
  // The key must always start with a slash; a name that already has one is
  // used unchanged.
  const bool hasLeadingSlash = !name.empty() && name[0] == '/';
  return hasLeadingSlash ? name : "/" + name;
}
// Creates the one page reference mapping that is kept for the life of this
// object. The page is mapped PROT_NONE so nothing can actually access it.
void PosixShmSegment::createReferenceMapping() {
  void* const mapping = detail::mmapImpl(nullptr,
                                         detail::getPageSize(),
                                         PROT_NONE,
                                         MAP_SHARED,
                                         fd_,
                                         0);
  referenceMapping_ = mapping;
  XDCHECK(referenceMapping_ != nullptr);
}
// Unmaps the one page reference mapping, if one exists. Does nothing when
// no reference mapping has been recorded.
void PosixShmSegment::deleteReferenceMapping() const {
  if (referenceMapping_ == nullptr) {
    return;
  }
  detail::munmapImpl(referenceMapping_, detail::getPageSize());
}
} // namespace cachelib
} // namespace facebook