cachelib/allocator/nvmcache/NavySetup.cpp (212 lines of code) (raw):
/*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "cachelib/allocator/nvmcache/NavySetup.h"
#include <folly/File.h>
#include <folly/logging/xlog.h>
#include "cachelib/allocator/nvmcache/NavyConfig.h"
#include "cachelib/navy/Factory.h"
#include "cachelib/navy/scheduler/JobScheduler.h"
namespace facebook {
namespace cachelib {
namespace {
// Share of the device size, expressed in percent (not as a fraction), that is
// reserved for metadata when the config leaves the metadata size at 0.
// Default value for (almost) 1TB flash device = 5GB reserved for metadata
constexpr double kDefaultMetadataPercent = 0.5;
// Convert a size given in megabytes (1MB = 2^20 bytes) into bytes.
uint64_t megabytesToBytes(uint64_t mb) {
  constexpr uint64_t kBytesPerMegabyte = uint64_t{1} << 20;
  return mb * kBytesPerMegabyte;
}
// Round @num down to the nearest multiple of @alignment. A value that is
// already a multiple of @alignment is returned unchanged.
uint64_t alignDown(uint64_t num, uint64_t alignment) {
  const uint64_t wholeUnits = num / alignment;
  return wholeUnits * alignment;
}
// Round @num up to the nearest multiple of @alignment. A value that is
// already a multiple of @alignment is returned unchanged.
uint64_t alignUp(uint64_t num, uint64_t alignment) {
  // Step just short of one full alignment unit forward, then snap back down.
  const uint64_t bumped = num + alignment - 1;
  return alignDown(bumped, alignment);
}
// Configure BigHash on @proto and place its storage at the tail end of the
// device.
//
// @param bigHashConfig   the BigHash section of the navy config
// @param ioAlignSize     IO alignment of the device; the bucket size must be
//                        a multiple of it
// @param totalCacheSize  total size of the device
// @param metadataSize    size reserved for metadata; BigHash has to start
//                        strictly after it
// @param proto           the CacheProto that receives the BigHash proto
//
// @return the offset passed to setLayout() as the start of BigHash's storage
//
// @throw std::invalid_argument if the bucket size is not aligned to
//        @ioAlignSize, or if the computed BigHash layout does not fit on the
//        device. @proto is left untouched when this function throws.
uint64_t setupBigHash(const navy::BigHashConfig& bigHashConfig,
                      uint32_t ioAlignSize,
                      uint64_t totalCacheSize,
                      uint64_t metadataSize,
                      cachelib::navy::CacheProto& proto) {
  const auto bucketSize = bigHashConfig.getBucketSize();
  if (bucketSize != alignUp(bucketSize, ioAlignSize)) {
    throw std::invalid_argument(
        folly::sformat("Bucket size: {} is not aligned to ioAlignSize: {}",
                       bucketSize, ioAlignSize));
  }

  // If enabled, BigHash's storage starts after BlockCache's.
  const auto sizeReservedForBigHash =
      totalCacheSize * bigHashConfig.getSizePct() / 100ul;
  const uint64_t bigHashCacheOffset =
      alignUp(totalCacheSize - sizeReservedForBigHash, bucketSize);

  // Validate the layout before mutating @proto, so that a failure does not
  // leave a half-configured proto behind.
  if (bigHashCacheOffset <= metadataSize) {
    throw std::invalid_argument("NVM cache size is not big enough!");
  }
  // Rounding the offset up can move it past the end of the device when the
  // device size is not a multiple of the bucket size and the reservation is
  // smaller than the misalignment. The subtraction below would then wrap
  // around and produce a bogus, enormous BigHash size.
  if (bigHashCacheOffset > totalCacheSize) {
    throw std::invalid_argument(folly::sformat(
        "BigHash offset: {} is beyond cache size: {}. BigHash size is too "
        "small for bucket size: {}",
        bigHashCacheOffset, totalCacheSize, bucketSize));
  }
  const uint64_t bigHashCacheSize =
      alignDown(totalCacheSize - bigHashCacheOffset, bucketSize);

  auto bigHash = cachelib::navy::createBigHashProto();
  bigHash->setLayout(bigHashCacheOffset, bigHashCacheSize, bucketSize);

  // Bucket Bloom filter size, bytes
  //
  // Experiments showed that if we have 16 bytes for BF with 25 entries,
  // then optimal number of hash functions is 4 and false positive rate
  // below 10%.
  if (bigHashConfig.isBloomFilterEnabled()) {
    // We set 4 hash function unconditionally. This seems to be the best
    // for our use case. If BF size to bucket size ratio gets lower, try
    // to reduce number of hashes.
    constexpr uint32_t kNumHashes = 4;
    // The configured Bloom filter size is in bytes; split its bits evenly
    // across the hash functions.
    const uint32_t bitsPerHash =
        bigHashConfig.getBucketBfSize() * 8 / kNumHashes;
    bigHash->setBloomFilter(kNumHashes, bitsPerHash);
  }

  proto.setBigHash(std::move(bigHash), bigHashConfig.getSmallItemMaxSize());

  XLOG(INFO) << "metadataSize: " << metadataSize
             << " bigHashCacheOffset: " << bigHashCacheOffset
             << " bigHashCacheSize: " << bigHashCacheSize;
  return bigHashCacheOffset;
}
// Build a BlockCache proto from @blockCacheConfig and install it on @proto.
//
// @param blockCacheConfig       the BlockCache section of the navy config
// @param blockCacheSize         space available to BlockCache before any
//                               alignment adjustments
// @param ioAlignSize            IO alignment of the device; the region size
//                               must be a multiple of it
// @param metadataSize           size reserved for metadata; BlockCache starts
//                               at or after this offset
// @param usesRaidFiles          whether the device is backed by RAID files
// @param itemDestructorEnabled  forwarded to the BlockCache proto
// @param proto                  the CacheProto that receives the BlockCache
//                               proto
//
// @throw std::invalid_argument if the region size is not aligned to
//        @ioAlignSize
void setupBlockCache(const navy::BlockCacheConfig& blockCacheConfig,
                     uint64_t blockCacheSize,
                     uint32_t ioAlignSize,
                     uint64_t metadataSize,
                     bool usesRaidFiles,
                     bool itemDestructorEnabled,
                     cachelib::navy::CacheProto& proto) {
  const auto regionSize = blockCacheConfig.getRegionSize();
  if (alignUp(regionSize, ioAlignSize) != regionSize) {
    throw std::invalid_argument(
        folly::sformat("Region size: {} is not aligned to ioAlignSize: {}",
                       regionSize, ioAlignSize));
  }

  // With RAID0Device the region size doubles as the stripe size, so the block
  // cache has to begin on a region boundary. Without RAID it begins right
  // after the metadata.
  const uint64_t startOffset =
      usesRaidFiles ? alignUp(metadataSize, regionSize) : metadataSize;
  // Bytes given up at the front in order to reach that boundary.
  const uint64_t skippedBytes = startOffset - metadataSize;
  if (usesRaidFiles) {
    XDCHECK_LT(skippedBytes, blockCacheSize);
  }
  // Only whole regions are usable.
  const uint64_t usableSize =
      alignDown(blockCacheSize - skippedBytes, regionSize);

  XLOG(INFO) << "blockcache: starting offset: " << startOffset
             << ", block cache size: " << usableSize;

  auto blockCache = cachelib::navy::createBlockCacheProto();
  blockCache->setLayout(startOffset, usableSize, regionSize);
  blockCache->setChecksum(blockCacheConfig.getDataChecksum());

  // Eviction policy: a non-empty segment ratio selects segmented FIFO,
  // otherwise LRU if enabled, otherwise plain FIFO.
  auto sfifoRatio = blockCacheConfig.getSFifoSegmentRatio();
  if (sfifoRatio.empty()) {
    if (blockCacheConfig.isLruEnabled()) {
      blockCache->setLruEvictionPolicy();
    } else {
      blockCache->setFifoEvictionPolicy();
    }
  } else {
    blockCache->setSegmentedFifoEvictionPolicy(std::move(sfifoRatio));
  }

  blockCache->setCleanRegionsPool(blockCacheConfig.getCleanRegions());
  blockCache->setReinsertionConfig(blockCacheConfig.getReinsertionConfig());
  blockCache->setNumInMemBuffers(blockCacheConfig.getNumInMemBuffers());
  blockCache->setItemDestructorEnabled(itemDestructorEnabled);
  blockCache->setPreciseRemove(blockCacheConfig.isPreciseRemove());

  proto.setBlockCache(std::move(blockCache));
}
// Fill in the CacheProto, the configuration interface of the Navy engine,
// with the metadata size and the BigHashProto / BlockCacheProto needed to
// create the BigHash and BlockCache engines.
//
// @param config                 the configured NavyConfig
// @param device                 the flash device
// @param proto                  the output CacheProto
// @param itemDestructorEnabled  forwarded to the BlockCache setup
//
// @throw std::invalid_argument if input arguments are invalid
void setupCacheProtos(const navy::NavyConfig& config,
                      const navy::Device& device,
                      cachelib::navy::CacheProto& proto,
                      const bool itemDestructorEnabled) {
  const auto ioAlignSize = device.getIOAlignmentSize();
  const uint64_t totalCacheSize = device.getSize();

  // Fallback metadata size: kDefaultMetadataPercent percent of the device,
  // rounded down to the (power of two) alignment.
  const auto defaultMetadataSize = [](size_t size, size_t alignment) {
    XDCHECK(folly::isPowTwo(alignment));
    const auto percentOfSize =
        static_cast<size_t>(kDefaultMetadataPercent * size / 100);
    return percentOfSize & ~(alignment - 1);
  };

  // A configured size of 0 means "use the default".
  auto metadataSize = config.getDeviceMetadataSize();
  if (metadataSize == 0) {
    metadataSize = defaultMetadataSize(totalCacheSize, ioAlignSize);
  }
  metadataSize = alignUp(metadataSize, ioAlignSize);
  if (totalCacheSize <= metadataSize) {
    throw std::invalid_argument(
        folly::sformat("Invalid metadata size: {}. Cache size: {}",
                       metadataSize,
                       totalCacheSize));
  }
  proto.setMetadataSize(metadataSize);

  // BlockCache gets whatever lies between the metadata and either the start
  // of BigHash (if enabled) or the end of the device.
  uint64_t blockCacheEnd = totalCacheSize;
  if (config.isBigHashEnabled()) {
    blockCacheEnd = setupBigHash(config.bigHash(), ioAlignSize, totalCacheSize,
                                 metadataSize, proto);
  } else {
    XLOG(INFO) << "metadataSize: " << metadataSize << ". No bighash.";
  }
  const uint64_t blockCacheSize = blockCacheEnd - metadataSize;

  // Nothing left over for BlockCache: leave it unconfigured.
  if (blockCacheSize == 0) {
    return;
  }
  setupBlockCache(config.blockCache(), blockCacheSize, ioAlignSize,
                  metadataSize, config.usesRaidFiles(), itemDestructorEnabled,
                  proto);
}
// Install the admission policy named in @config on @proto.
//
// An empty policy name leaves @proto untouched.
//
// @param config  the configured NavyConfig
// @param proto   the CacheProto that receives the policy
//
// @throw std::invalid_argument if the policy name is not one of the
//        recognized names
void setAdmissionPolicy(const cachelib::navy::NavyConfig& config,
                        cachelib::navy::CacheProto& proto) {
  const std::string& policy = config.getAdmissionPolicy();
  if (policy.empty()) {
    return;
  }

  if (policy == navy::NavyConfig::kAdmPolicyRandom) {
    proto.setRejectRandomAdmissionPolicy(config.randomAdmPolicy());
    return;
  }

  if (policy == navy::NavyConfig::kAdmPolicyDynamicRandom) {
    proto.setDynamicRandomAdmissionPolicy(config.dynamicRandomAdmPolicy());
    return;
  }

  throw std::invalid_argument(
      folly::sformat("invalid policy name {}", policy));
}
// Create the ordered thread pool job scheduler from the reader thread count,
// writer thread count and request ordering shards setting in @config.
std::unique_ptr<cachelib::navy::JobScheduler> createJobScheduler(
    const navy::NavyConfig& config) {
  const auto numReaders = config.getReaderThreads();
  const auto numWriters = config.getWriterThreads();
  const auto orderingShardsPower = config.getNavyReqOrderingShards();
  return cachelib::navy::createOrderedThreadPoolJobScheduler(
      numReaders, numWriters, orderingShardsPower);
}
} // namespace
// Create the device described by @config.
//
// RAID files take precedence over a single file; when neither is configured
// a memory device is created.
//
// @param config     the configured NavyConfig
// @param encryptor  handed over to whichever device gets created
//
// @return the device produced by the matching factory function
std::unique_ptr<cachelib::navy::Device> createDevice(
    const navy::NavyConfig& config,
    std::shared_ptr<navy::DeviceEncryptor> encryptor) {
  const auto blockSize = config.getBlockSize();
  const auto maxDeviceWriteSize = config.getDeviceMaxWriteSize();

  // A max write size of 0 is passed through as is; any other value is rounded
  // down to a multiple of the block size. Evaluated only by the file-backed
  // devices.
  const auto alignedMaxWriteSize = [&]() {
    return maxDeviceWriteSize > 0 ? alignDown(maxDeviceWriteSize, blockSize)
                                  : 0;
  };

  if (config.usesRaidFiles()) {
    const auto stripeSize = config.getRaidStripeSize();
    // Each RAID file holds a whole number of stripes.
    return cachelib::navy::createRAIDDevice(
        config.getRaidPaths(),
        alignDown(config.getFileSize(), stripeSize),
        config.getTruncateFile(),
        blockSize,
        stripeSize,
        std::move(encryptor),
        alignedMaxWriteSize());
  }

  if (config.usesSimpleFile()) {
    return cachelib::navy::createFileDevice(config.getFileName(),
                                            config.getFileSize(),
                                            config.getTruncateFile(),
                                            blockSize,
                                            std::move(encryptor),
                                            alignedMaxWriteSize());
  }

  return cachelib::navy::createMemoryDevice(config.getFileSize(),
                                            std::move(encryptor), blockSize);
}
// Create a Navy cache from @config.
//
// @param config                 the configured NavyConfig
// @param cb                     destructor callback installed on the cache
// @param truncate               if true, the cache is reset and no recovery
//                               is attempted
// @param encryptor              handed over to the device
// @param itemDestructorEnabled  forwarded to the BlockCache setup
//
// @return the created cache. If @truncate is false, recovery is attempted
//         first; a failed recovery only logs a warning and the cache is
//         returned anyway.
//
// @throw std::invalid_argument if the configuration is invalid
std::unique_ptr<navy::AbstractCache> createNavyCache(
    const navy::NavyConfig& config,
    navy::DestructorCallback cb,
    bool truncate,
    std::shared_ptr<navy::DeviceEncryptor> encryptor,
    bool itemDestructorEnabled) {
  auto device = createDevice(config, std::move(encryptor));
  auto proto = cachelib::navy::createCacheProto();
  // Ownership of the device moves into the proto; keep a non-owning pointer
  // because the engine setup below still needs to query the device.
  auto* devicePtr = device.get();
  proto->setDevice(std::move(device));
  proto->setJobScheduler(createJobScheduler(config));
  proto->setMaxConcurrentInserts(config.getMaxConcurrentInserts());
  proto->setMaxParcelMemory(megabytesToBytes(config.getMaxParcelMemoryMB()));
  setAdmissionPolicy(config, *proto);
  // @cb is a by-value parameter that is not used again, so hand it over
  // instead of copying it.
  proto->setDestructorCallback(std::move(cb));

  setupCacheProtos(config, *devicePtr, *proto, itemDestructorEnabled);

  auto cache = createCache(std::move(proto));
  XDCHECK(cache != nullptr);

  if (truncate) {
    cache->reset();
    return cache;
  }

  if (!cache->recover()) {
    XLOG(WARN) << "No recovery data found. Continuing with clean cache.";
  }
  return cache;
}
} // namespace cachelib
} // namespace facebook