cachelib/allocator/nvmcache/NavyConfig.h (283 lines of code) (raw):
/*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include <folly/dynamic.h>
#include <folly/logging/xlog.h>
#include <stdexcept>
#include <utility>
#include "cachelib/allocator/nvmcache/BlockCacheReinsertionPolicy.h"
namespace facebook {
namespace cachelib {
namespace cachebench {
// Forward declaration only; the class template is not defined in this header.
// It is declared here so that navy::BlockCacheConfig can name
// cachebench::Cache as a friend.
template <typename Allocator>
class Cache;
} // namespace cachebench
namespace navy {
/**
* RandomAPConfig provides APIs for users to configure one of the admission
* policies - "random". Admission policy is one part of NavyConfig.
*
* By this class, users can:
* - set admission probability
* - get the value of admission probability
*/
class RandomAPConfig {
 public:
  // Read back the admission probability currently stored in this config.
  // The stored value is value-initialized, i.e. 0 until it is changed.
  double getAdmProbability() const { return admProbability_; }

  // Configure the admission probability of the "random" policy.
  // Declared here, defined out of line.
  // @param admProbability  probability in decimal form
  // @throw std::invalid_argument if the input value is not in the range
  //        of [0, 1].
  RandomAPConfig& setAdmProbability(double admProbability);

 private:
  // Probability of admitting an item, expressed as a decimal fraction.
  double admProbability_{};
};
/**
* DynamicRandomAPConfig provides APIs for users to configure one of the
* admission policies - "dynamic_random". Admission policy is one part of
* NavyConfig.
*
*
* By this class, users can:
* - set admission target write rate
* - set max write rate
* - set admission suffix length
* - set base size of baseProbability calculation
* - get the values of the above parameters
*/
class DynamicRandomAPConfig {
 public:
  // Set admission policy's target rate in bytes/s.
  // This target is enforced across a window in average. Defaults to 0 if not
  // set, meaning no rate limiting.
  // @return this config, so that setters can be chained.
  DynamicRandomAPConfig& setAdmWriteRate(uint64_t admWriteRate) noexcept {
    admWriteRate_ = admWriteRate;
    return *this;
  }

  // Set the max write rate to device in bytes/s.
  // This ensures writes in any given second don't exceed this limit despite a
  // possibility of writing more to stay within the target rate above.
  // @return this config, so that setters can be chained.
  DynamicRandomAPConfig& setMaxWriteRate(uint64_t maxWriteRate) noexcept {
    maxWriteRate_ = maxWriteRate;
    return *this;
  }

  // Set the length of the key suffix to be ignored when hashing for
  // probability.
  // @return this config, so that setters can be chained.
  DynamicRandomAPConfig& setAdmSuffixLength(size_t admSuffixLen) noexcept {
    admSuffixLen_ = admSuffixLen;
    return *this;
  }

  // Set the Navy item base size for base probability calculation.
  // Set this closer to the mean size of objects. The probability is scaled for
  // other sizes by using this size as the pivot.
  // @return this config, so that setters can be chained.
  DynamicRandomAPConfig& setAdmProbBaseSize(uint32_t admProbBaseSize) noexcept {
    admProbBaseSize_ = admProbBaseSize;
    return *this;
  }

  // Set the range for the probability factor.
  // Non-positive values in either of the fields would make both values
  // ignored, and the default values from DynamicRandomAP::Config will be used.
  // This setter stores both bounds as given; it performs no validation itself.
  // @return this config, so that setters can be chained.
  DynamicRandomAPConfig& setProbFactorRange(double lowerBound,
                                            double upperBound) noexcept {
    probFactorLowerBound_ = lowerBound;
    probFactorUpperBound_ = upperBound;
    return *this;
  }

  // Getters for the values stored by the setters above.
  uint64_t getAdmWriteRate() const { return admWriteRate_; }
  uint64_t getMaxWriteRate() const { return maxWriteRate_; }
  size_t getAdmSuffixLength() const { return admSuffixLen_; }
  uint32_t getAdmProbBaseSize() const { return admProbBaseSize_; }
  double getProbFactorLowerBound() const { return probFactorLowerBound_; }
  double getProbFactorUpperBound() const { return probFactorUpperBound_; }

 private:
  // Admission policy target rate, bytes/s.
  // Zero means no rate limiting.
  uint64_t admWriteRate_{0};
  // The max write rate to device in bytes/s to stay within the device limit
  // of saturation to avoid latency increase.
  uint64_t maxWriteRate_{0};
  // Length of suffix in key to be ignored when hashing for probability.
  size_t admSuffixLen_{0};
  // Navy item base size of baseProbability calculation.
  // Stored with the same 32-bit type the setter accepts and the getter
  // returns, so reading it back involves no implicit narrowing conversion.
  uint32_t admProbBaseSize_{0};
  // Lower bound of the probability factor. A non-positive value would be
  // replaced by the default value from DynamicRandomAP::Config.
  double probFactorLowerBound_{0};
  // Upper bound of the probability factor. A non-positive value would be
  // replaced by the default value from DynamicRandomAP::Config.
  double probFactorUpperBound_{0};
};
/**
* BlockCacheReinsertionConfig provides APIs for users to configure BlockCache
* reinsertion policy, which is a part of NavyConfig.
*
* By this class, user can:
* - enable a hits-based, percentage-based, OR custom reinsertion policy (only
*   one of them at a time)
*/
class BlockCacheReinsertionConfig {
 public:
  // Select the hits-based reinsertion policy.
  // @param hitsThreshold  hits threshold to store. A value of 0 leaves this
  //        policy counted as "not set" by the conflict checks in this class.
  // @return this config, so that calls can be chained.
  // @throw std::invalid_argument if a percentage threshold or a custom
  //        policy has already been set.
  BlockCacheReinsertionConfig& enableHitsBased(uint8_t hitsThreshold) {
    if (pctThreshold_ > 0 || custom_) {
      throw std::invalid_argument(
          "already set reinsertion percentage threshold or custom "
          "reinsertion policy, should not set reinsertion hits threshold");
    }
    hitsThreshold_ = hitsThreshold;
    return *this;
  }

  // Select the percentage-based reinsertion policy.
  // @param pctThreshold  percentage to store, expected in [0, 100]. A value
  //        of 0 leaves this policy counted as "not set" by the conflict
  //        checks in this class.
  // @return this config, so that calls can be chained.
  // @throw std::invalid_argument if a hits threshold or a custom policy has
  //        already been set, or if pctThreshold is greater than 100.
  BlockCacheReinsertionConfig& enablePctBased(unsigned int pctThreshold) {
    if (hitsThreshold_ > 0 || custom_) {
      throw std::invalid_argument(
          "already set reinsertion hits threshold or custom reinsertion "
          "policy, should not set reinsertion probability threshold");
    }
    if (pctThreshold > 100) {
      throw std::invalid_argument(folly::sformat(
          "reinsertion percentage threshold should between 0 and "
          "100, but {} is set",
          pctThreshold));
    }
    pctThreshold_ = pctThreshold;
    return *this;
  }

  // Select a user-provided reinsertion policy.
  // @param policy  shared pointer to the policy; stored as given (a null
  //        pointer leaves the custom policy counted as "not set").
  // @return this config, so that calls can be chained.
  // @throw std::invalid_argument if a hits threshold or a percentage
  //        threshold has already been set.
  BlockCacheReinsertionConfig& enableCustom(
      std::shared_ptr<BlockCacheReinsertionPolicy> policy) {
    if (hitsThreshold_ > 0 || pctThreshold_ > 0) {
      // The message carries "{}" placeholders, so it has to go through
      // folly::sformat for the current thresholds to actually be reported.
      // The 8-bit threshold is widened so it is always rendered as a number.
      throw std::invalid_argument(folly::sformat(
          "Already set reinsertion hits threshold {}, or reinsertion "
          "probability threshold {} while trying to set a custom reinsertion "
          "policy.",
          static_cast<unsigned int>(hitsThreshold_), pctThreshold_));
    }
    // `policy` is a by-value sink parameter: move it to avoid an extra
    // reference-count increment/decrement.
    custom_ = std::move(policy);
    return *this;
  }

  // Check that at most one of the three reinsertion policies is configured.
  // @return this config, so that calls can be chained.
  // @throw std::invalid_argument if more than one of the hits threshold,
  //        the percentage threshold and the custom policy is set.
  BlockCacheReinsertionConfig& validate() {
    // Each comparison yields 0 or 1; the sum is the number of policies set.
    if ((pctThreshold_ > 0) + (hitsThreshold_ > 0) + (custom_ != nullptr) > 1) {
      throw std::invalid_argument(folly::sformat(
          "More than one configuration for reinsertion policy is specified: "
          "pctThreshold_ {}, hitsThreshold_ {}, custom_ {}",
          pctThreshold_, static_cast<unsigned int>(hitsThreshold_),
          custom_ != nullptr));
    }
    return *this;
  }

  // Getters for the stored settings. An unset threshold reads as 0 and an
  // unset custom policy reads as a null pointer.
  uint8_t getHitsThreshold() const { return hitsThreshold_; }
  unsigned int getPctThreshold() const { return pctThreshold_; }
  std::shared_ptr<BlockCacheReinsertionPolicy> getCustomPolicy() const {
    return custom_;
  }

 private:
  // Only one of the fields below can be initialized.

  // Threshold of a hits based reinsertion policy with Navy BlockCache.
  // If an item had been accessed more than that threshold, it will be
  // eligible for reinsertion.
  uint8_t hitsThreshold_{0};
  // Threshold of a percentage based reinsertion policy with Navy BlockCache.
  // The percentage value is between 0 and 100 for reinsertion.
  unsigned int pctThreshold_{0};
  // Custom created reinsertion policy.
  std::shared_ptr<BlockCacheReinsertionPolicy> custom_{nullptr};
};
/**
* BlockCacheConfig provides APIs for users to configure BlockCache engine,
* which is one part of NavyConfig.
*
* By this class, users can:
* - enable FIFO or segmented FIFO eviction policy (default is LRU)
* - set number of clean regions
* - enable in-mem buffer (once enabled, the number is 2 * clean regions)
* - set size classes
* - set region size
* - set data checksum
* - get the values of all the above parameters
*/
class BlockCacheConfig {
 public:
  // ============ Eviction policy =============

  // Switch the eviction policy to FIFO; the LRU flag is cleared.
  BlockCacheConfig& enableFifo() noexcept {
    lru_ = false;
    return *this;
  }

  // Switch the eviction policy to segmented FIFO; the LRU flag is cleared.
  // @param sFifoSegmentRatio  relative sizes of the segments, listed from the
  //        least-important segment to the most-important one.
  //        e.g. {1, 1, 1} splits items equally across 3 segments;
  //        {1, 2, 3} puts 1/6th of the items in the first segment (P0, least
  //        important), 2/6th in the second segment (P1) and 3/6th in the
  //        third segment (P2).
  BlockCacheConfig& enableSegmentedFifo(
      std::vector<unsigned int> sFifoSegmentRatio) noexcept {
    lru_ = false;
    sFifoSegmentRatio_ = std::move(sFifoSegmentRatio);
    return *this;
  }

  // ============ Reinsertion policy =============

  // Turn on the hit-based reinsertion policy.
  // While regions are being evicted, items whose access count exceeds this
  // threshold are kept by reinserting them internally.
  // @throw std::invalid_argument if any other reinsertion policy has been
  //        enabled.
  BlockCacheConfig& enableHitsBasedReinsertion(uint8_t hitsThreshold);

  // Turn on the percentage based reinsertion policy.
  // Meant for testing: a fixed fraction of evicted items (given by the
  // percentage) is always reinserted.
  // @throw std::invalid_argument if any other reinsertion policy has
  //        been enabled or the input value is not in the range of 0~100.
  BlockCacheConfig& enablePctBasedReinsertion(unsigned int pctThreshold);

  // Turn on a reinsertion policy supplied by the user.
  // @throw std::invalid_argument if any other reinsertion policy has been
  //        enabled.
  BlockCacheConfig& enableCustomReinsertion(
      std::shared_ptr<BlockCacheReinsertionPolicy> policy);

  // ============ Regions and data handling =============

  // Set the number of clean regions kept available for incoming writes and
  // whether the writes are buffered in-memory.
  // Navy has to keep enough buffers for every reserved clean region, so that
  // whenever a new in-mem buffer is obtained there is a clean region to flush
  // it to flash once it is ready.
  BlockCacheConfig& setCleanRegions(uint32_t cleanRegions) noexcept;

  // Set the size of one region.
  BlockCacheConfig& setRegionSize(uint32_t regionSize) noexcept {
    regionSize_ = regionSize;
    return *this;
  }

  // Turn data checksumming on or off.
  BlockCacheConfig& setDataChecksum(bool dataChecksum) noexcept {
    dataChecksum_ = dataChecksum;
    return *this;
  }

  // Choose whether removal compares the key (true) or only the hash value
  // (false).
  BlockCacheConfig& setPreciseRemove(bool preciseRemove) noexcept {
    preciseRemove_ = preciseRemove;
    return *this;
  }

  // ============ Getters =============

  bool isLruEnabled() const { return lru_; }

  bool isPreciseRemove() const { return preciseRemove_; }

  bool getDataChecksum() const { return dataChecksum_; }

  uint32_t getCleanRegions() const { return cleanRegions_; }

  uint32_t getNumInMemBuffers() const { return numInMemBuffers_; }

  uint32_t getRegionSize() const { return regionSize_; }

  const std::vector<unsigned int>& getSFifoSegmentRatio() const {
    return sFifoSegmentRatio_;
  }

  const BlockCacheReinsertionConfig& getReinsertionConfig() const {
    return reinsertionConfig_;
  }

 private:
  // Direct override of the in-memory buffer count.
  // only for cachebench configuration
  void setNumInMemBuffers(uint32_t numInMemBuffers) noexcept {
    numInMemBuffers_ = numInMemBuffers;
  }

  // True while Navy BlockCache uses the region-based LRU eviction policy.
  bool lru_{true};
  // Segment ratios of the segmented FIFO eviction policy.
  // lru_ is false once segmented FIFO has been enabled.
  std::vector<unsigned int> sFifoSegmentRatio_;
  // Settings used to construct the reinsertion policy.
  BlockCacheReinsertionConfig reinsertionConfig_;
  // Buffer of clean regions to maintain for eviction.
  uint32_t cleanRegions_{1};
  // Number of Navy BlockCache in-memory buffers.
  uint32_t numInMemBuffers_{2};
  // Size of one Navy BlockCache region (must be multiple of blockSize_).
  uint32_t regionSize_{16 * 1024 * 1024};
  // Whether data checksum is enabled for Navy BlockCache.
  bool dataChecksum_{true};
  // Whether an item is removed by checking the key (true) or only the hash
  // value (false).
  bool preciseRemove_{false};

  // Grant access to the private members above, including setNumInMemBuffers.
  friend class NavyConfig;
  template <typename Allocator>
  friend class cachebench::Cache;
};
/**
* BigHashConfig provides APIs for users to configure BigHash engine, which is
* one part of NavyConfig.
*
* By this class, users can:
* - enable BigHash by setting sizePct > 0
* - set maximum item size
* - set bucket size
* - set bloom filter size (0 to disable bloom filter)
* - get the values of all the above parameters
*/
class BigHashConfig {
 public:
  // Enable the BigHash engine by giving it a share of the device (in percent)
  // and a maximum item size (in bytes). Both sizePct and smallItemMaxSize
  // default to 0, which means BigHash is not enabled.
  // @throw std::invalid_argument if sizePct is not in the range of
  //        [0, 100].
  BigHashConfig& setSizePctAndMaxItemSize(unsigned int sizePct,
                                          uint64_t smallItemMaxSize);

  // Set the BigHash bucket size in bytes.
  // Default value is 4096.
  BigHashConfig& setBucketSize(uint32_t bucketSize) noexcept {
    bucketSize_ = bucketSize;
    return *this;
  }

  // Set the per-bucket bloom filter size in bytes.
  // 0 means bloom filter will not be applied. Default value is 8.
  BigHashConfig& setBucketBfSize(uint64_t bucketBfSize) noexcept {
    bucketBfSize_ = bucketBfSize;
    return *this;
  }

  // The bloom filter counts as enabled whenever its per-bucket size is
  // non-zero.
  bool isBloomFilterEnabled() const { return bucketBfSize_ != 0; }

  // Getters for the stored settings.
  unsigned int getSizePct() const { return sizePct_; }

  uint32_t getBucketSize() const { return bucketSize_; }

  uint64_t getBucketBfSize() const { return bucketBfSize_; }

  uint64_t getSmallItemMaxSize() const { return smallItemMaxSize_; }

 private:
  // Share of the whole device handed to the BigHash engine in Navy, as a
  // percentage, e.g. 50.
  unsigned int sizePct_{0};
  // Bucket size of the Navy BigHash engine (must be multiple of the minimum
  // device io block size).
  // It determines how big each bucket is and the physical write granularity
  // onto the device.
  uint32_t bucketSize_{4096};
  // Bloom filter size per bucket, in bytes, for the Navy BigHash engine.
  uint64_t bucketBfSize_{8};
  // Largest item size that is put into the Navy BigHash engine.
  uint64_t smallItemMaxSize_{};
};
/**
* NavyConfig provides APIs for users to set up Navy related settings for
* NvmCache.
*
* Notes: the reason why these settings cannot be directly passed to Navy
* internal config navy/Factory.h and setup there is
* because we have logic in "NavySetup.cpp" that translates this input config
* into CacheProto. Therefore, we need this intermediary config in NvmCache
* Config.
*
*/
class NavyConfig {
 public:
  // Names of the supported admission policies.
  static constexpr folly::StringPiece kAdmPolicyRandom{"random"};
  static constexpr folly::StringPiece kAdmPolicyDynamicRandom{"dynamic_random"};

  // True when a simple file name has been stored.
  bool usesSimpleFile() const noexcept { return !fileName_.empty(); }

  // True when at least one RAID path has been stored.
  bool usesRaidFiles() const noexcept { return !raidPaths_.empty(); }

  // BigHash counts as enabled when its device percentage is non-zero.
  bool isBigHashEnabled() const { return bigHashConfig_.getSizePct() != 0; }

  // Produce a string-to-string map from this config (defined out of line).
  std::map<std::string, std::string> serialize() const;

  // ======================== Getters ========================

  // ============ Admission Policy =============
  const std::string& getAdmissionPolicy() const { return admissionPolicy_; }

  // Read-only view of the "dynamic_random" admission policy settings.
  const DynamicRandomAPConfig& dynamicRandomAdmPolicy() const {
    return dynamicRandomAPConfig_;
  }

  // Read-only view of the "random" admission policy settings.
  const RandomAPConfig& randomAdmPolicy() const { return randomAPConfig_; }

  // ============ Device settings =============
  uint64_t getBlockSize() const { return blockSize_; }

  const std::string& getFileName() const;

  const std::vector<std::string>& getRaidPaths() const;

  uint64_t getDeviceMetadataSize() const { return deviceMetadataSize_; }

  uint64_t getFileSize() const { return fileSize_; }

  bool getTruncateFile() const { return truncateFile_; }

  uint32_t getDeviceMaxWriteSize() const { return deviceMaxWriteSize_; }

  // The RAID stripe size is the BlockCache region size.
  uint32_t getRaidStripeSize() const {
    return blockCacheConfig_.getRegionSize();
  }

  // ============ Engine settings =============
  // Threshold that classifies an item as a small item or a large item for
  // the Navy engine. It is 0 while BigHash is not enabled.
  uint64_t getSmallItemThreshold() const {
    return isBigHashEnabled() ? bigHashConfig_.getSmallItemMaxSize() : 0;
  }

  // Read-only view of the BigHashConfig.
  const BigHashConfig& bigHash() const { return bigHashConfig_; }

  // Read-only view of the BlockCacheConfig.
  const BlockCacheConfig& blockCache() const { return blockCacheConfig_; }

  // ============ Job scheduler settings =============
  unsigned int getReaderThreads() const { return readerThreads_; }

  unsigned int getWriterThreads() const { return writerThreads_; }

  uint64_t getNavyReqOrderingShards() const { return navyReqOrderingShards_; }

  // ============ Other settings =============
  uint32_t getMaxConcurrentInserts() const { return maxConcurrentInserts_; }

  uint64_t getMaxParcelMemoryMB() const { return maxParcelMemoryMB_; }

  // ======================== Setters ========================

  // ============ Admission Policy =============
  // Turn on the "dynamic_random" admission policy.
  // @return DynamicRandomAPConfig (for configuration)
  // @throw invalid_argument if admissionPolicy_ is not empty
  DynamicRandomAPConfig& enableDynamicRandomAdmPolicy();

  // Turn on the "random" admission policy.
  // @return RandomAPConfig (for configuration)
  // @throw invalid_argument if admissionPolicy_ is not empty
  RandomAPConfig& enableRandomAdmPolicy();

  // ============ Device settings =============
  void setBlockSize(uint64_t blockSize) noexcept { blockSize_ = blockSize; }

  // Configure a simple file as the device.
  // @throw std::invalid_argument if RAID files have been already set.
  void setSimpleFile(const std::string& fileName,
                     uint64_t fileSize,
                     bool truncateFile = false);

  // Configure RAID files as the device.
  // @throw std::invalid_argument if a simple file has been already set
  //        or there is only one or fewer RAID paths.
  void setRaidFiles(std::vector<std::string> raidPaths,
                    uint64_t fileSize,
                    bool truncateFile = false);

  // Configure an in-memory file; only the file size is stored.
  // Intended solely for cachebench and unit tests, which create a
  // MemoryDevice when no file path is set.
  void setMemoryFile(uint64_t fileSize) noexcept { fileSize_ = fileSize; }

  void setDeviceMetadataSize(uint64_t deviceMetadataSize) noexcept {
    deviceMetadataSize_ = deviceMetadataSize;
  }

  void setDeviceMaxWriteSize(uint32_t deviceMaxWriteSize) noexcept {
    deviceMaxWriteSize_ = deviceMaxWriteSize;
  }

  // ============ BlockCache settings =============
  // Mutable BlockCacheConfig, for configuration.
  BlockCacheConfig& blockCache() noexcept { return blockCacheConfig_; }

  // ============ BigHash settings =============
  // Mutable BigHashConfig, for configuration.
  BigHashConfig& bigHash() noexcept { return bigHashConfig_; }

  // ============ Job scheduler settings =============
  void setReaderAndWriterThreads(unsigned int readerThreads,
                                 unsigned int writerThreads) noexcept {
    writerThreads_ = writerThreads;
    readerThreads_ = readerThreads;
  }

  // Set Navy request ordering shards (expressed as power of two).
  // @throw std::invalid_argument if the input value is 0.
  void setNavyReqOrderingShards(uint64_t navyReqOrderingShards);

  // ============ Other settings =============
  void setMaxConcurrentInserts(uint32_t maxConcurrentInserts) noexcept {
    maxConcurrentInserts_ = maxConcurrentInserts;
  }

  void setMaxParcelMemoryMB(uint64_t maxParcelMemoryMB) noexcept {
    maxParcelMemoryMB_ = maxParcelMemoryMB;
  }

 private:
  // ============ AP settings =============
  // Admission policy name.
  // Only "dynamic_random", "random" or the empty string are expected here.
  std::string admissionPolicy_{""};
  DynamicRandomAPConfig dynamicRandomAPConfig_{};
  RandomAPConfig randomAPConfig_{};

  // ============ Device settings =============
  // Navy specific device block size in bytes.
  uint64_t blockSize_{4096};
  // File name/path used for caching.
  std::string fileName_;
  // Navy RAID device file paths.
  std::vector<std::string> raidPaths_;
  // Size of the metadata partition on the Navy device.
  uint64_t deviceMetadataSize_{};
  // Size of the file that Navy should use.
  // 0 means to use the whole device.
  uint64_t fileSize_{};
  // Whether Navy is asked to truncate the file it uses.
  bool truncateFile_{false};
  // Controls the granularity of the writes when a region is flushed.
  // Only used when in-mem buffer is enabled.
  uint32_t deviceMaxWriteSize_{};

  // ============ BlockCache settings =============
  BlockCacheConfig blockCacheConfig_{};

  // ============ BigHash settings =============
  BigHashConfig bigHashConfig_{};

  // ============ Job scheduler settings =============
  // Number of asynchronous worker threads for read operations.
  unsigned int readerThreads_{32};
  // Number of asynchronous worker threads for write operations.
  unsigned int writerThreads_{32};
  // Number of shards, expressed as power of two, for native request ordering
  // in Navy.
  // This value needs to be non-zero.
  uint64_t navyReqOrderingShards_{20};

  // ============ Other settings =============
  // Maximum number of concurrent inserts allowed globally for Navy.
  // 0 means unlimited.
  uint32_t maxConcurrentInserts_{1'000'000};
  // Total memory limit for in-flight parcels.
  // Once it is reached, requests are rejected until the parcel memory usage
  // drops back under the limit.
  uint64_t maxParcelMemoryMB_{256};
};
} // namespace navy
} // namespace cachelib
} // namespace facebook