cachelib/navy/block_cache/RegionManager.h (130 lines of code) (raw):

/* * Copyright (c) Facebook, Inc. and its affiliates. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #pragma once #include <folly/container/F14Map.h> #include <cassert> #include <memory> #include <mutex> #include <utility> #include "cachelib/common/AtomicCounter.h" #include "cachelib/navy/block_cache/EvictionPolicy.h" #include "cachelib/navy/block_cache/Region.h" #include "cachelib/navy/block_cache/Types.h" #include "cachelib/navy/common/Buffer.h" #include "cachelib/navy/common/Device.h" #include "cachelib/navy/common/Types.h" #include "cachelib/navy/scheduler/JobScheduler.h" #include "cachelib/navy/serialization/RecordIO.h" #include "cachelib/navy/serialization/Serialization.h" namespace facebook { namespace cachelib { namespace navy { // Callback that is used to clear index. // @rid Region ID // @buffer Buffer with region data, valid during callback invocation // Returns number of slots evicted using RegionEvictCallback = std::function<uint32_t(RegionId rid, BufferView buffer)>; // Callback that is used to clean up region. // @rid Region ID // @buffer Buffer with region data, valid during callback invocation using RegionCleanupCallback = std::function<void(RegionId rid, BufferView buffer)>; // Size class or stack allocator. Thread safe. Syncs access, reclaims regions // Controls the allocation of regions, status (open for read/write), and // eviction. Region manager doesn't have internal locks. External caller must // take care of locking. class RegionManager { public: // Constructs a Region Manager. // // @param numRegions number of regions // @param regionSize size of the region // @param baseOffset base offset of the region // @param device reference to device // @param numCleanRegions How many regions reclamator maintains in // the clean pool // @param scheduler JobScheduler to run reclamation jobs // @param evictCb Callback invoked when region evicted // @param cleanupCb Callback invoked when region cleaned up // @param policy eviction policy // @param numInMemBuffers number of in memory buffers // @param numPriorities max number of priorities allowed for // regions // @param inMemBufFlushRetryLimit max number of flushing retry times for // in-mem buffer RegionManager(uint32_t numRegions, uint64_t regionSize, uint64_t baseOffset, Device& device, uint32_t numCleanRegions, JobScheduler& scheduler, RegionEvictCallback evictCb, RegionCleanupCallback cleanupCb, std::unique_ptr<EvictionPolicy> policy, uint32_t numInMemBuffers, uint16_t numPriorities, uint16_t inMemBufFlushRetryLimit); RegionManager(const RegionManager&) = delete; RegionManager& operator=(const RegionManager&) = delete; // Gets a region from a valid region ID. Region& getRegion(RegionId rid) { XDCHECK(rid.valid()); return *regions_[rid.index()]; } // Gets a const region from a valid region ID. const Region& getRegion(RegionId rid) const { XDCHECK(rid.valid()); return *regions_[rid.index()]; } // Flushes the in memory buffer attached to a region in either async or // sync mode. // In async mode, a flush job will be added to a job scheduler; // In sync mode, the function will not end until the flush work succeeds. void doFlush(RegionId rid, bool async); // Returns the size of one region. uint64_t regionSize() const { return regionSize_; } // Gets a region to evict. RegionId evict(); // Promote a region. If this region was still buffered in-mem, // this would be a no-op. void touch(RegionId rid); // Calling track on tracked regions is noop. void track(RegionId rid); // Resets all region internal state. void reset(); // Atomically loads the current sequence number (in memory_order_acquire // order). // Sequence number increases when a reclamation finished. Since reclamation // may start during reading, by checking whether the sequence number changes, // we avoid reading a region that has been reclaimed. uint64_t getSeqNumber() const { return seqNumber_.load(std::memory_order_acquire); } // Converts @RelAddress to @AbsAddress. AbsAddress toAbsolute(RelAddress ra) const { return AbsAddress{ra.offset() + ra.rid().index() * regionSize_}; } // Converts @AbsAddress to @RelAddress. RelAddress toRelative(AbsAddress aa) const { // Compiler optimizes to use one division instruction return RelAddress{RegionId(aa.offset() / regionSize_), uint32_t(aa.offset() % regionSize_)}; } // Assigns a buffer from buffer pool. std::unique_ptr<Buffer> claimBufferFromPool(); // Returns the buffer to the pool. void returnBufferToPool(std::unique_ptr<Buffer> buf) { { std::lock_guard<std::mutex> bufLock{bufferMutex_}; buffers_.push_back(std::move(buf)); } numInMemBufActive_.dec(); } // Writes buffer @buf at the @addr. // @addr must be the address returned by Region::open(OpenMode::Write) // @buf may be mutated and will be de-allocated at the end of this void write(RelAddress addr, Buffer buf); // Returns a buffer with data read from the device the @addr of size bytes // @addr must be the address returned by Region::open(OpenMode::Read). // // On success the returned buffer will have same size as "size" argument. // Caller must check the size of the buffer returned to determine if this // succeeded or not. Buffer read(const RegionDescriptor& desc, RelAddress addr, size_t size) const; // Flushes all in memory buffers to the device and then issues device flush. void flush(); // Flushes the in memory buffer attached to a region. // Returns true if the flush succeeds and the buffer is detached from the // region; false otherwise. // // Caller is expected to call flushBuffer until true is returned or retry // times reach the limit. This routine is idempotent and is safe to call // multiple times until detachBuffer is done. Region::FlushRes flushBuffer(const RegionId& rid); // Detaches the buffer from the region and returns the buffer to pool. // Caller is expected to call this until it returns true. // // @returns false if there are active readers when detaching the buffer; // true otherwise. bool detachBuffer(const RegionId& rid); // Cleans up the in memory buffer when flushing failure reach the retry limit. // Returns true if the cleanup succeeds and the buffer is detached from the // region; false otherwise. // // Caller is expected to call cleanupBufferOnFlushFailure until true is // returned. This routine is idempotent and is safe to call multiple times // until detachBuffer is done. bool cleanupBufferOnFlushFailure(const RegionId& rid); // Releases a region that was cleaned up due to in-mem buffer flushing // failure. void releaseCleanedupRegion(RegionId rid); // Stores region information in a Thrift object for all regions. void persist(RecordWriter& rw) const; // Resets RegionManager and recovers region data. Throws std::exception on // failure. void recover(RecordReader& rr); // Exports RegionManager stats via CounterVisitor. void getCounters(const CounterVisitor& visitor) const; // Opens a region for reading and returns the region descriptor. // // @param rid region ID // @param seqNumber the sequence number aqcuired before opening the region // for read; it is used to determine whether a reclamation // happened during reading RegionDescriptor openForRead(RegionId rid, uint64_t seqNumber); // Closes the region and consumes the region descriptor. void close(RegionDescriptor&& desc); // Fetches a clean region from the @cleanRegions_ list and schedules reclaim // jobs to refill the list. If in-mem buffer mode is enabled, a buffer will be // attached to the fetched clean region. // Returns OpenStatus::Ready if all the operations are successful; // OpenStatus::Retry otherwise. OpenStatus getCleanRegion(RegionId& rid); // Tries to get a free region first, otherwise evicts one and schedules region // cleanup job (which will add the region to the clean list). JobExitCode startReclaim(); // Releases a region that was evicted during region reclamation. // // @param rid region ID // @param startTime time when a reclamation starts; // it is used to count the reclamation time duration void releaseEvictedRegion(RegionId rid, std::chrono::nanoseconds startTime); // Evicts a region by calling @evictCb_ during region reclamation. void doEviction(RegionId rid, BufferView buffer) const; private: using LockGuard = std::lock_guard<std::mutex>; uint64_t physicalOffset(RelAddress addr) const { return baseOffset_ + toAbsolute(addr).offset(); } bool deviceWrite(RelAddress addr, Buffer buf); bool isValidIORange(uint32_t offset, uint32_t size) const; OpenStatus assignBufferToRegion(RegionId rid); // Initializes the eviction policy. Even on a clean start, we will track all // the regions. The difference is that these regions will have no items in // them and can be evicted right away. void resetEvictionPolicy(); const uint16_t numPriorities_{}; const uint16_t inMemBufFlushRetryLimit_{}; const uint32_t numRegions_{}; const uint64_t regionSize_{}; const uint64_t baseOffset_{}; Device& device_; const std::unique_ptr<EvictionPolicy> policy_; std::unique_ptr<std::unique_ptr<Region>[]> regions_; mutable AtomicCounter externalFragmentation_; mutable AtomicCounter physicalWrittenCount_; mutable AtomicCounter reclaimRegionErrors_; mutable std::mutex cleanRegionsMutex_; std::vector<RegionId> cleanRegions_; const uint32_t numCleanRegions_{}; std::atomic<uint64_t> seqNumber_{0}; uint32_t reclaimsScheduled_{0}; JobScheduler& scheduler_; const RegionEvictCallback evictCb_; const RegionCleanupCallback cleanupCb_; // To understand naming here, let me explain difference between "reclamation" // and "eviction". Cache evicts item and makes it inaccessible via lookup. It // is an item level operation. When we say "reclamation" about regions we // refer to wiping an entire region for reuse. As part of reclamation, every // item in the region gets evicted. mutable AtomicCounter reclaimCount_; mutable AtomicCounter reclaimTimeCountUs_; mutable AtomicCounter evictedCount_; // Stats to keep track of inmem buffer usage mutable AtomicCounter numInMemBufActive_; mutable AtomicCounter numInMemBufWaitingFlush_; mutable AtomicCounter numInMemBufFlushRetries_; mutable AtomicCounter numInMemBufFlushFailures_; mutable AtomicCounter numInMemBufCleanupRetries_; const uint32_t numInMemBuffers_{0}; // Locking order is region lock, followed by bufferMutex_; mutable std::mutex bufferMutex_; std::vector<std::unique_ptr<Buffer>> buffers_; }; } // namespace navy } // namespace cachelib } // namespace facebook