aios/storage/indexlib/indexlib/index/partition_info.cpp (417 lines of code) (raw):

/* * Copyright 2014-present Alibaba Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include "indexlib/index/partition_info.h" #include "indexlib/common/term_hint_parser.h" #include "indexlib/index/normal/deletionmap/deletion_map_reader.h" #include "indexlib/index_base/index_meta/partition_meta.h" #include "indexlib/index_base/index_meta/segment_temperature_meta.h" #include "indexlib/index_base/index_meta/temperature_doc_info.h" #include "indexlib/index_base/segment/in_memory_segment.h" #include "indexlib/index_base/segment/join_segment_directory.h" #include "indexlib/index_base/segment/online_segment_directory.h" #include "indexlib/index_base/segment/realtime_segment_directory.h" using namespace std; using namespace indexlib::index_base; namespace indexlib { namespace index { IE_LOG_SETUP(index, PartitionInfo); PartitionInfo::PartitionInfo(const PartitionInfo& other) : mBaseDocIds(other.mBaseDocIds) , mSegIdToBaseDocId(other.mSegIdToBaseDocId) , mVersion(other.mVersion) , mPartInfoHint(other.mPartInfoHint) , mOrderRanges(other.mOrderRanges) , mUnorderRange(other.mUnorderRange) , mPartMeta(other.mPartMeta) , mDeletionMapReader(other.mDeletionMapReader) , mPartitionMetrics(other.mPartitionMetrics) , mTemperatureDocRange(other.mTemperatureDocRange) { if (other.mSubPartitionInfo) { mSubPartitionInfo.reset(other.mSubPartitionInfo->Clone()); } } void PartitionInfo::Init(const Version& version, const PartitionMetaPtr& partitionMeta, const SegmentDataVector& segmentDatas, const std::vector<InMemorySegmentPtr>& dumpingSegments, const DeletionMapReaderPtr& deletionMapReader) { // kv/kkv table does not have deletionMapReader assert(version.GetSegmentCount() == segmentDatas.size()); mVersion = version; mPartMeta = partitionMeta; mDeletionMapReader = deletionMapReader; InitVersion(dumpingSegments); InitBaseDocIds(segmentDatas, dumpingSegments); InitPartitionMetrics(segmentDatas, dumpingSegments); InitOrderedDocIdRanges(segmentDatas); InitUnorderedDocIdRange(); InitPartitionInfoHint(segmentDatas, dumpingSegments); InitTemperatureMetaInfo(version, segmentDatas, dumpingSegments); } void PartitionInfo::InitTemperatureMetaInfo(const Version& version, const SegmentDataVector& segmentDatas, const std::vector<InMemorySegmentPtr>& dumpingSegments) { if (IsKeyValueTable()) { return; } mTemperatureDocRange.reset(new TemperatureDocInfo()); mTemperatureDocRange->Init(version, segmentDatas, dumpingSegments, mPartitionMetrics.totalDocCount); } bool PartitionInfo::GetTemperatureDocIdRanges(int32_t hintValues, DocIdRangeVector& ranges) const { if (IsKeyValueTable()) { return false; } if (!mTemperatureDocRange) { return false; } return mTemperatureDocRange->GetTemperatureDocIdRanges(hintValues, ranges); } globalid_t PartitionInfo::GetGlobalId(docid_t docId) const { if (IsKeyValueTable()) { return INVALID_GLOBALID; } if (mBaseDocIds.size() == 0 || docId >= (docid_t)mPartitionMetrics.totalDocCount) { return INVALID_GLOBALID; } globalid_t gid = INVALID_GLOBALID; for (int32_t i = (int32_t)mBaseDocIds.size() - 1; i >= 0; --i) { if (docId >= mBaseDocIds[i]) { docid_t localDocId = docId - mBaseDocIds[i]; segmentid_t segId = mVersion[i]; if (mDeletionMapReader->IsDeleted(docId)) { return INVALID_GLOBALID; } gid = localDocId; gid |= ((globalid_t)segId << 32); break; } } return gid; } docid_t PartitionInfo::GetSegmentDocCount(size_t idx) const { if (IsKeyValueTable()) { return INVALID_DOCID; } if (idx == mBaseDocIds.size() - 1) { return mPartitionMetrics.totalDocCount - mBaseDocIds[idx]; } return mBaseDocIds[idx + 1] - mBaseDocIds[idx]; } docid_t PartitionInfo::GetDocId(globalid_t gid) const { if (IsKeyValueTable()) { return INVALID_DOCID; } docid_t docId = INVALID_DOCID; int32_t segId = gid >> 32; for (size_t i = 0; i < mBaseDocIds.size(); ++i) { if (mVersion[i] == segId) { docid_t localDocId = gid & 0xFFFFFFFF; if (localDocId >= GetSegmentDocCount(i)) { return INVALID_GLOBALID; } docId = localDocId + mBaseDocIds[i]; if (mDeletionMapReader->IsDeleted(docId)) { return INVALID_DOCID; } break; } } return docId; } segmentid_t PartitionInfo::GetSegmentId(docid_t docId) const { if (IsKeyValueTable()) { return INVALID_DOCID; } if (docId == INVALID_DOCID || static_cast<size_t>(docId) >= GetTotalDocCount()) { return INVALID_SEGMENTID; } size_t pos = upper_bound(mBaseDocIds.begin(), mBaseDocIds.end(), docId) - mBaseDocIds.begin(); if (pos == 0) { return INVALID_SEGMENTID; } return mVersion[pos - 1]; } pair<segmentid_t, docid_t> PartitionInfo::GetLocalDocInfo(docid_t docId) const { if (docId == INVALID_DOCID || docId >= static_cast<int64_t>(GetTotalDocCount())) { return make_pair(INVALID_SEGMENTID, INVALID_DOCID); } size_t pos = upper_bound(mBaseDocIds.begin(), mBaseDocIds.end(), docId) - mBaseDocIds.begin(); if (pos == 0) { return make_pair(INVALID_SEGMENTID, INVALID_DOCID); } size_t idx = pos - 1; segmentid_t segId = mVersion[idx]; return make_pair(segId, docId - mBaseDocIds[idx]); } bool PartitionInfo::GetDiffDocIdRanges(const PartitionInfoHint& infoHint, DocIdRangeVector& docIdRanges) const { if (IsKeyValueTable()) { return false; } if (infoHint.lastIncSegmentId == INVALID_SEGMENTID && infoHint.lastRtSegmentId == INVALID_SEGMENTID) { return false; } docIdRanges.clear(); DocIdRange range; if (GetIncDiffDocIdRange(infoHint.lastIncSegmentId, range)) { docIdRanges.push_back(range); } if (GetRtDiffDocIdRange(infoHint.lastRtSegmentId, infoHint.lastRtSegmentDocCount, range)) { docIdRanges.push_back(range); } return !docIdRanges.empty(); } void PartitionInfo::InitVersion(const std::vector<InMemorySegmentPtr>& dumpingSegments) { for (const InMemorySegmentPtr& inMemSegment : dumpingSegments) { mVersion.AddSegment(inMemSegment->GetSegmentId()); } } void PartitionInfo::InitBaseDocIds(const SegmentDataVector& segmentDatas, const std::vector<InMemorySegmentPtr>& dumpingSegments) { if (IsKeyValueTable()) { return; } uint32_t totalDocCount = 0; for (size_t i = 0; i < segmentDatas.size(); i++) { const SegmentData& segData = segmentDatas[i]; mBaseDocIds.push_back(segData.GetBaseDocId()); mSegIdToBaseDocId.insert(make_pair(segData.GetSegmentId(), segData.GetBaseDocId())); totalDocCount += segData.GetSegmentInfo()->docCount; } for (const InMemorySegmentPtr& inMemSegment : dumpingSegments) { mBaseDocIds.push_back(totalDocCount); mSegIdToBaseDocId.insert(make_pair(inMemSegment->GetSegmentId(), totalDocCount)); totalDocCount += inMemSegment->GetSegmentInfo()->docCount; } // // for realtime building segment // if (segmentDatas.size() > 0) // { // const SegmentData& lastSegData = segmentDatas.back(); // mBaseDocIds.push_back(lastSegData.GetBaseDocId() + // lastSegData.GetSegmentInfo()->docCount); // } // else // { // mBaseDocIds.push_back(0); // } } docid_t PartitionInfo::GetBaseDocId(segmentid_t segId) const { if (IsKeyValueTable()) { return INVALID_DOCID; } const auto it = mSegIdToBaseDocId.find(segId); if (it == mSegIdToBaseDocId.end()) { return INVALID_DOCID; } return it->second; } void PartitionInfo::InitPartitionMetrics(const SegmentDataVector& segmentDatas, const std::vector<InMemorySegmentPtr>& dumpingSegments) { mPartitionMetrics.segmentCount = segmentDatas.size() + dumpingSegments.size(); mPartitionMetrics.delDocCount = mDeletionMapReader ? mDeletionMapReader->GetDeletedDocCount() : 0; size_t totalDocCount = 0; size_t incDocCount = 0; for (size_t i = 0; i < segmentDatas.size(); ++i) { const SegmentData& segData = segmentDatas[i]; totalDocCount += segData.GetSegmentInfo()->docCount; if (OnlineSegmentDirectory::IsIncSegmentId(segData.GetSegmentId())) { incDocCount += segData.GetSegmentInfo()->docCount; } } for (const InMemorySegmentPtr& inMemSegment : dumpingSegments) { totalDocCount += inMemSegment->GetSegmentInfo()->docCount; } mPartitionMetrics.totalDocCount = totalDocCount; mPartitionMetrics.incDocCount = incDocCount; } void PartitionInfo::InitOrderedDocIdRanges(const SegmentDataVector& segmentDatas) { if (IsKeyValueTable()) { return; } mOrderRanges.clear(); if (mPartMeta->Size() == 0) { return; } for (size_t i = 0; i < segmentDatas.size(); i++) { const SegmentData& segData = segmentDatas[i]; if (!OnlineSegmentDirectory::IsIncSegmentId(segData.GetSegmentId())) { break; } docid_t begin = segData.GetBaseDocId(); docid_t end = begin + segData.GetSegmentInfo()->docCount; mOrderRanges.push_back(DocIdRange(begin, end)); } } bool PartitionInfo::GetOrderedDocIdRanges(DocIdRangeVector& ranges) const { if (IsKeyValueTable()) { return false; } if (mOrderRanges.empty()) { return false; } ranges = mOrderRanges; return true; } bool PartitionInfo::GetUnorderedDocIdRange(DocIdRange& range) const { if (IsKeyValueTable()) { return false; } if (mUnorderRange.first >= mUnorderRange.second) { return false; } range = mUnorderRange; return true; } void PartitionInfo::InitUnorderedDocIdRange() { if (IsKeyValueTable()) { return; } if (mOrderRanges.size() == 0) { mUnorderRange.first = (docid_t)0; mUnorderRange.second = (docid_t)mPartitionMetrics.totalDocCount; } else { mUnorderRange.first = mOrderRanges[mOrderRanges.size() - 1].second; mUnorderRange.second = (docid_t)mPartitionMetrics.totalDocCount; } } void PartitionInfo::InitPartitionInfoHint(const SegmentDataVector& segmentDatas, const std::vector<InMemorySegmentPtr>& dumpingSegments) { mPartInfoHint.lastRtSegmentId = INVALID_SEGMENTID; if (dumpingSegments.empty()) { for (int32_t i = segmentDatas.size() - 1; i >= 0; i--) { segmentid_t segId = segmentDatas[i].GetSegmentId(); if (RealtimeSegmentDirectory::IsRtSegmentId(segId)) { mPartInfoHint.lastRtSegmentId = segId; mPartInfoHint.lastRtSegmentDocCount = segmentDatas[i].GetSegmentInfo()->docCount; break; } } } else { mPartInfoHint.lastRtSegmentId = dumpingSegments.back()->GetSegmentId(); mPartInfoHint.lastRtSegmentDocCount = dumpingSegments.back()->GetSegmentInfo()->docCount; } mPartInfoHint.lastIncSegmentId = INVALID_SEGMENTID; for (int32_t i = segmentDatas.size() - 1; i >= 0; i--) { segmentid_t segId = segmentDatas[i].GetSegmentId(); if (OnlineSegmentDirectory::IsIncSegmentId(segId)) { mPartInfoHint.lastIncSegmentId = segId; break; } } } void PartitionInfo::AddInMemorySegment(const InMemorySegmentPtr& inMemSegment) { assert(inMemSegment); if (!IsKeyValueTable()) { mBaseDocIds.push_back(mPartitionMetrics.totalDocCount); } segmentid_t segId = inMemSegment->GetSegmentId(); mPartitionMetrics.segmentCount++; mPartInfoHint.lastRtSegmentId = segId; mPartInfoHint.lastRtSegmentDocCount = inMemSegment->GetSegmentInfo()->docCount; mVersion.AddSegment(segId); mPartitionMetrics.totalDocCount += inMemSegment->GetSegmentInfo()->docCount; if (!IsKeyValueTable()) { mUnorderRange.second = mPartitionMetrics.totalDocCount; } if (!IsKeyValueTable() && mTemperatureDocRange && !mTemperatureDocRange->IsEmptyInfo()) { mTemperatureDocRange->AddNewSegmentInfo(TemperatureProperty::HOT, segId, mPartitionMetrics.totalDocCount, MAX_DOCID); } if (mSubPartitionInfo) { const InMemorySegmentPtr& subInMemSegment = inMemSegment->GetSubInMemorySegment(); mSubPartitionInfo->AddInMemorySegment(subInMemSegment); } } bool PartitionInfo::NeedUpdate(const index_base::InMemorySegmentPtr& inMemSegment) const { assert(inMemSegment); assert(mPartInfoHint.lastRtSegmentId == inMemSegment->GetSegmentId()); uint32_t lastRtSegmentDocCount = inMemSegment->GetSegmentInfo()->docCount; // assume that if main partition doc count is not changed, // sub partition doc count is not changed too. return (lastRtSegmentDocCount > mPartInfoHint.lastRtSegmentDocCount); } void PartitionInfo::UpdateInMemorySegment(const InMemorySegmentPtr& inMemSegment) { assert(inMemSegment); assert(mPartInfoHint.lastRtSegmentId == inMemSegment->GetSegmentId()); if (mDeletionMapReader) { mPartitionMetrics.delDocCount = mDeletionMapReader->GetDeletedDocCount(); } const SegmentInfoPtr& segInfo = inMemSegment->GetSegmentInfo(); size_t lastRtSegmentDocCount = segInfo->docCount; if (lastRtSegmentDocCount > mPartInfoHint.lastRtSegmentDocCount) { size_t increaseDocCount = lastRtSegmentDocCount - mPartInfoHint.lastRtSegmentDocCount; mPartInfoHint.lastRtSegmentDocCount = lastRtSegmentDocCount; mPartitionMetrics.totalDocCount += increaseDocCount; if (!IsKeyValueTable()) { mUnorderRange.second = mPartitionMetrics.totalDocCount; } } if (mSubPartitionInfo) { const InMemorySegmentPtr& subInMemSegment = inMemSegment->GetSubInMemorySegment(); mSubPartitionInfo->UpdateInMemorySegment(subInMemSegment); } } PartitionInfo* PartitionInfo::Clone() { return new PartitionInfo(*this); } bool PartitionInfo::GetIncDiffDocIdRange(segmentid_t lastIncSegId, DocIdRange& range) const { if (IsKeyValueTable()) { return false; } size_t segmentCount = mVersion.GetSegmentCount(); for (size_t i = 0; i < segmentCount; ++i) { if (!OnlineSegmentDirectory::IsIncSegmentId(mVersion[i]) || mVersion[i] <= lastIncSegId) { continue; } if (static_cast<size_t>(mBaseDocIds[i]) >= mPartitionMetrics.incDocCount) { return false; } range = DocIdRange(mBaseDocIds[i], mPartitionMetrics.incDocCount); return true; } return false; } bool PartitionInfo::GetRtDiffDocIdRange(segmentid_t lastRtSegmentId, size_t lastRtSegmentDocCount, DocIdRange& range) const { if (IsKeyValueTable()) { return false; } if (lastRtSegmentId == mPartInfoHint.lastRtSegmentId && lastRtSegmentDocCount == mPartInfoHint.lastRtSegmentDocCount) { return false; } size_t totalDocCount = mPartitionMetrics.totalDocCount; size_t segmentCount = mVersion.GetSegmentCount(); for (size_t i = 0; i < segmentCount; ++i) { if (mVersion[i] == lastRtSegmentId) { range = DocIdRange(mBaseDocIds[i] + lastRtSegmentDocCount, totalDocCount); return range.first < range.second; } } for (size_t i = 0; i < segmentCount; ++i) { if (mVersion[i] > lastRtSegmentId && RealtimeSegmentDirectory::IsRtSegmentId(mVersion[i])) { range = DocIdRange(mBaseDocIds[i], totalDocCount); return range.first < range.second; } } return false; } void PartitionInfo::TEST_AssertEqual(const PartitionInfo& other) { assert(mBaseDocIds == other.mBaseDocIds); assert(mSegIdToBaseDocId == other.mSegIdToBaseDocId); assert(mVersion == other.mVersion); assert(mPartInfoHint == other.mPartInfoHint); assert(mOrderRanges == other.mOrderRanges); assert(mUnorderRange == other.mUnorderRange); mPartMeta->AssertEqual(*other.mPartMeta.get()); assert(mDeletionMapReader->GetDeletedDocCount() == other.mDeletionMapReader->GetDeletedDocCount()); assert(mPartitionMetrics == other.mPartitionMetrics); if (mSubPartitionInfo != nullptr && other.mSubPartitionInfo != nullptr) { mSubPartitionInfo->TEST_AssertEqual(*other.mSubPartitionInfo.get()); } else { assert(mSubPartitionInfo == nullptr && other.mSubPartitionInfo == nullptr); } } }} // namespace indexlib::index