aios/storage/indexlib/index/inverted_index/config/InvertedIndexConfig.cpp (776 lines of code) (raw):

/* * Copyright 2014-present Alibaba Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include "indexlib/index/inverted_index/config/InvertedIndexConfig.h" #include "autil/EnvUtil.h" #include "autil/StringTokenizer.h" #include "indexlib/base/Constant.h" #include "indexlib/base/Types.h" #include "indexlib/config/ConfigDefine.h" #include "indexlib/config/FieldConfig.h" #include "indexlib/config/FileCompressConfig.h" #include "indexlib/index/ann/Common.h" #include "indexlib/index/inverted_index/Constant.h" #include "indexlib/index/inverted_index/config/AdaptiveDictionaryConfig.h" #include "indexlib/index/inverted_index/config/DictionaryConfig.h" #include "indexlib/index/inverted_index/config/HighFreqVocabularyCreator.h" #include "indexlib/index/inverted_index/config/HighFrequencyVocabulary.h" #include "indexlib/index/inverted_index/config/InvertedIndexConfigSerializer.h" #include "indexlib/index/inverted_index/config/PayloadConfig.h" #include "indexlib/index/inverted_index/config/TruncateProfileConfig.h" #include "indexlib/index/inverted_index/config/TruncateTermVocabulary.h" #include "indexlib/index/primary_key/Common.h" #include "indexlib/util/Exception.h" using namespace std; using namespace autil; using namespace autil::legacy; using namespace autil::legacy::json; using namespace indexlib::file_system; using namespace indexlib::config; using namespace indexlib::util; namespace indexlibv2::config { AUTIL_LOG_SETUP(indexlib.config, InvertedIndexConfig); #define INDEX_FORMAT_VERSION_ENV_STR "INDEXLIB_DEFAULT_INVERTED_INDEX_FORMAT_VERSION" const format_versionid_t InvertedIndexConfig::BINARY_FORMAT_VERSION = 1; format_versionid_t InvertedIndexConfig::DEFAULT_FORMAT_VERSION = autil::EnvUtil::getEnv(INDEX_FORMAT_VERSION_ENV_STR, (format_versionid_t)0); struct InvertedIndexConfig::Impl { indexid_t indexId = INVALID_INDEXID; indexid_t parentIndexId = INVALID_INDEXID; std::string indexName; InvertedIndexType invertedIndexType = it_unknown; std::string analyzer; std::shared_ptr<AdaptiveDictionaryConfig> adaptiveDictConfig; // not copy std::shared_ptr<DictionaryConfig> dictConfig; // not copy std::shared_ptr<HighFrequencyVocabulary> highFreqVocabulary; // not copy std::shared_ptr<FileCompressConfig> fileCompressConfig; std::shared_ptr<FileCompressConfigV2> fileCompressConfigV2; std::string useTruncateProfiles; indexlib::index::HighFrequencyTermPostingType highFrequencyTermPostingType = indexlib::index::hp_bitmap; std::shared_ptr<TruncateTermVocabulary> truncateTermVocabulary; optionflag_t optionFlag = OPTION_FLAG_ALL; InvertedIndexConfig::IndexShardingType shardingType = InvertedIndexConfig::IST_NO_SHARDING; std::vector<std::shared_ptr<InvertedIndexConfig>> shardingIndexConfigs; // not copy std::vector<std::shared_ptr<InvertedIndexConfig>> truncateIndexConfigs; // not copy indexlib::IndexStatus status = indexlib::is_normal; indexlib::schema_opid_t ownerOpId = indexlib::INVALID_SCHEMA_OP_ID; format_versionid_t formatVersionId = DEFAULT_FORMAT_VERSION; format_versionid_t maxSupportedFormatVersionId = BINARY_FORMAT_VERSION; uint32_t bloomFilterMultipleNum = 0; string nonTruncIndexName; // not copy bool isReferenceCompress = false; bool isHashTypedDictionary = false; bool isIndexUpdatable = false; bool isPatchCompressed = false; bool isVirtual = false; bool isShortListVbyteCompress = false; bool hasTruncate = false; indexlib::config::PayloadConfig payloadConfig; Impl() {} Impl(const std::string& indexName, InvertedIndexType invertedIndexType) : indexName(indexName) , invertedIndexType(invertedIndexType) , optionFlag(invertedIndexType == it_expack ? EXPACK_OPTION_FLAG_ALL : OPTION_FLAG_ALL) { } Impl(const Impl& other) : indexId(other.indexId) , parentIndexId(other.parentIndexId) , indexName(other.indexName) , invertedIndexType(other.invertedIndexType) , analyzer(other.analyzer) , fileCompressConfig(other.fileCompressConfig) , fileCompressConfigV2(other.fileCompressConfigV2) , useTruncateProfiles(other.useTruncateProfiles) , highFrequencyTermPostingType(other.highFrequencyTermPostingType) , optionFlag(other.optionFlag) , shardingType(other.shardingType) , status(other.status) , ownerOpId(other.ownerOpId) , formatVersionId(other.formatVersionId) , maxSupportedFormatVersionId(other.maxSupportedFormatVersionId) , bloomFilterMultipleNum(other.bloomFilterMultipleNum) , isReferenceCompress(other.isReferenceCompress) , isHashTypedDictionary(other.isHashTypedDictionary) , isIndexUpdatable(other.isIndexUpdatable) , isPatchCompressed(other.isPatchCompressed) , isVirtual(other.isVirtual) , isShortListVbyteCompress(other.isShortListVbyteCompress) , hasTruncate(other.hasTruncate) { } }; InvertedIndexConfig::Iterator::Iterator() {} InvertedIndexConfig::Iterator::Iterator(const std::vector<std::shared_ptr<FieldConfig>>& fieldConfigs) : _fieldConfigs(fieldConfigs) { } InvertedIndexConfig::Iterator::Iterator(const std::shared_ptr<FieldConfig>& fieldConfig) { _fieldConfigs.push_back(fieldConfig); } InvertedIndexConfig::Iterator::~Iterator() {} bool InvertedIndexConfig::Iterator::HasNext() const { return _idx < _fieldConfigs.size(); } std::shared_ptr<FieldConfig> InvertedIndexConfig::Iterator::Next() { if (_idx < _fieldConfigs.size()) { return _fieldConfigs[_idx++]; } return std::shared_ptr<FieldConfig>(); } void InvertedIndexConfig::ResetDefaultFormatVersion() { format_versionid_t targetId = autil::EnvUtil::getEnv(INDEX_FORMAT_VERSION_ENV_STR, (format_versionid_t)0); format_versionid_t currentId = InvertedIndexConfig::DEFAULT_FORMAT_VERSION; if (targetId != currentId) { cout << "reset inverted index format version " << "from " << currentId << " to " << targetId << endl; InvertedIndexConfig::DEFAULT_FORMAT_VERSION = targetId; } } InvertedIndexConfig::InvertedIndexConfig() : _impl(std::make_unique<Impl>()) {} InvertedIndexConfig::InvertedIndexConfig(const string& indexName, InvertedIndexType invertedIndexType) : _impl(std::make_unique<Impl>(indexName, invertedIndexType)) { } InvertedIndexConfig::InvertedIndexConfig(const InvertedIndexConfig& other) : _impl(std::make_unique<Impl>(*other._impl)) { } InvertedIndexConfig& InvertedIndexConfig::operator=(const InvertedIndexConfig& other) { if (&other != this) { _impl = std::make_unique<Impl>(*other._impl); } return *this; } InvertedIndexConfig::~InvertedIndexConfig() {} int32_t InvertedIndexConfig::GetFieldIdxInPack(fieldid_t id) const { return -1; } Status InvertedIndexConfig::CheckEqual(const InvertedIndexConfig& other) const { CHECK_CONFIG_EQUAL(_impl->indexId, other._impl->indexId, "_impl->indexId not equal"); CHECK_CONFIG_EQUAL(_impl->parentIndexId, other._impl->parentIndexId, "_impl->parentIndexId not equal"); CHECK_CONFIG_EQUAL(_impl->indexName, other._impl->indexName, "_impl->indexName not equal"); CHECK_CONFIG_EQUAL(_impl->invertedIndexType, other._impl->invertedIndexType, "_impl->invertedIndexType not equal"); CHECK_CONFIG_EQUAL(_impl->shardingType, other._impl->shardingType, "_impl->shardingType not equal"); CHECK_CONFIG_EQUAL(_impl->optionFlag, other._impl->optionFlag, "_impl->optionFlag not equal"); CHECK_CONFIG_EQUAL(_impl->shardingIndexConfigs.size(), other._impl->shardingIndexConfigs.size(), "_impl->shardingIndexConfigs size not equal"); CHECK_CONFIG_EQUAL(_impl->formatVersionId, other._impl->formatVersionId, "_impl->formatVersionId not equal"); CHECK_CONFIG_EQUAL(_impl->isShortListVbyteCompress, other._impl->isShortListVbyteCompress, "_impl->isShortListVbyteCompress not equal"); for (size_t i = 0; i < _impl->shardingIndexConfigs.size(); i++) { auto status = _impl->shardingIndexConfigs[i]->CheckEqual(*other._impl->shardingIndexConfigs[i]); RETURN_IF_STATUS_ERROR(status, "sharding index config [%s] not equal", _impl->shardingIndexConfigs[i]->GetIndexName().c_str()); } return Status::OK(); } void InvertedIndexConfig::Check() const { if (_impl->isIndexUpdatable) { bool isIndexUpdatable = (_impl->invertedIndexType == it_number) || (_impl->invertedIndexType == it_number_int8) || (_impl->invertedIndexType == it_number_int16) || (_impl->invertedIndexType == it_number_int32) || (_impl->invertedIndexType == it_number_int64) || (_impl->invertedIndexType == it_number_uint8) || (_impl->invertedIndexType == it_number_uint16) || (_impl->invertedIndexType == it_number_uint32) || (_impl->invertedIndexType == it_number_uint64) || (_impl->invertedIndexType == it_string); isIndexUpdatable = isIndexUpdatable && (_impl->optionFlag == 0); if (!isIndexUpdatable) { INDEXLIB_FATAL_ERROR(Schema, "index [%s] does not support [%s = true].", _impl->indexName.c_str(), INDEX_UPDATABLE.c_str()); } } if ((_impl->optionFlag & of_position_payload) && !(_impl->optionFlag & of_position_list)) { INDEXLIB_FATAL_ERROR(Schema, "position payload flag is 1 but no position list. index name [%s]", GetIndexName().c_str()); } if (!_impl->dictConfig && !_impl->adaptiveDictConfig && _impl->highFrequencyTermPostingType == indexlib::index::hp_both) { INDEXLIB_FATAL_ERROR(Schema, "index[%s] error: high_frequency_term_posting_type is set to both" " without high_frequency_dictionary[%d]/high_frequency_adaptive_dictionary[%d]", _impl->indexName.c_str(), _impl->dictConfig ? 1 : 0, _impl->adaptiveDictConfig ? 1 : 0); } if (_impl->isReferenceCompress && ((_impl->optionFlag & of_term_frequency) && !(_impl->optionFlag & of_tf_bitmap))) { INDEXLIB_FATAL_ERROR(Schema, "reference_compress does not support tf(not tf_bitmap), index name [%s]", GetIndexName().c_str()); } if (_impl->formatVersionId > _impl->maxSupportedFormatVersionId) { INDEXLIB_FATAL_ERROR(Schema, "format_verison_id [%d] over max supported value [%d], index name [%s]", _impl->formatVersionId, _impl->maxSupportedFormatVersionId, GetIndexName().c_str()); } bool supportFileCompress = (_impl->invertedIndexType == it_number) || (_impl->invertedIndexType == it_number_int8) || (_impl->invertedIndexType == it_number_int16) || (_impl->invertedIndexType == it_number_int32) || (_impl->invertedIndexType == it_number_int64) || (_impl->invertedIndexType == it_number_uint8) || (_impl->invertedIndexType == it_number_uint16) || (_impl->invertedIndexType == it_number_uint32) || (_impl->invertedIndexType == it_number_uint64) || (_impl->invertedIndexType == it_string) || (_impl->invertedIndexType == it_text) || (_impl->invertedIndexType == it_pack) || (_impl->invertedIndexType == it_expack) || (_impl->invertedIndexType == it_range) || (_impl->invertedIndexType == it_date) || (_impl->invertedIndexType == it_spatial); if (_impl->fileCompressConfig && !_impl->fileCompressConfig->GetCompressType().empty() && !supportFileCompress) { INDEXLIB_FATAL_ERROR(Schema, "index [%s] with type [%s] not support enable file_compress", _impl->indexName.c_str(), InvertedIndexConfig::InvertedIndexTypeToStr(_impl->invertedIndexType)); } } void InvertedIndexConfig::DoDeserialize(const autil::legacy::Any& any, const config::IndexConfigDeserializeResource& resource) { indexlib::index::InvertedIndexConfigSerializer::DeserializeCommonFields(any, this); } void InvertedIndexConfig::Deserialize(const autil::legacy::Any& any, size_t idxInJsonArray, const config::IndexConfigDeserializeResource& resource) { indexlib::index::InvertedIndexConfigSerializer::Deserialize(any, idxInJsonArray, resource, this); } void InvertedIndexConfig::Serialize(autil::legacy::Jsonizable::JsonWrapper& json) const { indexlib::index::InvertedIndexConfigSerializer::Serialize(*this, &json); } indexid_t InvertedIndexConfig::GetIndexId() const { return _impl->indexId; } void InvertedIndexConfig::SetIndexId(indexid_t id) { _impl->indexId = id; for (const auto& shardingConfig : _impl->shardingIndexConfigs) { shardingConfig->SetParentIndexId(id); } } const std::string& InvertedIndexConfig::GetIndexCommonPath() const { return indexlib::index::INVERTED_INDEX_PATH; } std::vector<std::string> InvertedIndexConfig::GetIndexPath() const { std::vector<std::string> paths; auto shardingType = GetShardingType(); if (shardingType == IST_NO_SHARDING || shardingType == IST_IS_SHARDING) { paths.push_back(GetIndexCommonPath() + "/" + GetIndexName()); } return paths; } const string& InvertedIndexConfig::GetIndexName() const { return _impl->indexName; } void InvertedIndexConfig::SetIndexName(const string& indexName) { _impl->indexName = indexName; } InvertedIndexType InvertedIndexConfig::GetInvertedIndexType() const { return _impl->invertedIndexType; } const string& InvertedIndexConfig::GetIndexType() const { InvertedIndexType invertedIndexType = _impl->invertedIndexType; switch (invertedIndexType) { case InvertedIndexType::it_primarykey64: return indexlibv2::index::PRIMARY_KEY_INDEX_TYPE_STR; case InvertedIndexType::it_primarykey128: return indexlibv2::index::PRIMARY_KEY_INDEX_TYPE_STR; case InvertedIndexType::it_text: case InvertedIndexType::it_pack: case InvertedIndexType::it_expack: case InvertedIndexType::it_string: case InvertedIndexType::it_number: // use external -- legacy case InvertedIndexType::it_number_int8: // 8 - 64 use internal, type transform in InitIndexWriters case InvertedIndexType::it_number_uint8: case InvertedIndexType::it_number_int16: case InvertedIndexType::it_number_uint16: case InvertedIndexType::it_number_int32: case InvertedIndexType::it_number_uint32: case InvertedIndexType::it_number_int64: case InvertedIndexType::it_number_uint64: case InvertedIndexType::it_range: case InvertedIndexType::it_date: case InvertedIndexType::it_spatial: return indexlib::index::INVERTED_INDEX_TYPE_STR; case InvertedIndexType::it_customized: return indexlibv2::index::ANN_INDEX_TYPE_STR; default: static string DEFAULT = "unknown"; return DEFAULT; } } void InvertedIndexConfig::SetInvertedIndexType(InvertedIndexType invertedIndexType) { _impl->invertedIndexType = invertedIndexType; } const string& InvertedIndexConfig::GetAnalyzer() const { return _impl->analyzer; } void InvertedIndexConfig::SetAnalyzer(const string& analyzerName) { _impl->analyzer = analyzerName; } void InvertedIndexConfig::SetOptionFlag(optionflag_t optionFlag) { _impl->optionFlag = optionFlag; } optionflag_t InvertedIndexConfig::GetOptionFlag() const { return _impl->optionFlag; } bool InvertedIndexConfig::IsShortListVbyteCompress() const { return _impl->isShortListVbyteCompress; } void InvertedIndexConfig::SetShortListVbyteCompress(bool isShortListVbyteCompress) { _impl->isShortListVbyteCompress = isShortListVbyteCompress; } void InvertedIndexConfig::SetIsReferenceCompress(bool isReferenceCompress) { _impl->isReferenceCompress = isReferenceCompress; } bool InvertedIndexConfig::IsReferenceCompress() const { return _impl->isReferenceCompress; } void InvertedIndexConfig::SetHashTypedDictionary(bool isHashType) { _impl->isHashTypedDictionary = isHashType; } bool InvertedIndexConfig::IsHashTypedDictionary() const { return _impl->isHashTypedDictionary; } // Dict bool InvertedIndexConfig::HasAdaptiveDictionary() const { return _impl->adaptiveDictConfig.operator bool(); } const std::shared_ptr<indexlib::config::AdaptiveDictionaryConfig>& InvertedIndexConfig::GetAdaptiveDictionaryConfig() const { return _impl->adaptiveDictConfig; } void InvertedIndexConfig::SetAdaptiveDictConfig( const std::shared_ptr<indexlib::config::AdaptiveDictionaryConfig>& dictConfig) { _impl->adaptiveDictConfig = dictConfig; } void InvertedIndexConfig::SetDictConfig(const std::shared_ptr<indexlib::config::DictionaryConfig>& dictConfig) { const string& nullTermLiteralStr = GetNullTermLiteralString(); _impl->dictConfig = dictConfig; _impl->highFreqVocabulary = HighFreqVocabularyCreator::CreateVocabulary( _impl->indexName, _impl->invertedIndexType, dictConfig, nullTermLiteralStr, GetDictHashParams()); } void InvertedIndexConfig::SetDictConfigWithoutVocabulary( const std::shared_ptr<indexlib::config::DictionaryConfig>& dictConfig) { _impl->dictConfig = dictConfig; } const std::shared_ptr<indexlib::config::DictionaryConfig>& InvertedIndexConfig::GetDictConfig() const { return _impl->dictConfig; } void InvertedIndexConfig::SetHighFreqencyTermPostingType(indexlib::index::HighFrequencyTermPostingType type) { _impl->highFrequencyTermPostingType = type; } indexlib::index::HighFrequencyTermPostingType InvertedIndexConfig::GetHighFrequencyTermPostingType() const { return _impl->highFrequencyTermPostingType; } void InvertedIndexConfig::SetHighFreqVocabulary( const std::shared_ptr<indexlib::config::HighFrequencyVocabulary>& vocabulary) { _impl->highFreqVocabulary = vocabulary; } const std::shared_ptr<indexlib::config::HighFrequencyVocabulary>& InvertedIndexConfig::GetHighFreqVocabulary() const { return _impl->highFreqVocabulary; } bool InvertedIndexConfig::IsBitmapOnlyTerm(const indexlib::index::DictKeyInfo& key) const { if (!_impl->highFreqVocabulary) { return false; } if (_impl->highFrequencyTermPostingType != indexlib::index::hp_bitmap) { return false; } return _impl->highFreqVocabulary->Lookup(key); } void InvertedIndexConfig::SetShardingType(InvertedIndexConfig::IndexShardingType shardingType) { _impl->shardingType = shardingType; } InvertedIndexConfig::IndexShardingType InvertedIndexConfig::GetShardingType() const { return _impl->shardingType; } void InvertedIndexConfig::AppendShardingIndexConfig(const std::shared_ptr<InvertedIndexConfig>& shardingIndexConfig) { assert(_impl->shardingType == InvertedIndexConfig::IST_NEED_SHARDING); _impl->shardingIndexConfigs.push_back(shardingIndexConfig); } indexid_t InvertedIndexConfig::GetParentIndexId() const { return _impl->parentIndexId; } void InvertedIndexConfig::SetParentIndexId(indexid_t indexId) { _impl->parentIndexId = indexId; } const vector<std::shared_ptr<InvertedIndexConfig>>& InvertedIndexConfig::GetShardingIndexConfigs() const { return _impl->shardingIndexConfigs; } const char* InvertedIndexConfig::InvertedIndexTypeToStr(InvertedIndexType invertedIndexType) { switch (invertedIndexType) { case it_pack: return "PACK"; case it_text: return "TEXT"; case it_expack: return "EXPACK"; case it_string: return "STRING"; case it_enum: return "ENUM"; case it_property: return "PROPERTY"; case it_number: case it_number_int8: case it_number_uint8: case it_number_int16: case it_number_uint16: case it_number_int32: case it_number_uint32: case it_number_int64: case it_number_uint64: return "NUMBER"; case it_primarykey64: return "PRIMARYKEY64"; case it_primarykey128: return "PRIMARYKEY128"; case it_trie: return "TRIE"; case it_spatial: return "SPATIAL"; case it_customized: return "CUSTOMIZED"; case it_kv: case it_kkv: return "PRIMARY_KEY"; case it_datetime: return "DATE"; // TODO: change to DATETIME after online upgrade case it_range: return "RANGE"; default: return "UNKNOWN"; } return "UNKNOWN"; } std::pair<Status, InvertedIndexType> InvertedIndexConfig::StrToIndexType(const string& typeStr) { if (!strcasecmp(typeStr.c_str(), "text")) { return {Status::OK(), it_text}; } else if (!strcasecmp(typeStr.c_str(), "string")) { return {Status::OK(), it_string}; } else if (!strcasecmp(typeStr.c_str(), "number")) { return {Status::OK(), it_number}; } else if (!strcasecmp(typeStr.c_str(), "enum")) { return {Status::OK(), it_enum}; } else if (!strcasecmp(typeStr.c_str(), "property")) { return {Status::OK(), it_property}; } else if (!strcasecmp(typeStr.c_str(), "pack")) { return {Status::OK(), it_pack}; } else if (!strcasecmp(typeStr.c_str(), "expack")) { return {Status::OK(), it_expack}; } else if (!strcasecmp(typeStr.c_str(), "primarykey64")) { return {Status::OK(), it_primarykey64}; } else if (!strcasecmp(typeStr.c_str(), "primarykey128")) { return {Status::OK(), it_primarykey128}; } else if (!strcasecmp(typeStr.c_str(), "trie")) { return {Status::OK(), it_trie}; } else if (!strcasecmp(typeStr.c_str(), "spatial")) { return {Status::OK(), it_spatial}; } else if (!strcasecmp(typeStr.c_str(), "date")) { return {Status::OK(), it_datetime}; } else if (!strcasecmp(typeStr.c_str(), "datetime")) { return {Status::OK(), it_datetime}; } else if (!strcasecmp(typeStr.c_str(), "range")) { return {Status::OK(), it_range}; } else if (!strcasecmp(typeStr.c_str(), "customized")) { return {Status::OK(), it_customized}; } stringstream ss; ss << "Unknown index_type: " << typeStr << ", support index_type are: "; for (int it = 0; it < (int)it_unknown; ++it) { ss << InvertedIndexTypeToStr((InvertedIndexType)it) << ","; } AUTIL_LOG(ERROR, "%s", ss.str().c_str()); return {Status::ConfigError(ss.str().c_str()), it_unknown}; } string InvertedIndexConfig::GetShardingIndexName(const string& indexName, size_t shardingIdx) { return indexName + "_@_" + autil::StringUtil::toString<size_t>(shardingIdx); } bool InvertedIndexConfig::GetIndexNameFromShardingIndexName(const string& shardingIndexName, string& indexName) { size_t pos = shardingIndexName.rfind("_@_"); if (pos == string::npos) { return false; } string numStr = shardingIndexName.substr(pos + 3); uint64_t num = 0; if (!StringUtil::fromString(numStr, num)) { return false; } indexName = shardingIndexName.substr(0, pos); return true; } void InvertedIndexConfig::SetVirtual(bool flag) { _impl->isVirtual = flag; } bool InvertedIndexConfig::IsVirtual() const { return _impl->isVirtual; } void InvertedIndexConfig::Disable() { _impl->status = (_impl->status == indexlib::is_normal) ? indexlib::is_disable : _impl->status; if (_impl->shardingType == InvertedIndexConfig::IST_NEED_SHARDING) { for (size_t i = 0; i < _impl->shardingIndexConfigs.size(); ++i) { _impl->shardingIndexConfigs[i]->Disable(); } } } void InvertedIndexConfig::Delete() { _impl->status = indexlib::is_deleted; } bool InvertedIndexConfig::IsDeleted() const { return _impl->status == indexlib::is_deleted; } bool InvertedIndexConfig::IsNormal() const { return _impl->status == indexlib::is_normal; } indexlib::IndexStatus InvertedIndexConfig::GetStatus() const { return _impl->status; } format_versionid_t InvertedIndexConfig::GetIndexFormatVersionId() const { return _impl->formatVersionId; } format_versionid_t InvertedIndexConfig::GetMaxSupportedIndexFormatVersionId() const { return _impl->maxSupportedFormatVersionId; } Status InvertedIndexConfig::SetIndexFormatVersionId(format_versionid_t id) { if (id > InvertedIndexConfig::BINARY_FORMAT_VERSION) { RETURN_IF_STATUS_ERROR(Status::ConfigError(), "unsupported format version [%d] for index [%s], " "which is bigger than binary supported version [%d]", id, _impl->indexName.c_str(), InvertedIndexConfig::BINARY_FORMAT_VERSION); } if (id > _impl->maxSupportedFormatVersionId) { AUTIL_LOG(INFO, "ignore SetIndexFormatVersionId to [%d] for index [%s], because max supported id is [%d].", id, _impl->indexName.c_str(), _impl->maxSupportedFormatVersionId); return Status::OK(); } _impl->formatVersionId = id; return Status::OK(); } Status InvertedIndexConfig::SetMaxSupportedIndexFormatVersionId(format_versionid_t id) { if (id > InvertedIndexConfig::BINARY_FORMAT_VERSION) { RETURN_IF_STATUS_ERROR(Status::ConfigError(), "unsupported max format version [%d] for index [%s], " "which is bigger than binary supported version [%d]", id, _impl->indexName.c_str(), InvertedIndexConfig::BINARY_FORMAT_VERSION); } _impl->maxSupportedFormatVersionId = id; if (_impl->formatVersionId > _impl->maxSupportedFormatVersionId) { AUTIL_LOG(INFO, "reset formatVersionId to [%d] for index [%s].", _impl->maxSupportedFormatVersionId, _impl->indexName.c_str()); return SetIndexFormatVersionId(_impl->maxSupportedFormatVersionId); } return Status::OK(); } bool InvertedIndexConfig::GetBloomFilterParamForDictionary(uint32_t& multipleNum, uint32_t& hashFuncNum) const { if (_impl->bloomFilterMultipleNum == 0 || _impl->bloomFilterMultipleNum == 1) { return false; } multipleNum = _impl->bloomFilterMultipleNum; hashFuncNum = GetHashFuncNumForBloomFilter(_impl->bloomFilterMultipleNum); return true; } uint32_t InvertedIndexConfig::GetHashFuncNumForBloomFilter(uint32_t multipleNum) { const static uint32_t HASH_FUNC_NUM[] = {0, 0, 1, 2, 3, 3, 4, 5, 6, 6, 7, 8, 8, 9, 10, 10, 10}; if (multipleNum > 16) { return 10; } return HASH_FUNC_NUM[multipleNum]; } void InvertedIndexConfig::EnableBloomFilterForDictionary(uint32_t multipleNum) { assert(multipleNum <= 16); _impl->bloomFilterMultipleNum = multipleNum; // 0 or 1 means disable bloom filter } void InvertedIndexConfig::SetOwnerModifyOperationId(indexlib::schema_opid_t opId) { _impl->ownerOpId = opId; } indexlib::schema_opid_t InvertedIndexConfig::GetOwnerModifyOperationId() const { return _impl->ownerOpId; } bool InvertedIndexConfig::SupportNull() const { Iterator iter = CreateIterator(); while (iter.HasNext()) { auto fieldConfig = iter.Next(); assert(fieldConfig); if (fieldConfig->IsEnableNullField()) { return true; } } return false; } string InvertedIndexConfig::GetNullTermLiteralString() const { std::string nullLiteralStr; InvertedIndexConfig::Iterator iter = CreateIterator(); while (iter.HasNext()) { auto fieldConfig = iter.Next(); if (fieldConfig && fieldConfig->IsEnableNullField()) { nullLiteralStr = fieldConfig->GetNullFieldLiteralString(); break; } } return nullLiteralStr; } bool InvertedIndexConfig::IsIndexUpdatable() const { return _impl->isIndexUpdatable; } void InvertedIndexConfig::SetIndexUpdatable(bool updatable) { _impl->isIndexUpdatable = updatable; } bool InvertedIndexConfig::IsPatchCompressed() const { return _impl->isPatchCompressed; } void InvertedIndexConfig::SetPatchCompressed(bool compressed) { _impl->isPatchCompressed = compressed; } void InvertedIndexConfig::SetFileCompressConfig(const std::shared_ptr<FileCompressConfig>& compressConfig) { _impl->fileCompressConfig = compressConfig; } void InvertedIndexConfig::SetFileCompressConfigV2(const std::shared_ptr<FileCompressConfigV2>& fileCompressConfigV2) { _impl->fileCompressConfigV2 = fileCompressConfigV2; } const std::shared_ptr<FileCompressConfig>& InvertedIndexConfig::GetFileCompressConfig() const { return _impl->fileCompressConfig; } const std::shared_ptr<FileCompressConfigV2>& InvertedIndexConfig::GetFileCompressConfigV2() const { return _impl->fileCompressConfigV2; } InvertedIndexConfig::Iterator InvertedIndexConfig::CreateIterator() const { return DoCreateIterator(); } // TODO(makuo.mnb) implement this function when migrate truncate void InvertedIndexConfig::TEST_SetIndexUpdatable(bool updatable) { SetIndexUpdatable(updatable); } InvertedIndexType InvertedIndexConfig::FieldTypeToInvertedIndexType(FieldType fieldType) { InvertedIndexType type = it_unknown; switch (fieldType) { case ft_int8: type = it_number_int8; break; case ft_uint8: type = it_number_uint8; break; case ft_int16: type = it_number_int16; break; case ft_uint16: type = it_number_uint16; break; case ft_integer: type = it_number_int32; break; case ft_uint32: type = it_number_uint32; break; case ft_long: type = it_number_int64; break; case ft_uint64: type = it_number_uint64; break; default: assert(false); break; } return type; } void InvertedIndexConfig::SetNonTruncateIndexName(const string& indexName) { _impl->nonTruncIndexName = indexName; } const string& InvertedIndexConfig::GetNonTruncateIndexName() const { return _impl->nonTruncIndexName; } void InvertedIndexConfig::SetHasTruncateFlag(bool flag) { _impl->hasTruncate = flag; } bool InvertedIndexConfig::HasTruncate() const { return _impl->hasTruncate; } void InvertedIndexConfig::SetUseTruncateProfilesStr(const std::string& useTruncateProfiles) { _impl->useTruncateProfiles = useTruncateProfiles; } bool InvertedIndexConfig::HasTruncateProfile(const TruncateProfileConfig* truncateProfileConfig) const { if (!_impl->hasTruncate) { return false; } autil::StringTokenizer st(_impl->useTruncateProfiles, USE_TRUNCATE_PROFILES_SEPRATOR, autil::StringTokenizer::TOKEN_TRIM | autil::StringTokenizer::TOKEN_IGNORE_EMPTY); for (autil::StringTokenizer::Iterator it = st.begin(); it != st.end(); ++it) { if (truncateProfileConfig->GetTruncateProfileName() == *it) { return true; } } return false; } std::string InvertedIndexConfig::GetUseTruncateProfilesStr() const { assert(_impl->hasTruncate); return _impl->useTruncateProfiles; } void InvertedIndexConfig::SetUseTruncateProfiles(const std::vector<std::string>& profiles) { _impl->useTruncateProfiles = autil::StringUtil::toString(profiles, USE_TRUNCATE_PROFILES_SEPRATOR); if (_impl->shardingType == InvertedIndexConfig::IST_NEED_SHARDING) { for (size_t i = 0; i < _impl->shardingIndexConfigs.size(); ++i) { _impl->shardingIndexConfigs[i]->SetUseTruncateProfiles(profiles); } } } std::vector<std::string> InvertedIndexConfig::GetUseTruncateProfiles() const { autil::StringTokenizer st(_impl->useTruncateProfiles, USE_TRUNCATE_PROFILES_SEPRATOR, autil::StringTokenizer::TOKEN_TRIM | autil::StringTokenizer::TOKEN_IGNORE_EMPTY); return st.getTokenVector(); } Status InvertedIndexConfig::LoadTruncateTermVocabulary(const std::shared_ptr<indexlib::file_system::ArchiveFolder>& metaFolder, const std::vector<std::string>& truncIndexNames) { auto truncTermVocabulary = std::make_shared<TruncateTermVocabulary>(/*legacyArchiveFolder=*/metaFolder); auto st = truncTermVocabulary->Init(truncIndexNames); if (!st.IsOK()) { AUTIL_LOG(ERROR, "init truncate term vocabulary failed."); return st; } if (truncTermVocabulary->GetTermCount() > 0) { _impl->truncateTermVocabulary = truncTermVocabulary; } return Status::OK(); } Status InvertedIndexConfig::LoadTruncateTermVocabulary(const std::shared_ptr<indexlib::file_system::IDirectory>& metaFolder, const std::vector<std::string>& truncIndexNames) { auto truncTermVocabulary = std::make_shared<TruncateTermVocabulary>(/*legacyArchiveFolder=*/nullptr); auto st = truncTermVocabulary->Init(metaFolder, truncIndexNames); if (!st.IsOK()) { AUTIL_LOG(ERROR, "init truncate term vocabulary failed."); return st; } if (truncTermVocabulary->GetTermCount() > 0) { _impl->truncateTermVocabulary = truncTermVocabulary; } return Status::OK(); } const std::shared_ptr<TruncateTermVocabulary>& InvertedIndexConfig::GetTruncateTermVocabulary() const { return _impl->truncateTermVocabulary; } bool InvertedIndexConfig::IsTruncateTerm(const indexlib::index::DictKeyInfo& key) const { if (!_impl->truncateTermVocabulary) { return false; } return _impl->truncateTermVocabulary->Lookup(key); } bool InvertedIndexConfig::GetTruncatePostingCount(const indexlib::index::DictKeyInfo& key, int32_t& count) const { count = 0; if (!_impl->truncateTermVocabulary) { return false; } return _impl->truncateTermVocabulary->LookupTF(key, count); } std::string InvertedIndexConfig::CreateTruncateIndexName(const std::string& indexName, const std::string& truncateProfileName) { return indexName + "_" + truncateProfileName; } const vector<std::shared_ptr<InvertedIndexConfig>>& InvertedIndexConfig::GetTruncateIndexConfigs() const { return _impl->truncateIndexConfigs; } void InvertedIndexConfig::AppendTruncateIndexConfig(const std::shared_ptr<InvertedIndexConfig>& truncateIndexConfig) { bool alreadyExist = false; for (size_t i = 0; i < _impl->truncateIndexConfigs.size(); ++i) { if (_impl->truncateIndexConfigs[i]->GetIndexName() == truncateIndexConfig->GetIndexName()) { alreadyExist = true; break; } } if (!alreadyExist) { _impl->truncateIndexConfigs.push_back(truncateIndexConfig); } } void InvertedIndexConfig::SetTruncatePayloadConfig(const indexlib::config::PayloadConfig& payloadConfig) { _impl->payloadConfig = payloadConfig; } const indexlib::config::PayloadConfig& InvertedIndexConfig::GetTruncatePayloadConfig() const { return _impl->payloadConfig; } Status InvertedIndexConfig::CheckCompatible(const IIndexConfig* other) const { const auto* typedOther = dynamic_cast<const InvertedIndexConfig*>(other); if (!typedOther) { RETURN_IF_STATUS_ERROR(Status::InvalidArgs(), "cast to InvertedIndexConfig failed"); } auto toJsonString = [](const config::IIndexConfig* config) { autil::legacy::Jsonizable::JsonWrapper json; config->Serialize(json); return ToJsonString(json.GetMap()); }; const auto& jsonStr = toJsonString(this); const auto& jsonStrOther = toJsonString(typedOther); if (jsonStr != jsonStrOther) { RETURN_IF_STATUS_ERROR(Status::InvalidArgs(), "original config [%s] is not compatible with [%s]", jsonStr.c_str(), jsonStrOther.c_str()); } return Status::OK(); } bool InvertedIndexConfig::IsDisabled() const { return _impl->status == indexlib::is_disable; } } // namespace indexlibv2::config