in aios/storage/indexlib/document/normal/SingleDocumentParser.cpp [203:314]
// Parses an extend document into a NormalDocument.
//
// Steps, in order:
//   1. Return nullptr when `document` or its raw document is null.
//   2. For SKIP_DOC / CHECKPOINT_DOC / UNKNOWN_OP, skip all field conversion and
//      return CreateDocument(document) directly.
//   3. Run the optional null-field appender and set the primary key field.
//   4. For every configured field, convert it into the inverted-index, attribute,
//      summary and field-meta parts it belongs to (subject to the operate type).
//   5. Build the source document, serialize the summary document, and append
//      section / pack attributes where configured.
//   6. Validate the document and return CreateDocument(document).
//
// @param document  extend document to parse; a null pointer is tolerated.
// @return the result of CreateDocument(document), or nullptr when the input is
//         null, the original snapshot required by the source index is missing,
//         a user-defined source group id is out of range, summary serialization
//         or section attribute appending fails, or Validate() rejects the document.
shared_ptr<NormalDocument> SingleDocumentParser::Parse(NormalExtendDocument* document)
{
    if (!document) {
        AUTIL_LOG(ERROR, "document is null");
        return nullptr;
    }
    const shared_ptr<RawDocument>& rawDoc = document->GetRawDocument();
    if (!rawDoc) {
        AUTIL_LOG(ERROR, "empty raw document!");
        return nullptr;
    }

    // These operate types carry no fields to convert: build the document as-is.
    const DocOperateType initialOp = rawDoc->getDocOperateType();
    if (initialOp == SKIP_DOC || initialOp == CHECKPOINT_DOC || initialOp == UNKNOWN_OP) {
        return CreateDocument(document);
    }

    if (_nullFieldAppender) {
        _nullFieldAppender->Append(rawDoc);
    }
    SetPrimaryKeyField(document);
    if (document->getTokenizeDocument()->getFieldCount() == 0) {
        AUTIL_LOG(DEBUG, "tokenizeDoc is empty");
    }

    // Read the operate type once, after the raw document has been through the
    // null-field appender and primary-key setup, and reuse it everywhere below.
    const DocOperateType op = rawDoc->getDocOperateType();
    const bool isDelete = (op == DELETE_DOC || op == DELETE_SUB_DOC);
    const bool isUpdate = (op == UPDATE_FIELD);
    const bool isAdd = (op == ADD_DOC);

    for (const auto& fieldConfig : _fieldConfigs) {
        // Delete operations only convert the order-preserving field.
        if (isDelete && fieldConfig->GetFieldName() != _orderPreservingField) {
            continue;
        }
        const fieldid_t fieldId = fieldConfig->GetFieldId();
        if (IsInvertedIndexField(fieldId)) {
            _fieldConvertPtr->convertIndexField(document, fieldConfig);
        }
        // TODO: indexlib need to fix this
        // use full schema to tell if its updatable(set to attr doc)
        if (IsAttributeIndexField(fieldId)) {
            if (isUpdate) {
                const auto& attributeConfig = GetAttributeConfig(fieldId);
                assert(attributeConfig);
                if (!attributeConfig) {
                    // assert() is compiled out under NDEBUG; do not dereference null there.
                    AUTIL_LOG(ERROR, "attribute config not found for field id [%d]", static_cast<int>(fieldId));
                } else if (attributeConfig->IsAttributeUpdatable() && rawDoc->exist(attributeConfig->GetAttrName()) &&
                           _primaryKeyFieldId != fieldId) {
                    // Updates only touch updatable, non-primary-key attributes that are
                    // actually present in the raw document.
                    _fieldConvertPtr->convertAttributeField(document, fieldConfig);
                }
            } else {
                _fieldConvertPtr->convertAttributeField(document, fieldConfig);
            }
        }
        // Summary fields are not converted for UPDATE_FIELD.
        if (_summaryIndexConfig && _summaryIndexConfig->NeedStoreSummary(fieldId) && !isUpdate) {
            _fieldConvertPtr->convertSummaryField(document, fieldConfig);
        }
        if (IsFieldMetaIndexField(fieldId)) {
            _fieldConvertPtr->convertFieldMetaField(document, fieldConfig);
        }
    }

    const shared_ptr<ClassifiedDocument>& classifiedDocument = document->getClassifiedDocument();
    const shared_ptr<AttributeDocument>& attrDoc = classifiedDocument->getAttributeDoc();
    if (attrDoc && attrDoc->HasFormatError() && _attributeConvertErrorCounter) {
        _attributeConvertErrorCounter->Increase(1);
    }

    if (_sourceIndexConfig && !isDelete) {
        auto originalSnapshot = classifiedDocument->getOriginalSnapshot();
        if (!originalSnapshot) {
            AUTIL_LOG(ERROR, "source index need original raw documnent, but not found");
            return nullptr;
        }
        // Work on a per-document copy: user-defined groups get their field list
        // filled in from the "udf_source_fields" tag of this raw document.
        std::vector<std::vector<std::string>> fieldsInGroups = _sourceDeterministicFieldsInGroups;
        for (const auto& sourceGroupConfig : _sourceIndexConfig->GetGroupConfigs()) {
            if (sourceGroupConfig->GetFieldMode() != indexlib::config::SourceGroupConfig::USER_DEFINE) {
                continue;
            }
            if (auto fieldsStr = rawDoc->GetTag("udf_source_fields"); !fieldsStr.empty()) {
                const auto groupId = static_cast<size_t>(sourceGroupConfig->GetGroupId());
                if (groupId >= fieldsInGroups.size()) {
                    // Indexing past the end would be undefined behaviour; fail the document instead.
                    AUTIL_LOG(ERROR, "source group id [%zu] out of range, group count [%zu]", groupId,
                              fieldsInGroups.size());
                    return nullptr;
                }
                autil::StringUtil::fromString(fieldsStr, fieldsInGroups[groupId], ";");
            }
        }
        classifiedDocument->createSourceDocument(fieldsInGroups, originalSnapshot.get());
    }

    if (_summaryIndexConfig && _summaryIndexConfig->NeedStoreSummary() && !isDelete) {
        SummaryFormatter formatter(_summaryIndexConfig);
        auto status = classifiedDocument->serializeSummaryDocument(formatter);
        if (!status.IsOK()) {
            AUTIL_LOG(ERROR, "serialize summary document failed");
            return nullptr;
        }
    }

    if (_sectionAttrAppender && isAdd) {
        // A fresh clone is used for each document rather than the member appender itself.
        shared_ptr<SectionAttributeAppender> appender(_sectionAttrAppender->Clone());
        auto status = appender->AppendSectionAttribute(classifiedDocument->getIndexDocument());
        if (!status.IsOK()) {
            AUTIL_LOG(ERROR, "append section attribute failed");
            return nullptr;
        }
    }

    if (_packAttrAppender && isAdd) {
        // NOTE(review): a pack attribute failure is only traced and parsing continues;
        // this looks like deliberate best-effort handling -- confirm before making it fatal.
        if (!_packAttrAppender->AppendPackAttribute(classifiedDocument->getAttributeDoc(),
                                                    classifiedDocument->getPool())) {
            IE_RAW_DOC_TRACE(rawDoc, "parse error: append packAttribute failed.");
        }
    }

    if (!Validate(document)) {
        return nullptr;
    }
    return CreateDocument(document);
}