shared_ptr SingleDocumentParser::Parse()

in aios/storage/indexlib/document/normal/SingleDocumentParser.cpp [203:314]


// Builds a NormalDocument from the raw document carried by `document`.
//
// Processing order:
//   1. Reject a null `document` or a null raw document.
//   2. SKIP_DOC / CHECKPOINT_DOC / UNKNOWN_OP are handed straight to
//      CreateDocument() without any field conversion.
//   3. Run the optional null-field appender and set the primary key field.
//   4. Convert every configured field into the inverted-index, attribute,
//      summary and field-meta parts it belongs to.
//   5. Optionally create the source document, serialize the summary, and
//      append section / pack attributes.
//   6. Validate() the result and hand it to CreateDocument().
//
// @param document  extend document to parse; may be null.
// @return nullptr when `document` or its raw document is null, when the source
//         index is configured but no original snapshot is available, when
//         summary serialization or section-attribute appending fails, or when
//         Validate() rejects the document; otherwise the result of
//         CreateDocument(document).
shared_ptr<NormalDocument> SingleDocumentParser::Parse(NormalExtendDocument* document)
{
    if (!document) {
        AUTIL_LOG(ERROR, "document is null");
        return nullptr;
    }
    const shared_ptr<RawDocument>& rawDoc = document->GetRawDocument();
    if (!rawDoc) {
        AUTIL_LOG(ERROR, "empty raw document!");
        return nullptr;
    }

    // These operate types carry no field data to convert: pass them through.
    const DocOperateType incomingOp = rawDoc->getDocOperateType();
    if (incomingOp == SKIP_DOC || incomingOp == CHECKPOINT_DOC || incomingOp == UNKNOWN_OP) {
        return CreateDocument(document);
    }

    if (_nullFieldAppender) {
        _nullFieldAppender->Append(rawDoc);
    }

    SetPrimaryKeyField(document);
    if (document->getTokenizeDocument()->getFieldCount() == 0) {
        AUTIL_LOG(DEBUG, "tokenizeDoc is empty");
    }

    // Read the operate type once, after the raw document has been prepared,
    // and derive the predicates used by every remaining step from it.
    const DocOperateType op = rawDoc->getDocOperateType();
    const bool isDelete = (op == DELETE_DOC || op == DELETE_SUB_DOC);
    const bool isUpdate = (op == UPDATE_FIELD);
    const bool isAdd = (op == ADD_DOC);

    for (const auto& fieldConfig : _fieldConfigs) {
        // For deletions only the order-preserving field is converted.
        if (isDelete && fieldConfig->GetFieldName() != _orderPreservingField) {
            continue;
        }

        const fieldid_t fieldId = fieldConfig->GetFieldId();
        if (IsInvertedIndexField(fieldId)) {
            _fieldConvertPtr->convertIndexField(document, fieldConfig);
        }

        // TODO: indexlib need to fix this
        // use full schema to tell if its updatable(set to attr doc)
        if (IsAttributeIndexField(fieldId)) {
            if (isUpdate) {
                // On update, convert only attributes that are updatable, are
                // actually present in the raw document, and are not the
                // primary key.
                const auto& attributeConfig = GetAttributeConfig(fieldId);
                assert(attributeConfig);
                if (attributeConfig->IsAttributeUpdatable() && rawDoc->exist(attributeConfig->GetAttrName()) &&
                    _primaryKeyFieldId != fieldId) {
                    _fieldConvertPtr->convertAttributeField(document, fieldConfig);
                }
            } else {
                _fieldConvertPtr->convertAttributeField(document, fieldConfig);
            }
        }

        // Summary fields are not converted for UPDATE_FIELD documents.
        if (_summaryIndexConfig && _summaryIndexConfig->NeedStoreSummary(fieldId) && !isUpdate) {
            _fieldConvertPtr->convertSummaryField(document, fieldConfig);
        }

        if (IsFieldMetaIndexField(fieldId)) {
            _fieldConvertPtr->convertFieldMetaField(document, fieldConfig);
        }
    }

    const shared_ptr<ClassifiedDocument>& classifiedDocument = document->getClassifiedDocument();
    const shared_ptr<AttributeDocument>& attrDoc = classifiedDocument->getAttributeDoc();
    if (attrDoc && attrDoc->HasFormatError() && _attributeConvertErrorCounter) {
        _attributeConvertErrorCounter->Increase(1);
    }

    if (_sourceIndexConfig && !isDelete) {
        auto originalSnapshot = classifiedDocument->getOriginalSnapshot();
        if (!originalSnapshot) {
            AUTIL_LOG(ERROR, "source index need original raw documnent, but not found");
            return nullptr;
        }
        // Start from the configured per-group field lists; USER_DEFINE groups
        // may be overridden per document through the "udf_source_fields" tag.
        std::vector<std::vector<std::string>> fieldsInGroups = _sourceDeterministicFieldsInGroups;
        // Bind by const reference: iterating by value would copy every group
        // config handle once per document.
        for (const auto& sourceGroupConfig : _sourceIndexConfig->GetGroupConfigs()) {
            if (sourceGroupConfig->GetFieldMode() == indexlib::config::SourceGroupConfig::USER_DEFINE) {
                if (const auto& fieldsStr = rawDoc->GetTag("udf_source_fields"); !fieldsStr.empty()) {
                    auto& udfFields = fieldsInGroups[sourceGroupConfig->GetGroupId()];
                    autil::StringUtil::fromString(fieldsStr, udfFields, ";");
                }
            }
        }
        classifiedDocument->createSourceDocument(fieldsInGroups, originalSnapshot.get());
    }

    if (_summaryIndexConfig && _summaryIndexConfig->NeedStoreSummary() && !isDelete) {
        SummaryFormatter formatter(_summaryIndexConfig);
        auto status = classifiedDocument->serializeSummaryDocument(formatter);
        if (!status.IsOK()) {
            AUTIL_LOG(ERROR, "serialize summary document failed");
            return nullptr;
        }
    }

    // Section and pack attributes are only appended for ADD_DOC.
    if (_sectionAttrAppender && isAdd) {
        // Work on a clone so the member appender itself is left untouched.
        shared_ptr<SectionAttributeAppender> appender(_sectionAttrAppender->Clone());
        auto status = appender->AppendSectionAttribute(classifiedDocument->getIndexDocument());
        if (!status.IsOK()) {
            AUTIL_LOG(ERROR, "append section attribute failed");
            return nullptr;
        }
    }
    if (_packAttrAppender && isAdd) {
        // A pack-attribute failure is only traced; parsing continues and the
        // final decision is left to Validate() below.
        if (!_packAttrAppender->AppendPackAttribute(classifiedDocument->getAttributeDoc(),
                                                    classifiedDocument->getPool())) {
            IE_RAW_DOC_TRACE(rawDoc, "parse error: append packAttribute failed.");
        }
    }

    if (!Validate(document)) {
        return nullptr;
    }
    return CreateDocument(document);
}