src/cas/internal_casserializer.cpp (447 lines of code) (raw):

/** \file internal_casserializer.cpp . ----------------------------------------------------------------------------- * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. ----------------------------------------------------------------------------- Description: ----------------------------------------------------------------------------- -------------------------------------------------------------------------- */ /* ----------------------------------------------------------------------- */ /* Include dependencies */ /* ----------------------------------------------------------------------- */ //#define DEBUG_VERBOSE #include "uima/pragmas.hpp" #include "uima/macros.h" #include "uima/internal_casserializer.hpp" #include "uima/internal_casimpl.hpp" #include "uima/lowlevel_fsheap.hpp" #include "uima/lowlevel_indexiterator.hpp" #include "uima/result_specification.hpp" #include "uima/casdefinition.hpp" /* ----------------------------------------------------------------------- */ /* Constants */ /* ----------------------------------------------------------------------- */ /* ----------------------------------------------------------------------- */ /* Forward declarations */ /* ----------------------------------------------------------------------- */ /* ----------------------------------------------------------------------- */ /* Types / Classes */ /* ----------------------------------------------------------------------- */ /* ----------------------------------------------------------------------- */ /* Implementation */ /* ----------------------------------------------------------------------- */ using namespace std; namespace uima { namespace internal { void CASSerializer::serializeResultSpec(ResultSpecification const & resultSpec, vector<SerializedCAS::TyNum>& resultSpecTypes, vector<SerializedCAS::TyNum>& resultSpecFeatures) { ResultSpecification::TyTypeOrFeatureSTLSet const & tofSet = resultSpec.getTypeOrFeatureSTLSet(); ResultSpecification::TyTypeOrFeatureSTLSet::const_iterator cit; for (cit = tofSet.begin(); cit != tofSet.end(); ++cit) { TypeOrFeature const & tof = *cit; if (tof.isType()) { Type t = tof.getType(); assert( t.isValid() ); resultSpecTypes.push_back( uima::internal::FSPromoter::demoteType(t) ); } else { Feature f = tof.getFeature(); assert( f.isValid() ); resultSpecFeatures.push_back( uima::internal::FSPromoter::demoteFeature(f) ); } } } bool isInterval(uima::lowlevel::TyFSType first, uima::lowlevel::TyFSType last, vector<uima::lowlevel::TyFSType> const & vec) { size_t i; for (i=0; i<vec.size(); ++i) { if (vec[i] != first + i) { return false; } } return true; } CASSerializer::CASSerializer(bool bCopyStrings) : iv_bCopyStrings(bCopyStrings) {} CASSerializer::~CASSerializer() {} UnicodeStringRef CASSerializer::createString(UChar const * cpBuf, size_t uiLen, uima::internal::SerializedCAS & rSerializedCAS) { UnicodeStringRef ref(cpBuf, uiLen); if (iv_bCopyStrings) { return rSerializedCAS.addString(ref); } return ref; } void CASSerializer::serializeTypeSystem(uima::internal::CASDefinition const & casDef, uima::internal::SerializedCAS & rSerializedCAS) { uima::lowlevel::TypeSystem const & crTypeSystem = casDef.getTypeSystem(); UnicodeStringRef invalidUSP(rSerializedCAS.iv_emptyString.getBuffer(), rSerializedCAS.iv_emptyString.length()); assert( invalidUSP.getBuffer() != NULL ); assert( invalidUSP.length() == 0); // 1. inheritance vector rSerializedCAS.iv_vecTypeInheritanceTable.clear(); size_t uiTypeNum = crTypeSystem.getNumberOfTypes() + 1; rSerializedCAS.iv_vecTypeInheritanceTable.resize(uiTypeNum, 0); #ifndef NDEBUG vector<uima::lowlevel::TyFSType> vecTypes; crTypeSystem.getAllTypes(vecTypes); assert( isInterval(1, uiTypeNum, vecTypes) ); assert( uiTypeNum == vecTypes.size() + 1); #endif size_t i; assert( 0 == uima::lowlevel::TypeSystem::INVALID_TYPE ); assert( 1 == crTypeSystem.getTopType() ); for (i=2; i<uiTypeNum; ++i) { uima::lowlevel::TyFSType tyChild = (uima::lowlevel::TyFSType) i; uima::lowlevel::TyFSType tyParent = crTypeSystem.getParentType( tyChild ); assert( tyParent <rSerializedCAS.iv_vecTypeInheritanceTable.size() ); rSerializedCAS.iv_vecTypeInheritanceTable[tyChild] = tyParent; } // 2. feature intro vector rSerializedCAS.iv_vecFeatureDefinitionTable.clear(); size_t uiFeatureNum = crTypeSystem.getNumberOfFeatures() + 1; // leave the first three cells empty rSerializedCAS.iv_vecFeatureDefinitionTable.resize(3,0); #ifndef NDEBUG vector<uima::lowlevel::TyFSFeature> vecFeatures; crTypeSystem.getAllFeatures(vecFeatures); assert( isInterval(1, uiFeatureNum, vecFeatures) ); assert( uiFeatureNum == vecFeatures.size() + 1); #endif assert( 0 == uima::lowlevel::TypeSystem::INVALID_FEATURE ); for (i=1; i<uiFeatureNum; ++i) { uima::lowlevel::TyFSFeature tyFeat = (uima::lowlevel::TyFSFeature) i; UIMA_TPRINT("Adding feature with ID: " << tyFeat ); UIMA_TPRINT("Adding feature: " << crTypeSystem.getFeatureName(tyFeat) ); uima::lowlevel::TyFSType tyIntroType = crTypeSystem.getIntroType(tyFeat); uima::lowlevel::TyFSType tyRangeType = crTypeSystem.getRangeType(tyFeat); int tyMultiRefs = crTypeSystem.isMultipleReferencesAllowed(tyFeat) ? 1 : 0; rSerializedCAS.iv_vecFeatureDefinitionTable.push_back( tyIntroType ); rSerializedCAS.iv_vecFeatureDefinitionTable.push_back( tyRangeType ); rSerializedCAS.iv_vecFeatureDefinitionTable.push_back( tyMultiRefs ); } #ifndef NDEBUG for (i=1; i<vecFeatures.size(); ++i) { uima::lowlevel::TyFSFeature tyFeat = vecFeatures[i]; uima::lowlevel::TyFSType tyIntroType = crTypeSystem.getIntroType(tyFeat); uima::lowlevel::TyFSType tyRangeType = crTypeSystem.getRangeType(tyFeat); int tyMultiRefs = crTypeSystem.isMultipleReferencesAllowed(tyFeat) ? 1 : 0; assert( (tyFeat*2) <rSerializedCAS.iv_vecFeatureDefinitionTable.size() ); assert( (tyFeat*2+1) <rSerializedCAS.iv_vecFeatureDefinitionTable.size() ); assert( rSerializedCAS.iv_vecFeatureDefinitionTable[tyFeat*3] == tyIntroType ); assert( rSerializedCAS.iv_vecFeatureDefinitionTable[tyFeat*3+1] == tyRangeType ); assert( rSerializedCAS.iv_vecFeatureDefinitionTable[tyFeat*3+2] == tyMultiRefs ); } #endif // 3. type string table rSerializedCAS.iv_vecTypeSymbolTable.resize(uiTypeNum); assert( rSerializedCAS.iv_vecTypeSymbolTable.size() == uiTypeNum ); rSerializedCAS.iv_vecTypeSymbolTable[0] = invalidUSP; for (i=1; i<uiTypeNum; ++i) { icu::UnicodeString const & crTypeName = crTypeSystem.getTypeName(i); UnicodeStringRef pus = createString( crTypeName.getBuffer(), crTypeName.length(), rSerializedCAS); rSerializedCAS.iv_vecTypeSymbolTable[i] = pus; } // 4. feature string and feature offset table rSerializedCAS.iv_vecFeatureSymbolTable.resize(uiFeatureNum); assert( rSerializedCAS.iv_vecFeatureSymbolTable.size() == uiFeatureNum ); rSerializedCAS.iv_vecFeatureOffsetTable.resize(uiFeatureNum); assert( rSerializedCAS.iv_vecFeatureOffsetTable.size() == uiFeatureNum ); rSerializedCAS.iv_vecFeatureSymbolTable[0] = invalidUSP; rSerializedCAS.iv_vecFeatureOffsetTable[0] = 0; for (i=1; i<uiFeatureNum; ++i) { uima::lowlevel::TyFSFeature tyFeat = i; // string icu::UnicodeString const & crFeatureName = crTypeSystem.getFeatureBaseName(tyFeat); UnicodeStringRef pus = createString( crFeatureName.getBuffer(), crFeatureName.length(), rSerializedCAS); rSerializedCAS.iv_vecFeatureSymbolTable[i] = pus; // offset rSerializedCAS.iv_vecFeatureOffsetTable[i] = crTypeSystem.getFeatureOffset(tyFeat); } // 5. type priorities rSerializedCAS.iv_vecTypePriorityTable.resize(uiTypeNum-1); for (i=1; i<uiTypeNum; ++i) { size_t num = crTypeSystem.getTypePriorityNumber((uima::lowlevel::TyFSType) i); rSerializedCAS.iv_vecTypePriorityTable[num] = i; } // 6. string sub types vector<uima::lowlevel::TyFSType> stringSubTypes; crTypeSystem.getDirectSubTypes( uima::internal::gs_tyStringType, stringSubTypes ); rSerializedCAS.iv_stringSubTypes.clear(); for (i=0; i<stringSubTypes.size(); ++i) { rSerializedCAS.iv_stringSubTypes.push_back(stringSubTypes[i]); } rSerializedCAS.iv_stringSubTypeValues.clear(); rSerializedCAS.iv_stringSubTypeValuePos.clear(); for (i=0; i<rSerializedCAS.iv_stringSubTypes.size(); ++i) { size_t n = rSerializedCAS.iv_stringSubTypeValues.size(); rSerializedCAS.iv_stringSubTypeValuePos.push_back(n); vector<icu::UnicodeString> const & stringValues = crTypeSystem.getStringsForStringSubtype(rSerializedCAS.iv_stringSubTypes[i]); size_t j; for (j=0; j<stringValues.size(); ++j) { UnicodeStringRef ref(stringValues[j]); rSerializedCAS.iv_stringSubTypeValues.push_back( ref ); } } assert( rSerializedCAS.iv_stringSubTypes.size() == rSerializedCAS.iv_stringSubTypeValuePos.size() ); } #if defined( _MSC_VER ) // locally disable warning about conversion from 'uima::internal::SerializedCAS::TyNum' to 'const int', possible loss of data # pragma warning( disable: 4244 ) #endif void CASSerializer::serializeIndexDefinition(uima::internal::CASDefinition const & casdef, uima::internal::SerializedCAS & rSerializedCAS) { uima::lowlevel::IndexDefinition const & indexDef = casdef.getIndexDefinition(); vector<uima::lowlevel::IndexDefinition::TyIndexID> vecIndexIDs; indexDef.getAllIndexIDs(vecIndexIDs); size_t uiIndexNum = vecIndexIDs.size(); rSerializedCAS.iv_vecIndexIDTable.resize(uiIndexNum); rSerializedCAS.iv_vecComparatorStartTable.resize(uiIndexNum); rSerializedCAS.iv_vecIndexKindTable.resize(uiIndexNum); rSerializedCAS.iv_vecComparatorDefinitionTable.clear(); size_t i; for (i=0; i<uiIndexNum; ++i) { uima::lowlevel::IndexDefinition::TyIndexID const & crIndexID = vecIndexIDs[i]; rSerializedCAS.iv_vecIndexIDTable[i] = createString( crIndexID.getBuffer(), crIndexID.length(), rSerializedCAS); rSerializedCAS.iv_vecIndexKindTable[i] = indexDef.getIndexKind( crIndexID ); UIMA_TPRINT("Index ID: " << crIndexID ); // start of the next comparator definition // is at the end of rSerializedCAS.iv_vecComparatorDefinitionTable rSerializedCAS.iv_vecComparatorStartTable[i] = rSerializedCAS.iv_vecComparatorDefinitionTable.size(); // add type of the index uima::lowlevel::TyFSType indexType = indexDef.getTypeForIndex(crIndexID); // add type of the comparator (even if the index has none) rSerializedCAS.iv_vecComparatorDefinitionTable.push_back( indexType ); uima::lowlevel::IndexComparator const * pComparator = indexDef.getComparator( crIndexID ); if ( pComparator != NULL ) { UIMA_TPRINT(" Index has comparator!"); assert( pComparator->getType() == indexType ); // serialize comparator vector<uima::lowlevel::TyFSFeature> const & crKeyFeatures = pComparator->getKeyFeatures(); vector<uima::lowlevel::IndexComparator::EnKeyFeatureComp> const & crCompOps = pComparator->getComparisonOps(); assert( crKeyFeatures.size() == crCompOps.size() ); // add all key features size_t j; for (j=0; j<crKeyFeatures.size(); ++j) { rSerializedCAS.iv_vecComparatorDefinitionTable.push_back( (SerializedCAS::TyNum) crKeyFeatures[j] ); rSerializedCAS.iv_vecComparatorDefinitionTable.push_back( (SerializedCAS::TyNum) crCompOps[j] ); } } } } #ifdef BYEBYEPTRS SerializedCAS::TyNum CASSerializer::adjustString(uima::lowlevel::TyHeapCell tyFeatureCell, TyStringMap & stringMap, uima::internal::SerializedCAS & rSerializedCAS) { UIMA_TPRINT("adjustString() entered"); UChar* * pPointerStringRefHeap = (UChar * *) tyFeatureCell; UChar * puc = *pPointerStringRefHeap; if (puc == NULL) { return 0; } assert( puc != NULL); assert( EXISTS(puc) ); assert( EXISTS(pPointerStringRefHeap+1) ); SerializedCAS::TyNum iStrLen = (SerializedCAS::TyNum) * (pPointerStringRefHeap+1); ptrdiff_t iStringIndex = 0; // try to find the string TyStringMap::iterator it = stringMap.lower_bound( puc ); // if not found if ( (it == stringMap.end()) || ( (*it).first != puc ) ) { iStringIndex = stringMap.size() + 1; // insert new one TyStringMap::value_type vt(puc, iStringIndex); stringMap.insert(it, vt); UIMA_TPRINT(" iStringIndex: " << iStringIndex << ", StringSymblTableSize: " << rSerializedCAS.iv_vecStringSymbolTable.size()); assert( iStringIndex == rSerializedCAS.iv_vecStringSymbolTable.size() ); UnicodeStringRef ustrp = createString(puc, iStrLen, rSerializedCAS); rSerializedCAS.iv_vecStringSymbolTable.push_back(ustrp); } else { iStringIndex = (*it).second; } return iStringIndex; } #endif void CASSerializer::serializeFSHeapAndStringHeap(uima::CAS const & crCAS, uima::internal::SerializedCAS & rSerializedCAS) { uima::internal::CASImpl const & crCASImpl = uima::internal::CASImpl::promoteCAS(crCAS); uima::lowlevel::FSHeap const & crHeap = crCASImpl.getHeap(); uima::lowlevel::FSHeap::TyFSHeap const & tyTempHeap = crHeap.iv_clTemporaryHeap; // copy the FSHeap as is (all offsets and values) size_t uiSegmentLength = tyTempHeap.getTopOfHeap(); uima::lowlevel::TyHeapCell* daHeap = tyTempHeap.getHeapStart(); rSerializedCAS.iv_vecFSHeapArray.resize(uiSegmentLength); // copy the heap (better way to do this?) for (size_t i=0; i<uiSegmentLength; i++) { rSerializedCAS.iv_vecFSHeapArray[i] = daHeap[i]; } // fill the vector of strings from the StringRefHeap uima::lowlevel::FSHeap::TyStringHeap const & tyStringHeap = crHeap.iv_clTemporaryStringHeap; uima::lowlevel::FSHeap::TyStringRefHeap const & tyStringRefHeap = crHeap.iv_clTemporaryStringRefHeap; int uiStringRefLength = tyStringRefHeap.getTopOfHeap(); int j = 1; // point at the first entry rSerializedCAS.iv_vecStringSymbolTable.resize(1); while (j < uiStringRefLength) { UnicodeStringRef ustrp = UnicodeStringRef( tyStringHeap.getHeapStart()+ tyStringRefHeap.getHeapValue(j), (size_t) tyStringRefHeap.getHeapValue(j+1)); rSerializedCAS.iv_vecStringSymbolTable.push_back(ustrp); j += 2; } } void CASSerializer::serializeHeaps(uima::CAS const & crCAS, uima::internal::SerializedCAS & rSerializedCAS) { //serialize the fs heap and string heap serializeFSHeapAndStringHeap(crCAS, rSerializedCAS); uima::internal::CASImpl const & crCASImpl = uima::internal::CASImpl::promoteCAS(crCAS); uima::lowlevel::FSHeap const & crHeap = crCASImpl.getHeap(); //8 bit heap uima::lowlevel::FSHeap::Ty8BitHeap const & ty8BitHeap = crHeap.iv_clTemporary8BitHeap; size_t uiSegmentLength = ty8BitHeap.getTopOfHeap(); char* byteHeap = ty8BitHeap.getHeapStart(); rSerializedCAS.iv_vecByteHeapArray.resize(uiSegmentLength); for (size_t i=0; i<uiSegmentLength; i++) { rSerializedCAS.iv_vecByteHeapArray[i] = byteHeap[i]; } //16 bit heap uima::lowlevel::FSHeap::Ty16BitHeap const & ty16BitHeap = crHeap.iv_clTemporary16BitHeap; uiSegmentLength = ty16BitHeap.getTopOfHeap(); short* shortHeap = ty16BitHeap.getHeapStart(); rSerializedCAS.iv_vecShortHeapArray.resize(uiSegmentLength); for (size_t i=0; i<uiSegmentLength; i++) { rSerializedCAS.iv_vecShortHeapArray[i] = shortHeap[i]; } //64 bit heap uima::lowlevel::FSHeap::Ty64BitHeap const & ty64BitHeap = crHeap.iv_clTemporary64BitHeap; uiSegmentLength = ty64BitHeap.getTopOfHeap(); INT64* longHeap = ty64BitHeap.getHeapStart(); rSerializedCAS.iv_vecLongHeapArray.resize(uiSegmentLength); for (size_t i=0; i<uiSegmentLength; i++) { rSerializedCAS.iv_vecLongHeapArray[i] = longHeap[i]; } } //--------------------------------------------------------------------- // Indexed FS Format // // Element Size Number of Description // (bytes) Elements // ------------ --------- -------------------------------- // 4 1 Number of Views in this CAS // 4 1 Number of Sofas in base Index Repository = nBase // 4 nBase TyFS array // // For each View: // 4 1 Number of FS in sofa Index Repository = nFS // 4 nFS TyFS array // //--------------------------------------------------------------------- void CASSerializer::serializeIndexedFSs(uima::CAS & crCAS, vector<uima::internal::SerializedCAS::TyNum> & iv_vecIndexedFSs) { uima::internal::CASImpl & crCASImpl = uima::internal::CASImpl::promoteCAS(crCAS); int numViews = crCAS.getBaseCas()->iv_sofaCount; iv_vecIndexedFSs.clear(); iv_vecIndexedFSs.push_back(numViews); uima::lowlevel::IndexRepository * crIndexRep = (uima::lowlevel::IndexRepository*)&crCASImpl.getBaseIndexRepository(); for (int view=0; view<=numViews; view++) { vector<SerializedCAS::TyNum> perLoopIndexedFSs; vector<uima::lowlevel::TyFSType> vecAllTypes; perLoopIndexedFSs.clear(); if (view==0) { // First time through is for base CAS index // FS returned should only be for SofaFS! crIndexRep->getUsedIndexes(vecAllTypes); } else { // for all views found in the CAS, get new IndexRepository crIndexRep = crCASImpl.iv_baseCas->iv_sofa2indexMap[view]; if (crIndexRep == 0) { // no indexed FS for this View, move on iv_vecIndexedFSs.push_back(0); continue; } crIndexRep->getUsedIndexes(vecAllTypes); //serialize the undefined index FSs for (size_t i=0;i < crIndexRep->iv_undefinedindex.size(); i++ ) { SerializedCAS::TyNum tyFSHeapIndex = (SerializedCAS::TyNum) crIndexRep->iv_undefinedindex[i]; perLoopIndexedFSs.push_back(tyFSHeapIndex); } } // serialize index per type if ( 0 == vecAllTypes.size() && 0 == perLoopIndexedFSs.size() ) { // no indexed FS for this View, move on iv_vecIndexedFSs.push_back(0); continue; } for (size_t i=0; i<vecAllTypes.size(); ++i) { vector<uima::lowlevel::internal::SingleIndex*> const & crSingleIndexes = crIndexRep->getAllSingleIndexesForType(vecAllTypes[i]); for (size_t j=0; j<crSingleIndexes.size(); ++j) { unique_ptr<uima::lowlevel::IndexIterator> apIt(crSingleIndexes[j]->createIterator()); for (apIt->moveToFirst(); apIt->isValid(); apIt->moveToNext()) { uima::lowlevel::TyHeapCell pHeapCell = (uima::lowlevel::TyHeapCell) apIt->get(); SerializedCAS::TyNum tyFSHeapIndex = (SerializedCAS::TyNum) pHeapCell; perLoopIndexedFSs.push_back( tyFSHeapIndex ); } } } // eliminate duplicates sort(perLoopIndexedFSs.begin(), perLoopIndexedFSs.end()); vector<SerializedCAS::TyNum>::iterator end = unique(perLoopIndexedFSs.begin(), perLoopIndexedFSs.end()); // append indexedFSs from this loop iv_vecIndexedFSs.push_back(end - perLoopIndexedFSs.begin()); iv_vecIndexedFSs.insert(iv_vecIndexedFSs.end(), perLoopIndexedFSs.begin(), end); } } /* no more document in de CAS void CASSerializer::serializeDocument(uima::TCAS const & crCAS, uima::internal::SerializedCAS & rSerializedCAS) { uima::internal::TCASImpl const & crTCASImpl = uima::internal::TCASImpl::promoteCAS(crCAS); rSerializedCAS.iv_ulstrDocument = crTCASImpl.getDocumentText(); } */ void CASSerializer::serializeDefinitions(uima::internal::CASDefinition const & casDef, uima::internal::SerializedCAS & rSerializedCAS) { serializeTypeSystem(casDef, rSerializedCAS); serializeIndexDefinition(casDef, rSerializedCAS); } #ifdef UIMA_ENABLE_SERIALIZATION_TIMING #define UIMA_SERIALIZATION_TIMING(x) x #else #define UIMA_SERIALIZATION_TIMING(x) #endif void CASSerializer::serializeData(uima::CAS & crCAS, uima::internal::SerializedCAS & rSerializedCAS) { // serializeDocument(crCAS, rSerializedCAS); // serialize indexed FSs first so that the docAnnot can be created if necessary UIMA_TPRINT("Serializing indexed FSs"); UIMA_SERIALIZATION_TIMING( iv_timerIndexedFSs.reset() ); UIMA_SERIALIZATION_TIMING( iv_timerIndexedFSs.start() ); serializeIndexedFSs(*crCAS.getBaseCas(), rSerializedCAS.iv_vecIndexedFSs); UIMA_SERIALIZATION_TIMING( iv_timerIndexedFSs.stop() ); UIMA_TPRINT("indexed FSs serialized"); UIMA_TPRINT("serializing all heaps"); UIMA_SERIALIZATION_TIMING( iv_timerFSHeap.reset() ); UIMA_SERIALIZATION_TIMING( iv_timerFSHeap.start() ); //serializeFSHeapAndStringHeap(*crCAS.getBaseCas(), rSerializedCAS); serializeHeaps(*crCAS.getBaseCas(), rSerializedCAS); UIMA_SERIALIZATION_TIMING( iv_timerFSHeap.stop() ); UIMA_TPRINT("FS heap serialized"); } //--------------------------------------------------------------------- // Blob Format // // Element Size Number of Description // (bytes) Elements // ------------ --------- -------------------------------- // 4 1 Blob key = "UIMA" in utf-8 // 4 1 Version (currently = 1) // 4 1 size of 32-bit FS Heap array = s32H // 4 s32H 32-bit FS heap array // 4 1 size of 16-bit string Heap array = sSH // 2 sSH 16-bit string heap array // 4 1 size of string Ref Heap array = sSRH // 4 2*sSRH string ref offsets and lengths // 4 1 size of FS index array = sFSI // 4 sFSI FS index array // 4 1 size of 8-bit Heap array = s8H // 1 s8H 8-bit Heap array // 4 1 size of 16-bit Heap array = s16H // 2 s16H 16-bit Heap array // 4 1 size of 64-bit Heap array = s64H // 8 s64H 64-bit Heap array //--------------------------------------------------------------------- // estimate total size of serialized CAS data size_t CASSerializer::getBlobSize(uima::CAS & crCAS) { // create STL vector of indexed FS so that we can size the output UIMA_SERIALIZATION_TIMING( iv_timerIndexedFSs.reset() ); UIMA_SERIALIZATION_TIMING( iv_timerIndexedFSs.start() ); serializeIndexedFSs(*crCAS.getBaseCas(), iv_vecIndexedFSs); UIMA_SERIALIZATION_TIMING( iv_timerIndexedFSs.stop() ); // get a heap of references uima::internal::CASImpl const & crCASImpl = uima::internal::CASImpl::promoteCAS(crCAS); uima::lowlevel::FSHeap const & crHeap = crCASImpl.getHeap(); uima::lowlevel::FSHeap::TyFSHeap const & tyTempHeap = crHeap.iv_clTemporaryHeap; uima::lowlevel::FSHeap::TyStringHeap const & tyStringHeap = crHeap.iv_clTemporaryStringHeap; uima::lowlevel::FSHeap::TyStringRefHeap const & tyStringRefHeap = crHeap.iv_clTemporaryStringRefHeap; uima::lowlevel::FSHeap::Ty8BitHeap const & ty8BitHeap = crHeap.iv_clTemporary8BitHeap; uima::lowlevel::FSHeap::Ty16BitHeap const & ty16BitHeap = crHeap.iv_clTemporary16BitHeap; uima::lowlevel::FSHeap::Ty64BitHeap const & ty64BitHeap = crHeap.iv_clTemporary64BitHeap; size_t uiFSHeapLength = tyTempHeap.getTopOfHeap(); size_t uiStringHeapLength = tyStringHeap.getTopOfHeap(); size_t uialignedStrLen = 2 * ((uiStringHeapLength + 1)/2); size_t uiRefHeapLength = tyStringRefHeap.getTopOfHeap(); size_t uiIndexedFSLength = iv_vecIndexedFSs.size(); size_t ui8BitHeapLength = ty8BitHeap.getTopOfHeap(); size_t uialigned8BitHeapLength = 4 * ((ui8BitHeapLength+3)/4); size_t ui16BitHeapLength = ty16BitHeap.getTopOfHeap(); size_t uialigned16BitHeapLength = 2 * ((ui16BitHeapLength+1)/2); size_t ui64BitHeapLength = ty64BitHeap.getTopOfHeap(); size_t blobSize = 2*4 // key and version + (1 + uiFSHeapLength) * 4 // FSHeap length and data + 1*4 + (uialignedStrLen * 2) // StringHeap length and data + (1 + uiRefHeapLength) * 4 // StringRefheap length and data + (1 + uiIndexedFSLength) * 4 // Indexed FS length and data + (1*4 + uialigned8BitHeapLength) // 8 Bit Heap length and data + (1*4 + uialigned16BitHeapLength*2 ) //16 Bit Heap length and data + (1*4 + ui64BitHeapLength*8 ); //64 Bit Heap length and data return blobSize; } // serialize CAS data into single blob format size_t CASSerializer::getBlob(uima::CAS & crCAS, void * buffer, size_t maxSize) { UIMA_SERIALIZATION_TIMING( iv_timerFSHeap.reset() ); UIMA_SERIALIZATION_TIMING( iv_timerFSHeap.start() ); // get a heap of references uima::internal::CASImpl const & crCASImpl = uima::internal::CASImpl::promoteCAS(crCAS); uima::lowlevel::FSHeap const & crHeap = crCASImpl.getHeap(); uima::lowlevel::FSHeap::TyFSHeap const & tyTempHeap = crHeap.iv_clTemporaryHeap; uima::lowlevel::FSHeap::TyStringHeap const & tyStringHeap = crHeap.iv_clTemporaryStringHeap; uima::lowlevel::FSHeap::TyStringRefHeap const & tyStringRefHeap = crHeap.iv_clTemporaryStringRefHeap; uima::lowlevel::FSHeap::Ty8BitHeap const & ty8BitHeap = crHeap.iv_clTemporary8BitHeap; uima::lowlevel::FSHeap::Ty16BitHeap const & ty16BitHeap = crHeap.iv_clTemporary16BitHeap; uima::lowlevel::FSHeap::Ty64BitHeap const & ty64BitHeap = crHeap.iv_clTemporary64BitHeap; size_t uiFSHeapLength = tyTempHeap.getTopOfHeap(); size_t uiStringHeapLength = tyStringHeap.getTopOfHeap(); size_t uialignedStrLen = 2 * ((uiStringHeapLength + 1)/2); size_t uiRefHeapLength = tyStringRefHeap.getTopOfHeap(); size_t uiIndexedFSLength = iv_vecIndexedFSs.size(); size_t ui8BitHeapLength = ty8BitHeap.getTopOfHeap(); size_t uialigned8BitHeapLength = 4 * ((ui8BitHeapLength+3)/4); size_t ui16BitHeapLength = ty16BitHeap.getTopOfHeap(); size_t uialigned16BitHeapLength = 2 * ((ui16BitHeapLength+1)/2); size_t ui64BitHeapLength = ty64BitHeap.getTopOfHeap(); size_t blobSize = 2*4 // key and version + (1 + uiFSHeapLength) * 4 // FSHeap length and data + 1*4 + (uialignedStrLen * 2) // StringHeap length and data + (1 + uiRefHeapLength) * 4 // StringRefheap length and data + (1 + uiIndexedFSLength) * 4 // Indexed FS length and data + (1*4 + uialigned8BitHeapLength) // 8 Bit Heap length and data + (1*4 + uialigned16BitHeapLength*2 ) //16 Bit Heap length and data + (1*4 + ui64BitHeapLength*8 ); //64 Bit Heap length and data if (blobSize > maxSize) { return 0; // can't serialize into given buffer } // copy all data into the blob buffer int* intPtr = (int*) buffer; #if defined(WORDS_BIGENDIAN) char key[] = "UIMA"; #else char key[] = "AMIU"; #endif int version = 1; intPtr[0] = ((int*)key)[0]; intPtr[1] = version; intPtr[2] = uiFSHeapLength; assert (blobSize > (size_t)((intPtr + 3 + uiFSHeapLength) - (int*)buffer)); memcpy(intPtr+3, tyTempHeap.getHeapStart(), 4*uiFSHeapLength); intPtr += 3 + uiFSHeapLength; intPtr[0] = uialignedStrLen; assert (blobSize > (size_t)((intPtr + 1 + uiStringHeapLength/2) - (int*)buffer)); memcpy(intPtr+1, tyStringHeap.getHeapStart(), 2*uiStringHeapLength); intPtr += 1 + uialignedStrLen/2; intPtr[0] = uiRefHeapLength; assert (blobSize > (size_t)((intPtr + 1 + uiRefHeapLength) - (int*)buffer)); memcpy(intPtr+1, tyStringRefHeap.getHeapStart(), 4*uiRefHeapLength); intPtr += 1 + uiRefHeapLength; intPtr[0] = uiIndexedFSLength; assert (blobSize >= (size_t)((intPtr + 1 + uiIndexedFSLength) - (int*)buffer)); memcpy(intPtr+1, &iv_vecIndexedFSs[0], 4*uiIndexedFSLength); intPtr += 1 + uiIndexedFSLength; intPtr[0] = uialigned8BitHeapLength; assert (blobSize > (size_t)((intPtr + 1 + uialigned8BitHeapLength/4) - (int*)buffer)); memcpy(intPtr+1, ty8BitHeap.getHeapStart(), ui8BitHeapLength); intPtr += 1 + uialigned8BitHeapLength/4; intPtr[0] = uialigned16BitHeapLength; assert (blobSize > (size_t)((intPtr + 1 + ui16BitHeapLength/2) - (int*)buffer)); memcpy(intPtr+1, ty16BitHeap.getHeapStart(), 2*ui16BitHeapLength); intPtr += 1 + uialigned16BitHeapLength/2; intPtr[0] = ui64BitHeapLength; assert (blobSize > (size_t)((intPtr + 1 + ui64BitHeapLength*2) - (int*)buffer)); memcpy(intPtr+1, ty64BitHeap.getHeapStart(), 8*ui64BitHeapLength); UIMA_SERIALIZATION_TIMING( iv_timerFSHeap.stop() ); return blobSize; } } } /* ----------------------------------------------------------------------- */