in uimaj-core/src/main/java/org/apache/uima/cas/impl/BinaryCasSerDes.java [653:1003]
/**
 * Deserializes a plain binary CAS image (full or delta) from the stream held by the header's
 * reader.
 *
 * <p>
 * The sections are consumed in this order: main FS heap, string char heap, string ref heap,
 * (delta only) modified main-heap words, indexed FSs, byte heap, short heap, long heap, and
 * (delta only) modifications to existing byte / short / long array elements. After the raw data
 * is read, the new heap cells are converted to feature structures, the delta modifications to
 * existing FSs are applied, and the indexes are re-initialized.
 *
 * <p>
 * On normal completion the heap fields ({@code heap}, {@code stringHeap}, {@code byteHeap},
 * {@code shortHeap}, {@code longHeap}) are set to {@code null} and the aux-address-to-FS tables
 * are cleared.
 *
 * @param h
 *          the parsed header; supplies the delta flag, the reader, and whether type system /
 *          index definitions were included
 * @return {@link SerialFormat#BINARY_TSI} if {@code h.typeSystemIndexDefIncluded} is set,
 *         otherwise {@link SerialFormat#BINARY}
 * @throws CASRuntimeException
 *           with key {@code BLOB_DESERIALIZATION} if an {@link IOException} occurs while reading
 */
private SerialFormat binaryDeserialization(Header h) {
  final boolean delta = h.isDelta;
  final Reading r = h.reading;
  final DataInputStream dis = r.dis;
  final CommonSerDesSequential csds = BinaryCasSerDes4.getCsds(baseCas, delta);

  if (delta) {
    if (nextHeapAddrAfterMark == 0 || heap == null || heap.getCellsUsed() <= 1) {
      // can't deserialize a delta without a previous binary serialization for this CAS
      Misc.internalError();
    }
  } else {
    // full deserialization: start from empty heaps, reusing existing instances when present
    if (heap == null) {
      heap = new Heap();
    } else {
      heap.reset();
    }
    if (byteHeap == null) {
      byteHeap = new ByteHeap();
    } else {
      byteHeap.reset();
    }
    if (shortHeap == null) {
      shortHeap = new ShortHeap();
    } else {
      shortHeap.reset();
    }
    if (longHeap == null) {
      longHeap = new LongHeap();
    } else {
      longHeap.reset();
    }
    if (stringHeap == null) {
      stringHeap = new StringHeap();
    } else {
      stringHeap.reset();
    }
    clearDeltaOffsets();
  }

  try {
    // ---- main fsheap ----
    final int fsheapsz = r.readInt();
    // reading the 0th (null) element, because that's what V2 did
    int startPos = 0;
    if (!delta) {
      heap.reinitSizeOnly(fsheapsz);
    } else {
      startPos = heap.getNextId();
      heap.grow(fsheapsz);
    }
    if (TRACE_DESER) {
      System.out.format("BinDes Plain %s startPos: %,d mainHeapSize: %d%n", delta ? "Delta" : "",
              startPos, fsheapsz);
    }
    // add new heap slots
    for (int i = startPos; i < fsheapsz + startPos; i++) {
      heap.heap[i] = r.readInt();
    }

    // ---- string heap (chars) ----
    int stringheapsz = r.readInt();
    final StringHeapDeserializationHelper shdh = new StringHeapDeserializationHelper();
    shdh.charHeap = new char[stringheapsz];
    for (int i = 0; i < stringheapsz; i++) {
      shdh.charHeap[i] = (char) r.readShort();
    }
    shdh.charHeapPos = stringheapsz;
    // word alignment: an odd number of 2-byte chars is followed by one padding char
    if (stringheapsz % 2 != 0) {
      dis.readChar();
    }

    // ---- string ref heap ----
    // The stream holds a leading 0 followed by (offset, length) pairs; convert that count into
    // the number of ints needed for 3-int reference cells.
    int refheapsz = r.readInt();
    refheapsz--;
    refheapsz = refheapsz / 2;
    refheapsz = refheapsz * 3;
    // read back into references consisting of three ints
    // --stringheap offset, length, stringlist offset
    shdh.refHeap = new int[StringHeapDeserializationHelper.FIRST_CELL_REF + refheapsz];
    dis.readInt(); // 0
    for (int i = shdh.refHeapPos; i < shdh.refHeap.length; i += StringHeapDeserializationHelper.REF_HEAP_CELL_SIZE) {
      shdh.refHeap[i + StringHeapDeserializationHelper.CHAR_HEAP_POINTER_OFFSET] = r.readInt();
      shdh.refHeap[i + StringHeapDeserializationHelper.CHAR_HEAP_STRLEN_OFFSET] = r.readInt();
      shdh.refHeap[i + StringHeapDeserializationHelper.STRING_LIST_ADDR_OFFSET] = 0;
    }
    shdh.refHeapPos = refheapsz + StringHeapDeserializationHelper.FIRST_CELL_REF;
    stringHeap.reinit(shdh, delta);

    // ---- (delta only) modified main-heap cells ----
    final int fsmodssz2;
    final int[] modWords;
    if (delta) {
      // Delta Binary Deserialization
      //
      // At this point, we have
      // - not yet converted the main heap array into FSs.
      // - not yet read in Aux Heaps (except for strings)
      //
      // So, we do this in 2 phases.
      // - This phase just reads in the data but does not act on it.
      // - Phase 2 happens after the FSs are created from the heap data.
      fsmodssz2 = 2 * r.readInt(); // each modification is an (address, value) pair
      modWords = new int[fsmodssz2];
      for (int i = 0; i < fsmodssz2; i++) {
        modWords[i] = r.readInt();
      }
      if (TRACE_DESER) {
        System.out.format("BinDes modified heap slot count: %,d%n", fsmodssz2 / 2);
      }
    } else {
      fsmodssz2 = 0; // not used but must be set to make "final" work
      modWords = null;
    }

    // ---- indexed FSs ----
    int fsindexsz = r.readInt();
    int[] fsindexes = new int[fsindexsz];
    if (TRACE_DESER) {
      System.out.format("BinDes indexedFSs count: %,d%n", fsindexsz);
    }
    for (int i = 0; i < fsindexsz; i++) {
      fsindexes[i] = r.readInt();
      if (TRACE_DESER) {
        if (i % 5 == 0) {
          System.out.format("%n i: %5d ", i);
        }
        System.out.format("%15d ", fsindexes[i]);
      }
    }
    if (TRACE_DESER) {
      System.out.println("");
    }

    // ---- byte heap ----
    int heapsz = r.readInt();
    if (TRACE_DESER) {
      System.out.format("BinDes ByteHeap size: %,d%n", heapsz);
    }
    if (!delta) {
      byteHeap.heap = new byte[Math.max(16, heapsz)]; // must be > 0
      dis.readFully(byteHeap.heap, 0, heapsz);
      byteHeap.heapPos = heapsz;
    } else {
      final int offset2startOfNewBytes = byteHeap.reserve(heapsz);
      dis.readFully(byteHeap.heap, offset2startOfNewBytes, heapsz);
    }
    // word alignment: skip padding up to the next 4-byte boundary
    int align = (4 - (heapsz % 4)) % 4;
    BinaryCasSerDes6.skipBytes(dis, align);

    // ---- short heap ----
    heapsz = r.readInt();
    if (TRACE_DESER) {
      System.out.format("BinDes ShortHeap size: %,d%n", heapsz);
    }
    if (!delta) {
      shortHeap.heap = new short[Math.max(16, heapsz)]; // must be > 0
      for (int i = 0; i < heapsz; i++) {
        shortHeap.heap[i] = r.readShort();
      }
      shortHeap.heapPos = heapsz;
    } else {
      // Fill the region handed back by reserve() directly, exactly as the byte-heap branch
      // does. Appending with addShort() after reserve() would place the values beyond the
      // reserved cells if reserve() has already advanced the heap position.
      final int pos = shortHeap.reserve(heapsz);
      final int end = pos + heapsz;
      for (int i = pos; i < end; i++) {
        shortHeap.heap[i] = r.readShort();
      }
    }
    // word alignment: an odd number of shorts is followed by one padding short
    if (heapsz % 2 != 0) {
      dis.readShort();
    }

    // ---- long heap ----
    heapsz = r.readInt();
    if (TRACE_DESER) {
      System.out.format("BinDes LongHeap size: %,d%n", heapsz);
    }
    if (!delta) {
      longHeap.heap = new long[Math.max(16, heapsz)]; // must be > 0
      for (int i = 0; i < heapsz; i++) {
        longHeap.heap[i] = r.readLong();
      }
      longHeap.heapPos = heapsz;
    } else {
      // Same pattern as the byte and short heaps: write into the reserved region rather than
      // appending after it.
      final int pos = longHeap.reserve(heapsz);
      final int end = pos + heapsz;
      for (int i = pos; i < end; i++) {
        longHeap.heap[i] = r.readLong();
      }
    }

    // ---- (delta only) modifications to existing aux-array elements ----
    if (delta) {
      // The modifications are all to existing FSs.
      // The modifications consist of an address (offset in an aux array) which is an array
      // element.
      // We don't update the aux array, but instead update the actual FS below the line
      // representing the array.
      // To identify the fs, we use the xxAuxAddr2fsa sorted list forms and do a binary search to
      // find the item to update, with a fast path for the same or next item.
      // Same - use case is multiple updates into the same array

      // modified Byte Heap
      heapsz = updateAuxArrayMods(r, byteAuxAddr2fsa, (ba, arrayIndex) -> {
        if (ba instanceof ByteArray byteArray) {
          byteArray.set(arrayIndex, dis.readByte());
        } else {
          ((BooleanArray) ba).set(arrayIndex, dis.readByte() == 1);
        }
      });
      // word alignment
      align = (4 - (heapsz % 4)) % 4;
      BinaryCasSerDes6.skipBytes(dis, align);

      // modified Short Heap
      heapsz = updateAuxArrayMods(r, shortAuxAddr2fsa, (sa, arrayIndex) -> {
        ((ShortArray) sa).set(arrayIndex, r.readShort());
      });
      // word alignment
      if (heapsz % 2 != 0) {
        dis.readShort();
      }

      // modified Long Heap
      updateAuxArrayMods(r, longAuxAddr2fsa, (la, arrayIndex) -> {
        if (la instanceof LongArray longArray) {
          longArray.set(arrayIndex, r.readLong());
        } else {
          ((DoubleArray) la).set(arrayIndex, CASImpl.long2double(r.readLong()));
        }
      });
    } // of delta - modified processing

    // *********************************************
    // Convert model heap added FS into real FS *
    // update addr2fs and fs2addr *
    // update byte/short/long/string auxAddr2fsa *
    // *********************************************
    // build the new FSs and record in addr2FSs; slot 0 is the null element, so a full
    // deserialization starts converting at 1
    createFSsFromHeaps(delta, startPos == 0 ? 1 : startPos, csds);

    // ---- (delta only) phase 2: apply the modified main-heap words to existing FSs ----
    if (delta) {
      final BinDeserSupport bds = new BinDeserSupport();
      bds.fssAddrArray = new int[csds.addr2fs.size() + 1]; // need one extra at the end
      IntListIterator it = csds.addr2fs.keyIterator();
      int iaa = 0;
      while (it.hasNext()) {
        bds.fssAddrArray[iaa++] = it.nextNvc();
      }
      // iaa at this point refs the last entry in the table
      bds.fssAddrArray[iaa] = heap.getCellsUsed();
      // sort because addr2fs.keyIterator is arbitrary order due to hash table impl
      Arrays.sort(bds.fssAddrArray);
      assert (bds.fssAddrArray[iaa] == heap.getCellsUsed());
      bds.fsStartAddr = -1; // avoid initial addback of addback/remove pair.

      // loop over all heap modifications to existing FSs
      // first disable auto addbacks for index corruption - this routine is handling that
      baseCas.svd.disableAutoCorruptionCheck = true;
      try {
        for (int i = 0; i < modWords.length; i = i + 2) {
          final int heapAddrBeingModified = modWords[i];
          bds.maybeAddBackAndRemoveFs(heapAddrBeingModified, csds.addr2fs);
          updateHeapSlot(bds, heapAddrBeingModified, modWords[i + 1], csds.addr2fs);
        }
        bds.addBackIfRemoved();
        bds.fssAddrArray = null; // free storage
      } finally {
        baseCas.svd.disableAutoCorruptionCheck = false;
      }
    }

    // ---- update the indexes ----
    IntFunction<TOP> getFsFromAddr = csds.addr2fs::get;
    reinitIndexedFSs(fsindexes, delta, getFsFromAddr);
    if (!delta) {
      setHeapExtents();
      csds.setHeapEnd(nextHeapAddrAfterMark);
    }

    // cleanup at the end of Binary Deserialization, both Delta and full
    // saved and not cleaned up
    // because needed by subsequent delta serialization:
    // fs2addr, addr2fs,
    // byte/short/longAuxAddr2fsa
    // next[xx]HeapAddrAfterMark
    // heaps cleaned up because after a full deser, a subsequent ser only populates these
    // with the new items.
    heap = null;
    stringHeap = null;
    byteHeap = null;
    shortHeap = null;
    longHeap = null;
    // cleared because only used for delta deser, for mods, and mods not allowed for multiple
    // deltas
    clearAuxAddr2fsa();
  } catch (IOException e) {
    // NOTE(review): only the message of the IOException is propagated; the exception itself is
    // not attached as the cause. Consider a cause-preserving CASRuntimeException constructor if
    // one is available.
    String msg = e.getMessage();
    if (msg == null) {
      msg = e.toString();
    }
    throw new CASRuntimeException(CASRuntimeException.BLOB_DESERIALIZATION, msg);
  } finally {
    // safety - not currently logically needed, since this field is only used in the case it has
    // been set
    // (when there is no previous documentAnnotation, and we're adding a new one, and
    // it may not be indexed)
    //
    // If it is not indexed, removing this will allow the FS to be garbage collected.
    baseCas.forAllViews(view -> view.set_deserialized_doc_annot_not_indexed(null));
  }
  return h.typeSystemIndexDefIncluded ? SerialFormat.BINARY_TSI : SerialFormat.BINARY;
}