private SerialFormat binaryDeserialization()

in uimaj-core/src/main/java/org/apache/uima/cas/impl/BinaryCasSerDes.java [653:1003]


  /**
   * Reads one binary-serialized CAS (full or delta, as flagged by the header) from the header's
   * reader and rebuilds the feature structures and index contents from it.
   *
   * <p>
   * The stream is consumed in this order:
   * <ol>
   * <li>main FS heap (size, then that many ints)</li>
   * <li>string char heap (size, then chars; padded to a 4-byte boundary)</li>
   * <li>string ref heap (pairs of char-heap offset / length)</li>
   * <li>delta only: modified main-heap slots as (address, value) pairs - read now, applied later</li>
   * <li>indexed-FS information</li>
   * <li>byte heap (padded to 4 bytes), short heap (padded to 4 bytes), long heap</li>
   * <li>delta only: modifications to existing byte / short / long array elements</li>
   * </ol>
   * After reading, the heap data is converted into feature structures, the deferred main-heap
   * modifications are applied (delta only), the indexes are re-initialized, and the temporary
   * heaps are released.
   *
   * <p>
   * For a delta, the heaps left over from a previous binary serialization of this CAS must still
   * be present; otherwise {@code Misc.internalError()} is invoked.
   *
   * @param h
   *          the already-parsed header; supplies the delta flag, the reader, and whether a type
   *          system / index definition was included
   * @return {@link SerialFormat#BINARY_TSI} if the header says the type system and index
   *         definition were included, otherwise {@link SerialFormat#BINARY}
   * @throws CASRuntimeException
   *           with key {@code BLOB_DESERIALIZATION} if reading from the stream fails
   */
  private SerialFormat binaryDeserialization(Header h) {

    final boolean delta = h.isDelta;

    final Reading r = h.reading;

    final DataInputStream dis = r.dis;

    final CommonSerDesSequential csds = BinaryCasSerDes4.getCsds(baseCas, delta);

    if (delta) {
      if (nextHeapAddrAfterMark == 0 || heap == null || heap.getCellsUsed() <= 1) {
        Misc.internalError(); // can't deserialize without a previous binary serialization for this
                              // CAS
      }
    } else {
      // full deserialization: start from empty heaps, reusing existing instances where present
      if (heap == null) {
        heap = new Heap();
      } else {
        heap.reset();
      }
      if (byteHeap == null) {
        byteHeap = new ByteHeap();
      } else {
        byteHeap.reset();
      }
      if (shortHeap == null) {
        shortHeap = new ShortHeap();
      } else {
        shortHeap.reset();
      }
      if (longHeap == null) {
        longHeap = new LongHeap();
      } else {
        longHeap.reset();
      }
      if (stringHeap == null) {
        stringHeap = new StringHeap();
      } else {
        stringHeap.reset();
      }
      clearDeltaOffsets();
    }

    try {
      // main fsheap
      final int fsheapsz = r.readInt();

      // reading the 0th (null) element, because that's what V2 did
      int startPos = 0;
      if (!delta) {
        heap.reinitSizeOnly(fsheapsz);
      } else {
        // delta: new cells are appended after the existing ones
        startPos = heap.getNextId();
        heap.grow(fsheapsz);
      }
      if (TRACE_DESER) {
        System.out.format("BinDes Plain %s startPos: %,d mainHeapSize: %d%n", delta ? "Delta" : "",
                startPos, fsheapsz);
      }

      // add new heap slots
      final int fsheapEnd = fsheapsz + startPos;
      for (int i = startPos; i < fsheapEnd; i++) {
        heap.heap[i] = r.readInt();
      }

      // string heap
      int stringheapsz = r.readInt();

      final StringHeapDeserializationHelper shdh = new StringHeapDeserializationHelper();

      shdh.charHeap = new char[stringheapsz];
      for (int i = 0; i < stringheapsz; i++) {
        shdh.charHeap[i] = (char) r.readShort();
      }
      shdh.charHeapPos = stringheapsz;

      // word alignment: an odd number of 2-byte chars is followed by one 2-byte pad
      if (stringheapsz % 2 != 0) {
        dis.readChar();
      }

      // string ref heap
      int refheapsz = r.readInt();

      // serialized form: one leading cell plus 2 ints per string;
      // in-memory form: 3 ints per string
      refheapsz--;
      refheapsz = refheapsz / 2;
      refheapsz = refheapsz * 3;

      // read back into references consisting of three ints
      // --stringheap offset, length, stringlist offset
      shdh.refHeap = new int[StringHeapDeserializationHelper.FIRST_CELL_REF + refheapsz];

      dis.readInt(); // 0
      for (int i = shdh.refHeapPos; i < shdh.refHeap.length; i += StringHeapDeserializationHelper.REF_HEAP_CELL_SIZE) {
        shdh.refHeap[i + StringHeapDeserializationHelper.CHAR_HEAP_POINTER_OFFSET] = r.readInt();
        shdh.refHeap[i + StringHeapDeserializationHelper.CHAR_HEAP_STRLEN_OFFSET] = r.readInt();
        shdh.refHeap[i + StringHeapDeserializationHelper.STRING_LIST_ADDR_OFFSET] = 0;
      }
      shdh.refHeapPos = refheapsz + StringHeapDeserializationHelper.FIRST_CELL_REF;

      stringHeap.reinit(shdh, delta);

      final int fsmodssz2;
      final int[] modWords;
      // if delta, handle modified fs heap cells
      if (delta) {
        // Delta Binary Deserialization
        //
        // At this point, we have
        // - not yet converted the main heap array into FSs.
        // - not yet read in Aux Heaps (except for strings)
        //
        // So, we do this in 2 phases.
        // - This phase just reads in the data but does not act on it.
        // - Phase 2 happens after the FSs are created from the heap data.
        fsmodssz2 = 2 * r.readInt();
        modWords = new int[fsmodssz2];

        for (int i = 0; i < fsmodssz2; i++) {
          modWords[i] = r.readInt();
        }
        if (TRACE_DESER) {
          System.out.format("BinDes modified heap slot count: %,d%n", fsmodssz2 / 2);
        }
      } else {
        fsmodssz2 = 0; // not used but must be set to make "final" work
        modWords = null;
      }

      // indexed FSs
      int fsindexsz = r.readInt();
      int[] fsindexes = new int[fsindexsz];
      if (TRACE_DESER) {
        System.out.format("BinDes indexedFSs count: %,d%n", fsindexsz);
      }
      for (int i = 0; i < fsindexsz; i++) {
        fsindexes[i] = r.readInt();
        if (TRACE_DESER) {
          if (i % 5 == 0) {
            System.out.format("%n i: %5d ", i);
          }
          System.out.format("%15d ", fsindexes[i]);
        }
      }
      if (TRACE_DESER) {
        System.out.println("");
      }

      // byte heap
      int heapsz = r.readInt();
      if (TRACE_DESER) {
        System.out.format("BinDes ByteHeap size: %,d%n", heapsz);
      }

      if (!delta) {
        byteHeap.heap = new byte[Math.max(16, heapsz)]; // must be > 0
        dis.readFully(byteHeap.heap, 0, heapsz);
        byteHeap.heapPos = heapsz;
      } else {
        final int offset2startOfNewBytes = byteHeap.reserve(heapsz);
        dis.readFully(byteHeap.heap, offset2startOfNewBytes, heapsz);
      }
      // word alignment
      int align = (4 - (heapsz % 4)) % 4;
      BinaryCasSerDes6.skipBytes(dis, align);

      // short heap
      heapsz = r.readInt();
      if (TRACE_DESER) {
        System.out.format("BinDes ShortHeap size: %,d%n", heapsz);
      }

      if (!delta) {
        shortHeap.heap = new short[Math.max(16, heapsz)]; // must be > 0
        for (int i = 0; i < heapsz; i++) {
          shortHeap.heap[i] = r.readShort();
        }
        shortHeap.heapPos = heapsz;
      } else {
        // reserve() has already made room for the new cells and returned where they start
        // (same contract the byte heap path above relies on), so fill the reserved range
        // directly instead of appending on top of the reservation.
        final int pos = shortHeap.reserve(heapsz);
        final int end = pos + heapsz;
        for (int i = pos; i < end; i++) {
          shortHeap.heap[i] = r.readShort();
        }
      }
      // word alignment
      if (heapsz % 2 != 0) {
        dis.readShort();
      }

      // long heap
      heapsz = r.readInt();
      if (TRACE_DESER) {
        System.out.format("BinDes LongHeap size: %,d%n", heapsz);
      }

      if (!delta) {
        longHeap.heap = new long[Math.max(16, heapsz)]; // must be > 0
        for (int i = 0; i < heapsz; i++) {
          longHeap.heap[i] = r.readLong();
        }
        longHeap.heapPos = heapsz;
      } else {
        // as for the short heap: fill the cells that reserve() set aside
        final int pos = longHeap.reserve(heapsz);
        final int end = pos + heapsz;
        for (int i = pos; i < end; i++) {
          longHeap.heap[i] = r.readLong();
        }
      }

      if (delta) {
        // The modifications are all to existing FSs.
        // The modifications consist of an address (offset in an aux array) which is an array
        // element.
        // We don't update the aux array, but instead update the actual FS below the line
        // representing the array.
        // To identify the fs, we use the xxAuxAddr2fsa sorted list forms and do a binary search to
        // find the item to update,
        // with a fast path for the same or next item.
        // Same - use case is multiple updates into the same array

        // modified Byte Heap (the byte aux heap backs both byte and boolean arrays)
        heapsz = updateAuxArrayMods(r, byteAuxAddr2fsa, (ba, arrayIndex) -> {
          if (ba instanceof ByteArray byteArray) {
            byteArray.set(arrayIndex, dis.readByte());
          } else {
            ((BooleanArray) ba).set(arrayIndex, dis.readByte() == 1);
          }
        });

        // word alignment
        align = (4 - (heapsz % 4)) % 4;
        BinaryCasSerDes6.skipBytes(dis, align);

        // modified Short Heap
        heapsz = updateAuxArrayMods(r, shortAuxAddr2fsa, (sa, arrayIndex) -> {
          ((ShortArray) sa).set(arrayIndex, r.readShort());
        });

        // word alignment
        if (heapsz % 2 != 0) {
          dis.readShort();
        }

        // modified Long Heap (the long aux heap backs both long and double arrays)
        updateAuxArrayMods(r, longAuxAddr2fsa, (la, arrayIndex) -> {
          if (la instanceof LongArray longArray) {
            longArray.set(arrayIndex, r.readLong());
          } else {
            ((DoubleArray) la).set(arrayIndex, CASImpl.long2double(r.readLong()));
          }
        });
      } // of delta - modified processing

      // *********************************************
      // Convert model heap added FS into real FS *
      // update addr2fs and fs2addr *
      // update byte/short/long/string auxAddr2fsa *
      // *********************************************

      // build the new FSs and record in addr2FSs
      // (address 0 is the null element, so a full deserialization starts at 1)
      createFSsFromHeaps(delta, startPos == 0 ? 1 : startPos, csds);

      if (delta) {
        final BinDeserSupport bds = new BinDeserSupport();

        bds.fssAddrArray = new int[csds.addr2fs.size() + 1]; // need one extra at the end
        IntListIterator it = csds.addr2fs.keyIterator();
        int iaa = 0;
        while (it.hasNext()) {
          bds.fssAddrArray[iaa++] = it.nextNvc();
        }
        // iaa at this point refs the last entry in the table
        bds.fssAddrArray[iaa] = heap.getCellsUsed();
        Arrays.sort(bds.fssAddrArray); // because addr2fs.keyIterator is arbitrary order due to hash
                                       // table impl
        assert (bds.fssAddrArray[iaa] == heap.getCellsUsed());

        bds.fsStartAddr = -1; // avoid initial addback of addback/remove pair.

        // loop over all heap modifications to existing FSs

        // first disable auto addbacks for index corruption - this routine is handling that
        baseCas.svd.disableAutoCorruptionCheck = true;

        try {
          // modWords holds (heap address, new value) pairs read in phase 1
          for (int i = 0; i < modWords.length; i = i + 2) {
            final int heapAddrBeingModified = modWords[i];
            bds.maybeAddBackAndRemoveFs(heapAddrBeingModified, csds.addr2fs);
            updateHeapSlot(bds, heapAddrBeingModified, modWords[i + 1], csds.addr2fs);
          }
          bds.addBackIfRemoved();
          bds.fssAddrArray = null; // free storage
        } finally {
          baseCas.svd.disableAutoCorruptionCheck = false;
        }
      }

      // update the indexes
      IntFunction<TOP> getFsFromAddr = csds.addr2fs::get;
      reinitIndexedFSs(fsindexes, delta, getFsFromAddr);

      if (!delta) {
        setHeapExtents();
        csds.setHeapEnd(nextHeapAddrAfterMark);
      }

      // cleanup at the end of Binary Deserialization, both Delta and full
      // saved and not cleaned up
      // because needed by subsequent delta serialization:
      // fs2addr, addr2fs,
      // byte/short/longAuxAddr2fsa
      // next[xx]HeapAddrAfterMark

      // heaps cleaned up because after a full deser, a subsequent ser only populates these
      // with the new items.
      heap = null;
      stringHeap = null;
      byteHeap = null;
      shortHeap = null;
      longHeap = null;

      // cleared because only used for delta deser, for mods, and mods not allowed for multiple
      // deltas
      clearAuxAddr2fsa();
    } catch (IOException e) {
      // NOTE(review): only the message is carried over; the IOException itself is not attached
      // as the cause - consider a cause-taking constructor if CASRuntimeException offers one.
      String msg = e.getMessage();
      if (msg == null) {
        msg = e.toString();
      }
      throw new CASRuntimeException(CASRuntimeException.BLOB_DESERIALIZATION, msg);
    } finally {
      // safety - not currently logically needed, since this field is only used in the case it has
      // been set
      // (when there is no previous documentAnnotation, and we're adding a new one, and
      // it may not be indexed)
      //
      // If it is not indexed, removing this will allow the FS to garbage collected.
      baseCas.forAllViews(view -> view.set_deserialized_doc_annot_not_indexed(null));
    }

    return h.typeSystemIndexDefIncluded ? SerialFormat.BINARY_TSI : SerialFormat.BINARY;
  }