private FileNode deserializeFileNode()

in tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/onenote/OneNotePtr.java [352:713]


    /**
     * Deserializes one FileNode starting at the current {@code offset} into {@code data}.
     * <p>
     * The 32-bit little-endian header is split into: id (bits 0-9), size (bits 10-22),
     * stpFormat (bits 23-24), cbFormat (bits 25-26), baseType (bits 27-30) and a reserved
     * flag (bit 31). An id of 0 makes the method return immediately, before any other field
     * of the header is interpreted. Otherwise the payload selected by the id is read, the
     * child file node list is parsed recursively when {@code baseType == 2}, and finally
     * {@code offset} is moved to {@code start + size} and {@code end} is restored to the
     * value it had on entry.
     *
     * @param data    the node to populate; the same instance is returned
     * @param curPath the path of this node; used to register revision manifest lists and
     *                object declarations, and handed on to the child list parser
     * @return {@code data}
     * @throws IOException   if one of the underlying read helpers fails
     * @throws TikaException if the node content is inconsistent (missing compact id,
     *                       reserved bits with an unexpected value, data out of bounds)
     */
    private FileNode deserializeFileNode(FileNode data, FileNodePtr curPath)
            throws IOException, TikaException {
        // Snapshot of offset/end on entry; used to scope and later restore this pointer.
        OneNotePtr backup = new OneNotePtr(this);
        long reserved;

        data.isFileData = false;
        data.gosid = ExtendedGUID.nil();
        long fileNodeHeader = deserializeLittleEndianInt();
        data.id = fileNodeHeader & 0x3ff;
        if (data.id == 0) {
            return data;
        }
        LOG.debug("{}Start Node {} ({}) - Offset={}, End={}", getIndent(),
                FndStructureConstants.nameOf(data.id), data.id, offset, end);

        ++indentLevel;

        data.size = (fileNodeHeader >> 10) & 0x1fff;
        // reset the size to only be in scope of this FileNode
        end = backup.offset + data.size;

        long stpFormat = (fileNodeHeader >> 23) & 0x3;
        long cbFormat = (fileNodeHeader >> 25) & 0x3;
        data.baseType = (fileNodeHeader >> 27) & 0xf;
        reserved = (fileNodeHeader >> 31);
        data.ref = FileChunkReference.nil();
        if (data.baseType == 1 || data.baseType == 2) {
            data.ref = deserializeVarFileChunkReference(stpFormat, cbFormat);
        } // otherwise ignore the data ref, since we're a type 0
        if (data.baseType == 1 && !data.ref.equals(FileChunkReference.nil())) {
            // The repositioned pointer is not used afterwards; the call only validates the ref
            // (it would have thrown an error if invalid).
            OneNotePtr content = new OneNotePtr(this);
            content.reposition(data.ref);
        }
        if (data.id == FndStructureConstants.ObjectGroupStartFND) {
            data.idDesc = "oid(group)";
            data.gosid = deserializeExtendedGUID();
        } else if (data.id == FndStructureConstants.ObjectGroupEndFND) {
            // no data
        } else if (data.id == FndStructureConstants.ObjectSpaceManifestRootFND ||
                data.id == FndStructureConstants.ObjectSpaceManifestListStartFND) {
            if (data.id == FndStructureConstants.ObjectSpaceManifestRootFND) {
                data.idDesc = "gosidRoot";
            } else {
                data.idDesc = "gosid";
            }
            // Specifies the identity of the object space being specified by this object
            // space manifest list. MUST match the ObjectSpaceManifestListReferenceFND.gosid
            // field of the FileNode structure that referenced
            // this file node list.
            data.gosid = deserializeExtendedGUID();
        } else if (data.id == FndStructureConstants.ObjectSpaceManifestListReferenceFND) {
            data.gosid = deserializeExtendedGUID();
            data.idDesc = "gosid";
            // children parsed in generic base_type 2 parser
        } else if (data.id == FndStructureConstants.RevisionManifestListStartFND) {
            data.gosid = deserializeExtendedGUID();
            data.idDesc = "gosid";
            // Register the list under the path of the enclosing node: copy the current path
            // and drop its last position.
            FileNodePtr parentPath = new FileNodePtr(curPath);
            parentPath.nodeListPositions.remove(parentPath.nodeListPositions.size() - 1);
            document.registerRevisionManifestList(data.gosid, parentPath);

            data.subType.revisionManifestListStart.nInstanceIgnored = deserializeLittleEndianInt();
        } else if (data.id == FndStructureConstants.RevisionManifestStart4FND) {
            data.gosid = deserializeExtendedGUID(); // the rid
            data.idDesc = "rid";
            data.subType.revisionManifest.ridDependent = deserializeExtendedGUID(); // the rid
            LOG.debug("{}dependent gosid {}", getIndent(),
                    data.subType.revisionManifest.ridDependent);
            data.subType.revisionManifest.timeCreation = deserializeLittleEndianLong();
            data.subType.revisionManifest.revisionRole = deserializeLittleEndianInt();
            data.subType.revisionManifest.odcsDefault = deserializeLittleEndianShort();

            data.gctxid = ExtendedGUID.nil();
            document.registerRevisionManifest(data);
        } else if (data.id == FndStructureConstants.RevisionManifestStart6FND ||
                data.id == FndStructureConstants.RevisionManifestStart7FND) {
            data.gosid = deserializeExtendedGUID(); // the rid
            data.idDesc = "rid";
            data.subType.revisionManifest.ridDependent = deserializeExtendedGUID(); // the rid
            LOG.debug("{}dependent gosid {}", getIndent(),
                    data.subType.revisionManifest.ridDependent);
            data.subType.revisionManifest.revisionRole = deserializeLittleEndianInt();
            data.subType.revisionManifest.odcsDefault = deserializeLittleEndianShort();

            data.gctxid = ExtendedGUID.nil();
            if (data.id == FndStructureConstants.RevisionManifestStart7FND) {
                // only the 7FND variant carries an extra ExtendedGUID, stored as gctxid
                data.gctxid = deserializeExtendedGUID();
            }
            document.registerAdditionalRevisionRole(data.gosid,
                    data.subType.revisionManifest.revisionRole, data.gctxid);
            document.registerRevisionManifest(data);
        } else if (data.id == FndStructureConstants.GlobalIdTableStartFNDX) {
            data.subType.globalIdTableStartFNDX.reserved = deserializeLittleEndianChar();

        } else if (data.id == FndStructureConstants.GlobalIdTableEntryFNDX) {
            data.subType.globalIdTableEntryFNDX.index = deserializeLittleEndianInt();

            data.subType.globalIdTableEntryFNDX.guid = deserializeGUID();

            // NOTE(review): assumes the current revision is already present in revisionMap;
            // a missing entry would surface as a NullPointerException here - confirm.
            document.revisionMap.get(document.currentRevision).globalId.put(
                    data.subType.globalIdTableEntryFNDX.index,
                    data.subType.globalIdTableEntryFNDX.guid);
        } else if (data.id == FndStructureConstants.GlobalIdTableEntry2FNDX) {
            data.subType.globalIdTableEntry2FNDX.indexMapFrom = deserializeLittleEndianInt();
            data.subType.globalIdTableEntry2FNDX.indexMapTo = deserializeLittleEndianInt();

            ExtendedGUID dependentRevision =
                    document.revisionMap.get(document.currentRevision).dependent;
            // Get the compactId from the revisionMap's globalId map.
            GUID compactId = document.revisionMap.get(dependentRevision).globalId.get(
                    data.subType.globalIdTableEntry2FNDX.indexMapFrom);
            if (compactId == null) {
                throw new TikaException("COMPACT_ID_MISSING");
            }
            document.revisionMap.get(document.currentRevision).globalId.put(
                    data.subType.globalIdTableEntry2FNDX.indexMapTo, compactId);
        } else if (data.id == FndStructureConstants.GlobalIdTableEntry3FNDX) {
            data.subType.globalIdTableEntry3FNDX.indexCopyFromStart = deserializeLittleEndianInt();

            data.subType.globalIdTableEntry3FNDX.entriesToCopy = deserializeLittleEndianInt();

            data.subType.globalIdTableEntry3FNDX.indexCopyToStart = deserializeLittleEndianInt();

            // Copy a run of entries from the dependent revision's table into the current
            // revision's table; every source entry must exist.
            ExtendedGUID dependent_revision =
                    document.revisionMap.get(document.currentRevision).dependent;
            for (int i = 0; i < data.subType.globalIdTableEntry3FNDX.entriesToCopy; ++i) {
                Map<Long, GUID> globalIdMap = document.revisionMap.get(dependent_revision).globalId;
                GUID compactId = globalIdMap.get(
                        data.subType.globalIdTableEntry3FNDX.indexCopyFromStart + i);
                if (compactId == null) {
                    throw new TikaException("COMPACT_ID_MISSING");
                }
                document.revisionMap.get(document.currentRevision).globalId.put(
                        data.subType.globalIdTableEntry3FNDX.indexCopyToStart + i, compactId);
            }
        } else if (data.id == FndStructureConstants.CanRevise.ObjectRevisionWithRefCountFNDX ||
                data.id == FndStructureConstants.CanRevise.ObjectRevisionWithRefCount2FNDX) {
            data.subType.objectRevisionWithRefCountFNDX.oid = deserializeCompactID(); // the oid

            if (data.id == FndStructureConstants.CanRevise.ObjectRevisionWithRefCountFNDX) {
                // Single byte: bit 0 = oid references, bit 1 = osid references, rest = cRef.
                int ref = deserializeLittleEndianChar();

                data.subType.objectRevisionWithRefCountFNDX.hasOidReferences = ref & 1;
                data.subType.objectRevisionWithRefCountFNDX.hasOsidReferences = ref & 2;
                data.subType.objectRevisionWithRefCountFNDX.cRef = (ref >> 2);
            } else {
                // 32-bit flags (upper 30 bits must be zero) followed by a separate 32-bit cRef.
                long ref = deserializeLittleEndianInt();

                data.subType.objectRevisionWithRefCountFNDX.hasOidReferences = ref & 1;
                data.subType.objectRevisionWithRefCountFNDX.hasOsidReferences = ref & 2;
                if ((ref >> 2) != 0) {
                    throw new TikaException("Reserved non-zero");
                }
                data.subType.objectRevisionWithRefCountFNDX.cRef = deserializeLittleEndianInt();
            }
        } else if (data.id == FndStructureConstants.RootObjectReference2FNDX) {
            data.subType.rootObjectReference.oidRoot = deserializeCompactID();

            data.idDesc = "oidRoot";
            data.gosid = data.subType.rootObjectReference.oidRoot.guid;
            data.subType.rootObjectReference.rootObjectReferenceBase.rootRole =
                    deserializeLittleEndianInt();

            LOG.debug("{}Root role {}", getIndent(),
                    data.subType.rootObjectReference.rootObjectReferenceBase.rootRole);
        } else if (data.id == FndStructureConstants.RootObjectReference3FND) {
            data.idDesc = "oidRoot";
            data.gosid = deserializeExtendedGUID();

            data.subType.rootObjectReference.rootObjectReferenceBase.rootRole =
                    deserializeLittleEndianInt();

            LOG.debug("{}Root role {}", getIndent(),
                    data.subType.rootObjectReference.rootObjectReferenceBase.rootRole);
        } else if (data.id == FndStructureConstants.RevisionRoleDeclarationFND ||
                data.id == FndStructureConstants.RevisionRoleAndContextDeclarationFND) {
            data.gosid = deserializeExtendedGUID();

            data.subType.revisionRoleDeclaration.revisionRole = deserializeLittleEndianInt();

            if (data.id == FndStructureConstants.RevisionRoleAndContextDeclarationFND) {
                data.gctxid = deserializeExtendedGUID();

            }
            document.registerAdditionalRevisionRole(data.gosid,
                    data.subType.revisionRoleDeclaration.revisionRole, data.gctxid);
            // FIXME: deal with ObjectDataEncryptionKey
        } else if (data.id == FndStructureConstants.ObjectInfoDependencyOverridesFND) {
            OneNotePtr content = new OneNotePtr(this);
            if (!data.ref.equals(FileChunkReference.nil())) {
                content.reposition(data.ref); // otherwise it's positioned right at this node
            }
            data.subType.objectInfoDependencyOverrides.data =
                    content.deserializeObjectInfoDependencyOverrideData();
        } else if (data.id == FndStructureConstants.FileDataStoreListReferenceFND) {
            // already processed this
        } else if (data.id == FndStructureConstants.FileDataStoreObjectReferenceFND) {
            FileChunkReference ref = deserializeFileChunkReference64();
            GUID guid = deserializeGUID();
            ExtendedGUID extendedGuid = new ExtendedGUID(guid, 0);
            LOG.trace("found extended guid {}", extendedGuid);
            document.guidToRef.put(extendedGuid, ref);
            OneNotePtr fileDataStorePtr = new OneNotePtr(this);
            fileDataStorePtr.reposition(data.ref);

            data.subType.fileDataStoreObjectReference.ref =
                    fileDataStorePtr.deserializeFileDataStoreObject();

        } else if (data.id == FndStructureConstants.CanRevise.ObjectDeclarationWithRefCountFNDX ||
                data.id == FndStructureConstants.CanRevise.ObjectDeclarationWithRefCount2FNDX ||
                data.id == FndStructureConstants.CanRevise.ObjectDeclaration2RefCountFND ||
                data.id == FndStructureConstants.CanRevise.ObjectDeclaration2LargeRefCountFND ||
                data.id == FndStructureConstants.CanRevise.ReadOnlyObjectDeclaration2RefCountFND ||
                data.id ==
                        FndStructureConstants.CanRevise.ReadOnlyObjectDeclaration2LargeRefCountFND) {
            data.subType.objectDeclarationWithRefCount.body.file_data_store_reference = false;
            if (data.id == FndStructureConstants.CanRevise.ObjectDeclarationWithRefCountFNDX ||
                    data.id == FndStructureConstants.CanRevise.ObjectDeclarationWithRefCount2FNDX) {
                data.subType.objectDeclarationWithRefCount.body =
                        deserializeObjectDeclarationWithRefCountBody();
            } else { // one of the other 4 that use the ObjectDeclaration2Body
                data.subType.objectDeclarationWithRefCount.body =
                        deserializeObjectDeclaration2Body();
            }
            // These three ids store cRef in one byte; the remaining ids use a 32-bit cRef.
            if (data.id == FndStructureConstants.CanRevise.ObjectDeclarationWithRefCountFNDX ||
                    data.id == FndStructureConstants.CanRevise.ObjectDeclaration2RefCountFND ||
                    data.id ==
                            FndStructureConstants.CanRevise.ReadOnlyObjectDeclaration2RefCountFND) {
                data.subType.objectDeclarationWithRefCount.cRef = deserializeLittleEndianChar();
            } else {
                data.subType.objectDeclarationWithRefCount.cRef = deserializeLittleEndianInt();
            }

            if (data.id == FndStructureConstants.CanRevise.ReadOnlyObjectDeclaration2RefCountFND ||
                    data.id ==
                            FndStructureConstants.CanRevise.ReadOnlyObjectDeclaration2LargeRefCountFND) {
                // the read-only variants carry a trailing 16-byte md5 field
                ByteBuffer md5Buffer = ByteBuffer.allocate(16);
                deserializeBytes(md5Buffer);
                data.subType.objectDeclarationWithRefCount.readOnly.md5 = md5Buffer.array();
            }
            data.idDesc = "oid";
            postprocessObjectDeclarationContents(data, curPath);

            LOG.debug("{}Ref Count JCID {}", getIndent(),
                    data.subType.objectDeclarationWithRefCount.body.jcid);
        } else if (
                data.id == FndStructureConstants.CanRevise.ObjectDeclarationFileData3RefCountFND ||
                        data.id ==
                                FndStructureConstants.CanRevise.ObjectDeclarationFileData3LargeRefCountFND) {
            data.subType.objectDeclarationWithRefCount.body.oid = deserializeCompactID();

            long jcid = deserializeLittleEndianInt();

            data.subType.objectDeclarationWithRefCount.body.jcid.loadFrom32BitIndex(jcid);

            if (data.id == FndStructureConstants.CanRevise.ObjectDeclarationFileData3RefCountFND) {
                data.subType.objectDeclarationWithRefCount.cRef = deserializeLittleEndianChar();
            } else {
                data.subType.objectDeclarationWithRefCount.cRef = deserializeLittleEndianInt();
            }

            // cch is a count of UTF-16 characters, so the string occupies cch * 2 bytes.
            // Compute the byte count in long arithmetic (so it cannot wrap) and bound THAT,
            // not the character count, by the space left in this node and by the file size.
            long cch = deserializeLittleEndianInt();
            long byteCount = cch * 2;

            long roomLeftLong = roomLeft();
            if (byteCount > roomLeftLong) { // not a valid guid
                throw new TikaException(
                        "Data out of bounds - cch " + cch + " (" + byteCount +
                                " bytes) is > room left = " + roomLeftLong);
            }

            if (byteCount > dif.size() || byteCount > Integer.MAX_VALUE) {
                throw new TikaMemoryLimitException(
                        "CCH=" + cch + " needs " + byteCount +
                                " bytes, which is greater than file size " + dif.size());
            }
            ByteBuffer dataSpaceBuffer = ByteBuffer.allocate((int) byteCount);
            // NOTE(review): the number of bytes actually read is not checked; offset is
            // advanced by the full buffer length regardless - confirm a short read is benign.
            dif.read(dataSpaceBuffer);
            byte[] dataSpaceBufferBytes = dataSpaceBuffer.array();
            offset += dataSpaceBufferBytes.length;
            // A file-data-store reference is the IFNDF prefix followed by a curly-brace GUID
            // in UTF-16; anything else is treated as an external reference and ignored.
            if (dataSpaceBufferBytes.length == (IFNDF_GUID_LENGTH * 2 + IFNDF.length) &&
                    Arrays.equals(IFNDF,
                            Arrays.copyOfRange(dataSpaceBufferBytes, 0, IFNDF.length))) {
                data.subType.objectDeclarationWithRefCount.body.file_data_store_reference = true;
                GUID guid = GUID.fromCurlyBraceUTF16Bytes(
                        Arrays.copyOfRange(dataSpaceBufferBytes, IFNDF.length,
                                dataSpaceBufferBytes.length));
                ExtendedGUID extendedGUID = new ExtendedGUID(guid, 0);
                FileChunkReference fileChunk = document.getAssocGuidToRef(extendedGUID);
                if (fileChunk == null) {
                    LOG.debug("{} have not seen GUID {} yet", getIndent(), extendedGUID);
                } else {
                    // TODO - call postprocessObjectDeclarationContents on this object?
                }
            } else {
                LOG.debug("{}Ignoring an external reference {}", getIndent(),
                        new String(dataSpaceBufferBytes, StandardCharsets.UTF_16LE));
            }
        } else if (data.id == FndStructureConstants.ObjectGroupListReferenceFND) {
            data.idDesc = "object_group_id";
            data.gosid = deserializeExtendedGUID(); // the object group id

            // the ref populates the FileNodeList children
        } else if (data.id == FndStructureConstants.DataSignatureGroupDefinitionFND) {
            data.idDesc = "data_sig";
            data.gosid = deserializeExtendedGUID(); // the DataSignatureGroup

        } else if (data.id == FndStructureConstants.RevisionManifestListReferenceFND) {
            // Make sure a Revision exists for the current revision, then record this node's
            // path in its manifest list.
            document.revisionMap.putIfAbsent(document.currentRevision, new Revision());
            Revision currentRevision = document.revisionMap.get(document.currentRevision);
            currentRevision.manifestList.add(curPath);
        } else {
            LOG.debug("No fnd needed to be parsed for data.id=0x{} ({})",
                    Long.toHexString(data.id), FndStructureConstants.nameOf(data.id));
        }
        if (data.baseType == 2) {
            // Generic baseType == 2 parser - means we have children to parse.
            OneNotePtr subList = new OneNotePtr(this);
            // position the subList pointer to the data.ref and deserialize recursively.
            subList.reposition(data.ref);
            subList.deserializeFileNodeList(data.childFileNodeList, curPath);
        }

        // Skip to the end of this node no matter how much of it was consumed above, and
        // restore the end bound that was in effect on entry.
        offset = backup.offset + data.size;
        end = backup.end;

        // NOTE(review): the message says "NONZERO" but the check rejects every value other
        // than 1 - confirm the intended meaning against the MS-ONESTORE FileNode definition.
        if (reserved != 1) {
            throw new TikaException("RESERVED_NONZERO");
        }

        if (data.baseType == 1 && !(data.ref.equals(FileChunkReference.nil()))) {
            document.setAssocGuidToRef(data.gosid, data.ref);
            // As above, repositioning is only used to validate the reference.
            OneNotePtr content = new OneNotePtr(this);
            content.reposition(data.ref);
            if (data.hasGctxid()) {
                LOG.debug("{}gctxid {}", getIndent(), data.gctxid);
            }
        } else if (!data.gosid.equals(ExtendedGUID.nil())) {
            LOG.trace("Non base type == 1 guid {}", data.gosid);
        }
        --indentLevel;
        if (data.gosid.equals(ExtendedGUID.nil())) {
            LOG.debug("{}End Node {} ({}) - Offset={}, End={}", getIndent(),
                    FndStructureConstants.nameOf(data.id), (int) data.id, offset, end);
        } else {
            LOG.debug("{}End Node {} ({}) {}:[{}] - Offset={}, End={}", getIndent(),
                    FndStructureConstants.nameOf(data.id), (int) data.id, data.idDesc, data.gosid,
                    offset, end);
        }
        return data;
    }