in tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/onenote/OneNotePtr.java [352:713]
/**
 * Deserializes the FileNode that starts at the current {@code offset} into {@code data}.
 * <p>
 * The 32-bit little-endian node header is split into: id (bits 0-9), size (bits 10-22),
 * stpFormat (bits 23-24), cbFormat (bits 25-26), baseType (bits 27-30) and a reserved bit
 * (bit 31). The id then selects which payload fields are read. While the node is parsed,
 * {@code end} is narrowed to {@code start + size}; afterwards {@code offset} is set to the
 * first byte following the node and {@code end} is restored to its previous value.
 * <p>
 * Nodes with baseType 2 have their referenced child file node list deserialized recursively.
 *
 * @param data    the node to populate; the same instance is returned
 * @param curPath path of this node; it is handed to the document when registering revision
 *                manifest lists, to object-declaration post-processing and to child list
 *                deserialization
 * @return {@code data}; if the node id is 0 it is returned right after the header was read,
 *         without any further parsing
 * @throws IOException   if reading the underlying file fails
 * @throws TikaException if the reserved header bit is not 1, a required revision or compact id
 *                       is missing, a reserved payload field is non-zero, or a declared string
 *                       length exceeds the space left in the node
 */
private FileNode deserializeFileNode(FileNode data, FileNodePtr curPath)
        throws IOException, TikaException {
    // Copy of this pointer taken before the header is consumed; its offset/end are used to
    // scope this node and to restore the pointer once the node is done.
    OneNotePtr backup = new OneNotePtr(this);

    data.isFileData = false;
    data.gosid = ExtendedGUID.nil();
    long fileNodeHeader = deserializeLittleEndianInt();
    data.id = fileNodeHeader & 0x3ff;
    if (data.id == 0) {
        return data;
    }
    LOG.debug("{}Start Node {} ({}) - Offset={}, End={}", getIndent(),
            FndStructureConstants.nameOf(data.id), data.id, offset, end);
    ++indentLevel;

    data.size = (fileNodeHeader >> 10) & 0x1fff;
    // reset the size to only be in scope of this FileNode
    end = backup.offset + data.size;
    long stpFormat = (fileNodeHeader >> 23) & 0x3;
    long cbFormat = (fileNodeHeader >> 25) & 0x3;
    data.baseType = (fileNodeHeader >> 27) & 0xf;
    long reserved = fileNodeHeader >> 31;

    data.ref = FileChunkReference.nil();
    if (data.baseType == 1 || data.baseType == 2) {
        data.ref = deserializeVarFileChunkReference(stpFormat, cbFormat);
    } // otherwise ignore the data ref, since we're a type 0
    if (data.baseType == 1 && !data.ref.equals(FileChunkReference.nil())) {
        // Repositioning a throw-away pointer is done only for its validation side effect:
        // it would have thrown an error if the reference were invalid.
        OneNotePtr content = new OneNotePtr(this);
        content.reposition(data.ref);
    }

    if (data.id == FndStructureConstants.ObjectGroupStartFND) {
        data.idDesc = "oid(group)";
        data.gosid = deserializeExtendedGUID();
    } else if (data.id == FndStructureConstants.ObjectGroupEndFND) {
        // no data
    } else if (data.id == FndStructureConstants.ObjectSpaceManifestRootFND ||
            data.id == FndStructureConstants.ObjectSpaceManifestListStartFND) {
        if (data.id == FndStructureConstants.ObjectSpaceManifestRootFND) {
            data.idDesc = "gosidRoot";
        } else {
            data.idDesc = "gosid";
        }
        // Specifies the identity of the object space being specified by this object
        // space manifest list. MUST match the ObjectSpaceManifestListReferenceFND.gosid
        // field of the FileNode structure that referenced this file node list.
        data.gosid = deserializeExtendedGUID();
    } else if (data.id == FndStructureConstants.ObjectSpaceManifestListReferenceFND) {
        data.gosid = deserializeExtendedGUID();
        data.idDesc = "gosid";
        // children parsed in generic base_type 2 parser
    } else if (data.id == FndStructureConstants.RevisionManifestListStartFND) {
        data.gosid = deserializeExtendedGUID();
        data.idDesc = "gosid";
        // The list is registered under the path of the node that contains it, i.e. the
        // current path with its last position dropped.
        FileNodePtr parentPath = new FileNodePtr(curPath);
        parentPath.nodeListPositions.remove(parentPath.nodeListPositions.size() - 1);
        document.registerRevisionManifestList(data.gosid, parentPath);
        data.subType.revisionManifestListStart.nInstanceIgnored = deserializeLittleEndianInt();
    } else if (data.id == FndStructureConstants.RevisionManifestStart4FND) {
        data.gosid = deserializeExtendedGUID(); // the rid
        data.idDesc = "rid";
        data.subType.revisionManifest.ridDependent = deserializeExtendedGUID(); // the rid
        LOG.debug("{}dependent gosid {}", getIndent(),
                data.subType.revisionManifest.ridDependent);
        data.subType.revisionManifest.timeCreation = deserializeLittleEndianLong();
        data.subType.revisionManifest.revisionRole = deserializeLittleEndianInt();
        data.subType.revisionManifest.odcsDefault = deserializeLittleEndianShort();
        data.gctxid = ExtendedGUID.nil();
        document.registerRevisionManifest(data);
    } else if (data.id == FndStructureConstants.RevisionManifestStart6FND ||
            data.id == FndStructureConstants.RevisionManifestStart7FND) {
        data.gosid = deserializeExtendedGUID(); // the rid
        data.idDesc = "rid";
        data.subType.revisionManifest.ridDependent = deserializeExtendedGUID(); // the rid
        LOG.debug("{}dependent gosid {}", getIndent(),
                data.subType.revisionManifest.ridDependent);
        data.subType.revisionManifest.revisionRole = deserializeLittleEndianInt();
        data.subType.revisionManifest.odcsDefault = deserializeLittleEndianShort();
        data.gctxid = ExtendedGUID.nil();
        if (data.id == FndStructureConstants.RevisionManifestStart7FND) {
            // only the "7" variant carries a context id after the common fields
            data.gctxid = deserializeExtendedGUID();
        }
        document.registerAdditionalRevisionRole(data.gosid,
                data.subType.revisionManifest.revisionRole, data.gctxid);
        document.registerRevisionManifest(data);
    } else if (data.id == FndStructureConstants.GlobalIdTableStartFNDX) {
        data.subType.globalIdTableStartFNDX.reserved = deserializeLittleEndianChar();
    } else if (data.id == FndStructureConstants.GlobalIdTableEntryFNDX) {
        data.subType.globalIdTableEntryFNDX.index = deserializeLittleEndianInt();
        data.subType.globalIdTableEntryFNDX.guid = deserializeGUID();
        lookupRegisteredRevision(document.currentRevision, "current").globalId.put(
                data.subType.globalIdTableEntryFNDX.index,
                data.subType.globalIdTableEntryFNDX.guid);
    } else if (data.id == FndStructureConstants.GlobalIdTableEntry2FNDX) {
        data.subType.globalIdTableEntry2FNDX.indexMapFrom = deserializeLittleEndianInt();
        data.subType.globalIdTableEntry2FNDX.indexMapTo = deserializeLittleEndianInt();
        Revision current = lookupRegisteredRevision(document.currentRevision, "current");
        Revision dependent = lookupRegisteredRevision(current.dependent, "dependent");
        // Get the compactId from the dependent revision's globalId map.
        GUID compactId =
                dependent.globalId.get(data.subType.globalIdTableEntry2FNDX.indexMapFrom);
        if (compactId == null) {
            throw new TikaException("COMPACT_ID_MISSING");
        }
        current.globalId.put(data.subType.globalIdTableEntry2FNDX.indexMapTo, compactId);
    } else if (data.id == FndStructureConstants.GlobalIdTableEntry3FNDX) {
        data.subType.globalIdTableEntry3FNDX.indexCopyFromStart = deserializeLittleEndianInt();
        data.subType.globalIdTableEntry3FNDX.entriesToCopy = deserializeLittleEndianInt();
        data.subType.globalIdTableEntry3FNDX.indexCopyToStart = deserializeLittleEndianInt();
        Revision current = lookupRegisteredRevision(document.currentRevision, "current");
        long entriesToCopy = data.subType.globalIdTableEntry3FNDX.entriesToCopy;
        // The dependent revision is only needed (and only required to exist) when there is
        // at least one entry to copy.
        if (entriesToCopy > 0) {
            Map<Long, GUID> dependentIds =
                    lookupRegisteredRevision(current.dependent, "dependent").globalId;
            // long counter: the count comes from a 32-bit field stored in a wider type
            for (long i = 0; i < entriesToCopy; ++i) {
                GUID compactId = dependentIds.get(
                        data.subType.globalIdTableEntry3FNDX.indexCopyFromStart + i);
                if (compactId == null) {
                    throw new TikaException("COMPACT_ID_MISSING");
                }
                current.globalId.put(
                        data.subType.globalIdTableEntry3FNDX.indexCopyToStart + i, compactId);
            }
        }
    } else if (data.id == FndStructureConstants.CanRevise.ObjectRevisionWithRefCountFNDX ||
            data.id == FndStructureConstants.CanRevise.ObjectRevisionWithRefCount2FNDX) {
        data.subType.objectRevisionWithRefCountFNDX.oid = deserializeCompactID(); // the oid
        if (data.id == FndStructureConstants.CanRevise.ObjectRevisionWithRefCountFNDX) {
            // single byte: bit 0 = has oid refs, bit 1 = has osid refs, remaining bits = cRef
            int ref = deserializeLittleEndianChar();
            data.subType.objectRevisionWithRefCountFNDX.hasOidReferences = ref & 1;
            data.subType.objectRevisionWithRefCountFNDX.hasOsidReferences = ref & 2;
            data.subType.objectRevisionWithRefCountFNDX.cRef = (ref >> 2);
        } else {
            // 32-bit flags (everything above bit 1 must be zero) followed by a 32-bit cRef
            long ref = deserializeLittleEndianInt();
            data.subType.objectRevisionWithRefCountFNDX.hasOidReferences = ref & 1;
            data.subType.objectRevisionWithRefCountFNDX.hasOsidReferences = ref & 2;
            if ((ref >> 2) != 0) {
                throw new TikaException("Reserved non-zero");
            }
            data.subType.objectRevisionWithRefCountFNDX.cRef = deserializeLittleEndianInt();
        }
    } else if (data.id == FndStructureConstants.RootObjectReference2FNDX) {
        data.subType.rootObjectReference.oidRoot = deserializeCompactID();
        data.idDesc = "oidRoot";
        data.gosid = data.subType.rootObjectReference.oidRoot.guid;
        data.subType.rootObjectReference.rootObjectReferenceBase.rootRole =
                deserializeLittleEndianInt();
        LOG.debug("{}Root role {}", getIndent(),
                data.subType.rootObjectReference.rootObjectReferenceBase.rootRole);
    } else if (data.id == FndStructureConstants.RootObjectReference3FND) {
        data.idDesc = "oidRoot";
        data.gosid = deserializeExtendedGUID();
        data.subType.rootObjectReference.rootObjectReferenceBase.rootRole =
                deserializeLittleEndianInt();
        LOG.debug("{}Root role {}", getIndent(),
                data.subType.rootObjectReference.rootObjectReferenceBase.rootRole);
    } else if (data.id == FndStructureConstants.RevisionRoleDeclarationFND ||
            data.id == FndStructureConstants.RevisionRoleAndContextDeclarationFND) {
        data.gosid = deserializeExtendedGUID();
        data.subType.revisionRoleDeclaration.revisionRole = deserializeLittleEndianInt();
        if (data.id == FndStructureConstants.RevisionRoleAndContextDeclarationFND) {
            data.gctxid = deserializeExtendedGUID();
        }
        document.registerAdditionalRevisionRole(data.gosid,
                data.subType.revisionRoleDeclaration.revisionRole, data.gctxid);
        // FIXME: deal with ObjectDataEncryptionKey
    } else if (data.id == FndStructureConstants.ObjectInfoDependencyOverridesFND) {
        OneNotePtr content = new OneNotePtr(this);
        if (!data.ref.equals(FileChunkReference.nil())) {
            content.reposition(data.ref); // otherwise it's positioned right at this node
        }
        data.subType.objectInfoDependencyOverrides.data =
                content.deserializeObjectInfoDependencyOverrideData();
    } else if (data.id == FndStructureConstants.FileDataStoreListReferenceFND) {
        // already processed this
    } else if (data.id == FndStructureConstants.FileDataStoreObjectReferenceFND) {
        FileChunkReference ref = deserializeFileChunkReference64();
        GUID guid = deserializeGUID();
        ExtendedGUID extendedGuid = new ExtendedGUID(guid, 0);
        LOG.trace("found extended guid {}", extendedGuid);
        document.guidToRef.put(extendedGuid, ref);
        // The store object itself is read through a separate pointer placed at data.ref.
        OneNotePtr fileDataStorePtr = new OneNotePtr(this);
        fileDataStorePtr.reposition(data.ref);
        data.subType.fileDataStoreObjectReference.ref =
                fileDataStorePtr.deserializeFileDataStoreObject();
    } else if (data.id == FndStructureConstants.CanRevise.ObjectDeclarationWithRefCountFNDX ||
            data.id == FndStructureConstants.CanRevise.ObjectDeclarationWithRefCount2FNDX ||
            data.id == FndStructureConstants.CanRevise.ObjectDeclaration2RefCountFND ||
            data.id == FndStructureConstants.CanRevise.ObjectDeclaration2LargeRefCountFND ||
            data.id == FndStructureConstants.CanRevise.ReadOnlyObjectDeclaration2RefCountFND ||
            data.id ==
                    FndStructureConstants.CanRevise.ReadOnlyObjectDeclaration2LargeRefCountFND) {
        data.subType.objectDeclarationWithRefCount.body.file_data_store_reference = false;
        if (data.id == FndStructureConstants.CanRevise.ObjectDeclarationWithRefCountFNDX ||
                data.id == FndStructureConstants.CanRevise.ObjectDeclarationWithRefCount2FNDX) {
            data.subType.objectDeclarationWithRefCount.body =
                    deserializeObjectDeclarationWithRefCountBody();
        } else { // one of the other 4 that use the ObjectDeclaration2Body
            data.subType.objectDeclarationWithRefCount.body =
                    deserializeObjectDeclaration2Body();
        }
        // These three ids store cRef in one byte; the remaining ids use a 32-bit cRef.
        if (data.id == FndStructureConstants.CanRevise.ObjectDeclarationWithRefCountFNDX ||
                data.id == FndStructureConstants.CanRevise.ObjectDeclaration2RefCountFND ||
                data.id ==
                        FndStructureConstants.CanRevise.ReadOnlyObjectDeclaration2RefCountFND) {
            data.subType.objectDeclarationWithRefCount.cRef = deserializeLittleEndianChar();
        } else {
            data.subType.objectDeclarationWithRefCount.cRef = deserializeLittleEndianInt();
        }
        if (data.id == FndStructureConstants.CanRevise.ReadOnlyObjectDeclaration2RefCountFND ||
                data.id ==
                        FndStructureConstants.CanRevise
                                .ReadOnlyObjectDeclaration2LargeRefCountFND) {
            // read-only declarations carry a trailing 16-byte md5 field
            ByteBuffer md5Buffer = ByteBuffer.allocate(16);
            deserializeBytes(md5Buffer);
            data.subType.objectDeclarationWithRefCount.readOnly.md5 = md5Buffer.array();
        }
        data.idDesc = "oid";
        postprocessObjectDeclarationContents(data, curPath);
        LOG.debug("{}Ref Count JCID {}", getIndent(),
                data.subType.objectDeclarationWithRefCount.body.jcid);
    } else if (
            data.id == FndStructureConstants.CanRevise.ObjectDeclarationFileData3RefCountFND ||
                    data.id ==
                            FndStructureConstants.CanRevise
                                    .ObjectDeclarationFileData3LargeRefCountFND) {
        data.subType.objectDeclarationWithRefCount.body.oid = deserializeCompactID();
        long jcid = deserializeLittleEndianInt();
        data.subType.objectDeclarationWithRefCount.body.jcid.loadFrom32BitIndex(jcid);
        if (data.id == FndStructureConstants.CanRevise.ObjectDeclarationFileData3RefCountFND) {
            data.subType.objectDeclarationWithRefCount.cRef = deserializeLittleEndianChar();
        } else {
            data.subType.objectDeclarationWithRefCount.cRef = deserializeLittleEndianInt();
        }
        // cch is a character count; the string that follows is read as cch * 2 bytes and
        // interpreted as UTF-16LE below.
        long cch = deserializeLittleEndianInt();
        long roomLeftLong = roomLeft();
        if (cch > roomLeftLong) { // not a valid guid
            throw new TikaException(
                    "Data out of bounds - cch " + cch + " is > room left = " + roomLeftLong);
        }
        if (cch > dif.size()) {
            throw new TikaMemoryLimitException(
                    "CCH=" + cch + " was found that was greater" + " than file size " +
                            dif.size());
        }
        ByteBuffer dataSpaceBuffer = ByteBuffer.allocate((int) cch * 2);
        dif.read(dataSpaceBuffer);
        byte[] dataSpaceBufferBytes = dataSpaceBuffer.array();
        offset += dataSpaceBufferBytes.length;
        // A file data store reference is the IFNDF prefix followed by a curly-brace GUID;
        // anything else is treated as an external reference and ignored.
        if (dataSpaceBufferBytes.length == (IFNDF_GUID_LENGTH * 2 + IFNDF.length) &&
                Arrays.equals(IFNDF,
                        Arrays.copyOfRange(dataSpaceBufferBytes, 0, IFNDF.length))) {
            data.subType.objectDeclarationWithRefCount.body.file_data_store_reference = true;
            GUID guid = GUID.fromCurlyBraceUTF16Bytes(
                    Arrays.copyOfRange(dataSpaceBufferBytes, IFNDF.length,
                            dataSpaceBufferBytes.length));
            ExtendedGUID extendedGUID = new ExtendedGUID(guid, 0);
            FileChunkReference fileChunk = document.getAssocGuidToRef(extendedGUID);
            if (fileChunk == null) {
                LOG.debug("{} have not seen GUID {} yet", getIndent(), extendedGUID);
            } else {
                // TODO - call postprocessObjectDeclarationContents on this object?
            }
        } else {
            LOG.debug("{}Ignoring an external reference {}", getIndent(),
                    new String(dataSpaceBufferBytes, StandardCharsets.UTF_16LE));
        }
    } else if (data.id == FndStructureConstants.ObjectGroupListReferenceFND) {
        data.idDesc = "object_group_id";
        data.gosid = deserializeExtendedGUID(); // the object group id
        // the ref populates the FileNodeList children
    } else if (data.id == FndStructureConstants.DataSignatureGroupDefinitionFND) {
        data.idDesc = "data_sig";
        data.gosid = deserializeExtendedGUID(); // the DataSignatureGroup
    } else if (data.id == FndStructureConstants.RevisionManifestListReferenceFND) {
        document.revisionMap.putIfAbsent(document.currentRevision, new Revision());
        Revision currentRevision = document.revisionMap.get(document.currentRevision);
        currentRevision.manifestList.add(curPath);
    } else {
        LOG.debug("No fnd needed to be parsed for data.id=0x{} ({})",
                Long.toHexString(data.id), FndStructureConstants.nameOf(data.id));
    }

    if (data.baseType == 2) {
        // Generic baseType == 2 parser - means we have children to parse.
        OneNotePtr subList = new OneNotePtr(this);
        // position the subList pointer to the data.ref and deserialize recursively.
        subList.reposition(data.ref);
        subList.deserializeFileNodeList(data.childFileNodeList, curPath);
    }

    // Skip to the end of this node regardless of how much of the payload was consumed,
    // and give the caller its original end bound back.
    offset = backup.offset + data.size;
    end = backup.end;

    // NOTE(review): the check requires the top header bit to be 1, although the message
    // text reads "NONZERO"; the message is kept unchanged for compatibility.
    if (reserved != 1) {
        throw new TikaException("RESERVED_NONZERO");
    }

    if (data.baseType == 1 && !(data.ref.equals(FileChunkReference.nil()))) {
        document.setAssocGuidToRef(data.gosid, data.ref);
        // repositioning is again only used to validate the reference
        OneNotePtr content = new OneNotePtr(this);
        content.reposition(data.ref);
        if (data.hasGctxid()) {
            LOG.debug("{}gctxid {}", getIndent(), data.gctxid);
        }
    } else if (!data.gosid.equals(ExtendedGUID.nil())) {
        LOG.trace("Non base type == 1 guid {}", data.gosid);
    }

    --indentLevel;
    if (data.gosid.equals(ExtendedGUID.nil())) {
        LOG.debug("{}End Node {} ({}) - Offset={}, End={}", getIndent(),
                FndStructureConstants.nameOf(data.id), (int) data.id, offset, end);
    } else {
        LOG.debug("{}End Node {} ({}) {}:[{}] - Offset={}, End={}", getIndent(),
                FndStructureConstants.nameOf(data.id), (int) data.id, data.idDesc, data.gosid,
                offset, end);
    }
    return data;
}

/**
 * Fetches a revision from {@code document.revisionMap}, reporting a missing entry as a
 * {@link TikaException} instead of letting a {@link NullPointerException} escape.
 *
 * @param revisionId key of the revision to fetch
 * @param role       short label ("current", "dependent") used in the error message
 * @return the registered revision, never {@code null}
 * @throws TikaException if no revision is registered under {@code revisionId}
 */
private Revision lookupRegisteredRevision(ExtendedGUID revisionId, String role)
        throws TikaException {
    Revision revision = document.revisionMap.get(revisionId);
    if (revision == null) {
        throw new TikaException("No " + role + " revision registered for " + revisionId);
    }
    return revision;
}