in tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/onenote/OneNoteParser.java [79:184]
/*
 * Parses a OneNote file from the given stream, writing XHTML to the handler and OneNote
 * properties to the metadata.
 *
 * Two layouts are handled, selected from the parsed header:
 *  - MS-ONESTORE format: header fields are copied into metadata as hex strings, the
 *    document tree is walked, and author/timestamp metadata collected by the walker is set.
 *  - legacy or alternative packaging: the raw bytes are deserialized as an alternative
 *    package; if anything in that path fails, the legacy string dumper is used instead.
 *
 * Parameters:
 *  stream   - the OneNote file content; it is read fully into memory
 *  handler  - receives the XHTML output
 *  metadata - receives the extracted properties
 *  context  - passed through to the tree walker
 *
 * Throws TikaException if the header matches neither supported layout; IOException and
 * SAXException are propagated from reading the input and from writing to the handler.
 */
public void parse(InputStream stream, ContentHandler handler, Metadata metadata,
                  ParseContext context) throws IOException, SAXException, TikaException {
    // The complete file is buffered because the alternative-packaging branch deserializes
    // from the raw byte array, while the other paths read through a file-backed resource.
    byte[] oneStoreFileBytes = IOUtils.toByteArray(stream);
    try (TemporaryResources temporaryResources = new TemporaryResources();
            TikaInputStream tikaInputStream = TikaInputStream.get(oneStoreFileBytes);
            OneNoteDirectFileResource oneNoteDirectFileResource =
                    new OneNoteDirectFileResource(tikaInputStream.getFile())) {
        XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
        xhtml.startDocument();
        // NOTE(review): the resource is already a try-with-resources variable, so registering
        // it here looks like it will be closed a second time - confirm close() is idempotent
        // or drop one of the two.
        temporaryResources.addResource(oneNoteDirectFileResource);
        OneNoteDocument oneNoteDocument =
                createOneNoteDocumentFromDirectFileResource(oneNoteDirectFileResource);
        OneNoteHeader header = oneNoteDocument.header;
        if (header.isMsOneStoreFormat()) {
            setHeaderMetadata(header, metadata);
            Pair<Long, ExtendedGUID> roleAndContext = Pair.of(1L, ExtendedGUID.nil());
            OneNoteTreeWalker oneNoteTreeWalker =
                    new OneNoteTreeWalker(options, oneNoteDocument, oneNoteDirectFileResource,
                            xhtml, metadata, context, roleAndContext);
            oneNoteTreeWalker.walkTree();
            setTreeWalkerMetadata(oneNoteTreeWalker, metadata);
        } else if (header.isLegacyOrAlternativePackaging()) {
            try {
                AlternativePackaging alternatePackageOneStoreFile = new AlternativePackaging();
                alternatePackageOneStoreFile.doDeserializeFromByteArray(oneStoreFileBytes, 0);
                MSOneStoreParser onenoteParser = new MSOneStoreParser();
                MSOneStorePackage pkg =
                        onenoteParser.parse(alternatePackageOneStoreFile.dataElementPackage);
                pkg.walkTree(options, metadata, xhtml);
            } catch (Exception ignored) {
                // Deliberate best effort: any failure in the alternative-packaging path is
                // discarded and the legacy string dumper is run over the file instead.
                OneNoteLegacyDumpStrings dumpStrings =
                        new OneNoteLegacyDumpStrings(oneNoteDirectFileResource, xhtml);
                dumpStrings.dump();
            }
        } else {
            throw new TikaException("Invalid OneStore document - could not parse headers");
        }
        xhtml.endDocument();
    }
}

/**
 * Copies the numeric header fields into {@code metadata}, each under
 * {@code ONE_NOTE_PREFIX + <field name>} and formatted as a {@code 0x}-prefixed hex string.
 */
private void setHeaderMetadata(OneNoteHeader header, Metadata metadata) {
    setHexMetadata(metadata, "buildNumberCreated", header.buildNumberCreated);
    setHexMetadata(metadata, "buildNumberLastWroteToFile", header.buildNumberLastWroteToFile);
    setHexMetadata(metadata, "buildNumberNewestWritten", header.buildNumberNewestWritten);
    setHexMetadata(metadata, "buildNumberOldestWritten", header.buildNumberOldestWritten);
    setHexMetadata(metadata, "cbExpectedFileLength", header.cbExpectedFileLength);
    setHexMetadata(metadata, "cbFreeSpaceInFreeChunkList", header.cbFreeSpaceInFreeChunkList);
    setHexMetadata(metadata, "cbLegacyExpectedFileLength", header.cbLegacyExpectedFileLength);
    setHexMetadata(metadata, "cbLegacyFreeSpaceInFreeChunkList",
            header.cbLegacyFreeSpaceInFreeChunkList);
    setHexMetadata(metadata, "crcName", header.crcName);
    setHexMetadata(metadata, "cTransactionsInLog", header.cTransactionsInLog);
    setHexMetadata(metadata, "ffvLastCodeThatWroteToThisFile",
            header.ffvLastCodeThatWroteToThisFile);
    setHexMetadata(metadata, "ffvNewestCodeThatHasWrittenToThisFile",
            header.ffvNewestCodeThatHasWrittenToThisFile);
    setHexMetadata(metadata, "ffvOldestCodeThatMayReadThisFile",
            header.ffvOldestCodeThatMayReadThisFile);
    setHexMetadata(metadata, "ffvOldestCodeThatHasWrittenToThisFile",
            header.ffvOldestCodeThatHasWrittenToThisFile);
    setHexMetadata(metadata, "grfDebugLogFlags", header.grfDebugLogFlags);
    setHexMetadata(metadata, "nFileVersionGeneration", header.nFileVersionGeneration);
    setHexMetadata(metadata, "rgbPlaceholder", header.rgbPlaceholder);
}

/**
 * Stores {@code value} under {@code ONE_NOTE_PREFIX + fieldName} as {@code "0x"} followed by
 * its {@link Long#toHexString(long)} form.
 */
private void setHexMetadata(Metadata metadata, String fieldName, long value) {
    metadata.set(ONE_NOTE_PREFIX + fieldName, "0x" + Long.toHexString(value));
}

/**
 * Copies the authors and timestamps gathered by a finished tree walk into {@code metadata}.
 * Empty author collections and timestamps still holding their "unset" value are skipped.
 */
private void setTreeWalkerMetadata(OneNoteTreeWalker oneNoteTreeWalker, Metadata metadata) {
    if (!oneNoteTreeWalker.getAuthors().isEmpty()) {
        metadata.set(TikaCoreProperties.CREATOR,
                oneNoteTreeWalker.getAuthors().toArray(new String[0]));
    }
    if (!oneNoteTreeWalker.getMostRecentAuthors().isEmpty()) {
        metadata.set(Property.externalTextBag(ONE_NOTE_PREFIX + "mostRecentAuthors"),
                oneNoteTreeWalker.getMostRecentAuthors().toArray(new String[0]));
    }
    if (!oneNoteTreeWalker.getOriginalAuthors().isEmpty()) {
        metadata.set(Property.externalTextBag(ONE_NOTE_PREFIX + "originalAuthors"),
                oneNoteTreeWalker.getOriginalAuthors().toArray(new String[0]));
    }
    // The previous guard compared Instant.MAX with Instant.ofEpochMilli(...). No millisecond
    // value can produce Instant.MAX, so that guard was always true. Compare the raw value
    // instead, mirroring the Long.MIN_VALUE check used for getLastModified() below.
    // NOTE(review): assumes the walker initialises the creation timestamp to Long.MAX_VALUE
    // when none was seen - confirm against OneNoteTreeWalker.
    long creationTimestamp = oneNoteTreeWalker.getCreationTimestamp();
    if (creationTimestamp != Long.MAX_VALUE) {
        metadata.set(ONE_NOTE_PREFIX + "creationTimestamp", String.valueOf(creationTimestamp));
    }
    if (!Instant.MIN.equals(oneNoteTreeWalker.getLastModifiedTimestamp())) {
        metadata.set(ONE_NOTE_PREFIX + "lastModifiedTimestamp", String.valueOf(
                oneNoteTreeWalker.getLastModifiedTimestamp().toEpochMilli()));
    }
    if (oneNoteTreeWalker.getLastModified() > Long.MIN_VALUE) {
        metadata.set(TikaCoreProperties.MODIFIED,
                String.valueOf(oneNoteTreeWalker.getLastModified()));
    }
}