in tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/onenote/fsshttpb/MSOneStorePackage.java [164:268]
/**
 * Walks every property of every revision store object in {@code OtherFileNodeList},
 * recording timestamps and authors in {@code metadata} and writing printable text
 * properties to {@code xhtml} as paragraphs.
 * <p>
 * Timestamps and author lists are accumulated in fields of this object, so values seen
 * by earlier calls are still taken into account.
 *
 * @param options  supplies the set of UTF-16 properties whose text should be emitted
 * @param metadata receives the timestamp and author values
 * @param xhtml    receives the extracted text
 * @throws SAXException  if the content handler rejects an event
 * @throws TikaException declared for the helpers this method delegates to
 * @throws IOException   declared for the helpers this method delegates to
 */
public void walkTree(OneNoteTreeWalkerOptions options, Metadata metadata,
                     XHTMLContentHandler xhtml)
        throws SAXException, TikaException, IOException {
    for (RevisionStoreObjectGroup revisionStoreObjectGroup : OtherFileNodeList) {
        for (RevisionStoreObject revisionStoreObject : revisionStoreObjectGroup.objects) {
            PropertySet propertySet =
                    revisionStoreObject.propertySet.objectSpaceObjectPropSet.body;
            // rgData and rgPrids are indexed in lock-step: rgPrids[i] identifies rgData[i].
            for (int i = 0; i < propertySet.rgData.size(); ++i) {
                processProperty(propertySet.rgData.get(i), propertySet.rgPrids[i], options,
                        metadata, xhtml);
            }
        }
    }
    if (!authors.isEmpty()) {
        metadata.set(TikaCoreProperties.CREATOR, authors.toArray(new String[0]));
    }
    if (!mostRecentAuthors.isEmpty()) {
        metadata.set(Property.externalTextBag(ONE_NOTE_PREFIX + "mostRecentAuthors"),
                mostRecentAuthors.toArray(new String[0]));
    }
    if (!originalAuthors.isEmpty()) {
        metadata.set(Property.externalTextBag(ONE_NOTE_PREFIX + "originalAuthors"),
                originalAuthors.toArray(new String[0]));
    }
}

/**
 * Handles a single property: updates the timestamp/author state for the known metadata
 * properties, and forwards any other length-prefixed data property to
 * {@link #writeDataProperty}.
 */
private void processProperty(IProperty property, PropertyID propertyID,
                             OneNoteTreeWalkerOptions options, Metadata metadata,
                             XHTMLContentHandler xhtml)
        throws SAXException, TikaException, IOException {
    PropertyType propertyType = PropertyType.fromIntVal(propertyID.type);
    OneNotePropertyEnum oneNotePropertyEnum =
            OneNotePropertyEnum.of(Unsigned.uint(propertyID.value).longValue());

    if (oneNotePropertyEnum == OneNotePropertyEnum.LastModifiedTimeStamp) {
        // The raw value is divided by 10,000,000 and shifted by DATETIME_EPOCH_DIFF_1601
        // to obtain Unix seconds -- presumably 100ns ticks counted from 1601; confirm
        // against the constant's definition.
        long fullval = getScalar(property);
        Instant instant =
                Instant.ofEpochSecond(fullval / 10000000 + DATETIME_EPOCH_DIFF_1601);
        // keep the latest value seen so far
        if (instant.isAfter(lastModifiedTimestamp)) {
            lastModifiedTimestamp = instant;
        }
        metadata.set(ONE_NOTE_PREFIX + "lastModifiedTimestamp",
                String.valueOf(lastModifiedTimestamp.toEpochMilli()));
    } else if (oneNotePropertyEnum == OneNotePropertyEnum.CreationTimeStamp) {
        // add the TIME32_EPOCH_DIFF_1980 because OneNote TIME32 epoch time is per 1980,
        // not 1970
        long creationTs = getScalar(property) + TIME32_EPOCH_DIFF_1980;
        // keep the earliest value seen so far
        if (creationTs < creationTimestamp) {
            creationTimestamp = creationTs;
        }
        metadata.set(ONE_NOTE_PREFIX + "creationTimestamp",
                String.valueOf(creationTimestamp));
    } else if (oneNotePropertyEnum == OneNotePropertyEnum.LastModifiedTime) {
        // add the TIME32_EPOCH_DIFF_1980 because OneNote TIME32 epoch time is per 1980,
        // not 1970
        long lastMod = getScalar(property) + TIME32_EPOCH_DIFF_1980;
        // keep the latest value seen so far
        if (lastMod > lastModified) {
            lastModified = lastMod;
        }
        // NOTE(review): this stores a plain number of seconds under MODIFIED, which
        // looks like a date-typed property -- confirm consumers expect this format.
        metadata.set(TikaCoreProperties.MODIFIED, String.valueOf(lastModified));
    } else if (oneNotePropertyEnum == OneNotePropertyEnum.Author) {
        String author = new String(((PrtFourBytesOfLengthFollowedByData) property).data,
                StandardCharsets.UTF_8);
        // The bucket is chosen by whichever author marker property has been seen.
        // NOTE(review): neither flag is reset in this method, so once a marker is seen
        // every later author lands in that bucket -- confirm this is intended.
        if (mostRecentAuthorProp) {
            mostRecentAuthors.add(author);
        } else if (originalAuthorProp) {
            originalAuthors.add(author);
        } else {
            authors.add(author);
        }
    } else if (oneNotePropertyEnum == OneNotePropertyEnum.AuthorMostRecent) {
        mostRecentAuthorProp = true;
    } else if (oneNotePropertyEnum == OneNotePropertyEnum.AuthorOriginal) {
        originalAuthorProp = true;
    } else if (propertyType == PropertyType.FourBytesOfLengthFollowedByData) {
        writeDataProperty(oneNotePropertyEnum, property, options, xhtml);
    }
}

/**
 * Emits the text carried by a length-prefixed data property.
 * <ul>
 * <li>{@code TextExtendedAscii} is always written, decoded as US-ASCII;</li>
 * <li>other non-binary properties are written as UTF-16LE, but only when listed in
 * {@code options.getUtf16PropertiesToPrint()};</li>
 * <li>of the binary properties only {@code RichEditTextUnicode} is processed.</li>
 * </ul>
 */
private void writeDataProperty(OneNotePropertyEnum oneNotePropertyEnum, IProperty property,
                               OneNoteTreeWalkerOptions options, XHTMLContentHandler xhtml)
        throws SAXException, TikaException, IOException {
    boolean isBinary = propertyIsBinary(oneNotePropertyEnum);
    PrtFourBytesOfLengthFollowedByData dataProperty =
            (PrtFourBytesOfLengthFollowedByData) property;

    if (oneNotePropertyEnum == OneNotePropertyEnum.TextExtendedAscii) {
        emitParagraph(xhtml, new String(dataProperty.data, StandardCharsets.US_ASCII));
    } else if (!isBinary) {
        // Payload length parity is deliberately not checked: even- and odd-length
        // payloads were always handled the same way.
        if (options.getUtf16PropertiesToPrint().contains(oneNotePropertyEnum)) {
            emitParagraph(xhtml, new String(dataProperty.data, StandardCharsets.UTF_16LE));
        }
    } else if (oneNotePropertyEnum == OneNotePropertyEnum.RichEditTextUnicode) {
        handleRichEditTextUnicode(dataProperty.data, xhtml);
    }
    //TODO -- the remaining binary properties seem to be somewhat broken font files and
    //other odds and ends...what are they and how should we process them (as embedded
    //documents)?
}

/**
 * Writes {@code text} to {@code xhtml} wrapped in a single {@code P} element.
 */
private void emitParagraph(XHTMLContentHandler xhtml, String text) throws SAXException {
    xhtml.startElement(P);
    xhtml.characters(text);
    xhtml.endElement(P);
}