in tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/onenote/OneNoteTreeWalker.java [411:546]
/**
 * Builds the map representation of a single property value.
 *
 * <p>Besides building the map, this method updates the walker's running metadata
 * ({@code lastModifiedTimestamp}, {@code creationTimestamp}, {@code lastModified} and the
 * author collections) and may write text to the XHTML handler.
 *
 * <p>The returned map always contains {@code oneNoteType} and {@code propertyId}. Depending
 * on the property it may also contain {@code Author} / {@code MostRecentAuthor} /
 * {@code OriginalAuthor}, {@code scalar}, {@code isBinary}, {@code dataAscii},
 * {@code dataUnicode16LE}, {@code children} and {@code propertySet}.
 *
 * @param propertyValue    the property value to process
 * @param parentPropertyId id of the property under which this value was reached; may be
 *                         {@code null}
 * @return the map describing this property value
 * @throws IOException   if reading from the underlying document fails
 * @throws TikaException on a Tika-level failure, e.g. the {@code TikaMemoryLimitException}
 *                       raised when the raw data claims to be larger than the document
 * @throws SAXException  if writing to the XHTML handler fails
 */
private Map<String, Object> processPropertyValue(PropertyValue propertyValue,
        OneNotePropertyId parentPropertyId)
        throws IOException, TikaException, SAXException {
    Map<String, Object> propMap = new HashMap<>();
    propMap.put("oneNoteType", "PropertyValue");
    propMap.put("propertyId", propertyValue.propertyId.toString());

    // Kept as an ==-chain rather than a switch: == simply falls through to the generic
    // branches if propertyEnum is null, whereas a switch on an enum would throw.
    OneNotePropertyEnum propertyEnum = propertyValue.propertyId.propertyEnum;
    if (propertyEnum == OneNotePropertyEnum.LastModifiedTimeStamp) {
        // presumably 100ns ticks counted from 1601 -- TODO confirm; converted to epoch seconds
        long fullval = propertyValue.scalar;
        Instant instant = Instant.ofEpochSecond(fullval / 10000000 + DATETIME_EPOCH_DIFF_1601);
        if (instant.isAfter(lastModifiedTimestamp)) {
            lastModifiedTimestamp = instant;
        }
    } else if (propertyEnum == OneNotePropertyEnum.CreationTimeStamp) {
        // add the TIME32_EPOCH_DIFF_1980 because OneNote TIME32 epoch time is per 1980, not
        // 1970; the earliest creation time seen wins
        long creationTs = propertyValue.scalar + TIME32_EPOCH_DIFF_1980;
        if (creationTs < creationTimestamp) {
            creationTimestamp = creationTs;
        }
    } else if (propertyEnum == OneNotePropertyEnum.LastModifiedTime) {
        // add the TIME32_EPOCH_DIFF_1980 because OneNote TIME32 epoch time is per 1980, not
        // 1970; the latest modification time seen wins
        long lastMod = propertyValue.scalar + TIME32_EPOCH_DIFF_1980;
        if (lastMod > lastModified) {
            lastModified = lastMod;
        }
    } else if (propertyEnum == OneNotePropertyEnum.Author) {
        // The flags are set by a preceding AuthorMostRecent / AuthorOriginal property and
        // decide which bucket this author belongs to; both are reset once consumed.
        String author = getAuthor(propertyValue);
        if (mostRecentAuthorProp) {
            propMap.put("MostRecentAuthor", author);
            mostRecentAuthors.add(author);
        } else if (originalAuthorProp) {
            propMap.put("OriginalAuthor", author);
            originalAuthors.add(author);
        } else {
            propMap.put("Author", author);
            authors.add(author);
        }
        mostRecentAuthorProp = false;
        originalAuthorProp = false;
    } else if (propertyEnum == OneNotePropertyEnum.AuthorMostRecent) {
        mostRecentAuthorProp = true;
    } else if (propertyEnum == OneNotePropertyEnum.AuthorOriginal) {
        originalAuthorProp = true;
    } else if (propertyValue.propertyId.type > 0 && propertyValue.propertyId.type <= 6) {
        propMap.put("scalar", propertyValue.scalar);
    } else {
        processRawPropertyData(propertyValue, parentPropertyId, propMap);
    }

    if (propertyValue.compactIDs != null) {
        List<Map<String, Object>> children = new ArrayList<>();
        for (CompactID compactID : propertyValue.compactIDs) {
            FileNodePtr childFileNodePointer = oneNoteDocument.guidToObject.get(compactID.guid);
            // children are walked with THIS property's id as their parent
            children.add(walkFileNodePtr(childFileNodePointer, propertyValue.propertyId));
        }
        if (!children.isEmpty()) {
            propMap.put("children", children);
        }
    }
    if (propertyValue.propertySet != null && propertyValue.propertySet.rgPridsData != null) {
        // the nested property set is processed with this value's parent id, not its own id
        List<Map<String, Object>> propSet =
                processPropertySet(propertyValue.propertySet, parentPropertyId);
        if (!propSet.isEmpty()) {
            propMap.put("propertySet", propSet);
        }
    }
    return propMap;
}

/**
 * Handles a property value whose payload is raw data rather than a scalar: records
 * {@code isBinary} in {@code propMap}, decodes textual payloads into {@code dataAscii} or
 * {@code dataUnicode16LE}, and forwards rich-text binary payloads to
 * {@code handleRichEditTextUnicode}.
 *
 * @param propertyValue    the property value whose {@code rawData} is processed
 * @param parentPropertyId id of the property under which the value was reached; may be
 *                         {@code null}
 * @param propMap          the map being built for this property value; modified in place
 */
private void processRawPropertyData(PropertyValue propertyValue,
        OneNotePropertyId parentPropertyId, Map<String, Object> propMap)
        throws IOException, TikaException, SAXException {
    OneNotePropertyEnum propertyEnum = propertyValue.propertyId.propertyEnum;

    // presumably this also positions dif at the start of the raw data, since the reads
    // below go through dif -- TODO confirm against OneNotePtr.reposition
    OneNotePtr content = new OneNotePtr(oneNoteDocument, dif);
    content.reposition(propertyValue.rawData);
    boolean isBinary = propertyIsBinary(propertyEnum);
    propMap.put("isBinary", isBinary);

    // Single guard for every payload kind: never allocate or process more bytes than the
    // document itself holds.
    int contentSize = content.size();
    if (contentSize > dif.size()) {
        throw new TikaMemoryLimitException(
                "File data store cb " + contentSize + " exceeds document size: " +
                        dif.size());
    }

    boolean isAscii = propertyEnum == OneNotePropertyEnum.TextExtendedAscii;
    if (isAscii || !isBinary) {
        ByteBuffer buf = ByteBuffer.allocate(contentSize);
        // NOTE(review): the result of read is not checked; if it can return early the
        // unread tail of the buffer stays zero-filled -- confirm read fills the buffer
        dif.read(buf);
        if (isAscii) {
            // extended-ASCII text is always written to the output
            String text = new String(buf.array(), StandardCharsets.US_ASCII);
            propMap.put("dataAscii", text);
            writeParagraph(text);
        } else {
            // every other non-binary payload is decoded as UTF-16LE, whatever its length;
            // it is only written to the output when the options ask for this property
            String text = new String(buf.array(), StandardCharsets.UTF_16LE);
            propMap.put("dataUnicode16LE", text);
            if (options.getUtf16PropertiesToPrint().contains(propertyEnum)) {
                writeParagraph(text);
            }
        }
        return;
    }

    if (propertyEnum == OneNotePropertyEnum.RichEditTextUnicode
            || propertyEnum == OneNotePropertyEnum.CachedTitleString) {
        // only handle text for the latest revision, unless the options
        // have the onlyLatestRevision = false
        // NOTE(review): with onlyLatestRevision set, a null parentPropertyId also skips
        // the text -- confirm that is intended
        if (!options.isOnlyLatestRevision()
                || (parentPropertyId != null && parentPropertyId.propertyEnum
                        != OneNotePropertyEnum.ElementChildNodesOfVersionHistory)) {
            handleRichEditTextUnicode(contentSize);
        }
    } else {
        //TODO -- these seem to be somewhat broken font files and other
        //odds and ends...what are they and how should we process them?
        //handleEmbedded(content.size());
    }
}

/**
 * Writes {@code text} to the XHTML handler wrapped in a {@code P} element.
 *
 * @param text the characters to emit
 * @throws SAXException if the handler rejects the events
 */
private void writeParagraph(String text) throws SAXException {
    xhtml.startElement(P);
    xhtml.characters(text);
    xhtml.endElement(P);
}