in tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/rtf/TextExtractor.java [1441:1551]
/**
 * Handles the end of an RTF group (a closing <code>}</code>).
 *
 * <p>In order, this method:
 * <ol>
 *   <li>while in the header, flushes the pending metadata value into
 *       {@code metadata} under {@code nextMetaData} and clears
 *       {@code pendingBuffer};</li>
 *   <li>resets {@code ansiSkip};</li>
 *   <li>notifies {@code embObjHandler} when an objdata or picture-related
 *       group closes;</li>
 *   <li>emits a space after an annotation group;</li>
 *   <li>pops the enclosing group state and, when no field is in progress
 *       ({@code fieldState == 0}), closes/re-opens {@code b}/{@code i}
 *       elements so the output matches the enclosing group's styles;</li>
 *   <li>advances the field state: in state 1 a {@code HYPERLINK}
 *       instruction in {@code pendingBuffer} is turned into
 *       {@code pendingURL} (state 2), any other content resets to state 0;
 *       in state 3 the open {@code a} element is closed.</li>
 * </ol>
 *
 * @throws IOException   if thrown by one of the handlers invoked here
 * @throws SAXException  if thrown while emitting start/end elements or by
 *                       one of the handlers invoked here
 * @throws TikaException if thrown by one of the handlers invoked here
 */
private void processGroupEnd() throws IOException, SAXException, TikaException {
    if (inHeader) {
        if (nextMetaData != null) {
            if (nextMetaData == TikaCoreProperties.CREATED) {
                Calendar cal = Calendar.getInstance(TimeZone.getDefault(), Locale.ROOT);
                // getInstance() is initialised to "now" and the six-argument
                // set() retains every field it does not name, so clear first;
                // otherwise the current millisecond leaks into the created date.
                cal.clear();
                cal.set(year, month - 1, day, hour, minute, 0);
                metadata.set(nextMetaData, cal.getTime());
            } else if (nextMetaData.isMultiValuePermitted()) {
                metadata.add(nextMetaData, pendingBuffer.toString());
            } else {
                metadata.set(nextMetaData, pendingBuffer.toString());
            }
            nextMetaData = null;
        }
        pendingBuffer.setLength(0);
    }

    assert groupState.depth > 0;
    ansiSkip = 0;

    if (groupState.objdata) {
        // A failure in one embedded object is recorded in the metadata
        // instead of aborting this method.
        try {
            embObjHandler.handleCompletedObject();
        } catch (TikaException | IOException e) {
            EmbeddedDocumentUtil.recordException(e, metadata);
        }
        groupState.objdata = false;
    } else if (groupState.pictDepth > 0) {
        if (groupState.sn) {
            embObjHandler.endSN();
        } else if (groupState.sv) {
            embObjHandler.endSV();
        } else if (groupState.sp) {
            embObjHandler.endSP();
        } else if (groupState.pictDepth == 1) {
            // NOTE(review): unlike the objdata branch above, exceptions from
            // handleCompletedObject() propagate to the caller here -- confirm
            // that this difference is intended.
            embObjHandler.handleCompletedObject();
        }
    }

    if (groupState.annotation) {
        addOutputChar(SPACE);
    }
    if (groupState.object) {
        embObjHandler.setInObject(false);
    }

    // Be robust if RTF doc is corrupt (has too many
    // closing }s):
    // TODO: log a warning?
    if (!groupStates.isEmpty()) {
        // Restore group state:
        final GroupState outerGroupState = groupStates.removeLast();

        // Only modify styles if we're not in a hyperlink
        if (fieldState == 0) {
            // Close italic, if outer does not have italic or
            // bold changed:
            if (groupState.italic) {
                if (!outerGroupState.italic || groupState.bold != outerGroupState.bold) {
                    end("i");
                    // Mark italic as closed so the "open italic" check below
                    // re-opens it when the outer group is italic.
                    groupState.italic = false;
                }
            }

            // Close bold
            if (groupState.bold && !outerGroupState.bold) {
                end("b");
            }

            // Open bold
            if (!groupState.bold && outerGroupState.bold) {
                start("b");
            }

            // Open italic
            if (!groupState.italic && outerGroupState.italic) {
                start("i");
            }
        }
        groupState = outerGroupState;
    }
    assert groupStates.size() == groupState.depth;

    if (fieldState == 1) {
        String s = pendingBuffer.toString().trim();
        pendingBuffer.setLength(0);
        if (s.startsWith("HYPERLINK")) {
            // Drop the 9-character "HYPERLINK" keyword.
            s = s.substring(9).trim();
            // TODO: what other instructions can be in a
            // HYPERLINK destination?
            final boolean isLocalLink = s.contains("\\l ");
            // Use the first double-quoted span as the target; if there is no
            // complete quoted span the remaining text is used as-is.
            int idx = s.indexOf('"');
            if (idx != -1) {
                int idx2 = s.indexOf('"', 1 + idx);
                if (idx2 != -1) {
                    s = s.substring(1 + idx, idx2);
                }
            }
            pendingURL = (isLocalLink ? "#" : "") + s;
            fieldState = 2;
        } else {
            fieldState = 0;
        }

        // TODO: we could process the other known field
        // types.  Right now, we will extract their text
        // inlined, but fail to record them in metadata
        // as a field value.
    } else if (fieldState == 3) {
        end("a");
        fieldState = 0;
    }
}