in tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-miscoffice-module/src/main/java/org/apache/tika/parser/odf/OpenDocumentBodyHandler.java [346:443]
/**
 * Handles the start of an OpenDocument element.
 *
 * <p>In order, this method:
 * <ol>
 *   <li>emits an empty XHTML {@code img} element for {@code draw:image}, with a
 *       {@code src} of {@code "embedded:" + href} when an xlink href is present;</li>
 *   <li>flags the start of a binary-data element and returns without further
 *       processing;</li>
 *   <li>updates the style bookkeeping (text/paragraph styles, list styles) from
 *       style definition elements;</li>
 *   <li>records whether the element is a text node at the current depth and
 *       increments the depth;</li>
 *   <li>bumps the complete-filtering counter for elements whose content must be
 *       dropped entirely;</li>
 *   <li>if nothing is being filtered, maps the element to XHTML output or
 *       delegates to the superclass.</li>
 * </ol>
 *
 * @param namespaceURI namespace URI of the element
 * @param localName    local name of the element
 * @param qName        qualified name of the element
 * @param attrs        attributes attached to the element
 * @throws SAXException propagated from the downstream handler calls
 */
public void startElement(String namespaceURI, String localName, String qName, Attributes attrs)
        throws SAXException {
    if (DRAW_NS.equals(namespaceURI) && "image".equals(localName)) {
        String link = attrs.getValue(XLINK_NS, "href");
        AttributesImpl attr = new AttributesImpl();
        if (!StringUtils.isEmpty(link)) {
            attr.addAttribute("", "src", "src", "CDATA", "embedded:" + link);
        }
        // The image is emitted as an empty element; processing then continues below.
        handler.startElement(XHTMLContentHandler.XHTML, "img", "img", attr);
        handler.endElement(XHTMLContentHandler.XHTML, "img", "img");
    }
    if (BINARY_DATA.equals(localName)) {
        inBinaryData = true;
        return;
    }
    // Keep track of the current node type: the flag stored in textNodeStack at
    // index nodeDepth says whether the node at that depth is a text node.
    // characters() is expected to consult the flag of the current depth to
    // decide whether to output text. nodeDepth holds the depth of the current
    // node and therefore also marks the top of the stack.
    assert nodeDepth >= 0;
    // Set styles
    if (STYLE_NS.equals(namespaceURI) && "style".equals(localName)) {
        String family = attrs.getValue(STYLE_NS, "family");
        if ("text".equals(family)) {
            currTextStyle = new TextStyle();
            currTextStyleName = attrs.getValue(STYLE_NS, "name");
        } else if ("paragraph".equals(family)) {
            currTextStyle = new TextStyle();
            currParagraphStyleName = attrs.getValue(STYLE_NS, "name");
        }
    } else if (TEXT_NS.equals(namespaceURI) && "list-style".equals(localName)) {
        listStyle = new ListStyle();
        String name = attrs.getValue(STYLE_NS, "name");
        listStyleMap.put(name, listStyle);
    } else if (currTextStyle != null && STYLE_NS.equals(namespaceURI) &&
            "text-properties".equals(localName)) {
        String fontStyle = attrs.getValue(FORMATTING_OBJECTS_NS, "font-style");
        if ("italic".equals(fontStyle) || "oblique".equals(fontStyle)) {
            currTextStyle.italic = true;
        }
        // Malformed weights (empty, non-integer) must not abort the parse,
        // so the decision is delegated to a helper that never throws.
        if (isBoldFontWeight(attrs.getValue(FORMATTING_OBJECTS_NS, "font-weight"))) {
            currTextStyle.bold = true;
        }
        String underlineStyle = attrs.getValue(STYLE_NS, "text-underline-style");
        if (underlineStyle != null && !underlineStyle.equals("none")) {
            currTextStyle.underlined = true;
        }
    } else if (listStyle != null && TEXT_NS.equals(namespaceURI)) {
        if ("list-level-style-bullet".equals(localName)) {
            listStyle.ordered = false;
        } else if ("list-level-style-number".equals(localName)) {
            listStyle.ordered = true;
        }
    }
    textNodeStack.set(nodeDepth++, isTextNode(namespaceURI, localName));
    // filter *all* content of some tags
    assert completelyFiltered >= 0;
    if (needsCompleteFiltering(namespaceURI, localName)) {
        completelyFiltered++;
    }
    // call next handler if no filtering
    if (completelyFiltered == 0) {
        // special handling of text:h, which is passed directly
        // to the incoming handler
        if (TEXT_NS.equals(namespaceURI) && "h".equals(localName)) {
            final String el = headingStack.push(getXHTMLHeaderTagName(attrs));
            handler.startElement(XHTMLContentHandler.XHTML, el, el, EMPTY_ATTRIBUTES);
        } else if (TEXT_NS.equals(namespaceURI) && "list".equals(localName)) {
            startList(attrs.getValue(TEXT_NS, "style-name"));
        } else if (TEXT_NS.equals(namespaceURI) && "span".equals(localName)) {
            startSpan(attrs.getValue(TEXT_NS, "style-name"));
        } else if (TEXT_NS.equals(namespaceURI) && "p".equals(localName)) {
            startParagraph(attrs.getValue(TEXT_NS, "style-name"));
        } else if (TEXT_NS.equals(namespaceURI) && "s".equals(localName)) {
            handler.characters(SPACE, 0, 1);
        } else if ("annotation".equals(localName)) {
            closeStyleTags();
            handler.startElement(XHTML, "p", "p", ANNOTATION_ATTRIBUTES);
        } else if ("note".equals(localName)) {
            closeStyleTags();
            handler.startElement(XHTML, "p", "p", NOTE_ATTRIBUTES);
        } else if ("notes".equals(localName)) {
            closeStyleTags();
            handler.startElement(XHTML, "p", "p", NOTES_ATTRIBUTES);
        } else {
            super.startElement(namespaceURI, localName, qName, attrs);
        }
    }
}

/**
 * Decides whether a {@code font-weight} attribute value denotes bold text.
 *
 * <p>The keywords {@code bold} and {@code bolder} are bold, as is any value
 * that starts with a digit and parses as an integer greater than 500.
 * A {@code null}, empty, or otherwise unparseable value is treated as not
 * bold instead of raising an exception.
 *
 * @param fontWeight raw attribute value, may be {@code null}
 * @return {@code true} if the value denotes a bold weight
 */
private static boolean isBoldFontWeight(String fontWeight) {
    if (fontWeight == null || fontWeight.isEmpty()) {
        return false;
    }
    if ("bold".equals(fontWeight) || "bolder".equals(fontWeight)) {
        return true;
    }
    // Only values with a leading digit are considered numeric; signed
    // values such as "+700" are deliberately not accepted.
    if (!Character.isDigit(fontWeight.charAt(0))) {
        return false;
    }
    try {
        return Integer.parseInt(fontWeight) > 500;
    } catch (NumberFormatException ignored) {
        // e.g. "700.0", "600px" or an overflowing number: not a usable weight
        return false;
    }
}