in tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/xps/XPSPageContentHandler.java [146:242]
public void startElement(String uri, String localName, String qName, Attributes atts)
        throws SAXException {
    // SAX callback for the start of an element on an XPS page.
    //
    // - Canvas / VisualBrush: push the element's clip (or transform) onto canvasStack,
    //   using NULL_CLIP as a placeholder when the attribute is absent, then return.
    // - Path: remember any navigation URI and the automation help text.
    // - ImageBrush: remember the image source.
    // - Glyphs: collect the text run and file it under the current canvas stack key.
    //
    // Throws SAXException if a numeric Glyphs attribute (origin, bidi level or
    // font size) is present but cannot be parsed.
    if (CANVAS.equals(localName)) {
        String clip = getVal(CLIP, atts);
        canvasStack.push(clip == null ? NULL_CLIP : clip);
        return;
    } else if (VISUAL_BRUSH.equals(localName)) {
        // Also push visual brush transform onto stack as this will move children
        String transform = getVal(TRANSFORM, atts);
        canvasStack.push(transform == null ? NULL_CLIP : transform);
        return;
    } else if (PATH.equals(localName)) {
        //for now just grab them and dump them at the end of the page.
        String url = getVal(NAVIGATE_URI, atts);
        if (url != null) {
            urls.add(url);
        }
        originalLocationOnDrive = getVal(AUTOMATION_PROPERITES_HELP_TEXT, atts);
    } else if (IMAGE_BRUSH.equals(localName)) {
        imageSourcePathInZip = getVal(IMAGE_SOURCE, atts);
    }

    // Everything below only applies to Glyphs elements.
    if (!GLYPHS.equals(localName)) {
        return;
    }

    String name = null;
    Float originX = null;
    Float originY = null;
    String unicodeString = null;
    Integer bidilevel = null;
    List<GlyphIndex> indices = null;
    float fontSize = 0;
    String fontUri = null;

    for (int i = 0; i < atts.getLength(); i++) {
        String lName = atts.getLocalName(i);
        String value = atts.getValue(i);
        value = (value == null) ? "" : value.trim();
        if (ORIGIN_X.equals(lName) && value.length() > 0) {
            try {
                originX = Float.parseFloat(value);
            } catch (NumberFormatException e) {
                throw new SAXException(e);
            }
        } else if (ORIGIN_Y.equals(lName) && value.length() > 0) {
            try {
                originY = Float.parseFloat(value);
            } catch (NumberFormatException e) {
                throw new SAXException(e);
            }
        } else if (UNICODE_STRING.equals(lName)) {
            // Use the raw attribute value here rather than the trimmed copy.
            unicodeString = atts.getValue(i);
        } else if (BIDI_LEVEL.equals(lName) && value.length() > 0) {
            try {
                bidilevel = Integer.parseInt(value);
            } catch (NumberFormatException e) {
                throw new SAXException(e);
            }
        } else if (INDICES.equals(lName)) {
            indices = parseIndicesString(value);
        } else if (NAME.equals(lName)) {
            name = value;
        } else if (FONT_RENDERING_EM_SIZE.equals(lName) && value.length() > 0) {
            // Treat the font size like the other numeric attributes: skip an empty
            // value (fontSize stays 0) and report a malformed one as a SAXException
            // instead of letting an unchecked NumberFormatException escape.
            try {
                fontSize = Float.parseFloat(value);
            } catch (NumberFormatException e) {
                throw new SAXException(e);
            }
        } else if (FONT_URI.equals(lName)) {
            fontUri = value;
        }
    }

    // A Glyphs element without a UnicodeString attribute contributes no run.
    if (unicodeString != null) {
        // Fall back to sentinel coordinates when an origin attribute was absent or empty.
        originX = (originX == null) ? Integer.MIN_VALUE : originX;
        originY = (originY == null) ? Integer.MAX_VALUE : originY;

        // The key for this run is every entry of the canvas stack, ';'-terminated.
        StringBuilder canvasStringBuilder = new StringBuilder();
        for (String s : canvasStack) {
            canvasStringBuilder.append(s);
            canvasStringBuilder.append(';');
        }
        String canvasCombined = canvasStringBuilder.toString();

        List<GlyphRun> runs = canvases.get(canvasCombined);
        if (runs == null) {
            runs = new ArrayList<>();
        }
        if (indices == null) {
            indices = new ArrayList<>();
        }
        runs.add(new GlyphRun(name, originY, originX, unicodeString, bidilevel, indices,
                fontSize, fontUri));
        canvases.put(canvasCombined, runs);
    }
}