in core/src/main/java/org/apache/stormcrawler/parse/filter/XPathFilter.java [98:152]
/**
 * Evaluates this filter's {@code expression} against the given fragment and
 * converts the outcome into a list of string values.
 *
 * <p>The conversion depends on {@code evalFunction}:
 *
 * <ul>
 *   <li>{@code STRING}: the result is treated as a single string, stripped
 *       and added as the only value (skipped if the result is {@code null}).
 *   <li>{@code SERIALIZE}: the result is treated as a node list. Element,
 *       document and document-fragment nodes are serialized with an XHTML
 *       output format (XML declaration omitted); text nodes contribute their
 *       raw, unstripped text content. Empty strings are not added.
 *   <li>any other function: the result is treated as a node list and the
 *       stripped text content of every node is added.
 * </ul>
 *
 * @param doc the document fragment the expression is evaluated against
 * @return the extracted values, in the order the nodes were returned
 * @throws XPathExpressionException propagated from evaluating the expression
 * @throws IOException propagated from serializing a node
 */
List<String> evaluate(DocumentFragment doc) throws XPathExpressionException, IOException {
    Object rawResult = expression.evaluate(doc, evalFunction.getReturnType());
    List<String> extracted = new LinkedList<>();

    switch (evalFunction) {
        case STRING: {
            if (rawResult != null) {
                extracted.add(StringUtils.strip((String) rawResult));
            }
            break;
        }
        case SERIALIZE: {
            NodeList matches = (NodeList) rawResult;

            // A single writer/serializer pair is shared by all nodes; the
            // writer's buffer is emptied after each serialized node.
            StringWriter buffer = new StringWriter();
            OutputFormat xhtmlFormat = new OutputFormat(Method.XHTML, null, false);
            xhtmlFormat.setOmitXMLDeclaration(true);
            XMLSerializer xhtmlSerializer = new XMLSerializer(buffer, xhtmlFormat);

            for (int idx = 0; idx < matches.getLength(); idx++) {
                Node current = matches.item(idx);
                short kind = current.getNodeType();

                if (kind == Node.TEXT_NODE) {
                    // Text nodes never go through the serializer, so there
                    // is nothing to read back from (or clear in) the buffer.
                    String content = current.getTextContent();
                    if (!content.isEmpty()) {
                        extracted.add(content);
                    }
                    continue;
                }

                if (kind == Node.ELEMENT_NODE) {
                    xhtmlSerializer.serialize((Element) current);
                } else if (kind == Node.DOCUMENT_NODE) {
                    xhtmlSerializer.serialize((Document) current);
                } else if (kind == Node.DOCUMENT_FRAGMENT_NODE) {
                    xhtmlSerializer.serialize((DocumentFragment) current);
                }
                // Any other node type writes nothing, leaving the buffer empty.

                String markup = buffer.toString();
                if (!markup.isEmpty()) {
                    extracted.add(markup);
                }
                buffer.getBuffer().setLength(0);
            }
            break;
        }
        default: {
            NodeList matches = (NodeList) rawResult;
            for (int idx = 0; idx < matches.getLength(); idx++) {
                extracted.add(StringUtils.strip(matches.item(idx).getTextContent()));
            }
            break;
        }
    }

    return extracted;
}