in opennlp-tools/src/main/java/opennlp/tools/formats/nkjp/NKJPSegmentationDocument.java [92:159]
/**
 * Reads an NKJP segmentation XML document and builds a {@link NKJPSegmentationDocument}
 * from it.
 *
 * <p>Every node matching {@code /teiCorpus/TEI/text/body/p/s} is treated as a sentence.
 * For each sentence the direct {@code seg} and {@code choice} children are visited in
 * document order:
 * <ul>
 *   <li>a {@code SEG} node is always recorded;</li>
 *   <li>for a {@code CHOICE} node, each child is inspected: an {@code NKJP_PAREN} child
 *       contributes all of its direct {@code seg} children unless
 *       {@code checkRejectedParen} returns {@code true} for it, and a {@code SEG} child is
 *       recorded unless {@code checkRejected} returns {@code true} for it.</li>
 * </ul>
 * Segments are keyed by {@code xmlID(node)} and sentences by the text of their
 * {@code XML_ID} attribute; both maps keep insertion order. A sentence without that
 * attribute is stored under a {@code null} key, and a repeated key replaces the
 * earlier entry.
 *
 * @param is the stream handed to the XML document builder
 * @return a document wrapping the collected sentence-to-segments map
 * @throws IOException if parsing the XML or evaluating an XPath expression fails; the
 *         underlying exception is attached as the cause
 */
public static NKJPSegmentationDocument parse(InputStream is) throws IOException {
  Map<String, Map<String, Pointer>> sentences = new LinkedHashMap<>();
  try {
    DocumentBuilder builder = XmlUtil.createDocumentBuilder();
    Document document = builder.parse(is);
    XPath xpath = XPathFactory.newInstance().newXPath();
    final XPathExpression sentenceQuery = xpath.compile("/teiCorpus/TEI/text/body/p/s");
    final XPathExpression segOrChoiceQuery = xpath.compile("./seg|./choice");
    final XPathExpression segQuery = xpath.compile("./seg");

    NodeList sentenceNodes = (NodeList) sentenceQuery.evaluate(document, XPathConstants.NODESET);
    for (int s = 0; s < sentenceNodes.getLength(); s++) {
      Node sentence = sentenceNodes.item(s);

      // The id stays null when the sentence carries no XML_ID attribute.
      Node idAttribute = sentence.getAttributes().getNamedItem(XML_ID);
      String sentenceId = (idAttribute == null) ? null : idAttribute.getTextContent();

      Map<String, Pointer> segments = new LinkedHashMap<>();
      NodeList children = (NodeList) segOrChoiceQuery.evaluate(sentence, XPathConstants.NODESET);
      for (int c = 0; c < children.getLength(); c++) {
        Node child = children.item(c);
        String childName = child.getNodeName();

        // Plain segment directly under the sentence: always kept.
        if (childName.equals(SEG)) {
          segments.put(xmlID(child), fromSeg(child));
          continue;
        }
        if (!childName.equals(CHOICE)) {
          continue;
        }

        // A choice node: keep only the alternatives that are not marked as rejected.
        NodeList alternatives = child.getChildNodes();
        for (int a = 0; a < alternatives.getLength(); a++) {
          Node alternative = alternatives.item(a);
          String alternativeName = alternative.getNodeName();

          if (alternativeName.equals(NKJP_PAREN)) {
            if (checkRejectedParen(alternative)) {
              continue;
            }
            // Accepted paren group: record each of its direct seg children.
            NodeList groupedSegs =
                (NodeList) segQuery.evaluate(alternative, XPathConstants.NODESET);
            for (int g = 0; g < groupedSegs.getLength(); g++) {
              Node groupedSeg = groupedSegs.item(g);
              segments.put(xmlID(groupedSeg), fromSeg(groupedSeg));
            }
            continue;
          }

          if (alternativeName.equals(SEG) && !checkRejected(alternative)) {
            segments.put(xmlID(alternative), fromSeg(alternative));
          }
        }
      }
      sentences.put(sentenceId, segments);
    }
  } catch (SAXException | XPathExpressionException | IOException e) {
    throw new IOException("Failed to parse NKJP document", e);
  }
  return new NKJPSegmentationDocument(sentences);
}