in opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADSentenceStream.java [223:322]
/**
 * Parses a single line into a tree element.
 * <p>
 * The line is tried, in this order, against {@code NODE_PATTERN},
 * {@code LEAF_PATTERN} and {@code PUNCTUATION_PATTERN}; the first pattern that
 * matches the whole line decides the result. Lines that match none of them go
 * through a few special cases:
 * <ul>
 * <li>{@code "_"} and lines starting with {@code "<lixo"} or {@code "pause"}
 * are skipped,</li>
 * <li>lines starting with {@code "="} are tried against
 * {@code BIZARRE_LEAF_PATTERN} and otherwise split after their last
 * {@code '='},</li>
 * <li>anything else is logged and wrapped, unparsed, in a level 1 leaf.</li>
 * </ul>
 * For pattern matches the level of the element is the length of the first
 * capturing group plus one.
 *
 * @param line the line to parse
 * @return a {@code Node} or {@code Leaf} describing the line, or {@code null}
 *         if the line is to be skipped
 */
public TreeElement getElement(String line) {
  // Note: every level is at least 1, because 0 is reserved for the root.

  // try node
  Matcher nodeMatcher = NODE_PATTERN.matcher(line);
  if (nodeMatcher.matches()) {
    int level = nodeMatcher.group(1).length() + 1;
    String syntacticTag = nodeMatcher.group(2);
    Node node = new Node();
    node.setLevel(level);
    node.setSyntacticTag(syntacticTag);
    return node;
  }

  // try a fully annotated leaf
  Matcher leafMatcher = LEAF_PATTERN.matcher(line);
  if (leafMatcher.matches()) {
    int level = leafMatcher.group(1).length() + 1;
    String syntacticTag = leafMatcher.group(2);
    String funcTag = leafMatcher.group(3);
    String lemma = leafMatcher.group(4);
    String secondaryTag = leafMatcher.group(5);
    String morphologicalTag = leafMatcher.group(6);
    String lexeme = leafMatcher.group(7);
    Leaf leaf = new Leaf();
    leaf.setLevel(level);
    leaf.setSyntacticTag(syntacticTag);
    leaf.setFunctionalTag(funcTag);
    leaf.setSecondaryTag(secondaryTag);
    leaf.setMorphologicalTag(morphologicalTag);
    leaf.setLexeme(lexeme);
    leaf.setLemma(lemma);
    return leaf;
  }

  // try punctuation: only level and lexeme are available
  Matcher punctuationMatcher = PUNCTUATION_PATTERN.matcher(line);
  if (punctuationMatcher.matches()) {
    int level = punctuationMatcher.group(1).length() + 1;
    String lexeme = punctuationMatcher.group(2);
    Leaf leaf = new Leaf();
    leaf.setLevel(level);
    leaf.setLexeme(lexeme);
    return leaf;
  }

  // process the bizarre cases
  if (line.equals("_") || line.startsWith("<lixo") || line.startsWith("pause")) {
    return null;
  }

  if (line.startsWith("=")) {
    Matcher bizarreLeafMatcher = BIZARRE_LEAF_PATTERN.matcher(line);
    if (bizarreLeafMatcher.matches()) {
      int level = bizarreLeafMatcher.group(1).length() + 1;
      String syntacticTag = bizarreLeafMatcher.group(2);
      String lemma = bizarreLeafMatcher.group(3);
      String morphologicalTag = bizarreLeafMatcher.group(4);
      String lexeme = bizarreLeafMatcher.group(5);
      Leaf leaf = new Leaf();
      leaf.setLevel(level);
      leaf.setSyntacticTag(syntacticTag);
      leaf.setMorphologicalTag(morphologicalTag);
      leaf.setLexeme(lexeme);
      if (lemma != null) {
        // Drop the first and last character of the lemma (presumably the
        // surrounding quotes - confirm against BIZARRE_LEAF_PATTERN).
        if (lemma.length() > 2) {
          lemma = lemma.substring(1, lemma.length() - 1);
        }
        leaf.setLemma(lemma);
      }
      return leaf;
    }

    // Last resort for "=" lines: take what follows the last '=' (skipping one
    // more character) as the lexeme.
    int level = line.lastIndexOf("=") + 1;

    // When the line ends with '=', level equals line.length() and
    // substring(level + 1) would throw StringIndexOutOfBoundsException. Such a
    // line is left to the generic fallback below instead of aborting the parse.
    if (level < line.length()) {
      String lexeme = line.substring(level + 1);
      if (lexeme.matches("\\w.*?[\\.<>].*")) {
        return null;
      }
      Leaf leaf = new Leaf();
      leaf.setLevel(level + 1);
      leaf.setSyntacticTag("");
      leaf.setMorphologicalTag("");
      leaf.setFunctionalTag("");
      leaf.setLexeme(lexeme);
      return leaf;
    }
  }

  // Nothing matched: keep the raw line as the lexeme of a level 1 leaf.
  logger.warn("Couldn't parse leaf: {}", line);
  Leaf leaf = new Leaf();
  leaf.setLevel(1);
  leaf.setSyntacticTag("");
  leaf.setMorphologicalTag("");
  leaf.setFunctionalTag("");
  leaf.setLexeme(line);
  return leaf;
}