in opennlp-tools/src/main/java/opennlp/tools/formats/conllu/ConlluStream.java [62:143]
/**
 * Reads the next sentence block from the sentence stream and parses it into a
 * {@link ConlluSentence}.
 *
 * <p>Each line of the block is handled as follows:
 * <ul>
 *   <li>A line whose trimmed form starts with {@code #} is a comment line carrying metadata.
 *     <ul>
 *       <li>If it contains {@code =}, the text before the first {@code =} is the key and the
 *           text after it is the value (both trimmed). The keys {@code newdoc id},
 *           {@code newpar id}, {@code sent_id}, {@code text} and {@code translit} are
 *           recognised, but only when the value is not empty. Independently of that, any
 *           key starting with {@code text_} is passed to {@code addTextLang}, even when
 *           its value is empty.</li>
 *       <li>If it contains no {@code =}, only the exact comments {@code newdoc} and
 *           {@code newpar} are recognised; anything else is ignored.</li>
 *     </ul>
 *   </li>
 *   <li>Every other line is parsed as a {@link ConlluWordLine}.</li>
 * </ul>
 * The collected word lines are run through {@code postProcessContractions} before the
 * sentence is built.
 *
 * @return the parsed sentence, or {@code null} if the sentence stream returned {@code null}
 * @throws IOException if reading from the sentence stream or from the sentence text fails
 */
public ConlluSentence read() throws IOException {
  String sentence = sentenceStream.read();
  if (sentence == null) {
    return null;
  }

  List<ConlluWordLine> wordLines = new ArrayList<>();
  boolean newDocument = false;
  boolean newParagraph = false;
  String documentId = null;
  String paragraphId = null;
  String sentenceId = null;
  String text = null;
  Map<Locale, String> textLang = null;
  String translit = null;

  // try-with-resources guarantees the reader is closed, also when a word line fails to parse
  try (BufferedReader reader = new BufferedReader(new StringReader(sentence))) {
    String line;
    while ((line = reader.readLine()) != null) {
      String trimmedLine = line.trim();

      // Anything that is not a comment is a word line; it is parsed from the untrimmed line
      if (!trimmedLine.startsWith("#")) {
        wordLines.add(new ConlluWordLine(line));
        continue;
      }

      // # indicates a comment line and contains additional data
      String commentLine = trimmedLine.substring(1);
      int separator = commentLine.indexOf('=');

      if (separator == -1) {
        // Flag-style comment without a value
        switch (commentLine.trim()) {
          case "newdoc":
            newDocument = true;
            break;
          case "newpar":
            newParagraph = true;
            break;
          default:
            // unknown comment, ignored
            break;
        }
        continue;
      }

      // key = value comment, split at the first '='
      String firstPart = commentLine.substring(0, separator).trim();
      String secondPart = commentLine.substring(separator + 1).trim();

      if (!secondPart.isEmpty()) {
        switch (firstPart) {
          case "newdoc id":
            newDocument = true;
            documentId = secondPart;
            break;
          case "newpar id":
            newParagraph = true;
            paragraphId = secondPart;
            break;
          case "sent_id":
            sentenceId = secondPart;
            break;
          case "text":
            text = secondPart;
            break;
          case "translit":
            translit = secondPart;
            break;
          default:
            // unknown key, ignored here (text_* keys are handled below)
            break;
        }
      }

      // Deliberately outside the non-empty check: text_* keys are forwarded
      // to addTextLang even when their value is empty.
      if (firstPart.startsWith("text_")) {
        if (textLang == null) {
          // created lazily, so it stays null when the sentence has no text_* comment
          textLang = new HashMap<>();
        }
        addTextLang(firstPart, secondPart, textLang);
      }
    }
  }

  wordLines = postProcessContractions(wordLines);
  return new ConlluSentence(wordLines, sentenceId, text, newDocument, documentId, newParagraph,
      paragraphId, textLang, translit);
}