in opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADSentenceStream.java [481:531]
/**
 * Reads lines from the underlying stream until one complete sentence has been
 * collected, then returns the result of parsing it.
 *
 * <p>Lines between a {@code SENT_START} match and a {@code SENT_END} or
 * {@code EXT_END} match are accumulated (newline-terminated), except lines
 * starting with {@code "A1"}, which are skipped. Outside a sentence, marker
 * lines update the reader state: {@code PARA_START} increments {@code paraID},
 * {@code TEXT_START} resets it to zero, and the title/box start and end markers
 * toggle {@code isTitle} and {@code isBox}. Any other line outside a sentence
 * is ignored.
 *
 * @return the parsed sentence, or {@code null} when the input is exhausted and
 *         no sentence text is pending
 * @throws IOException if reading the next line fails
 */
public Sentence read() throws IOException {
  final StringBuilder sentence = new StringBuilder();
  boolean sentenceStarted = false;

  while (true) {
    final String line = samples.read();

    if (line == null) {
      // End of input: flush a sentence that was opened but never closed.
      // If a sentence was opened and nothing was collected there is nothing to
      // flush, so return null as well; previously this case fell through and
      // the loop spun forever re-reading the exhausted stream.
      if (sentenceStarted && sentence.length() > 0) {
        return parser.parse(sentence.toString(), paraID, isTitle, isBox);
      }
      return null;
    }

    if (sentenceStarted) {
      if (SENT_END.matcher(line).matches() || EXT_END.matcher(line).matches()) {
        sentenceStarted = false;
      } else if (!line.startsWith("A1")) {
        sentence.append(line).append('\n');
      }
    } else if (SENT_START.matcher(line).matches()) {
      sentenceStarted = true;
    } else if (PARA_START.matcher(line).matches()) {
      paraID++;
    } else if (TITLE_START.matcher(line).matches()) {
      isTitle = true;
    } else if (TITLE_END.matcher(line).matches()) {
      isTitle = false;
    } else if (TEXT_START.matcher(line).matches()) {
      paraID = 0;
    } else if (BOX_START.matcher(line).matches()) {
      isBox = true;
    } else if (BOX_END.matcher(line).matches()) {
      isBox = false;
    }

    // A sentence just closed and has content: hand it to the parser. An empty
    // sentence (start immediately followed by end) is dropped and scanning
    // continues with the next line.
    if (!sentenceStarted && sentence.length() > 0) {
      return parser.parse(sentence.toString(), paraID, isTitle, isBox);
    }
  }
}