in streampipes-extensions/streampipes-pipeline-elements-experimental-flink/src/main/java/com/kohlschutter/boilerpipe/filters/english/TerminatingBlocksFinder.java [45:79]
/**
 * Labels short text blocks that look like the start of a comment or feedback section
 * with {@link DefaultLabels#INDICATES_END_OF_TEXT}.
 *
 * <p>Only blocks with fewer than 15 words are inspected. A block whose trimmed text is at
 * least 8 characters long is compared, case-insensitively, against a fixed list of marker
 * phrases. A shorter block is labelled only if its link density is exactly 1.0 and its
 * trimmed text is exactly {@code "Comment"}.
 *
 * @param doc the document whose text blocks are inspected and labelled in place
 * @return {@code true} if at least one block received the label, {@code false} otherwise
 * @throws BoilerpipeProcessingException declared by the signature; nothing in this method
 *     body throws it directly
 */
public boolean process(TextDocument doc) throws BoilerpipeProcessingException {
  boolean changes = false;
  for (TextBlock tb : doc.getTextBlocks()) {
    if (tb.getNumWords() >= 15) {
      continue;
    }
    final String text = tb.getText().trim();
    final int len = text.length();
    if (len >= 8) {
      // Locale.ROOT keeps the match independent of the JVM default locale; with a
      // Turkish/Azeri default, 'I' would lower-case to dotless 'ı' and upper-case
      // input such as "PLEASE RATE THIS" would no longer match the ASCII markers.
      final String textLC = text.toLowerCase(java.util.Locale.ROOT);
      if (textLC.startsWith("comments")
          || startsWithNumber(textLC, len, " comments", " users responded in")
          || textLC.startsWith("© reuters") || textLC.startsWith("please rate this")
          || textLC.startsWith("post a comment") || textLC.contains("what you think...")
          || textLC.contains("add your comment") || textLC.contains("add comment")
          || textLC.contains("reader views") || textLC.contains("have your say")
          || textLC.contains("reader comments") || textLC.contains("rätta artikeln")
          || textLC.equals("thanks for your comments - this feedback is now closed")) {
        tb.addLabel(DefaultLabels.INDICATES_END_OF_TEXT);
        changes = true;
      }
    } else if (tb.getLinkDensity() == 1.0 && text.equals("Comment")) {
      // "Comment" is shorter than 8 characters, so it can only be handled here.
      tb.addLabel(DefaultLabels.INDICATES_END_OF_TEXT);
      // The label modifies the block, so the change must be reported to the caller.
      changes = true;
    }
  }
  return changes;
}