public boolean process()

in streampipes-extensions/streampipes-pipeline-elements-experimental-flink/src/main/java/com/kohlschutter/boilerpipe/filters/english/TerminatingBlocksFinder.java [45:79]


  /**
   * Labels short text blocks that look like the beginning of a comment/feedback section with
   * {@code DefaultLabels.INDICATES_END_OF_TEXT}.
   *
   * <p>Only blocks with fewer than 15 words are inspected. A block whose trimmed text is at
   * least 8 characters long is labelled when its lower-cased text matches one of the known
   * marker phrases. A shorter block is labelled only when its link density is exactly
   * {@code 1.0} and its trimmed text is exactly {@code "Comment"} (case-sensitive).
   *
   * @param doc the document whose text blocks are examined and possibly labelled
   * @return {@code true} if at least one block received the end-of-text label,
   *         {@code false} otherwise
   * @throws BoilerpipeProcessingException declared by the signature; nothing in this method
   *         throws it directly
   */
  public boolean process(TextDocument doc) throws BoilerpipeProcessingException {
    boolean changes = false;

    for (TextBlock tb : doc.getTextBlocks()) {
      final int numWords = tb.getNumWords();
      if (numWords >= 15) {
        // Longer blocks are never treated as terminating markers.
        continue;
      }

      final String text = tb.getText().trim();
      final int len = text.length();

      if (len >= 8) {
        // Locale.ROOT keeps the lower-casing independent of the JVM default locale; with a
        // Turkish/Azeri default, 'I' would become dotless 'ı' and the phrases below would
        // silently stop matching upper- or mixed-case input.
        final String textLC = text.toLowerCase(java.util.Locale.ROOT);
        if (textLC.startsWith("comments")
            || startsWithNumber(textLC, len, " comments", " users responded in")
            || textLC.startsWith("© reuters") || textLC.startsWith("please rate this")
            || textLC.startsWith("post a comment") || textLC.contains("what you think...")
            || textLC.contains("add your comment") || textLC.contains("add comment")
            || textLC.contains("reader views") || textLC.contains("have your say")
            || textLC.contains("reader comments") || textLC.contains("rätta artikeln")
            || textLC.equals("thanks for your comments - this feedback is now closed")) {
          tb.addLabel(DefaultLabels.INDICATES_END_OF_TEXT);
          changes = true;
        }
      } else if (tb.getLinkDensity() == 1.0) {
        // Text shorter than 8 characters: only a block with link density 1.0 that reads
        // exactly "Comment" counts as a terminating marker.
        if (text.equals("Comment")) {
          tb.addLabel(DefaultLabels.INDICATES_END_OF_TEXT);
          // The document was modified here as well, so report it to the caller.
          changes = true;
        }
      }
    }

    return changes;
  }