public void flushBlock()

in streampipes-extensions/streampipes-pipeline-elements-experimental-flink/src/main/java/com/kohlschutter/boilerpipe/sax/BoilerpipeHTMLContentHandler.java [287:365]


  /**
   * Converts the currently buffered text into a {@code TextBlock} and resets the buffers.
   *
   * <p>Behaviour, in order:
   * <ul>
   *   <li>While {@code inBody == 0} no block is emitted. If the last start tag was
   *       {@code TITLE} (case-insensitive) the trimmed token buffer is passed to
   *       {@code setTitle}; both buffers are then cleared.</li>
   *   <li>An empty token buffer returns immediately, leaving {@code textBuffer} untouched.</li>
   *   <li>A one-character token buffer is discarded (both buffers cleared) when
   *       {@code sbLastWasWhitespace} is set.</li>
   *   <li>Otherwise the token buffer is tokenized and word / linked-word / wrapped-line
   *       statistics are gathered. If no token other than the anchor markers was seen the
   *       method returns without clearing the buffers; else a {@code TextBlock} is built,
   *       given the current {@code blockTagLevel} and handed to {@code addTextBlock}.</li>
   * </ul>
   */
  public void flushBlock() {
    // Outside of the body: at most record the title, then drop whatever was buffered.
    if (inBody == 0) {
      if ("TITLE".equalsIgnoreCase(lastStartTag)) {
        setTitle(tokenBuffer.toString().trim());
      }
      textBuffer.setLength(0);
      tokenBuffer.setLength(0);
      return;
    }

    final int bufferedLength = tokenBuffer.length();
    if (bufferedLength == 0) {
      // No tokens buffered; note that textBuffer is not cleared on this path.
      return;
    }
    if (bufferedLength == 1 && sbLastWasWhitespace) {
      textBuffer.setLength(0);
      tokenBuffer.setLength(0);
      return;
    }

    // Lines are considered "wrapped" once they would exceed this many characters.
    final int lineWidthLimit = 80;

    int wordCount = 0;
    int linkedWordCount = 0;
    int wrappedLineCount = 0;
    int countedTokens = 0;
    int wordsOnCurrentLine = 0;
    // Starts at -1 so that the separator added for the first word is not counted.
    int charsOnCurrentLine = -1;

    final String[] pieces = UnicodeTokenizer.tokenize(tokenBuffer);
    for (int i = 0; i < pieces.length; i++) {
      final String piece = pieces[i];

      // Anchor markers only toggle the link state; they are not counted as tokens.
      if (ANCHOR_TEXT_START.equals(piece)) {
        inAnchorText = true;
        continue;
      }
      if (ANCHOR_TEXT_END.equals(piece)) {
        inAnchorText = false;
        continue;
      }

      countedTokens++;
      if (!isWord(piece)) {
        continue;
      }

      wordCount++;
      wordsOnCurrentLine++;
      if (inAnchorText) {
        linkedWordCount++;
      }

      final int pieceLength = piece.length();
      charsOnCurrentLine += pieceLength + 1;
      if (charsOnCurrentLine > lineWidthLimit) {
        // The word did not fit: it opens a new line on its own.
        wrappedLineCount++;
        charsOnCurrentLine = pieceLength;
        wordsOnCurrentLine = 1;
      }
    }

    if (countedTokens == 0) {
      // Only anchor markers (or nothing) were tokenized; buffers are left as they are.
      return;
    }

    // Without any wrap the whole text counts as a single line holding every word;
    // otherwise the words on the last, unfinished line are excluded.
    final boolean neverWrapped = wrappedLineCount == 0;
    final int wordsInWrappedLines = neverWrapped ? wordCount : wordCount - wordsOnCurrentLine;
    final int reportedLineCount = neverWrapped ? 1 : wrappedLineCount;

    final String blockText = textBuffer.toString().trim();
    final TextBlock block = new TextBlock(blockText, currentContainedTextElements, wordCount,
        linkedWordCount, wordsInWrappedLines, reportedLineCount, offsetBlocks);

    // Reset the per-block state before publishing the block.
    currentContainedTextElements = new BitSet();
    offsetBlocks++;
    textBuffer.setLength(0);
    tokenBuffer.setLength(0);

    block.setTagLevel(blockTagLevel);
    addTextBlock(block);
    blockTagLevel = -1;
  }