public void characters()

in streampipes-extensions/streampipes-pipeline-elements-experimental-flink/src/main/java/com/kohlschutter/boilerpipe/sax/BoilerpipeHTMLContentHandler.java [202:281]


  /**
   * Handles one chunk of character data reported by the SAX parser.
   *
   * <p>The text-element counter is advanced on every call, and a pending flush (if any) is
   * carried out first. Chunks that arrive inside an ignorable element, or that are empty,
   * are then dropped. Otherwise every whitespace character in the reported range is
   * rewritten to a plain space <em>in the supplied array</em>, leading and trailing spaces
   * are trimmed, and the remaining text is appended to both the text buffer and the token
   * buffer. A trimmed leading run contributes a single separating space unless the buffers
   * already end in whitespace; a trimmed trailing run always contributes one.
   *
   * @param ch     character array holding the chunk; whitespace inside the reported range
   *               is normalised to {@code ' '} in place
   * @param start  index of the first character of the chunk in {@code ch}
   * @param length number of characters in the chunk
   * @throws SAXException declared by the handler signature; not thrown directly here
   */
  public void characters(char[] ch, int start, int length) throws SAXException {
    textElementIdx++;

    if (flush) {
      flushBlock();
      flush = false;
    }

    // Nothing to record for ignored elements or empty chunks.
    if (inIgnorableElement != 0 || length == 0) {
      return;
    }

    // Collapse every kind of whitespace in the reported range to a plain space.
    final int end = start + length;
    for (int pos = start; pos < end; pos++) {
      if (Character.isWhitespace(ch[pos])) {
        ch[pos] = ' ';
      }
    }

    // Trim leading spaces: advance the cursor and shrink the remaining count together.
    int from = start;
    int remaining = length;
    boolean leadingSpace = false;
    for (; from < end && ch[from] == ' '; from++, remaining--) {
      leadingSpace = true;
    }

    // Trim trailing spaces from whatever is left.
    boolean trailingSpace = false;
    for (; remaining > 0 && ch[from + remaining - 1] == ' '; remaining--) {
      trailingSpace = true;
    }

    if (remaining == 0) {
      // The chunk consisted solely of whitespace: emit at most one separating space.
      final boolean sawSpace = leadingSpace || trailingSpace;
      if (sawSpace && !sbLastWasWhitespace) {
        textBuffer.append(' ');
        tokenBuffer.append(' ');
      }
      sbLastWasWhitespace = sawSpace;
      lastEvent = Event.WHITESPACE;
      return;
    }

    // Separate this chunk from preceding text unless a space is already there.
    if (leadingSpace && !sbLastWasWhitespace) {
      textBuffer.append(' ');
      tokenBuffer.append(' ');
    }

    // Remember the tag level at which the current block received its first text.
    if (blockTagLevel == -1) {
      blockTagLevel = tagLevel;
    }

    textBuffer.append(ch, from, remaining);
    tokenBuffer.append(ch, from, remaining);
    if (trailingSpace) {
      textBuffer.append(' ');
      tokenBuffer.append(' ');
    }

    sbLastWasWhitespace = trailingSpace;
    lastEvent = Event.CHARACTERS;

    currentContainedTextElements.set(textElementIdx);
  }