in streampipes-extensions/streampipes-pipeline-elements-experimental-flink/src/main/java/com/kohlschutter/boilerpipe/sax/BoilerpipeHTMLContentHandler.java [202:281]
/**
 * Handles one run of character data: normalizes its whitespace, trims it, and appends the
 * result to both {@code textBuffer} and {@code tokenBuffer}.
 *
 * <p>Processing steps, in order:
 * <ol>
 *   <li>{@code textElementIdx} is incremented on every call, including calls whose content is
 *       subsequently discarded.</li>
 *   <li>If {@code flush} is set, {@code flushBlock()} is invoked and the flag is cleared.</li>
 *   <li>Nothing further happens when {@code inIgnorableElement} is non-zero or the run is
 *       empty.</li>
 *   <li>Every character in the run for which {@link Character#isWhitespace(char)} is true is
 *       overwritten with a plain space <em>inside the supplied array</em>.</li>
 *   <li>Leading and trailing spaces are stripped; each stripped side is represented by at most
 *       one space in the buffers, and no leading space is added when the buffers already end
 *       in whitespace (tracked by {@code sbLastWasWhitespace}).</li>
 * </ol>
 *
 * <p>A run consisting only of whitespace records {@code Event.WHITESPACE} as the last event;
 * any other non-empty run records {@code Event.CHARACTERS}, marks {@code textElementIdx} in
 * {@code currentContainedTextElements}, and initializes {@code blockTagLevel} from
 * {@code tagLevel} if it is still {@code -1}.
 *
 * @param ch array holding the characters; whitespace inside the given range is modified in place
 * @param start index of the first character of the run within {@code ch}
 * @param length number of characters in the run
 * @throws SAXException declared by the signature; not thrown directly by this method
 *     (NOTE(review): {@code flushBlock()} is not visible here and may throw it - confirm)
 */
public void characters(char[] ch, int start, int length) throws SAXException {
  // Counted before any early exit, so skipped runs still consume an index.
  textElementIdx++;

  if (flush) {
    flushBlock();
    flush = false;
  }

  if (inIgnorableElement != 0 || length == 0) {
    return;
  }

  // Collapse every kind of whitespace to ' ' directly in the caller's array.
  final int limit = start + length;
  for (int pos = start; pos < limit; pos++) {
    if (Character.isWhitespace(ch[pos])) {
      ch[pos] = ' ';
    }
  }

  // Move the left cursor past leading spaces.
  int from = start;
  while (from < limit && ch[from] == ' ') {
    from++;
  }

  // Move the right cursor back over trailing spaces, never crossing the left cursor.
  int to = limit;
  while (to > from && ch[to - 1] == ' ') {
    to--;
  }

  final boolean leadingSpace = from > start;
  final boolean trailingSpace = to < limit;
  final int count = to - from;

  if (count == 0) {
    // Nothing but whitespace: contribute at most a single separator.
    final boolean sawSpace = leadingSpace || trailingSpace;
    if (sawSpace && !sbLastWasWhitespace) {
      textBuffer.append(' ');
      tokenBuffer.append(' ');
    }
    sbLastWasWhitespace = sawSpace;
    lastEvent = Event.WHITESPACE;
    return;
  }

  // Separate from preceding content unless the buffers already end in whitespace.
  if (leadingSpace && !sbLastWasWhitespace) {
    textBuffer.append(' ');
    tokenBuffer.append(' ');
  }

  // -1 means no level has been recorded yet; take the current tag level.
  if (blockTagLevel == -1) {
    blockTagLevel = tagLevel;
  }

  textBuffer.append(ch, from, count);
  tokenBuffer.append(ch, from, count);

  if (trailingSpace) {
    textBuffer.append(' ');
    tokenBuffer.append(' ');
  }

  sbLastWasWhitespace = trailingSpace;
  lastEvent = Event.CHARACTERS;
  currentContainedTextElements.set(textElementIdx);
}