in streampipes-extensions/streampipes-pipeline-elements-experimental-flink/src/main/java/com/kohlschutter/boilerpipe/filters/heuristics/BlockProximityFusion.java [62:120]
/**
 * Walks the document's text blocks and fuses each content block into the block that precedes it
 * whenever the two are close enough.
 *
 * <p>A block is merged into its predecessor (via {@code mergeNext}) and removed from the list when
 * all of the following hold:
 * <ul>
 *   <li>the block itself is content;</li>
 *   <li>the number of blocks between the predecessor's end offset and this block's start offset
 *       does not exceed {@code maxBlocksDistance};</li>
 *   <li>if {@code contentOnly} is set, the predecessor is content as well;</li>
 *   <li>if {@code sameTagLevelOnly} is set, both blocks report the same tag level.</li>
 * </ul>
 * Otherwise the block becomes the new predecessor for the blocks that follow.
 *
 * @param doc the document to process; the list returned by {@code doc.getTextBlocks()} is
 *     modified through its iterator when blocks are merged
 * @return {@code true} if at least one pair of blocks was merged, {@code false} otherwise
 *     (including when the document has fewer than two blocks, or when {@code contentOnly} is set
 *     and no content block exists)
 * @throws BoilerpipeProcessingException declared in the signature; not thrown explicitly by this
 *     method body
 */
public boolean process(TextDocument doc) throws BoilerpipeProcessingException {
  final List<TextBlock> blocks = doc.getTextBlocks();
  if (blocks.size() < 2) {
    // A single block (or none) has nothing to be fused with.
    return false;
  }

  // Determine the first merge target ("anchor") and the list index at which scanning resumes.
  TextBlock anchor = null;
  int resumeAt = 0;
  if (!contentOnly) {
    anchor = blocks.get(0);
    resumeAt = 1;
  } else {
    // In content-only mode the anchor is the first content block; scanning resumes right after it.
    for (TextBlock candidate : blocks) {
      resumeAt++;
      if (candidate.isContent()) {
        anchor = candidate;
        break;
      }
    }
    if (anchor == null) {
      // No content block at all: nothing can be merged.
      return false;
    }
  }

  boolean fusedAny = false;
  Iterator<TextBlock> cursor = blocks.listIterator(resumeAt);
  while (cursor.hasNext()) {
    final TextBlock current = cursor.next();

    boolean fuse = false;
    if (current.isContent()) {
      // Number of blocks lying strictly between the anchor and the current block.
      final int gap = current.getOffsetBlocksStart() - anchor.getOffsetBlocksEnd() - 1;
      // current is already known to be content here, so content-only mode only needs to
      // verify the anchor.
      fuse = gap <= maxBlocksDistance
          && (!contentOnly || anchor.isContent())
          && (!sameTagLevelOnly || anchor.getTagLevel() == current.getTagLevel());
    }

    if (fuse) {
      // Absorb the current block into the anchor; the anchor stays the merge target.
      anchor.mergeNext(current);
      cursor.remove();
      fusedAny = true;
    } else {
      // Not merged (non-content, too far away, or rejected by a flag): it becomes the new anchor.
      anchor = current;
    }
  }

  return fusedAny;
}