public boolean process()

in streampipes-extensions/streampipes-pipeline-elements-experimental-flink/src/main/java/com/kohlschutter/boilerpipe/filters/heuristics/BlockProximityFusion.java [62:120]


  /**
   * Walks the document's text blocks in order and fuses a content block into the
   * block preceding it whenever the two are close enough to each other.
   *
   * <p>A block is fused into its predecessor when all of the following hold:
   * <ul>
   *   <li>the block itself is flagged as content (non-content blocks are never
   *       fused, they only become the new predecessor);</li>
   *   <li>the gap between the predecessor's end offset and the block's start
   *       offset, minus one, does not exceed {@code maxBlocksDistance};</li>
   *   <li>if {@code contentOnly} is set, the predecessor is flagged as content
   *       too;</li>
   *   <li>if {@code sameTagLevelOnly} is set, both blocks report the same tag
   *       level.</li>
   * </ul>
   * A fused block is removed from the document's block list through the iterator.
   *
   * @param doc the document whose text block list is inspected and modified in place
   * @return {@code true} if at least one block was fused, {@code false} otherwise
   *         (including when the document holds fewer than two blocks, or when
   *         {@code contentOnly} is set and no content block exists)
   * @throws BoilerpipeProcessingException declared by the signature; not thrown
   *         explicitly by the code of this method
   */
  public boolean process(TextDocument doc) throws BoilerpipeProcessingException {
    final List<TextBlock> blocks = doc.getTextBlocks();
    if (blocks.size() < 2) {
      // Nothing to fuse with fewer than two blocks.
      return false;
    }

    // Pick the initial predecessor and the list index at which the scan starts.
    TextBlock previous;
    int startIndex;
    if (!contentOnly) {
      previous = blocks.get(0);
      startIndex = 1;
    } else {
      // In content-only mode the scan starts right after the first content block.
      previous = null;
      startIndex = 0;
      for (TextBlock candidate : blocks) {
        startIndex++;
        if (candidate.isContent()) {
          previous = candidate;
          break;
        }
      }
      if (previous == null) {
        // No content block at all, hence nothing that could be fused.
        return false;
      }
    }

    boolean fusedAny = false;
    final Iterator<TextBlock> cursor = blocks.listIterator(startIndex);
    while (cursor.hasNext()) {
      final TextBlock current = cursor.next();

      boolean fuse = false;
      if (current.isContent()) {
        final int gap = current.getOffsetBlocksStart() - previous.getOffsetBlocksEnd() - 1;
        // Evaluated lazily, left to right: distance, then content flags, then tag level.
        fuse = gap <= maxBlocksDistance
            && (!contentOnly || (previous.isContent() && current.isContent()))
            && (!sameTagLevelOnly || previous.getTagLevel() == current.getTagLevel());
      }

      if (fuse) {
        // The predecessor absorbs the current block and stays the predecessor.
        previous.mergeNext(current);
        cursor.remove();
        fusedAny = true;
      } else {
        previous = current;
      }
    }

    return fusedAny;
  }