protected WhiskRule anchor()

in ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/whisk/generic/Whisk.java [688:848]


  /**
   * Builds the anchored starting rule for one slot of an example.
   * <p>
   * For every term list returned by {@code getTermsWithinBounds} for the slot annotation, two
   * candidates are created from copies of {@code rule}:
   * <ul>
   * <li><b>base1</b>: the slot filler is described by the terms inside the slot (all of them if
   * there are at most {@code windowSize}; otherwise first term, a wildcard when there are more
   * than two terms, and last term).</li>
   * <li><b>base2</b>: the best (most covered positives) of a set of context variants. Each variant
   * uses a wildcard filler, optionally combined with the first or the last inner term, and with
   * every combination of an optional term before and an optional term after the slot.</li>
   * </ul>
   * The candidate covering more positives wins for that term list (base1 wins ties), and the best
   * winner over all term lists is returned (the earliest one wins ties).
   *
   * @param rule
   *          the rule to extend; it is only copied, never modified here
   * @param doc
   *          not used by this method
   * @param example
   *          the example providing the slot annotation and the surrounding terms
   * @param slotIndex
   *          index of the slot in the example's annotations and in the rule's patterns
   * @return the best anchored rule, or {@code null} if {@code rule} is {@code null}, if a term
   *         list is empty, if there is no term list at all, or if {@code shouldAbort()} reports
   *         true after testing
   */
  protected WhiskRule anchor(WhiskRule rule, TextRulerExampleDocument doc,
          TextRulerExample example, int slotIndex) {
    TextRulerAnnotation slotAnnotation = example.getAnnotations()[slotIndex];
    List<List<WhiskRuleItem>> window = getTermsWithinBounds(slotAnnotation.getBegin(),
            slotAnnotation.getEnd(), example);
    if (rule == null) {
      return null;
    }

    List<WhiskRule> result = new ArrayList<WhiskRule>();
    for (List<WhiskRuleItem> inside : window) {
      // NOTE(review): an empty term list aborts the whole method and discards the winners of
      // earlier term lists; confirm that skipping this list is not what was intended.
      if (inside.isEmpty()) {
        return null;
      }
      int size = inside.size();

      // base1: slot filler rule
      WhiskRule base1 = rule.copy();
      TextRulerSlotPattern slotPattern = base1.getPatterns().get(slotIndex);
      // questionable restriction:
      if (size <= windowSize) { // TODO add parameter for this!
        // NOTE(review): the items are added without copying here, unlike in the branch below.
        slotPattern.fillerPattern.addAll(inside);
      } else {
        // keep only the boundary terms and collapse everything in between into one wildcard
        slotPattern.fillerPattern.add(inside.get(0).copy());
        if (size > 2) {
          slotPattern.fillerPattern.add(WhiskRuleItem.newWildCardItem());
        }
        if (size > 1) {
          slotPattern.fillerPattern.add(inside.get(size - 1).copy());
        }
      }

      List<WhiskRuleItem> beforeList = getTermsBefore(inside.get(0), example);
      List<WhiskRuleItem> afterList = getTermsAfter(inside.get(size - 1), example);
      // the null entry stands for "no context term on this side"; it also guarantees that both
      // lists are non-empty, so at least one variant is created per pass below
      beforeList.add(null);
      afterList.add(null);

      // workaround for better rules:
      Collection<WhiskRule> tempRules = new HashSet<WhiskRule>();
      // only inner begin
      addAnchorVariants(tempRules, rule, slotIndex, inside, beforeList, afterList, true, false);
      // only inner end
      addAnchorVariants(tempRules, rule, slotIndex, inside, beforeList, afterList, false, true);
      // wildcard only
      addAnchorVariants(tempRules, rule, slotIndex, inside, beforeList, afterList, false, false);

      List<TextRulerRule> rules = new ArrayList<TextRulerRule>(tempRules);
      testRulesIfNotCached(rules);
      // base2: best context variant; null only if no variant is left after testing
      WhiskRule base2 = (WhiskRule) mostCoveringRule(rules);

      List<TextRulerRule> testRules = new ArrayList<TextRulerRule>();
      TextRulerToolkit.log("base1: " + base1.getRuleString());
      testRules.add(base1);
      if (base2 != null) {
        TextRulerToolkit.log("base2: " + base2.getRuleString());
        testRules.add(base2);
      }
      testRulesIfNotCached(testRules);
      if (shouldAbort()) {
        return null;
      }

      TextRulerToolkit.log("\tbase1: " + base1.getCoveringStatistics() + " --> laplacian = "
              + base1.getLaplacian());
      if (base2 == null) {
        result.add(base1);
        continue;
      }
      TextRulerToolkit.log("\tbase2: " + base2.getCoveringStatistics() + " --> laplacian = "
              + base2.getLaplacian());
      // base2 has to be strictly better to replace base1
      if (base2.getCoveringStatistics().getCoveredPositivesCount() > base1
              .getCoveringStatistics().getCoveredPositivesCount()) {
        result.add(base2);
      } else {
        result.add(base1);
      }
    }

    return (WhiskRule) mostCoveringRule(result);
  }

  /**
   * Adds one copy of {@code rule} per (before, after) combination to {@code target}. In each copy
   * the slot's filler pattern receives a wildcard, optionally preceded by a copy of the first
   * inner term or followed by a copy of the last inner term. A {@code null} entry in
   * {@code beforeList} / {@code afterList} means that no pre / post filler item is added.
   */
  private void addAnchorVariants(Collection<WhiskRule> target, WhiskRule rule, int slotIndex,
          List<WhiskRuleItem> inside, List<WhiskRuleItem> beforeList,
          List<WhiskRuleItem> afterList, boolean keepFirst, boolean keepLast) {
    for (WhiskRuleItem eachBefore : beforeList) {
      for (WhiskRuleItem eachAfter : afterList) {
        WhiskRule copy = rule.copy();
        TextRulerSlotPattern pattern = copy.getPatterns().get(slotIndex);
        if (eachBefore != null) {
          pattern.preFillerPattern.add(eachBefore);
        }
        if (keepFirst) {
          pattern.fillerPattern.add(inside.get(0).copy());
        }
        pattern.fillerPattern.add(WhiskRuleItem.newWildCardItem());
        if (keepLast) {
          pattern.fillerPattern.add(inside.get(inside.size() - 1).copy());
        }
        if (eachAfter != null) {
          pattern.postFillerPattern.add(eachAfter);
        }
        target.add(copy);
      }
    }
  }

  /**
   * Returns the first rule (in iteration order) with the highest covered positives count, or
   * {@code null} if {@code candidates} is empty. The covering statistics are only read when
   * there are at least two candidates to compare.
   */
  private TextRulerRule mostCoveringRule(List<? extends TextRulerRule> candidates) {
    TextRulerRule best = null;
    for (TextRulerRule each : candidates) {
      if (best == null) {
        best = each;
      } else if (each.getCoveringStatistics().getCoveredPositivesCount() > best
              .getCoveringStatistics().getCoveredPositivesCount()) {
        best = each;
      }
    }
    return best;
  }