protected WhiskRule createNewRuleByAddingTerm()

in ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/whisk/generic/Whisk.java [449:686]


  /**
   * Builds a specialization of {@code baseRule} that additionally contains {@code term}.
   * <p>
   * All modifications are applied to {@code baseRule.copy()}. The term is placed according to the
   * begin/end offsets of its token annotation: it either replaces a star wildcard whose two
   * neighbors enclose the term, or it is inserted in front of the first item of the chosen
   * pattern that begins at or after the term's end (appended if there is no such item).
   * Afterwards a star wildcard is inserted between the new term and a non-wildcard neighbor
   * whenever {@code isNextValidNeighbor} reports that the two are not valid neighbors in the
   * rule's seed example.
   *
   * @param baseRule
   *          the rule to specialize
   * @param term
   *          the term to add; its copy is what ends up in the new rule
   * @return the extended copy of {@code baseRule}, or {@code null} if {@code term} is
   *         {@code null}, is a star wildcard or has no word constraint, if a matching wildcard
   *         was found outside of the chosen target pattern, or if the resulting rule string
   *         equals the rule string of {@code baseRule}
   */
  protected WhiskRule createNewRuleByAddingTerm(WhiskRule baseRule, WhiskRuleItem term) {
    if (term == null) {
      return null;
    }
    if (term.isStarWildCard() || term.getWordConstraint() == null) {
      return null;
    }
    WhiskRule newRule = baseRule.copy();
    int termBeginNumber = term.getWordConstraint().getTokenAnnotation().getBegin();
    int termEndNumber = term.getWordConstraint().getTokenAnnotation().getEnd();

    // Step 1: choose the pattern (pre-filler, filler or post-filler of some slot) that should
    // receive the term.
    // NOTE(review): the loop does not stop once a target is found, and the pre-filler check
    // below is not guarded by "targetPattern == null", so a later slot's pre-filler can
    // overwrite an earlier choice. Kept as is; confirm this is intended for multi-slot rules.
    TextRulerRulePattern targetPattern = null;
    TextRulerRulePattern previousSlotPostFillerPattern = null;
    for (int i = 0; i < newRule.getPatterns().size(); i++) {
      TextRulerSlotPattern slotPattern = newRule.getPatterns().get(i);

      // pre-filler: the term lies in front of the last pre-filler item
      WhiskRuleItem it = (WhiskRuleItem) slotPattern.preFillerPattern.lastItem();
      if (itemBeginsAtOrAfter(it, termEndNumber)) {
        targetPattern = slotPattern.preFillerPattern;
      }

      // filler
      if (targetPattern == null && slotPattern.fillerPattern.size() > 0) {
        it = (WhiskRuleItem) slotPattern.fillerPattern.firstItem();
        if (itemBeginsAtOrAfter(it, termEndNumber)) {
          // The term lies in front of the whole filler, so it still belongs to the pre-filler;
          // the check above did not select it (e.g. because the pre-filler is empty).
          targetPattern = slotPattern.preFillerPattern;
        } else {
          it = (WhiskRuleItem) slotPattern.fillerPattern.lastItem();
          if (itemBeginsAtOrAfter(it, termEndNumber)) {
            targetPattern = slotPattern.fillerPattern;
          }
        }
      }

      // post-filler
      if (targetPattern == null && slotPattern.postFillerPattern.size() > 0) {
        it = (WhiskRuleItem) slotPattern.postFillerPattern.firstItem();
        if (itemBeginsAtOrAfter(it, termEndNumber)) {
          // The term lies in front of the whole post-filler, so it still belongs to the filler;
          // the filler checks above did not select it (e.g. because the filler is empty).
          targetPattern = slotPattern.fillerPattern;
        } else {
          it = (WhiskRuleItem) slotPattern.postFillerPattern.lastItem();
          if (itemBeginsAtOrAfter(it, termEndNumber)) {
            targetPattern = slotPattern.postFillerPattern;
          }
        }
      }

      // nothing in this slot matched: fall back to the post-filler of the previous slot
      // (still null while processing the first slot)
      if (targetPattern == null) {
        targetPattern = previousSlotPostFillerPattern;
      }
      previousSlotPostFillerPattern = slotPattern.postFillerPattern;
    }

    // last resort: the post-filler of the last slot
    if (targetPattern == null) {
      targetPattern = previousSlotPostFillerPattern;
    }

    if (targetPattern == null) {
      TextRulerToolkit.log("ERROR, NO TARGET PATTERN FOR NEW RULE TERM FOUND !");
    } else {
      // Step 2: put the term into the target pattern.
      int indexInPattern = -1;
      if (targetPattern.size() == 0) {
        targetPattern.add(term.copy());
        indexInPattern = 0;
      } else {
        // 2a. check whether the term would replace a wildcard. Only the patterns of the first
        // slot are searched, in the order pre-filler, filler, post-filler.
        TextRulerSlotPattern firstSlot = newRule.getPatterns().get(0);
        WhiskRuleItem wildCard = findStarWildCardEnclosingTerm(newRule,
                firstSlot.preFillerPattern, termBeginNumber, termEndNumber);
        if (wildCard == null) {
          wildCard = findStarWildCardEnclosingTerm(newRule, firstSlot.fillerPattern,
                  termBeginNumber, termEndNumber);
        }
        if (wildCard == null) {
          wildCard = findStarWildCardEnclosingTerm(newRule, firstSlot.postFillerPattern,
                  termBeginNumber, termEndNumber);
        }

        if (wildCard != null) {
          // the helper only returns star wildcards, so no further type check is needed here
          if (!targetPattern.contains(wildCard)) {
            TextRulerToolkit.log("EVEN WORSE, THAT MUST NOT BE AT ALL!");
            return null;
          }
          indexInPattern = targetPattern.indexOf(wildCard);
          targetPattern.set(indexInPattern, term.copy());
        } else {
          // 2b. no wildcard to replace: insert in front of the first item that begins at or
          // after the end of the term, or append if there is none.
          for (int i = 0; i < targetPattern.size(); i++) {
            WhiskRuleItem it = (WhiskRuleItem) targetPattern.get(i);
            if (itemBeginsAtOrAfter(it, termEndNumber)) {
              indexInPattern = i;
              break;
            }
          }
          if (indexInPattern < 0) {
            indexInPattern = targetPattern.size();
            targetPattern.add(term.copy());
          } else {
            targetPattern.add(indexInPattern, term.copy());
          }
        }
      }

      // Step 3: the term has now replaced a wildcard or was added between two other items.
      // If a neighbor is not a valid direct neighbor in the seed example and is not a wildcard
      // itself, a wildcard has to be placed between the neighbor and the new term.
      WhiskRuleItem newTerm = (WhiskRuleItem) targetPattern.get(indexInPattern);

      // left neighbor
      WhiskRuleItem left = newRule.searchNeighborOfItem(newTerm, true);
      if (left != null && left.getWordConstraint() != null) {
        if (!left.isStarWildCard()) {
          boolean isValid = isNextValidNeighbor(left, newTerm, newRule.getSeedExample());
          if (!isValid) {
            targetPattern.add(indexInPattern, WhiskRuleItem.newWildCardItem());
            // the new term moved one position to the right
            indexInPattern++;
          }
        }
      }

      // right neighbor
      WhiskRuleItem right = newRule.searchNeighborOfItem(newTerm, false);
      if (right != null && right.getWordConstraint() != null) {
        if (!right.isStarWildCard()) {
          boolean isValid = isNextValidNeighbor(newTerm, right, newRule.getSeedExample());
          if (!isValid) {
            WhiskRuleItem wc = WhiskRuleItem.newWildCardItem();
            if (indexInPattern + 1 < targetPattern.size()) {
              targetPattern.add(indexInPattern + 1, wc);
            } else {
              targetPattern.add(wc);
            }
          }
        }
      }

      newRule.setNeedsCompile(true);
    }

    // a "new" rule that reads exactly like the base rule is not a specialization
    if (newRule.getRuleString().equals(baseRule.getRuleString())) {
      return null;
    }
    return newRule;
  }

  /**
   * Searches {@code pattern} for a star wildcard whose left neighbor ends at or before
   * {@code termBegin} and whose right neighbor begins at or after {@code termEnd}. Wildcards
   * that lack a neighbor with a word constraint on either side are skipped instead of causing a
   * {@code NullPointerException}. If several wildcards qualify, the last one in iteration order
   * is returned.
   */
  private WhiskRuleItem findStarWildCardEnclosingTerm(WhiskRule rule,
          TextRulerRulePattern pattern, int termBegin, int termEnd) {
    WhiskRuleItem found = null;
    for (TextRulerRuleItem candidate : pattern) {
      WhiskRuleItem item = (WhiskRuleItem) candidate;
      if (item.isStarWildCard()) {
        WhiskRuleItem left = rule.searchNeighborOfItem(item, true);
        WhiskRuleItem right = rule.searchNeighborOfItem(item, false);
        if (itemEndsAtOrBefore(left, termBegin) && itemBeginsAtOrAfter(right, termEnd)) {
          found = item;
        }
      }
    }
    return found;
  }

  /**
   * Returns {@code true} if {@code item} has a word constraint whose token annotation begins at
   * or after {@code offset}; {@code false} for {@code null} items or items without constraint.
   */
  private boolean itemBeginsAtOrAfter(WhiskRuleItem item, int offset) {
    return item != null && item.getWordConstraint() != null
            && offset <= item.getWordConstraint().getTokenAnnotation().getBegin();
  }

  /**
   * Returns {@code true} if {@code item} has a word constraint whose token annotation ends at or
   * before {@code offset}; {@code false} for {@code null} items or items without constraint.
   */
  private boolean itemEndsAtOrBefore(WhiskRuleItem item, int offset) {
    return item != null && item.getWordConstraint() != null
            && item.getWordConstraint().getTokenAnnotation().getEnd() <= offset;
  }