private static ArrayList getGeneralizationsForRuleItemLists()

in ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/rapier/RapierGeneralizationHelper.java [50:192]


  /**
   * Builds the candidate generalizations of two rule item lists.
   *
   * <p>
   * Word and tag constraints are generalized independently. For each kind, the proposals are:
   * <ul>
   * <li>only "no constraint", if any item in either list has no constraint of that kind;</li>
   * <li>otherwise the union over all items of both lists, plus "no constraint" when the union
   * is larger than the largest single constraint set.</li>
   * </ul>
   * Class constraints are not generalized yet; only the "no class constraint" variant is
   * proposed. The result holds one new item per combination of the proposals.
   *
   * <p>
   * Every resulting item gets the same list length: the larger of the two summed list lengths,
   * reset to 0 when that length is 1 and neither input list is empty.
   *
   * @param item1List
   *          first list of items; every element is cast to {@code RapierRuleItem}
   * @param item2List
   *          second list of items; every element is cast to {@code RapierRuleItem}
   * @return the generated items, ordered by word proposal, then tag proposal, then class
   *         proposal
   */
  private static ArrayList<TextRulerRuleItem> getGeneralizationsForRuleItemLists(
          ArrayList<TextRulerRuleItem> item1List, ArrayList<TextRulerRuleItem> item2List) {
    final boolean firstIsEmpty = item1List.isEmpty();
    final boolean secondIsEmpty = item2List.isEmpty();
    if (firstIsEmpty && secondIsEmpty) {
      // Two empty lists are unexpected here; this is only reported, processing continues.
      TextRulerToolkit.log("ERROR !");
    }
    final boolean oneListIsEmpty = firstIsEmpty || secondIsEmpty;

    // Statistics gathered over the items of both lists.
    int largestWordSet = 0;
    int largestTagSet = 0;
    boolean someItemHasNoWords = false;
    boolean someItemHasNoTags = false;

    int secondLength = 0;
    for (TextRulerRuleItem raw : item2List) {
      RapierRuleItem item = (RapierRuleItem) raw;
      secondLength += item.isListItem() ? item.listLen() : 1;
      int wordCount = item.getWordConstraints().size();
      int tagCount = item.getTagConstraints().size();
      largestWordSet = Math.max(largestWordSet, wordCount);
      largestTagSet = Math.max(largestTagSet, tagCount);
      someItemHasNoWords |= (wordCount == 0);
      someItemHasNoTags |= (tagCount == 0);
    }

    int firstLength = 0;
    for (TextRulerRuleItem raw : item1List) {
      RapierRuleItem item = (RapierRuleItem) raw;
      firstLength += item.isListItem() ? item.listLen() : 1;
      int wordCount = item.getWordConstraints().size();
      int tagCount = item.getTagConstraints().size();
      largestWordSet = Math.max(largestWordSet, wordCount);
      largestTagSet = Math.max(largestTagSet, tagCount);
      someItemHasNoWords |= (wordCount == 0);
      someItemHasNoTags |= (tagCount == 0);
    }

    // The longer of the two lists determines the length of the generalized list item.
    int resultListLen = Math.max(firstLength, secondLength);
    if (!oneListIsEmpty && resultListLen == 1) {
      // A list length of 1 is only meaningful when one of the item lists is empty.
      resultListLen = 0;
    }

    // Generalize word constraints.
    ArrayList<RapierRuleItem> wordProposals = new ArrayList<RapierRuleItem>();
    if (!someItemHasNoWords) {
      RapierRuleItem wordUnion = new RapierRuleItem();
      for (TextRulerRuleItem raw : item1List) {
        wordUnion.addWordConstraints(((RapierRuleItem) raw).getWordConstraints());
      }
      for (TextRulerRuleItem raw : item2List) {
        wordUnion.addWordConstraints(((RapierRuleItem) raw).getWordConstraints());
      }
      wordProposals.add(wordUnion);

      // If the union is larger than the largest single set, no set subsumes all others, so
      // dropping the constraint is proposed as well.
      // NOTE(review): presumably addWordConstraints skips duplicates - confirm in RapierRuleItem.
      if (wordUnion.getWordConstraints().size() != largestWordSet) {
        wordProposals.add(new RapierRuleItem());
      }
    } else {
      // At least one item has no word constraint: only the unconstrained variant is proposed.
      wordProposals.add(new RapierRuleItem());
    }

    // Generalize tag constraints in the same way.
    ArrayList<RapierRuleItem> tagProposals = new ArrayList<RapierRuleItem>();
    if (!someItemHasNoTags) {
      RapierRuleItem tagUnion = new RapierRuleItem();
      for (TextRulerRuleItem raw : item1List) {
        tagUnion.addTagConstraints(((RapierRuleItem) raw).getTagConstraints());
      }
      for (TextRulerRuleItem raw : item2List) {
        tagUnion.addTagConstraints(((RapierRuleItem) raw).getTagConstraints());
      }
      tagProposals.add(tagUnion);

      // Same reasoning as for the words: a union larger than the largest single set also
      // yields the "drop the constraint" proposal.
      if (tagUnion.getTagConstraints().size() != largestTagSet) {
        tagProposals.add(new RapierRuleItem());
      }
    } else {
      // At least one item has no tag constraint: only the unconstrained variant is proposed.
      tagProposals.add(new RapierRuleItem());
    }

    // TODO semantic class generalization
    // For now only the variant without any class constraint is proposed.
    ArrayList<RapierRuleItem> classProposals = new ArrayList<RapierRuleItem>();
    classProposals.add(new RapierRuleItem());

    // Finally, create one item for every combination of the proposals above.
    final boolean beginsAtZero = oneListIsEmpty && resultListLen > 0;
    ArrayList<TextRulerRuleItem> result = new ArrayList<TextRulerRuleItem>();
    for (RapierRuleItem wordProposal : wordProposals) {
      for (RapierRuleItem tagProposal : tagProposals) {
        for (RapierRuleItem classProposal : classProposals) {
          RapierRuleItem combined = new RapierRuleItem();
          for (TextRulerWordConstraint word : wordProposal.getWordConstraints()) {
            combined.addWordConstraint(word.copy());
          }
          for (String tag : tagProposal.getTagConstraints()) {
            combined.addTagConstraint(tag);
          }
          for (String semanticClass : classProposal.getClassConstraints()) {
            combined.addClassConstraint(semanticClass);
          }
          combined.setListLen(resultListLen);
          combined.setListBeginsAtZero(beginsAtZero);
          result.add(combined);
        }
      }
    }
    return result;
  }