in ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/rapier/RapierGeneralizationHelper.java [50:192]
/**
 * Builds the candidate generalizations of two rule item lists.
 *
 * <p>
 * Steps performed:
 * <ol>
 * <li>Both lists are scanned to collect the summed length of each list (an item for which
 * {@code isListItem()} holds contributes {@code listLen()}, any other item contributes 1), the
 * size of the largest word/tag constraint set, and whether any item has an empty word/tag
 * constraint set.</li>
 * <li>Word constraints: if any item has no word constraints, only an unconstrained item is
 * proposed. Otherwise the union of all word constraints is proposed, plus an unconstrained item
 * when the size of the union differs from the size of the largest single constraint set.</li>
 * <li>Tag constraints are handled the same way as word constraints.</li>
 * <li>Class constraints: only the unconstrained variant is proposed (semantic class
 * generalization is still a TODO).</li>
 * <li>Every combination of the proposed word, tag and class constraints becomes one result
 * item.</li>
 * </ol>
 *
 * <p>
 * If both lists are empty, {@code "ERROR !"} is logged and processing continues.
 *
 * @param item1List
 *          the first list of rule items; every element is cast to {@code RapierRuleItem}
 * @param item2List
 *          the second list of rule items; every element is cast to {@code RapierRuleItem}
 * @return the generated items, one per combination of proposed constraints
 */
private static ArrayList<TextRulerRuleItem> getGeneralizationsForRuleItemLists(
        ArrayList<TextRulerRuleItem> item1List, ArrayList<TextRulerRuleItem> item2List) {
  final boolean firstIsEmpty = item1List.isEmpty();
  final boolean secondIsEmpty = item2List.isEmpty();
  if (firstIsEmpty && secondIsEmpty) {
    TextRulerToolkit.log("ERROR !");
  }
  // special case: exactly one (or both) of the lists has no items
  final boolean oneListIsEmpty = firstIsEmpty || secondIsEmpty;

  int largestWordSet = 0;
  int largestTagSet = 0;
  boolean anyWordSetEmpty = false;
  boolean anyTagSetEmpty = false;

  // gather statistics over the second list ...
  int secondLen = 0;
  for (TextRulerRuleItem raw : item2List) {
    RapierRuleItem item = (RapierRuleItem) raw;
    if (item.isListItem()) {
      secondLen += item.listLen();
    } else {
      secondLen += 1;
    }
    int wordCount = item.getWordConstraints().size();
    largestWordSet = Math.max(largestWordSet, wordCount);
    anyWordSetEmpty |= (wordCount == 0);
    int tagCount = item.getTagConstraints().size();
    largestTagSet = Math.max(largestTagSet, tagCount);
    anyTagSetEmpty |= (tagCount == 0);
  }

  // ... and over the first list
  int firstLen = 0;
  for (TextRulerRuleItem raw : item1List) {
    RapierRuleItem item = (RapierRuleItem) raw;
    if (item.isListItem()) {
      firstLen += item.listLen();
    } else {
      firstLen += 1;
    }
    int wordCount = item.getWordConstraints().size();
    largestWordSet = Math.max(largestWordSet, wordCount);
    anyWordSetEmpty |= (wordCount == 0);
    int tagCount = item.getTagConstraints().size();
    largestTagSet = Math.max(largestTagSet, tagCount);
    anyTagSetEmpty |= (tagCount == 0);
  }

  // the generalized item takes the bigger of both summed lengths
  int resultListLen = Math.max(firstLen, secondLen);
  if (!oneListIsEmpty && resultListLen == 1) {
    // per the original author's note: a list length of 1 can only occur when one of the
    // item lists is empty, which is not the case here, so the length is reset to 0
    resultListLen = 0;
  }

  // generalize word constraints
  ArrayList<RapierRuleItem> wordProposals = new ArrayList<RapierRuleItem>();
  if (!anyWordSetEmpty) {
    // propose the union of the word constraints of all items
    RapierRuleItem wordUnion = new RapierRuleItem();
    for (TextRulerRuleItem raw : item1List) {
      wordUnion.addWordConstraints(((RapierRuleItem) raw).getWordConstraints());
    }
    for (TextRulerRuleItem raw : item2List) {
      wordUnion.addWordConstraints(((RapierRuleItem) raw).getWordConstraints());
    }
    wordProposals.add(wordUnion);
    // if the union is a real union (its size differs from the largest single constraint
    // set, i.e. no set subsumes all others), dropping the constraint is proposed as well
    if (largestWordSet != wordUnion.getWordConstraints().size()) {
      wordProposals.add(new RapierRuleItem());
    }
  } else {
    // at least one item has no word constraints: propose only the unconstrained item
    wordProposals.add(new RapierRuleItem());
  }

  // generalize tag constraints (same scheme as for the word constraints)
  ArrayList<RapierRuleItem> tagProposals = new ArrayList<RapierRuleItem>();
  if (!anyTagSetEmpty) {
    RapierRuleItem tagUnion = new RapierRuleItem();
    for (TextRulerRuleItem raw : item1List) {
      tagUnion.addTagConstraints(((RapierRuleItem) raw).getTagConstraints());
    }
    for (TextRulerRuleItem raw : item2List) {
      tagUnion.addTagConstraints(((RapierRuleItem) raw).getTagConstraints());
    }
    tagProposals.add(tagUnion);
    // real union: also propose dropping the tag constraint
    if (largestTagSet != tagUnion.getTagConstraints().size()) {
      tagProposals.add(new RapierRuleItem());
    }
  } else {
    // at least one item has no tag constraints: propose only the unconstrained item
    tagProposals.add(new RapierRuleItem());
  }

  // TODO semantic class generalization
  // for now only the version WITHOUT any class constraint is proposed
  ArrayList<RapierRuleItem> classProposals = new ArrayList<RapierRuleItem>();
  classProposals.add(new RapierRuleItem());

  // finally, create all combinations of the proposed word, tag and class constraints
  final boolean beginsAtZero = oneListIsEmpty && resultListLen > 0;
  ArrayList<TextRulerRuleItem> generalizations = new ArrayList<TextRulerRuleItem>();
  for (RapierRuleItem wordProposal : wordProposals) {
    for (RapierRuleItem tagProposal : tagProposals) {
      for (RapierRuleItem classProposal : classProposals) {
        RapierRuleItem combined = new RapierRuleItem();
        for (TextRulerWordConstraint word : wordProposal.getWordConstraints()) {
          // each result item receives its own copy of the word constraint
          combined.addWordConstraint(word.copy());
        }
        for (String tag : tagProposal.getTagConstraints()) {
          combined.addTagConstraint(tag);
        }
        for (String semanticClass : classProposal.getClassConstraints()) {
          combined.addClassConstraint(semanticClass);
        }
        combined.setListLen(resultListLen);
        combined.setListBeginsAtZero(beginsAtZero);
        generalizations.add(combined);
      }
    }
  }
  return generalizations;
}