in ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/whisk/generic/Whisk.java [449:686]
/**
 * Creates a copy of {@code baseRule} that is extended by the given term.
 * <p>
 * The term is positioned by comparing the begin/end of its token annotation with the token
 * annotations of the items already present in the copied rule:
 * <ol>
 * <li>a target pattern (prefiller, filler or postfiller of a slot) is selected,</li>
 * <li>the term either replaces a star wildcard whose neighbors enclose it or is inserted in
 * front of the first item of the target pattern that starts at or after the term's end,</li>
 * <li>a star wildcard is inserted between the new term and a non-wildcard neighbor whenever
 * {@code isNextValidNeighbor} reports that the two are not valid direct neighbors in the seed
 * example.</li>
 * </ol>
 *
 * @param baseRule
 *          the rule to extend; it is copied, the new term is added to the copy
 * @param term
 *          the term to add
 * @return the extended copy, or {@code null} if {@code term} is {@code null}, is a star wildcard
 *         or has no word constraint, if a matching wildcard is not part of the selected target
 *         pattern, or if the resulting rule string equals the rule string of {@code baseRule}
 */
protected WhiskRule createNewRuleByAddingTerm(WhiskRule baseRule, WhiskRuleItem term) {
  if (term == null) {
    return null;
  }
  if (term.isStarWildCard() || term.getWordConstraint() == null) {
    return null;
  }
  WhiskRule newRule = baseRule.copy();
  int termBeginNumber = term.getWordConstraint().getTokenAnnotation().getBegin();
  int termEndNumber = term.getWordConstraint().getTokenAnnotation().getEnd();
  TextRulerRulePattern targetPattern = null;
  TextRulerRulePattern previousSlotPostFillerPattern = null;
  for (int i = 0; i < newRule.getPatterns().size(); i++) {
    TextRulerSlotPattern slotPattern = newRule.getPatterns().get(i);
    // look at the prefiller pattern
    // NOTE(review): this check is not guarded by "targetPattern == null", so a later slot's
    // prefiller can overwrite a target found for an earlier slot - confirm this is intended.
    WhiskRuleItem it = (WhiskRuleItem) slotPattern.preFillerPattern.lastItem();
    if (it != null && it.getWordConstraint() != null
        && termEndNumber <= it.getWordConstraint().getTokenAnnotation().getBegin()) {
      targetPattern = slotPattern.preFillerPattern;
    }
    // now look at the filler pattern
    if (targetPattern == null && slotPattern.fillerPattern.size() > 0) {
      it = (WhiskRuleItem) slotPattern.fillerPattern.firstItem();
      if (it.getWordConstraint() != null
          && termEndNumber <= it.getWordConstraint().getTokenAnnotation().getBegin()) {
        // it's still for the prefiller pattern, but that one seems to be empty so we could
        // not find that out above
        targetPattern = slotPattern.preFillerPattern;
      } else {
        it = (WhiskRuleItem) slotPattern.fillerPattern.lastItem();
        if (it.getWordConstraint() != null
            && termEndNumber <= it.getWordConstraint().getTokenAnnotation().getBegin()) {
          targetPattern = slotPattern.fillerPattern;
        }
      }
    }
    // now look at the postfiller pattern
    if (targetPattern == null && slotPattern.postFillerPattern.size() > 0) {
      it = (WhiskRuleItem) slotPattern.postFillerPattern.firstItem();
      if (it.getWordConstraint() != null
          && termEndNumber <= it.getWordConstraint().getTokenAnnotation().getBegin()) {
        // it's still for the filler pattern, but that one seems to be empty so we could not
        // find that out above
        targetPattern = slotPattern.fillerPattern;
      } else {
        it = (WhiskRuleItem) slotPattern.postFillerPattern.lastItem();
        if (it.getWordConstraint() != null
            && termEndNumber <= it.getWordConstraint().getTokenAnnotation().getBegin()) {
          targetPattern = slotPattern.postFillerPattern;
        }
      }
    }
    if (targetPattern == null) {
      // nothing in this slot lies behind the term: fall back to the postfiller of the previous
      // slot (still null while looking at the first slot)
      targetPattern = previousSlotPostFillerPattern;
    }
    previousSlotPostFillerPattern = slotPattern.postFillerPattern;
  }
  if (targetPattern == null) {
    // last resort: the postfiller pattern of the last slot
    targetPattern = previousSlotPostFillerPattern;
  }
  if (targetPattern == null) {
    TextRulerToolkit.log("ERROR, NO TARGET PATTERN FOR NEW RULE TERM FOUND !");
  } else {
    // now put that term into the rule:
    int indexInPattern = -1;
    if (targetPattern.size() == 0) {
      targetPattern.add(term.copy());
      indexInPattern = 0;
    } else {
      // 1. search if the term would replace a wildcard; only the first slot is searched, in the
      // order prefiller, filler, postfiller
      TextRulerSlotPattern firstSlot = newRule.getPatterns().get(0);
      WhiskRuleItem wildCard = findWildCardEnclosingTerm(newRule, firstSlot.preFillerPattern,
          termBeginNumber, termEndNumber);
      if (wildCard == null) {
        wildCard = findWildCardEnclosingTerm(newRule, firstSlot.fillerPattern, termBeginNumber,
            termEndNumber);
      }
      if (wildCard == null) {
        wildCard = findWildCardEnclosingTerm(newRule, firstSlot.postFillerPattern,
            termBeginNumber, termEndNumber);
      }
      if (wildCard != null) {
        // defensive sanity checks kept from the original implementation
        if (!wildCard.isStarWildCard()) {
          TextRulerToolkit
              .log("ERROR, FOUND A TERM WITH THE SAME NUMBER THAT IS NOT A WILDCARD! HOW IS THAT???");
          return null;
        }
        if (!targetPattern.contains(wildCard)) {
          TextRulerToolkit.log("EVEN WORSE, THAT MUST NOT BE AT ALL!");
          return null;
        }
        indexInPattern = targetPattern.indexOf(wildCard);
        targetPattern.set(indexInPattern, term.copy());
      } else {
        // 2. not a wildcard, so search for the insertion point: the first item that begins at
        // or after the end of the term
        for (int i = 0; i < targetPattern.size(); i++) {
          WhiskRuleItem it = (WhiskRuleItem) targetPattern.get(i);
          if (it.getWordConstraint() != null
              && termEndNumber <= it.getWordConstraint().getTokenAnnotation().getBegin()) {
            indexInPattern = i;
            break;
          }
        }
        if (indexInPattern < 0) {
          indexInPattern = targetPattern.size();
          targetPattern.add(term.copy());
        } else {
          targetPattern.add(indexInPattern, term.copy());
        }
      }
    }
    // ok, now we have replaced a wildcard with the term or added the term between two other
    // items. we now have to check the neighbors of the new term: if a neighbor is a valid
    // direct neighbor, we have nothing special to do. otherwise we have to add a wildcard
    // between the two items (if the neighbor item is not a wildcard itself).
    WhiskRuleItem newTerm = (WhiskRuleItem) targetPattern.get(indexInPattern);
    // look at left neighbor:
    WhiskRuleItem left = newRule.searchNeighborOfItem(newTerm, true);
    if (left != null && left.getWordConstraint() != null) {
      if (!left.isStarWildCard()) {
        boolean isValid = isNextValidNeighbor(left, newTerm, newRule.getSeedExample());
        if (!isValid) {
          // no direct neighbor and no wildcard yet, so insert a wildcard between us; the new
          // term moves one position to the right
          targetPattern.add(indexInPattern, WhiskRuleItem.newWildCardItem());
          indexInPattern++;
        }
      }
    }
    // look at right neighbor:
    WhiskRuleItem right = newRule.searchNeighborOfItem(newTerm, false);
    if (right != null && right.getWordConstraint() != null) {
      if (!right.isStarWildCard()) {
        boolean isValid = isNextValidNeighbor(newTerm, right, newRule.getSeedExample());
        if (!isValid) {
          // no direct neighbor and no wildcard yet, so insert a wildcard between us
          WhiskRuleItem wc = WhiskRuleItem.newWildCardItem();
          if (indexInPattern + 1 < targetPattern.size()) {
            targetPattern.add(indexInPattern + 1, wc);
          } else {
            targetPattern.add(wc);
          }
        }
      }
    }
    newRule.setNeedsCompile(true);
  }
  // a rule that did not change compared to the base rule is of no use to the caller
  if (newRule.getRuleString().equals(baseRule.getRuleString())) {
    return null;
  } else {
    return newRule;
  }
}

/**
 * Searches {@code pattern} for a star wildcard whose left and right neighbors in {@code rule}
 * enclose the token range of a term, i.e. the left neighbor's token ends at or before
 * {@code termBegin} and the right neighbor's token begins at or after {@code termEnd}.
 * <p>
 * Wildcards with a missing neighbor, or with a neighbor that has no word constraint, are skipped
 * because their position cannot be compared with the term.
 *
 * @param rule
 *          the rule used to look up the neighbors of each wildcard
 * @param pattern
 *          the pattern whose items are inspected
 * @param termBegin
 *          begin of the term's token annotation
 * @param termEnd
 *          end of the term's token annotation
 * @return the last matching wildcard of {@code pattern}, or {@code null} if there is none
 */
private WhiskRuleItem findWildCardEnclosingTerm(WhiskRule rule, TextRulerRulePattern pattern,
    int termBegin, int termEnd) {
  WhiskRuleItem match = null;
  for (TextRulerRuleItem item : pattern) {
    WhiskRuleItem candidate = (WhiskRuleItem) item;
    if (!candidate.isStarWildCard()) {
      continue;
    }
    WhiskRuleItem left = rule.searchNeighborOfItem(candidate, true);
    WhiskRuleItem right = rule.searchNeighborOfItem(candidate, false);
    if (left == null || left.getWordConstraint() == null || right == null
        || right.getWordConstraint() == null) {
      continue;
    }
    if (left.getWordConstraint().getTokenAnnotation().getEnd() <= termBegin
        && right.getWordConstraint().getTokenAnnotation().getBegin() >= termEnd) {
      // no break on purpose: as before, the last matching wildcard of the pattern wins
      match = candidate;
    }
  }
  return match;
}