in ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/whisk/generic/Whisk.java [688:848]
/**
 * Creates the anchored seed rule for one slot of the given example.
 * <p>
 * For every term sequence returned by {@code getTermsWithinBounds} for the slot annotation, two
 * competing rules are built from a copy of {@code rule}:
 * <ul>
 * <li><b>base1</b> describes the slot filler by the terms inside the slot bounds (all of them if
 * there are at most {@code windowSize}, otherwise first term, wildcard, last term);</li>
 * <li><b>base2</b> is the candidate with the highest covered-positives count among rules that
 * combine an optional term before the slot, a wildcard-based filler and an optional term after
 * the slot.</li>
 * </ul>
 * The one of the two that covers more positives (base1 on ties) is kept for that sequence, and
 * the kept rule with the highest covered-positives count over all sequences is returned (the
 * earliest one on ties).
 *
 * @param rule
 *          the rule to extend; every generated rule starts as a copy of it. May be {@code null}.
 * @param doc
 *          not used by this method
 * @param example
 *          the example whose annotation at {@code slotIndex} delimits the slot
 * @param slotIndex
 *          index of the slot, both into the example's annotations and into the rule's patterns
 * @return the selected rule, or {@code null} if {@code rule} is {@code null}, if any term
 *         sequence inside the slot bounds is empty, if no sequence exists at all, or if the
 *         learner was asked to abort
 */
protected WhiskRule anchor(WhiskRule rule, TextRulerExampleDocument doc,
    TextRulerExample example, int slotIndex) {
  TextRulerAnnotation slotAnnotation = example.getAnnotations()[slotIndex];
  List<List<WhiskRuleItem>> window = getTermsWithinBounds(slotAnnotation.getBegin(),
      slotAnnotation.getEnd(), example);
  if (rule == null) {
    return null;
  }

  List<WhiskRule> result = new ArrayList<WhiskRule>();
  for (List<WhiskRuleItem> inside : window) {
    if (inside.isEmpty()) {
      return null;
    }
    int insideCount = inside.size();

    // base 1: the slot filler rule
    WhiskRule base1 = rule.copy();
    TextRulerSlotPattern slotPattern = base1.getPatterns().get(slotIndex);
    // questionable restriction:
    if (insideCount <= windowSize) { // TODO add parameter for this!
      // the items are added as they are, i.e. without copying them
      slotPattern.fillerPattern.addAll(inside);
    } else {
      // too many terms: keep only the boundary terms and bridge the gap with a wildcard
      slotPattern.fillerPattern.add(inside.get(0).copy());
      if (insideCount > 2) {
        slotPattern.fillerPattern.add(WhiskRuleItem.newWildCardItem());
      }
      if (insideCount > 1) {
        slotPattern.fillerPattern.add(inside.get(insideCount - 1).copy());
      }
    }

    List<WhiskRuleItem> beforeList = getTermsBefore(inside.get(0), example);
    List<WhiskRuleItem> afterList = getTermsAfter(inside.get(insideCount - 1), example);
    // the null entry stands for "no context term on this side"; it also guarantees that both
    // lists are non-empty, so at least one candidate is generated per variant below
    beforeList.add(null);
    afterList.add(null);

    // base 2 candidates (workaround for better rules). The insertion order
    // "inner begin", "inner end", "wildcard only" is kept on purpose.
    Collection<WhiskRule> tempRules = new HashSet<WhiskRule>();
    addContextCandidates(tempRules, rule, slotIndex, beforeList, afterList, inside, true, false);
    addContextCandidates(tempRules, rule, slotIndex, beforeList, afterList, inside, false, true);
    addContextCandidates(tempRules, rule, slotIndex, beforeList, afterList, inside, false, false);

    List<TextRulerRule> rules = new ArrayList<TextRulerRule>(tempRules);
    testRulesIfNotCached(rules);
    // NOTE(review): same guard as after the second test run below; presumably the covering
    // statistics are not reliable (possibly unset) once the learner was aborted - confirm.
    if (shouldAbort()) {
      return null;
    }
    WhiskRule base2 = (WhiskRule) selectMostCovering(rules);

    List<TextRulerRule> testRules = new ArrayList<TextRulerRule>();
    TextRulerToolkit.log("base1: " + base1.getRuleString());
    testRules.add(base1);
    if (base2 != null) {
      TextRulerToolkit.log("base2: " + base2.getRuleString());
      testRules.add(base2);
    }
    testRulesIfNotCached(testRules);
    if (shouldAbort()) {
      return null;
    }

    TextRulerToolkit.log("\tbase1: " + base1.getCoveringStatistics() + " --> laplacian = "
        + base1.getLaplacian());
    if (base2 == null) {
      result.add(base1);
      continue;
    }
    TextRulerToolkit.log("\tbase2: " + base2.getCoveringStatistics() + " --> laplacian = "
        + base2.getLaplacian());
    // base2 only wins if it covers strictly more positives than base1
    if (base2.getCoveringStatistics().getCoveredPositivesCount() > base1
        .getCoveringStatistics().getCoveredPositivesCount()) {
      result.add(base2);
    } else {
      result.add(base1);
    }
  }
  return (WhiskRule) selectMostCovering(result);
}

/**
 * Adds one copy of {@code rule} per (before, after) combination to {@code candidates}. The slot
 * pattern at {@code slotIndex} of each copy receives the before term as pre-filler and the after
 * term as post-filler (a {@code null} entry means that side is left untouched), and a filler
 * consisting of a wildcard that is optionally preceded by a copy of the first inside term and
 * optionally followed by a copy of the last inside term.
 *
 * @param candidates
 *          collection receiving the generated rules
 * @param rule
 *          template that is copied for every candidate
 * @param slotIndex
 *          index of the slot pattern to extend
 * @param beforeList
 *          context terms for the pre-filler; may contain {@code null}
 * @param afterList
 *          context terms for the post-filler; may contain {@code null}
 * @param inside
 *          non-empty list of the terms inside the slot bounds
 * @param keepFirst
 *          whether a copy of the first inside term precedes the wildcard
 * @param keepLast
 *          whether a copy of the last inside term follows the wildcard
 */
private void addContextCandidates(Collection<WhiskRule> candidates, WhiskRule rule,
    int slotIndex, List<WhiskRuleItem> beforeList, List<WhiskRuleItem> afterList,
    List<WhiskRuleItem> inside, boolean keepFirst, boolean keepLast) {
  for (WhiskRuleItem eachBefore : beforeList) {
    for (WhiskRuleItem eachAfter : afterList) {
      WhiskRule copy = rule.copy();
      TextRulerSlotPattern pattern = copy.getPatterns().get(slotIndex);
      if (eachBefore != null) {
        pattern.preFillerPattern.add(eachBefore);
      }
      if (keepFirst) {
        pattern.fillerPattern.add(inside.get(0).copy());
      }
      pattern.fillerPattern.add(WhiskRuleItem.newWildCardItem());
      if (keepLast) {
        pattern.fillerPattern.add(inside.get(inside.size() - 1).copy());
      }
      if (eachAfter != null) {
        pattern.postFillerPattern.add(eachAfter);
      }
      candidates.add(copy);
    }
  }
}

/**
 * Returns the rule with the highest covered-positives count according to its covering
 * statistics. On ties the rule that comes first in iteration order wins.
 *
 * @param candidates
 *          the rules to choose from; their covering statistics must be available
 * @return the selected rule, or {@code null} if {@code candidates} is empty
 */
private TextRulerRule selectMostCovering(List<? extends TextRulerRule> candidates) {
  TextRulerRule best = null;
  for (TextRulerRule each : candidates) {
    if (best == null || each.getCoveringStatistics().getCoveredPositivesCount() > best
        .getCoveringStatistics().getCoveredPositivesCount()) {
      best = each;
    }
  }
  return best;
}