in ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/core/TextRulerExampleDocument.java [94:238]
/**
 * Builds the list of examples that the given CAS provides for the given learning target.
 * <p>
 * Three kinds of targets are handled:
 * <ul>
 * <li><b>multi slot</b>: the annotation index of {@code aCas} is walked in index order and
 * annotations whose type is one of the slot types are grouped into slot tuples. A tuple is
 * emitted when its last slot has been filled, when a slot with a lower index than expected shows
 * up (the previous tuple was incomplete), or at the end if a partially filled tuple remains.</li>
 * <li><b>left/right correction</b>: the learner's test CAS is refilled and processed by the
 * learner's analysis engine, the result is compared with this document, and for every covered
 * negative example a nearby positive example (within the target's maximum shift distance) is
 * searched. Each hit yields one {@link TextRulerShiftExample}.</li>
 * <li><b>single slot</b>: one example per annotation of the slot type.</li>
 * </ul>
 *
 * @param aCas
 *          the CAS to read slot annotations from (used by the multi slot and single slot cases)
 * @param target
 *          the learning target that determines which kind of examples are created
 * @param createFromRawTypeName
 *          single slot case only: if {@code true} the raw slot type name of the target is used to
 *          look up annotations, otherwise the regular slot type name
 * @return the created examples; never {@code null}, possibly empty
 */
public List<TextRulerExample> createSlotInstancesForCAS(CAS aCas, TextRulerTarget target,
        boolean createFromRawTypeName) {
  List<TextRulerExample> result = new ArrayList<TextRulerExample>();
  if (target.isMultiSlot()) {
    TypeSystem ts = aCas.getTypeSystem();
    int slotCount = target.slotNames.length;
    int currentSlotIndex = 0;
    TextRulerAnnotation[] currentAnnotations = new TextRulerAnnotation[slotCount];
    List<Type> slotTypes = new ArrayList<Type>();
    for (String s : target.slotNames) {
      slotTypes.add(ts.getType(s));
    }
    for (FSIterator<AnnotationFS> it = aCas.getAnnotationIndex().iterator(true); it.isValid(); it
            .moveToNext()) {
      AnnotationFS fs = it.get();
      // a single lookup tells us both whether this is a slot type and which slot it is
      int idx = slotTypes.indexOf(fs.getType());
      if (idx < 0) {
        continue;
      }
      if (idx < currentSlotIndex) {
        // the previous example was not complete, so we have to write it down
        result.add(new TextRulerExample(this, currentAnnotations, true, target));
        currentAnnotations = new TextRulerAnnotation[slotCount];
      }
      currentAnnotations[idx] = new TextRulerAnnotation(fs, this);
      if (idx >= slotCount - 1) {
        // last slot reached: the tuple is complete
        result.add(new TextRulerExample(this, currentAnnotations, true, target));
        currentAnnotations = new TextRulerAnnotation[slotCount];
        currentSlotIndex = 0;
      } else {
        currentSlotIndex = idx + 1;
      }
    }
    if (currentSlotIndex > 0) {
      // a partially filled tuple is left over at the end of the document
      result.add(new TextRulerExample(this, currentAnnotations, true, target));
    }
  } else if (target.isLeftCorrection() || target.isRightCorrection()) {
    TextRulerBasicLearner learner = target.getLearner();
    Set<String> filterSet = learner.getFilterSet();
    CAS testCAS = learner.getTestCAS();
    TextRulerStatisticsCollector c = new TextRulerStatisticsCollector();
    resetAndFillTestCAS(testCAS, target);
    CAS docCAS = getCAS();
    TypeSystem ts = docCAS.getTypeSystem();
    Type tokensRootType = ts.getType(TextRulerToolkit.RUTA_ANY_TYPE_NAME);
    AnalysisEngine analysisEngine = learner.getAnalysisEngine();
    try {
      analysisEngine.process(testCAS);
    } catch (AnalysisEngineProcessException e) {
      // best effort: keep going with whatever the test CAS contains, but do not hide the failure
      TextRulerToolkit.log("Processing the test CAS failed while creating correction examples: "
              + e);
    }
    // the correction examples are derived from the corresponding plain boundary target
    TextRulerTarget newTarget = new TextRulerTarget(target.slotNames, target.getLearner());
    if (target.isLeftCorrection()) {
      newTarget.type = TextRulerTarget.MLTargetType.SINGLE_LEFT_BOUNDARY;
    } else {
      newTarget.type = TextRulerTarget.MLTargetType.SINGLE_RIGHT_BOUNDARY;
    }
    createExamplesForTarget(newTarget);
    learner.compareOriginalDocumentWithTestCAS(this, testCAS, newTarget, c, true);
    List<TextRulerExample> correctTags = getPositiveExamples();
    List<TextRulerExample> wrongTags = new ArrayList<TextRulerExample>(
            c.getCoveredNegativeExamples());
    for (TextRulerExample wrongTag : wrongTags) {
      // test, if there's a corresponding positive example
      // somewhere around (within maxDistance)
      List<AnnotationFS> left = TextRulerToolkit.getAnnotationsBeforePosition(docCAS, wrongTag
              .getAnnotation().getBegin(), target.getMaxShiftDistance(), TextRulerToolkit
              .getFilterSetWithSlotNames(target.slotNames, filterSet), tokensRootType);
      List<AnnotationFS> right = TextRulerToolkit.getAnnotationsAfterPosition(docCAS, wrongTag
              .getAnnotation().getEnd(), target.getMaxShiftDistance() + 1, TextRulerToolkit
              .getFilterSetWithSlotNames(target.slotNames, filterSet), tokensRootType);
      // One extra annotation was requested above and the first one is dropped again
      // (presumably it is the token at the wrong tag's own position - TODO confirm).
      // Nothing may follow the wrong tag at all, so guard against an empty list.
      if (!right.isEmpty()) {
        right.remove(0);
      }
      // TODO stop after the first found match or create one bad
      // example for each found occurrence ??!!
      // for now: stop after one ! so create only ONE bad
      // example...

      // search leftwards, nearest annotation first
      int leftDistance = 0;
      TextRulerExample leftCorrectTag = null;
      for (int i = left.size() - 1; i >= 0; i--) {
        leftDistance++;
        TextRulerAnnotation needle = TextRulerToolkit.convertToTargetAnnotation(left.get(i),
                this, target, ts);
        // Only checks the beginning of needle
        leftCorrectTag = TextRulerExampleDocument.exampleListContainsAnnotation(correctTags,
                needle);
        if (leftCorrectTag != null) {
          break;
        }
      }
      // search rightwards, nearest annotation first
      int rightDistance = 0;
      TextRulerExample rightCorrectTag = null;
      for (AnnotationFS fs : right) {
        rightDistance++;
        TextRulerAnnotation needle = TextRulerToolkit.convertToTargetAnnotation(fs, this, target,
                ts);
        // Only checks the beginning of needle
        rightCorrectTag = TextRulerExampleDocument.exampleListContainsAnnotation(correctTags,
                needle);
        if (rightCorrectTag != null) {
          break;
        }
      }
      // The distance counters are only meaningful for a side that actually produced a match
      // (otherwise they just hold the number of inspected annotations). So if only one side
      // matched, take it; compare distances only when both sides matched.
      TextRulerExample theCorrectTag;
      if (leftCorrectTag == null) {
        theCorrectTag = rightCorrectTag;
      } else if (rightCorrectTag == null) {
        theCorrectTag = leftCorrectTag;
      } else if (rightDistance < leftDistance) {
        theCorrectTag = rightCorrectTag;
      } else if (rightDistance > leftDistance) {
        theCorrectTag = leftCorrectTag;
      } else if (target.type == MLTargetType.SINGLE_LEFT_BOUNDARY) {
        // equal distance: use the one that would lie in the slot filler
        // NOTE(review): this branch handles correction targets; verify whether the type can
        // ever be SINGLE_LEFT_BOUNDARY here or whether isLeftCorrection() was intended.
        theCorrectTag = rightCorrectTag;
      } else {
        theCorrectTag = leftCorrectTag;
      }
      if (theCorrectTag != null) {
        TextRulerToolkit.log("FOUND BAD EXAMPLE FOR SHIFTING !!");
        TextRulerShiftExample shiftExample = new TextRulerShiftExample(this,
                wrongTag.getAnnotation(), theCorrectTag.getAnnotation(), true, target);
        result.add(shiftExample);
      }
    }
    // GlobalCASSource.releaseCAS(testCAS);
  } else {
    // do not use the boundary type here since we seek the original slot!
    String slotTypeName = createFromRawTypeName ? target.getSingleSlotRawTypeName() : target
            .getSingleSlotTypeName();
    List<AnnotationFS> slots = TextRulerToolkit.extractAnnotationsForSlotName(aCas,
            slotTypeName);
    for (AnnotationFS a : slots) {
      result.add(new TextRulerExample(this, TextRulerToolkit.convertToTargetAnnotation(a, this,
              target, aCas.getTypeSystem()), true, target));
    }
  }
  return result;
}