in ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/trabal/TrabalLearner.java [294:498]
/**
 * Compares the slot annotations of each document in {@code exampleDocuments} (used as the gold
 * standard) with the document at the same index in {@code additionalDocuments} and derives a list
 * of annotation errors from the differences.
 * <p>
 * As a side effect, {@code positiveExamples} is reset and filled with the conditions of every gold
 * annotation that has an equal counterpart, keyed by the short name of the annotation type.
 * <p>
 * Per document pair the phases run in this order, each one only considering annotations that
 * have not been consumed by an earlier phase:
 * <ol>
 * <li>equal annotations (no error, recorded as positive examples)</li>
 * <li>same span: {@code CORRECTION}</li>
 * <li>overlapping span, same type short name: expansion/shifting</li>
 * <li>overlapping span, any type: expansion/shifting</li>
 * <li>remaining annotations of the compared document: {@code DELETION}</li>
 * <li>remaining gold annotations: {@code ANNOTATION}</li>
 * </ol>
 *
 * @return the errors found in all compared document pairs, in detection order
 */
private List<AnnotationError> createErrorList() {
  positiveExamples = new HashMap<String, RankedList>();
  List<AnnotationError> result = new ArrayList<AnnotationError>();
  List<TextRulerExampleDocument> documents = additionalDocuments.getDocuments();
  List<TextRulerExampleDocument> goldStandard = exampleDocuments.getDocuments();
  for (int i = 0; i < goldStandard.size(); i++) {
    if (shouldAbort()) {
      break;
    }
    // Holds every annotation (gold or compared) that was already assigned to a match or an error.
    List<TrabalAnnotation> matches = new ArrayList<TrabalAnnotation>();
    List<TrabalAnnotation> gold = collectSlotAnnotations(goldStandard.get(i));
    // NOTE(review): documents are paired by index, so this assumes additionalDocuments holds at
    // least as many documents as exampleDocuments, in the same order - confirm against callers.
    List<TrabalAnnotation> docs = collectSlotAnnotations(documents.get(i));

    // find correct annotated elements
    int exampleIndex = 0;
    for (TrabalAnnotation a : gold) {
      // NOTE(review): aborting here only leaves this phase; the remaining phases still run for
      // the current document with an incomplete match list - confirm this is intended.
      if (shouldAbort()) {
        break;
      }
      exampleIndex++;
      sendStatusUpdateToDelegate(
              "Comparing documents " + (i + 1) + " of " + goldStandard.size() + ": example "
                      + exampleIndex + " of " + gold.size(),
              TextRulerLearnerState.ML_RUNNING, false);
      for (TrabalAnnotation b : docs) {
        if (b.equals(a)) {
          matches.add(a);
          matches.add(b);
          String typeName = a.getType().getShortName();
          RankedList conditions = positiveExamples.get(typeName);
          if (conditions == null) {
            conditions = new RankedList(idf);
          }
          conditions.addAll(createConditions(a));
          positiveExamples.put(typeName, conditions);
          break;
        }
      }
    }

    // create correction type errors
    for (TrabalAnnotation a : gold) {
      if (matches.contains(a)) {
        continue;
      }
      for (TrabalAnnotation b : docs) {
        if (matches.contains(b)) {
          continue;
        }
        if (b.getBegin() == a.getBegin() && b.getEnd() == a.getEnd()) {
          TextRulerTarget target = new TextRulerTarget(b.getType().getName(), this);
          TextRulerExample error = new TextRulerExample(b.getDocument(), b, false, target);
          TextRulerExample truth = new TextRulerExample(a.getDocument(), a, true, target);
          result.add(new AnnotationError(error, truth, AnnotationErrorType.CORRECTION));
          matches.add(a);
          matches.add(b);
          break;
        }
      }
    }

    // create shifting type errors
    addClosestOverlapErrors(gold, docs, matches, result, true);

    // creating shifting + correction errors
    addClosestOverlapErrors(gold, docs, matches, result, false);

    // create deleting type errors
    for (TrabalAnnotation b : docs) {
      if (!matches.contains(b)) {
        TextRulerTarget target = new TextRulerTarget(b.getType().getName(), this);
        TextRulerExample error = new TextRulerExample(b.getDocument(), b, false, target);
        result.add(new AnnotationError(error, null, AnnotationErrorType.DELETION));
        matches.add(b);
      }
    }

    // create annotation type errors
    for (TrabalAnnotation a : gold) {
      if (!matches.contains(a)) {
        TextRulerTarget target = new TextRulerTarget(a.getType().getName(), this);
        TextRulerExample truth = new TextRulerExample(a.getDocument(), a, true, target);
        result.add(new AnnotationError(null, truth, AnnotationErrorType.ANNOTATION));
        matches.add(a);
      }
    }
  }
  return result;
}

/**
 * Wraps every annotation of the document's CAS whose type passes {@code isSlotType} into a
 * {@code TrabalAnnotation}, in annotation index order.
 */
private List<TrabalAnnotation> collectSlotAnnotations(TextRulerExampleDocument document) {
  List<TrabalAnnotation> slotAnnotations = new ArrayList<TrabalAnnotation>();
  AnnotationIndex<AnnotationFS> index = document.getCAS().getAnnotationIndex();
  for (AnnotationFS annotation : index) {
    if (isSlotType(annotation.getType())) {
      slotAnnotations.add(new TrabalAnnotation(annotation, document, enableFeatures));
    }
  }
  return slotAnnotations;
}

/**
 * For each unmatched gold annotation, adds an error for the closest unmatched, overlapping
 * annotation of the compared document and marks both annotations as matched.
 *
 * @param gold
 *          slot annotations of the gold document
 * @param docs
 *          slot annotations of the compared document
 * @param matches
 *          annotations that are already consumed; extended by this method
 * @param result
 *          error list to append to
 * @param sameTypeOnly
 *          if {@code true}, only candidates with the same type short name as the gold annotation
 *          are considered and the target is built from the gold annotation's type; otherwise all
 *          overlapping candidates are considered and the target is built from the candidate's type
 */
private void addClosestOverlapErrors(List<TrabalAnnotation> gold, List<TrabalAnnotation> docs,
        List<TrabalAnnotation> matches, List<AnnotationError> result, boolean sameTypeOnly) {
  for (TrabalAnnotation a : gold) {
    // matches is not modified while candidates are collected, so one check per gold annotation
    // is sufficient.
    if (matches.contains(a)) {
      continue;
    }
    List<AnnotationError> candidates = new ArrayList<AnnotationError>();
    for (TrabalAnnotation b : docs) {
      if (matches.contains(b)) {
        continue;
      }
      if (sameTypeOnly && !b.getType().getShortName().equals(a.getType().getShortName())) {
        continue;
      }
      // Spans that merely touch (end == begin) count as overlapping here.
      if (b.getEnd() >= a.getBegin() && b.getBegin() <= a.getEnd()) {
        String targetTypeName = sameTypeOnly ? a.getType().getName() : b.getType().getName();
        TextRulerTarget target = new TextRulerTarget(targetTypeName, this);
        TextRulerExample error = new TextRulerExample(b.getDocument(), b, false, target);
        TextRulerExample truth = new TextRulerExample(a.getDocument(), a, true, target);
        AnnotationError candidate = new AnnotationError(error, truth, classifyOverlap(a, b));
        if (!candidates.contains(candidate)) {
          candidates.add(candidate);
        }
      }
    }
    if (candidates.isEmpty()) {
      continue;
    }
    // Pick the candidate with the smallest boundary distance; ties keep the earliest candidate.
    AnnotationError closest = candidates.get(0);
    int distance = spanDistance(closest, a);
    for (int j = 1; j < candidates.size(); j++) {
      int candidateDistance = spanDistance(candidates.get(j), a);
      if (candidateDistance < distance) {
        distance = candidateDistance;
        closest = candidates.get(j);
      }
    }
    result.add(closest);
    matches.add((TrabalAnnotation) closest.getTruth().getAnnotation());
    matches.add((TrabalAnnotation) closest.getError().getAnnotation());
  }
}

/**
 * Determines the error type for an annotation {@code b} that overlaps the gold annotation
 * {@code a}: {@code EXPANSION} if {@code b} shares one boundary with {@code a} and lies inside
 * it, {@code SHIFTING_LEFT} if {@code b} begins or ends after {@code a}, otherwise
 * {@code SHIFTING_RIGHT}.
 */
private AnnotationErrorType classifyOverlap(TrabalAnnotation a, TrabalAnnotation b) {
  boolean sameBeginShorterEnd = b.getBegin() == a.getBegin() && b.getEnd() < a.getEnd();
  boolean laterBeginSameEnd = b.getBegin() > a.getBegin() && b.getEnd() == a.getEnd();
  if (sameBeginShorterEnd || laterBeginSameEnd) {
    return AnnotationErrorType.EXPANSION;
  }
  if (b.getBegin() > a.getBegin() || b.getEnd() > a.getEnd()) {
    return AnnotationErrorType.SHIFTING_LEFT;
  }
  return AnnotationErrorType.SHIFTING_RIGHT;
}

/**
 * Returns the sum of the absolute begin and end offset differences between the erroneous
 * annotation of the candidate and the given gold annotation.
 */
private int spanDistance(AnnotationError candidate, TrabalAnnotation truth) {
  int begin = candidate.getError().getAnnotation().getBegin();
  int end = candidate.getError().getAnnotation().getEnd();
  return Math.abs(begin - truth.getBegin()) + Math.abs(end - truth.getEnd());
}