in ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfAnnotationSpans_ImplBase.java [103:241]
/**
 * Supplies the description of the annotator whose output is being evaluated.
 *
 * <p>{@code test} adds the returned description to the preprocessing aggregate, mapping the
 * {@code "TimexView"} view name to the default sofa.
 *
 * @param directory the directory that was passed to {@code test}; presumably the location of
 *        the trained model -- confirm against the concrete implementations
 * @return the analysis engine description to add to the evaluation pipeline
 * @throws ResourceInitializationException declared so implementations may propagate a failure
 *         to build the description
 */
protected abstract AnalysisEngineDescription getAnnotatorDescription(File directory)
throws ResourceInitializationException;
/**
 * Selects the reference (gold) annotations to score for one segment.
 *
 * <p>{@code test} calls this with the {@code GOLD_VIEW_NAME} view of each CAS, once per segment
 * whose id is not listed in {@code THYMEData.SEGMENTS_TO_SKIP}.
 *
 * @param jCas the view to read annotations from (the gold view when called from {@code test})
 * @param segment the segment currently being evaluated; presumably the result should be limited
 *        to annotations inside it -- confirm against the concrete implementations
 * @return the gold annotations for the segment
 */
protected abstract Collection<? extends Annotation> getGoldAnnotations(JCas jCas, Segment segment);
/**
 * Selects the system-produced annotations to score for one segment.
 *
 * <p>{@code test} calls this with the default-sofa view of each CAS, once per segment whose id
 * is not listed in {@code THYMEData.SEGMENTS_TO_SKIP}.
 *
 * @param jCas the view to read annotations from (the default view when called from {@code test})
 * @param segment the segment currently being evaluated; presumably the result should be limited
 *        to annotations inside it -- confirm against the concrete implementations
 * @return the system annotations for the segment
 */
protected abstract Collection<? extends Annotation> getSystemAnnotations(JCas jCas, Segment segment);
/**
 * Runs the annotator over every CAS from the reader and scores its spans against the gold view.
 *
 * <p>For each CAS, every segment whose id is not in {@code THYMEData.SEGMENTS_TO_SKIP} is
 * evaluated: the gold and system annotations are added to the returned statistics, and the
 * span-level differences (dropped, added and, when there is at least one error, correct spans)
 * are logged at FINE level with surrounding text. When {@code printOverlapping} is set, gold-only
 * and system-only spans where one fully contains the other are additionally reported.
 *
 * @param collectionReader supplies the documents to evaluate
 * @param directory passed through to {@link #getAnnotatorDescription(File)}
 * @return the statistics accumulated over all evaluated segments
 * @throws Exception if building or running the pipeline fails
 */
@Override
protected AnnotationStatistics<String> test(CollectionReader collectionReader, File directory)
    throws Exception {
  AggregateBuilder aggregateBuilder = this.getPreprocessorAggregateBuilder();
  aggregateBuilder.add(this.getAnnotatorDescription(directory), "TimexView", CAS.NAME_DEFAULT_SOFA);
  if (this.i2b2Output != null) {
    aggregateBuilder.add(
        AnalysisEngineFactory.createEngineDescription(
            WriteI2B2XML.class, WriteI2B2XML.PARAM_OUTPUT_DIR, this.i2b2Output),
        "TimexView",
        CAS.NAME_DEFAULT_SOFA);
  }

  AnnotationStatistics<String> stats = new AnnotationStatistics<>();

  // Orders (and de-duplicates) annotations purely by their (begin, end) offsets.
  Ordering<Annotation> bySpans = Ordering.<Integer> natural().lexicographical().onResultOf(
      new Function<Annotation, List<Integer>>() {
        @Override
        public List<Integer> apply(Annotation annotation) {
          return Arrays.asList(annotation.getBegin(), annotation.getEnd());
        }
      });

  for (Iterator<JCas> casIter = new JCasIterator(collectionReader, aggregateBuilder.createAggregate()); casIter.hasNext();) {
    JCas jCas = casIter.next();
    JCas goldView = jCas.getView(GOLD_VIEW_NAME);
    JCas systemView = jCas.getView(CAS.NAME_DEFAULT_SOFA);
    this.logger.fine("Errors in : " + ViewUriUtil.getURI(jCas).toString());

    // Document text with line breaks flattened to spaces (same length, so offsets still line up).
    // Built on first use and then shared by all segments of this CAS.
    String text = null;

    for (Segment segment : JCasUtil.select(jCas, Segment.class)) {
      if (THYMEData.SEGMENTS_TO_SKIP.contains(segment.getId())) {
        continue;
      }

      Collection<? extends Annotation> goldAnnotations = this.getGoldAnnotations(goldView, segment);
      Collection<? extends Annotation> systemAnnotations = this.getSystemAnnotations(systemView, segment);
      stats.add(goldAnnotations, systemAnnotations);

      Set<Annotation> goldSet = new TreeSet<>(bySpans);
      for (Annotation goldAnnotation : goldAnnotations) {
        // TODO: fix data so that this is not necessary
        if (goldAnnotation.getBegin() == Integer.MAX_VALUE || goldAnnotation.getEnd() == Integer.MIN_VALUE) {
          this.logger.warning("Invalid annotation");
          continue;
        }
        goldSet.add(goldAnnotation);
      }

      Set<Annotation> systemSet = new TreeSet<>(bySpans);
      systemSet.addAll(systemAnnotations);

      // Spans present in gold but missing from the system output.
      Set<Annotation> goldOnly = new TreeSet<>(bySpans);
      goldOnly.addAll(goldSet);
      goldOnly.removeAll(systemSet);

      // Spans produced by the system that have no gold counterpart.
      Set<Annotation> systemOnly = new TreeSet<>(bySpans);
      systemOnly.addAll(systemSet);
      systemOnly.removeAll(goldSet);

      if (text == null) {
        text = jCas.getDocumentText().replaceAll("[\r\n]", " ");
      }

      if (!goldOnly.isEmpty() || !systemOnly.isEmpty()) {
        this.logger.fine("Errors in : " + ViewUriUtil.getURI(jCas).toString());
        Set<Annotation> errors = new TreeSet<>(bySpans);
        errors.addAll(goldOnly);
        errors.addAll(systemOnly);
        for (Annotation annotation : errors) {
          String label = goldOnly.contains(annotation) ? "DROPPED:" : "ADDED: ";
          this.logAnnotationInContext(label, text, annotation);
        }
        // add correct predictions:
        for (Annotation annotation : goldSet) {
          if (!errors.contains(annotation)) {
            this.logAnnotationInContext("CORRECT:", text, annotation);
          }
        }
      }

      // NOTE(review): the overlap matching below runs after stats.add(...) and after the error
      // log, and goldOnly/systemOnly are not read again afterwards, so within this method it
      // only affects the INFO messages, not the returned statistics.
      Set<Annotation> partialGold = new HashSet<>();
      Set<Annotation> partialSystem = new HashSet<>();
      // get overlapping spans
      if (this.printOverlapping) {
        // iterate over all remaining gold annotations
        for (Annotation gold : goldOnly) {
          Annotation bestSystem = null;
          int bestOverlap = 0;
          for (Annotation system : systemOnly) {
            if (system.getBegin() >= gold.getBegin() && system.getEnd() <= gold.getEnd()) {
              // system completely contained by gold
              int overlap = system.getEnd() - system.getBegin();
              if (overlap > bestOverlap) {
                bestOverlap = overlap;
                bestSystem = system;
              }
            } else if (gold.getBegin() >= system.getBegin() && gold.getEnd() <= system.getEnd()) {
              // gold completely contained by system
              int overlap = gold.getEnd() - gold.getBegin();
              if (overlap > bestOverlap) {
                bestOverlap = overlap;
                bestSystem = system;
              }
            }
          }
          if (bestSystem != null) {
            this.LOGGER.info(String.format("Allowed overlapping annotation: Gold(%s) => System(%s)\n", gold.getCoveredText(), bestSystem.getCoveredText()));
            partialGold.add(gold);
            partialSystem.add(bestSystem);
          }
        }
        if (!partialGold.isEmpty()) {
          goldOnly.removeAll(partialGold);
          systemOnly.removeAll(partialSystem);
          // Several gold spans may select the same system span (e.g. two gold spans inside one
          // long system span), so the system side can be smaller but never larger.
          assert partialSystem.size() <= partialGold.size();
          this.LOGGER.info(String.format("Found %d overlapping spans and removed from gold/system errors\n", partialGold.size()));
        }
      }
    }
  }
  return stats;
}

/**
 * Logs one annotation at FINE level, bracketed inside up to 50 characters of context on each side.
 *
 * @param label prefix describing the outcome for this span (e.g. dropped, added, correct)
 * @param text the document text the annotation offsets refer to
 * @param annotation the span to display
 */
private void logAnnotationInContext(String label, String text, Annotation annotation) {
  int begin = annotation.getBegin();
  int end = annotation.getEnd();
  int windowBegin = Math.max(0, begin - 50);
  int windowEnd = Math.min(text.length(), end + 50);
  this.logger.fine(String.format(
      "%s ...%s[!%s!:%d-%d]%s...",
      label,
      text.substring(windowBegin, begin),
      text.substring(begin, end),
      begin,
      end,
      text.substring(end, windowEnd)));
}