in ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/eval/RelationExtractorEvaluation.java [550:764]
/**
 * Runs the trained relation classifier over every CAS produced by the reader and
 * accumulates statistics comparing the predicted relations against the gold relations.
 *
 * <p>The pipeline is assembled according to {@code this.testOnCTakes}: when set, the
 * modifier extractor descriptor and {@code RemoveSmallerEventMentions} are added;
 * otherwise {@code ReplaceCTakesMentionsWithGoldMentions} is added. The classifier
 * annotator is then added, pointed at the model jar located via {@code directory}.
 *
 * <p>For each CAS, gold relations are read from the {@code SHARPXMI.GOLD_VIEW_NAME} view
 * and system relations from the CAS handed out by the iterator. Depending on
 * {@code this.ignoreImpossibleGoldRelations}, {@code this.allowSmallerSystemArguments}
 * and {@code this.printErrors}, gold relations may be filtered, system arguments may be
 * replaced by enclosing gold arguments, and a per-relation report may be printed to
 * standard output. The accumulated statistics are printed to standard error before
 * being returned.
 *
 * @param collectionReader reader supplying the CASes to evaluate
 * @param directory directory passed to {@code JarClassifierBuilder.getModelJarFile} to
 *        locate the classifier model jar
 * @return statistics keyed on the "category" feature of the relations
 * @throws Exception if the pipeline cannot be assembled or run; a missing gold view is
 *         reported as an {@code AnalysisEngineProcessException}
 */
protected AnnotationStatistics<String> test(CollectionReader collectionReader, File directory)
    throws Exception {
  AggregateBuilder builder = new AggregateBuilder();
  if (this.testOnCTakes) {
    // add the modifier extractor
    File file = new File("desc/analysis_engine/ModifierExtractorAnnotator.xml");
    XMLInputSource source = new XMLInputSource(file);
    builder.add(UIMAFramework.getXMLParser().parseAnalysisEngineDescription(source));
    // remove extraneous entity mentions
    builder.add(AnalysisEngineFactory.createEngineDescription(RemoveSmallerEventMentions.class));
  } else {
    // replace cTAKES entity mentions and modifiers in the system view with
    // the gold annotations
    builder.add(AnalysisEngineFactory.createEngineDescription(ReplaceCTakesMentionsWithGoldMentions.class));
  }

  // add the relation extractor, configured for classification mode
  AnalysisEngineDescription classifierAnnotator =
      AnalysisEngineFactory.createEngineDescription(
          this.classifierAnnotatorClass,
          this.parameterSettings.configurationParameters);
  ConfigurationParameterFactory.addConfigurationParameters(
      classifierAnnotator,
      GenericJarClassifierFactory.PARAM_CLASSIFIER_JAR_PATH,
      JarClassifierBuilder.getModelJarFile(directory));
  builder.add(classifierAnnotator);

  // statistics will be based on the "category" feature of the
  // BinaryTextRelations
  AnnotationStatistics<String> stats = new AnnotationStatistics<>();
  Function<BinaryTextRelation, HashableArguments> getSpan =
      new Function<BinaryTextRelation, HashableArguments>() {
        @Override
        public HashableArguments apply(BinaryTextRelation relation) {
          return new HashableArguments(relation);
        }
      };
  Function<BinaryTextRelation, String> getOutcome =
      AnnotationStatistics.annotationToFeatureValue("category");

  // calculate statistics, iterating over the results of the classifier
  // NOTE(review): the engine is never explicitly completed or destroyed in this
  // method -- confirm that JCasIterator (or the caller) handles its lifecycle.
  AnalysisEngine engine = builder.createAggregate();
  for (Iterator<JCas> casIter = new JCasIterator(collectionReader, engine); casIter.hasNext();) {
    JCas jCas = casIter.next();

    // get the gold view
    JCas goldView;
    try {
      goldView = jCas.getView(SHARPXMI.GOLD_VIEW_NAME);
    } catch (CASException e) {
      throw new AnalysisEngineProcessException(e);
    }

    // get the gold and system annotations
    Collection<? extends BinaryTextRelation> goldBinaryTextRelations =
        JCasUtil.select(goldView, this.relationClass);
    Collection<? extends BinaryTextRelation> systemBinaryTextRelations =
        JCasUtil.select(jCas, this.relationClass);

    if (this.ignoreImpossibleGoldRelations) {
      goldBinaryTextRelations = this.selectPossibleGoldRelations(jCas, goldBinaryTextRelations);
    }

    if (this.allowSmallerSystemArguments) {
      this.replaceSmallerSystemArguments(
          jCas,
          goldView,
          goldBinaryTextRelations,
          systemBinaryTextRelations);
    }

    // update the statistics based on the argument spans of the relation
    stats.add(goldBinaryTextRelations, systemBinaryTextRelations, getSpan, getOutcome);

    // print errors if requested
    if (this.printErrors) {
      printInstanceOutput(goldBinaryTextRelations, systemBinaryTextRelations, getSpan, getOutcome);
      File noteFile = new File(ViewUriUtil.getURI(jCas).toString());
      this.printRelationErrors(
          noteFile.getName(),
          goldBinaryTextRelations,
          systemBinaryTextRelations);
    }
  }

  System.err.print(stats);
  System.err.println();
  return stats;
}

/**
 * Collects the gold relations for which both arguments cover at least one annotation of
 * the argument's own class in {@code jCas}. Every relation that is left out is reported
 * with a warning through the UIMA logger.
 *
 * @param jCas the CAS searched for annotations covered by each gold argument
 * @param goldBinaryTextRelations the gold relations to filter
 * @return a new list holding only the relations that passed the check
 */
private List<BinaryTextRelation> selectPossibleGoldRelations(
    JCas jCas,
    Collection<? extends BinaryTextRelation> goldBinaryTextRelations) {
  // collect only relations where both arguments have some possible system
  // arguments
  List<BinaryTextRelation> relations = Lists.newArrayList();
  for (BinaryTextRelation relation : goldBinaryTextRelations) {
    boolean hasSystemArgs = true;
    for (RelationArgument relArg : Lists.newArrayList(relation.getArg1(), relation.getArg2())) {
      IdentifiedAnnotation goldArg = (IdentifiedAnnotation) relArg.getArgument();
      Class<? extends IdentifiedAnnotation> goldClass = goldArg.getClass();
      boolean noSystemArg = JCasUtil.selectCovered(jCas, goldClass, goldArg).isEmpty();
      hasSystemArgs = hasSystemArgs && !noSystemArg;
    }
    if (hasSystemArgs) {
      relations.add(relation);
    } else {
      IdentifiedAnnotation arg1 = (IdentifiedAnnotation) relation.getArg1().getArgument();
      IdentifiedAnnotation arg2 = (IdentifiedAnnotation) relation.getArg2().getArgument();
      String messageFormat =
          "removing relation between %s and %s which is impossible to "
              + "find with system mentions";
      String message = String.format(messageFormat, format(arg1), format(arg2));
      UIMAFramework.getLogger(this.getClass()).log(Level.WARNING, message);
    }
  }
  return relations;
}

/**
 * Rewrites, in place, the arguments of system relations so that a system argument that
 * has no gold argument with identical offsets is replaced by a gold argument enclosing
 * it, when such a gold argument exists among the arguments of the gold relations.
 *
 * <p>When several gold arguments enclose the same system argument, the first one found
 * is kept unless a later one has the same type ID as the system argument; each such
 * ambiguity is logged as a warning, as is every replacement performed.
 *
 * @param jCas the CAS holding the system annotations
 * @param goldView the view holding the gold annotations
 * @param goldBinaryTextRelations the gold relations whose arguments are candidates
 * @param systemBinaryTextRelations the system relations whose arguments may be replaced
 */
private void replaceSmallerSystemArguments(
    JCas jCas,
    JCas goldView,
    Collection<? extends BinaryTextRelation> goldBinaryTextRelations,
    Collection<? extends BinaryTextRelation> systemBinaryTextRelations) {
  // collect all the arguments of the manually annotated relations
  Set<IdentifiedAnnotation> goldArgs = Sets.newHashSet();
  for (BinaryTextRelation relation : goldBinaryTextRelations) {
    for (RelationArgument relArg : Lists.newArrayList(relation.getArg1(), relation.getArg2())) {
      goldArgs.add((IdentifiedAnnotation) relArg.getArgument());
    }
  }

  // collect all the arguments of system-predicted relations that don't
  // match some gold argument
  Set<IdentifiedAnnotation> unmatchedSystemArgs = Sets.newHashSet();
  for (BinaryTextRelation relation : systemBinaryTextRelations) {
    for (RelationArgument relArg : Lists.newArrayList(relation.getArg1(), relation.getArg2())) {
      IdentifiedAnnotation systemArg = (IdentifiedAnnotation) relArg.getArgument();
      Class<? extends IdentifiedAnnotation> systemClass = systemArg.getClass();
      boolean matchesSomeGold = false;
      for (IdentifiedAnnotation goldArg : JCasUtil.selectCovered(
          goldView,
          systemClass,
          systemArg)) {
        if (goldArg.getBegin() == systemArg.getBegin()
            && goldArg.getEnd() == systemArg.getEnd()) {
          matchesSomeGold = true;
          break;
        }
      }
      if (!matchesSomeGold) {
        unmatchedSystemArgs.add(systemArg);
      }
    }
  }

  // map each unmatched system argument to the gold argument that encloses
  // it
  Map<IdentifiedAnnotation, IdentifiedAnnotation> systemToGold = Maps.newHashMap();
  for (IdentifiedAnnotation goldArg : goldArgs) {
    Class<? extends IdentifiedAnnotation> goldClass = goldArg.getClass();
    for (IdentifiedAnnotation systemArg : JCasUtil.selectCovered(jCas, goldClass, goldArg)) {
      if (!unmatchedSystemArgs.contains(systemArg)) {
        continue;
      }
      IdentifiedAnnotation oldGoldArg = systemToGold.get(systemArg);
      if (oldGoldArg == null) {
        // if there's no mapping yet for this system arg, map it to the
        // enclosing gold arg
        systemToGold.put(systemArg, goldArg);
      } else {
        // if there's already a mapping for this system arg, only re-map
        // it to match the type
        IdentifiedAnnotation current, other;
        if (systemArg.getTypeID() == goldArg.getTypeID()) {
          systemToGold.put(systemArg, goldArg);
          current = goldArg;
          other = oldGoldArg;
        } else {
          current = oldGoldArg;
          other = goldArg;
        }
        // issue a warning since this re-mapping procedure is imperfect
        String message =
            "system argument %s mapped to gold argument %s, but could also be mapped to %s";
        message = String.format(message, format(systemArg), format(current), format(other));
        UIMAFramework.getLogger(this.getClass()).log(Level.WARNING, message);
      }
    }
  }

  // replace system arguments with gold arguments where necessary/possible
  for (BinaryTextRelation relation : systemBinaryTextRelations) {
    for (RelationArgument relArg : Lists.newArrayList(relation.getArg1(), relation.getArg2())) {
      IdentifiedAnnotation systemArg = (IdentifiedAnnotation) relArg.getArgument();
      IdentifiedAnnotation matchingGoldArg = systemToGold.get(systemArg);
      if (matchingGoldArg != null) {
        String messageFormat = "replacing system argument %s with gold argument %s";
        String message =
            String.format(messageFormat, format(systemArg), format(matchingGoldArg));
        UIMAFramework.getLogger(this.getClass()).log(Level.WARNING, message);
        relArg.setArgument(matchingGoldArg);
      }
    }
  }
}

/**
 * Prints one line per distinct argument pair to standard output, stating whether the
 * system added, dropped, mislabeled or correctly labeled the relation. Relations are
 * keyed by {@code HashableArguments} and reported in the sorted order of those keys.
 *
 * @param fileName name printed in brackets at the start of every line
 * @param goldBinaryTextRelations the gold relations
 * @param systemBinaryTextRelations the system relations
 */
private void printRelationErrors(
    String fileName,
    Collection<? extends BinaryTextRelation> goldBinaryTextRelations,
    Collection<? extends BinaryTextRelation> systemBinaryTextRelations) {
  Map<HashableArguments, BinaryTextRelation> goldMap = Maps.newHashMap();
  for (BinaryTextRelation relation : goldBinaryTextRelations) {
    goldMap.put(new HashableArguments(relation), relation);
  }
  Map<HashableArguments, BinaryTextRelation> systemMap = Maps.newHashMap();
  for (BinaryTextRelation relation : systemBinaryTextRelations) {
    systemMap.put(new HashableArguments(relation), relation);
  }
  Set<HashableArguments> all = Sets.union(goldMap.keySet(), systemMap.keySet());
  List<HashableArguments> sorted = Lists.newArrayList(all);
  Collections.sort(sorted);
  for (HashableArguments key : sorted) {
    BinaryTextRelation goldRelation = goldMap.get(key);
    BinaryTextRelation systemRelation = systemMap.get(key);
    if (goldRelation == null) {
      System.out.printf("[%s] System added: %s\n", fileName, formatRelation(systemRelation));
    } else if (systemRelation == null) {
      System.out.printf("[%s] System dropped: %s\n", fileName, formatRelation(goldRelation));
    } else if (!java.util.Objects.equals(
        systemRelation.getCategory(),
        goldRelation.getCategory())) {
      // null-safe comparison: a relation without a category must not abort the report
      String label = systemRelation.getCategory();
      System.out.printf("[%s] System labeled %s for %s\n", fileName, label, formatRelation(systemRelation));
    } else {
      System.out.printf("[%s] System nailed it: %s\n", fileName, formatRelation(systemRelation));
    }
  }
}