protected AnnotationStatistics test()

in ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/eval/RelationExtractorEvaluation.java [550:764]


	/**
	 * Runs the classifier annotator over every CAS produced by the collection
	 * reader and scores the relations found in the system view against the
	 * relations found in the gold view.
	 *
	 * <p>Pipeline construction: when {@code testOnCTakes} is set, the modifier
	 * extractor descriptor and {@code RemoveSmallerEventMentions} are added;
	 * otherwise {@code ReplaceCTakesMentionsWithGoldMentions} is added. The
	 * classifier annotator is then appended, pointed at the model jar resolved
	 * from {@code directory}.
	 *
	 * <p>Per CAS, in this order:
	 * <ol>
	 * <li>if {@code ignoreImpossibleGoldRelations} is set, gold relations with an
	 * argument that covers no system annotation of the same class are dropped
	 * (with a warning);</li>
	 * <li>if {@code allowSmallerSystemArguments} is set, system relation
	 * arguments that have no exact-span gold counterpart but are enclosed by a
	 * gold argument are replaced by that gold argument (with a warning);</li>
	 * <li>the statistics are updated, comparing relations by argument spans and
	 * using the "category" feature as the outcome;</li>
	 * <li>if {@code printErrors} is set, a per-relation comparison is written to
	 * standard output.</li>
	 * </ol>
	 *
	 * @param collectionReader
	 *          source of the CASes to evaluate
	 * @param directory
	 *          directory handed to {@code JarClassifierBuilder.getModelJarFile} to
	 *          locate the classifier jar
	 * @return the statistics accumulated over all CASes; they are also printed to
	 *         standard error before returning
	 * @throws Exception
	 *           if the pipeline cannot be built or run; a failure to obtain the
	 *           gold view is reported as an {@code AnalysisEngineProcessException}
	 */
	protected AnnotationStatistics<String> test(CollectionReader collectionReader, File directory)
			throws Exception {
		AggregateBuilder builder = new AggregateBuilder();
		if (this.testOnCTakes) {
			// add the modifier extractor
			File file = new File("desc/analysis_engine/ModifierExtractorAnnotator.xml");
			XMLInputSource source = new XMLInputSource(file);
			builder.add(UIMAFramework.getXMLParser().parseAnalysisEngineDescription(source));
			// remove extraneous entity mentions
			builder.add(AnalysisEngineFactory.createEngineDescription(RemoveSmallerEventMentions.class));
		} else {
			// replace cTAKES entity mentions and modifiers in the system view with
			// the gold annotations
			builder.add(AnalysisEngineFactory.createEngineDescription(ReplaceCTakesMentionsWithGoldMentions.class));
		}
		// add the relation extractor, configured for classification mode
		AnalysisEngineDescription classifierAnnotator =
				AnalysisEngineFactory.createEngineDescription(
						this.classifierAnnotatorClass,
						this.parameterSettings.configurationParameters);
		ConfigurationParameterFactory.addConfigurationParameters(
				classifierAnnotator,
				GenericJarClassifierFactory.PARAM_CLASSIFIER_JAR_PATH,
				JarClassifierBuilder.getModelJarFile(directory));
		builder.add(classifierAnnotator);

		// statistics will be based on the "category" feature of the
		// BinaryTextRelations
		AnnotationStatistics<String> stats = new AnnotationStatistics<>();
		Function<BinaryTextRelation, HashableArguments> getSpan =
				new Function<BinaryTextRelation, HashableArguments>() {
			@Override
			public HashableArguments apply(BinaryTextRelation relation) {
				return new HashableArguments(relation);
			}
		};
		Function<BinaryTextRelation, String> getOutcome =
				AnnotationStatistics.annotationToFeatureValue("category");

		// calculate statistics, iterating over the results of the classifier
		AnalysisEngine engine = builder.createAggregate();
		for (Iterator<JCas> casIter = new JCasIterator(collectionReader, engine); casIter.hasNext();) {
			JCas jCas = casIter.next();
			// get the gold view
			JCas goldView;
			try {
				goldView = jCas.getView(SHARPXMI.GOLD_VIEW_NAME);
			} catch (CASException e) {
				throw new AnalysisEngineProcessException(e);
			}

			// get the gold and system annotations
			Collection<? extends BinaryTextRelation> goldBinaryTextRelations =
					JCasUtil.select(goldView, this.relationClass);
			Collection<? extends BinaryTextRelation> systemBinaryTextRelations =
					JCasUtil.select(jCas, this.relationClass);

			if (this.ignoreImpossibleGoldRelations) {
				// keep only the gold relations whose two arguments each cover at
				// least one system annotation of the same class
				List<BinaryTextRelation> relations = Lists.newArrayList();
				for (BinaryTextRelation relation : goldBinaryTextRelations) {
					boolean hasSystemArgs = true;
					for (RelationArgument relArg : Lists.newArrayList(relation.getArg1(), relation.getArg2())) {
						IdentifiedAnnotation goldArg = (IdentifiedAnnotation) relArg.getArgument();
						Class<? extends IdentifiedAnnotation> goldClass = goldArg.getClass();
						if (JCasUtil.selectCovered(jCas, goldClass, goldArg).isEmpty()) {
							// one missing argument already makes the relation impossible,
							// so there is no need to query the index for the other one
							hasSystemArgs = false;
							break;
						}
					}
					if (hasSystemArgs) {
						relations.add(relation);
					} else {
						IdentifiedAnnotation arg1 = (IdentifiedAnnotation) relation.getArg1().getArgument();
						IdentifiedAnnotation arg2 = (IdentifiedAnnotation) relation.getArg2().getArgument();
						String messageFormat =
								"removing relation between %s and %s which is impossible to "
										+ "find with system mentions";
						String message = String.format(messageFormat, format(arg1), format(arg2));
						UIMAFramework.getLogger(this.getClass()).log(Level.WARNING, message);
					}
				}
				goldBinaryTextRelations = relations;
			}

			if (this.allowSmallerSystemArguments) {

				// collect all the arguments of the manually annotated relations
				Set<IdentifiedAnnotation> goldArgs = Sets.newHashSet();
				for (BinaryTextRelation relation : goldBinaryTextRelations) {
					for (RelationArgument relArg : Lists.newArrayList(relation.getArg1(), relation.getArg2())) {
						goldArgs.add((IdentifiedAnnotation) relArg.getArgument());
					}
				}

				// collect all the arguments of system-predicted relations that don't
				// match some gold argument
				Set<IdentifiedAnnotation> unmatchedSystemArgs = Sets.newHashSet();
				for (BinaryTextRelation relation : systemBinaryTextRelations) {
					for (RelationArgument relArg : Lists.newArrayList(relation.getArg1(), relation.getArg2())) {
						IdentifiedAnnotation systemArg = (IdentifiedAnnotation) relArg.getArgument();
						Class<? extends IdentifiedAnnotation> systemClass = systemArg.getClass();
						boolean matchesSomeGold = false;
						for (IdentifiedAnnotation goldArg : JCasUtil.selectCovered(
								goldView,
								systemClass,
								systemArg)) {
							// only an identical span counts as a match here
							if (goldArg.getBegin() == systemArg.getBegin()
									&& goldArg.getEnd() == systemArg.getEnd()) {
								matchesSomeGold = true;
								break;
							}
						}
						if (!matchesSomeGold) {
							unmatchedSystemArgs.add(systemArg);
						}
					}
				}

				// map each unmatched system argument to the gold argument that encloses
				// it
				Map<IdentifiedAnnotation, IdentifiedAnnotation> systemToGold = Maps.newHashMap();
				for (IdentifiedAnnotation goldArg : goldArgs) {
					Class<? extends IdentifiedAnnotation> goldClass = goldArg.getClass();
					for (IdentifiedAnnotation systemArg : JCasUtil.selectCovered(jCas, goldClass, goldArg)) {
						if (unmatchedSystemArgs.contains(systemArg)) {

							// if there's no mapping yet for this system arg, map it to the
							// enclosing gold arg
							IdentifiedAnnotation oldGoldArg = systemToGold.get(systemArg);
							if (oldGoldArg == null) {
								systemToGold.put(systemArg, goldArg);
							}

							// if there's already a mapping for this system arg, only re-map
							// it to match the type
							else {
								IdentifiedAnnotation current, other;
								if (systemArg.getTypeID() == goldArg.getTypeID()) {
									systemToGold.put(systemArg, goldArg);
									current = goldArg;
									other = oldGoldArg;
								} else {
									current = oldGoldArg;
									other = goldArg;
								}

								// issue a warning since this re-mapping procedure is imperfect
								String message =
										"system argument %s mapped to gold argument %s, but could also be mapped to %s";
								message = String.format(message, format(systemArg), format(current), format(other));
								UIMAFramework.getLogger(this.getClass()).log(Level.WARNING, message);
							}
						}
					}
				}

				// replace system arguments with gold arguments where necessary/possible
				for (BinaryTextRelation relation : systemBinaryTextRelations) {
					for (RelationArgument relArg : Lists.newArrayList(relation.getArg1(), relation.getArg2())) {
						IdentifiedAnnotation systemArg = (IdentifiedAnnotation) relArg.getArgument();
						IdentifiedAnnotation matchingGoldArg = systemToGold.get(systemArg);
						if (matchingGoldArg != null) {
							String messageFormat = "replacing system argument %s with gold argument %s";
							String message =
									String.format(messageFormat, format(systemArg), format(matchingGoldArg));
							UIMAFramework.getLogger(this.getClass()).log(Level.WARNING, message);
							relArg.setArgument(matchingGoldArg);
						}
					}
				}
			}

			// update the statistics based on the argument spans of the relation
			stats.add(goldBinaryTextRelations, systemBinaryTextRelations, getSpan, getOutcome);

			// print errors if requested
			if (this.printErrors) {
				printInstanceOutput(goldBinaryTextRelations, systemBinaryTextRelations, getSpan, getOutcome);

				// index both sides by argument spans so they can be compared pairwise
				Map<HashableArguments, BinaryTextRelation> goldMap = Maps.newHashMap();
				for (BinaryTextRelation relation : goldBinaryTextRelations) {
					goldMap.put(new HashableArguments(relation), relation);
				}
				Map<HashableArguments, BinaryTextRelation> systemMap = Maps.newHashMap();
				for (BinaryTextRelation relation : systemBinaryTextRelations) {
					systemMap.put(new HashableArguments(relation), relation);
				}
				Set<HashableArguments> all = Sets.union(goldMap.keySet(), systemMap.keySet());
				List<HashableArguments> sorted = Lists.newArrayList(all);
				Collections.sort(sorted);

				File noteFile = new File(ViewUriUtil.getURI(jCas).toString());
				String fileName = noteFile.getName();

				// every key comes from the union above, so at least one of the two
				// lookups below yields a relation
				for (HashableArguments key : sorted) {
					BinaryTextRelation goldRelation = goldMap.get(key);
					BinaryTextRelation systemRelation = systemMap.get(key);
					if (goldRelation == null) {
						System.out.printf("[%s] System added: %s\n", fileName, formatRelation(systemRelation));
					} else if (systemRelation == null) {
						System.out.printf("[%s] System dropped: %s\n", fileName, formatRelation(goldRelation));
					} else if (!java.util.Objects.equals(systemRelation.getCategory(), goldRelation.getCategory())) {
						// null-safe comparison: a relation without a category must not
						// abort the error report with a NullPointerException
						String label = systemRelation.getCategory();
						System.out.printf("[%s] System labeled %s for %s\n", fileName, label, formatRelation(systemRelation));
					} else {
						System.out.printf("[%s] System nailed it: %s\n", fileName, formatRelation(systemRelation));
					}
				}
			}
		}

		System.err.println(stats);
		return stats;
	}