protected abstract AnalysisEngineDescription getAnnotatorDescription()

in ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfAnnotationSpans_ImplBase.java [103:241]


	/**
	 * Supplies the description of the annotator whose output is being evaluated.
	 * <p>
	 * {@code test} appends the returned description to the preprocessing
	 * pipeline, mapping the view name {@code "TimexView"} onto the default sofa.
	 *
	 * @param directory the directory that was handed to {@code test}
	 *        (presumably where the trained model lives -- TODO confirm against implementations)
	 * @return the engine description to add to the evaluation pipeline
	 * @throws ResourceInitializationException declared for implementations that
	 *         cannot build the description
	 */
	protected abstract AnalysisEngineDescription getAnnotatorDescription(File directory)
			throws ResourceInitializationException;

	/**
	 * Selects the reference (gold) annotations to score for one segment.
	 * <p>
	 * {@code test} calls this once per non-skipped segment, passing the view
	 * obtained with {@code GOLD_VIEW_NAME}.
	 *
	 * @param jCas the gold view of the current document
	 * @param segment the segment currently being evaluated
	 * @return the gold annotations for that segment
	 */
	protected abstract Collection<? extends Annotation> getGoldAnnotations(JCas jCas, Segment segment);

  /**
   * Selects the system-produced annotations to score for one segment.
   * <p>
   * {@code test} calls this once per non-skipped segment, passing the view
   * obtained with {@code CAS.NAME_DEFAULT_SOFA}.
   *
   * @param jCas the system (default sofa) view of the current document
   * @param segment the segment currently being evaluated
   * @return the system annotations for that segment
   */
  protected abstract Collection<? extends Annotation> getSystemAnnotations(JCas jCas, Segment segment);

	/**
	 * Runs the preprocessing pipeline plus the annotator under evaluation over
	 * every document from the reader and accumulates span-level statistics.
	 * <p>
	 * For each segment whose id is not in {@code THYMEData.SEGMENTS_TO_SKIP}, the
	 * gold and system annotations are added to the statistics. Spans found on
	 * only one side are then logged at FINE level as {@code DROPPED:} (gold only)
	 * or {@code ADDED:} (system only), followed by the matching gold spans as
	 * {@code CORRECT:}. When {@code printOverlapping} is set, gold-only spans
	 * that contain, or are contained by, a system-only span are additionally
	 * reported; this reporting does not alter the returned statistics, which
	 * have already been updated at that point.
	 *
	 * @param collectionReader source of the documents to evaluate
	 * @param directory passed through to {@link #getAnnotatorDescription(File)}
	 * @return the statistics accumulated over all evaluated segments
	 * @throws Exception if building or running the pipeline fails
	 */
	@Override
	protected AnnotationStatistics<String> test(CollectionReader collectionReader, File directory)
			throws Exception {
		AggregateBuilder aggregateBuilder = this.getPreprocessorAggregateBuilder();
		aggregateBuilder.add(this.getAnnotatorDescription(directory), "TimexView", CAS.NAME_DEFAULT_SOFA);
		if (this.i2b2Output != null) {
			// optional writer, added only when an output location is configured
			aggregateBuilder.add(AnalysisEngineFactory.createEngineDescription(WriteI2B2XML.class, WriteI2B2XML.PARAM_OUTPUT_DIR, this.i2b2Output), "TimexView", CAS.NAME_DEFAULT_SOFA);
		}
		AnnotationStatistics<String> stats = new AnnotationStatistics<>();

		// Orders (and, inside the TreeSets below, identifies) annotations purely by
		// their [begin, end] offsets, so gold and system annotations with the same
		// span compare as equal regardless of type.
		Ordering<Annotation> bySpans = Ordering.<Integer> natural().lexicographical().onResultOf(
				new Function<Annotation, List<Integer>>() {
					@Override
					public List<Integer> apply(Annotation annotation) {
						return Arrays.asList(annotation.getBegin(), annotation.getEnd());
					}
				});

		for (Iterator<JCas> casIter = new JCasIterator(collectionReader, aggregateBuilder.createAggregate()); casIter.hasNext();) {
			JCas jCas = casIter.next();
			JCas goldView = jCas.getView(GOLD_VIEW_NAME);
			JCas systemView = jCas.getView(CAS.NAME_DEFAULT_SOFA);
			// NOTE(review): emitted for every document, including ones without errors;
			// kept as-is so existing log output does not change.
			this.logger.fine("Errors in : " + ViewUriUtil.getURI(jCas).toString());
			for (Segment segment : JCasUtil.select(jCas, Segment.class)) {
				if (THYMEData.SEGMENTS_TO_SKIP.contains(segment.getId())) {
					continue;
				}
				Collection<? extends Annotation> goldAnnotations = this.getGoldAnnotations(goldView, segment);
				Collection<? extends Annotation> systemAnnotations = this.getSystemAnnotations(systemView, segment);
				stats.add(goldAnnotations, systemAnnotations);

				Set<Annotation> goldSet = new TreeSet<>(bySpans);
				for (Annotation goldAnnotation : goldAnnotations) {
					// TODO: fix data so that this is not necessary
					if (goldAnnotation.getBegin() == Integer.MAX_VALUE || goldAnnotation.getEnd() == Integer.MIN_VALUE) {
						this.logger.warning("Invalid annotation");
						continue;
					}
					goldSet.add(goldAnnotation);
				}
				Set<Annotation> systemSet = new TreeSet<>(bySpans);
				systemSet.addAll(systemAnnotations);

				// spans present in gold but not produced by the system
				Set<Annotation> goldOnly = new TreeSet<>(bySpans);
				goldOnly.addAll(goldSet);
				goldOnly.removeAll(systemSet);

				// spans produced by the system but absent from gold
				Set<Annotation> systemOnly = new TreeSet<>(bySpans);
				systemOnly.addAll(systemSet);
				systemOnly.removeAll(goldSet);

				if (!goldOnly.isEmpty() || !systemOnly.isEmpty()) {
					// Only normalise the document text when something will actually be
					// printed; line breaks are replaced one-for-one, so offsets still match.
					String text = jCas.getDocumentText().replaceAll("[\r\n]", " ");
					this.logger.fine("Errors in : " + ViewUriUtil.getURI(jCas).toString());
					Set<Annotation> errors = new TreeSet<>(bySpans);
					errors.addAll(goldOnly);
					errors.addAll(systemOnly);
					for (Annotation annotation : errors) {
						String label = goldOnly.contains(annotation) ? "DROPPED:" : "ADDED:  ";
						this.logSpanInContext(label, text, annotation);
					}
					//add correct predictions:
					for (Annotation annotation : goldSet) {
						if (!errors.contains(annotation)) {
							this.logSpanInContext("CORRECT:", text, annotation);
						}
					}
				}

				// get overlapping spans
				if (this.printOverlapping) {
					Set<Annotation> partialGold = new HashSet<>();
					Set<Annotation> partialSystem = new HashSet<>();
					// iterate over all remaining gold annotations
					for (Annotation gold : goldOnly) {
						Annotation bestSystem = null;
						int bestOverlap = 0;
						for (Annotation system : systemOnly) {
							if (system.getBegin() >= gold.getBegin() && system.getEnd() <= gold.getEnd()) {
								// system completely contained by gold
								int overlap = system.getEnd() - system.getBegin();
								if (overlap > bestOverlap) {
									bestOverlap = overlap;
									bestSystem = system;
								}
							} else if (gold.getBegin() >= system.getBegin() && gold.getEnd() <= system.getEnd()) {
								// gold completely contained by system
								int overlap = gold.getEnd() - gold.getBegin();
								if (overlap > bestOverlap) {
									bestOverlap = overlap;
									bestSystem = system;
								}
							}
						}
						if (bestSystem != null) {
							this.LOGGER.info(String.format("Allowed overlapping annotation: Gold(%s) => System(%s)\n", gold.getCoveredText(), bestSystem.getCoveredText()));
							partialGold.add(gold);
							partialSystem.add(bestSystem);
						}
					}
					if (!partialGold.isEmpty()) {
						goldOnly.removeAll(partialGold);
						systemOnly.removeAll(partialSystem);
						// Several gold spans may sit inside the same system span, in which
						// case the system set holds that span once; the sizes are therefore
						// not necessarily equal, only bounded.
						assert partialSystem.size() <= partialGold.size();
						this.LOGGER.info(String.format("Found %d overlapping spans and removed from gold/system errors\n", partialGold.size()));
					}
				}
			}
		}
		return stats;
	}

	/**
	 * Logs one annotation at FINE level, showing its covered text surrounded by
	 * up to 50 characters of document text on either side.
	 *
	 * @param label prefix identifying the kind of line (e.g. {@code "DROPPED:"})
	 * @param text document text the annotation offsets refer to
	 * @param annotation the annotation whose span is printed
	 */
	private void logSpanInContext(String label, String text, Annotation annotation) {
		int begin = annotation.getBegin();
		int end = annotation.getEnd();
		// clamp the context window to the bounds of the document
		int windowBegin = Math.max(0, begin - 50);
		int windowEnd = Math.min(text.length(), end + 50);
		this.logger.fine(String.format(
				"%s  ...%s[!%s!:%d-%d]%s...",
				label,
				text.substring(windowBegin, begin),
				text.substring(begin, end),
				begin,
				end,
				text.substring(end, windowEnd)));
	}