private List createErrorList()

in ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/trabal/TrabalLearner.java [294:498]


  /**
   * Compares each gold-standard document with the additional document at the same index and
   * derives the annotation errors between the two.
   * <p>
   * As a side effect, {@code positiveExamples} is reset and then filled, per type short name, with
   * the conditions created for every gold annotation that has an equal counterpart in the
   * additional document.
   * <p>
   * For each document pair the slot annotations are processed in successive passes; an annotation
   * consumed by one pass is recorded in a local match list and ignored by the following passes:
   * <ol>
   * <li>equal annotations (no error, feeds {@code positiveExamples}),</li>
   * <li>identical offsets but not equal: {@code CORRECTION},</li>
   * <li>overlapping offsets with the same type short name: nearest candidate wins,</li>
   * <li>overlapping offsets regardless of type: nearest candidate wins,</li>
   * <li>remaining annotations of the additional document: {@code DELETION},</li>
   * <li>remaining gold annotations: {@code ANNOTATION}.</li>
   * </ol>
   *
   * @return the errors found so far; the outer loop stops early when {@code shouldAbort()} reports
   *         true
   */
  private List<AnnotationError> createErrorList() {
    positiveExamples = new HashMap<String, RankedList>();
    List<AnnotationError> result = new ArrayList<AnnotationError>();
    List<TextRulerExampleDocument> documents = additionalDocuments.getDocuments();
    List<TextRulerExampleDocument> goldStandard = exampleDocuments.getDocuments();
    for (int i = 0; i < goldStandard.size(); i++) {
      if (shouldAbort()) {
        break;
      }
      List<TrabalAnnotation> matches = new ArrayList<TrabalAnnotation>();
      List<TrabalAnnotation> gold = collectSlotAnnotations(goldStandard.get(i));
      // NOTE(review): documents are paired with the gold standard purely by index; this presumably
      // relies on both sets having the same size and order - confirm against the caller.
      List<TrabalAnnotation> docs = collectSlotAnnotations(documents.get(i));

      // find correct annotated elements
      int exampleIndex = 0;
      for (TrabalAnnotation a : gold) {
        // Only this matching pass is left on abort; the passes below still run for this document.
        if (shouldAbort()) {
          break;
        }
        exampleIndex++;
        sendStatusUpdateToDelegate(
                "Comparing documents " + (i + 1) + " of " + goldStandard.size() + ": example "
                        + exampleIndex + " of " + gold.size(),
                TextRulerLearnerState.ML_RUNNING, false);
        for (TrabalAnnotation b : docs) {
          if (b.equals(a)) {
            matches.add(a);
            matches.add(b);
            String typeName = a.getType().getShortName();
            RankedList conditions = positiveExamples.get(typeName);
            if (conditions == null) {
              conditions = new RankedList(idf);
            }
            conditions.addAll(createConditions(a));
            positiveExamples.put(typeName, conditions);
            break;
          }
        }
      }

      // create correction type errors
      for (TrabalAnnotation a : gold) {
        if (matches.contains(a)) {
          continue;
        }
        for (TrabalAnnotation b : docs) {
          if (matches.contains(b)) {
            continue;
          }
          if (b.getBegin() == a.getBegin() && b.getEnd() == a.getEnd()) {
            TextRulerTarget target = new TextRulerTarget(b.getType().getName(), this);
            TextRulerExample error = new TextRulerExample(b.getDocument(), b, false, target);
            TextRulerExample truth = new TextRulerExample(a.getDocument(), a, true, target);
            result.add(new AnnotationError(error, truth, AnnotationErrorType.CORRECTION));
            matches.add(a);
            matches.add(b);
            break;
          }
        }
      }

      // create shifting type errors
      collectNearestOverlapErrors(gold, docs, matches, result, true);
      // creating shifting + correction errors
      collectNearestOverlapErrors(gold, docs, matches, result, false);

      // create deleting type errors
      for (TrabalAnnotation b : docs) {
        if (!matches.contains(b)) {
          TextRulerTarget target = new TextRulerTarget(b.getType().getName(), this);
          TextRulerExample error = new TextRulerExample(b.getDocument(), b, false, target);
          result.add(new AnnotationError(error, null, AnnotationErrorType.DELETION));
          matches.add(b);
        }
      }

      // create annotation type errors
      for (TrabalAnnotation a : gold) {
        if (!matches.contains(a)) {
          TextRulerTarget target = new TextRulerTarget(a.getType().getName(), this);
          TextRulerExample truth = new TextRulerExample(a.getDocument(), a, true, target);
          result.add(new AnnotationError(null, truth, AnnotationErrorType.ANNOTATION));
          matches.add(a);
        }
      }
    }
    return result;
  }

  /**
   * Wraps every annotation of the document's annotation index whose type passes
   * {@code isSlotType} into a {@link TrabalAnnotation}, in index iteration order.
   *
   * @param document
   *          the document whose CAS is read
   * @return a new, possibly empty list of wrapped slot annotations
   */
  private List<TrabalAnnotation> collectSlotAnnotations(TextRulerExampleDocument document) {
    List<TrabalAnnotation> slots = new ArrayList<TrabalAnnotation>();
    AnnotationIndex<AnnotationFS> index = document.getCAS().getAnnotationIndex();
    for (AnnotationFS fs : index) {
      if (isSlotType(fs.getType())) {
        slots.add(new TrabalAnnotation(fs, document, enableFeatures));
      }
    }
    return slots;
  }

  /**
   * For every still unmatched gold annotation, gathers the unmatched document annotations whose
   * span overlaps or touches it, keeps the candidate with the smallest boundary distance and
   * records it as an error. Both annotations of the chosen error are added to {@code matches}.
   *
   * @param gold
   *          slot annotations of the gold-standard document
   * @param docs
   *          slot annotations of the additional document
   * @param matches
   *          annotations already consumed by an earlier pass; extended by this method
   * @param result
   *          error list to append to
   * @param sameTypeOnly
   *          if true, only candidates with the same type short name are considered and the target
   *          is built from the gold annotation's type name; otherwise any type is accepted and the
   *          target is built from the candidate's type name
   */
  private void collectNearestOverlapErrors(List<TrabalAnnotation> gold,
          List<TrabalAnnotation> docs, List<TrabalAnnotation> matches,
          List<AnnotationError> result, boolean sameTypeOnly) {
    for (TrabalAnnotation a : gold) {
      // matches is only modified after the candidate scan, so this check is needed just once.
      if (matches.contains(a)) {
        continue;
      }
      List<AnnotationError> candidates = new ArrayList<AnnotationError>();
      for (TrabalAnnotation b : docs) {
        if (matches.contains(b)) {
          continue;
        }
        if (sameTypeOnly && !b.getType().getShortName().equals(a.getType().getShortName())) {
          continue;
        }
        if (b.getEnd() < a.getBegin() || b.getBegin() > a.getEnd()) {
          // spans neither overlap nor touch
          continue;
        }
        String targetTypeName = sameTypeOnly ? a.getType().getName() : b.getType().getName();
        TextRulerTarget target = new TextRulerTarget(targetTypeName, this);
        TextRulerExample error = new TextRulerExample(b.getDocument(), b, false, target);
        TextRulerExample truth = new TextRulerExample(a.getDocument(), a, true, target);
        AnnotationError candidate = new AnnotationError(error, truth,
                classifyBoundaryMismatch(a, b));
        if (!candidates.contains(candidate)) {
          candidates.add(candidate);
        }
      }
      if (candidates.isEmpty()) {
        continue;
      }
      AnnotationError nearest = pickNearestError(candidates, a);
      result.add(nearest);
      matches.add((TrabalAnnotation) nearest.getTruth().getAnnotation());
      matches.add((TrabalAnnotation) nearest.getError().getAnnotation());
    }
  }

  /**
   * Derives the error type from the offsets of a gold annotation and a differing document
   * annotation.
   *
   * @param truth
   *          the gold annotation
   * @param error
   *          the document annotation
   * @return {@code EXPANSION} if the document annotation shares the begin and ends earlier, or
   *         shares the end and begins later; otherwise {@code SHIFTING_LEFT} if it begins or ends
   *         after the gold annotation; otherwise {@code SHIFTING_RIGHT}
   */
  private AnnotationErrorType classifyBoundaryMismatch(TrabalAnnotation truth,
          TrabalAnnotation error) {
    boolean shorterAtEnd = error.getBegin() == truth.getBegin()
            && error.getEnd() < truth.getEnd();
    boolean shorterAtBegin = error.getBegin() > truth.getBegin()
            && error.getEnd() == truth.getEnd();
    if (shorterAtEnd || shorterAtBegin) {
      return AnnotationErrorType.EXPANSION;
    }
    if (error.getBegin() > truth.getBegin() || error.getEnd() > truth.getEnd()) {
      return AnnotationErrorType.SHIFTING_LEFT;
    }
    return AnnotationErrorType.SHIFTING_RIGHT;
  }

  /**
   * Returns the candidate whose error annotation has the smallest summed absolute distance of
   * begin and end offsets to the gold annotation. On ties the earliest candidate in the list is
   * kept.
   *
   * @param candidates
   *          non-empty list of candidate errors
   * @param truth
   *          the gold annotation the candidates are measured against
   * @return the nearest candidate
   */
  private AnnotationError pickNearestError(List<AnnotationError> candidates,
          TrabalAnnotation truth) {
    AnnotationError nearest = candidates.get(0);
    int smallest = Math.abs(nearest.getError().getAnnotation().getBegin() - truth.getBegin())
            + Math.abs(nearest.getError().getAnnotation().getEnd() - truth.getEnd());
    for (int j = 1; j < candidates.size(); j++) {
      AnnotationError candidate = candidates.get(j);
      int current = Math.abs(candidate.getError().getAnnotation().getBegin() - truth.getBegin())
              + Math.abs(candidate.getError().getAnnotation().getEnd() - truth.getEnd());
      if (current < smallest) {
        smallest = current;
        nearest = candidate;
      }
    }
    return nearest;
  }