public List createSlotInstancesForCAS()

in ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/core/TextRulerExampleDocument.java [94:238]


  /**
   * Creates the example instances of this document for the given target.
   * <p>
   * Depending on the target one of three strategies is used:
   * <ul>
   * <li>multi-slot target: the annotation index of {@code aCas} is walked and the slot annotations
   * found are grouped into (possibly incomplete) multi-slot examples,</li>
   * <li>left/right correction target: the learner's analysis engine is run on the learner's test
   * CAS and every covered negative example that has a correct tag close by yields a
   * {@link TextRulerShiftExample}; {@code aCas} and {@code createFromRawTypeName} are not used in
   * this case, the document's own CAS ({@code getCAS()}) is used instead,</li>
   * <li>any other target: one example per annotation of the single slot type.</li>
   * </ul>
   *
   * @param aCas
   *          the CAS the slot annotations are read from (multi-slot and single-slot case)
   * @param target
   *          the learning target that selects the strategy and names the slot types
   * @param createFromRawTypeName
   *          single-slot case only: if {@code true} the target's raw single slot type name is
   *          used to look up the annotations, otherwise its single slot type name
   * @return a new, possibly empty list with the created examples; never {@code null}
   */
  public List<TextRulerExample> createSlotInstancesForCAS(CAS aCas, TextRulerTarget target,
          boolean createFromRawTypeName) {
    List<TextRulerExample> result = new ArrayList<TextRulerExample>();

    if (target.isMultiSlot()) {
      addMultiSlotExamples(aCas, target, result);
    } else if (target.isLeftCorrection() || target.isRightCorrection()) {
      addShiftExamples(target, result);
    } else {
      addSingleSlotExamples(aCas, target, createFromRawTypeName, result);
    }
    return result;
  }

  /**
   * Walks the annotation index of {@code aCas} and groups the annotations whose type is one of the
   * target's slot types into multi-slot examples, which are appended to {@code result}.
   */
  private void addMultiSlotExamples(CAS aCas, TextRulerTarget target,
          List<TextRulerExample> result) {
    TypeSystem ts = aCas.getTypeSystem();
    int slotCount = target.slotNames.length;
    List<Type> slotTypes = new ArrayList<Type>(slotCount);
    for (String slotName : target.slotNames) {
      slotTypes.add(ts.getType(slotName));
    }

    // index of the next slot we expect to see for the example currently being assembled
    int currentSlotIndex = 0;
    TextRulerAnnotation[] currentAnnotations = new TextRulerAnnotation[slotCount];

    for (FSIterator<AnnotationFS> it = aCas.getAnnotationIndex().iterator(true); it.isValid(); it
            .moveToNext()) {
      AnnotationFS fs = (AnnotationFS) it.get();
      int idx = slotTypes.indexOf(fs.getType());
      if (idx < 0) {
        // not a slot annotation
        continue;
      }
      if (idx < currentSlotIndex) {
        // the previous example was not complete, so we have to write it down:
        result.add(new TextRulerExample(this, currentAnnotations, true, target));
        currentAnnotations = new TextRulerAnnotation[slotCount];
      }
      currentAnnotations[idx] = new TextRulerAnnotation(fs, this);
      if (idx >= slotCount - 1) {
        // last slot reached: the example is complete
        result.add(new TextRulerExample(this, currentAnnotations, true, target));
        currentAnnotations = new TextRulerAnnotation[slotCount];
        currentSlotIndex = 0;
      } else {
        currentSlotIndex = idx + 1;
      }
    }
    if (currentSlotIndex > 0) {
      // flush the trailing, incomplete example
      result.add(new TextRulerExample(this, currentAnnotations, true, target));
    }
  }

  /**
   * Creates shift examples for a left/right correction target and appends them to {@code result}.
   * <p>
   * The learner's analysis engine is applied to the learner's test CAS, the outcome is compared
   * with this document for the corresponding boundary target, and for each covered negative
   * example the closest correct tag in the neighbourhood (limited by the target's max shift
   * distance) is looked up. At most ONE shift example is created per wrong tag.
   */
  private void addShiftExamples(TextRulerTarget target, List<TextRulerExample> result) {
    TextRulerBasicLearner learner = target.getLearner();
    Set<String> filterSet = learner.getFilterSet();
    CAS testCAS = learner.getTestCAS();
    TextRulerStatisticsCollector c = new TextRulerStatisticsCollector();
    resetAndFillTestCAS(testCAS, target);
    CAS docCAS = getCAS();
    TypeSystem ts = docCAS.getTypeSystem();
    Type tokensRootType = ts.getType(TextRulerToolkit.RUTA_ANY_TYPE_NAME);
    AnalysisEngine analysisEngine = learner.getAnalysisEngine();
    try {
      analysisEngine.process(testCAS);
    } catch (AnalysisEngineProcessException e) {
      // best effort: keep going with whatever is in the test CAS, but do not hide the failure
      TextRulerToolkit.log("Processing the test CAS failed while creating shift examples: "
              + e.getMessage());
    }

    // the comparison is done on the plain boundary target matching the correction direction
    TextRulerTarget newTarget = new TextRulerTarget(target.slotNames, target.getLearner());
    if (target.isLeftCorrection()) {
      newTarget.type = TextRulerTarget.MLTargetType.SINGLE_LEFT_BOUNDARY;
    } else {
      newTarget.type = TextRulerTarget.MLTargetType.SINGLE_RIGHT_BOUNDARY;
    }
    createExamplesForTarget(newTarget);
    learner.compareOriginalDocumentWithTestCAS(this, testCAS, newTarget, c, true);
    List<TextRulerExample> correctTags = getPositiveExamples();
    // copy, so that we iterate over a snapshot of the collector's negative examples
    List<TextRulerExample> wrongTags = new ArrayList<TextRulerExample>(
            c.getCoveredNegativeExamples());

    for (TextRulerExample wrongTag : wrongTags) {
      // test, if there's a corresponding positive example
      // somewhere around (within maxDistance)
      List<AnnotationFS> left = TextRulerToolkit.getAnnotationsBeforePosition(docCAS, wrongTag
              .getAnnotation().getBegin(), target.getMaxShiftDistance(), TextRulerToolkit
              .getFilterSetWithSlotNames(target.slotNames, filterSet), tokensRootType);
      List<AnnotationFS> right = TextRulerToolkit.getAnnotationsAfterPosition(docCAS, wrongTag
              .getAnnotation().getEnd(), target.getMaxShiftDistance() + 1, TextRulerToolkit
              .getFilterSetWithSlotNames(target.slotNames, filterSet), tokensRootType);

      // One annotation more than the shift distance was requested and the first one is dropped
      // (presumably the annotation at the wrong tag's own position - TODO confirm). Nothing may
      // follow the wrong tag at all, so guard against an empty list.
      if (!right.isEmpty()) {
        right.remove(0);
      }

      // TODO stop after the first found match or create one bad
      // example for each found occurence ??!!
      // for now: stop after one ! so create only ONE bad
      // example...

      // search leftwards, nearest annotation first
      int leftDistance = 0;
      TextRulerExample leftCorrectTag = null;
      for (int i = left.size() - 1; i >= 0; i--) {
        leftDistance++;
        TextRulerAnnotation needle = TextRulerToolkit.convertToTargetAnnotation(left.get(i),
                this, target, docCAS.getTypeSystem());
        // Only checks the beginning of needle
        leftCorrectTag = TextRulerExampleDocument.exampleListContainsAnnotation(correctTags,
                needle);
        if (leftCorrectTag != null) {
          break;
        }
      }

      // search rightwards, nearest annotation first
      int rightDistance = 0;
      TextRulerExample rightCorrectTag = null;
      for (AnnotationFS fs : right) {
        rightDistance++;
        TextRulerAnnotation needle = TextRulerToolkit.convertToTargetAnnotation(fs, this, target,
                docCAS.getTypeSystem());
        // Only checks the beginning of needle
        rightCorrectTag = TextRulerExampleDocument.exampleListContainsAnnotation(correctTags,
                needle);
        if (rightCorrectTag != null) {
          break;
        }
      }

      TextRulerExample theCorrectTag;
      if (leftCorrectTag == null || rightCorrectTag == null) {
        // At most one side matched. The distance counter of a side without a match only holds
        // the number of inspected annotations, so it must not be compared; simply take the
        // side that did match (or null if neither did).
        theCorrectTag = leftCorrectTag != null ? leftCorrectTag : rightCorrectTag;
      } else if (rightDistance < leftDistance) {
        theCorrectTag = rightCorrectTag;
      } else if (rightDistance > leftDistance) {
        theCorrectTag = leftCorrectTag;
      } else if (target.type == MLTargetType.SINGLE_LEFT_BOUNDARY) {
        // same distance: use the one that would lie in the slot filler
        theCorrectTag = rightCorrectTag;
      } else {
        theCorrectTag = leftCorrectTag;
      }

      if (theCorrectTag != null) {
        TextRulerToolkit.log("FOUND BAD EXAMPLE FOR SHIFTING !!");
        TextRulerShiftExample shiftExample = new TextRulerShiftExample(this,
                wrongTag.getAnnotation(), theCorrectTag.getAnnotation(), true, target);
        result.add(shiftExample);
      }
    }
    // GlobalCASSource.releaseCAS(testCAS);
  }

  /**
   * Appends one example to {@code result} for every annotation of the target's single slot type
   * found in {@code aCas}.
   */
  private void addSingleSlotExamples(CAS aCas, TextRulerTarget target,
          boolean createFromRawTypeName, List<TextRulerExample> result) {
    // do not use the boundary type here since we seek for the original slot!
    String slotTypeName = createFromRawTypeName ? target.getSingleSlotRawTypeName() : target
            .getSingleSlotTypeName();
    List<AnnotationFS> slots = TextRulerToolkit.extractAnnotationsForSlotName(aCas, slotTypeName);
    for (AnnotationFS a : slots) {
      result.add(new TextRulerExample(this, TextRulerToolkit.convertToTargetAnnotation(a, this,
              target, aCas.getTypeSystem()), true, target));
    }
  }