public void process()

in RegularExpressionAnnotator/src/main/java/org/apache/uima/annotator/regex/impl/RegExAnnotator.java [331:498]


  /**
   * Applies all configured regex concepts to the given CAS.
   *
   * <p>For every concept, its rules are evaluated in order. Each rule is run against all
   * annotations of the rule's match type: an annotation is skipped when one of the rule's filter
   * features does not match or when the feature path to match on yields {@code null}. Depending on
   * the rule's match strategy the pattern is applied to all occurrences ({@code MATCH_ALL}), to the
   * complete value ({@code MATCH_COMPLETE}) or to the first occurrence ({@code MATCH_FIRST}).
   * Feature structures collected for a concept are added to the CAS index repository once all of
   * the concept's rules have been handled.
   *
   * @param aCAS the CAS to read match type annotations from and to add created annotations to
   * @throws AnalysisEngineProcessException declared by the processing contract of this method
   */
  public void process(CAS aCAS) throws AnalysisEngineProcessException {

    // iterate over all concepts one after the other to process them
    for (int i = 0; i < this.regexConcepts.length; i++) {

      // feature structures created for this concept; they are added to the
      // CAS index only after all rules of the concept have been processed
      ArrayList<FeatureStructure> annotsToAdd = new ArrayList<FeatureStructure>();

      // get the rules for the current concept
      Rule[] conceptRules = this.regexConcepts[i].getRules();
      // stays true for the remaining rules of this concept once any rule matched
      boolean foundMatch = false;
      for (int ruleCount = 0; ruleCount < conceptRules.length; ruleCount++) {
        Rule rule = conceptRules[ruleCount];

        // regex pattern and match type the current rule works on
        Pattern pattern = rule.getRegexPattern();
        Type matchType = rule.getMatchType();

        // get match type iterator from the CAS
        FSIterator<?> mtIterator = aCAS.getAnnotationIndex(matchType).iterator();

        // the filter features belong to the rule, so look them up once per rule
        // instead of once per annotation
        FilterFeature[] filterFeatures = rule.getMatchTypeFilterFeatures();

        // iterate over all match type annotations the current rule should be processed on
        while (mtIterator.hasNext()) {
          AnnotationFS currentAnnot = (AnnotationFS) mtIterator.next();

          // skip annotations that do not satisfy all filter feature conditions
          if (!passesFilterFeatures(filterFeatures, currentAnnot)) {
            continue;
          }

          // get the specified feature path value from the current annotation
          // to run the regex on; null means there is nothing to match
          String matchValue = rule.getMatchTypeFeaturePath().getValue(currentAnnot);
          if (matchValue == null) {
            continue;
          }

          Matcher matcher = pattern.matcher(matchValue);

          if (rule.getMatchStrategy() == Rule.MATCH_ALL) {
            int pos = 0;
            // find(pos) resets the matcher and searches from pos
            while (matcher.find(pos)) {
              if (applyRuleToMatch(rule, ruleCount, i, matcher, currentAnnot, matchValue, aCAS,
                  annotsToAdd)) {
                foundMatch = true;
              }

              int matchEnd = matcher.end();
              if (matcher.start() == matchEnd) {
                // Zero-length match. Searching again from matchEnd would return
                // the very same match (and process it twice), so step past it.
                if (matchEnd >= matchValue.length()) {
                  // empty match at the end of the input - nothing left to search
                  break;
                }
                pos = matchEnd + 1;
              } else {
                // non-empty match: continue right after it
                pos = matchEnd;
              }
            }
          } else if (rule.getMatchStrategy() == Rule.MATCH_COMPLETE) {
            if (matcher.matches()
                && applyRuleToMatch(rule, ruleCount, i, matcher, currentAnnot, matchValue, aCAS,
                    annotsToAdd)) {
              foundMatch = true;
            }
          } else if (rule.getMatchStrategy() == Rule.MATCH_FIRST) {
            if (matcher.find()
                && applyRuleToMatch(rule, ruleCount, i, matcher, currentAnnot, matchValue, aCAS,
                    annotsToAdd)) {
              foundMatch = true;
            }
          }

          // all analysis is done, we can go to the next annotation
        }

        // a match was found and the concept does not want all of its rules
        // processed - do not go on with further rules of this concept
        if (foundMatch && !this.regexConcepts[i].processAllConceptRules()) {
          break;
        }
      }

      // add all created annotations to the CAS index before moving to the
      // next concept
      for (FeatureStructure fs : annotsToAdd) {
        aCAS.getIndexRepository().addFS(fs);
      }

      // reset last rule exception annotation before moving to the next concept
      this.lastRuleExceptionAnnotation = null;
    }
  }

  /**
   * Checks whether the given annotation satisfies all filter features.
   *
   * @param filterFeatures the filter features to check
   * @param annot the annotation the filter feature paths are evaluated on
   * @return {@code false} as soon as one feature value is {@code null} or is not completely
   *         matched by the filter feature's pattern, {@code true} otherwise (also when there are
   *         no filter features)
   */
  private boolean passesFilterFeatures(FilterFeature[] filterFeatures, AnnotationFS annot) {
    for (int ff = 0; ff < filterFeatures.length; ff++) {
      String featureValue = filterFeatures[ff].getFeaturePath().getValue(annot);
      // feature value not set - condition failed
      if (featureValue == null) {
        return false;
      }
      // the complete feature value has to match the filter pattern
      if (!filterFeatures[ff].getPattern().matcher(featureValue).matches()) {
        return false;
      }
    }
    return true;
  }

  /**
   * Handles a single successful regex match: checks the rule exceptions and, if they do not
   * apply, runs the concept instructions for the match.
   *
   * @param rule the rule that produced the match
   * @param ruleIndex index of the rule within the concept's rules
   * @param conceptIndex index of the concept within {@code regexConcepts}
   * @param matcher the matcher positioned on the current match
   * @param annot the annotation the match was found on
   * @param matchValue the text the pattern was applied to
   * @param aCAS the CAS being processed
   * @param annotsToAdd collector that is handed on to the concept instruction processing
   * @return {@code true} if the concept instructions were processed, {@code false} if the rule
   *         exception check suppressed the match
   * @throws AnalysisEngineProcessException declared for the delegated processing calls
   */
  private boolean applyRuleToMatch(Rule rule, int ruleIndex, int conceptIndex, Matcher matcher,
      AnnotationFS annot, String matchValue, CAS aCAS, ArrayList<FeatureStructure> annotsToAdd)
      throws AnalysisEngineProcessException {
    // check rule exceptions
    if (matchRuleExceptions(rule.getExceptions(), aCAS, annot)) {
      return false;
    }
    // create annotations and features
    processConceptInstructions(matcher, annot, matchValue, aCAS,
        this.regexConcepts[conceptIndex], ruleIndex, annotsToAdd);
    return true;
  }