in RegularExpressionAnnotator/src/main/java/org/apache/uima/annotator/regex/impl/RegExAnnotator.java [331:498]
/**
 * Runs all configured regex concepts against the given CAS.
 *
 * <p>Concepts are processed one after the other. For each concept its rules are
 * applied in order to the annotations of the rule's match type. Once a rule of
 * the concept has produced a match, the remaining rules of that concept are
 * skipped unless the concept's {@code processAllConceptRules()} returns
 * {@code true}. Annotations created for a concept are added to the CAS index
 * only after the concept's rules have been applied.
 *
 * @param aCAS
 *          the CAS whose annotations are matched and to whose index the created
 *          annotations are added
 * @throws AnalysisEngineProcessException
 *           propagated from the matching and annotation-creation steps
 */
public void process(CAS aCAS) throws AnalysisEngineProcessException {
  // iterate over all concepts one after the other to process them
  for (int i = 0; i < this.regexConcepts.length; i++) {
    // annotations created for this concept; they are indexed further below,
    // after the concept's rules have been applied
    ArrayList<FeatureStructure> annotsToAdd = new ArrayList<FeatureStructure>();
    // get the rules for the current concept
    Rule[] conceptRules = this.regexConcepts[i].getRules();
    boolean foundMatch = false;
    for (int ruleCount = 0; ruleCount < conceptRules.length; ruleCount++) {
      if (applyRuleToMatchTypeAnnotations(aCAS, i, conceptRules[ruleCount], ruleCount,
          annotsToAdd)) {
        foundMatch = true;
      }
      // a match was found for this concept: only go on with further rules
      // if the concept is configured to process all of its rules
      if (foundMatch && !this.regexConcepts[i].processAllConceptRules()) {
        break;
      }
    }
    // add all created annotations to the CAS index before moving to the
    // next concept
    for (FeatureStructure createdFs : annotsToAdd) {
      aCAS.getIndexRepository().addFS(createdFs);
    }
    // reset the last rule exception annotation before the next concept is
    // processed
    // NOTE(review): the previous comment here spoke of moving to the next
    // *rule*, but this reset runs once per concept. Confirm that
    // matchRuleExceptions() cannot reuse stale state across different rules
    // of the same concept.
    this.lastRuleExceptionAnnotation = null;
  }
}

/**
 * Applies a single rule to every annotation of the rule's match type.
 *
 * @param aCAS
 *          the CAS providing the match type annotations
 * @param conceptIndex
 *          index into {@code this.regexConcepts} of the concept owning the rule
 * @param rule
 *          the rule to apply
 * @param ruleIndex
 *          index of the rule within its concept
 * @param annotsToAdd
 *          list handed on to {@code processConceptInstructions}
 * @return {@code true} if at least one match was processed for this rule
 * @throws AnalysisEngineProcessException
 *           propagated from the matching and annotation-creation steps
 */
private boolean applyRuleToMatchTypeAnnotations(CAS aCAS, int conceptIndex, Rule rule,
    int ruleIndex, ArrayList<FeatureStructure> annotsToAdd)
    throws AnalysisEngineProcessException {
  // get the regex pattern for the current rule
  Pattern pattern = rule.getRegexPattern();
  // get the match type where the rule should be processed on
  Type matchType = rule.getMatchType();
  // get match type iterator from the CAS
  FSIterator<?> mtIterator = aCAS.getAnnotationIndex(matchType).iterator();
  boolean foundMatch = false;

  while (mtIterator.hasNext()) {
    AnnotationFS currentAnnot = (AnnotationFS) mtIterator.next();
    // skip annotations that do not satisfy all filter feature conditions
    if (!passesMatchTypeFilters(rule, currentAnnot)) {
      continue;
    }
    // get the specified feature path value from the current annotation to
    // run the regex on; without a value there is nothing to match
    String matchValue = rule.getMatchTypeFeaturePath().getValue(currentAnnot);
    if (matchValue == null) {
      continue;
    }
    Matcher matcher = pattern.matcher(matchValue);

    if (rule.getMatchStrategy() == Rule.MATCH_ALL) {
      // MATCH_ALL: process every match, restarting the search at 'pos'
      int pos = 0;
      while (matcher.find(pos)) {
        if (processMatchUnlessException(matcher, currentAnnot, matchValue, aCAS, conceptIndex,
            rule, ruleIndex, annotsToAdd)) {
          foundMatch = true;
        }
        if (matcher.end() == pos) {
          // Special case: matched the empty string at 'pos'. At the end of
          // the input we are done; otherwise step one position forward so
          // the same empty match is not found again forever.
          if (pos == matchValue.length()) {
            break;
          }
          ++pos;
        } else {
          // Default case: continue searching behind the current match.
          pos = matcher.end();
        }
      }
    } else if (rule.getMatchStrategy() == Rule.MATCH_COMPLETE) {
      // MATCH_COMPLETE: the pattern must match the entire value
      if (matcher.matches()) {
        if (processMatchUnlessException(matcher, currentAnnot, matchValue, aCAS, conceptIndex,
            rule, ruleIndex, annotsToAdd)) {
          foundMatch = true;
        }
      }
    } else if (rule.getMatchStrategy() == Rule.MATCH_FIRST) {
      // MATCH_FIRST: only the first match within the value is processed
      if (matcher.find()) {
        if (processMatchUnlessException(matcher, currentAnnot, matchValue, aCAS, conceptIndex,
            rule, ruleIndex, annotsToAdd)) {
          foundMatch = true;
        }
      }
    }
    // all analysis is done, we can go to the next annotation
  }
  return foundMatch;
}

/**
 * Checks an annotation against all match type filter features of a rule.
 *
 * <p>A filter feature is satisfied when its feature path yields a non-null
 * value for the annotation and the filter's pattern matches that entire value.
 * Checking stops at the first filter feature that is not satisfied.
 *
 * @param rule
 *          the rule providing the filter features
 * @param annot
 *          the annotation to check
 * @return {@code true} if every filter feature is satisfied (also when the rule
 *         has no filter features)
 * @throws AnalysisEngineProcessException
 *           propagated from the feature path evaluation
 */
private boolean passesMatchTypeFilters(Rule rule, AnnotationFS annot)
    throws AnalysisEngineProcessException {
  FilterFeature[] filterFeatures = rule.getMatchTypeFilterFeatures();
  for (int ff = 0; ff < filterFeatures.length; ff++) {
    // get the current filterFeature featurePath value
    String featureValue = filterFeatures[ff].getFeaturePath().getValue(annot);
    if (featureValue == null) {
      // feature value not set - condition failed
      return false;
    }
    // the filter pattern has to match the complete feature value
    if (!filterFeatures[ff].getPattern().matcher(featureValue).matches()) {
      return false;
    }
  }
  return true;
}

/**
 * Processes the concept instructions for the matcher's current match, unless
 * {@code matchRuleExceptions} reports {@code true} for the rule's exceptions
 * and the annotation.
 *
 * @return {@code true} if the concept instructions were processed,
 *         {@code false} if the match was skipped
 * @throws AnalysisEngineProcessException
 *           propagated from the exception check or the instruction processing
 */
private boolean processMatchUnlessException(Matcher matcher, AnnotationFS annot,
    String matchValue, CAS aCAS, int conceptIndex, Rule rule, int ruleIndex,
    ArrayList<FeatureStructure> annotsToAdd) throws AnalysisEngineProcessException {
  // check rule exceptions
  if (matchRuleExceptions(rule.getExceptions(), aCAS, annot)) {
    return false;
  }
  // create annotations and features
  processConceptInstructions(matcher, annot, matchValue, aCAS,
      this.regexConcepts[conceptIndex], ruleIndex, annotsToAdd);
  return true;
}