in languagetool-core/src/main/java/org/languagetool/tagging/disambiguation/rules/DisambiguationRuleHandler.java [289:465]
/**
 * Handles the closing tag of an element of a disambiguation rule file.
 *
 * <p>Depending on {@code qName} this either finalizes the object that was being
 * collected while the element was open (a rule, an anti-pattern, a token, an
 * example, ...) or simply resets the parser state flags belonging to that element.
 *
 * @param namespaceURI not used by this method
 * @param sName not used by this method
 * @param qName qualified name of the element being closed; selects the action
 * @throws SAXException if a finished rule is inconsistent: the number of {@code wd}
 *         interpretations of a REPLACE rule differs from the number of matched tokens,
 *         or the number of unified tokens of a UNIFY rule differs from the number of
 *         matched tokens
 */
public void endElement(String namespaceURI, String sName,
    String qName) throws SAXException {
  switch (qName) {
    case RULE:
      // Build the rule from everything collected since the opening tag.
      DisambiguationPatternRule rule = new DisambiguationPatternRule(id,
          name, language, patternTokens, disambiguatedPOS, posSelector,
          disambigAction);
      endPositionCorrection = endPos - tokenCountForMarker;
      if (startPos != -1 && endPos != -1) {
        // Both positions were set (endPos is assigned when a marker closes).
        rule.setStartPositionCorrection(startPos);
        rule.setEndPositionCorrection(endPositionCorrection);
      } else {
        // No positions recorded: use the range 0..tokenCountForMarker for the checks below.
        startPos = 0;
        endPos = tokenCountForMarker;
      }
      rule.setSubId(inRuleGroup ? StringInterner.intern(Integer.toString(subId)) : "1");
      int matchedTokenCount = endPos - startPos;
      if (newWdList != null) {
        if (disambigAction == DisambiguationPatternRule.DisambiguatorAction.ADDCHUNK
            || disambigAction == DisambiguationPatternRule.DisambiguatorAction.ADD
            || disambigAction == DisambiguationPatternRule.DisambiguatorAction.REMOVE
            || disambigAction == DisambiguationPatternRule.DisambiguatorAction.REPLACE) {
          // A REPLACE rule with explicit interpretations needs exactly one per matched token.
          if ((!newWdList.isEmpty() && disambigAction == DisambiguationPatternRule.DisambiguatorAction.REPLACE)
              && newWdList.size() != matchedTokenCount) {
            throw new SAXException(
                language.getName() + " rule error. The number of interpretations specified with wd: "
                    + newWdList.size()
                    + " must be equal to the number of matched tokens (" + matchedTokenCount + ")"
                    + "\n Line: " + pLocator.getLineNumber() + ", column: "
                    + pLocator.getColumnNumber() + ".");
          }
          rule.setNewInterpretations(newWdList.toArray(new AnalyzedToken[0]));
        }
        newWdList.clear();
      }
      caseSensitive = false;
      if (disambExamples != null) {
        rule.setExamples(disambExamples);
      }
      if (untouchedExamples != null) {
        rule.setUntouchedExamples(untouchedExamples);
      }
      setRuleFilter(filterClassName, filterArgs, rule);
      // Anti-patterns shared by the enclosing rule group come first, then the rule's own.
      // The rule's own list is cleared right after being handed over, so setAntiPatterns
      // presumably copies the entries rather than keeping the list - confirm.
      if (!rulegroupAntiPatterns.isEmpty()) {
        rule.setAntiPatterns(rulegroupAntiPatterns);
      }
      if (!ruleAntiPatterns.isEmpty()) {
        rule.setAntiPatterns(ruleAntiPatterns);
        ruleAntiPatterns.clear();
      }
      rules.add(rule);
      if (disambigAction == DisambiguationPatternRule.DisambiguatorAction.UNIFY && matchedTokenCount != uniCounter) {
        throw new SAXException(language.getName() + " rule error. The number unified tokens: "
            + uniCounter + " must be equal to the number of matched tokens: " + matchedTokenCount
            + "\n Line: " + pLocator.getLineNumber() + ", column: "
            + pLocator.getColumnNumber() + ".");
      }
      // NOTE(review): singleTokenCorrection is (endPos - startPos > 1), i.e. the same value as
      // (matchedTokenCount > 1) unless something above changed startPos/endPos. The condition
      // below therefore looks unsatisfiable and the error is apparently never raised. It is
      // kept unchanged on purpose: making it effective would reject REPLACE rules spanning
      // several tokens, which the wd-count check above explicitly supports - confirm intent.
      boolean singleTokenCorrection = endPos - startPos > 1;
      if ((!singleTokenCorrection && (disambigAction == DisambiguationPatternRule.DisambiguatorAction.FILTER || disambigAction == DisambiguationPatternRule.DisambiguatorAction.REPLACE))
          && (matchedTokenCount > 1)) {
        throw new SAXException(
            language.getName() + " rule error. Cannot replace or filter more than one token at a time."
                + "\n Line: " + pLocator.getLineNumber() + ", column: "
                + pLocator.getColumnNumber() + ".");
      }
      // Reset the per-rule state for the next rule.
      patternTokens.clear();
      posSelector = null;
      disambExamples = null;
      untouchedExamples = null;
      startPos = -1;
      endPos = -1;
      filterClassName = null;
      filterArgs = null;
      inRule = false;
      break;
    case EXCEPTION:
      finalizeExceptions();
      break;
    case AND:
      // Leaving an and-group: reset its counter and advance the token counter.
      inAndGroup = false;
      andGroupCounter = 0;
      tokenCounter++;
      break;
    case TOKEN:
      // Tokens inside an and-group are not counted individually for unification.
      if (inUnification && !inAndGroup) {
        uniCounter++;
      }
      // The disambiguation Unifier is always in the default language variant
      finalizeTokens(language.getDefaultLanguageVariant().getDisambiguationUnifierConfiguration());
      break;
    case PATTERN:
      inPattern = false;
      tokenCounter = 0;
      break;
    case MATCH:
      // Hand the text collected in 'match' to whichever element the match belongs to.
      if (inDisambiguation) {
        posSelector.setLemmaString(match.toString());
      } else if (inToken) {
        tokenReference.setLemmaString(match.toString());
      }
      inMatch = false;
      break;
    case DISAMBIG:
      inDisambiguation = false;
      break;
    case RULEGROUP:
      inRuleGroup = false;
      // Anti-patterns collected outside a single rule are meant to be shared by the rules
      // of their rule group. Every finished rule receives the content of this list, so it
      // has to be emptied when the group closes; otherwise the anti-patterns would also be
      // attached to all rules that follow the group.
      if (rulegroupAntiPatterns != null) {
        rulegroupAntiPatterns.clear();
      }
      break;
    case UNIFICATION:
      if (inUnificationDef) {
        inUnificationDef = false;
        tokenCounter = 0;
      }
      break;
    case "feature":
      // Store the types collected for this feature and start a fresh list for the next one.
      equivalenceFeatures.put(uFeature, uTypeList);
      uTypeList = new ArrayList<>();
      break;
    case UNIFY:
      inUnification = false;
      equivalenceFeatures = new HashMap<>();
      //set negation on the last token only!
      int lastElement = patternTokens.size() - 1;
      patternTokens.get(lastElement).setLastInUnification();
      if (uniNegation) {
        patternTokens.get(lastElement).setUniNegation();
      }
      break;
    case UNIFY_IGNORE:
      inUnificationNeutral = false;
      break;
    case WD:
      addNewWord(wd.toString(), wdLemma, wdPos);
      inWord = false;
      break;
    case ANTIPATTERN:
      // An anti-pattern is represented as a rule of its own with the IMMUNIZE action.
      final DisambiguationPatternRule disRule = new DisambiguationPatternRule(
          id + "_antipattern:" + antiPatternCounter,
          "antipattern", language, patternTokens, null, null,
          DisambiguationPatternRule.DisambiguatorAction.IMMUNIZE);
      if (startPos != -1 && endPos != -1) {
        disRule.setStartPositionCorrection(startPos);
        disRule.setEndPositionCorrection(endPos - tokenCountForMarker);
      }
      patternTokens.clear();
      if (inRule) {
        if (ruleAntiPatterns == null) {
          ruleAntiPatterns = new ArrayList<>();
        }
        ruleAntiPatterns.add(disRule);
      } else { // a rulegroup shares all antipatterns not included in a single rule
        if (rulegroupAntiPatterns == null) {
          rulegroupAntiPatterns = new ArrayList<>();
        }
        rulegroupAntiPatterns.add(disRule);
      }
      tokenCounter = 0;
      inAntiPattern = false;
      break;
    case EXAMPLE:
      inExample = false;
      // Untouched examples are stored as plain text, the others together with input/output.
      if (untouched) {
        untouchedExamples.add(example.toString());
      } else {
        disambExamples.add(new DisambiguatedExample(example.toString(), input, output));
      }
      break;
    case MARKER:
      example.append("</marker>");
      if (inPattern || inAntiPattern) {
        // Record the end position of the marked tokens from the current marker token count.
        endPos = tokenCountForMarker;
        inMarker = false;
      }
      break;
  }
}