in languagetool-language-modules/fr/src/main/java/org/languagetool/rules/fr/PostponedAdjectiveConcordanceFilter.java [126:467]
public RuleMatch acceptRuleMatch(RuleMatch match, Map<String, String> arguments, int patternTokenPos,
    AnalyzedTokenReadings[] patternTokens, List<Integer> tokenPositions) throws IOException {
  /*
   * Checks whether the adjective at patternTokenPos agrees in gender and number with the
   * nouns and determiners that precede it in the sentence.
   *
   * Returns null (the rule does not match) when:
   *   - no noun or determiner is counted before the adjective,
   *   - a preceding noun, determiner or adjective already agrees with it,
   *   - the token right before it does not satisfy the "necessary condition" below, or
   *   - no replacement different from the original token can be synthesized.
   * Otherwise the given match is returned with its suggested replacements set.
   *
   * The parameters arguments, patternTokens and tokenPositions are not read here.
   */
  AnalyzedTokenReadings[] tokens = match.getSentence().getTokensWithoutWhitespace();
  int i = patternTokenPos; // index of the adjective under review
  int j;                   // backward offset from i
  boolean isPlural = true;
  boolean isPrevNoun = false;
  Pattern substPattern = null;
  Pattern gnPattern = null;
  Pattern adjPattern = null;
  // Gender/number combinations that the preceding nouns/determiners make possible;
  // they drive which forms are synthesized as suggestions.
  boolean canBeMS = false;
  boolean canBeFS = false;
  boolean canBeMP = false;
  boolean canBeFP = false;
  boolean canBeP = false;
  /* Count all nouns and determiners before the adjectives */
  // Takes care of acceptable combinations.
  // One slot per "level"; the level increases each time a level-changing preposition is crossed.
  // cN* = noun counters, cD* = determiner counters. Suffixes: M/F/C = masculine/feminine/common
  // gender, S/P = singular/plural, MN = masculine noun that allows both numbers.
  int[] cNt = new int[maxLevels]; // every token tagged as noun, regardless of gender/number
  int[] cNMS = new int[maxLevels];
  int[] cNFS = new int[maxLevels];
  int[] cNMP = new int[maxLevels];
  int[] cNMN = new int[maxLevels];
  int[] cNFP = new int[maxLevels];
  int[] cNCS = new int[maxLevels];
  int[] cNCP = new int[maxLevels];
  int[] cDMS = new int[maxLevels];
  int[] cDFS = new int[maxLevels];
  int[] cDMP = new int[maxLevels];
  int[] cDFP = new int[maxLevels];
  int[] cN = new int[maxLevels];
  int[] cD = new int[maxLevels];
  int level = 0;
  j = 1;
  initializeApparitions();
  while (i - j > 0 && keepCounting(tokens[i - j]) && level < maxLevels) {
    if (!isPrevNoun) {
      if (matchPostagRegexp(tokens[i - j], NOM) || (
          // adjective or participle without a noun, but with some determiner before it
          i - j - 1 > 0 && !matchPostagRegexp(tokens[i - j], NOM) && matchPostagRegexp(tokens[i - j], ADJECTIU)
          && matchPostagRegexp(tokens[i - j - 1], DET))) {
        if (matchPostagRegexp(tokens[i - j], _GN_MS)) {
          cNMS[level]++;
          canBeMS = true;
        }
        if (matchPostagRegexp(tokens[i - j], _GN_FS)) {
          cNFS[level]++;
          canBeFS = true;
        }
        if (matchPostagRegexp(tokens[i - j], _GN_MP)) {
          cNMP[level]++;
          canBeMP = true;
        }
        if (matchPostagRegexp(tokens[i - j], _GN_FP)) {
          cNFP[level]++;
          canBeFP = true;
        }
      }
      // Token not tagged as part of a nominal group: fall back to its plain noun tag.
      if (!matchPostagRegexp(tokens[i - j], _GN_)) {
        if (matchPostagRegexp(tokens[i - j], NOM_MS)) {
          cNMS[level]++;
          canBeMS = true;
        } else if (matchPostagRegexp(tokens[i - j], NOM_FS)) {
          cNFS[level]++;
          canBeFS = true;
        } else if (matchPostagRegexp(tokens[i - j], NOM_MP)) {
          cNMP[level]++;
          canBeMP = true;
        } else if (matchPostagRegexp(tokens[i - j], NOM_MN)) {
          cNMN[level]++;
          canBeMS = true;
          canBeMP = true;
        } else if (matchPostagRegexp(tokens[i - j], NOM_FP)) {
          cNFP[level]++;
          canBeFP = true;
        } else if (matchPostagRegexp(tokens[i - j], NOM_CS)) {
          cNCS[level]++;
          canBeMS = true;
          canBeFS = true;
        } else if (matchPostagRegexp(tokens[i - j], NOM_CP)) {
          cNCP[level]++;
          canBeFP = true;
          canBeMP = true;
        }
      }
    }
    // avoid two consecutive nouns
    if (matchPostagRegexp(tokens[i - j], NOM)) {
      cNt[level]++;
      isPrevNoun = true;
    } else {
      isPrevNoun = false;
    }
    // Common-gender determiners take their gender from the noun that follows them.
    if (matchPostagRegexp(tokens[i - j], DET_CS)) {
      if (matchPostagRegexp(tokens[i - j + 1], NOM_MS)) {
        cDMS[level]++;
        canBeMS = true;
      }
      if (matchPostagRegexp(tokens[i - j + 1], NOM_FS)) {
        cDFS[level]++;
        canBeFS = true;
      }
    }
    if (matchPostagRegexp(tokens[i - j], DET_CP)) {
      // Plural determiners are tallied in the plural counters. Only cD[] and the sum
      // cDMS + cDMP are read further down, so the checks below see the same totals as before.
      if (matchPostagRegexp(tokens[i - j + 1], NOM_MP)) {
        cDMP[level]++;
        canBeMP = true;
      }
      if (matchPostagRegexp(tokens[i - j + 1], NOM_FP)) {
        cDFP[level]++;
        canBeFP = true;
      }
    }
    //TODO DET_CS, DET_CP without noun afterwards
    if (!matchPostagRegexp(tokens[i - j], ADVERBI)) {
      if (matchPostagRegexp(tokens[i - j], DET_MS)) {
        cDMS[level]++;
        canBeMS = true;
      }
      if (matchPostagRegexp(tokens[i - j], DET_FS)) {
        cDFS[level]++;
        canBeFS = true;
      }
      if (matchPostagRegexp(tokens[i - j], DET_MP)) {
        cDMP[level]++;
        canBeMP = true;
      }
      if (matchPostagRegexp(tokens[i - j], DET_FP)) {
        cDFP[level]++;
        canBeFP = true;
      }
    }
    if (i - j - 1 > 0) {
      if (matchRegexp(tokens[i - j].getToken(), PREPOSICIO_CANVI_NIVELL)
          && matchPostagRegexp(tokens[i - j], PREPOSICIONS) // exclude "des" when it is only determiner
          && !matchPostagRegexp(tokens[i - j], CONJUNCIO) // "com" used as a conjunction
          && !matchRegexp(tokens[i - j - 1].getToken(), COORDINACIO_IONI)
          && !matchPostagRegexp(tokens[i - j + 1], ADVERBI)) {
        level++;
        //exception: d'environ
      } else if (tokens[i - j].getToken().equalsIgnoreCase("d'")
          && tokens[i - j + 1].getToken().equalsIgnoreCase("environ")) {
        level++;
      }
    }
    j = updateJValue(tokens, i, j, level);
    updateApparitions(tokens[i - j]);
    j++;
  }
  // "level" now becomes the number of levels to inspect, capped at the array size.
  level++;
  if (level > maxLevels) {
    level = maxLevels;
  }
  j = 0;
  int cNtotal = 0;
  int cDtotal = 0;
  while (j < level) {
    cN[j] = cNMS[j] + cNFS[j] + cNMP[j] + cNFP[j] + cNCS[j] + cNCP[j] + cNMN[j];
    cD[j] = cDMS[j] + cDFS[j] + cDMP[j] + cDFP[j];
    cNtotal += cN[j];
    cDtotal += cD[j];
    // exceptions: adjective is plural and there are several nouns before
    if (matchPostagRegexp(tokens[i], ADJECTIU_MP) && (cN[j] > 1 || cD[j] > 1)
        && (cNMS[j] + cNMN[j] + cNMP[j] + cNCS[j] + cNCP[j] + cDMS[j] + cDMP[j]) > 0
        && (cNFS[j] + cNFP[j] <= cNt[j])) {
      return null;
    }
    if (matchPostagRegexp(tokens[i], ADJECTIU_FP) && (cN[j] > 1 || cD[j] > 1)
        && ((cNMS[j] + cNMP[j] + cNMN[j] + cDMS[j] + cDMP[j]) == 0 || (cNt[j] > 0 && cNFS[j] + cNFP[j] >= cNt[j]))) {
      return null;
    }
    // Adjective can't be singular
    if (cN[j] + cD[j] > 0) {
      isPlural = isPlural && cD[j] > 1 && level > 1;
      canBeP = canBeP || cN[j] > 1;
    }
    j++;
  }
  // comma + plural noun
  isPlural = isPlural || (i - 2 > 0 && cNMP[0] + cNFP[0] + cNCP[0] > 0 && tokens[i - 2].getToken().equals(","));
  // there is neither a noun nor a determiner before the adjective
  if (cNtotal == 0 && cDtotal == 0) {
    return null;
  }
  // patterns according to the analyzed adjective
  if (matchPostagRegexp(tokens[i], ADJECTIU_CS)) {
    substPattern = GN_CS;
    adjPattern = ADJECTIU_S;
    gnPattern = _GN_CS;
  } else if (matchPostagRegexp(tokens[i], ADJECTIU_CP)) {
    substPattern = GN_CP;
    adjPattern = ADJECTIU_P;
    gnPattern = _GN_CP;
  } else if (matchPostagRegexp(tokens[i], ADJECTIU_MN)) {
    substPattern = GN_MN;
    adjPattern = ADJECTIU_M;
    gnPattern = _GN_MN;
  } else if (matchPostagRegexp(tokens[i], ADJECTIU_FN)) {
    substPattern = GN_FN;
    adjPattern = ADJECTIU_FN;
    gnPattern = _GN_FN;
  } else if (matchPostagRegexp(tokens[i], ADJECTIU_MS)) {
    substPattern = GN_MS;
    adjPattern = ADJECTIU_MS;
    gnPattern = _GN_MS;
  } else if (matchPostagRegexp(tokens[i], ADJECTIU_FS)) {
    substPattern = GN_FS;
    adjPattern = ADJECTIU_FS;
    gnPattern = _GN_FS;
  } else if (matchPostagRegexp(tokens[i], ADJECTIU_MP)) {
    substPattern = GN_MP;
    adjPattern = ADJECTIU_MP;
    gnPattern = _GN_MP;
  } else if (matchPostagRegexp(tokens[i], ADJECTIU_FP)) {
    substPattern = GN_FP;
    adjPattern = ADJECTIU_FP;
    gnPattern = _GN_FP;
  }
  // The token matched none of the adjective patterns above: nothing to check.
  if (substPattern == null || gnPattern == null || adjPattern == null) {
    return null;
  }
  // combinations Det/Nom + adv (1,2..) + adj.
  // If there is agreement, the rule doesn't match
  j = 1;
  boolean keepCount = true;
  while (i - j > 0 && keepCount) {
    if (matchPostagRegexp(tokens[i - j], NOM_DET) && matchPostagRegexp(tokens[i - j], gnPattern)) {
      return null; // there is a previous agreeing noun
    } else if (!matchPostagRegexp(tokens[i - j], _GN_) && matchPostagRegexp(tokens[i - j], substPattern)) {
      return null; // there is a previous agreeing noun
    }
    // stop after the first noun/determiner has been examined
    keepCount = !matchPostagRegexp(tokens[i - j], NOM_DET);
    j++;
  }
  // Necessary condition: previous token is a non-agreeing noun
  // or it is adjective or adverb (not preceded by verb)
  boolean previousTokenAllowsMatch =
      (matchPostagRegexp(tokens[i - 1], NOM) && !matchPostagRegexp(tokens[i - 1], substPattern))
      || (matchPostagRegexp(tokens[i - 1], _GN_) && !matchPostagRegexp(tokens[i - 1], gnPattern))
      || (matchPostagRegexp(tokens[i - 1], ADJECTIU) && !matchPostagRegexp(tokens[i - 1], adjPattern))
      || (i > 2 && matchPostagRegexp(tokens[i - 1], ADVERBIS_ACCEPTATS) && !matchPostagRegexp(tokens[i - 2], VERB)
          && !matchPostagRegexp(tokens[i - 2], PREPOSICIONS))
      || (i > 3 && matchPostagRegexp(tokens[i - 1], LOC_ADV) && matchPostagRegexp(tokens[i - 2], LOC_ADV)
          && !matchPostagRegexp(tokens[i - 3], VERB) && !matchPostagRegexp(tokens[i - 3], PREPOSICIONS));
  if (!previousTokenAllowsMatch) {
    return null;
  }
  // When the adjective is singular but must be plural, the rule matches without further lookup.
  if (!(isPlural && matchPostagRegexp(tokens[i], ADJECTIU_S))) {
    // look into previous words
    j = 1;
    initializeApparitions();
    while (i - j > 0 && keepCounting(tokens[i - j]) && (level > 1 || j < 4)) {
      if (!matchPostagRegexp(tokens[i - j], _GN_) && matchPostagRegexp(tokens[i - j], NOM_DET)
          && matchPostagRegexp(tokens[i - j], substPattern)) {
        // there is a previous agreeing noun
        return null;
      } else if (matchPostagRegexp(tokens[i - j], gnPattern)) {
        // there is a previous agreeing adjective (in a nominal group)
        return null;
      }
      j = updateJValue(tokens, i, j, 0);
      updateApparitions(tokens[i - j]);
      j++;
    }
  }
  FrenchSynthesizer synth = FrenchSynthesizer.INSTANCE;
  // The rule matches
  // Synthesize suggestions
  List<String> suggestions = new ArrayList<>();
  // Common-gender singular adjective: offer its plural.
  AnalyzedToken at = getAnalyzedToken(tokens[patternTokenPos], ADJECTIU_CS);
  if (at != null) {
    suggestions.addAll(Arrays.asList(synth.synthesize(at,"J e p", true)));
  }
  // Common-gender plural adjective: offer its singular.
  if (suggestions.isEmpty()) {
    at = getAnalyzedToken(tokens[patternTokenPos], ADJECTIU_CP);
    if (at != null) {
      suggestions.addAll(Arrays.asList(synth.synthesize(at,"J e s", true)));
    }
  }
  if (suggestions.isEmpty() && isPlural) {
    at = getAnalyzedToken(tokens[patternTokenPos], ADJECTIU_P);
    if (at != null) {
      suggestions.addAll(Arrays.asList(synth.synthesize(at, "J . p|V ppa . p", true)));
    }
  }
  // Fallback: synthesize every form that the counted nouns/determiners allow.
  at = getAnalyzedToken(tokens[patternTokenPos], ADJECTIU);
  if (at != null && suggestions.isEmpty()) {
    if (canBeMS && !isPlural) {
      suggestions.addAll(Arrays.asList(synth.synthesize(at, "J [me] sp?|V ppa m s", true)));
    }
    if (canBeFS && !isPlural) {
      suggestions.addAll(Arrays.asList(synth.synthesize(at, "J [fe] sp?|V ppa f s", true)));
    }
    if (canBeMP) {
      suggestions.addAll(Arrays.asList(synth.synthesize(at, "J [me] s?p|V ppa m p", true)));
    }
    if (canBeFP) {
      suggestions.addAll(Arrays.asList(synth.synthesize(at, "J [fe] s?p|V ppa f p", true)));
    }
    if (canBeMS && (isPlural || canBeP)) {
      suggestions.addAll(Arrays.asList(synth.synthesize(at, "J [me] s?p|V ppa m p", true)));
    }
    if (canBeFS && !canBeMS && (isPlural || canBeP)) {
      suggestions.addAll(Arrays.asList(synth.synthesize(at, "J [fe] s?p|V ppa f p", true)));
    }
  }
  // Remove duplicates and avoid the original token as a suggestion. Filtering inside the
  // stream avoids mutating the collected list, whose mutability Collectors.toList() does
  // not guarantee.
  final String originalToken = tokens[patternTokenPos].getToken().toLowerCase();
  suggestions = suggestions.stream()
      .distinct()
      .filter(suggestion -> !originalToken.equals(suggestion))
      .collect(Collectors.toList());
  if (suggestions.isEmpty()) {
    return null;
  }
  match.setSuggestedReplacements(suggestions);
  return match;
}