public RuleMatch acceptRuleMatch()

in languagetool-language-modules/fr/src/main/java/org/languagetool/rules/fr/PostponedAdjectiveConcordanceFilter.java [126:467]


  public RuleMatch acceptRuleMatch(RuleMatch match, Map<String, String> arguments, int patternTokenPos,
                                   AnalyzedTokenReadings[] patternTokens, List<Integer> tokenPositions) throws IOException {
    // Checks the token at patternTokenPos (handled below as an adjective or participle) against
    // the nouns and determiners that precede it in the sentence. Returns null as soon as an
    // agreeing antecedent or an accepted combination is found, or when no replacement can be
    // synthesized; otherwise stores the synthesized forms as suggested replacements on `match`
    // and returns it. The parameters arguments, patternTokens and tokenPositions are not read here.
    // NOTE(review): returning null presumably tells the caller to discard the match -- confirm
    // against the filter contract.
    AnalyzedTokenReadings[] tokens = match.getSentence().getTokensWithoutWhitespace();
    int i = patternTokenPos;
    int j;
    boolean isPlural = true;
    boolean isPrevNoun = false;
    Pattern substPattern = null;
    Pattern gnPattern = null;
    Pattern adjPattern = null;
    // Gender/number combinations seen among the preceding nouns and determiners; they select
    // which forms are synthesized as suggestions at the end.
    boolean canBeMS = false;
    boolean canBeFS = false;
    boolean canBeMP = false;
    boolean canBeFP = false;
    boolean canBeP = false;
    /* Count all nouns and determiners before the adjectives */
    // Takes care of acceptable combinations.
    // Every counter is indexed by "level"; the level is increased below each time a
    // level-changing preposition is crossed while walking backwards.
    int[] cNt = new int[maxLevels];
    int[] cNMS = new int[maxLevels];
    int[] cNFS = new int[maxLevels];
    int[] cNMP = new int[maxLevels];
    int[] cNMN = new int[maxLevels];
    int[] cNFP = new int[maxLevels];
    int[] cNCS = new int[maxLevels];
    int[] cNCP = new int[maxLevels];
    int[] cDMS = new int[maxLevels];
    int[] cDFS = new int[maxLevels];
    int[] cDMP = new int[maxLevels];
    int[] cDFP = new int[maxLevels];
    int[] cN = new int[maxLevels];
    int[] cD = new int[maxLevels];
    int level = 0;
    j = 1;
    initializeApparitions();
    // Walk backwards from the token just before the adjective (index i - j, never index 0).
    while (i - j > 0 && keepCounting(tokens[i - j]) && level < maxLevels) {
      if (!isPrevNoun) {
        if (matchPostagRegexp(tokens[i - j], NOM) || (
        // adjective or participle without a noun, but preceded by some determiner
        i - j - 1 > 0 && !matchPostagRegexp(tokens[i - j], NOM) && matchPostagRegexp(tokens[i - j], ADJECTIU)
            && matchPostagRegexp(tokens[i - j - 1], DET))) {
          if (matchPostagRegexp(tokens[i - j], _GN_MS)) {
            cNMS[level]++;
            canBeMS = true;
          }
          if (matchPostagRegexp(tokens[i - j], _GN_FS)) {
            cNFS[level]++;
            canBeFS = true;
          }
          if (matchPostagRegexp(tokens[i - j], _GN_MP)) {
            cNMP[level]++;
            canBeMP = true;
          }
          if (matchPostagRegexp(tokens[i - j], _GN_FP)) {
            cNFP[level]++;
            canBeFP = true;
          }
        }
        // Token not tagged by the _GN_ pattern: classify it by its noun tag instead.
        if (!matchPostagRegexp(tokens[i - j], _GN_)) {
          if (matchPostagRegexp(tokens[i - j], NOM_MS)) {
            cNMS[level]++;
            canBeMS = true;
          } else if (matchPostagRegexp(tokens[i - j], NOM_FS)) {
            cNFS[level]++;
            canBeFS = true;
          } else if (matchPostagRegexp(tokens[i - j], NOM_MP)) {
            cNMP[level]++;
            canBeMP = true;
          } else if (matchPostagRegexp(tokens[i - j], NOM_MN)) {
            cNMN[level]++;
            canBeMS = true;
            canBeMP = true;
          } else if (matchPostagRegexp(tokens[i - j], NOM_FP)) {
            cNFP[level]++;
            canBeFP = true;
          } else if (matchPostagRegexp(tokens[i - j], NOM_CS)) {
            cNCS[level]++;
            canBeMS = true;
            canBeFS = true;
          } else if (matchPostagRegexp(tokens[i - j], NOM_CP)) {
            cNCP[level]++;
            canBeFP = true;
            canBeMP = true;
          }
        }
      }
      // avoid two consecutive nouns
      if (matchPostagRegexp(tokens[i - j], NOM)) {
        cNt[level]++;
        isPrevNoun = true;
      } else {
        isPrevNoun = false;
      }

      // Determiners matching DET_CS / DET_CP take their gender from the token that follows them.
      if (matchPostagRegexp(tokens[i - j], DET_CS)) {
        if (matchPostagRegexp(tokens[i - j + 1], NOM_MS)) {
          cDMS[level]++;
          canBeMS = true;
        }
        if (matchPostagRegexp(tokens[i - j + 1], NOM_FS)) {
          cDFS[level]++;
          canBeFS = true;
        }
      }
      if (matchPostagRegexp(tokens[i - j], DET_CP)) {
        // Plural determiners are booked in the plural counters. (The masculine counters are only
        // ever read as cDMS + cDMP and the feminine ones only in the cD total, so the decisions
        // below are unaffected by which of the two is incremented.)
        if (matchPostagRegexp(tokens[i - j + 1], NOM_MP)) {
          cDMP[level]++;
          canBeMP = true;
        }
        if (matchPostagRegexp(tokens[i - j + 1], NOM_FP)) {
          cDFP[level]++;
          canBeFP = true;
        }
      }
      //TODO DET_CS, DET_CP without noun afterwards
      if (!matchPostagRegexp(tokens[i - j], ADVERBI)) {
        if (matchPostagRegexp(tokens[i - j], DET_MS)) {
          cDMS[level]++;
          canBeMS = true;
        }
        if (matchPostagRegexp(tokens[i - j], DET_FS)) {
          cDFS[level]++;
          canBeFS = true;
        }
        if (matchPostagRegexp(tokens[i - j], DET_MP)) {
          cDMP[level]++;
          canBeMP = true;
        }
        if (matchPostagRegexp(tokens[i - j], DET_FP)) {
          cDFP[level]++;
          canBeFP = true;
        }
      }
      if (i - j - 1 > 0) {
        if (matchRegexp(tokens[i - j].getToken(), PREPOSICIO_CANVI_NIVELL)
            && matchPostagRegexp(tokens[i - j], PREPOSICIONS) // exclude "des" when it is only determiner
            && !matchPostagRegexp(tokens[i - j], CONJUNCIO) // exclude the token when it is tagged as a conjunction
            && !matchRegexp(tokens[i - j - 1].getToken(), COORDINACIO_IONI)
            && !matchPostagRegexp(tokens[i - j + 1], ADVERBI)) {
          level++;
          //exception: d'environ
        } else if (tokens[i - j].getToken().equalsIgnoreCase("d'")
            && tokens[i - j + 1].getToken().equalsIgnoreCase("environ")) {
          level++;
        }
      }
      // updateJValue may move j (helper defined elsewhere in the class); the token at the
      // resulting position is the one recorded by updateApparitions.
      j = updateJValue(tokens, i, j, level);
      updateApparitions(tokens[i - j]);
      j++;
    }
    // Turn the last level index into a level count, capped at the array size.
    level++;
    if (level > maxLevels) {
      level = maxLevels;
    }
    j = 0;
    int cNtotal = 0;
    int cDtotal = 0;
    while (j < level) {
      cN[j] = cNMS[j] + cNFS[j] + cNMP[j] + cNFP[j] + cNCS[j] + cNCP[j] + cNMN[j];
      cD[j] = cDMS[j] + cDFS[j] + cDMP[j] + cDFP[j];
      cNtotal += cN[j];
      cDtotal += cD[j];

      // exceptions: adjective is plural and there are several nouns before
      if (matchPostagRegexp(tokens[i], ADJECTIU_MP) && (cN[j] > 1 || cD[j] > 1)
          && (cNMS[j] + cNMN[j] + cNMP[j] + cNCS[j] + cNCP[j] + cDMS[j] + cDMP[j]) > 0
          && (cNFS[j] + cNFP[j] <= cNt[j])) {
        return null;
      }
      if (matchPostagRegexp(tokens[i], ADJECTIU_FP) && (cN[j] > 1 || cD[j] > 1)
          && ((cNMS[j] + cNMP[j] + cNMN[j] + cDMS[j] + cDMP[j]) == 0 || (cNt[j] > 0 && cNFS[j] + cNFP[j] >= cNt[j]))) {
        return null;
      }
      // Adjective can't be singular
      if (cN[j] + cD[j] > 0) {
        isPlural = isPlural && cD[j] > 1 && level > 1;
        canBeP = canBeP || cN[j] > 1;
      }
      j++;
    }
    // comma + plural noun
    isPlural = isPlural || (i - 2 > 0 && cNMP[0] + cNFP[0] + cNCP[0] > 0 && tokens[i - 2].getToken().equals(","));

    // there is no noun and no determiner
    // (this is also the exit taken when i <= 1, because the counting loop above never ran)
    if (cNtotal == 0 && cDtotal == 0) {
      return null;
    }

    // patterns according to the analyzed adjective
    if (matchPostagRegexp(tokens[i], ADJECTIU_CS)) {
      substPattern = GN_CS;
      adjPattern = ADJECTIU_S;
      gnPattern = _GN_CS;
    } else if (matchPostagRegexp(tokens[i], ADJECTIU_CP)) {
      substPattern = GN_CP;
      adjPattern = ADJECTIU_P;
      gnPattern = _GN_CP;
    } else if (matchPostagRegexp(tokens[i], ADJECTIU_MN)) {
      substPattern = GN_MN;
      adjPattern = ADJECTIU_M;
      gnPattern = _GN_MN;
    } else if (matchPostagRegexp(tokens[i], ADJECTIU_FN)) {
      substPattern = GN_FN;
      adjPattern = ADJECTIU_FN;
      gnPattern = _GN_FN;
    } else if (matchPostagRegexp(tokens[i], ADJECTIU_MS)) {
      substPattern = GN_MS;
      adjPattern = ADJECTIU_MS;
      gnPattern = _GN_MS;
    } else if (matchPostagRegexp(tokens[i], ADJECTIU_FS)) {
      substPattern = GN_FS;
      adjPattern = ADJECTIU_FS;
      gnPattern = _GN_FS;
    } else if (matchPostagRegexp(tokens[i], ADJECTIU_MP)) {
      substPattern = GN_MP;
      adjPattern = ADJECTIU_MP;
      gnPattern = _GN_MP;
    } else if (matchPostagRegexp(tokens[i], ADJECTIU_FP)) {
      substPattern = GN_FP;
      adjPattern = ADJECTIU_FP;
      gnPattern = _GN_FP;
    }

    // None of the adjective patterns matched the token: nothing to check.
    if (substPattern == null || gnPattern == null || adjPattern == null) {
      return null;
    }

    // combinations Det/Nom + adv (1,2..) + adj.
    // If there is agreement, the rule doesn't match
    j = 1;
    boolean keepCount = true;
    while (i - j > 0 && keepCount) {
      if (matchPostagRegexp(tokens[i - j], NOM_DET) && matchPostagRegexp(tokens[i - j], gnPattern)) {
        return null; // there is a previous agreeing noun
      } else if (!matchPostagRegexp(tokens[i - j], _GN_) && matchPostagRegexp(tokens[i - j], substPattern)) {
        return null; // there is a previous agreeing noun
      }
      // stop after the first noun/determiner has been examined
      keepCount = !matchPostagRegexp(tokens[i - j], NOM_DET);
      j++;
    }

    // Necessary condition: previous token is a non-agreeing noun
    // or it is adjective or adverb (not preceded by verb)
    boolean previousTokenAllowsMatch =
        (matchPostagRegexp(tokens[i - 1], NOM) && !matchPostagRegexp(tokens[i - 1], substPattern))
        || (matchPostagRegexp(tokens[i - 1], _GN_) && !matchPostagRegexp(tokens[i - 1], gnPattern))
        || (matchPostagRegexp(tokens[i - 1], ADJECTIU) && !matchPostagRegexp(tokens[i - 1], adjPattern))
        || (i > 2 && matchPostagRegexp(tokens[i - 1], ADVERBIS_ACCEPTATS) && !matchPostagRegexp(tokens[i - 2], VERB)
            && !matchPostagRegexp(tokens[i - 2], PREPOSICIONS))
        || (i > 3 && matchPostagRegexp(tokens[i - 1], LOC_ADV) && matchPostagRegexp(tokens[i - 2], LOC_ADV)
            && !matchPostagRegexp(tokens[i - 3], VERB) && !matchPostagRegexp(tokens[i - 3], PREPOSICIONS));
    if (!previousTokenAllowsMatch) {
      return null;
    }

    // When the adjective is singular but must be plural the rule matches directly;
    // in every other case look further back for an agreeing antecedent first.
    if (!(isPlural && matchPostagRegexp(tokens[i], ADJECTIU_S))) {
      // look into previous words
      j = 1;
      initializeApparitions();
      // with a single level only the three preceding positions (j < 4) are inspected
      while (i - j > 0 && keepCounting(tokens[i - j]) && (level > 1 || j < 4)) {
        // there is a previous agreeing noun
        if (!matchPostagRegexp(tokens[i - j], _GN_) && matchPostagRegexp(tokens[i - j], NOM_DET)
            && matchPostagRegexp(tokens[i - j], substPattern)) {
          return null;
          // there is a previous agreeing adjective (in a nominal group)
        } else if (matchPostagRegexp(tokens[i - j], gnPattern)) {
          return null;
        }
        j = updateJValue(tokens, i, j, 0);
        updateApparitions(tokens[i - j]);
        j++;
      }
    }

    FrenchSynthesizer synth = FrenchSynthesizer.INSTANCE;

    // The rule matches
    // Synthesize suggestions: each step below is only tried while the list is still empty.
    List<String> suggestions = new ArrayList<>();
    AnalyzedToken at = getAnalyzedToken(tokens[patternTokenPos], ADJECTIU_CS);
    if (at != null) {
      suggestions.addAll(Arrays.asList(synth.synthesize(at,"J e p", true)));
    }
    if (suggestions.isEmpty()) {
      at = getAnalyzedToken(tokens[patternTokenPos], ADJECTIU_CP);
      if (at != null) {
        suggestions.addAll(Arrays.asList(synth.synthesize(at,"J e s", true)));
      }
    }
    if (suggestions.isEmpty() && isPlural) {
      at = getAnalyzedToken(tokens[patternTokenPos], ADJECTIU_P);
      if (at != null) {
        suggestions.addAll(Arrays.asList(synth.synthesize(at, "J . p|V ppa . p", true)));
      }
    }
    at = getAnalyzedToken(tokens[patternTokenPos], ADJECTIU);
    if (at != null && suggestions.isEmpty()) {
      if (canBeMS && !isPlural) {
        suggestions.addAll(Arrays.asList(synth.synthesize(at, "J [me] sp?|V ppa m s", true)));
      }
      if (canBeFS && !isPlural) {
        suggestions.addAll(Arrays.asList(synth.synthesize(at, "J [fe] sp?|V ppa f s", true)));
      }
      if (canBeMP) {
        suggestions.addAll(Arrays.asList(synth.synthesize(at, "J [me] s?p|V ppa m p", true)));
      }
      if (canBeFP) {
        suggestions.addAll(Arrays.asList(synth.synthesize(at, "J [fe] s?p|V ppa f p", true)));
      }
      if (canBeMS && (isPlural || canBeP)) {
        suggestions.addAll(Arrays.asList(synth.synthesize(at, "J [me] s?p|V ppa m p", true)));
      }
      if (canBeFS && !canBeMS && (isPlural || canBeP)) {
        suggestions.addAll(Arrays.asList(synth.synthesize(at, "J [fe] s?p|V ppa f p", true)));
      }
    }

    // set suggestions removing duplicates (several branches above can add the same form)
    suggestions = suggestions.stream().distinct().collect(Collectors.toList());
    // avoid the original token as suggestion; Locale.ROOT keeps the lower-casing independent
    // of the JVM default locale, and remove() is a no-op when the token is not in the list
    suggestions.remove(tokens[patternTokenPos].getToken().toLowerCase(java.util.Locale.ROOT));
    if (suggestions.isEmpty()) {
      return null;
    }
    match.setSuggestedReplacements(suggestions);
    return match;

  }