public RuleMatch acceptRuleMatch()

in languagetool-language-modules/es/src/main/java/org/languagetool/rules/es/PostponedAdjectiveConcordanceFilter.java [114:451]


  /**
   * Decides whether a candidate match on the token at {@code patternTokenPos} is kept and,
   * if so, attaches replacement suggestions to it.
   *
   * <p>The method walks backwards from the analysed token, counting the gender/number readings
   * of the preceding nouns and determiners per "level", and returns {@code null} (match rejected)
   * as soon as an agreeing preceding element is found or no usable suggestion can be produced.
   *
   * @param match the candidate match; its sentence supplies the tokens, and on acceptance it is
   *     mutated (suggested replacements, and offsets when {@code addComma} is set) and returned
   * @param arguments filter arguments; only the optional {@code "addComma"} key is read here
   *     (default {@code "false"}, compared case-insensitively with {@code "true"})
   * @param patternTokenPos index of the analysed token in
   *     {@code match.getSentence().getTokensWithoutWhitespace()}
   * @param patternTokens not used by this implementation
   * @param tokenPositions not used by this implementation
   * @return {@code match} with its suggested replacements set, or {@code null} when the match
   *     is rejected
   * @throws IOException declared by the signature; presumably propagated from the synthesizer
   *     calls (not verifiable from this method alone)
   */
  public RuleMatch acceptRuleMatch(RuleMatch match, Map<String, String> arguments, int patternTokenPos,
                                   AnalyzedTokenReadings[] patternTokens, List<Integer> tokenPositions) throws IOException {

    boolean addComma = getOptional("addComma", arguments, "false").equalsIgnoreCase("true");
    AnalyzedTokenReadings[] tokens = match.getSentence().getTokensWithoutWhitespace();
    int i = patternTokenPos;
    int j;
    boolean isPlural = true;
    boolean isPrevNoun = false;
    Pattern substPattern = null;
    Pattern gnPattern = null;
    Pattern adjPattern = null;
    boolean canBeMS = false;
    boolean canBeFS = false;
    boolean canBeMP = false;
    boolean canBeFP = false;
    boolean canBeP = false;
    /* Count all nouns and determiners before the adjectives */
    // Takes care of acceptable combinations.
    // One counter per level; cN* count noun readings, cD* count determiner readings,
    // cNt counts tokens tagged as noun regardless of gender/number.
    int[] cNt = new int[maxLevels];
    int[] cNMS = new int[maxLevels];
    int[] cNFS = new int[maxLevels];
    int[] cNMP = new int[maxLevels];
    int[] cNMN = new int[maxLevels];
    int[] cNFP = new int[maxLevels];
    int[] cNCS = new int[maxLevels];
    int[] cNCP = new int[maxLevels];
    int[] cDMS = new int[maxLevels];
    int[] cDFS = new int[maxLevels];
    int[] cDMP = new int[maxLevels];
    int[] cDFP = new int[maxLevels];
    int[] cN = new int[maxLevels];
    int[] cD = new int[maxLevels];
    int level = 0;
    j = 1;
    initializeApparitions();
    // Walk backwards from the token just before the analysed one; index 0 is never visited.
    while (i - j > 0 && keepCounting(tokens[i - j]) && level < maxLevels) {
      if (!isPrevNoun) {
        if (matchPostagRegexp(tokens[i - j], NOM) || (
        // adjective or participle without a noun, but with some determiner before it
        i - j - 1 > 0 && !matchPostagRegexp(tokens[i - j], NOM) && matchPostagRegexp(tokens[i - j], ADJECTIU)
            && matchPostagRegexp(tokens[i - j - 1], DET))) {
          if (matchPostagRegexp(tokens[i - j], _GN_MS)) {
            cNMS[level]++;
            canBeMS = true;
          }
          if (matchPostagRegexp(tokens[i - j], _GN_FS)) {
            cNFS[level]++;
            canBeFS = true;
          }
          if (matchPostagRegexp(tokens[i - j], _GN_MP)) {
            cNMP[level]++;
            canBeMP = true;
          }
          if (matchPostagRegexp(tokens[i - j], _GN_FP)) {
            cNFP[level]++;
            canBeFP = true;
          }
        }
        if (!matchPostagRegexp(tokens[i - j], _GN_)) {
          if (matchPostagRegexp(tokens[i - j], NOM_MS)) {
            cNMS[level]++;
            canBeMS = true;
          } else if (matchPostagRegexp(tokens[i - j], NOM_FS)) {
            cNFS[level]++;
            canBeFS = true;
          } else if (matchPostagRegexp(tokens[i - j], NOM_MP)) {
            cNMP[level]++;
            canBeMP = true;
          } else if (matchPostagRegexp(tokens[i - j], NOM_MN)) {
            cNMN[level]++;
            canBeMS = true;
            canBeMP = true;
          } else if (matchPostagRegexp(tokens[i - j], NOM_FP)) {
            cNFP[level]++;
            canBeFP = true;
          } else if (matchPostagRegexp(tokens[i - j], NOM_CS)) {
            cNCS[level]++;
            canBeMS = true;
            canBeFS = true;
          } else if (matchPostagRegexp(tokens[i - j], NOM_CP)) {
            cNCP[level]++;
            canBeFP = true;
            canBeMP = true;
          }
        }
      }
      // avoid two consecutive nouns
      if (matchPostagRegexp(tokens[i - j], NOM)) {
        cNt[level]++;
        isPrevNoun = true;
      } else {
        isPrevNoun = false;
      }

      // A DET_CS determiner is counted with the gender of the token that follows it.
      if (matchPostagRegexp(tokens[i - j], DET_CS)) {
        if (matchPostagRegexp(tokens[i - j + 1], NOM_MS)) {
          cDMS[level]++;
          canBeMS = true;
        }
        if (matchPostagRegexp(tokens[i - j + 1], NOM_FS)) {
          cDFS[level]++;
          canBeFS = true;
        }
      }
      if (!matchPostagRegexp(tokens[i - j], ADVERBI)) {
        if (matchPostagRegexp(tokens[i - j], DET_MS)) {
          cDMS[level]++;
          canBeMS = true;
        }
        if (matchPostagRegexp(tokens[i - j], DET_FS)) {
          cDFS[level]++;
          canBeFS = true;
        }
        if (matchPostagRegexp(tokens[i - j], DET_MP)) {
          cDMP[level]++;
          canBeMP = true;
        }
        if (matchPostagRegexp(tokens[i - j], DET_FP)) {
          cDFP[level]++;
          canBeFP = true;
        }
      }
      // A token matching PREPOSICIO_CANVI_NIVELL opens a new counting level, unless it is
      // preceded by a COORDINACIO_IONI token or followed by an adverb.
      if (i - j > 0) {
        if (matchRegexp(tokens[i - j].getToken(), PREPOSICIO_CANVI_NIVELL)
            && !matchRegexp(tokens[i - j - 1].getToken(), COORDINACIO_IONI)
            && !matchPostagRegexp(tokens[i - j + 1], ADVERBI)) {
          level++;
        }
      }
      // After a coordination inside a deeper level, look up to three tokens further back for a
      // preposition and, if one is found, jump the cursor directly onto it.
      if (level > 0 && matchRegexp(tokens[i - j].getToken(), COORDINACIO_IONI)) {
        int k = 1;
        while (k < 4 && i - j - k > 0
            && (matchPostagRegexp(tokens[i - j - k], KEEP_COUNT)
                || matchRegexp(tokens[i - j - k].getToken(), KEEP_COUNT2)
                || matchPostagRegexp(tokens[i - j - k], ADVERBIS_ACCEPTATS))
            && (!matchRegexp(tokens[i - j - k].getToken(), STOP_COUNT))) {
          if (matchPostagRegexp(tokens[i - j - k], PREPOSICIONS)) {
            j = j + k;
            break;
          }
          k++;
        }
      }
      updateApparitions(tokens[i - j]);
      j++;
    }
    // From here on, level is the number of levels to inspect (capped at maxLevels).
    level++;
    if (level > maxLevels) {
      level = maxLevels;
    }
    j = 0;
    int cNtotal = 0;
    int cDtotal = 0;
    while (j < level) {
      cN[j] = cNMS[j] + cNFS[j] + cNMP[j] + cNFP[j] + cNCS[j] + cNCP[j] + cNMN[j];
      cD[j] = cDMS[j] + cDFS[j] + cDMP[j] + cDFP[j];
      cNtotal += cN[j];
      cDtotal += cD[j];

      // exceptions: adjective is plural and there are several nouns before
      if (matchPostagRegexp(tokens[i], ADJECTIU_MP) && (cN[j] > 1 || cD[j] > 1)
          && (cNMS[j] + cNMN[j] + cNMP[j] + cNCS[j] + cNCP[j] + cDMS[j] + cDMP[j]) > 0
          && (cNFS[j] + cNFP[j] <= cNt[j])) {
        return null;
      }
      if (matchPostagRegexp(tokens[i], ADJECTIU_FP) && (cN[j] > 1 || cD[j] > 1)
          && ((cNMS[j] + cNMP[j] + cNMN[j] + cDMS[j] + cDMP[j]) == 0 || (cNt[j] > 0 && cNFS[j] + cNFP[j] >= cNt[j]))) {
        return null;
      }
      // Adjective can't be singular
      if (cN[j] + cD[j] > 0) {
        isPlural = isPlural && cD[j] > 1;
        canBeP = canBeP || cN[j]>1;
      }
      j++;
    }
    // comma + plural noun
    isPlural = isPlural || (i - 2 > 0 && cNMP[0] + cNFP[0] + cNCP[0] > 0 && tokens[i - 2].getToken().equals(","));

    // there is neither a noun nor a determiner before the adjective
    if (cNtotal == 0 && cDtotal == 0) {
      return null;
    }

    // patterns according to the analyzed adjective
    if (matchPostagRegexp(tokens[i], ADJECTIU_CS)) {
      substPattern = GN_CS;
      adjPattern = ADJECTIU_S;
      gnPattern = _GN_CS;
    } else if (matchPostagRegexp(tokens[i], ADJECTIU_CP)) {
      substPattern = GN_CP;
      adjPattern = ADJECTIU_P;
      gnPattern = _GN_CP;
    } else if (matchPostagRegexp(tokens[i], ADJECTIU_MS)) {
      substPattern = GN_MS;
      adjPattern = ADJECTIU_MS;
      gnPattern = _GN_MS;
    } else if (matchPostagRegexp(tokens[i], ADJECTIU_FS)) {
      substPattern = GN_FS;
      adjPattern = ADJECTIU_FS;
      gnPattern = _GN_FS;
    } else if (matchPostagRegexp(tokens[i], ADJECTIU_MP)) {
      substPattern = GN_MP;
      adjPattern = ADJECTIU_MP;
      gnPattern = _GN_MP;
    } else if (matchPostagRegexp(tokens[i], ADJECTIU_FP)) {
      substPattern = GN_FP;
      adjPattern = ADJECTIU_FP;
      gnPattern = _GN_FP;
    }

    // the analysed token matched none of the adjective patterns above
    if (substPattern == null || gnPattern == null || adjPattern == null) {
      return null;
    }

    // combinations Det/Nom + adv (1,2..) + adj.
    // If there is agreement, the rule doesn't match
    j = 1;
    boolean keepCount = true;
    while (i - j > 0 && keepCount) {
      if (matchPostagRegexp(tokens[i - j], NOM_DET) && matchPostagRegexp(tokens[i - j], gnPattern)) {
        return null; // there is a previous agreeing noun
      } else if (!matchPostagRegexp(tokens[i - j], _GN_) && matchPostagRegexp(tokens[i - j], substPattern)) {
        return null; // there is a previous agreeing noun
      }
      // stop after the first noun/determiner has been examined
      keepCount = !matchPostagRegexp(tokens[i - j], NOM_DET);
      j++;
    }

    // Necessary condition: previous token is a non-agreeing noun
    // or it is adjective or adverb (not preceded by verb)
    boolean previousTokenAllowsMatch =
        (matchPostagRegexp(tokens[i - 1], NOM) && !matchPostagRegexp(tokens[i - 1], substPattern))
        || (matchPostagRegexp(tokens[i - 1], ADJECTIU) && !matchPostagRegexp(tokens[i - 1], gnPattern))
        || (matchPostagRegexp(tokens[i - 1], ADJECTIU) && !matchPostagRegexp(tokens[i - 1], adjPattern))
        || (i > 2 && matchPostagRegexp(tokens[i - 1], ADVERBIS_ACCEPTATS) && !matchPostagRegexp(tokens[i - 2], VERB)
            && !matchPostagRegexp(tokens[i - 2], PREPOSICIONS))
        || (i > 3 && matchPostagRegexp(tokens[i - 1], LOC_ADV) && matchPostagRegexp(tokens[i - 2], LOC_ADV)
            && !matchPostagRegexp(tokens[i - 3], VERB) && !matchPostagRegexp(tokens[i - 3], PREPOSICIONS));
    if (!previousTokenAllowsMatch) {
      return null;
    }

    // When the adjective must be plural but is singular, the rule matches without further checks.
    if (!(isPlural && matchPostagRegexp(tokens[i], ADJECTIU_S))) {
      // look into previous words
      j = 1;
      initializeApparitions();
      while (i - j > 0 && keepCounting(tokens[i - j]) && (level > 1 || j < 4)) {
        // there is a previous agreeing noun
        if (!matchPostagRegexp(tokens[i - j], _GN_) && matchPostagRegexp(tokens[i - j], NOM_DET)
            && matchPostagRegexp(tokens[i - j], substPattern)) {
          return null;
          // there is a previous agreeing adjective (in a nominal group)
        } else if (matchPostagRegexp(tokens[i - j], gnPattern)) {
          return null;
        }
        // (a substPattern match alone, without NOM_DET, is not treated as agreement in this loop)
        updateApparitions(tokens[i - j]);
        j++;
      }
    }

    SpanishSynthesizer synth = SpanishSynthesizer.INSTANCE;

    // The rule matches
    // Synthesize suggestions
    List<String> suggestions = new ArrayList<>();
    AnalyzedToken at = getAnalyzedToken(tokens[patternTokenPos], ADJECTIU_CS);
    if (at != null) {
      suggestions.addAll(Arrays.asList(synth.synthesize(at,"A..CP.", true)));
    }
    if (suggestions.isEmpty()) {
      at = getAnalyzedToken(tokens[patternTokenPos], ADJECTIU_CP);
      if (at != null) {
        suggestions.addAll(Arrays.asList(synth.synthesize(at,"A..CS.", true)));
      }
    }
    if (suggestions.isEmpty() && isPlural) {
      at = getAnalyzedToken(tokens[patternTokenPos], ADJECTIU_P);
      if (at != null) {
        suggestions.addAll(Arrays.asList(synth.synthesize(at, "A...P.|V.P..P.|PX..P.*", true)));
      }
    }
    at = getAnalyzedToken(tokens[patternTokenPos], ADJECTIU);
    if (at != null && suggestions.isEmpty()) {
      if (canBeMS && !isPlural) {
        suggestions.addAll(Arrays.asList(synth.synthesize(at, "A..MS.|V.P..SM|PX.MS.*", true)));
      }
      if (canBeFS && !isPlural) {
        suggestions.addAll(Arrays.asList(synth.synthesize(at, "A..FS.|V.P..SF|PX.FS.*", true)));
      }
      if (canBeMP) {
        suggestions.addAll(Arrays.asList(synth.synthesize(at, "A..MP.|V.P..PM|PX.MP.*", true)));
      }
      if (canBeFP) {
        suggestions.addAll(Arrays.asList(synth.synthesize(at, "A..FP.|V.P..PF|PX.FP.*", true)));
      }
      if (canBeMS && (isPlural || canBeP)) {
        suggestions.addAll(Arrays.asList(synth.synthesize(at, "A..MP.|V.P..PM|PX.MP.*", true)));
      }
      if (canBeFS && !canBeMS && (isPlural || canBeP)) {
        suggestions.addAll(Arrays.asList(synth.synthesize(at, "A..FP.|V.P..PF|PX.FP.*", true)));
      }
    }
    // Avoid the original token as suggestion. Several branches above can add the same form
    // more than once, so every occurrence must go, not just the first one.
    String originalLowercase = tokens[patternTokenPos].getToken().toLowerCase();
    suggestions.removeIf(originalLowercase::equals);
    if (suggestions.isEmpty()) {
      return null;
    }
    List<String> definitiveSugestions = new ArrayList<>();
    if (addComma) {
      // First option: keep the word and insert a comma before it; the remaining options
      // replace the word. The match start is moved one position back to make room for this.
      definitiveSugestions.add(", " + tokens[patternTokenPos].getToken());
      for (String s : suggestions) {
        definitiveSugestions.add(" " + s);
      }
      match.setOffsetPosition(match.getFromPos() - 1,  match.getToPos());
      match.setSentencePosition(match.getFromPosSentence() - 1, match.getToPosSentence());
    } else {
      definitiveSugestions.addAll(suggestions);
    }
    match.setSuggestedReplacements(definitiveSugestions.stream().distinct().collect(Collectors.toList()));
    return match;
  }