protected List getRuleMatches()

in languagetool-core/src/main/java/org/languagetool/rules/spelling/morfologik/MorfologikSpellerRule.java [325:549]


  /**
   * Builds the rule match for a single word that may be misspelled.
   *
   * <p>At most one match is returned, because several matches on the same word (or words)
   * cannot be shown to the user. The method proceeds in this order:
   * <ol>
   *   <li>Return an empty list if the word is neither misspelled according to {@code speller1}
   *       nor prohibited, if {@code ignorePotentiallyMisspelledWord} accepts it, or if the last
   *       match in {@code ruleMatchesSoFar} already ends after {@code startPos}.</li>
   *   <li>Try to explain the error as a wrongly placed space between this word and the previous
   *       token, then (only if that produced nothing) between this word and the next token.</li>
   *   <li>Otherwise create a plain "unknown word" match covering just {@code word}.</li>
   *   <li>Unless suggestions are disabled or limited, attach suggestions: either a
   *       "insert a space" suggestion for words starting with numbers/bullets, or lazily computed
   *       suggestions via {@code appendLazySuggestions}.</li>
   * </ol>
   *
   * @param word the token text being checked
   * @param startPos start position of {@code word}; used as the from-position of the created match
   * @param sentence the sentence handed to every created {@link RuleMatch}
   * @param ruleMatchesSoFar matches created earlier; used to skip a word already covered by the
   *        previous match and to limit suggestion work when there are many matches
   * @param idx index of the checked token inside {@code tokens}
   * @param tokens the token array that contains the checked word and its neighbours
   * @return a list with zero or one {@link RuleMatch}
   * @throws IOException declared by the signature; presumably raised by the speller lookups
   */
  protected List<RuleMatch> getRuleMatches(String word, int startPos, AnalyzedSentence sentence, List<RuleMatch> ruleMatchesSoFar, int idx, AnalyzedTokenReadings[] tokens) throws IOException {
    // We create only one rule match. 
    // Several rule matches on the same word or words can not be shown to the user.
    List<RuleMatch> ruleMatches = new ArrayList<>();
    RuleMatch ruleMatch = null;
    
    if (!isMisspelled(speller1, word) && !isProhibited(word)) {
      return ruleMatches;
    }

    if (ignorePotentiallyMisspelledWord(word)) {
      return ruleMatches;
    }
    
    //the current word is already dealt with in the previous match, so do nothing
    if (!ruleMatchesSoFar.isEmpty() && ruleMatchesSoFar.get(ruleMatchesSoFar.size() - 1).getToPos() > startPos) {
      return ruleMatches;
    }
    
    String beforeSuggestionStr = ""; //to be added before the suggestion if there is a suggestion for a split word
    String afterSuggestionStr = "";  //to be added after
    
    // Check for split word with previous word
    if (idx > 0 && tokens[idx].isWhitespaceBefore()) {
      String prevWord = tokens[idx - 1].getToken();
      // Skip empty tokens, tokens containing digits, and very frequent previous words.
      if (prevWord.length() > 0 && !StringUtils.containsAny(prevWord, '0', '1', '2', '3', '4', '5', '6', '7', '8', '9')
          && getFrequency(speller1, prevWord) < MAX_FREQUENCY_FOR_SPLITTING) {
        int prevStartPos = tokens[idx - 1].getStartPos();
        // "thanky ou" -> "thank you"
        String sugg1a = prevWord.substring(0, prevWord.length() - 1);
        String sugg1b = prevWord.substring(prevWord.length() - 1) + word;
        if (sugg1a.length() > 1 && sugg1b.length() > 2 && !isMisspelled(speller1, sugg1a)
            && !isMisspelled(speller1, sugg1b)
            && getFrequency(speller1, sugg1a) + getFrequency(speller1, sugg1b) > getFrequency(speller1, prevWord)) {
          ruleMatch = createWrongSplitMatch(sentence, ruleMatchesSoFar, startPos, word, sugg1a, sugg1b, prevStartPos);
          beforeSuggestionStr = prevWord + " ";
        }
        // "than kyou" -> "thank you" ; but not "She awaked" -> "Shea waked"
        String sugg2a = prevWord + word.charAt(0);
        String sugg2b = word.substring(1);
        if (sugg2b.length() > 2 && !isMisspelled(speller1, sugg2a) && !isMisspelled(speller1, sugg2b)) {
          if (ruleMatch == null) {
            // The frequency check is only applied when this would be the first suggestion.
            if (getFrequency(speller1, sugg2a) + getFrequency(speller1, sugg2b) > getFrequency(speller1, prevWord)) {
              ruleMatch = createWrongSplitMatch(sentence, ruleMatchesSoFar, startPos, word, sugg2a, sugg2b,
                  prevStartPos);
              beforeSuggestionStr = prevWord + " ";
            }
          } else {
            ruleMatch.addSuggestedReplacement((sugg2a + " " + sugg2b).trim());
          }
        }
        // "g oing" -> "going"
        String sugg = prevWord + word;
        if (word.equals(word.toLowerCase()) && !isMisspelled(speller1, sugg)) {
          if (ruleMatch == null) {
            if (getFrequency(speller1, sugg) >= getFrequency(speller1, prevWord)) {
              // The match spans from the start of the previous word to the end of this word.
              ruleMatch = new RuleMatch(this, sentence, prevStartPos, startPos + word.length(),
                  messages.getString("spelling"), messages.getString("desc_spelling_short"));
              ruleMatch.setType(RuleMatch.Type.UnknownWord);
              beforeSuggestionStr = prevWord + " ";
              ruleMatch.setSuggestedReplacement(sugg);
            }
          } else {
            ruleMatch.addSuggestedReplacement(sugg);
          }
        }
        // Return right away only if the previous word is itself misspelled. Otherwise fall
        // through: the match is kept and beforeSuggestionStr is later passed to
        // appendLazySuggestions together with the word.
        if (ruleMatch != null && isMisspelled(speller1, prevWord)) {
          ruleMatches.add(ruleMatch);
          return ruleMatches;
        }
      }
    }
        
    // Check for split word with next word
    // (mirrors the previous-word checks above; only tried if those produced no match)
    if (ruleMatch == null && idx < tokens.length - 1 && tokens[idx + 1].isWhitespaceBefore()) {
      String nextWord = tokens[idx + 1].getToken();
      if (nextWord.length() > 0 && !StringUtils.containsAny(nextWord, '0', '1', '2', '3', '4', '5', '6', '7', '8', '9')
          && getFrequency(speller1, nextWord) < MAX_FREQUENCY_FOR_SPLITTING) {
        int nextStartPos = tokens[idx + 1].getStartPos();
        // Move the last character of this word to the front of the next word.
        String sugg1a = word.substring(0, word.length() - 1);
        String sugg1b = word.substring(word.length() - 1) + nextWord;
        if (sugg1a.length() > 1 && sugg1b.length() > 2 && !isMisspelled(speller1, sugg1a) && !isMisspelled(speller1, sugg1b) &&
            getFrequency(speller1, sugg1a) + getFrequency(speller1, sugg1b) > getFrequency(speller1, nextWord)) {
          ruleMatch = createWrongSplitMatch(sentence, ruleMatchesSoFar, nextStartPos, nextWord, sugg1a, sugg1b, startPos);
          afterSuggestionStr = " " + nextWord;
        }
        // Move the first character of the next word to the end of this word.
        String sugg2a = word + nextWord.charAt(0);
        String sugg2b = nextWord.substring(1);
        if (sugg2b.length() > 2 && !isMisspelled(speller1, sugg2a) && !isMisspelled(speller1, sugg2b)) {
          if (ruleMatch == null) {
            if (getFrequency(speller1, sugg2a) + getFrequency(speller1, sugg2b) > getFrequency(speller1, nextWord)) {
              ruleMatch = createWrongSplitMatch(sentence, ruleMatchesSoFar, nextStartPos, nextWord, sugg2a, sugg2b, startPos);
              afterSuggestionStr = " " + nextWord;
            }
          } else {
            ruleMatch.addSuggestedReplacement((sugg2a + " " + sugg2b).trim());
          }
        }
        // Join this word and the next word into one.
        String sugg = word + nextWord;
        if (nextWord.equals(nextWord.toLowerCase()) && !isMisspelled(speller1, sugg)) {
          if (ruleMatch == null) {
            if (getFrequency(speller1, sugg) >= getFrequency(speller1, nextWord)) {
              // The match spans from the start of this word to the end of the next word.
              ruleMatch = new RuleMatch(this, sentence, startPos, nextStartPos + nextWord.length(),
                  messages.getString("spelling"), messages.getString("desc_spelling_short"));
              ruleMatch.setType(RuleMatch.Type.UnknownWord);
              afterSuggestionStr = " " + nextWord;
              ruleMatch.setSuggestedReplacement(sugg);
            }
          } else {
            ruleMatch.addSuggestedReplacement(sugg);
          }
        }
        // Same early return as for the previous word: only if the next word is misspelled too.
        if (ruleMatch != null && isMisspelled(speller1, nextWord)) {
          ruleMatches.add(ruleMatch);
          return ruleMatches;
        }
      }
    }
 
    int translationSuggestionCount = 0;
    boolean preventFurtherSuggestions = false;
    
    // NOTE: the translator lookup is currently disabled, so 'translator' is always null, the
    // block below never runs, and translationSuggestionCount stays 0.
    //Translator translator = getTranslator(globalConfig);
    Translator translator = null;
    if (translator != null && ruleMatch == null && motherTongue != null &&
        language.getShortCode().equals("en") && motherTongue.getShortCode().equals("de")) {
      List<PhraseToTranslate> phrasesToTranslate = new ArrayList<>();
      // Try the two-word phrase first (if the next word is misspelled as well), then the word alone.
      if (idx + 1 < tokens.length) {
        String nextWord = tokens[idx + 1].getToken();
        if (isMisspelled(nextWord)) {
          phrasesToTranslate.add(new PhraseToTranslate(word + " " + nextWord, tokens[idx + 1].getEndPos()));
        }
      }
      phrasesToTranslate.add(new PhraseToTranslate(word, startPos + word.length()));
      for (PhraseToTranslate phraseToTranslate : phrasesToTranslate) {
        List<TranslationEntry> translations = translator.translate(phraseToTranslate.phrase, motherTongue.getShortCode(), language.getShortCode());
        if (!translations.isEmpty()) {
          logger.info("Translated: {}", word);   // privacy: logging a single word without IP address etc. is okay
          ruleMatch = new RuleMatch(this, sentence, startPos, phraseToTranslate.endPos, translator.getMessage());
          ruleMatch.setType(RuleMatch.Type.Hint);
          ruleMatch.setSuggestedReplacements(new ArrayList<>());
          List<SuggestedReplacement> l = new ArrayList<>();
          String prevWord = idx > 0 ? tokens[idx-1].getToken() : null;
          for (TranslationEntry translation : translations) {
            for (String s : translation.getL2()) {
              String suffix = translator.getTranslationSuffix(s);
              SuggestedReplacement repl = new SuggestedReplacement(translator.cleanTranslationForReplace(s, prevWord), String.join(", ", translation.getL1()), suffix.isEmpty() ? null : suffix);
              repl.setType(SuggestedReplacement.SuggestionType.Translation);
              // A translation identical to the original word is not a useful suggestion.
              if (!repl.getReplacement().equals(word)) {
                l.add(repl);
              }
            }
          }
          List<SuggestedReplacement> mergedRepl = mergeSuggestionsWithSameTranslation(l);
          if (!mergedRepl.isEmpty()) {
            ruleMatch.setSuggestedReplacementObjects(mergedRepl);
            translationSuggestionCount = mergedRepl.size();
            if (phraseToTranslate.phrase.contains(" ")) {
              preventFurtherSuggestions = true;  // mark gets extended, so suggestions for the original marker won't make sense
            }
            break;  // let's assume the first phrase is the best because it's longer
          }
        }
      }
    }

    // No split-word (or translation) match so far: report a plain unknown word.
    if (ruleMatch == null) {
      ruleMatch = new RuleMatch(this, sentence, startPos, startPos + word.length(), messages.getString("spelling"),
              messages.getString("desc_spelling_short"));
      ruleMatch.setType(RuleMatch.Type.UnknownWord);
    }

    // Suggestions disabled by the user configuration: return the match as it is.
    if (userConfig != null && !userConfig.isSuggestionsEnabled()){
      ruleMatches.add(ruleMatch);
      return ruleMatches;
    }
    
    //word starting with numbers or bullets    
    String cleanWord = word;
    String firstPart = "";
    String secondPart = "";
    Matcher mStartsWithNumbersBullets = pStartsWithNumbersBullets.matcher(word);
    if (mStartsWithNumbersBullets.matches()) {
      Matcher mStartsWithNumbersBulletsExceptions = pStartsWithNumbersBulletsExceptions.matcher(word);
      if (!mStartsWithNumbersBulletsExceptions.matches()) {
        firstPart = mStartsWithNumbersBullets.group(1);
        secondPart = mStartsWithNumbersBullets.group(2);
        List<String> secondPartTokens = this.language.getWordTokenizer().tokenize(secondPart);
        boolean multitokenIsMisspelled = secondPartTokens.stream().anyMatch(str -> isMisspelled(speller1, str));
        if ((!multitokenIsMisspelled || isIgnoredNoCase(secondPart)) && !isProhibited(secondPart)) {
          // The second part is acceptable on its own: suggest inserting a space and stop there.
          ruleMatch.addSuggestedReplacement(firstPart + " " + secondPart);
          preventFurtherSuggestions = true;
        } else {
          // The second part needs correcting: look up suggestions for it alone and keep the
          // first part as the prefix passed to appendLazySuggestions.
          beforeSuggestionStr = firstPart + " ";
          cleanWord = secondPart;
        }  
      }
    }

    // Experiment parameter "fullSuggestionCandidates" (default: false), read from the current
    // SuggestionsChanges experiment if there is one.
    boolean fullResults = SuggestionsChanges.getInstance() != null &&
      SuggestionsChanges.getInstance().getCurrentExperiment() != null &&
      (boolean) SuggestionsChanges.getInstance().getCurrentExperiment()
        .parameters.getOrDefault("fullSuggestionCandidates", Boolean.FALSE);

    // Suggestions are computed when there is no user config, when the configured maximum is 0
    // (presumably "no limit" - confirm against UserConfig), or while the number of matches so
    // far does not exceed the configured maximum.
    if (userConfig == null || userConfig.getMaxSpellingSuggestions() == 0 
        || ruleMatchesSoFar.size() <= userConfig.getMaxSpellingSuggestions()) {
      if (translationSuggestionCount > 0) {
        // Re-create the match with a message mentioning translations, keeping its suggestions.
        List<SuggestedReplacement> prev = ruleMatch.getSuggestedReplacementObjects();
        ruleMatch = new RuleMatch(ruleMatch.getRule(), ruleMatch.getSentence(), ruleMatch.getFromPos(), ruleMatch.getToPos(),
          messages.getString("spelling") + " Translations to English are also offered.");
        ruleMatch.setSuggestedReplacementObjects(prev);
      }

      if (!preventFurtherSuggestions) {
        ruleMatch.setLazySuggestedReplacements(appendLazySuggestions(cleanWord, beforeSuggestionStr, afterSuggestionStr,
          fullResults, ruleMatch.getSuggestedReplacementObjects()));
      }
    } else {
      // limited to save CPU
      ruleMatch.setSuggestedReplacement(messages.getString("too_many_errors"));
    }

    ruleMatches.add(ruleMatch);
    return ruleMatches;
  }