in languagetool-core/src/main/java/org/languagetool/rules/spelling/morfologik/MorfologikSpellerRule.java [325:549]
/**
 * Creates the spelling match for {@code word}, if one is needed.
 * <p>
 * Every return path adds at most one {@link RuleMatch} to the result, because several matches
 * on the same word or words cannot be shown to the user. Before falling back to a plain
 * "unknown word" match, the method checks whether the word looks like a wrongly split word
 * when combined with the previous or with the next token.
 *
 * @param word the token text to check
 * @param startPos start position of {@code word}; used as the start of the default match
 * @param sentence the sentence handed on to every created match
 * @param ruleMatchesSoFar matches created earlier; the last entry is used to detect overlap with
 *        {@code word}, and the list size is compared with the user's maximum spelling suggestions
 * @param idx index of the token for {@code word} inside {@code tokens}
 * @param tokens the tokens; the neighbours at {@code idx - 1} and {@code idx + 1} are inspected
 * @return an empty list when no match is created, otherwise a list containing exactly one match
 * @throws IOException declared by the signature; presumably raised by the helpers called here
 */
protected List<RuleMatch> getRuleMatches(String word, int startPos, AnalyzedSentence sentence, List<RuleMatch> ruleMatchesSoFar, int idx, AnalyzedTokenReadings[] tokens) throws IOException {
  List<RuleMatch> result = new ArrayList<>();
  boolean acceptable = !isMisspelled(speller1, word) && !isProhibited(word);
  if (acceptable || ignorePotentiallyMisspelledWord(word)) {
    return result;
  }
  // The previous match already extends over this word, so there is nothing left to report.
  if (!ruleMatchesSoFar.isEmpty()) {
    RuleMatch lastMatch = ruleMatchesSoFar.get(ruleMatchesSoFar.size() - 1);
    if (lastMatch.getToPos() > startPos) {
      return result;
    }
  }

  RuleMatch match = null;
  // Text put before / after each lazily computed suggestion.
  String suggestionPrefix = "";
  String suggestionSuffix = "";

  // --- wrong split involving the previous token ---
  if (idx > 0 && tokens[idx].isWhitespaceBefore()) {
    AnalyzedTokenReadings previousToken = tokens[idx - 1];
    String previous = previousToken.getToken();
    if (previous.length() > 0 && !StringUtils.containsAny(previous, "0", "1", "2", "3", "4", "5", "6", "7", "8", "9")
        && getFrequency(speller1, previous) < MAX_FREQUENCY_FOR_SPLITTING) {
      int previousStart = previousToken.getStartPos();

      // Last character of the previous token belongs to this word: "thanky ou" -> "thank you"
      String prevTrimmed = previous.substring(0, previous.length() - 1);
      String wordExtended = previous.substring(previous.length() - 1) + word;
      if (prevTrimmed.length() > 1 && wordExtended.length() > 2 && !isMisspelled(speller1, prevTrimmed)
          && !isMisspelled(speller1, wordExtended)
          && getFrequency(speller1, prevTrimmed) + getFrequency(speller1, wordExtended) > getFrequency(speller1, previous)) {
        match = createWrongSplitMatch(sentence, ruleMatchesSoFar, startPos, word, prevTrimmed, wordExtended, previousStart);
        suggestionPrefix = previous + " ";
      }

      // First character of this word belongs to the previous token: "than kyou" -> "thank you"
      // (but "She awaked" must not become "Shea waked")
      String prevExtended = previous + word.charAt(0);
      String wordTrimmed = word.substring(1);
      if (wordTrimmed.length() > 2 && !isMisspelled(speller1, prevExtended) && !isMisspelled(speller1, wordTrimmed)) {
        if (match != null) {
          match.addSuggestedReplacement((prevExtended + " " + wordTrimmed).trim());
        } else if (getFrequency(speller1, prevExtended) + getFrequency(speller1, wordTrimmed) > getFrequency(speller1, previous)) {
          match = createWrongSplitMatch(sentence, ruleMatchesSoFar, startPos, word, prevExtended, wordTrimmed,
              previousStart);
          suggestionPrefix = previous + " ";
        }
      }

      // Both tokens are really one word: "g oing" -> "going"
      String joinedWithPrevious = previous + word;
      if (word.equals(word.toLowerCase()) && !isMisspelled(speller1, joinedWithPrevious)) {
        if (match != null) {
          match.addSuggestedReplacement(joinedWithPrevious);
        } else if (getFrequency(speller1, joinedWithPrevious) >= getFrequency(speller1, previous)) {
          match = new RuleMatch(this, sentence, previousStart, startPos + word.length(),
              messages.getString("spelling"), messages.getString("desc_spelling_short"));
          match.setType(RuleMatch.Type.UnknownWord);
          suggestionPrefix = previous + " ";
          match.setSuggestedReplacement(joinedWithPrevious);
        }
      }

      // Only finish here when the previous token is misspelled as well.
      if (match != null && isMisspelled(speller1, previous)) {
        result.add(match);
        return result;
      }
    }
  }

  // --- wrong split involving the next token (only if nothing was found above) ---
  if (match == null && idx < tokens.length - 1 && tokens[idx + 1].isWhitespaceBefore()) {
    AnalyzedTokenReadings followingToken = tokens[idx + 1];
    String following = followingToken.getToken();
    if (following.length() > 0 && !StringUtils.containsAny(following, '0', '1', '2', '3', '4', '5', '6', '7', '8', '9')
        && getFrequency(speller1, following) < MAX_FREQUENCY_FOR_SPLITTING) {
      int followingStart = followingToken.getStartPos();

      // Last character of this word belongs to the next token.
      String wordTrimmed = word.substring(0, word.length() - 1);
      String nextExtended = word.substring(word.length() - 1) + following;
      if (wordTrimmed.length() > 1 && nextExtended.length() > 2 && !isMisspelled(speller1, wordTrimmed) && !isMisspelled(speller1, nextExtended) &&
          getFrequency(speller1, wordTrimmed) + getFrequency(speller1, nextExtended) > getFrequency(speller1, following)) {
        match = createWrongSplitMatch(sentence, ruleMatchesSoFar, followingStart, following, wordTrimmed, nextExtended, startPos);
        suggestionSuffix = " " + following;
      }

      // First character of the next token belongs to this word.
      String wordExtended = word + following.charAt(0);
      String nextTrimmed = following.substring(1);
      if (nextTrimmed.length() > 2 && !isMisspelled(speller1, wordExtended) && !isMisspelled(speller1, nextTrimmed)) {
        if (match != null) {
          match.addSuggestedReplacement((wordExtended + " " + nextTrimmed).trim());
        } else if (getFrequency(speller1, wordExtended) + getFrequency(speller1, nextTrimmed) > getFrequency(speller1, following)) {
          match = createWrongSplitMatch(sentence, ruleMatchesSoFar, followingStart, following, wordExtended, nextTrimmed, startPos);
          suggestionSuffix = " " + following;
        }
      }

      // Both tokens are really one word.
      String joinedWithFollowing = word + following;
      if (following.equals(following.toLowerCase()) && !isMisspelled(speller1, joinedWithFollowing)) {
        if (match != null) {
          match.addSuggestedReplacement(joinedWithFollowing);
        } else if (getFrequency(speller1, joinedWithFollowing) >= getFrequency(speller1, following)) {
          match = new RuleMatch(this, sentence, startPos, followingStart + following.length(),
              messages.getString("spelling"), messages.getString("desc_spelling_short"));
          match.setType(RuleMatch.Type.UnknownWord);
          suggestionSuffix = " " + following;
          match.setSuggestedReplacement(joinedWithFollowing);
        }
      }

      // Only finish here when the next token is misspelled as well.
      if (match != null && isMisspelled(speller1, following)) {
        result.add(match);
        return result;
      }
    }
  }

  int translationSuggestionCount = 0;
  boolean preventFurtherSuggestions = false;
  // The translator lookup is switched off (it used to be: getTranslator(globalConfig)),
  // so with a null translator the translation branch below is never entered.
  Translator translator = null;
  if (translator != null && match == null && motherTongue != null &&
      language.getShortCode().equals("en") && motherTongue.getShortCode().equals("de")) {
    List<PhraseToTranslate> phrases = new ArrayList<>();
    if (idx + 1 < tokens.length) {
      String nextText = tokens[idx + 1].getToken();
      if (isMisspelled(nextText)) {
        phrases.add(new PhraseToTranslate(word + " " + nextText, tokens[idx + 1].getEndPos()));
      }
    }
    phrases.add(new PhraseToTranslate(word, startPos + word.length()));
    for (PhraseToTranslate candidatePhrase : phrases) {
      List<TranslationEntry> translations = translator.translate(candidatePhrase.phrase, motherTongue.getShortCode(), language.getShortCode());
      if (translations.isEmpty()) {
        continue;
      }
      // privacy: a single word is logged, without IP address etc., which is considered okay
      logger.info("Translated: {}", word);
      match = new RuleMatch(this, sentence, startPos, candidatePhrase.endPos, translator.getMessage());
      match.setType(RuleMatch.Type.Hint);
      match.setSuggestedReplacements(new ArrayList<>());
      List<SuggestedReplacement> collected = new ArrayList<>();
      String precedingText = idx > 0 ? tokens[idx-1].getToken() : null;
      for (TranslationEntry entry : translations) {
        for (String target : entry.getL2()) {
          String translationSuffix = translator.getTranslationSuffix(target);
          SuggestedReplacement replacement = new SuggestedReplacement(
              translator.cleanTranslationForReplace(target, precedingText),
              String.join(", ", entry.getL1()),
              translationSuffix.isEmpty() ? null : translationSuffix);
          replacement.setType(SuggestedReplacement.SuggestionType.Translation);
          if (!replacement.getReplacement().equals(word)) {
            collected.add(replacement);
          }
        }
      }
      List<SuggestedReplacement> merged = mergeSuggestionsWithSameTranslation(collected);
      if (!merged.isEmpty()) {
        match.setSuggestedReplacementObjects(merged);
        translationSuggestionCount = merged.size();
        if (candidatePhrase.phrase.contains(" ")) {
          // the marked range got longer, so suggestions for the original range would not fit
          preventFurtherSuggestions = true;
        }
        // the first phrase is the longer one and is assumed to be the best
        break;
      }
    }
  }

  // Default: a plain "unknown word" match covering just this word.
  if (match == null) {
    match = new RuleMatch(this, sentence, startPos, startPos + word.length(), messages.getString("spelling"),
        messages.getString("desc_spelling_short"));
    match.setType(RuleMatch.Type.UnknownWord);
  }
  if (userConfig != null && !userConfig.isSuggestionsEnabled()) {
    result.add(match);
    return result;
  }

  // Words that start with numbers or bullets
  String cleanWord = word;
  Matcher numberBulletMatcher = pStartsWithNumbersBullets.matcher(word);
  if (numberBulletMatcher.matches() && !pStartsWithNumbersBulletsExceptions.matcher(word).matches()) {
    String leadingPart = numberBulletMatcher.group(1);
    String remainder = numberBulletMatcher.group(2);
    List<String> remainderTokens = this.language.getWordTokenizer().tokenize(remainder);
    boolean anyTokenMisspelled = false;
    for (String part : remainderTokens) {
      if (isMisspelled(speller1, part)) {
        anyTokenMisspelled = true;
        break;
      }
    }
    if ((!anyTokenMisspelled || isIgnoredNoCase(remainder)) && !isProhibited(remainder)) {
      // The remainder is fine on its own: just suggest separating it from the leading part.
      match.addSuggestedReplacement(leadingPart + " " + remainder);
      preventFurtherSuggestions = true;
    } else {
      // Look up suggestions for the remainder only and put the leading part in front of them.
      suggestionPrefix = leadingPart + " ";
      cleanWord = remainder;
    }
  }

  boolean fullResults = false;
  if (SuggestionsChanges.getInstance() != null
      && SuggestionsChanges.getInstance().getCurrentExperiment() != null) {
    fullResults = (boolean) SuggestionsChanges.getInstance().getCurrentExperiment()
        .parameters.getOrDefault("fullSuggestionCandidates", Boolean.FALSE);
  }

  boolean overSuggestionLimit = userConfig != null && userConfig.getMaxSpellingSuggestions() != 0
      && ruleMatchesSoFar.size() > userConfig.getMaxSpellingSuggestions();
  if (overSuggestionLimit) {
    // limited to save CPU
    match.setSuggestedReplacement(messages.getString("too_many_errors"));
  } else {
    if (translationSuggestionCount > 0) {
      // Re-create the match with a message that mentions the translations, keeping its suggestions.
      List<SuggestedReplacement> kept = match.getSuggestedReplacementObjects();
      match = new RuleMatch(match.getRule(), match.getSentence(), match.getFromPos(), match.getToPos(),
          messages.getString("spelling") + " Translations to English are also offered.");
      match.setSuggestedReplacementObjects(kept);
    }
    if (!preventFurtherSuggestions) {
      match.setLazySuggestedReplacements(appendLazySuggestions(cleanWord, suggestionPrefix, suggestionSuffix,
          fullResults, match.getSuggestedReplacementObjects()));
    }
  }
  result.add(match);
  return result;
}