in src/main/java/org/apache/commons/codec/language/MatchRatingApproachEncoder.java [206:258]
public boolean isEncodeEquals(String name1, String name2) {
    // Contract: returns true when the two names are considered a match, false otherwise.
    //
    // Trivial input never matches: a name that is null, equal to EMPTY or SPACE
    // (ignoring case), or exactly one character long yields false. The checks run
    // in the order name1, name2, lengths, so length() is never reached on a null name.
    if (name1 == null || EMPTY.equalsIgnoreCase(name1) || SPACE.equalsIgnoreCase(name1)
            || name2 == null || EMPTY.equalsIgnoreCase(name2) || SPACE.equalsIgnoreCase(name2)
            || name1.length() == 1 || name2.length() == 1) {
        return false;
    }

    // Names that are equal ignoring case match without running the algorithm.
    if (name1.equalsIgnoreCase(name2)) {
        return true;
    }

    // Preprocessing: clean both names before encoding them.
    String first = cleanName(name1);
    String second = cleanName(name2);

    // MRA step 1: remove vowels.
    first = removeVowels(first);
    second = removeVowels(second);

    // MRA step 2: remove double consonants.
    first = removeDoubleConsonants(first);
    second = removeDoubleConsonants(second);

    // MRA step 3: reduce each name down to its first 3 and last 3 letters.
    first = getFirst3Last3(first);
    second = getFirst3Last3(second);

    // MRA step 4: a length difference of 3 or more means no similarity
    // comparison is attempted at all.
    final int lengthGap = Math.abs(first.length() - second.length());
    if (lengthGap >= 3) {
        return false;
    }

    // MRA step 5: the minimum rating is looked up from the combined length of
    // the two encoded names.
    final int minRating = getMinRating(Math.abs(first.length() + second.length()));

    // MRA steps 6 and 7: process the encoded names from left to right and then
    // right to left to obtain the similarity rating; a rating equal to or
    // greater than the minimum is considered a good candidate match.
    return leftToRightThenRightToLeftProcessing(first, second) >= minRating;
}