in src/main/java/org/apache/commons/text/similarity/JaccardSimilarity.java [69:93]
public <E> Double apply(final SimilarityInput<E> left, final SimilarityInput<E> right) {
    // Computes the Jaccard index |A ∩ B| / |A ∪ B| over the distinct elements of both inputs.
    // Null inputs are rejected; two empty inputs score 1, exactly one empty input scores 0.
    if (left == null || right == null) {
        throw new IllegalArgumentException("Input cannot be null");
    }

    final int leftCount = left.length();
    final int rightCount = right.length();
    if (leftCount == 0 || rightCount == 0) {
        // Both empty: treated as identical. Only one empty: nothing can be shared.
        return leftCount == rightCount ? 1d : 0d;
    }

    // Collapse each input to its distinct elements.
    final Set<E> distinctLeft = new HashSet<>();
    for (int pos = 0; pos < leftCount; pos++) {
        distinctLeft.add(left.at(pos));
    }
    final Set<E> distinctRight = new HashSet<>();
    for (int pos = 0; pos < rightCount; pos++) {
        distinctRight.add(right.at(pos));
    }

    // Build the union explicitly, then derive the intersection size by
    // inclusion-exclusion: |A ∩ B| = |A| + |B| - |A ∪ B|.
    final Set<E> combined = new HashSet<>(distinctLeft);
    combined.addAll(distinctRight);
    final int unionCount = combined.size();
    final int sharedCount = distinctLeft.size() + distinctRight.size() - unionCount;

    // Promote to double before dividing so the ratio is not truncated.
    return (double) sharedCount / unionCount;
}