in DuplicateCodeDetector/Utils/SparseVector.cs [41:60]
/// <summary>
/// Computes the weighted Jaccard similarity between this vector and
/// <paramref name="other"/>: the sum, over every index present in either vector,
/// of the smaller of the two counts, divided by the sum of the larger of the two
/// counts. An index that is absent from a vector contributes a count of 0.
/// </summary>
/// <param name="other">The vector to compare this vector against.</param>
/// <returns>
/// The ratio of the summed minima to the summed maxima, or 0 when the summed
/// maxima is 0 (for example, when both vectors hold no entries).
/// </returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="other"/> is <c>null</c>.
/// </exception>
public double JaccardSimilarity(SparseVector other)
{
    if (other is null)
    {
        throw new ArgumentNullException(nameof(other));
    }

    // Accumulate in 64 bits: summing many large per-index counts in an int
    // would wrap silently in an unchecked context and corrupt the ratio.
    long numerator = 0;
    long denominator = 0;

    // Pass 1: every index stored in this vector, paired with the matching
    // count from the other vector.
    foreach (var entry in _vector)
    {
        // On a miss TryGetValue leaves the out value at its default, i.e. 0,
        // which is exactly the count an absent index should contribute.
        other._vector.TryGetValue(entry.Key, out var otherIdxCount);
        numerator += Math.Min(entry.Value, otherIdxCount);
        denominator += Math.Max(entry.Value, otherIdxCount);
    }

    // Pass 2: indices stored only in the other vector. Shared indices were
    // already handled in pass 1, so they are skipped here to avoid counting
    // them twice. This vector's count for the remaining indices is 0.
    foreach (var entry in other._vector)
    {
        if (_vector.ContainsKey(entry.Key))
        {
            continue;
        }

        numerator += Math.Min(0, entry.Value);
        denominator += Math.Max(0, entry.Value);
    }

    return denominator == 0 ? 0 : (double)numerator / denominator;
}