in DuplicateCodeDetector/CloneDetector.cs [127:152]
/// <summary>
/// Pairs the files indexed under <paramref name="project1"/> with the files indexed under
/// <paramref name="project2"/> and yields every pair whose two similarity scores are not below
/// the supplied thresholds.
/// </summary>
/// <remarks>
/// Entries whose key is already present in <c>_alreadyDuplicatedFiles</c> are skipped on both sides.
/// For every reported pair the second file's key is added to <c>_alreadyDuplicatedFiles</c> and the
/// pair is passed to <c>AddDuplicate</c>. The outer sequence is processed through <c>AsParallel()</c>;
/// filtering and comparisons are deferred until the returned sequence is enumerated.
/// </remarks>
/// <param name="keyJaccardThreshold">A pair is dropped, before the full score is computed, when its key-Jaccard similarity is below this value.</param>
/// <param name="jaccardThreshold">A pair is dropped when its Jaccard similarity is below this value.</param>
/// <param name="project1">Key into <c>_index</c> selecting the files used as the first element of each pair.</param>
/// <param name="project2">Key into <c>_index</c> selecting the files used as the second element of each pair.</param>
/// <returns>Tuples of the two file keys together with their Jaccard and key-Jaccard similarity scores.</returns>
private IEnumerable<(string File1, string File2, double JaccardSimilarity, double KeyJacardSimilarity)> FindNearDuplicates(double keyJaccardThreshold, double jaccardThreshold, string project1, string project2)
{
    // Only files that have not been recorded as a duplicate are used as the first element of a pair.
    var unclaimedSources = _index[project1]
        .AsParallel()
        .Where(entry => !_alreadyDuplicatedFiles.ContainsKey(entry.Key));

    return unclaimedSources.SelectMany(source =>
    {
        IEnumerable<(string File1, string File2, double JaccardSimilarity, double KeyJacardSimilarity)> MatchesForSource()
        {
            foreach (var candidate in _index[project2])
            {
                // Checked per candidate, right before it is compared, so entries recorded
                // while this enumeration is in progress are skipped as well.
                if (_alreadyDuplicatedFiles.ContainsKey(candidate.Key))
                {
                    continue;
                }

                // Never pair a file with itself.
                if (source.Key.Equals(candidate.Key))
                {
                    continue;
                }

                // The key-level score gates the full score: the second is only computed
                // when the first is not below its threshold.
                var keyScore = source.Value.KeyJaccardSimilarity(candidate.Value);
                if (keyScore < keyJaccardThreshold)
                {
                    continue;
                }

                var fullScore = source.Value.JaccardSimilarity(candidate.Value);
                if (fullScore < jaccardThreshold)
                {
                    continue;
                }

                // Record the match before handing it to the caller.
                _alreadyDuplicatedFiles.TryAdd(candidate.Key, true);
                AddDuplicate(source.Key, candidate.Key);
                yield return (source.Key, candidate.Key, fullScore, keyScore);
            }
        }

        return MatchesForSource();
    });
}