public void BuildIndexForProject()

in DuplicateCodeDetector/CloneDetector.cs [74:104]


        /// <summary>
        /// Reads a gzip-compressed, line-delimited JSON file and stores one sparse
        /// token-count vector per sufficiently large entry in a new per-project index.
        /// </summary>
        /// <param name="parsedJsonlPath">
        /// Path of the gzip-compressed JSONL file to read. The same string is used as the
        /// key under which the project's index is registered in <c>_index</c>.
        /// </param>
        /// <remarks>
        /// The (initially empty) project index is registered before the file is opened,
        /// so it stays registered even if reading fails part-way through.
        /// NOTE(review): <c>_index.Add</c> presumably throws when the same path is indexed
        /// twice (standard dictionary behaviour) - confirm against the field's type.
        /// </remarks>
        public void BuildIndexForProject(string parsedJsonlPath)
        {
            var projectIndex = new Dictionary<string, SparseVector>();
            _index.Add(parsedJsonlPath, projectIndex);

            // Request read-only access explicitly: the (path, mode) constructor asks for
            // ReadWrite access and therefore fails on read-only files, even though this
            // method never writes.
            using (var stream = new FileStream(parsedJsonlPath, FileMode.Open, FileAccess.Read))
            using (var uncompressed = new GZipStream(stream, CompressionMode.Decompress))
            using (var text = new StreamReader(uncompressed))
            {
                string line;
                while ((line = text.ReadLine()) != null)
                {
                    // Skip JSON "null" records and blank lines; both deserialize to null
                    // and would otherwise fail on the field lookup below.
                    if (line == "null" || string.IsNullOrWhiteSpace(line))
                    {
                        continue;
                    }

                    var tokenData = JsonConvert.DeserializeObject<IDictionary<string, object>>(line);
                    var tokenCounter = Count(((JArray)tokenData[_tokensFieldName]).Select(t => t.ToString()));

                    // Entries whose total token count is below the threshold are not indexed.
                    if (tokenCounter.Sum(tc => tc.Count) >= MIN_NUM_TOKENS_FOR_FILE)
                    {
                        var spVect = new SparseVector();
                        spVect.AddElements(tokenCounter.Select(tc => (_dict.AddOrGet(tc.Token), tc.Count)));

                        // The entry key is the ':'-joined values of the identifying fields;
                        // a later entry with the same key overwrites the earlier one.
                        var entryIdentifier = string.Join(":", _identifyingFields.Select(idf => tokenData[idf].ToString()));
                        projectIndex[entryIdentifier] = spVect;
                    }
                }
            }
        }