internal List SubstringSearch()

in src/Trinity.TSL/Trinity.TSL.CodeTemplates/InvertedIndex/Bigram/Searcher.cs [190:312]


        internal List<long> SubstringSearch(params string[] keywords)
        {
            if (keywords == null || keywords.Length == 0)
                return new List<long>(0);

            if (keywords.Length == 1)
                return SubstringSearch(keywords[0]);

            List<long> Result = new List<long>();

            List<List<IndexItem>> partialResults = new List<List<IndexItem>>(keywords.Length);

            for (int i = 0; i < keywords.Length; i++)
            {
                string query = keywords[i];

                if (query.Trim().Length == 0)
                    return new List<long>(0);

                string q = query.ToLowerInvariant();
                byte[] bytes = Encoding.UTF8.GetBytes(q);

                if (bytes.Length < 2 || bytes.Length > 255)
                    return new List<long>(0);

                List<PairInfo> pairList = SplitKeyword(bytes);

                if (pairList.Count == 0)
                    return new List<long>(0);

                var _result = SearchSubString4WildcardSearch(pairList);
                if (_result == null || _result.Count == 0)
                    return new List<long>(0);

                partialResults.Add(_result);
            }

            #region Another K-way search, similar to that of SearchSubString
            /****************************************************************************************************
             * Each element in the partialResults represents the result list of a substring in the keywords list.
             * Each IndexItem list is sorted by IndexItem.m_cellId and IndexItem.Offset
             * We perform k-way search starting from the first indexitem of the first list
             * For a cellId at partialResults[0][iterator_0],
             * if we can successfully go through all the pairCount elements in IndexItemList,
             * then we found a match. Otherwise, we continue by checking the next elements in IndexItemList[0].
             *
             * Successfully going through all the elements in IndexItemList means:
             * The distance between the current item offset in partialResults[i] and
             * the current item offset in partialResults[i-1] >= keywords[i-1].Length.
             * ***************************************************************************************************/
            int keywordCount = keywords.Length;
            int[] IteratorList = new int[keywordCount]; // with default initialized value 0

            int iterator_0 = 0; // iterator for the first IndexItem list
            long cellId = long.MinValue;
            int offset = 0;
            do
            {
                IndexItem current_item = partialResults[0][iterator_0];
                if (current_item.m_cellId >= cellId)
                {
                    offset = current_item.Offset;
                    int i = 1;
                    for (; i < keywordCount; i++)
                    {
                        IndexItem item = new IndexItem();
                        bool match = false;
                        int j = IteratorList[i];
                        // skip the items whose CellIds are smaller than the current CellId
                        for (; j < partialResults[i].Count; j++)
                        {
                            item = partialResults[i][j];
                            if (item.m_cellId >= current_item.m_cellId)
                            {
                                break;
                            }
                        }
                        if (j == partialResults[i].Count) // We reach the end of the current index item list
                            return Result;

                        IteratorList[i] = j;
                        if (item.m_cellId > current_item.m_cellId) // did not find any item whose CellId equals the current CellId
                        {
                            cellId = item.m_cellId;
                            iterator_0++; // jump to the first index item, and check the next index item
                            break;
                        }

                        // _item.m_cellId == item.m_cellId
                        Debug.Assert(item.m_cellId == current_item.m_cellId);
                        do
                        {
                            item = partialResults[i][j];
                            int distance = item.Offset - offset;
                            if (distance >= keywords[i - 1].Length)
                            {
                                offset = item.Offset;
                                match = true;
                                break;
                            }

                            j++;
                        } while (j < partialResults[i].Count && partialResults[i][j].m_cellId == current_item.m_cellId);

                        if (!match)
                        {
                            iterator_0++;
                            break;
                        }
                    }
                    if (i == keywordCount)
                    {
                        Result.Add(current_item.m_cellId);
                        while (iterator_0 < partialResults[0].Count && current_item.m_cellId == partialResults[0][iterator_0].m_cellId)
                            iterator_0++;
                    }
                }
                else
                    iterator_0++; // skip current item if its m_cellId is smaller than the current m_cellId
            } while (iterator_0 < partialResults[0].Count);
            #endregion
            return Result;
        }