private List SearchSubString()

in src/Trinity.TSL/Trinity.TSL.CodeTemplates/InvertedIndex/Bigram/Searcher.cs [363:485]


        private List<long> SearchSubString(List<PairInfo> pairList)
        {
            List<GCHandle> gchandlers = null;
            if (!InMemory)
                gchandlers = new List<GCHandle>();

            List<long> Result = new List<long>();
            int pairCount = pairList.Count;
            if (pairCount == 0)
                return Result;

            IndexItem*[] IndexItemList = new IndexItem*[pairCount];
            int[] DistanceList = new int[pairCount];
            int[] ItemCountList = new int[pairCount];
            int[] IteratorList = new int[pairCount]; // with default initialized value 0

            int offset = pairList[0].pos;
            for (int i = 0; i < pairCount; i++)
            {
                if (InMemory)
                    IndexItemList[i] = ReadIndexItemListFromRAM(pairList[i].A, pairList[i].B, out ItemCountList[i]);
                else
                    IndexItemList[i] = ReadIndexItemListFromDisk(pairList[i].A, pairList[i].B, out ItemCountList[i], ref gchandlers);
                DistanceList[i] = pairList[i].pos - offset;
                offset = pairList[i].pos;
                if (ItemCountList[i] == 0)
                {
                    if (!InMemory)
                        ReleaseGCHandle(gchandlers);
                    return null;
                }
            }

            #region K-way search
            /****************************************************************************************************
             * Each element in the IndexItemList represents a bigram pair (e.g., ab or bc).
             * Each IndexItem is sorted by IndexItem.m_cellId and IndexItem.Offset
             * We perform k-way search starting from IndexItemList[0]
             * For a cellId at IndexItemList[0][iterator_0],
             * if we can successfully go through all the pairCount elements in IndexItemList,
             * then we found a match. Otherwise, we continue by checking the next elements in IndexItemList[0].
             *
             * Successfully going through all the elements in IndexItemList means:
             * The distance between the current item offset in IndexItemList[i] and
             * the current item offset in IndexItemList[i-1] equals DistanceList[i].
             * ***************************************************************************************************/
            int iterator_0 = 0; // iterator for the first IndexItem list IndexItemList[0]
            long cellId = long.MinValue;
            offset = 0;
            do
            {
                IndexItem current_item = IndexItemList[0][iterator_0];
                if (current_item.m_cellId >= cellId)
                {
                    offset = current_item.Offset;
                    int i = 1;
                    for (; i < pairCount; i++)
                    {
                        IndexItem item = new IndexItem();
                        bool match = false;
                        int j = IteratorList[i];
                        // skip the items whose CellIds are smaller than the current CellId
                        for (; j < ItemCountList[i]; j++)
                        {
                            item = IndexItemList[i][j];
                            if (item.m_cellId >= current_item.m_cellId)
                            {
                                break;
                            }
                        }
                        if (j == ItemCountList[i]) // We reach the end of one index item list for a bigram pair
                        {
                            if (!InMemory)
                                ReleaseGCHandle(gchandlers);
                            return Result;
                        }

                        IteratorList[i] = j;
                        if (item.m_cellId > current_item.m_cellId) // did not find any item whose CellId equals the current CellId
                        {
                            cellId = item.m_cellId;
                            iterator_0++; // jump to the first index item, and check the next index item
                            break;
                        }

                        // _item.m_cellId == item.m_cellId
                        Debug.Assert(item.m_cellId == current_item.m_cellId);
                        do
                        {
                            item = IndexItemList[i][j];
                            int distance = item.Offset - offset;
                            if (distance == DistanceList[i])
                            {
                                offset = item.Offset;
                                match = true;
                                break;
                            }
                            else if (distance > DistanceList[i])
                                break;
                            j++;
                        } while (j < ItemCountList[i] && IndexItemList[i][j].m_cellId == current_item.m_cellId);

                        if (!match)
                        {
                            iterator_0++;
                            break;
                        }
                    }
                    if (i == pairCount)
                    {
                        Result.Add(current_item.m_cellId);
                        while (iterator_0 < ItemCountList[0] && current_item.m_cellId == IndexItemList[0][iterator_0].m_cellId)
                            iterator_0++;
                    }
                }
                else
                    iterator_0++; // skip current item if its CellId is smaller than the current CellId
            } while (iterator_0 < ItemCountList[0]);
            #endregion
            if (!InMemory)
                ReleaseGCHandle(gchandlers);
            return Result;
        }