in src/Trinity.TSL/Trinity.TSL.CodeTemplates/InvertedIndex/Bigram/Searcher.cs [363:485]
private List<long> SearchSubString(List<PairInfo> pairList)
{
List<GCHandle> gchandlers = null;
if (!InMemory)
gchandlers = new List<GCHandle>();
List<long> Result = new List<long>();
int pairCount = pairList.Count;
if (pairCount == 0)
return Result;
IndexItem*[] IndexItemList = new IndexItem*[pairCount];
int[] DistanceList = new int[pairCount];
int[] ItemCountList = new int[pairCount];
int[] IteratorList = new int[pairCount]; // with default initialized value 0
int offset = pairList[0].pos;
for (int i = 0; i < pairCount; i++)
{
if (InMemory)
IndexItemList[i] = ReadIndexItemListFromRAM(pairList[i].A, pairList[i].B, out ItemCountList[i]);
else
IndexItemList[i] = ReadIndexItemListFromDisk(pairList[i].A, pairList[i].B, out ItemCountList[i], ref gchandlers);
DistanceList[i] = pairList[i].pos - offset;
offset = pairList[i].pos;
if (ItemCountList[i] == 0)
{
if (!InMemory)
ReleaseGCHandle(gchandlers);
return null;
}
}
#region K-way search
/****************************************************************************************************
* Each element in the IndexItemList represents a bigram pair (e.g., ab or bc).
* Each IndexItem is sorted by IndexItem.m_cellId and IndexItem.Offset
* We perform k-way search starting from IndexItemList[0]
* For a cellId at IndexItemList[0][iterator_0],
* if we can successfully go through all the pairCount elements in IndexItemList,
* then we found a match. Otherwise, we continue by checking the next elements in IndexItemList[0].
*
* Successfully going through all the elements in IndexItemList means:
* The distance between the current item offset in IndexItemList[i] and
* the current item offset in IndexItemList[i-1] equals DistanceList[i].
* ***************************************************************************************************/
int iterator_0 = 0; // iterator for the first IndexItem list IndexItemList[0]
long cellId = long.MinValue;
offset = 0;
do
{
IndexItem current_item = IndexItemList[0][iterator_0];
if (current_item.m_cellId >= cellId)
{
offset = current_item.Offset;
int i = 1;
for (; i < pairCount; i++)
{
IndexItem item = new IndexItem();
bool match = false;
int j = IteratorList[i];
// skip the items whose CellIds are smaller than the current CellId
for (; j < ItemCountList[i]; j++)
{
item = IndexItemList[i][j];
if (item.m_cellId >= current_item.m_cellId)
{
break;
}
}
if (j == ItemCountList[i]) // We reach the end of one index item list for a bigram pair
{
if (!InMemory)
ReleaseGCHandle(gchandlers);
return Result;
}
IteratorList[i] = j;
if (item.m_cellId > current_item.m_cellId) // did not find any item whose CellId equals the current CellId
{
cellId = item.m_cellId;
iterator_0++; // jump to the first index item, and check the next index item
break;
}
// _item.m_cellId == item.m_cellId
Debug.Assert(item.m_cellId == current_item.m_cellId);
do
{
item = IndexItemList[i][j];
int distance = item.Offset - offset;
if (distance == DistanceList[i])
{
offset = item.Offset;
match = true;
break;
}
else if (distance > DistanceList[i])
break;
j++;
} while (j < ItemCountList[i] && IndexItemList[i][j].m_cellId == current_item.m_cellId);
if (!match)
{
iterator_0++;
break;
}
}
if (i == pairCount)
{
Result.Add(current_item.m_cellId);
while (iterator_0 < ItemCountList[0] && current_item.m_cellId == IndexItemList[0][iterator_0].m_cellId)
iterator_0++;
}
}
else
iterator_0++; // skip current item if its CellId is smaller than the current CellId
} while (iterator_0 < ItemCountList[0]);
#endregion
if (!InMemory)
ReleaseGCHandle(gchandlers);
return Result;
}