in src/Trinity.TSL/Trinity.TSL.CodeTemplates/InvertedIndex/Bigram/Searcher.cs [190:312]
internal List<long> SubstringSearch(params string[] keywords)
{
if (keywords == null || keywords.Length == 0)
return new List<long>(0);
if (keywords.Length == 1)
return SubstringSearch(keywords[0]);
List<long> Result = new List<long>();
List<List<IndexItem>> partialResults = new List<List<IndexItem>>(keywords.Length);
for (int i = 0; i < keywords.Length; i++)
{
string query = keywords[i];
if (query.Trim().Length == 0)
return new List<long>(0);
string q = query.ToLowerInvariant();
byte[] bytes = Encoding.UTF8.GetBytes(q);
if (bytes.Length < 2 || bytes.Length > 255)
return new List<long>(0);
List<PairInfo> pairList = SplitKeyword(bytes);
if (pairList.Count == 0)
return new List<long>(0);
var _result = SearchSubString4WildcardSearch(pairList);
if (_result == null || _result.Count == 0)
return new List<long>(0);
partialResults.Add(_result);
}
#region Another K-way search, similar to that of SearchSubString
/****************************************************************************************************
* Each element in the partialResults represents the result list of a substring in the keywords list.
* Each IndexItem list is sorted by IndexItem.m_cellId and IndexItem.Offset
* We perform k-way search starting from the first indexitem of the first list
* For a cellId at partialResults[0][iterator_0],
* if we can successfully go through all the pairCount elements in IndexItemList,
* then we found a match. Otherwise, we continue by checking the next elements in IndexItemList[0].
*
* Successfully going through all the elements in IndexItemList means:
* The distance between the current item offset in partialResults[i] and
* the current item offset in partialResults[i-1] >= keywords[i-1].Length.
* ***************************************************************************************************/
int keywordCount = keywords.Length;
int[] IteratorList = new int[keywordCount]; // with default initialized value 0
int iterator_0 = 0; // iterator for the first IndexItem list
long cellId = long.MinValue;
int offset = 0;
do
{
IndexItem current_item = partialResults[0][iterator_0];
if (current_item.m_cellId >= cellId)
{
offset = current_item.Offset;
int i = 1;
for (; i < keywordCount; i++)
{
IndexItem item = new IndexItem();
bool match = false;
int j = IteratorList[i];
// skip the items whose CellIds are smaller than the current CellId
for (; j < partialResults[i].Count; j++)
{
item = partialResults[i][j];
if (item.m_cellId >= current_item.m_cellId)
{
break;
}
}
if (j == partialResults[i].Count) // We reach the end of the current index item list
return Result;
IteratorList[i] = j;
if (item.m_cellId > current_item.m_cellId) // did not find any item whose CellId equals the current CellId
{
cellId = item.m_cellId;
iterator_0++; // jump to the first index item, and check the next index item
break;
}
// _item.m_cellId == item.m_cellId
Debug.Assert(item.m_cellId == current_item.m_cellId);
do
{
item = partialResults[i][j];
int distance = item.Offset - offset;
if (distance >= keywords[i - 1].Length)
{
offset = item.Offset;
match = true;
break;
}
j++;
} while (j < partialResults[i].Count && partialResults[i][j].m_cellId == current_item.m_cellId);
if (!match)
{
iterator_0++;
break;
}
}
if (i == keywordCount)
{
Result.Add(current_item.m_cellId);
while (iterator_0 < partialResults[0].Count && current_item.m_cellId == partialResults[0][iterator_0].m_cellId)
iterator_0++;
}
}
else
iterator_0++; // skip current item if its m_cellId is smaller than the current m_cellId
} while (iterator_0 < partialResults[0].Count);
#endregion
return Result;
}