in Runtime/Tokenizers/PostProcessors/PostProcessors.cs [101:146]
public override List<string> PostProcess(List<string> tokens, List<string> tokensPair = null)
{
    // Builds the post-processed token list by walking a template and expanding
    // each entry: "SpecialToken" entries contribute their id, "Sequence" entries
    // contribute the tokens of the matching input sequence.
    //
    // tokens     - tokens of the first sequence (template sequence id "A").
    // tokensPair - optional tokens of the second sequence (template sequence id "B").
    // Returns a newly allocated list; the input lists are only read.

    // A missing second sequence selects the Single template, otherwise Pair is used.
    JArray template;
    if (tokensPair == null)
    {
        template = Single;
    }
    else
    {
        template = Pair;
    }

    List<string> processed = new List<string>();

    foreach (JToken entry in template)
    {
        JObject entryJson = (JObject)entry;

        // Special-token entry: emit the id stored under "SpecialToken".
        if (entryJson.ContainsKey("SpecialToken"))
        {
            processed.Add((string)entryJson["SpecialToken"]["id"]);
            continue;
        }

        // Entries that are neither special tokens nor sequences contribute nothing.
        if (!entryJson.ContainsKey("Sequence"))
        {
            continue;
        }

        // Sequence entry: splice in the input list the id refers to.
        // Ids other than "A" and "B" are ignored.
        switch ((string)entryJson["Sequence"]["id"])
        {
            case "A":
                processed.AddRange(tokens);
                break;

            case "B":
                // NOTE: tokensPair is null whenever the Single template is active,
                // so a "B" entry in that template makes AddRange throw.
                processed.AddRange(tokensPair);
                break;
        }
    }

    return processed;
}