in Runtime/Tokenizers/Decoders/Decoders.cs [216:259]
/// <summary>
/// Merges runs of byte-fallback tokens of the form <c>&lt;0xXX&gt;</c> (XX = exactly two
/// hexadecimal digits) into decoded text, leaving every other token untouched.
/// </summary>
/// <remarks>
/// Consecutive byte tokens are buffered and decoded together through <c>uTF8Encoding</c>, so a
/// character whose encoding is spread over several byte tokens yields a single output entry.
/// NOTE(review): how invalid byte sequences are handled (replacement character vs. exception)
/// depends on how <c>uTF8Encoding</c> is constructed, which is not visible here - confirm.
/// </remarks>
/// <param name="tokens">Tokens to process, in order. Must not be null.</param>
/// <returns>
/// A new list in which each run of adjacent byte tokens is replaced by one decoded string and
/// all remaining tokens appear unchanged in their original relative order.
/// </returns>
public override List<string> DecodeChain(List<string> tokens)
{
    // The result can never hold more entries than the input, so presizing avoids regrowth.
    var newTokens = new List<string>(tokens.Count);
    var pendingBytes = new List<byte>();

    // Decodes the buffered bytes (if any) into one output entry and resets the buffer.
    void FlushPendingBytes()
    {
        if (pendingBytes.Count == 0)
        {
            return;
        }

        newTokens.Add(uTF8Encoding.GetString(pendingBytes.ToArray()));
        pendingBytes.Clear();
    }

    foreach (var token in tokens)
    {
        // A byte token is exactly six characters: "<0x", two hex digits, ">".
        // Ordinal comparisons keep the match culture-independent, and AllowHexSpecifier
        // (unlike HexNumber) rejects surrounding whitespace, so a literal token such as
        // "<0x 1>" is passed through instead of being misread as byte 0x01.
        if (token.Length == 6
            && token.StartsWith("<0x", System.StringComparison.Ordinal)
            && token.EndsWith(">", System.StringComparison.Ordinal)
            && byte.TryParse(
                token.Substring(3, 2),
                System.Globalization.NumberStyles.AllowHexSpecifier,
                System.Globalization.CultureInfo.InvariantCulture,
                out byte byteValue))
        {
            pendingBytes.Add(byteValue);
        }
        else
        {
            // An ordinary token ends the current byte run: emit the decoded run first
            // so the output order matches the input order.
            FlushPendingBytes();
            newTokens.Add(token);
        }
    }

    // The input may end in the middle of a byte run.
    FlushPendingBytes();
    return newTokens;
}