in Runtime/Tokenizers/Tokenizers/Tokenizers.cs [192:210]
public WordPieceTokenizer(JObject config) : base(config)
{
Config = config;
// Parse the JSON data located at config["vocab"] into a Dictionary<string, int> named vocab.
// Each key-value pair in the JSON will be represented as an entry in the dictionary.
JObject vocabJson = config["vocab"].ToObject<JObject>();
// A mapping of tokens to ids.
TokensToIds = vocabJson.ToObject<Dictionary<string, int>>();
// Id of the unknown token
UnkTokenId = (int)config["vocab"]["[UNK]"];
// The unknown token string.
UnkToken = (string)config["unk_token"];
ContinuingSubwordPrefix = (string)config["continuing_subword_prefix"];
}