public WordPieceTokenizer()

in Runtime/Tokenizers/Tokenizers/Tokenizers.cs [192:210]


        public WordPieceTokenizer(JObject config) : base(config)
        {
            Config = config;

            //  Parse the JSON data located at config["vocab"] into a Dictionary<string, int> named vocab.
            //  Each key-value pair in the JSON will be represented as an entry in the dictionary.
            JObject vocabJson = config["vocab"].ToObject<JObject>();

            // A mapping of tokens to ids.
            TokensToIds = vocabJson.ToObject<Dictionary<string, int>>();

            // Id of the unknown token
            UnkTokenId = (int)config["vocab"]["[UNK]"];

            // The unknown token string.
            UnkToken = (string)config["unk_token"];

            ContinuingSubwordPrefix = (string)config["continuing_subword_prefix"];
        }