in empchat/datasets/parlai_dictionary.py [0:0]
def create_from_reddit_style(reddit_style_dic):
    """Build a ParlAIDictionary from a reddit-style dictionary.

    Args:
        reddit_style_dic: Mapping with the following entries:
            "words": mapping whose key/value pairs are copied into
                ``tok2ind`` (presumably token -> index; confirm with caller).
            "iwords": sequence whose elements are stored in ``ind2tok``
                under their position (0, 1, 2, ...).
            "bert_tokenizer" (optional): copied onto the result as
                ``bert_tokenizer`` when the key is present.

    Returns:
        A new ParlAIDictionary populated from ``reddit_style_dic``, with
        ``null_token`` set to "<PAD>" and ``unk_token`` set to "<UNK>".

    Raises:
        KeyError: if "words" or "iwords" is missing from a dict input.
    """
    res = ParlAIDictionary()
    for token, index in reddit_style_dic["words"].items():
        res.tok2ind[token] = index
    for index, token in enumerate(reddit_style_dic["iwords"]):
        res.ind2tok[index] = token
    # The special tokens are fixed literals; they are deliberately not read
    # back from the last entries of ind2tok.
    res.null_token = "<PAD>"
    res.unk_token = "<UNK>"
    if "bert_tokenizer" in reddit_style_dic:
        res.bert_tokenizer = reddit_style_dic["bert_tokenizer"]
    return res