in empchat/datasets/parlai_dictionary.py [0:0]
def __init__(self, file_path=None):
    """
    Initialize the dictionary from the same kind of file that ParlAI's
    dictionary uses: one ``token<TAB>frequency`` entry per line.

    Tokens receive consecutive indices starting at 0, in order of first
    appearance. A token that shows up again later in the file is ignored
    (its first index and frequency are kept).

    Args:
        file_path: Path to the dictionary file. When ``None`` nothing is
            read and the dictionary stays empty.

    Attributes set on the instance:
        tok2ind: mapping token -> index.
        ind2tok: mapping index -> token.
        freq: mapping token -> integer frequency (second column).
        null_token: the last token loaded, or ``None`` when no token was
            loaded.
        unk_token: the second-to-last token loaded, or ``None`` when
            fewer than two tokens were loaded.

    Raises:
        OSError: if ``file_path`` cannot be opened.
        IndexError: if a line introducing a new token has no
            tab-separated frequency column.
        ValueError: if a frequency is not an integer, or the file is not
            valid UTF-8.
    """
    self.tok2ind = {}
    self.ind2tok = {}
    self.freq = {}
    print(f"Loading dictionary from {file_path}")
    # Bound before the optional load so the special-token lookup below is
    # well defined even when no file is given.
    counter = 0
    if file_path is not None:
        # Explicit encoding so decoding does not depend on the platform
        # locale.
        # NOTE(review): assumes the dictionary file is UTF-8, as ParlAI
        # writes it -- confirm for files produced by other tools.
        with open(file_path, "r", encoding="utf-8") as f:
            for line in f:
                # Strip only the line terminator. Slicing off the last
                # character unconditionally would truncate the frequency
                # of a final line that has no trailing newline.
                fields = line.rstrip("\n").split("\t")
                token = fields[0]
                if token not in self.tok2ind:
                    self.tok2ind[token] = counter
                    self.ind2tok[counter] = token
                    self.freq[token] = int(fields[1])
                    counter += 1
    # The special tokens are taken from the last two entries loaded.
    # ``.get`` yields None instead of raising when the dictionary holds
    # fewer than two tokens (including the ``file_path=None`` case).
    self.null_token = self.ind2tok.get(counter - 1)
    self.unk_token = self.ind2tok.get(counter - 2)