in data.py [0:0]
def build(self, path):
    """Count every token of the corpus at ``path``, then build the indices.

    Each line of ``path`` is tokenized with the class's ``_split_line`` and
    followed by an ``"<eos>"`` marker; every resulting token is passed to
    ``self.add_count``. If a sibling file ``path + ".labels"`` exists, its
    lines are split on whitespace and counted the same way. Once all tokens
    have been counted, ``self.build_indices()`` is called.

    Args:
        path: Path (string) of the corpus text file, read as UTF-8.

    Raises:
        FileNotFoundError: If ``path`` does not exist.
    """
    # Validate explicitly instead of using ``assert``: assertions are removed
    # under ``python -O``, which would make this check silently disappear.
    if not os.path.exists(path):
        raise FileNotFoundError("corpus file not found: {!r}".format(path))

    # The main corpus uses the class tokenizer; the optional labels file is
    # plain whitespace-separated text.
    sources = [(path, type(self)._split_line)]
    labels_path = path + ".labels"
    if os.path.exists(labels_path):
        sources.append((labels_path, str.split))

    # Add words to the dictionary
    for source_path, tokenize in sources:
        with open(source_path, "r", encoding="utf8") as f:
            for line in f:
                for word in tokenize(line) + ["<eos>"]:
                    self.add_count(word)

    # Sort dictionary by count and build indices accordingly
    # (behaviour of build_indices is defined elsewhere in the class).
    self.build_indices()