def build()

in data.py [0:0]


    def build(self, path):
        """Count every token found at ``path`` and then build the indices.

        Each line of the file at ``path`` is tokenized with the class's
        ``_split_line`` and followed by an ``"<eos>"`` marker; every
        resulting token is handed to ``self.add_count``. When a sibling file
        named ``path + ".labels"`` exists, its lines are split on whitespace
        and counted the same way. ``self.build_indices()`` runs last.

        Args:
            path: Location of the text file to read (decoded as UTF-8).

        Raises:
            AssertionError: If ``path`` does not exist (only while
                assertions are enabled).
        """
        assert os.path.exists(path)

        # Pass 1: tokens of the main file, one "<eos>" per line.
        with open(path, "r", encoding="utf8") as corpus:
            for raw_line in corpus:
                tokens = type(self)._split_line(raw_line) + ["<eos>"]
                for token in tokens:
                    self.add_count(token)

        # Pass 2: the optional companion ".labels" file, split on whitespace.
        labels_path = path + ".labels"
        if os.path.exists(labels_path):
            with open(labels_path, "r", encoding="utf8") as labels:
                for raw_line in labels:
                    label_tokens = raw_line.split()
                    label_tokens.append("<eos>")
                    for token in label_tokens:
                        self.add_count(token)

        # All counts are in; let the dictionary derive its indices from them.
        self.build_indices()