def to_index()

in datasets/audioset.py [0:0]


    def to_index(self, line):
        """Map ``line`` to a ``torch.LongTensor`` of vocabulary indices.

        When ``self.lexicon`` is ``None`` the elements obtained by iterating
        ``line`` are looked up directly in ``self.graphemes_to_index``.
        Otherwise a non-empty ``line`` is split on ``self.wordsep``, every
        word is replaced by its lexicon entry, and the resulting tokens are
        looked up in ``self.tokens_to_index``.

        Args:
            line: The text to encode. It must provide ``split`` when a
                lexicon is in use (presumably a ``str`` -- confirm with
                callers).

        Returns:
            A ``torch.LongTensor`` holding one index per emitted symbol,
            preceded by the index of ``self.wordsep`` when
            ``self._prepend_wordsep`` is truthy.

        A symbol missing from the selected mapping is not handled here; the
        mapping's own lookup error propagates to the caller.
        """
        index_of = self.graphemes_to_index
        symbols = line

        if self.lexicon is not None:
            # An empty line is left untouched so that it does not turn into
            # a single empty "word" after splitting.
            if line:
                symbols = []
                for word in line.split(self.wordsep):
                    # Words absent from the lexicon fall back to their
                    # letters, introduced by the word separator.
                    spelling = self.lexicon.get(word, self.wordsep + word)
                    symbols.extend(spelling)
            index_of = self.tokens_to_index

        # Some targets must begin with the word separator, for example when
        # learning word piece decompositions.
        if self._prepend_wordsep:
            symbols = [self.wordsep, *symbols]

        indices = [index_of[symbol] for symbol in symbols]
        return torch.LongTensor(indices)