in datasets/iamdb.py [0:0]
def to_index(self, line):
    """Convert a transcription line into a ``torch.LongTensor`` of indices.

    Without a lexicon (``self.lexicon is None``) every character of
    ``line`` is looked up in ``self.graphemes_to_index``.

    With a lexicon, a non-empty ``line`` is split on ``self.wordsep`` and
    each word is replaced by its lexicon entry; the resulting tokens are
    looked up in ``self.tokens_to_index``.

    When ``self._prepend_wordsep`` is truthy, ``self.wordsep`` is placed in
    front of the token sequence before the lookup.
    """
    index_of = self.graphemes_to_index
    symbols = line
    if self.lexicon is not None:
        if len(symbols) > 0:
            expanded = []
            for word in symbols.split(self.wordsep):
                # A word missing from the lexicon is spelled out instead:
                # the separator followed by the word's own characters.
                expanded.extend(
                    self.lexicon.get(word, self.wordsep + word)
                )
            symbols = expanded
        # Lexicon entries are tokens, so switch to the token table.
        index_of = self.tokens_to_index
    if self._prepend_wordsep:
        symbols = itertools.chain([self.wordsep], symbols)
    indices = [index_of[symbol] for symbol in symbols]
    return torch.LongTensor(indices)