# embed() — method extracted from align/models.py

    def embed(self, words, langcode='en_XX'):
        """Return contextual embeddings for *words*, one vector per word.

        Each word is sub-tokenized; the whole sequence (plus the mBART-style
        ``</s>`` + language-code suffix) is run through the encoder once, and
        each word's embedding is the mean of its sub-token encoder states.

        Args:
            words: iterable of word strings to embed. Must be non-empty and
                each word must produce at least one sub-token, otherwise the
                final concatenation / mean is undefined (NaN or error) —
                NOTE(review): original code has the same limitation.
            langcode: mBART language-code token appended after ``</s>``.

        Returns:
            Tensor of per-word features. Assuming the fairseq encoder emits
            ``encoder_out`` of shape (time, batch, dim) — TODO confirm —
            the result is (len(words) * batch, dim); with the single-sequence
            batch built here, effectively (len(words), dim).
        """
        # Sub-tokenize each word, remembering its [left, right) span in the
        # flat token stream so we can pool its encoder states later.
        left_bounds, right_bounds, tokens = [], [], []
        for word in words:
            pieces = self.tokenizer.tokenize(word)
            left_bounds.append(len(tokens))
            tokens.extend(pieces)
            right_bounds.append(len(tokens))

        # mBART source format: tokens </s> <langcode>. The batch always holds
        # exactly one sequence; the padding logic below is therefore a no-op,
        # but is kept for parity with multi-sequence batching.
        batch = [tokens + ['</s>', langcode]]
        lengths = [len(seq) for seq in batch]
        max_length = max(lengths)
        word_ids = [
            self.tokenizer.convert_tokens_to_ids(
                ['<pad>'] * (max_length - len(seq)) + seq
            )
            for seq in batch
        ]

        encoder_input = {
            'src_tokens': torch.tensor(word_ids).to(self.device),
            'src_lengths': torch.tensor(lengths).to(self.device),
        }
        # Inference only: no_grad avoids building the autograd graph that
        # the original code constructed and immediately discarded via detach.
        with torch.no_grad():
            encoder_outs = self.model.forward_encoder(encoder_input)
        # Renamed from `np_encoder_outs`: this is a torch tensor, not NumPy.
        encoder_states = encoder_outs[0].encoder_out.float().detach()

        # Mean-pool each word's sub-token states along the time axis.
        word_features = [
            encoder_states[lb:rb].mean(0)
            for lb, rb in zip(left_bounds, right_bounds)
        ]
        return torch.cat(word_features, dim=0)