in dynalab/handler.py [0:0]
def preprocess(self, samples) -> dict:
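    # Preprocess each sample individually before batching.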
    samples = [self.preprocess_one(s) for s in samples]
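    # One target-language token per sample, shaped (batch_size, 1).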
    prefix_tokens = torch.tensor([[s["tgt_token"]] for s in samples])
    src_lengths = torch.tensor([s["src_length"] for s in samples])
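    # Pad the source token sequences into a single (batch_size, max_len) tensor.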
    src_tokens = data_utils.collate_tokens(
        [torch.tensor(s["src_tokens"]) for s in samples],
        self.vocab.pad(),
        self.vocab.eos(),
    )
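    # Assemble the batch in the format the fairseq model expects.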
    return {
        "nsentences": len(samples),
        "ntokens": src_lengths.sum().item(),
        "net_input": {
            "src_tokens": src_tokens.to(self.device),
            "src_lengths": src_lengths.to(self.device),
        },
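        # Prefix tokens steer generation to begin with the target-language token.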
"prefix_tokens": prefix_tokens.to(self.device),
}