in code/data_prep.py [0:0]
def __getitem__(self, item):
    """Return the tokenized example stored at index ``item``.

    Args:
        item: Index used to look up ``self.sequence`` and ``self.targets``.

    Returns:
        A dict with four entries:

        * ``'protein_sequence'`` -- the raw sequence, coerced to ``str``.
        * ``'input_ids'`` -- the tokenizer's ``input_ids``, flattened to 1-D.
        * ``'attention_mask'`` -- the tokenizer's ``attention_mask``,
          flattened to 1-D.
        * ``'targets'`` -- the target wrapped in a ``torch.long`` tensor.
    """
    sequence = str(self.sequence[item])
    target = self.targets[item]

    # Call the tokenizer directly instead of going through ``encode_plus``:
    # Hugging Face documents ``encode_plus`` as deprecated in favour of
    # ``__call__``, which accepts the same keyword arguments.
    # NOTE(review): assumes ``self.tokenizer`` is a Hugging Face tokenizer
    # (or any callable with the same keyword interface) -- confirm.
    encoding = self.tokenizer(
        sequence,
        truncation=True,
        add_special_tokens=True,
        max_length=self.max_len,
        return_token_type_ids=False,
        # Truncation plus 'max_length' padding requests a fixed length of
        # ``self.max_len`` for every example.
        padding='max_length',
        return_attention_mask=True,
        return_tensors='pt',
    )

    return {
        'protein_sequence': sequence,
        # flatten() turns the returned tensors into 1-D tensors; presumably
        # they arrive with a leading batch axis of size 1 -- TODO confirm.
        'input_ids': encoding['input_ids'].flatten(),
        'attention_mask': encoding['attention_mask'].flatten(),
        'targets': torch.tensor(target, dtype=torch.long),
    }