in grade_school_math/dataset.py [0:0]
def __getitem__(self, idx):
    """Return the padded token tensor and its mask for example ``idx``.

    The question ids and answer ids stored under ``"input_ids"`` in
    ``self.qns`` and ``self.ans`` are concatenated and then right-padded
    with zeros up to ``self.max_len``.

    The returned dict has two tensors built from equally long lists:

    * ``input_ids`` -- question ids, answer ids, then zero padding.
    * ``attention_mask`` -- ``int(self.loss_on_prefix)`` at every question
      position, ``1`` at every answer position, ``0`` at every padding
      position.
    """
    # NOTE(review): the mask is keyed "attention_mask", yet its question part
    # is driven by `loss_on_prefix` -- presumably it doubles as a loss mask;
    # confirm against the training loop.
    question_ids = self.qns["input_ids"][idx]
    answer_ids = self.ans["input_ids"][idx]
    n_question = len(question_ids)
    n_answer = len(answer_ids)

    # A non-positive repeat count yields an empty list, so no padding is
    # added when the pair already fills (or exceeds) max_len.
    padding = [0] * (self.max_len - n_question - n_answer)
    padded_ids = question_ids + answer_ids + padding

    prefix_flag = int(self.loss_on_prefix)
    mask_values = [prefix_flag] * n_question + [1] * n_answer + [0] * len(padding)

    return {
        "input_ids": th.tensor(padded_ids),
        "attention_mask": th.tensor(mask_values),
    }