in training/data.py [0:0]
def __call__(self, features: List[Dict[str, Union[List[int], torch.Tensor]]]) -> Dict[str, torch.Tensor]:
    """Collate a list of per-example features into one padded batch.

    Labels and token ids are padded separately: labels are padded with
    -100 by torch, while the description and prompt token ids are padded
    by their respective tokenizers.

    Args:
        features: One dict per example. Each dict must provide the keys
            "labels", "input_ids" and "prompt_input_ids". "labels" may be
            a nested list or an existing tensor; presumably it is laid
            out as (num_codebooks, seq_len), since it is transposed
            before padding -- TODO confirm against the dataset.

    Returns:
        A dict containing:
          * "labels": the padded labels, padded with -100;
          * every key returned by ``self.description_tokenizer.pad``
            (merged at the top level of the batch);
          * "prompt_input_ids": the padded prompt ids;
          * "prompt_attention_mask": only when the prompt tokenizer
            returned an "attention_mask".
    """
    # torch.as_tensor accepts both nested lists and existing tensors. Unlike
    # torch.tensor it does not emit the "copy construct from a tensor"
    # UserWarning (or make a redundant copy) when labels are already tensors.
    # pad_sequence below allocates a fresh output, so the batch never aliases
    # the caller's data.
    labels = [torch.as_tensor(feature["labels"]).transpose(0, 1) for feature in features]
    # (bsz, seq_len, num_codebooks)
    labels = torch.nn.utils.rnn.pad_sequence(labels, batch_first=True, padding_value=-100)
    if self.audio_max_length is not None and self.padding == "max_length":
        # Only ever pads: a batch already longer than audio_max_length is
        # left as is (the pad amount is clamped at 0), never truncated.
        missing = max(self.audio_max_length - labels.shape[1], 0)
        # F.pad reads `pad` from the last dim backwards: (0, 0) leaves the
        # last dim alone, (0, missing) right-pads the sequence dim.
        labels = torch.nn.functional.pad(labels, pad=(0, 0, 0, missing), value=-100)

    input_ids = [{"input_ids": feature["input_ids"]} for feature in features]
    input_ids = self.description_tokenizer.pad(
        input_ids,
        return_tensors="pt",
        padding=self.padding,
        pad_to_multiple_of=self.pad_to_multiple_of,
        max_length=self.description_max_length,
    )

    # The description tokenizer's outputs keep their own key names.
    batch = {"labels": labels, **input_ids}

    prompt_input_ids = [{"input_ids": feature["prompt_input_ids"]} for feature in features]
    prompt_input_ids = self.prompt_tokenizer.pad(
        prompt_input_ids,
        return_tensors="pt",
        padding=self.padding,
        pad_to_multiple_of=self.pad_to_multiple_of,
        max_length=self.prompt_max_length,
    )

    # Prompt outputs are stored under "prompt_"-prefixed keys so they do not
    # overwrite the description entries merged into the batch above.
    batch["prompt_input_ids"] = prompt_input_ids["input_ids"]
    if "attention_mask" in prompt_input_ids:
        batch["prompt_attention_mask"] = prompt_input_ids["attention_mask"]

    return batch