in training/flax/run_eval.py [0:0]
def __call__(self, features: List[Dict[str, Union[List[int], np.ndarray]]]) -> Dict[str, np.ndarray]:
# split inputs and labels since they have to be of different lengths and need
# different padding methods
model_input_name = self.processor.model_input_names[0]
# dataloader returns a list of features which we convert to a dict
input_features = {model_input_name: [feature[model_input_name] for feature in features]}
label_features = {"input_ids": [feature["labels"] for feature in features]}
# reformat list to dict and set to pytorch format
batch = self.processor.feature_extractor.pad(
input_features,
padding=self.input_padding,
return_tensors="np",
)
labels_batch = self.processor.tokenizer.pad(
label_features,
max_length=self.max_target_length,
padding=self.target_padding,
return_tensors="np",
)
# if bos token is appended in previous tokenization step,
# cut bos token here as it's append later anyways
labels = labels_batch["input_ids"]
if (labels[:, 0] == self.decoder_start_token_id).all().item():
labels = labels[:, 1:]
labels_batch.attention_mask = labels_batch.attention_mask[:, 1:]
decoder_input_ids = shift_tokens_right(labels, self.decoder_start_token_id)
# replace padding with -100 to ignore correctly when computing the loss
labels = np.ma.array(labels, mask=np.not_equal(labels_batch.attention_mask, 1))
labels = labels.filled(fill_value=-100)
batch["labels"] = labels
batch["decoder_input_ids"] = decoder_input_ids
if self.log_audio:
audio_samples = [feature[self.audio_column_name] for feature in features]
batch["audio"] = audio_samples
return batch