optimum/exporters/onnx/model_configs.py
@property
def inputs(self) -> Dict[str, Dict[int, str]]:
# Batched inference is not supported in Transformers.
if self.model_part == "text_encoder":
common_inputs = {
"input_ids": {0: "batch_size", 1: "encoder_sequence_length"},
"attention_mask": {0: "batch_size", 1: "encoder_sequence_length"},
}
elif self.model_part == "encodec_decode":
        # audio_codes has shape (num_chunks, batch_size, num_quantizers, chunk_length):
        # dim 0 is always 1 for chunk_length_s=None and dim 2 (num_quantizers) is fixed,
        # so only dims 1 and 3 are dynamic.
common_inputs = {"audio_codes": {1: "batch_size", 3: "chunk_length"}}
elif self.model_part == "build_delay_pattern_mask":
common_inputs = {
"input_ids": {0: "batch_size_x_num_codebooks"},
"pad_token_id": {},
"max_length": {},
}
elif self._behavior is ConfigBehavior.DECODER:
        # Named total_batch_size because when guidance_scale is used (classifier-free
        # guidance), the batch is duplicated, so dimension 0 may be larger than batch_size.
# Reference: https://github.com/huggingface/transformers/blob/31c575bcf13c2b85b65d652dd1b5b401f99be999/src/transformers/models/musicgen/modeling_musicgen.py#L1932-L1935
common_inputs = {
"decoder_input_ids": {0: "total_batch_size_x_num_codebooks"},
"encoder_outputs": {0: "total_batch_size", 1: "encoder_sequence_length"},
# MusicgenForConditionalGeneration maps attention_mask to encoder_attention_mask.
"attention_mask": {
0: "batch_size",
1: "encoder_sequence_length",
},
}
if self.use_past_in_inputs:
# TODO: validate the axis name for attention_mask
# common_inputs["attention_mask"][1] = "past_encoder_sequence_length + sequence_length"
self.add_past_key_values(common_inputs, direction="inputs")
else:
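            # Without the KV cache the decoder consumes the full sequence, so the
            # sequence length dimension is dynamic too; with use_past_in_inputs only
            # one new token is fed per step, so dim 1 stays static.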
common_inputs["decoder_input_ids"] = {
0: "total_batch_size_x_num_codebooks",
1: "decoder_sequence_length",
}
    else:
        raise ValueError(
            f"Unexpected model_part={self.model_part!r} with behavior {self._behavior}. This should not happen."
            " Please open an issue at https://github.com/huggingface/optimum/issues."
        )
return common_inputs
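
# A minimal sketch (not part of the file above) of how such an axis mapping is
# consumed downstream: Optimum forwards these dicts as the `dynamic_axes`
# argument of torch.onnx.export. `ToyTextEncoder` is a made-up stand-in for the
# real text encoder, used only to show the mechanics.
import torch


class ToyTextEncoder(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.embed = torch.nn.Embedding(1000, 32)

    def forward(self, input_ids, attention_mask):
        # Zero out embeddings at padded positions, just to exercise both inputs.
        return self.embed(input_ids) * attention_mask.unsqueeze(-1).float()


model = ToyTextEncoder()
input_ids = torch.randint(0, 1000, (2, 12))
attention_mask = torch.ones(2, 12, dtype=torch.long)

torch.onnx.export(
    model,
    (input_ids, attention_mask),
    "text_encoder.onnx",
    input_names=["input_ids", "attention_mask"],
    output_names=["last_hidden_state"],
    # Same structure as the "text_encoder" branch above: dims 0 and 1 of both
    # inputs are symbolic, so any batch size / sequence length is accepted.
    dynamic_axes={
        "input_ids": {0: "batch_size", 1: "encoder_sequence_length"},
        "attention_mask": {0: "batch_size", 1: "encoder_sequence_length"},
        "last_hidden_state": {0: "batch_size", 1: "encoder_sequence_length"},
    },
)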
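
# Why dim 0 of decoder_input_ids is "total_batch_size_x_num_codebooks": the
# Musicgen decoder flattens the codebook dimension into the batch dimension,
# and classifier-free guidance (guidance_scale > 1) duplicates the batch, per
# the modeling_musicgen.py reference above. Illustrative shapes only.
import torch

batch_size, num_codebooks, seq_len = 2, 4, 10
codes = torch.zeros(batch_size, num_codebooks, seq_len, dtype=torch.long)

# (batch_size, num_codebooks, seq_len) -> (batch_size * num_codebooks, seq_len)
decoder_input_ids = codes.reshape(batch_size * num_codebooks, seq_len)
print(decoder_input_ids.shape)  # torch.Size([8, 10])

# With guidance, the conditional and unconditional batches are concatenated,
# so total_batch_size = 2 * batch_size and dim 0 becomes 16 here.
guided = torch.cat([decoder_input_ids, decoder_input_ids], dim=0)
print(guided.shape)  # torch.Size([16, 10])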