in src/exporters/coreml/config.py
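# Excerpt note: `OrderedDict` comes from the standard library's `collections`;
# `InputDescription` is presumably the dataclass defined earlier in this module.
# Its constructor takes a name, a description, and optional sequence_length /
# color_layout keyword arguments, as used below.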
def _input_descriptions(self) -> "OrderedDict[str, InputDescription]":
    if self.modality in ["text", "audio"] and self.seq2seq == "decoder":
        return OrderedDict(
            [
                (
                    "decoder_input_ids",
                    InputDescription(
                        "decoder_input_ids",
                        "Indices of decoder input sequence tokens in the vocabulary",
                    ),
                ),
                (
                    "decoder_attention_mask",
                    InputDescription(
                        "decoder_attention_mask",
                        "Mask to avoid performing attention on padding token indices (1 = not masked, 0 = masked)",
                    ),
                ),
                (
                    "encoder_outputs",
                    InputDescription(
                        "encoder_last_hidden_state",
                        "Sequence of hidden states at the output of the last layer of the encoder",
                    ),
                ),
                (
                    "attention_mask",
                    InputDescription(
                        "encoder_attention_mask",
                        "Mask to avoid performing attention on padding token indices (1 = not masked, 0 = masked)",
                    ),
                ),
            ]
        )
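    # Note: each dict key is the model's forward-argument name, while the first
    # InputDescription argument is the name the input will carry in the exported
    # Core ML model; e.g. "encoder_outputs" above is exposed to Core ML as
    # "encoder_last_hidden_state".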
    if self.modality == "text" and self.task in [
        "feature-extraction",
        "text-generation",
        "fill-mask",
        "question-answering",
        "text-classification",
        "text2text-generation",
        "token-classification",
    ]:
        return OrderedDict(
            [
                (
                    "input_ids",
                    InputDescription(
                        "input_ids",
                        "Indices of input sequence tokens in the vocabulary",
                        sequence_length=self.input_ids_sequence_length,
                    ),
                ),
                (
                    "attention_mask",
                    InputDescription(
                        "attention_mask",
                        "Mask to avoid performing attention on padding token indices (1 = not masked, 0 = masked)",
                    ),
                ),
            ]
        )
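    # Only "input_ids" constrains the sequence axis here (via
    # self.input_ids_sequence_length); the attention mask is described
    # without an explicit sequence_length.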
    if self.task in [
        "multiple-choice",
        "next-sentence-prediction",
    ]:
        return OrderedDict(
            [
                (
                    "input_ids",
                    InputDescription(
                        "input_ids",
                        "Indices of input sequence tokens in the vocabulary",
                        sequence_length=self.input_ids_sequence_length,
                    ),
                ),
                (
                    "attention_mask",
                    InputDescription(
                        "attention_mask",
                        "Mask to avoid performing attention on padding token indices (1 = not masked, 0 = masked)",
                    ),
                ),
                (
                    "token_type_ids",
                    InputDescription(
                        "token_type_ids",
                        "Segment token indices to indicate first and second portions of the inputs (0 = sentence A, 1 = sentence B)",
                    ),
                ),
            ]
        )
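    # This branch checks only self.task: multiple-choice and
    # next-sentence-prediction models additionally expose "token_type_ids"
    # to distinguish sentence A (0) from sentence B (1).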
    if self.modality == "vision" and self.task in [
        "feature-extraction",
        "object-detection",
        "semantic-segmentation",
    ]:
        return OrderedDict(
            [
                (
                    "pixel_values",
                    InputDescription("image", "Input image", color_layout="RGB"),
                ),
            ]
        )
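    # Vision models map the "pixel_values" forward argument to an input named
    # "image"; the color_layout="RGB" hint presumably tells the converter to
    # expose it as an image-typed Core ML input rather than a raw tensor.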
    if self.task == "image-classification":
        return OrderedDict(
            [
                (
                    "pixel_values",
                    InputDescription("image", "Image to be classified", color_layout="RGB"),
                ),
            ]
        )
    if self.task == "masked-im":
        return OrderedDict(
            [
                (
                    "pixel_values",
                    InputDescription("image", "Input image", color_layout="RGB"),
                ),
                (
                    "bool_masked_pos",
                    InputDescription("bool_masked_pos", "Indicates which patches are masked (1) and which aren't (0)"),
                ),
            ]
        )
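    # Masked image modeling additionally takes "bool_masked_pos", a patch-level
    # mask telling the model which patches were hidden from the input.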
    if self.modality == "audio":
        if self._get_mel_bins() > 0:
            audio_input = (
                "input_features",
                InputDescription(
                    "input_features",
                    "Mel features extracted from the raw speech waveform",
                    sequence_length=(1, -1),
                ),
            )
        else:
            audio_input = (
                "input_values",
                InputDescription(
                    "input_values",
                    "Raw speech waveform",
                    sequence_length=(1, -1),
                ),
            )
        return OrderedDict(
            [
                audio_input,
                (
                    "attention_mask",
                    InputDescription(
                        "attention_mask",
                        "Mask to avoid performing attention on padding token indices (1 = not masked, 0 = masked)",
                    ),
                ),
            ]
        )
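    # Feature-extractor models (mel bins > 0, e.g. Whisper-style) consume
    # "input_features", while raw-waveform models consume "input_values".
    # The sequence_length=(1, -1) tuple presumably declares a flexible
    # length range with no fixed upper bound.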
    raise AssertionError(f"Unsupported task '{self.task}' for modality '{self.modality}'")
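
# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the original file). A minimal, runnable
# approximation of how the OrderedDict returned above pairs forward-argument
# names (the dict keys) with exported Core ML input names
# (InputDescription.name). `_SketchInputDescription` is a hypothetical
# stand-in; its field names are assumptions inferred from the call sites
# above, not the real dataclass.

from collections import OrderedDict
from dataclasses import dataclass
from typing import Optional, Tuple, Union

@dataclass
class _SketchInputDescription:
    name: str
    description: str = ""
    sequence_length: Optional[Union[int, Tuple[int, int]]] = None
    color_layout: Optional[str] = None

def _sketch_decoder_inputs() -> "OrderedDict[str, _SketchInputDescription]":
    # Mirrors the seq2seq decoder branch: "encoder_outputs" (forward-argument
    # name) maps to "encoder_last_hidden_state" (Core ML input name).
    return OrderedDict(
        [
            ("decoder_input_ids", _SketchInputDescription("decoder_input_ids")),
            ("encoder_outputs", _SketchInputDescription("encoder_last_hidden_state")),
        ]
    )

if __name__ == "__main__":
    for key, desc in _sketch_decoder_inputs().items():
        print(f"{key} -> {desc.name}")
    # Expected output:
    # decoder_input_ids -> decoder_input_ids
    # encoder_outputs -> encoder_last_hidden_state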