in optimum/intel/openvino/modeling_decoder.py
def _deduplicate_inputs(self, model_inputs: Dict):
    input_ids = model_inputs["input_ids"]
    upd_model_inputs = {}
    # Collapse duplicated prompts in the batch; reverse_indicies lets callers restore the original batch order later.
    unique_input_ids, indicies, reverse_indicies = np.unique(
        input_ids, axis=0, return_index=True, return_inverse=True
    )
    export_transformers_version = get_export_transformers_version(self.model, self.config)
    for input_name, input_tensor in model_inputs.items():
        if input_name not in ["input_ids", "beam_idx"]:
            if input_name not in self.key_value_input_names:
                # Regular inputs (e.g. attention_mask, position_ids): keep only the rows of the unique prompts.
                upd_model_inputs[input_name] = input_tensor[indicies]
            else:
                # Past key/value inputs: allocate an empty tensor whose batch dimension is shrunk to the number of unique prompts.
                shape = input_tensor.shape if isinstance(input_tensor, Tensor) else list(input_tensor.shape)
                dtype = input_tensor.element_type if isinstance(input_tensor, Tensor) else Type(input_tensor.dtype)
                upd_batch_size = indicies.shape[0]
                # Bloom models exported with transformers < 4.44 fuse batch and attention heads in the KV-cache batch dimension.
                if self.config.model_type == "bloom" and compare_versions(
                    export_transformers_version, "<", "4.44"
                ):
                    upd_batch_size *= self.config.num_attention_heads
                # chatglm models without rope_ratio keep the batch dimension at index 1 instead of 0.
                batch_dim = (
                    1 if self.config.model_type == "chatglm" and not hasattr(self.config, "rope_ratio") else 0
                )
                shape[batch_dim] = upd_batch_size
                upd_model_inputs[input_name] = Tensor(dtype, shape)
    upd_model_inputs["input_ids"] = unique_input_ids
    if "beam_idx" in model_inputs:
        # Rebuild beam_idx to match the deduplicated batch (scaled by the head count for the legacy Bloom KV layout).
        beam_range = (
            unique_input_ids.shape[0] * self.config.num_attention_heads
            if (self.config.model_type == "bloom" and compare_versions(export_transformers_version, "<", "4.44"))
            else unique_input_ids.shape[0]
        )
        beam_idx = np.arange(beam_range, dtype=int)
        upd_model_inputs["beam_idx"] = beam_idx
    return upd_model_inputs, reverse_indicies
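
For context, a minimal self-contained sketch (toy data, not part of the file above) of the np.unique dedup/restore pattern this method builds on: only the unique prompts are sent to the model, and reverse_indicies is used afterwards to expand the per-prompt outputs back to the original batch order.

import numpy as np

# Toy batch of token ids: rows 0 and 2 are the same prompt.
input_ids = np.array(
    [
        [1, 5, 7],
        [2, 2, 9],
        [1, 5, 7],
    ]
)

unique_input_ids, indicies, reverse_indicies = np.unique(
    input_ids, axis=0, return_index=True, return_inverse=True
)
reverse_indicies = reverse_indicies.reshape(-1)  # normalize shape across numpy versions

# Stand-in for per-prompt model outputs (hypothetical, just to show the expansion step).
fake_outputs = unique_input_ids.sum(axis=1, keepdims=True).astype(float)

# Broadcast the unique-prompt outputs back to the original batch order.
restored = fake_outputs[reverse_indicies]
assert restored.shape[0] == input_ids.shape[0]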