in optimum/intel/openvino/modeling_decoder.py [0:0]
def _get_past_length(self, past_key_values=None):
    """Return the length of the cached past for the given key/value cache.

    Args:
        past_key_values: The cache from a previous step, or ``None`` when
            there is no cache yet. Either a sequence of per-layer
            tuples/lists or a flat sequence of tensors is accepted.

    Returns:
        ``0`` when ``past_key_values`` is ``None``; ``self._past_length``
        when the model is stateful; otherwise the size of the
        sequence-length axis read from one cached tensor's ``shape``.
    """
    # No cache at all: nothing has been processed yet.
    if past_key_values is None:
        return 0

    # Stateful models track the length themselves; the passed cache is not inspected.
    if self.stateful:
        return self._past_length

    model_type = self.config.model_type

    # Multi-query attention models: read the length straight from the first
    # cache entry, except for falcon with the new decoder architecture,
    # which falls through to the generic handling below.
    if model_type in MULTI_QUERY_ATTN_MODELS:
        falcon_new_arch = model_type == "falcon" and self.config.new_decoder_architecture
        if not falcon_new_arch:
            return past_key_values[0].shape[-2]

    # Pick the axis that holds the sequence length for this model family.
    if model_type == "chatglm" and not hasattr(self.config, "rope_ratio"):
        seq_axis = 0
    elif model_type == "qwen":
        seq_axis = 1
    else:
        seq_axis = -2

    first_entry = past_key_values[0]
    if isinstance(first_entry, (tuple, list)):
        # Cache is a tuple of per-layer pairs: use the second tensor of the
        # first pair (presumably the value tensor -- TODO confirm).
        reference = first_entry[1]
    else:
        # Cache was flattened: use the second tensor of the flat sequence.
        reference = past_key_values[1]
    return reference.shape[seq_axis]