in optimum/exporters/openvino/model_patcher.py [0:0]
def patched_forward(*args, **kwargs):
    """Call the original forward with a trimmed ``EncoderDecoderCache``.

    The incoming ``past_key_values`` (passed by keyword or positionally) is
    converted to the legacy format if it is an ``EncoderDecoderCache``, each
    per-layer item is cut down to its first two entries, and the result is
    wrapped back into an ``EncoderDecoderCache`` before the original forward
    is invoked. When such a conversion happened, the cache on the returned
    outputs is converted back to the legacy format.

    If no ``past_key_values`` is supplied (or it is ``None``), the original
    forward is called with the overridden arguments and its outputs are
    returned untouched.

    Raises:
        ValueError: if the original forward signature has no
            ``past_key_values`` parameter.
    """
    from transformers.cache_utils import EncoderDecoderCache

    forward_signature = inspect.signature(self.orig_forward)
    args, kwargs = override_arguments(
        args, kwargs, forward_signature, model_kwargs=self.model_kwargs
    )

    # The keyword form wins: it is always removed from kwargs here and only
    # re-inserted later if a non-None cache was actually supplied that way.
    incoming_cache = kwargs.pop("past_key_values", None)

    pkv_position = list(forward_signature.parameters).index("past_key_values")

    # Fall back to the positional slot only when the keyword gave nothing.
    taken_from_args = incoming_cache is None and len(args) > pkv_position
    if taken_from_args:
        incoming_cache = args[pkv_position]

    if incoming_cache is None:
        # Nothing to convert: plain pass-through.
        return model.__orig_forward(*args, **kwargs)

    if isinstance(incoming_cache, EncoderDecoderCache):
        incoming_cache = incoming_cache.to_legacy_cache()

    # Keep only the first two entries of every per-layer item
    # (presumably the self-attention key/value pair - TODO confirm).
    trimmed_layers = [layer_entry[:2] for layer_entry in incoming_cache]
    rebuilt_cache = EncoderDecoderCache.from_legacy_cache(trimmed_layers)

    # Put the rebuilt cache back exactly where the caller supplied it.
    if taken_from_args:
        # NOTE(review): requires `args` to support item assignment; assumes
        # override_arguments returns a list rather than a tuple - confirm.
        args[pkv_position] = rebuilt_cache
    else:
        kwargs["past_key_values"] = rebuilt_cache

    outputs = model.__orig_forward(*args, **kwargs)
    # A cache was converted on the way in, so hand back the legacy format.
    outputs.past_key_values = outputs.past_key_values.to_legacy_cache()
    return outputs