in optimum/intel/openvino/modeling_decoder.py [0:0]
def update_pkv_precision(self, force_fp32=False):
    """Select a precision and rebuild ``self.model`` with it via
    ``_get_model_with_updated_pkv_precision``.

    Nothing happens when ``use_cache`` is falsy, or when either ``stateful``
    or ``_compile_only`` is truthy.

    Whenever the model is replaced, ``self.request`` is reset to ``None``
    (presumably so a fresh request is created from the new model later;
    confirm against the code that consumes ``self.request``).

    Args:
        force_fp32: When false (default), the precision is resolved from the
            device's ``INFERENCE_PRECISION_HINT`` property (if the device
            reports it as supported) and then possibly overridden by
            ``self.ov_config["INFERENCE_PRECISION_HINT"]``; the model is
            always rebuilt with the result. When true, the model is rebuilt
            with ``Type.f32`` only if a previously recorded
            ``_pkv_precision`` exists and differs from ``Type.f32``.
    """
    can_update = self.use_cache and not (self.stateful or self._compile_only)
    if not can_update:
        return

    fp32 = Type.f32

    if force_fp32:
        # Only revert when an earlier call recorded a non-f32 precision.
        was_changed = hasattr(self, "_pkv_precision") and self._pkv_precision != fp32
        if was_changed:
            self.model = self._get_model_with_updated_pkv_precision(self.model, fp32)
            self._pkv_precision = fp32
            if self.is_dynamic:
                self.model = self._reshape(self.model, -1, -1)
            self.request = None
        return

    target_precision = fp32
    device_name = self._device.upper()
    try:
        supported_properties = core.get_property(device_name, "SUPPORTED_PROPERTIES")
        if "INFERENCE_PRECISION_HINT" in supported_properties:
            target_precision = core.get_property(device_name, "INFERENCE_PRECISION_HINT")
    except RuntimeError:
        # Keep the default precision when get_property fails,
        # e.g. when device is "AUTO:GPU".
        pass

    # ov_config["INFERENCE_PRECISION_HINT"] may override the preferred precision.
    if self.ov_config:
        configured_hint = self.ov_config.get("INFERENCE_PRECISION_HINT", "")
        if configured_hint in STR_TO_OV_TYPE:
            target_precision = STR_TO_OV_TYPE[configured_hint]

    self.model = self._get_model_with_updated_pkv_precision(self.model, target_precision)
    self._pkv_precision = target_precision
    self.request = None