def update_pkv_precision()

in optimum/intel/openvino/modeling_decoder.py [0:0]


    def update_pkv_precision(self, force_fp32=False):
        """
        Rebuild `self.model` so that its pkv (presumably past-key-values) precision matches the target type.

        The call is a no-op when the cache is not used, or when the model is stateful or compile-only.

        Args:
            force_fp32 (`bool`, defaults to `False`):
                When `True`, the model is switched back to `Type.f32`, but only if a previous call recorded a
                different precision in `self._pkv_precision`. When `False`, the precision is taken from the
                device's `INFERENCE_PRECISION_HINT` property (if it can be queried), and may then be overridden
                by `self.ov_config["INFERENCE_PRECISION_HINT"]`; `Type.f32` is used when neither applies.

        Whenever the model is rebuilt, `self.request` is reset to `None`.
        """
        if not (self.use_cache and not self.stateful and not self._compile_only):
            return

        fp32 = Type.f32

        if force_fp32:
            # Revert only if an earlier call actually moved away from f32.
            previously_changed = hasattr(self, "_pkv_precision") and self._pkv_precision != fp32
            if not previously_changed:
                return
            self.model = self._get_model_with_updated_pkv_precision(self.model, Type.f32)
            self._pkv_precision = Type.f32
            if self.is_dynamic:
                self.model = self._reshape(self.model, -1, -1)
            self.request = None
            return

        target_precision = fp32
        device_name = self._device.upper()
        try:
            supported_properties = core.get_property(device_name, "SUPPORTED_PROPERTIES")
            if "INFERENCE_PRECISION_HINT" in supported_properties:
                target_precision = core.get_property(device_name, "INFERENCE_PRECISION_HINT")
        except RuntimeError:
            # Keep the default precision when get_property fails, e.g. when device is "AUTO:GPU".
            pass

        # An explicit hint in ov_config takes precedence over the device's preferred precision.
        user_config = self.ov_config
        if user_config:
            hint = user_config.get("INFERENCE_PRECISION_HINT", "")
            if hint in STR_TO_OV_TYPE:
                target_precision = STR_TO_OV_TYPE[hint]

        self.model = self._get_model_with_updated_pkv_precision(self.model, target_precision)
        self._pkv_precision = target_precision
        self.request = None